diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index b1607f8f306d..6c0ef7571ffe 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -669,7 +669,7 @@ impl ParentState<'_> { } } - // Return the offset of the underlying buffer at the time of calling this method. + // Return the current offset of the underlying buffer. Used as a savepoint for rollback. fn buffer_current_offset(&self) -> usize { match self { ParentState::Variant { buffer, .. } @@ -678,8 +678,9 @@ impl ParentState<'_> { } } - // Return the current index of the undelying metadata buffer at the time of calling this method. - fn metadata_current_offset(&self) -> usize { + // Return the current dictionary size of the undelying metadata builder. Used as a savepoint for + // rollback. + fn metadata_num_fields(&self) -> usize { match self { ParentState::Variant { metadata_builder, .. @@ -689,7 +690,7 @@ impl ParentState<'_> { } | ParentState::List { metadata_builder, .. - } => metadata_builder.metadata_buffer.len(), + } => metadata_builder.field_names.len(), } } } @@ -1021,6 +1022,28 @@ impl VariantBuilder { self } + /// Builder-style API for appending a value to the list and returning self to enable method chaining. + /// + /// # Panics + /// + /// This method will panic if the variant contains duplicate field names in objects + /// when validation is enabled. For a fallible version, use [`ListBuilder::try_with_value`]. + pub fn with_value<'m, 'd, T: Into>>(mut self, value: T) -> Self { + self.append_value(value); + self + } + + /// Builder-style API for appending a value to the list and returns self for method chaining. + /// + /// This is the fallible version of [`ListBuilder::with_value`]. + pub fn try_with_value<'m, 'd, T: Into>>( + mut self, + value: T, + ) -> Result { + self.try_append_value(value)?; + Ok(self) + } + /// This method reserves capacity for field names in the Variant metadata, /// which can improve performance when you know the approximate number of unique field /// names that will be used across all objects in the [`Variant`]. @@ -1130,7 +1153,7 @@ pub struct ListBuilder<'a> { impl<'a> ListBuilder<'a> { fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> Self { let parent_value_offset_base = parent_state.buffer_current_offset(); - let parent_metadata_offset_base = parent_state.metadata_current_offset(); + let parent_metadata_offset_base = parent_state.metadata_num_fields(); Self { parent_state, offsets: vec![], @@ -1312,7 +1335,7 @@ pub struct ObjectBuilder<'a> { impl<'a> ObjectBuilder<'a> { fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> Self { let offset_base = parent_state.buffer_current_offset(); - let meta_offset_base = parent_state.metadata_current_offset(); + let meta_offset_base = parent_state.metadata_num_fields(); Self { parent_state, fields: IndexMap::new(), @@ -2928,13 +2951,16 @@ mod tests { // The parent object should only contain the original fields object_builder.finish().unwrap(); let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); - assert_eq!(metadata.len(), 1); - assert_eq!(&metadata[0], "second"); + assert_eq!(metadata.len(), 2); + assert_eq!(&metadata[0], "first"); + assert_eq!(&metadata[1], "second"); let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); let obj = variant.as_object().unwrap(); - assert_eq!(obj.len(), 1); + assert_eq!(obj.len(), 2); + assert_eq!(obj.get("first"), Some(Variant::Int8(1))); assert_eq!(obj.get("second"), Some(Variant::Int8(2))); } @@ -2979,13 +3005,16 @@ mod tests { // The parent object should only contain the original fields object_builder.finish().unwrap(); let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); - assert_eq!(metadata.len(), 1); // the fields of nested_object_builder has been rolled back - assert_eq!(&metadata[0], "second"); + assert_eq!(metadata.len(), 2); // the fields of nested_object_builder has been rolled back + assert_eq!(&metadata[0], "first"); + assert_eq!(&metadata[1], "second"); let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); let obj = variant.as_object().unwrap(); - assert_eq!(obj.len(), 1); + assert_eq!(obj.len(), 2); + assert_eq!(obj.get("first"), Some(Variant::Int8(1))); assert_eq!(obj.get("second"), Some(Variant::Int8(2))); } @@ -3121,4 +3150,50 @@ mod tests { builder.finish() } + + // Make sure that we can correctly build deeply nested objects even when some of the nested + // builders don't finish. + #[test] + fn test_append_list_object_list_object() { + // An infinite counter + let mut counter = 0..; + let mut take = move |i| (&mut counter).take(i).collect::>(); + let mut builder = VariantBuilder::new(); + let skip = 5; + { + let mut list = builder.new_list(); + for i in take(4) { + let mut object = list.new_object(); + for i in take(4) { + let field_name = format!("field{i}"); + let mut list = object.new_list(&field_name); + for i in take(3) { + let mut object = list.new_object(); + for i in take(3) { + if i % skip != 0 { + object.insert(&format!("field{i}"), i); + } + } + if i % skip != 0 { + object.finish().unwrap(); + } + } + if i % skip != 0 { + list.finish(); + } + } + if i % skip != 0 { + object.finish().unwrap(); + } + } + list.finish(); + } + let (metadata, value) = builder.finish(); + let v1 = Variant::try_new(&metadata, &value).unwrap(); + + let (metadata, value) = VariantBuilder::new().with_value(v1.clone()).finish(); + let v2 = Variant::try_new(&metadata, &value).unwrap(); + + assert_eq!(format!("{v1:?}"), format!("{v2:?}")); + } }