diff --git a/encodings/sparse/src/canonical.rs b/encodings/sparse/src/canonical.rs index 441ee671898..e3b5a2deb0e 100644 --- a/encodings/sparse/src/canonical.rs +++ b/encodings/sparse/src/canonical.rs @@ -50,6 +50,7 @@ use vortex_buffer::buffer_mut; use vortex_error::VortexError; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_panic; use crate::ConstantArray; @@ -117,6 +118,7 @@ pub(super) fn execute_sparse( execute_sparse_fixed_size_list(array, *nullability, ctx)? } DType::Extension(_ext_dtype) => todo!(), + DType::Variant => vortex_bail!("Sparse canonicalization does not support Variant"), }) } diff --git a/fuzz/src/array/compare.rs b/fuzz/src/array/compare.rs index 698885afa9c..99b19a802c8 100644 --- a/fuzz/src/array/compare.rs +++ b/fuzz/src/array/compare.rs @@ -141,7 +141,7 @@ pub fn compare_canonical_array( ) .into_array() } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/filter.rs b/fuzz/src/array/filter.rs index fb4f96cd484..baef9ea630b 100644 --- a/fuzz/src/array/filter.rs +++ b/fuzz/src/array/filter.rs @@ -115,7 +115,7 @@ pub fn filter_canonical_array(array: &ArrayRef, filter: &[bool]) -> VortexResult } take_canonical_array_non_nullable_indices(array, indices.as_slice()) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/mod.rs b/fuzz/src/array/mod.rs index dbeebaa592a..8d0f2da9669 100644 --- a/fuzz/src/array/mod.rs +++ b/fuzz/src/array/mod.rs @@ -488,6 +488,8 @@ fn actions_for_dtype(dtype: &DType) -> HashSet { // Extension types delegate to storage dtype, support most operations ActionType::iter().collect() } + // Currently, no support at all + DType::Variant => unreachable!("Variant dtype shouldn't be fuzzed"), } } diff --git a/fuzz/src/array/search_sorted.rs b/fuzz/src/array/search_sorted.rs index dac57c8d297..2697b7081f7 100644 --- a/fuzz/src/array/search_sorted.rs +++ b/fuzz/src/array/search_sorted.rs @@ -131,7 +131,7 @@ pub fn search_sorted_canonical_array( .collect::>>()?; scalar_vals.search_sorted(&scalar.cast(array.dtype())?, side) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/slice.rs b/fuzz/src/array/slice.rs index ca0524949cf..04492a3786f 100644 --- a/fuzz/src/array/slice.rs +++ b/fuzz/src/array/slice.rs @@ -114,7 +114,7 @@ pub fn slice_canonical_array( .into_array(), ) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/sort.rs b/fuzz/src/array/sort.rs index f6bce78f621..96072bef56e 100644 --- a/fuzz/src/array/sort.rs +++ b/fuzz/src/array/sort.rs @@ -81,7 +81,7 @@ pub fn sort_canonical_array(array: &ArrayRef) -> VortexResult { }); take_canonical_array_non_nullable_indices(array, &sort_indices) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/fuzz/src/array/take.rs b/fuzz/src/array/take.rs index 8c963c2c066..d5921ae952d 100644 --- a/fuzz/src/array/take.rs +++ b/fuzz/src/array/take.rs @@ -138,7 +138,7 @@ pub fn take_canonical_array(array: &ArrayRef, indices: &[Option]) -> Vort } Ok(builder.finish()) } - d @ (DType::Null | DType::Extension(_)) => { + d @ (DType::Null | DType::Extension(_) | DType::Variant) => { unreachable!("DType {d} not supported for fuzzing") } } diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 8b822467195..c1834ff808b 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -10402,6 +10402,8 @@ pub vortex_array::dtype::DType::Struct(vortex_array::dtype::StructFields, vortex pub vortex_array::dtype::DType::Utf8(vortex_array::dtype::Nullability) +pub vortex_array::dtype::DType::Variant + impl vortex_array::dtype::DType pub const vortex_array::dtype::DType::BYTES: Self @@ -10478,6 +10480,8 @@ pub fn vortex_array::dtype::DType::is_unsigned_int(&self) -> bool pub fn vortex_array::dtype::DType::is_utf8(&self) -> bool +pub fn vortex_array::dtype::DType::is_variant(&self) -> bool + pub fn vortex_array::dtype::DType::list(dtype: impl core::convert::Into, nullability: vortex_array::dtype::Nullability) -> Self pub fn vortex_array::dtype::DType::nullability(&self) -> vortex_array::dtype::Nullability @@ -15330,6 +15334,8 @@ pub vortex_array::scalar::ScalarValue::Primitive(vortex_array::scalar::PValue) pub vortex_array::scalar::ScalarValue::Utf8(vortex_buffer::string::BufferString) +pub vortex_array::scalar::ScalarValue::Variant(vortex_array::scalar::VariantValue) + impl vortex_array::scalar::ScalarValue pub fn vortex_array::scalar::ScalarValue::as_binary(&self) -> &vortex_buffer::ByteBuffer @@ -15344,6 +15350,8 @@ pub fn vortex_array::scalar::ScalarValue::as_primitive(&self) -> &vortex_array:: pub fn vortex_array::scalar::ScalarValue::as_utf8(&self) -> &vortex_buffer::string::BufferString +pub fn vortex_array::scalar::ScalarValue::as_variant(&self) -> &vortex_array::scalar::VariantValue + pub fn vortex_array::scalar::ScalarValue::into_binary(self) -> vortex_buffer::ByteBuffer pub fn vortex_array::scalar::ScalarValue::into_bool(self) -> bool @@ -15356,6 +15364,8 @@ pub fn vortex_array::scalar::ScalarValue::into_primitive(self) -> vortex_array:: pub fn vortex_array::scalar::ScalarValue::into_utf8(self) -> vortex_buffer::string::BufferString +pub fn vortex_array::scalar::ScalarValue::into_variant(self) -> vortex_array::scalar::VariantValue + impl vortex_array::scalar::ScalarValue pub fn vortex_array::scalar::ScalarValue::from_proto(value: &vortex_proto::scalar::ScalarValue, dtype: &vortex_array::dtype::DType, session: &vortex_session::VortexSession) -> vortex_error::VortexResult> @@ -15556,6 +15566,52 @@ impl core::convert::From> for vortex_array::scalar::Scalar pub fn vortex_array::scalar::ScalarValue::from(vec: alloc::vec::Vec) -> Self +pub enum vortex_array::scalar::VariantValue + +pub vortex_array::scalar::VariantValue::Binary(vortex_buffer::ByteBuffer) + +pub vortex_array::scalar::VariantValue::Bool(bool) + +pub vortex_array::scalar::VariantValue::Decimal(vortex_array::scalar::DecimalValue) + +pub vortex_array::scalar::VariantValue::List(alloc::vec::Vec) + +pub vortex_array::scalar::VariantValue::Null + +pub vortex_array::scalar::VariantValue::Object(alloc::vec::Vec<(vortex_buffer::string::BufferString, vortex_array::scalar::VariantValue)>) + +pub vortex_array::scalar::VariantValue::Primitive(vortex_array::scalar::PValue) + +pub vortex_array::scalar::VariantValue::Utf8(vortex_buffer::string::BufferString) + +impl core::clone::Clone for vortex_array::scalar::VariantValue + +pub fn vortex_array::scalar::VariantValue::clone(&self) -> vortex_array::scalar::VariantValue + +impl core::cmp::Eq for vortex_array::scalar::VariantValue + +impl core::cmp::PartialEq for vortex_array::scalar::VariantValue + +pub fn vortex_array::scalar::VariantValue::eq(&self, other: &vortex_array::scalar::VariantValue) -> bool + +impl core::cmp::PartialOrd for vortex_array::scalar::VariantValue + +pub fn vortex_array::scalar::VariantValue::partial_cmp(&self, other: &Self) -> core::option::Option + +impl core::fmt::Debug for vortex_array::scalar::VariantValue + +pub fn vortex_array::scalar::VariantValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::fmt::Display for vortex_array::scalar::VariantValue + +pub fn vortex_array::scalar::VariantValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_array::scalar::VariantValue + +pub fn vortex_array::scalar::VariantValue::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_array::scalar::VariantValue + pub struct vortex_array::scalar::BinaryScalar<'a> impl<'a> vortex_array::scalar::BinaryScalar<'a> @@ -16030,6 +16086,10 @@ pub fn vortex_array::scalar::Scalar::as_utf8(&self) -> vortex_array::scalar::Utf pub fn vortex_array::scalar::Scalar::as_utf8_opt(&self) -> core::option::Option> +pub fn vortex_array::scalar::Scalar::as_variant(&self) -> &vortex_array::scalar::VariantValue + +pub fn vortex_array::scalar::Scalar::as_variant_opt(&self) -> core::option::Option<&vortex_array::scalar::VariantValue> + impl vortex_array::scalar::Scalar pub fn vortex_array::scalar::Scalar::binary(buffer: impl core::convert::Into, nullability: vortex_array::dtype::Nullability) -> Self diff --git a/vortex-array/src/arrays/arbitrary.rs b/vortex-array/src/arrays/arbitrary.rs index 47b2d316ea5..e559a53e536 100644 --- a/vortex-array/src/arrays/arbitrary.rs +++ b/vortex-array/src/arrays/arbitrary.rs @@ -162,7 +162,10 @@ fn random_array_chunk( random_fixed_size_list(u, elem_dtype, *list_size, *null, chunk_len) } DType::Extension(..) => { - todo!("Extension arrays are not implemented") + unimplemented!("Extension arrays are not implemented") + } + DType::Variant => { + unimplemented!("Variant arrays are not implemented") } } } diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index 3540257fb85..517f07d538f 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -8,6 +8,7 @@ use vortex_buffer::Buffer; use vortex_buffer::buffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use crate::Canonical; use crate::IntoArray; @@ -164,6 +165,9 @@ pub(crate) fn constant_canonicalize(array: &ConstantArray) -> VortexResult { + vortex_bail!("Variant don't currently support canonicalization"); + } }) } diff --git a/vortex-array/src/builders/mod.rs b/vortex-array/src/builders/mod.rs index 4c6bbf0dbf6..427599041b3 100644 --- a/vortex-array/src/builders/mod.rs +++ b/vortex-array/src/builders/mod.rs @@ -280,5 +280,8 @@ pub fn builder_with_capacity(dtype: &DType, capacity: usize) -> Box { Box::new(ExtensionBuilder::with_capacity(ext_dtype.clone(), capacity)) } + DType::Variant => { + unimplemented!() + } } } diff --git a/vortex-array/src/builders/tests.rs b/vortex-array/src/builders/tests.rs index ec46ca8879d..8959315face 100644 --- a/vortex-array/src/builders/tests.rs +++ b/vortex-array/src/builders/tests.rs @@ -630,6 +630,7 @@ fn create_test_scalars_for_dtype(dtype: &DType, count: usize) -> Vec { }; Scalar::extension_ref(ext_dtype.clone(), storage_scalar) } + DType::Variant => continue, }; scalars.push(scalar); } diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 7a333542228..ab14915d399 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -221,6 +221,9 @@ impl Canonical { ext_dtype.clone(), Canonical::empty(ext_dtype.storage_dtype()).into_array(), )), + DType::Variant => { + vortex_panic!(InvalidArgument: "Canonical empty is not supported for Variant") + } } } diff --git a/vortex-array/src/compute/conformance/consistency.rs b/vortex-array/src/compute/conformance/consistency.rs index e6452e4b4de..f55ce1cbfbf 100644 --- a/vortex-array/src/compute/conformance/consistency.rs +++ b/vortex-array/src/compute/conformance/consistency.rs @@ -1235,6 +1235,7 @@ fn test_cast_slice_consistency(array: &ArrayRef) { )] } DType::Extension(_) => vec![], // Extension types typically only cast to themselves + DType::Variant => unimplemented!(), }; // Test each target dtype diff --git a/vortex-array/src/dtype/arrow.rs b/vortex-array/src/dtype/arrow.rs index 93d39b05212..7ecf57d3feb 100644 --- a/vortex-array/src/dtype/arrow.rs +++ b/vortex-array/src/dtype/arrow.rs @@ -210,6 +210,14 @@ impl FromArrowType<(&DataType, Nullability)> for DType { impl FromArrowType<&Field> for DType { fn from_arrow(field: &Field) -> Self { + if field + .metadata() + .get("ARROW:extension:name") + .map(|s| s.as_str()) + == Some("arrow.parquet.variant") + { + return DType::Variant; + } Self::from_arrow((field.data_type(), field.is_nullable().into())) } } @@ -227,11 +235,23 @@ impl DType { let mut builder = SchemaBuilder::with_capacity(struct_dtype.names().len()); for (field_name, field_dtype) in struct_dtype.names().iter().zip(struct_dtype.fields()) { - builder.push(FieldRef::from(Field::new( - field_name.as_ref(), - field_dtype.to_arrow_dtype()?, - field_dtype.is_nullable(), - ))); + let field = if field_dtype.is_variant() { + let storage = DataType::Struct(variant_storage_fields_minimal()); + Field::new(field_name.as_ref(), storage, field_dtype.is_nullable()).with_metadata( + [( + "ARROW:extension:name".to_owned(), + "arrow.parquet.variant".to_owned(), + )] + .into(), + ) + } else { + Field::new( + field_name.as_ref(), + field_dtype.to_arrow_dtype()?, + field_dtype.is_nullable(), + ) + }; + builder.push(field); } Ok(builder.finish()) @@ -300,6 +320,9 @@ impl DType { DataType::Struct(Fields::from(fields)) } + DType::Variant => vortex_bail!( + "DType::Variant requires Arrow Field metadata; use to_arrow_schema or a Field helper" + ), DType::Extension(ext_dtype) => { // Try and match against the known extension DTypes. if let Some(temporal) = ext_dtype.metadata_opt::() { @@ -332,6 +355,13 @@ impl DType { } } +fn variant_storage_fields_minimal() -> Fields { + Fields::from(vec![ + Field::new("metadata", DataType::Binary, false), + Field::new("value", DataType::Binary, true), + ]) +} + #[cfg(test)] mod test { use arrow_schema::DataType; @@ -399,6 +429,12 @@ mod test { ); } + #[test] + fn test_variant_dtype_to_arrow_dtype_errors() { + let err = DType::Variant.to_arrow_dtype().unwrap_err().to_string(); + assert!(err.contains("Variant")); + } + #[test] fn infer_nullable_list_element() { let list_non_nullable = DType::List( @@ -455,6 +491,21 @@ mod test { ); } + #[test] + fn test_schema_variant_field_metadata() { + let dtype = DType::struct_([("v", DType::Variant)], Nullability::NonNullable); + let schema = dtype.to_arrow_schema().unwrap(); + let field = schema.field(0); + assert_eq!( + field + .metadata() + .get("ARROW:extension:name") + .map(|s| s.as_str()), + Some("arrow.parquet.variant") + ); + assert!(matches!(field.data_type(), DataType::Struct(_))); + } + #[rstest] #[should_panic] fn test_schema_conversion_panics(the_struct: StructFields) { diff --git a/vortex-array/src/dtype/dtype_impl.rs b/vortex-array/src/dtype/dtype_impl.rs index 36476d6f620..57aec470e2d 100644 --- a/vortex-array/src/dtype/dtype_impl.rs +++ b/vortex-array/src/dtype/dtype_impl.rs @@ -54,7 +54,7 @@ impl DType { #[inline] pub fn is_nullable(&self) -> bool { match self { - Null => true, + Null | Variant => true, Extension(ext_dtype) => ext_dtype.storage_dtype().is_nullable(), Bool(null) | Primitive(_, null) @@ -90,6 +90,7 @@ impl DType { List(edt, _) => List(edt.clone(), nullability), FixedSizeList(edt, size, _) => FixedSizeList(edt.clone(), *size, nullability), Extension(ext) => Extension(ext.with_nullability(nullability)), + Variant => Variant, } } @@ -122,6 +123,7 @@ impl DType { (Extension(lhs_extdtype), Extension(rhs_extdtype)) => { lhs_extdtype.eq_ignore_nullability(rhs_extdtype) } + (Variant, Variant) => true, _ => false, } } @@ -246,11 +248,16 @@ impl DType { matches!(self, Extension(_)) } + /// Check if `self` is a [`DType::Variant`] type + pub fn is_variant(&self) -> bool { + matches!(self, Variant) + } + /// Check if `self` is a nested type, i.e. list, fixed size list, struct, or extension of a /// recursive type. pub fn is_nested(&self) -> bool { match self { - List(..) | FixedSizeList(..) | Struct(..) => true, + List(..) | FixedSizeList(..) | Struct(..) | Variant => true, Extension(ext) => ext.storage_dtype().is_nested(), _ => false, } @@ -284,6 +291,7 @@ impl DType { Some(sum) } Extension(ext) => ext.storage_dtype().element_size(), + Variant => None, } } @@ -459,6 +467,7 @@ impl Display for DType { List(edt, null) => write!(f, "list({edt}){null}"), FixedSizeList(edt, size, null) => write!(f, "fixed_size_list({edt})[{size}]{null}"), Extension(ext) => write!(f, "{}", ext), + Variant => write!(f, "variant"), } } } diff --git a/vortex-array/src/dtype/mod.rs b/vortex-array/src/dtype/mod.rs index 6c5f58b966e..3f3aab0c7c2 100644 --- a/vortex-array/src/dtype/mod.rs +++ b/vortex-array/src/dtype/mod.rs @@ -100,6 +100,9 @@ pub enum DType { /// /// See [`ExtDTypeRef`] for more information. Extension(ExtDTypeRef), + + /// Variant type + Variant, } pub use bigint::*; diff --git a/vortex-array/src/dtype/serde/flatbuffers.rs b/vortex-array/src/dtype/serde/flatbuffers.rs index 2cd12f5dc77..3e973495ff4 100644 --- a/vortex-array/src/dtype/serde/flatbuffers.rs +++ b/vortex-array/src/dtype/serde/flatbuffers.rs @@ -231,9 +231,11 @@ impl TryFrom for DType { Ok(Self::Extension(ext_dtype)) } - // This is here to fail to compile if another variant is included. - #[allow(clippy::wildcard_in_or_patterns)] - fb::Type(11) => Err(vortex_err!("Unknown DType variant")), + fb::Type::Variant => { + fb.type__as_variant() + .ok_or_else(|| vortex_err!("failed to parse variant from flatbuffer"))?; + Ok(Self::Variant) + } _ => Err(vortex_err!("Unknown DType variant")), } } @@ -349,6 +351,7 @@ impl WriteFlatBuffer for DType { ) .as_union_value() } + Self::Variant => fb::Variant::create(fbb, &fb::VariantArgs {}).as_union_value(), }; let dtype_type = match self { @@ -362,6 +365,7 @@ impl WriteFlatBuffer for DType { Self::List(..) => fb::Type::List, Self::FixedSizeList(..) => fb::Type::FixedSizeList, Self::Extension { .. } => fb::Type::Extension, + Self::Variant => fb::Type::Variant, }; Ok(fb::DType::create( @@ -477,6 +481,7 @@ mod test { ], ), Nullability::NonNullable, - )) + )); + roundtrip_dtype(DType::Variant); } } diff --git a/vortex-array/src/dtype/serde/mod.rs b/vortex-array/src/dtype/serde/mod.rs index ca8e06dd863..e7ffc1c6766 100644 --- a/vortex-array/src/dtype/serde/mod.rs +++ b/vortex-array/src/dtype/serde/mod.rs @@ -69,6 +69,19 @@ mod test { ); } + #[test] + fn test_serde_variant_dtype() { + use serde_test::assert_ser_tokens; + + assert_ser_tokens( + &DType::Variant, + &[Token::UnitVariant { + name: "DType", + variant: "Variant", + }], + ); + } + #[test] fn test_serde_nullability() { assert_tokens(&Nullability::NonNullable, &[Token::Bool(false)]); @@ -153,4 +166,16 @@ mod test { .unwrap(); assert_eq!(fields, from_value); } + + #[test] + fn test_serde_variant_dtype_json_roundtrip() { + let json = serde_json::to_string(&DType::Variant).unwrap(); + assert_eq!(json, "\"Variant\""); + + let mut deserializer = serde_json::Deserializer::from_str(&json); + let deserialized: DType = DTypeSerde::::new(&SESSION) + .deserialize(&mut deserializer) + .unwrap(); + assert_eq!(DType::Variant, deserialized); + } } diff --git a/vortex-array/src/dtype/serde/proto.rs b/vortex-array/src/dtype/serde/proto.rs index 98c1453da02..88097c4235b 100644 --- a/vortex-array/src/dtype/serde/proto.rs +++ b/vortex-array/src/dtype/serde/proto.rs @@ -99,6 +99,7 @@ impl DType { let ext_dtype = vtable.deserialize(e.metadata(), storage_dtype)?; Ok(Self::Extension(ext_dtype)) } + DtypeType::Variant(..) => Ok(Self::Variant), } } } @@ -152,6 +153,7 @@ impl TryFrom<&DType> for pb::DType { storage_dtype: Some(Box::new(e.storage_dtype().try_into()?)), metadata: Some(e.serialize_metadata()?), })), + DType::Variant => DtypeType::Variant(pb::Variant {}), }), }) } @@ -361,6 +363,12 @@ mod tests { assert_eq!(ext_dtype, converted); } + #[test] + fn test_variant_round_trip() { + let converted = round_trip_dtype(&DType::Variant); + assert_eq!(DType::Variant, converted); + } + #[test] fn test_field_path_round_trip() { let test_paths = vec![ diff --git a/vortex-array/src/dtype/serde/serde.rs b/vortex-array/src/dtype/serde/serde.rs index a14f9c47321..a73234e37ae 100644 --- a/vortex-array/src/dtype/serde/serde.rs +++ b/vortex-array/src/dtype/serde/serde.rs @@ -117,6 +117,7 @@ impl Serialize for DType { DType::Extension(ext) => { serializer.serialize_newtype_variant("DType", 9, "Extension", ext) } + DType::Variant => serializer.serialize_unit_variant("DType", 10, "Variant"), } } } @@ -156,6 +157,7 @@ impl<'de> DeserializeSeed<'de> for DTypeSerde<'_, DType> { "FixedSizeList", "Struct", "Extension", + "Variant", ]; struct DTypeVisitor<'a> { @@ -217,6 +219,10 @@ impl<'de> DeserializeSeed<'de> for DTypeSerde<'_, DType> { .newtype_variant_seed(DTypeSerde::::new(self.session))?; Ok(DType::Extension(ext)) } + "Variant" => { + access.unit_variant()?; + Ok(DType::Variant) + } _ => Err(de::Error::unknown_variant(&variant, VARIANTS)), } } diff --git a/vortex-array/src/scalar/arbitrary.rs b/vortex-array/src/scalar/arbitrary.rs index 342d4fa59b0..49486803af3 100644 --- a/vortex-array/src/scalar/arbitrary.rs +++ b/vortex-array/src/scalar/arbitrary.rs @@ -98,6 +98,7 @@ pub fn random_scalar(u: &mut Unstructured, dtype: &DType) -> Result { DType::Extension(..) => { unreachable!("Can't yet generate arbitrary scalars for ext dtype") } + DType::Variant => todo!(), }) } diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index 45672e745fa..3bc0f26beb3 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -65,6 +65,7 @@ impl TryFrom<&Scalar> for Arc { DType::List(..) => unimplemented!("list scalar conversion"), DType::FixedSizeList(..) => unimplemented!("fixed-size list scalar conversion"), DType::Extension(..) => extension_to_arrow(value.as_extension()), + DType::Variant => unimplemented!("Variant scalar conversion"), } } } diff --git a/vortex-array/src/scalar/cast.rs b/vortex-array/src/scalar/cast.rs index 82986cd278d..e61bb4f49bd 100644 --- a/vortex-array/src/scalar/cast.rs +++ b/vortex-array/src/scalar/cast.rs @@ -5,6 +5,7 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use crate::dtype::DType; @@ -58,6 +59,7 @@ impl Scalar { DType::Struct(..) => self.as_struct().cast(target_dtype), DType::List(..) | DType::FixedSizeList(..) => self.as_list().cast(target_dtype), DType::Extension(..) => self.as_extension().cast(target_dtype), + DType::Variant => vortex_bail!("Variant scalars can't be cast to {target_dtype}"), } } diff --git a/vortex-array/src/scalar/display.rs b/vortex-array/src/scalar/display.rs index f43d8626c9e..eeb09258bce 100644 --- a/vortex-array/src/scalar/display.rs +++ b/vortex-array/src/scalar/display.rs @@ -21,6 +21,10 @@ impl Display for Scalar { DType::Struct(..) => write!(f, "{}", self.as_struct()), DType::List(..) | DType::FixedSizeList(..) => write!(f, "{}", self.as_list()), DType::Extension(_) => write!(f, "{}", self.as_extension()), + DType::Variant => match self.value() { + None => write!(f, "null"), + Some(value) => write!(f, "{value}"), + }, } } } diff --git a/vortex-array/src/scalar/downcast.rs b/vortex-array/src/scalar/downcast.rs index 6de05dab4e3..f4629558e5c 100644 --- a/vortex-array/src/scalar/downcast.rs +++ b/vortex-array/src/scalar/downcast.rs @@ -20,6 +20,7 @@ use crate::scalar::Scalar; use crate::scalar::ScalarValue; use crate::scalar::StructScalar; use crate::scalar::Utf8Scalar; +use crate::scalar::VariantValue; impl Scalar { /// Returns a view of the scalar as a boolean scalar. @@ -153,6 +154,22 @@ impl Scalar { // for this extension type. Some(ExtScalar::new_unchecked(self.dtype(), self.value())) } + + /// Returns the semantic variant value, panicking if the scalar is not a variant. + pub fn as_variant(&self) -> &VariantValue { + self.value() + .vortex_expect("Failed to convert null scalar to variant value") + .as_variant() + } + + /// Returns the semantic variant value if the scalar has `DType::Variant` and is non-null. + pub fn as_variant_opt(&self) -> Option<&VariantValue> { + self.dtype() + .is_variant() + .then(|| self.value()) + .flatten() + .map(ScalarValue::as_variant) + } } impl ScalarValue { @@ -255,4 +272,20 @@ impl ScalarValue { _ => vortex_panic!("ScalarValue is not a List"), } } + + /// Returns the semantic variant value, panicking if the value is not a variant. + pub fn as_variant(&self) -> &VariantValue { + match self { + ScalarValue::Variant(value) => value, + _ => vortex_panic!("ScalarValue is not a Variant"), + } + } + + /// Returns the semantic variant value, panicking if the value is not a variant. + pub fn into_variant(self) -> VariantValue { + match self { + ScalarValue::Variant(value) => value, + _ => vortex_panic!("ScalarValue is not a Variant"), + } + } } diff --git a/vortex-array/src/scalar/mod.rs b/vortex-array/src/scalar/mod.rs index 8496037ea20..d65466f6ae6 100644 --- a/vortex-array/src/scalar/mod.rs +++ b/vortex-array/src/scalar/mod.rs @@ -24,10 +24,12 @@ mod scalar_value; mod truncation; mod typed_view; mod validate; +mod variant_value; pub use scalar_value::*; pub use truncation::*; pub use typed_view::*; +pub use variant_value::*; use crate::dtype::DType; diff --git a/vortex-array/src/scalar/proto.rs b/vortex-array/src/scalar/proto.rs index b4cdf9a76e6..866bd0e4bc0 100644 --- a/vortex-array/src/scalar/proto.rs +++ b/vortex-array/src/scalar/proto.rs @@ -16,6 +16,9 @@ use vortex_error::vortex_err; use vortex_proto::scalar as pb; use vortex_proto::scalar::ListValue; use vortex_proto::scalar::scalar_value::Kind; +use vortex_proto::scalar::variant_decimal; +use vortex_proto::scalar::variant_primitive; +use vortex_proto::scalar::variant_value; use vortex_session::VortexSession; use crate::dtype::DType; @@ -26,6 +29,7 @@ use crate::scalar::DecimalValue; use crate::scalar::PValue; use crate::scalar::Scalar; use crate::scalar::ScalarValue; +use crate::scalar::VariantValue; //////////////////////////////////////////////////////////////////////////////////////////////////// // Serialize INTO proto. @@ -110,6 +114,9 @@ impl From<&ScalarValue> for pb::ScalarValue { kind: Some(Kind::ListValue(ListValue { values })), } } + ScalarValue::Variant(v) => pb::ScalarValue { + kind: Some(Kind::VariantValue(variant_to_proto(v))), + }, } } } @@ -238,6 +245,16 @@ impl ScalarValue { .as_ref() .ok_or_else(|| vortex_err!(Serde: "Scalar value missing kind"))?; + if matches!(dtype, DType::Variant) { + return match kind { + Kind::NullValue(_) => Ok(None), + Kind::VariantValue(v) => Ok(Some(ScalarValue::Variant(variant_from_proto(v)?))), + _ => vortex_bail!( + Serde: "expected VariantValue proto for Variant dtype, got {kind:?}" + ), + }; + } + // `DType::Extension` store their serialized values using the storage `DType`. let dtype = match dtype { DType::Extension(ext) => ext.storage_dtype(), @@ -255,6 +272,9 @@ impl ScalarValue { Kind::StringValue(s) => string_from_proto(s, dtype)?, Kind::BytesValue(b) => bytes_from_proto(b, dtype)?, Kind::ListValue(v) => list_from_proto(v, dtype, session)?, + Kind::VariantValue(_) => { + vortex_bail!(Serde: "expected non-Variant scalar proto for dtype {dtype}") + } })) } } @@ -448,6 +468,177 @@ fn list_from_proto( Ok(ScalarValue::List(values)) } +fn variant_to_proto(value: &VariantValue) -> pb::VariantValue { + let kind = match value { + VariantValue::Null => variant_value::Kind::NullValue(0), + VariantValue::Bool(v) => variant_value::Kind::BoolValue(*v), + VariantValue::Primitive(v) => { + variant_value::Kind::PrimitiveValue(variant_primitive_to_proto(v)) + } + VariantValue::Decimal(v) => variant_value::Kind::DecimalValue(variant_decimal_to_proto(v)), + VariantValue::Utf8(v) => variant_value::Kind::StringValue(v.to_string()), + VariantValue::Binary(v) => variant_value::Kind::BytesValue(v.to_vec()), + VariantValue::List(values) => variant_value::Kind::ListValue(pb::VariantListValue { + values: values.iter().map(variant_to_proto).collect(), + }), + VariantValue::Object(fields) => variant_value::Kind::ObjectValue(pb::VariantObjectValue { + fields: fields + .iter() + .map(|(name, value)| pb::VariantObjectField { + name: name.to_string(), + value: Some(variant_to_proto(value)), + }) + .collect(), + }), + }; + + pb::VariantValue { kind: Some(kind) } +} + +fn variant_primitive_to_proto(value: &PValue) -> pb::VariantPrimitive { + let kind = match value { + PValue::I8(v) => variant_primitive::Kind::Int8Value((*v).into()), + PValue::I16(v) => variant_primitive::Kind::Int16Value((*v).into()), + PValue::I32(v) => variant_primitive::Kind::Int32Value((*v).into()), + PValue::I64(v) => variant_primitive::Kind::Int64Value(*v), + PValue::U8(v) => variant_primitive::Kind::Uint8Value((*v).into()), + PValue::U16(v) => variant_primitive::Kind::Uint16Value((*v).into()), + PValue::U32(v) => variant_primitive::Kind::Uint32Value((*v).into()), + PValue::U64(v) => variant_primitive::Kind::Uint64Value(*v), + PValue::F16(v) => variant_primitive::Kind::F16Value((*v).to_bits().into()), + PValue::F32(v) => variant_primitive::Kind::F32Value(*v), + PValue::F64(v) => variant_primitive::Kind::F64Value(*v), + }; + + pb::VariantPrimitive { kind: Some(kind) } +} + +fn variant_decimal_to_proto(value: &DecimalValue) -> pb::VariantDecimal { + let kind = match value { + DecimalValue::I8(v) => variant_decimal::Kind::Int8Value((*v).into()), + DecimalValue::I16(v) => variant_decimal::Kind::Int16Value((*v).into()), + DecimalValue::I32(v) => variant_decimal::Kind::Int32Value((*v).into()), + DecimalValue::I64(v) => variant_decimal::Kind::Int64Value(*v), + DecimalValue::I128(v) => variant_decimal::Kind::Int128Value(v.to_le_bytes().to_vec()), + DecimalValue::I256(v) => variant_decimal::Kind::Int256Value(v.to_le_bytes().to_vec()), + }; + + pb::VariantDecimal { kind: Some(kind) } +} + +fn variant_from_proto(value: &pb::VariantValue) -> VortexResult { + let kind = value + .kind + .as_ref() + .ok_or_else(|| vortex_err!(Serde: "Variant value missing kind"))?; + + Ok(match kind { + variant_value::Kind::NullValue(_) => VariantValue::Null, + variant_value::Kind::BoolValue(v) => VariantValue::Bool(*v), + variant_value::Kind::PrimitiveValue(v) => { + VariantValue::Primitive(variant_primitive_from_proto(v)?) + } + variant_value::Kind::DecimalValue(v) => { + VariantValue::Decimal(variant_decimal_from_proto(v)?) + } + variant_value::Kind::StringValue(v) => VariantValue::Utf8(v.as_str().into()), + variant_value::Kind::BytesValue(v) => VariantValue::Binary(v.clone().into()), + variant_value::Kind::ListValue(v) => VariantValue::List( + v.values + .iter() + .map(variant_from_proto) + .collect::>>()?, + ), + variant_value::Kind::ObjectValue(v) => VariantValue::Object( + v.fields + .iter() + .map(|field| { + let value = field + .value + .as_ref() + .ok_or_else(|| vortex_err!(Serde: "Variant object field missing value"))?; + Ok((field.name.as_str().into(), variant_from_proto(value)?)) + }) + .collect::>>()?, + ), + }) +} + +fn variant_primitive_from_proto(value: &pb::VariantPrimitive) -> VortexResult { + let kind = value + .kind + .as_ref() + .ok_or_else(|| vortex_err!(Serde: "Variant primitive missing kind"))?; + + Ok(match kind { + variant_primitive::Kind::Int8Value(v) => PValue::I8( + i8::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant i8 value {v} out of range"))?, + ), + variant_primitive::Kind::Int16Value(v) => PValue::I16( + i16::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant i16 value {v} out of range"))?, + ), + variant_primitive::Kind::Int32Value(v) => PValue::I32( + i32::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant i32 value {v} out of range"))?, + ), + variant_primitive::Kind::Int64Value(v) => PValue::I64(*v), + variant_primitive::Kind::Uint8Value(v) => PValue::U8( + u8::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant u8 value {v} out of range"))?, + ), + variant_primitive::Kind::Uint16Value(v) => PValue::U16( + u16::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant u16 value {v} out of range"))?, + ), + variant_primitive::Kind::Uint32Value(v) => PValue::U32( + u32::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant u32 value {v} out of range"))?, + ), + variant_primitive::Kind::Uint64Value(v) => PValue::U64(*v), + variant_primitive::Kind::F16Value(v) => PValue::F16(f16::from_bits( + u16::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant f16 bits {v} out of range"))?, + )), + variant_primitive::Kind::F32Value(v) => PValue::F32(*v), + variant_primitive::Kind::F64Value(v) => PValue::F64(*v), + }) +} + +fn variant_decimal_from_proto(value: &pb::VariantDecimal) -> VortexResult { + let kind = value + .kind + .as_ref() + .ok_or_else(|| vortex_err!(Serde: "Variant decimal missing kind"))?; + + Ok(match kind { + variant_decimal::Kind::Int8Value(v) => DecimalValue::I8( + i8::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant decimal i8 value {v} out of range"))?, + ), + variant_decimal::Kind::Int16Value(v) => DecimalValue::I16( + i16::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant decimal i16 value {v} out of range"))?, + ), + variant_decimal::Kind::Int32Value(v) => DecimalValue::I32( + i32::try_from(*v) + .map_err(|_| vortex_err!(Serde: "Variant decimal i32 value {v} out of range"))?, + ), + variant_decimal::Kind::Int64Value(v) => DecimalValue::I64(*v), + variant_decimal::Kind::Int128Value(v) => { + DecimalValue::I128(i128::from_le_bytes(v.as_slice().try_into().map_err( + |_| vortex_err!(Serde: "invalid variant decimal i128 byte length: {}", v.len()), + )?)) + } + variant_decimal::Kind::Int256Value(v) => { + DecimalValue::I256(i256::from_le_bytes(v.as_slice().try_into().map_err( + |_| vortex_err!(Serde: "invalid variant decimal i256 byte length: {}", v.len()), + )?)) + } + }) +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -466,6 +657,7 @@ mod tests { use crate::scalar::DecimalValue; use crate::scalar::Scalar; use crate::scalar::ScalarValue; + use crate::scalar::VariantValue; fn session() -> VortexSession { VortexSession::empty() @@ -596,6 +788,51 @@ mod tests { round_trip(Scalar::utf8("hello", Nullability::NonNullable)); } + #[test] + fn test_variant_scalar_roundtrip() { + round_trip(Scalar::new( + DType::Variant, + Some(ScalarValue::Variant(VariantValue::Object(vec![ + ("flag".into(), VariantValue::Bool(true)), + ( + "nums".into(), + VariantValue::List(vec![ + VariantValue::Primitive(PValue::I16(-7)), + VariantValue::Primitive(PValue::U32(42)), + VariantValue::Decimal(DecimalValue::I128(123_456_789)), + ]), + ), + ( + "bytes".into(), + VariantValue::Binary(ByteBuffer::copy_from(b"abc")), + ), + ("null".into(), VariantValue::Null), + ]))), + )); + } + + #[test] + fn test_variant_scalar_proto_preserves_scalar_null_vs_variant_null() { + let scalar_null = Scalar::null(DType::Variant); + let variant_null = Scalar::new( + DType::Variant, + Some(ScalarValue::Variant(VariantValue::Null)), + ); + + let scalar_null_pb = pb::Scalar::from(&scalar_null); + let variant_null_pb = pb::Scalar::from(&variant_null); + + assert_ne!(scalar_null_pb, variant_null_pb); + assert_eq!( + Scalar::from_proto(&scalar_null_pb, &session()).unwrap(), + scalar_null, + ); + assert_eq!( + Scalar::from_proto(&variant_null_pb, &session()).unwrap(), + variant_null, + ); + } + #[test] fn test_backcompat_f16_serialized_as_u64() { // Backwards compatibility test for the legacy f16 serialization format. diff --git a/vortex-array/src/scalar/scalar_impl.rs b/vortex-array/src/scalar/scalar_impl.rs index c7a0e9b7999..a1cf17e9707 100644 --- a/vortex-array/src/scalar/scalar_impl.rs +++ b/vortex-array/src/scalar/scalar_impl.rs @@ -190,6 +190,10 @@ impl Scalar { DType::FixedSizeList(_, list_size, _) => value.as_list().len() == *list_size as usize, DType::Struct(struct_fields, _) => value.as_list().len() == struct_fields.nfields(), DType::Extension(_) => self.as_extension().to_storage_scalar().is_zero()?, + DType::Variant => matches!( + value, + ScalarValue::Variant(crate::scalar::VariantValue::Null) + ), }; Some(is_zero) @@ -257,6 +261,7 @@ impl Scalar { .map(|fields| fields.into_iter().map(|f| f.approx_nbytes()).sum::()) .unwrap_or_default(), DType::Extension(_) => self.as_extension().to_storage_scalar().approx_nbytes(), + DType::Variant => self.value().map_or(0, |value| format!("{value}").len()), } } } diff --git a/vortex-array/src/scalar/scalar_value.rs b/vortex-array/src/scalar/scalar_value.rs index 71bf4fcfc77..e5e1890bdf9 100644 --- a/vortex-array/src/scalar/scalar_value.rs +++ b/vortex-array/src/scalar/scalar_value.rs @@ -15,6 +15,7 @@ use vortex_error::vortex_panic; use crate::dtype::DType; use crate::scalar::DecimalValue; use crate::scalar::PValue; +use crate::scalar::VariantValue; /// The value stored in a [`Scalar`][crate::scalar::Scalar]. /// @@ -34,6 +35,8 @@ pub enum ScalarValue { Binary(ByteBuffer), /// A list of potentially null scalar values. List(Vec>), + /// A semantic variant value. + Variant(VariantValue), } impl ScalarValue { @@ -64,6 +67,7 @@ impl ScalarValue { // zero storage value and try to make an extension scalar from that. Self::zero_value(ext_dtype.storage_dtype()) } + DType::Variant => Self::Variant(VariantValue::Null), } } @@ -100,6 +104,7 @@ impl ScalarValue { // default storage value and try to make an extension scalar from that. Self::default_value(ext_dtype.storage_dtype())? } + DType::Variant => Self::Variant(VariantValue::Null), }) } } @@ -113,6 +118,7 @@ impl PartialOrd for ScalarValue { (ScalarValue::Utf8(a), ScalarValue::Utf8(b)) => a.partial_cmp(b), (ScalarValue::Binary(a), ScalarValue::Binary(b)) => a.partial_cmp(b), (ScalarValue::List(a), ScalarValue::List(b)) => a.partial_cmp(b), + (ScalarValue::Variant(a), ScalarValue::Variant(b)) => a.partial_cmp(b), // (ScalarValue::Extension(a), ScalarValue::Extension(b)) => a.partial_cmp(b), _ => None, } @@ -163,6 +169,7 @@ impl Display for ScalarValue { } write!(f, "]") } + ScalarValue::Variant(value) => write!(f, "{value}"), } } } diff --git a/vortex-array/src/scalar/validate.rs b/vortex-array/src/scalar/validate.rs index 3fed201ed93..bf99c22a2f4 100644 --- a/vortex-array/src/scalar/validate.rs +++ b/vortex-array/src/scalar/validate.rs @@ -119,6 +119,12 @@ impl Scalar { } } DType::Extension(ext_dtype) => ext_dtype.validate_storage_value(value)?, + DType::Variant => { + vortex_ensure!( + matches!(value, ScalarValue::Variant(_)), + "variant dtype expected Variant value, got {value}", + ); + } } Ok(()) diff --git a/vortex-array/src/scalar/variant_value.rs b/vortex-array/src/scalar/variant_value.rs new file mode 100644 index 00000000000..b4b0cb48822 --- /dev/null +++ b/vortex-array/src/scalar/variant_value.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::cmp::Ordering; +use std::fmt::Display; +use std::fmt::Formatter; + +use vortex_buffer::BufferString; +use vortex_buffer::ByteBuffer; + +use crate::scalar::DecimalValue; +use crate::scalar::PValue; + +/// Semantic value for a `DType::Variant` scalar. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum VariantValue { + Null, + Bool(bool), + Primitive(PValue), + Decimal(DecimalValue), + Utf8(BufferString), + Binary(ByteBuffer), + List(Vec), + Object(Vec<(BufferString, VariantValue)>), +} + +impl PartialOrd for VariantValue { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (Self::Null, Self::Null) => Some(Ordering::Equal), + (Self::Bool(a), Self::Bool(b)) => a.partial_cmp(b), + (Self::Primitive(a), Self::Primitive(b)) => a.partial_cmp(b), + (Self::Decimal(a), Self::Decimal(b)) => a.partial_cmp(b), + (Self::Utf8(a), Self::Utf8(b)) => a.partial_cmp(b), + (Self::Binary(a), Self::Binary(b)) => a.partial_cmp(b), + (Self::List(a), Self::List(b)) => a.partial_cmp(b), + (Self::Object(a), Self::Object(b)) => a.partial_cmp(b), + _ => None, + } + } +} + +impl Display for VariantValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Null => write!(f, "null"), + Self::Bool(v) => write!(f, "{v}"), + Self::Primitive(v) => write!(f, "{v}"), + Self::Decimal(v) => write!(f, "{v}"), + Self::Utf8(v) => write!(f, "\"{}\"", v.as_str()), + Self::Binary(v) => write!(f, "\"{} bytes\"", v.len()), + Self::List(values) => { + write!(f, "[")?; + for (idx, value) in values.iter().enumerate() { + if idx > 0 { + write!(f, ", ")?; + } + write!(f, "{value}")?; + } + write!(f, "]") + } + Self::Object(fields) => { + write!(f, "{{")?; + for (idx, (name, value)) in fields.iter().enumerate() { + if idx > 0 { + write!(f, ", ")?; + } + write!(f, "{}: {}", name.as_str(), value)?; + } + write!(f, "}}") + } + } + } +} diff --git a/vortex-datafusion/src/convert/scalars.rs b/vortex-datafusion/src/convert/scalars.rs index c2a5d9d16c8..d2a07c82828 100644 --- a/vortex-datafusion/src/convert/scalars.rs +++ b/vortex-datafusion/src/convert/scalars.rs @@ -160,6 +160,7 @@ impl TryToDataFusion for Scalar { }, } } + DType::Variant => vortex_bail!("Variant scalars aren't supported with DF"), }) } } diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index 7a96b66c729..49d7134573e 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -236,6 +236,9 @@ impl TryFrom<&DType> for LogicalType { let element_logical_type = LogicalType::try_from(element_dtype.as_ref())?; return LogicalType::array_type(element_logical_type, *list_size); } + DType::Variant => { + vortex_bail!("Vortex Variant array aren't supported in DuckDB") + } DType::Extension(ext_dtype) => { let Some(temporal) = ext_dtype.metadata_opt::() else { vortex_bail!("Unsupported extension type \"{}\"", ext_dtype.id()); diff --git a/vortex-duckdb/src/convert/scalar.rs b/vortex-duckdb/src/convert/scalar.rs index 56f2b6baf5b..ac34611fcc8 100644 --- a/vortex-duckdb/src/convert/scalar.rs +++ b/vortex-duckdb/src/convert/scalar.rs @@ -79,6 +79,9 @@ impl ToDuckDBScalar for Scalar { DType::Utf8(_) => self.as_utf8().try_to_duckdb_scalar(), DType::Binary(_) => self.as_binary().try_to_duckdb_scalar(), DType::Struct(..) | DType::List(..) | DType::FixedSizeList(..) => todo!(), + DType::Variant => { + vortex_bail!("Vortex Variant scalars aren't supported in DuckDB") + } } } } diff --git a/vortex-ffi/src/dtype.rs b/vortex-ffi/src/dtype.rs index a05a53459ca..a6c22fc1e81 100644 --- a/vortex-ffi/src/dtype.rs +++ b/vortex-ffi/src/dtype.rs @@ -68,6 +68,7 @@ impl From<&DType> for vx_dtype_variant { DType::List(..) => vx_dtype_variant::DTYPE_LIST, DType::FixedSizeList(..) => vx_dtype_variant::DTYPE_FIXED_SIZE_LIST, DType::Extension(_) => vx_dtype_variant::DTYPE_EXTENSION, + DType::Variant => vortex_panic!("Variant DType is not supported in FFI yet"), } } } diff --git a/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs b/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs index ae3b813c80e..1f1f450744d 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-dtype/dtype.fbs @@ -63,6 +63,8 @@ table Extension { metadata: [ubyte]; } +table Variant {} + union Type { Null = 1, Bool = 2, @@ -74,6 +76,7 @@ union Type { List = 8, Extension = 9, FixedSizeList = 10, // This is after `Extension` for backwards compatibility. + Variant = 11, } table DType { diff --git a/vortex-flatbuffers/public-api.lock b/vortex-flatbuffers/public-api.lock index 8f1a4291000..952046b78db 100644 --- a/vortex-flatbuffers/public-api.lock +++ b/vortex-flatbuffers/public-api.lock @@ -574,6 +574,8 @@ pub enum vortex_flatbuffers::dtype::Struct_Offset pub enum vortex_flatbuffers::dtype::Utf8Offset +pub enum vortex_flatbuffers::dtype::VariantOffset + pub struct vortex_flatbuffers::dtype::Binary<'a> pub vortex_flatbuffers::dtype::Binary::_tab: flatbuffers::table::Table<'a> @@ -726,6 +728,8 @@ pub fn vortex_flatbuffers::dtype::DType<'a>::type__as_struct_(&self) -> core::op pub fn vortex_flatbuffers::dtype::DType<'a>::type__as_utf_8(&self) -> core::option::Option> +pub fn vortex_flatbuffers::dtype::DType<'a>::type__as_variant(&self) -> core::option::Option> + pub fn vortex_flatbuffers::dtype::DType<'a>::type_type(&self) -> vortex_flatbuffers::dtype::Type impl core::fmt::Debug for vortex_flatbuffers::dtype::DType<'_> @@ -1380,6 +1384,8 @@ pub const vortex_flatbuffers::dtype::Type::Struct_: Self pub const vortex_flatbuffers::dtype::Type::Utf8: Self +pub const vortex_flatbuffers::dtype::Type::Variant: Self + pub fn vortex_flatbuffers::dtype::Type::variant_name(self) -> core::option::Option<&'static str> impl core::clone::Clone for vortex_flatbuffers::dtype::Type @@ -1502,6 +1508,56 @@ pub fn vortex_flatbuffers::dtype::Utf8Builder<'a, 'b, A>::finish(self) -> flatbu pub fn vortex_flatbuffers::dtype::Utf8Builder<'a, 'b, A>::new(_fbb: &'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>) -> vortex_flatbuffers::dtype::Utf8Builder<'a, 'b, A> +pub struct vortex_flatbuffers::dtype::Variant<'a> + +pub vortex_flatbuffers::dtype::Variant::_tab: flatbuffers::table::Table<'a> + +impl<'a> vortex_flatbuffers::dtype::Variant<'a> + +pub fn vortex_flatbuffers::dtype::Variant<'a>::create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::builder::Allocator + 'bldr>(_fbb: &'mut_bldr mut flatbuffers::builder::FlatBufferBuilder<'bldr, A>, _args: &'args vortex_flatbuffers::dtype::VariantArgs) -> flatbuffers::primitives::WIPOffset> + +pub unsafe fn vortex_flatbuffers::dtype::Variant<'a>::init_from_table(table: flatbuffers::table::Table<'a>) -> Self + +impl core::fmt::Debug for vortex_flatbuffers::dtype::Variant<'_> + +pub fn vortex_flatbuffers::dtype::Variant<'_>::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl flatbuffers::verifier::Verifiable for vortex_flatbuffers::dtype::Variant<'_> + +pub fn vortex_flatbuffers::dtype::Variant<'_>::run_verifier(v: &mut flatbuffers::verifier::Verifier<'_, '_>, pos: usize) -> core::result::Result<(), flatbuffers::verifier::InvalidFlatbuffer> + +impl<'a> core::clone::Clone for vortex_flatbuffers::dtype::Variant<'a> + +pub fn vortex_flatbuffers::dtype::Variant<'a>::clone(&self) -> vortex_flatbuffers::dtype::Variant<'a> + +impl<'a> core::cmp::PartialEq for vortex_flatbuffers::dtype::Variant<'a> + +pub fn vortex_flatbuffers::dtype::Variant<'a>::eq(&self, other: &vortex_flatbuffers::dtype::Variant<'a>) -> bool + +impl<'a> core::marker::Copy for vortex_flatbuffers::dtype::Variant<'a> + +impl<'a> core::marker::StructuralPartialEq for vortex_flatbuffers::dtype::Variant<'a> + +impl<'a> flatbuffers::follow::Follow<'a> for vortex_flatbuffers::dtype::Variant<'a> + +pub type vortex_flatbuffers::dtype::Variant<'a>::Inner = vortex_flatbuffers::dtype::Variant<'a> + +pub unsafe fn vortex_flatbuffers::dtype::Variant<'a>::follow(buf: &'a [u8], loc: usize) -> Self::Inner + +pub struct vortex_flatbuffers::dtype::VariantArgs + +impl<'a> core::default::Default for vortex_flatbuffers::dtype::VariantArgs + +pub fn vortex_flatbuffers::dtype::VariantArgs::default() -> Self + +pub struct vortex_flatbuffers::dtype::VariantBuilder<'a: 'b, 'b, A: flatbuffers::builder::Allocator + 'a> + +impl<'a: 'b, 'b, A: flatbuffers::builder::Allocator + 'a> vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A> + +pub fn vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A>::finish(self) -> flatbuffers::primitives::WIPOffset> + +pub fn vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A>::new(_fbb: &'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>) -> vortex_flatbuffers::dtype::VariantBuilder<'a, 'b, A> + pub const vortex_flatbuffers::dtype::ENUM_MAX_PTYPE: u8 pub const vortex_flatbuffers::dtype::ENUM_MAX_TYPE: u8 @@ -1512,7 +1568,7 @@ pub const vortex_flatbuffers::dtype::ENUM_MIN_TYPE: u8 pub const vortex_flatbuffers::dtype::ENUM_VALUES_PTYPE: [vortex_flatbuffers::dtype::PType; 11] -pub const vortex_flatbuffers::dtype::ENUM_VALUES_TYPE: [vortex_flatbuffers::dtype::Type; 11] +pub const vortex_flatbuffers::dtype::ENUM_VALUES_TYPE: [vortex_flatbuffers::dtype::Type; 12] pub fn vortex_flatbuffers::dtype::finish_dtype_buffer<'a, 'b, A: flatbuffers::builder::Allocator + 'a>(fbb: &'b mut flatbuffers::builder::FlatBufferBuilder<'a, A>, root: flatbuffers::primitives::WIPOffset>) diff --git a/vortex-flatbuffers/src/generated/dtype.rs b/vortex-flatbuffers/src/generated/dtype.rs index 1ba03d8a3c3..34439cbf386 100644 --- a/vortex-flatbuffers/src/generated/dtype.rs +++ b/vortex-flatbuffers/src/generated/dtype.rs @@ -133,10 +133,10 @@ impl flatbuffers::SimpleToVerifyInSlice for PType {} #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] pub const ENUM_MIN_TYPE: u8 = 0; #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] -pub const ENUM_MAX_TYPE: u8 = 10; +pub const ENUM_MAX_TYPE: u8 = 11; #[deprecated(since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021.")] #[allow(non_camel_case_types)] -pub const ENUM_VALUES_TYPE: [Type; 11] = [ +pub const ENUM_VALUES_TYPE: [Type; 12] = [ Type::NONE, Type::Null, Type::Bool, @@ -148,6 +148,7 @@ pub const ENUM_VALUES_TYPE: [Type; 11] = [ Type::List, Type::Extension, Type::FixedSizeList, + Type::Variant, ]; #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] @@ -166,9 +167,10 @@ impl Type { pub const List: Self = Self(8); pub const Extension: Self = Self(9); pub const FixedSizeList: Self = Self(10); + pub const Variant: Self = Self(11); pub const ENUM_MIN: u8 = 0; - pub const ENUM_MAX: u8 = 10; + pub const ENUM_MAX: u8 = 11; pub const ENUM_VALUES: &'static [Self] = &[ Self::NONE, Self::Null, @@ -181,6 +183,7 @@ impl Type { Self::List, Self::Extension, Self::FixedSizeList, + Self::Variant, ]; /// Returns the variant's name or "" if unknown. pub fn variant_name(self) -> Option<&'static str> { @@ -196,6 +199,7 @@ impl Type { Self::List => Some("List"), Self::Extension => Some("Extension"), Self::FixedSizeList => Some("FixedSizeList"), + Self::Variant => Some("Variant"), _ => None, } } @@ -1375,6 +1379,85 @@ impl core::fmt::Debug for Extension<'_> { ds.finish() } } +pub enum VariantOffset {} +#[derive(Copy, Clone, PartialEq)] + +pub struct Variant<'a> { + pub _tab: flatbuffers::Table<'a>, +} + +impl<'a> flatbuffers::Follow<'a> for Variant<'a> { + type Inner = Variant<'a>; + #[inline] + unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner { + Self { _tab: unsafe { flatbuffers::Table::new(buf, loc) } } + } +} + +impl<'a> Variant<'a> { + + #[inline] + pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { + Variant { _tab: table } + } + #[allow(unused_mut)] + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + _args: &'args VariantArgs + ) -> flatbuffers::WIPOffset> { + let mut builder = VariantBuilder::new(_fbb); + builder.finish() + } + +} + +impl flatbuffers::Verifiable for Variant<'_> { + #[inline] + fn run_verifier( + v: &mut flatbuffers::Verifier, pos: usize + ) -> Result<(), flatbuffers::InvalidFlatbuffer> { + use self::flatbuffers::Verifiable; + v.visit_table(pos)? + .finish(); + Ok(()) + } +} +pub struct VariantArgs { +} +impl<'a> Default for VariantArgs { + #[inline] + fn default() -> Self { + VariantArgs { + } + } +} + +pub struct VariantBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, + start_: flatbuffers::WIPOffset, +} +impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> VariantBuilder<'a, 'b, A> { + #[inline] + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> VariantBuilder<'a, 'b, A> { + let start = _fbb.start_table(); + VariantBuilder { + fbb_: _fbb, + start_: start, + } + } + #[inline] + pub fn finish(self) -> flatbuffers::WIPOffset> { + let o = self.fbb_.end_table(self.start_); + flatbuffers::WIPOffset::new(o.value()) + } +} + +impl core::fmt::Debug for Variant<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let mut ds = f.debug_struct("Variant"); + ds.finish() + } +} pub enum DTypeOffset {} #[derive(Copy, Clone, PartialEq)] @@ -1574,6 +1657,21 @@ impl<'a> DType<'a> { } } + #[inline] + #[allow(non_snake_case)] + pub fn type__as_variant(&self) -> Option> { + if self.type_type() == Type::Variant { + self.type_().map(|t| { + // Safety: + // Created from a valid Table for this object + // Which contains a valid union in this slot + unsafe { Variant::init_from_table(t) } + }) + } else { + None + } + } + } impl flatbuffers::Verifiable for DType<'_> { @@ -1595,6 +1693,7 @@ impl flatbuffers::Verifiable for DType<'_> { Type::List => v.verify_union_variant::>("Type::List", pos), Type::Extension => v.verify_union_variant::>("Type::Extension", pos), Type::FixedSizeList => v.verify_union_variant::>("Type::FixedSizeList", pos), + Type::Variant => v.verify_union_variant::>("Type::Variant", pos), _ => Ok(()), } })? @@ -1719,6 +1818,13 @@ impl core::fmt::Debug for DType<'_> { ds.field("type_", &"InvalidFlatbuffer: Union discriminant does not match value.") } }, + Type::Variant => { + if let Some(x) = self.type__as_variant() { + ds.field("type_", &x) + } else { + ds.field("type_", &"InvalidFlatbuffer: Union discriminant does not match value.") + } + }, _ => { let x: Option<()> = None; ds.field("type_", &x) diff --git a/vortex-flatbuffers/src/generated/message.rs b/vortex-flatbuffers/src/generated/message.rs index 618bdedb808..0748adec10a 100644 --- a/vortex-flatbuffers/src/generated/message.rs +++ b/vortex-flatbuffers/src/generated/message.rs @@ -3,8 +3,8 @@ // @generated -use crate::dtype::*; use crate::array::*; +use crate::dtype::*; use core::mem; use core::cmp::Ordering; diff --git a/vortex-jni/src/dtype.rs b/vortex-jni/src/dtype.rs index 8830064b8cd..38037c9ad9f 100644 --- a/vortex-jni/src/dtype.rs +++ b/vortex-jni/src/dtype.rs @@ -98,6 +98,7 @@ pub extern "system" fn Java_dev_vortex_jni_NativeDTypeMethods_getVariant( unimplemented!("TODO(connor)[FixedSizeList]") } DType::Extension(_) => DTYPE_EXTENSION, + DType::Variant => unimplemented!("Variant DType is not supported in JNI yet"), } } diff --git a/vortex-proto/proto/dtype.proto b/vortex-proto/proto/dtype.proto index 12af29d1c1e..945101f5f5f 100644 --- a/vortex-proto/proto/dtype.proto +++ b/vortex-proto/proto/dtype.proto @@ -70,6 +70,8 @@ message Extension { optional bytes metadata = 3; } +message Variant {} + message DType { oneof dtype_type { Null null = 1; @@ -82,6 +84,7 @@ message DType { List list = 8; Extension extension = 9; FixedSizeList fixed_size_list = 10; // This is after `Extension` for backwards compatibility. + Variant variant = 11; } } diff --git a/vortex-proto/proto/scalar.proto b/vortex-proto/proto/scalar.proto index fcc31b25e3e..1ecca6ab5a6 100644 --- a/vortex-proto/proto/scalar.proto +++ b/vortex-proto/proto/scalar.proto @@ -28,9 +28,63 @@ message ScalarValue { bytes bytes_value = 8; ListValue list_value = 9; uint64 f16_value = 10; + VariantValue variant_value = 11; } } message ListValue { repeated ScalarValue values = 1; } + +message VariantValue { + oneof kind { + google.protobuf.NullValue null_value = 1; + bool bool_value = 2; + VariantPrimitive primitive_value = 3; + VariantDecimal decimal_value = 4; + string string_value = 5; + bytes bytes_value = 6; + VariantListValue list_value = 7; + VariantObjectValue object_value = 8; + } +} + +message VariantPrimitive { + oneof kind { + sint64 int8_value = 1; + sint64 int16_value = 2; + sint64 int32_value = 3; + sint64 int64_value = 4; + uint64 uint8_value = 5; + uint64 uint16_value = 6; + uint64 uint32_value = 7; + uint64 uint64_value = 8; + uint32 f16_value = 9; + float f32_value = 10; + double f64_value = 11; + } +} + +message VariantDecimal { + oneof kind { + sint64 int8_value = 1; + sint64 int16_value = 2; + sint64 int32_value = 3; + sint64 int64_value = 4; + bytes int128_value = 5; + bytes int256_value = 6; + } +} + +message VariantListValue { + repeated VariantValue values = 1; +} + +message VariantObjectField { + string name = 1; + VariantValue value = 2; +} + +message VariantObjectValue { + repeated VariantObjectField fields = 1; +} diff --git a/vortex-proto/public-api.lock b/vortex-proto/public-api.lock index 1f6a2f409e7..261da0eed99 100644 --- a/vortex-proto/public-api.lock +++ b/vortex-proto/public-api.lock @@ -26,6 +26,8 @@ pub vortex_proto::dtype::d_type::DtypeType::Struct(vortex_proto::dtype::Struct) pub vortex_proto::dtype::d_type::DtypeType::Utf8(vortex_proto::dtype::Utf8) +pub vortex_proto::dtype::d_type::DtypeType::Variant(vortex_proto::dtype::Variant) + impl vortex_proto::dtype::d_type::DtypeType pub fn vortex_proto::dtype::d_type::DtypeType::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) @@ -608,6 +610,40 @@ pub fn vortex_proto::dtype::Utf8::clear(&mut self) pub fn vortex_proto::dtype::Utf8::encoded_len(&self) -> usize +pub struct vortex_proto::dtype::Variant + +impl core::clone::Clone for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::clone(&self) -> vortex_proto::dtype::Variant + +impl core::cmp::Eq for vortex_proto::dtype::Variant + +impl core::cmp::PartialEq for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::eq(&self, other: &vortex_proto::dtype::Variant) -> bool + +impl core::default::Default for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::default() -> Self + +impl core::fmt::Debug for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::Copy for vortex_proto::dtype::Variant + +impl core::marker::StructuralPartialEq for vortex_proto::dtype::Variant + +impl prost::message::Message for vortex_proto::dtype::Variant + +pub fn vortex_proto::dtype::Variant::clear(&mut self) + +pub fn vortex_proto::dtype::Variant::encoded_len(&self) -> usize + pub mod vortex_proto::expr pub mod vortex_proto::expr::binary_opts @@ -1140,6 +1176,8 @@ pub vortex_proto::scalar::scalar_value::Kind::StringValue(alloc::string::String) pub vortex_proto::scalar::scalar_value::Kind::Uint64Value(u64) +pub vortex_proto::scalar::scalar_value::Kind::VariantValue(vortex_proto::scalar::VariantValue) + impl vortex_proto::scalar::scalar_value::Kind pub fn vortex_proto::scalar::scalar_value::Kind::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) @@ -1162,6 +1200,142 @@ pub fn vortex_proto::scalar::scalar_value::Kind::fmt(&self, f: &mut core::fmt::F impl core::marker::StructuralPartialEq for vortex_proto::scalar::scalar_value::Kind +pub mod vortex_proto::scalar::variant_decimal + +pub enum vortex_proto::scalar::variant_decimal::Kind + +pub vortex_proto::scalar::variant_decimal::Kind::Int128Value(alloc::vec::Vec) + +pub vortex_proto::scalar::variant_decimal::Kind::Int16Value(i64) + +pub vortex_proto::scalar::variant_decimal::Kind::Int256Value(alloc::vec::Vec) + +pub vortex_proto::scalar::variant_decimal::Kind::Int32Value(i64) + +pub vortex_proto::scalar::variant_decimal::Kind::Int64Value(i64) + +pub vortex_proto::scalar::variant_decimal::Kind::Int8Value(i64) + +impl vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) + +pub fn vortex_proto::scalar::variant_decimal::Kind::encoded_len(&self) -> usize + +pub fn vortex_proto::scalar::variant_decimal::Kind::merge(field: &mut core::option::Option, tag: u32, wire_type: prost::encoding::wire_type::WireType, buf: &mut impl bytes::buf::buf_impl::Buf, ctx: prost::encoding::DecodeContext) -> core::result::Result<(), prost::error::DecodeError> + +impl core::clone::Clone for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::clone(&self) -> vortex_proto::scalar::variant_decimal::Kind + +impl core::cmp::Eq for vortex_proto::scalar::variant_decimal::Kind + +impl core::cmp::PartialEq for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::eq(&self, other: &vortex_proto::scalar::variant_decimal::Kind) -> bool + +impl core::fmt::Debug for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::scalar::variant_decimal::Kind + +pub fn vortex_proto::scalar::variant_decimal::Kind::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::variant_decimal::Kind + +pub mod vortex_proto::scalar::variant_primitive + +pub enum vortex_proto::scalar::variant_primitive::Kind + +pub vortex_proto::scalar::variant_primitive::Kind::F16Value(u32) + +pub vortex_proto::scalar::variant_primitive::Kind::F32Value(f32) + +pub vortex_proto::scalar::variant_primitive::Kind::F64Value(f64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int16Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int32Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int64Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Int8Value(i64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint16Value(u64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint32Value(u64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint64Value(u64) + +pub vortex_proto::scalar::variant_primitive::Kind::Uint8Value(u64) + +impl vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) + +pub fn vortex_proto::scalar::variant_primitive::Kind::encoded_len(&self) -> usize + +pub fn vortex_proto::scalar::variant_primitive::Kind::merge(field: &mut core::option::Option, tag: u32, wire_type: prost::encoding::wire_type::WireType, buf: &mut impl bytes::buf::buf_impl::Buf, ctx: prost::encoding::DecodeContext) -> core::result::Result<(), prost::error::DecodeError> + +impl core::clone::Clone for vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::clone(&self) -> vortex_proto::scalar::variant_primitive::Kind + +impl core::cmp::PartialEq for vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::eq(&self, other: &vortex_proto::scalar::variant_primitive::Kind) -> bool + +impl core::fmt::Debug for vortex_proto::scalar::variant_primitive::Kind + +pub fn vortex_proto::scalar::variant_primitive::Kind::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::Copy for vortex_proto::scalar::variant_primitive::Kind + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::variant_primitive::Kind + +pub mod vortex_proto::scalar::variant_value + +pub enum vortex_proto::scalar::variant_value::Kind + +pub vortex_proto::scalar::variant_value::Kind::BoolValue(bool) + +pub vortex_proto::scalar::variant_value::Kind::BytesValue(alloc::vec::Vec) + +pub vortex_proto::scalar::variant_value::Kind::DecimalValue(vortex_proto::scalar::VariantDecimal) + +pub vortex_proto::scalar::variant_value::Kind::ListValue(vortex_proto::scalar::VariantListValue) + +pub vortex_proto::scalar::variant_value::Kind::NullValue(i32) + +pub vortex_proto::scalar::variant_value::Kind::ObjectValue(vortex_proto::scalar::VariantObjectValue) + +pub vortex_proto::scalar::variant_value::Kind::PrimitiveValue(vortex_proto::scalar::VariantPrimitive) + +pub vortex_proto::scalar::variant_value::Kind::StringValue(alloc::string::String) + +impl vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::encode(&self, buf: &mut impl bytes::buf::buf_mut::BufMut) + +pub fn vortex_proto::scalar::variant_value::Kind::encoded_len(&self) -> usize + +pub fn vortex_proto::scalar::variant_value::Kind::merge(field: &mut core::option::Option, tag: u32, wire_type: prost::encoding::wire_type::WireType, buf: &mut impl bytes::buf::buf_impl::Buf, ctx: prost::encoding::DecodeContext) -> core::result::Result<(), prost::error::DecodeError> + +impl core::clone::Clone for vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::clone(&self) -> vortex_proto::scalar::variant_value::Kind + +impl core::cmp::PartialEq for vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::eq(&self, other: &vortex_proto::scalar::variant_value::Kind) -> bool + +impl core::fmt::Debug for vortex_proto::scalar::variant_value::Kind + +pub fn vortex_proto::scalar::variant_value::Kind::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::variant_value::Kind + pub struct vortex_proto::scalar::ListValue pub vortex_proto::scalar::ListValue::values: alloc::vec::Vec @@ -1247,3 +1421,181 @@ impl prost::message::Message for vortex_proto::scalar::ScalarValue pub fn vortex_proto::scalar::ScalarValue::clear(&mut self) pub fn vortex_proto::scalar::ScalarValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantDecimal + +pub vortex_proto::scalar::VariantDecimal::kind: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::clone(&self) -> vortex_proto::scalar::VariantDecimal + +impl core::cmp::Eq for vortex_proto::scalar::VariantDecimal + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::eq(&self, other: &vortex_proto::scalar::VariantDecimal) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::hash::Hash for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::hash<__H: core::hash::Hasher>(&self, state: &mut __H) + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantDecimal + +impl prost::message::Message for vortex_proto::scalar::VariantDecimal + +pub fn vortex_proto::scalar::VariantDecimal::clear(&mut self) + +pub fn vortex_proto::scalar::VariantDecimal::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantListValue + +pub vortex_proto::scalar::VariantListValue::values: alloc::vec::Vec + +impl core::clone::Clone for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::clone(&self) -> vortex_proto::scalar::VariantListValue + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::eq(&self, other: &vortex_proto::scalar::VariantListValue) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantListValue + +impl prost::message::Message for vortex_proto::scalar::VariantListValue + +pub fn vortex_proto::scalar::VariantListValue::clear(&mut self) + +pub fn vortex_proto::scalar::VariantListValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantObjectField + +pub vortex_proto::scalar::VariantObjectField::name: alloc::string::String + +pub vortex_proto::scalar::VariantObjectField::value: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::clone(&self) -> vortex_proto::scalar::VariantObjectField + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::eq(&self, other: &vortex_proto::scalar::VariantObjectField) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantObjectField + +impl prost::message::Message for vortex_proto::scalar::VariantObjectField + +pub fn vortex_proto::scalar::VariantObjectField::clear(&mut self) + +pub fn vortex_proto::scalar::VariantObjectField::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantObjectValue + +pub vortex_proto::scalar::VariantObjectValue::fields: alloc::vec::Vec + +impl core::clone::Clone for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::clone(&self) -> vortex_proto::scalar::VariantObjectValue + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::eq(&self, other: &vortex_proto::scalar::VariantObjectValue) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantObjectValue + +impl prost::message::Message for vortex_proto::scalar::VariantObjectValue + +pub fn vortex_proto::scalar::VariantObjectValue::clear(&mut self) + +pub fn vortex_proto::scalar::VariantObjectValue::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantPrimitive + +pub vortex_proto::scalar::VariantPrimitive::kind: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::clone(&self) -> vortex_proto::scalar::VariantPrimitive + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::eq(&self, other: &vortex_proto::scalar::VariantPrimitive) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::Copy for vortex_proto::scalar::VariantPrimitive + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantPrimitive + +impl prost::message::Message for vortex_proto::scalar::VariantPrimitive + +pub fn vortex_proto::scalar::VariantPrimitive::clear(&mut self) + +pub fn vortex_proto::scalar::VariantPrimitive::encoded_len(&self) -> usize + +pub struct vortex_proto::scalar::VariantValue + +pub vortex_proto::scalar::VariantValue::kind: core::option::Option + +impl core::clone::Clone for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::clone(&self) -> vortex_proto::scalar::VariantValue + +impl core::cmp::PartialEq for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::eq(&self, other: &vortex_proto::scalar::VariantValue) -> bool + +impl core::default::Default for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::default() -> Self + +impl core::fmt::Debug for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::marker::StructuralPartialEq for vortex_proto::scalar::VariantValue + +impl prost::message::Message for vortex_proto::scalar::VariantValue + +pub fn vortex_proto::scalar::VariantValue::clear(&mut self) + +pub fn vortex_proto::scalar::VariantValue::encoded_len(&self) -> usize diff --git a/vortex-proto/src/generated/vortex.dtype.rs b/vortex-proto/src/generated/vortex.dtype.rs index 49a3defb90f..ac9ba872746 100644 --- a/vortex-proto/src/generated/vortex.dtype.rs +++ b/vortex-proto/src/generated/vortex.dtype.rs @@ -66,9 +66,11 @@ pub struct Extension { #[prost(bytes = "vec", optional, tag = "3")] pub metadata: ::core::option::Option<::prost::alloc::vec::Vec>, } +#[derive(Clone, Copy, PartialEq, Eq, Hash, ::prost::Message)] +pub struct Variant {} #[derive(Clone, PartialEq, ::prost::Message)] pub struct DType { - #[prost(oneof = "d_type::DtypeType", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10")] + #[prost(oneof = "d_type::DtypeType", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")] pub dtype_type: ::core::option::Option, } /// Nested message and enum types in `DType`. @@ -96,6 +98,8 @@ pub mod d_type { /// This is after `Extension` for backwards compatibility. #[prost(message, tag = "10")] FixedSizeList(::prost::alloc::boxed::Box), + #[prost(message, tag = "11")] + Variant(super::Variant), } } #[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] diff --git a/vortex-proto/src/generated/vortex.scalar.rs b/vortex-proto/src/generated/vortex.scalar.rs index 9f4e3047e54..557affee723 100644 --- a/vortex-proto/src/generated/vortex.scalar.rs +++ b/vortex-proto/src/generated/vortex.scalar.rs @@ -8,7 +8,7 @@ pub struct Scalar { } #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarValue { - #[prost(oneof = "scalar_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10")] + #[prost(oneof = "scalar_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")] pub kind: ::core::option::Option, } /// Nested message and enum types in `ScalarValue`. @@ -35,6 +35,8 @@ pub mod scalar_value { ListValue(super::ListValue), #[prost(uint64, tag = "10")] F16Value(u64), + #[prost(message, tag = "11")] + VariantValue(super::VariantValue), } } #[derive(Clone, PartialEq, ::prost::Message)] @@ -42,3 +44,106 @@ pub struct ListValue { #[prost(message, repeated, tag = "1")] pub values: ::prost::alloc::vec::Vec, } +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantValue { + #[prost(oneof = "variant_value::Kind", tags = "1, 2, 3, 4, 5, 6, 7, 8")] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `VariantValue`. +pub mod variant_value { + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum Kind { + #[prost(enumeration = "::prost_types::NullValue", tag = "1")] + NullValue(i32), + #[prost(bool, tag = "2")] + BoolValue(bool), + #[prost(message, tag = "3")] + PrimitiveValue(super::VariantPrimitive), + #[prost(message, tag = "4")] + DecimalValue(super::VariantDecimal), + #[prost(string, tag = "5")] + StringValue(::prost::alloc::string::String), + #[prost(bytes, tag = "6")] + BytesValue(::prost::alloc::vec::Vec), + #[prost(message, tag = "7")] + ListValue(super::VariantListValue), + #[prost(message, tag = "8")] + ObjectValue(super::VariantObjectValue), + } +} +#[derive(Clone, Copy, PartialEq, ::prost::Message)] +pub struct VariantPrimitive { + #[prost( + oneof = "variant_primitive::Kind", + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11" + )] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `VariantPrimitive`. +pub mod variant_primitive { + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] + pub enum Kind { + #[prost(sint64, tag = "1")] + Int8Value(i64), + #[prost(sint64, tag = "2")] + Int16Value(i64), + #[prost(sint64, tag = "3")] + Int32Value(i64), + #[prost(sint64, tag = "4")] + Int64Value(i64), + #[prost(uint64, tag = "5")] + Uint8Value(u64), + #[prost(uint64, tag = "6")] + Uint16Value(u64), + #[prost(uint64, tag = "7")] + Uint32Value(u64), + #[prost(uint64, tag = "8")] + Uint64Value(u64), + #[prost(uint32, tag = "9")] + F16Value(u32), + #[prost(float, tag = "10")] + F32Value(f32), + #[prost(double, tag = "11")] + F64Value(f64), + } +} +#[derive(Clone, PartialEq, Eq, Hash, ::prost::Message)] +pub struct VariantDecimal { + #[prost(oneof = "variant_decimal::Kind", tags = "1, 2, 3, 4, 5, 6")] + pub kind: ::core::option::Option, +} +/// Nested message and enum types in `VariantDecimal`. +pub mod variant_decimal { + #[derive(Clone, PartialEq, Eq, Hash, ::prost::Oneof)] + pub enum Kind { + #[prost(sint64, tag = "1")] + Int8Value(i64), + #[prost(sint64, tag = "2")] + Int16Value(i64), + #[prost(sint64, tag = "3")] + Int32Value(i64), + #[prost(sint64, tag = "4")] + Int64Value(i64), + #[prost(bytes, tag = "5")] + Int128Value(::prost::alloc::vec::Vec), + #[prost(bytes, tag = "6")] + Int256Value(::prost::alloc::vec::Vec), + } +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantListValue { + #[prost(message, repeated, tag = "1")] + pub values: ::prost::alloc::vec::Vec, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantObjectField { + #[prost(string, tag = "1")] + pub name: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub value: ::core::option::Option, +} +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct VariantObjectValue { + #[prost(message, repeated, tag = "1")] + pub fields: ::prost::alloc::vec::Vec, +} diff --git a/vortex-python/src/dtype/mod.rs b/vortex-python/src/dtype/mod.rs index 876e41cbbb5..0d8f3dae18c 100644 --- a/vortex-python/src/dtype/mod.rs +++ b/vortex-python/src/dtype/mod.rs @@ -26,6 +26,7 @@ use pyo3::PyClass; use pyo3::PyClassInitializer; use pyo3::PyResult; use pyo3::Python; +use pyo3::exceptions::PyValueError; use pyo3::prelude::PyModule; use pyo3::prelude::PyModuleMethods; use pyo3::pyclass; @@ -124,6 +125,9 @@ impl PyDType { DType::List(..) => Self::with_subclass(py, dtype, PyListDType), DType::FixedSizeList(..) => Self::with_subclass(py, dtype, PyFixedSizeListDType), DType::Extension(..) => Self::with_subclass(py, dtype, PyExtensionDType), + DType::Variant => Err(PyValueError::new_err( + "Variant DType is not supported in Python yet", + )), } } diff --git a/vortex-python/src/python_repr.rs b/vortex-python/src/python_repr.rs index 3c4fe045e94..8786238b2d1 100644 --- a/vortex-python/src/python_repr.rs +++ b/vortex-python/src/python_repr.rs @@ -103,6 +103,7 @@ impl Display for DTypePythonRepr<'_> { } write!(f, ")") } + DType::Variant => write!(f, "variant()"), } } } diff --git a/vortex-python/src/scalar/into_py.rs b/vortex-python/src/scalar/into_py.rs index 70a890821ff..899619c8c5c 100644 --- a/vortex-python/src/scalar/into_py.rs +++ b/vortex-python/src/scalar/into_py.rs @@ -9,6 +9,7 @@ use pyo3::PyAny; use pyo3::PyErr; use pyo3::PyResult; use pyo3::Python; +use pyo3::exceptions::PyValueError; use pyo3::prelude::PyAnyMethods; use pyo3::prelude::PyDictMethods; use pyo3::types::PyBytes; @@ -85,6 +86,9 @@ impl<'py> IntoPyObject<'py> for PyVortex<&'_ Scalar> { DType::Extension(_) => { PyVortex(&self.0.as_extension().to_storage_scalar()).into_pyobject(py) } + DType::Variant => Err(PyValueError::new_err( + "Variant scalars are not supported in Python yet", + )), } } } diff --git a/vortex-python/src/scalar/mod.rs b/vortex-python/src/scalar/mod.rs index d6c18d64ea5..d97fd15d3b1 100644 --- a/vortex-python/src/scalar/mod.rs +++ b/vortex-python/src/scalar/mod.rs @@ -20,6 +20,7 @@ mod struct_; mod utf8; use pyo3::PyClass; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use vortex::dtype::DType; use vortex::error::VortexError; @@ -109,6 +110,9 @@ impl PyScalar { Self::with_subclass(py, scalar, PyListScalar) } DType::Extension(..) => Self::with_subclass(py, scalar, PyExtensionScalar), + DType::Variant => Err(PyValueError::new_err( + "Variant scalars are not supported in Python yet", + )), } }