Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion encodings/datetime-parts/src/compute/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ mod test {
.unwrap();

// Timestamp with a value larger than i32::MAX.
let rhs = dtp_array_from_timestamp(i64::MAX, rhs_validity);
// https://github.com/BurntSushi/jiff/blob/e5b7f0d061e4da9598aed73f6171e78baa8b007f/src/shared/tzif.rs#L23
let rhs = dtp_array_from_timestamp(253_402_207_200i64, rhs_validity);

let comparison = lhs
.clone()
Expand Down
2 changes: 1 addition & 1 deletion fuzz/src/array/mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult<A
let ext_dtype = array
.ext_dtype()
.with_nullability(masked_storage.dtype().nullability());
ExtensionArray::new(ext_dtype, masked_storage).into_array()
ExtensionArray::try_new(ext_dtype, masked_storage)?.into_array()
}
})
}
Expand Down
18 changes: 18 additions & 0 deletions vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -10238,6 +10238,8 @@ pub fn vortex_array::dtype::extension::ExtVTable::serialize_metadata(&self, meta

pub fn vortex_array::dtype::extension::ExtVTable::unpack_native<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::dtype::extension::ExtVTable::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::dtype::extension::ExtVTable::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::dtype::extension::ExtVTable::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand All @@ -10256,6 +10258,8 @@ pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metada

pub fn vortex_array::extension::datetime::Date::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::datetime::Date::validate_array<'a>(&self, _ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, _storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand All @@ -10274,6 +10278,8 @@ pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metada

pub fn vortex_array::extension::datetime::Time::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::datetime::Time::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand All @@ -10292,6 +10298,8 @@ pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, m

pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::datetime::Timestamp::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand All @@ -10310,6 +10318,8 @@ pub fn vortex_array::extension::uuid::Uuid::serialize_metadata(&self, metadata:

pub fn vortex_array::extension::uuid::Uuid::unpack_native<'a>(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::uuid::Uuid::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::uuid::Uuid::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::uuid::Uuid::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand Down Expand Up @@ -14142,6 +14152,8 @@ pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metada

pub fn vortex_array::extension::datetime::Date::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::datetime::Date::validate_array<'a>(&self, _ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, _storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand Down Expand Up @@ -14192,6 +14204,8 @@ pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metada

pub fn vortex_array::extension::datetime::Time::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::datetime::Time::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand Down Expand Up @@ -14244,6 +14258,8 @@ pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, m

pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::datetime::Timestamp::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand Down Expand Up @@ -14320,6 +14336,8 @@ pub fn vortex_array::extension::uuid::Uuid::serialize_metadata(&self, metadata:

pub fn vortex_array::extension::uuid::Uuid::unpack_native<'a>(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<Self::NativeValue>

pub fn vortex_array::extension::uuid::Uuid::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType<Self>, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::uuid::Uuid::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>) -> vortex_error::VortexResult<()>

pub fn vortex_array::extension::uuid::Uuid::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType<Self>, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()>
Expand Down
13 changes: 12 additions & 1 deletion vortex-array/src/arrays/constant/vtable/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,19 @@ pub(crate) fn constant_canonicalize(array: &ConstantArray) -> VortexResult<Canon
let s = scalar.as_extension();

let storage_scalar = s.to_storage_scalar();

// Validate that this scalar is valid for this type.
if let Some(storage_value) = storage_scalar.value() {
ext_dtype.validate_storage_value(storage_value)?;
}

let storage_self = ConstantArray::new(storage_scalar, array.len()).into_array();
Canonical::Extension(ExtensionArray::new(ext_dtype.clone(), storage_self))

Canonical::Extension(
// SAFETY: We validated that the scalar was valid, and since this array only
// contains this scalar, we know the storage array is valid.
unsafe { ExtensionArray::new_unchecked(ext_dtype.clone(), storage_self) },
)
}
})
}
Expand Down
3 changes: 3 additions & 0 deletions vortex-array/src/arrays/datetime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub struct TemporalArray {
}

impl TemporalArray {
// TODO(connor): This should really be fallible!!!
/// Create a new `TemporalArray` holding either i32 day offsets, or i64 millisecond offsets
/// that are evenly divisible by the number of 86,400,000.
///
Expand All @@ -69,6 +70,7 @@ impl TemporalArray {
}
}

// TODO(connor): This should really be fallible!!!
/// Create a new `TemporalArray` holding one of the following values:
///
/// * `i32` values representing seconds since midnight
Expand Down Expand Up @@ -97,6 +99,7 @@ impl TemporalArray {
}
}

// TODO(connor): This should really be fallible!!!
/// Create a new `TemporalArray` holding Arrow spec compliant Timestamp data, with an
/// optional timezone.
///
Expand Down
33 changes: 19 additions & 14 deletions vortex-array/src/arrays/extension/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,11 @@ impl ExtensionArray {
///
/// Returns an error if the storage array in not compatible with the extension dtype.
pub fn try_new(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> VortexResult<Self> {
// TODO(connor): Replace these statements once we add `validate_storage_array`.
// ext_dtype.validate_storage_array(&storage_array)?;
assert_eq!(
ext_dtype.storage_dtype(),
storage_array.dtype(),
"ExtensionArray: storage_dtype must match storage array DType",
);
if storage_array.is_host() {
ext_dtype.validate_storage_array(&storage_array)?;
} else {
eprintln!("Unable to validate storage array on GPU")
}

// SAFETY: we validate that the inputs are valid above.
Ok(unsafe { Self::new_unchecked(ext_dtype, storage_array) })
Expand All @@ -95,15 +93,22 @@ impl ExtensionArray {
/// other words, they must know that `ext_dtype.validate_storage_array(&storage_array)` has been
/// called successfully on this storage array.
pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self {
// TODO(connor): Replace these statements once we add `validate_storage_array`.
// #[cfg(debug_assertions)]
// ext_dtype
// .validate_storage_array(&storage_array)
// .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`");
debug_assert_eq!(
#[cfg(debug_assertions)]
{
if storage_array.is_host() {
ext_dtype
.validate_storage_array(&storage_array)
.vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`");
} else {
eprintln!("Unable to validate storage array on GPU")
}
}

// The dtype check is much cheaper than the storage validation, so we might as well do this.
assert_eq!(
ext_dtype.storage_dtype(),
storage_array.dtype(),
"ExtensionArray: storage_dtype must match storage array DType",
"tried to construct an extension array with a storage array that has the wrong dtype"
);

Self {
Expand Down
7 changes: 5 additions & 2 deletions vortex-array/src/arrays/extension/compute/cast.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexResult;

use crate::ArrayRef;
use crate::IntoArray;
use crate::arrays::Extension;
Expand All @@ -10,7 +12,7 @@ use crate::dtype::DType;
use crate::scalar_fn::fns::cast::CastReduce;

impl CastReduce for Extension {
fn cast(array: &ExtensionArray, dtype: &DType) -> vortex_error::VortexResult<Option<ArrayRef>> {
fn cast(array: &ExtensionArray, dtype: &DType) -> VortexResult<Option<ArrayRef>> {
if !array.dtype().eq_ignore_nullability(dtype) {
return Ok(None);
}
Expand All @@ -30,8 +32,9 @@ impl CastReduce for Extension {
}
};

// We have no idea if the cast invalidated the storage array, so we must run validation.
Ok(Some(
ExtensionArray::new(ext_dtype.clone(), new_storage).into_array(),
ExtensionArray::try_new(ext_dtype.clone(), new_storage)?.into_array(),
))
}
}
Expand Down
12 changes: 8 additions & 4 deletions vortex-array/src/arrays/extension/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@ use crate::arrays::filter::FilterReduce;
impl FilterReduce for Extension {
fn filter(array: &ExtensionArray, mask: &Mask) -> VortexResult<Option<ArrayRef>> {
Ok(Some(
ExtensionArray::new(
array.ext_dtype().clone(),
array.storage_array().filter(mask.clone())?,
)
// SAFETY: The storage array is filtered from an already-valid extension array, which
// preserves the storage dtype and does not change values.
unsafe {
ExtensionArray::new_unchecked(
array.ext_dtype().clone(),
array.storage_array().filter(mask.clone())?,
)
}
.into_array(),
))
}
Expand Down
17 changes: 11 additions & 6 deletions vortex-array/src/arrays/extension/compute/mask.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,18 @@ impl MaskReduce for Extension {
EmptyOptions,
[array.storage_array().clone(), mask.clone()],
)?;

Ok(Some(
ExtensionArray::new(
array
.ext_dtype()
.with_nullability(masked_storage.dtype().nullability()),
masked_storage,
)
// SAFETY: The storage array is masked from an already-valid extension array, which
// preserves the storage dtype and does not change values.
unsafe {
ExtensionArray::new_unchecked(
array
.ext_dtype()
.with_nullability(masked_storage.dtype().nullability()),
masked_storage,
)
}
.into_array(),
))
}
Expand Down
22 changes: 21 additions & 1 deletion vortex-array/src/arrays/extension/compute/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,12 @@ impl ArrayParentReduceRule<Extension> for ExtensionFilterPushDownRule {
.storage_array()
.clone()
.filter(parent.filter_mask().clone())?;

Ok(Some(
ExtensionArray::new(child.ext_dtype().clone(), filtered_storage).into_array(),
// SAFETY: The storage array is filtered from an already-valid extension array, which
// preserves the storage dtype and does not change values.
unsafe { ExtensionArray::new_unchecked(child.ext_dtype().clone(), filtered_storage) }
.into_array(),
))
}
}
Expand Down Expand Up @@ -106,6 +110,14 @@ mod tests {
) -> VortexResult<Self::NativeValue<'a>> {
Ok("")
}

fn validate_array<'a>(
&self,
_ext_dtype: &'a ExtDType<Self>,
_storage_array: &'a dyn DynArray,
) -> VortexResult<()> {
Ok(())
}
}

fn test_ext_dtype() -> ExtDTypeRef {
Expand Down Expand Up @@ -203,6 +215,14 @@ mod tests {
) -> VortexResult<Self::NativeValue<'a>> {
Ok("")
}

fn validate_array<'a>(
&self,
_ext_dtype: &'a ExtDType<Self>,
_storage_array: &'a dyn DynArray,
) -> VortexResult<()> {
Ok(())
}
}

let ext_dtype1 = ExtDType::<TestExt>::try_new(
Expand Down
12 changes: 8 additions & 4 deletions vortex-array/src/arrays/extension/compute/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ use crate::arrays::slice::SliceReduce;
impl SliceReduce for Extension {
fn slice(array: &Self::Array, range: Range<usize>) -> VortexResult<Option<ArrayRef>> {
Ok(Some(
ExtensionArray::new(
array.ext_dtype().clone(),
array.storage_array().slice(range)?,
)
// SAFETY: The storage array is sliced from an already-valid extension array, which
// preserves the storage dtype and does not change values.
unsafe {
ExtensionArray::new_unchecked(
array.ext_dtype().clone(),
array.storage_array().slice(range)?,
)
}
.into_array(),
))
}
Expand Down
17 changes: 11 additions & 6 deletions vortex-array/src/arrays/extension/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,18 @@ impl TakeExecute for Extension {
_ctx: &mut ExecutionCtx,
) -> VortexResult<Option<ArrayRef>> {
let taken_storage = array.storage_array().take(indices.to_array())?;

Ok(Some(
ExtensionArray::new(
array
.ext_dtype()
.with_nullability(taken_storage.dtype().nullability()),
taken_storage,
)
// SAFETY: The storage array is taken from an already-valid extension array, which
// preserves the storage dtype and does not change values.
unsafe {
ExtensionArray::new_unchecked(
array
.ext_dtype()
.with_nullability(taken_storage.dtype().nullability()),
taken_storage,
)
}
.into_array(),
))
}
Expand Down
5 changes: 4 additions & 1 deletion vortex-array/src/arrays/extension/vtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ impl VTable for Extension {
vortex_bail!("Expected 1 child, got {}", children.len());
}
let storage = children.get(0, ext_dtype.storage_dtype(), len)?;
Ok(ExtensionArray::new(ext_dtype.clone(), storage))

// TODO(connor): Is this correct?
// We do not know what might be on disk, so we must run validation on read.
ExtensionArray::try_new(ext_dtype.clone(), storage)
}

fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
Expand Down
Loading
Loading