From b58f1d5f971a773f27c2a003082d3ab493a9d89f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 27 May 2026 10:46:05 +0000 Subject: [PATCH 1/3] Add MaskNullAsFalse execution target and make Mask strict Mask execution now requires a non-nullable boolean array and errors on nullable input. The new MaskNullAsFalse executable preserves the previous null-as-false coercion for filter and pruning predicates over nullable data, where SQL semantics treat NULL as not matching. Predicate-evaluation call sites (filter, prune, dict filter, is_constant) use MaskNullAsFalse; validity-array call sites keep the stricter Mask. Signed-off-by: Claude --- vortex-array/public-api.lock | 18 +++ .../src/aggregate_fn/fns/is_constant/mod.rs | 4 +- vortex-array/src/mask.rs | 107 ++++++++++++++---- vortex-cuda/src/layout.rs | 5 +- vortex-layout/src/layouts/dict/reader.rs | 6 +- vortex-layout/src/layouts/flat/reader.rs | 5 +- vortex-layout/src/layouts/partitioned.rs | 7 +- vortex-layout/src/layouts/row_idx/mod.rs | 6 +- vortex-layout/src/layouts/zoned/zone_map.rs | 7 +- vortex/public-api.lock | 2 + vortex/src/lib.rs | 1 + 11 files changed, 131 insertions(+), 37 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 9525aef3a88..82384e2e9fe 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -14734,6 +14734,20 @@ pub fn vortex_array::scalar_fn::fns::zip::ZipExecuteAdaptor::execute_parent(& pub mod vortex_array::mask +pub struct vortex_array::mask::MaskNullAsFalse(_) + +impl vortex_array::mask::MaskNullAsFalse + +pub fn vortex_array::mask::MaskNullAsFalse::into_mask(self) -> vortex_mask::Mask + +impl core::convert::From for vortex_mask::Mask + +pub fn vortex_mask::Mask::from(vortex_array::mask::MaskNullAsFalse) -> Self + +impl vortex_array::Executable for vortex_array::mask::MaskNullAsFalse + +pub fn vortex_array::mask::MaskNullAsFalse::execute(vortex_array::ArrayRef, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + pub mod vortex_array::matcher pub struct vortex_array::matcher::AnyArray @@ -26326,6 +26340,10 @@ impl vortex_array::Executable for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::execute(vortex_array::ArrayRef, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +impl vortex_array::Executable for vortex_array::mask::MaskNullAsFalse + +pub fn vortex_array::mask::MaskNullAsFalse::execute(vortex_array::ArrayRef, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + impl vortex_array::Executable for vortex_buffer::bit::buf::BitBuffer pub fn vortex_buffer::bit::buf::BitBuffer::execute(vortex_array::ArrayRef, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult diff --git a/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs b/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs index b14b2a050f8..9c3a6278f02 100644 --- a/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs +++ b/vortex-array/src/aggregate_fn/fns/is_constant/mod.rs @@ -13,7 +13,6 @@ mod varbin; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; -use vortex_mask::Mask; use self::bool::check_bool_constant; use self::decimal::check_decimal_constant; @@ -44,6 +43,7 @@ use crate::expr::stats::Precision; use crate::expr::stats::Stat; use crate::expr::stats::StatsProvider; use crate::expr::stats::StatsProviderExt; +use crate::mask::MaskNullAsFalse; use crate::scalar::Scalar; use crate::scalar_fn::fns::operators::Operator; @@ -74,7 +74,7 @@ fn arrays_value_equal(a: &ArrayRef, b: &ArrayRef, ctx: &mut ExecutionCtx) -> Vor // Compare values element-wise. Result is null where both inputs are null, // true/false where both are valid. let eq_result = a.binary(b.clone(), Operator::Eq)?; - let eq_result = eq_result.execute::(ctx)?; + let eq_result = eq_result.execute::(ctx)?.into_mask(); Ok(eq_result.true_count() == valid_count) } diff --git a/vortex-array/src/mask.rs b/vortex-array/src/mask.rs index ab035670e24..fb04606c2b9 100644 --- a/vortex-array/src/mask.rs +++ b/vortex-array/src/mask.rs @@ -17,33 +17,90 @@ use crate::columnar::Columnar; use crate::dtype::DType; impl Executable for Mask { + /// Executes a boolean array into a [`Mask`]. + /// + /// The array must have a non-nullable boolean dtype. Use [`MaskNullAsFalse`] to execute a + /// nullable boolean array, coercing null elements to `false`. fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - if !matches!(array.dtype(), DType::Bool(_)) { - vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); - } + execute_mask(array, ctx, NullHandling::Reject) + } +} - if let Some(constant) = array.as_opt::() { - let mask_value = constant.scalar().as_bool().value().unwrap_or(false); - return Ok(Mask::new(array.len(), mask_value)); - } +/// An [`Executable`] target that executes a boolean array into a [`Mask`], coercing null +/// elements to `false`. +/// +/// [`Mask`] itself requires a non-nullable boolean array and errors on nullable input. Use this +/// wrapper for filter and pruning predicates over nullable data, where SQL semantics treat +/// `NULL` as not matching. +pub struct MaskNullAsFalse(Mask); - let array_len = array.len(); - Ok(match array.execute(ctx)? { - Columnar::Constant(s) => { - Mask::new(array_len, s.scalar().as_bool().value().unwrap_or(false)) - } - Columnar::Canonical(a) => { - let bool = a.into_array().execute::(ctx)?; - let mask = bool - .as_ref() - .validity()? - .execute_mask(bool.as_ref().len(), ctx)?; - let bits = bool.into_bit_buffer(); - // To handle nullable boolean arrays, we treat nulls as false in the mask. - // TODO(ngates): is this correct? Feels like we should just force the caller to - // pass non-nullable boolean arrays. - mask.bitand(&Mask::from(bits)) - } - }) +impl MaskNullAsFalse { + /// Consumes the wrapper and returns the underlying [`Mask`]. + pub fn into_mask(self) -> Mask { + self.0 + } +} + +impl From for Mask { + fn from(value: MaskNullAsFalse) -> Self { + value.0 + } +} + +impl Executable for MaskNullAsFalse { + fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { + execute_mask(array, ctx, NullHandling::AsFalse).map(Self) + } +} + +/// How [`execute_mask`] treats null elements of a nullable boolean array. +enum NullHandling { + /// Error if the boolean array is nullable. + Reject, + /// Treat null elements as `false`. + AsFalse, +} + +fn execute_mask( + array: ArrayRef, + ctx: &mut ExecutionCtx, + null_handling: NullHandling, +) -> VortexResult { + if !matches!(array.dtype(), DType::Bool(_)) { + vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); } + + if let Some(constant) = array.as_opt::() { + let mask_value = constant.scalar().as_bool().value().unwrap_or(false); + return Ok(Mask::new(array.len(), mask_value)); + } + + let array_len = array.len(); + Ok(match array.execute(ctx)? { + Columnar::Constant(s) => { + Mask::new(array_len, s.scalar().as_bool().value().unwrap_or(false)) + } + Columnar::Canonical(a) => { + let bool = a.into_array().execute::(ctx)?; + match null_handling { + NullHandling::Reject => { + if bool.as_ref().dtype().is_nullable() { + vortex_bail!( + "Mask requires a non-nullable boolean array, not {}; \ + use MaskNullAsFalse to coerce nulls to false", + bool.as_ref().dtype() + ); + } + Mask::from(bool.into_bit_buffer()) + } + NullHandling::AsFalse => { + let validity = bool + .as_ref() + .validity()? + .execute_mask(bool.as_ref().len(), ctx)?; + validity.bitand(&Mask::from(bool.into_bit_buffer())) + } + } + } + }) } diff --git a/vortex-cuda/src/layout.rs b/vortex-cuda/src/layout.rs index 76e5a5ba5fc..da0fa28931d 100644 --- a/vortex-cuda/src/layout.rs +++ b/vortex-cuda/src/layout.rs @@ -59,6 +59,7 @@ use vortex::layout::sequence::SendableSequentialStream; use vortex::layout::sequence::SequencePointer; use vortex::layout::vtable; use vortex::mask::Mask; +use vortex::mask::MaskNullAsFalse; use vortex::scalar::Scalar; use vortex::scalar::ScalarTruncation; use vortex::scalar::lower_bound; @@ -329,12 +330,12 @@ impl LayoutReader for CudaFlatReader { let array = array.apply(&expr)?; let array = array.filter(mask.clone())?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.execute::(&mut ctx)?.into_mask(); mask.intersect_by_rank(&array_mask) } else { let array = array.apply(&expr)?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.execute::(&mut ctx)?.into_mask(); mask.bitand(&array_mask) }; diff --git a/vortex-layout/src/layouts/dict/reader.rs b/vortex-layout/src/layouts/dict/reader.rs index 96f12d53ece..19e002b42ee 100644 --- a/vortex-layout/src/layouts/dict/reader.rs +++ b/vortex-layout/src/layouts/dict/reader.rs @@ -21,6 +21,7 @@ use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; use vortex_array::expr::root; +use vortex_array::mask::MaskNullAsFalse; use vortex_array::optimizer::ArrayOptimizer; use vortex_error::VortexError; use vortex_error::VortexExpect; @@ -212,7 +213,10 @@ impl LayoutReader for DictReader { let mask = mask.await?; let mut ctx = session.create_execution_ctx(); - let dict_mask = values.take(codes)?.execute::(&mut ctx)?; + let dict_mask = values + .take(codes)? + .execute::(&mut ctx)? + .into_mask(); Ok(mask.bitand(&dict_mask)) })) diff --git a/vortex-layout/src/layouts/flat/reader.rs b/vortex-layout/src/layouts/flat/reader.rs index 8817fe5c678..6b4e7b0c240 100644 --- a/vortex-layout/src/layouts/flat/reader.rs +++ b/vortex-layout/src/layouts/flat/reader.rs @@ -15,6 +15,7 @@ use vortex_array::VortexSessionExecute; use vortex_array::dtype::DType; use vortex_array::dtype::FieldMask; use vortex_array::expr::Expression; +use vortex_array::mask::MaskNullAsFalse; use vortex_array::serde::SerializedArray; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -155,14 +156,14 @@ impl LayoutReader for FlatReader { let array = array.apply(&expr)?; let array = array.filter(mask.clone())?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.execute::(&mut ctx)?.into_mask(); mask.intersect_by_rank(&array_mask) } else { // Run over the full array, with a simpler bitand at the end. let array = array.apply(&expr)?; let mut ctx = session.create_execution_ctx(); - let array_mask = array.execute::(&mut ctx)?; + let array_mask = array.execute::(&mut ctx)?.into_mask(); mask.bitand(&array_mask) }; diff --git a/vortex-layout/src/layouts/partitioned.rs b/vortex-layout/src/layouts/partitioned.rs index d5ac3d44c55..deaccd92513 100644 --- a/vortex-layout/src/layouts/partitioned.rs +++ b/vortex-layout/src/layouts/partitioned.rs @@ -15,10 +15,10 @@ use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::expr::Expression; use vortex_array::expr::transform::PartitionedExpr; +use vortex_array::mask::MaskNullAsFalse; use vortex_array::validity::Validity; use vortex_error::VortexError; use vortex_error::VortexResult; -use vortex_mask::Mask; use vortex_session::VortexSession; use crate::ArrayFuture; @@ -90,7 +90,10 @@ impl PartitionedExprEval

for PartitionedExpr

{ .into_array(); let mut ctx = session.create_execution_ctx(); - let root_mask = root_scope.apply(&self.root)?.execute::(&mut ctx)?; + let root_mask = root_scope + .apply(&self.root)? + .execute::(&mut ctx)? + .into_mask(); let mask = mask.bitand(&root_mask); diff --git a/vortex-layout/src/layouts/row_idx/mod.rs b/vortex-layout/src/layouts/row_idx/mod.rs index 18fc62b05cd..5693ae7dc38 100644 --- a/vortex-layout/src/layouts/row_idx/mod.rs +++ b/vortex-layout/src/layouts/row_idx/mod.rs @@ -32,6 +32,7 @@ use vortex_array::expr::root; use vortex_array::expr::transform::PartitionedExpr; use vortex_array::expr::transform::partition; use vortex_array::expr::transform::replace; +use vortex_array::mask::MaskNullAsFalse; use vortex_array::scalar::PValue; use vortex_error::VortexExpect; use vortex_error::VortexResult; @@ -295,7 +296,10 @@ fn row_idx_mask_future( let array = idx_array(row_offset, &row_range).into_array(); let mut ctx = session.create_execution_ctx(); - let result_mask = array.apply(&expr)?.execute::(&mut ctx)?; + let result_mask = array + .apply(&expr)? + .execute::(&mut ctx)? + .into_mask(); Ok(result_mask.bitand(&mask.await?)) }) diff --git a/vortex-layout/src/layouts/zoned/zone_map.rs b/vortex-layout/src/layouts/zoned/zone_map.rs index 16360ff287d..69b98d5e41a 100644 --- a/vortex-layout/src/layouts/zoned/zone_map.rs +++ b/vortex-layout/src/layouts/zoned/zone_map.rs @@ -14,6 +14,7 @@ use vortex_array::arrays::StructArray; use vortex_array::dtype::DType; use vortex_array::expr::Expression; use vortex_array::expr::stats::Stat; +use vortex_array::mask::MaskNullAsFalse; use vortex_array::scalar_fn::internal::row_count::contains_row_count; use vortex_array::scalar_fn::internal::row_count::substitute_row_count; use vortex_array::validity::Validity; @@ -99,12 +100,14 @@ impl ZoneMap { let applied = self.array.clone().into_array().apply(predicate)?; if num_zones == 0 || !contains_row_count(&applied) { - return applied.execute::(&mut ctx); + return Ok(applied.execute::(&mut ctx)?.into_mask()); } let row_count_array = row_count_array(self.zone_len, self.row_count, num_zones)?; let substituted = substitute_row_count(applied, &row_count_array)?; - substituted.execute::(&mut ctx) + Ok(substituted + .execute::(&mut ctx)? + .into_mask()) } } diff --git a/vortex/public-api.lock b/vortex/public-api.lock index 7be026902db..4a5f1d4fe75 100644 --- a/vortex/public-api.lock +++ b/vortex/public-api.lock @@ -112,6 +112,8 @@ pub mod vortex::mask pub use vortex::mask::<> +pub use vortex::mask::MaskNullAsFalse + pub mod vortex::metrics pub use vortex::metrics::<> diff --git a/vortex/src/lib.rs b/vortex/src/lib.rs index 8668de339cb..b5241797a01 100644 --- a/vortex/src/lib.rs +++ b/vortex/src/lib.rs @@ -76,6 +76,7 @@ pub mod layout { } pub mod mask { + pub use vortex_array::mask::MaskNullAsFalse; pub use vortex_mask::*; } From f1d5313e23884cad242322ffb1b3cecded761b81 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 27 May 2026 11:04:01 +0000 Subject: [PATCH 2/3] Inline Mask and MaskNullAsFalse execution impls Remove the shared NullHandling enum and execute_mask helper in favor of a self-contained Executable impl for each target. Mask still requires a non-nullable boolean array; MaskNullAsFalse coerces nulls to false. Signed-off-by: Claude --- vortex-array/src/mask.rs | 99 +++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 52 deletions(-) diff --git a/vortex-array/src/mask.rs b/vortex-array/src/mask.rs index fb04606c2b9..3185a607315 100644 --- a/vortex-array/src/mask.rs +++ b/vortex-array/src/mask.rs @@ -22,7 +22,32 @@ impl Executable for Mask { /// The array must have a non-nullable boolean dtype. Use [`MaskNullAsFalse`] to execute a /// nullable boolean array, coercing null elements to `false`. fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - execute_mask(array, ctx, NullHandling::Reject) + if !matches!(array.dtype(), DType::Bool(_)) { + vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); + } + + if let Some(constant) = array.as_opt::() { + let mask_value = constant.scalar().as_bool().value().unwrap_or(false); + return Ok(Mask::new(array.len(), mask_value)); + } + + let array_len = array.len(); + Ok(match array.execute(ctx)? { + Columnar::Constant(s) => { + Mask::new(array_len, s.scalar().as_bool().value().unwrap_or(false)) + } + Columnar::Canonical(a) => { + let bool = a.into_array().execute::(ctx)?; + if bool.as_ref().dtype().is_nullable() { + vortex_bail!( + "Mask requires a non-nullable boolean array, not {}; \ + use MaskNullAsFalse to coerce nulls to false", + bool.as_ref().dtype() + ); + } + Mask::from(bool.into_bit_buffer()) + } + }) } } @@ -49,58 +74,28 @@ impl From for Mask { impl Executable for MaskNullAsFalse { fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - execute_mask(array, ctx, NullHandling::AsFalse).map(Self) - } -} - -/// How [`execute_mask`] treats null elements of a nullable boolean array. -enum NullHandling { - /// Error if the boolean array is nullable. - Reject, - /// Treat null elements as `false`. - AsFalse, -} - -fn execute_mask( - array: ArrayRef, - ctx: &mut ExecutionCtx, - null_handling: NullHandling, -) -> VortexResult { - if !matches!(array.dtype(), DType::Bool(_)) { - vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); - } - - if let Some(constant) = array.as_opt::() { - let mask_value = constant.scalar().as_bool().value().unwrap_or(false); - return Ok(Mask::new(array.len(), mask_value)); - } + if !matches!(array.dtype(), DType::Bool(_)) { + vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); + } - let array_len = array.len(); - Ok(match array.execute(ctx)? { - Columnar::Constant(s) => { - Mask::new(array_len, s.scalar().as_bool().value().unwrap_or(false)) + if let Some(constant) = array.as_opt::() { + let mask_value = constant.scalar().as_bool().value().unwrap_or(false); + return Ok(Self(Mask::new(array.len(), mask_value))); } - Columnar::Canonical(a) => { - let bool = a.into_array().execute::(ctx)?; - match null_handling { - NullHandling::Reject => { - if bool.as_ref().dtype().is_nullable() { - vortex_bail!( - "Mask requires a non-nullable boolean array, not {}; \ - use MaskNullAsFalse to coerce nulls to false", - bool.as_ref().dtype() - ); - } - Mask::from(bool.into_bit_buffer()) - } - NullHandling::AsFalse => { - let validity = bool - .as_ref() - .validity()? - .execute_mask(bool.as_ref().len(), ctx)?; - validity.bitand(&Mask::from(bool.into_bit_buffer())) - } + + let array_len = array.len(); + Ok(Self(match array.execute(ctx)? { + Columnar::Constant(s) => { + Mask::new(array_len, s.scalar().as_bool().value().unwrap_or(false)) } - } - }) + Columnar::Canonical(a) => { + let bool = a.into_array().execute::(ctx)?; + let validity = bool + .as_ref() + .validity()? + .execute_mask(bool.as_ref().len(), ctx)?; + validity.bitand(&Mask::from(bool.into_bit_buffer())) + } + })) + } } From 587308b4c90f09543bbc3c8ce95d9b3e39752eb7 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 27 May 2026 13:01:13 +0100 Subject: [PATCH 3/3] fix Signed-off-by: Joe Isaacs --- vortex-array/src/mask.rs | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/vortex-array/src/mask.rs b/vortex-array/src/mask.rs index 3185a607315..7fb25d4d627 100644 --- a/vortex-array/src/mask.rs +++ b/vortex-array/src/mask.rs @@ -12,9 +12,9 @@ use crate::Executable; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::BoolArray; -use crate::arrays::Constant; use crate::columnar::Columnar; use crate::dtype::DType; +use crate::dtype::Nullability; impl Executable for Mask { /// Executes a boolean array into a [`Mask`]. @@ -22,13 +22,11 @@ impl Executable for Mask { /// The array must have a non-nullable boolean dtype. Use [`MaskNullAsFalse`] to execute a /// nullable boolean array, coercing null elements to `false`. fn execute(array: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult { - if !matches!(array.dtype(), DType::Bool(_)) { - vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); - } - - if let Some(constant) = array.as_opt::() { - let mask_value = constant.scalar().as_bool().value().unwrap_or(false); - return Ok(Mask::new(array.len(), mask_value)); + if !matches!(array.dtype(), DType::Bool(Nullability::NonNullable)) { + vortex_bail!( + "Mask array must have boolean(NonNullable) dtype, not {}", + array.dtype() + ); } let array_len = array.len(); @@ -38,13 +36,6 @@ impl Executable for Mask { } Columnar::Canonical(a) => { let bool = a.into_array().execute::(ctx)?; - if bool.as_ref().dtype().is_nullable() { - vortex_bail!( - "Mask requires a non-nullable boolean array, not {}; \ - use MaskNullAsFalse to coerce nulls to false", - bool.as_ref().dtype() - ); - } Mask::from(bool.into_bit_buffer()) } }) @@ -78,11 +69,6 @@ impl Executable for MaskNullAsFalse { vortex_bail!("Mask array must have boolean dtype, not {}", array.dtype()); } - if let Some(constant) = array.as_opt::() { - let mask_value = constant.scalar().as_bool().value().unwrap_or(false); - return Ok(Self(Mask::new(array.len(), mask_value))); - } - let array_len = array.len(); Ok(Self(match array.execute(ctx)? { Columnar::Constant(s) => {