diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 00e9c71df348..bfafac931df8 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -63,7 +63,7 @@ pub fn expr_applicable_for_cols(col_names: &[&str], expr: &Expr) -> bool { } Expr::Literal(_, _) | Expr::Alias(_) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::ScalarVariable(_, _) | Expr::Not(_) | Expr::IsNotNull(_) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0749ff0e98b7..713a5737f6cb 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -19,7 +19,7 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, HashSet}; -use std::fmt::{self, Display, Formatter, Write}; +use std::fmt::{self, Debug, Display, Formatter, Write}; use std::hash::{Hash, Hasher}; use std::mem; use std::sync::Arc; @@ -362,7 +362,7 @@ pub enum Expr { Placeholder(Placeholder), /// A placeholder which holds a reference to a qualified field /// in the outer query, used for correlated sub queries. - OuterReferenceColumn(DataType, Column), + OuterReferenceColumn(Box), /// Unnest expression Unnest(Unnest), } @@ -413,6 +413,18 @@ impl<'a> TreeNodeContainer<'a, Self> for Expr { } } +#[derive(Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct OuterReference { + pub data_type: DataType, + pub column: Column, +} + +impl Debug for OuterReference { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{:?}, {:?}", self.data_type, self.column) + } +} + /// Literal metadata /// /// Stores metadata associated with a literal expressions @@ -1443,7 +1455,7 @@ impl Expr { Expr::Case { .. } => "Case", Expr::Cast { .. } => "Cast", Expr::Column(..) => "Column", - Expr::OuterReferenceColumn(_, _) => "Outer", + Expr::OuterReferenceColumn(_) => "Outer", Expr::Exists { .. } => "Exists", Expr::GroupingSet(..) => "GroupingSet", Expr::InList { .. } => "InList", @@ -2018,7 +2030,7 @@ impl Expr { | Expr::SimilarTo(..) | Expr::Not(..) | Expr::Negative(..) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::TryCast(..) | Expr::Unnest(..) | Expr::Wildcard { .. } @@ -2601,9 +2613,9 @@ impl HashNode for Expr { Expr::Placeholder(place_holder) => { place_holder.hash(state); } - Expr::OuterReferenceColumn(data_type, column) => { - data_type.hash(state); - column.hash(state); + Expr::OuterReferenceColumn(boxed_orc) => { + boxed_orc.data_type.hash(state); + boxed_orc.column.hash(state); } Expr::Unnest(Unnest { expr: _expr }) => {} }; @@ -2908,7 +2920,7 @@ struct SqlDisplay<'a>(&'a Expr); impl Display for SqlDisplay<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self.0 { - Expr::Literal(scalar, _) => scalar.fmt(f), + Expr::Literal(scalar, _) => Display::fmt(&scalar, f), Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), Expr::Between(Between { expr, @@ -3171,7 +3183,8 @@ impl Display for Expr { match self { Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"), Expr::Column(c) => write!(f, "{c}"), - Expr::OuterReferenceColumn(_, c) => { + Expr::OuterReferenceColumn(boxed_orc) => { + let c = &boxed_orc.column; write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})") } Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")), @@ -3818,7 +3831,7 @@ mod test { // If this test fails when you change `Expr`, please try // `Box`ing the fields to make `Expr` smaller // See https://github.com/apache/datafusion/issues/16199 for details - assert_eq!(size_of::(), 128); + assert_eq!(size_of::(), 112); assert_eq!(size_of::(), 64); assert_eq!(size_of::(), 24); // 3 ptrs assert_eq!(size_of::>(), 24); diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 001dfce59ccd..42f046e230c1 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -19,7 +19,8 @@ use crate::expr::{ AggregateFunction, BinaryExpr, Cast, Exists, GroupingSet, InList, InSubquery, - Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, WindowFunctionParams, + OuterReference, Placeholder, TryCast, Unnest, WildcardOptions, WindowFunction, + WindowFunctionParams, }; use crate::function::{ AccumulatorArgs, AccumulatorFactoryFunction, PartitionEvaluatorFactory, @@ -69,7 +70,13 @@ pub fn col(ident: impl Into) -> Expr { /// Create an out reference column which hold a reference that has been resolved to a field /// outside of the current plan. pub fn out_ref_col(dt: DataType, ident: impl Into) -> Expr { - Expr::OuterReferenceColumn(dt, ident.into()) + Expr::OuterReferenceColumn( + OuterReference { + data_type: dt, + column: ident.into(), + } + .into(), + ) } /// Create an unqualified column expression from the provided name, without normalizing diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index 05a9425452a1..1609ae65b1e9 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -200,8 +200,9 @@ pub fn unnormalize_cols(exprs: impl IntoIterator) -> Vec { pub fn strip_outer_reference(expr: Expr) -> Expr { expr.transform(|expr| { Ok({ - if let Expr::OuterReferenceColumn(_, col) = expr { - Transformed::yes(Expr::Column(col)) + // Match the boxed (DataType, Column) tuple and extract the Column + if let Expr::OuterReferenceColumn(boxed_pair) = expr { + Transformed::yes(Expr::Column(boxed_pair.column)) } else { Transformed::no(expr) } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 8ca479bb6f9b..7561712208fb 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -112,7 +112,9 @@ impl ExprSchemable for Expr { }, Expr::Negative(expr) => expr.get_type(schema), Expr::Column(c) => Ok(schema.data_type(c)?.clone()), - Expr::OuterReferenceColumn(ty, _) => Ok(ty.clone()), + Expr::OuterReferenceColumn(boxed_orc) => { + Ok(boxed_orc.as_ref().data_type.clone()) + } Expr::ScalarVariable(ty, _) => Ok(ty.clone()), Expr::Literal(l, _) => Ok(l.data_type()), Expr::Case(case) => { @@ -276,7 +278,7 @@ impl ExprSchemable for Expr { || high.nullable(input_schema)?), Expr::Column(c) => input_schema.nullable(c), - Expr::OuterReferenceColumn(_, _) => Ok(true), + Expr::OuterReferenceColumn(_) => Ok(true), Expr::Literal(value, _) => Ok(value.is_null()), Expr::Case(case) => { // This expression is nullable if any of the input expressions are nullable @@ -411,9 +413,11 @@ impl ExprSchemable for Expr { } Expr::Negative(expr) => expr.to_field(schema).map(|(_, f)| f), Expr::Column(c) => schema.field_from_column(c).map(|f| Arc::new(f.clone())), - Expr::OuterReferenceColumn(ty, _) => { - Ok(Arc::new(Field::new(&schema_name, ty.clone(), true))) - } + Expr::OuterReferenceColumn(boxed_alias) => Ok(Arc::new(Field::new( + &schema_name, + boxed_alias.as_ref().data_type.clone(), + true, + ))), Expr::ScalarVariable(ty, _) => { Ok(Arc::new(Field::new(&schema_name, ty.clone(), true))) } diff --git a/datafusion/expr/src/tree_node.rs b/datafusion/expr/src/tree_node.rs index f953aec5a1e3..04c9a4bccbaa 100644 --- a/datafusion/expr/src/tree_node.rs +++ b/datafusion/expr/src/tree_node.rs @@ -71,7 +71,7 @@ impl TreeNode for Expr { #[expect(deprecated)] Expr::Column(_) // Treat OuterReferenceColumn as a leaf expression - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::ScalarVariable(_, _) | Expr::Literal(_, _) | Expr::Exists { .. } @@ -122,7 +122,7 @@ impl TreeNode for Expr { Expr::Column(_) | Expr::Wildcard { .. } | Expr::Placeholder(Placeholder { .. }) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::ScalarVariable(_, _) diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index a98b0fdcc3d3..567cfee95f32 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -589,7 +589,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> { | Expr::Wildcard { .. } | Expr::GroupingSet(_) | Expr::Placeholder(_) - | Expr::OuterReferenceColumn(_, _) => Ok(Transformed::no(expr)), + | Expr::OuterReferenceColumn(_) => Ok(Transformed::no(expr)), } } } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 7b7be82b70ca..7b2849d2ca0a 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -637,8 +637,8 @@ fn outer_columns<'a>(expr: &'a Expr, columns: &mut HashSet<&'a Column>) { // inspect_expr_pre doesn't handle subquery references, so find them explicitly expr.apply(|expr| { match expr { - Expr::OuterReferenceColumn(_, col) => { - columns.insert(col); + Expr::OuterReferenceColumn(boxed_orc) => { + columns.insert(&boxed_orc.as_ref().column); } Expr::ScalarSubquery(subquery) => { outer_columns_helper_multi(&subquery.outer_ref_columns, columns); diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index bcb867f6e7fa..a6a06c0360b7 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -262,7 +262,7 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> Result { Expr::Exists { .. } | Expr::InSubquery(_) | Expr::ScalarSubquery(_) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::Unnest(_) => { is_evaluate = false; Ok(TreeNodeRecursion::Stop) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 13e27612ae49..3d9d952b83d6 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -628,7 +628,7 @@ impl<'a> ConstEvaluator<'a> { Expr::AggregateFunction { .. } | Expr::ScalarVariable(_, _) | Expr::Column(_) - | Expr::OuterReferenceColumn(_, _) + | Expr::OuterReferenceColumn(_) | Expr::Exists { .. } | Expr::InSubquery(_) | Expr::ScalarSubquery(_) diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 434ac50bce50..283ef60e534a 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -15,18 +15,18 @@ // specific language governing permissions and limitations // under the License. +use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow::datatypes::Field; use datafusion_common::{ internal_err, not_impl_err, plan_datafusion_err, plan_err, Column, DFSchema, DataFusionError, Result, Span, TableReference, }; +use datafusion_expr::expr::OuterReference; use datafusion_expr::planner::PlannerResult; +use datafusion_expr::UNNAMED_TABLE; use datafusion_expr::{Case, Expr}; use sqlparser::ast::{CaseWhen, Expr as SQLExpr, Ident}; -use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_expr::UNNAMED_TABLE; - impl SqlToRel<'_, S> { pub(super) fn sql_identifier_to_expr( &self, @@ -75,8 +75,11 @@ impl SqlToRel<'_, S> { { // Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column return Ok(Expr::OuterReferenceColumn( - field.data_type().clone(), - Column::from((qualifier, field)), + OuterReference { + data_type: field.data_type().clone(), + column: Column::from((qualifier, field)), + } + .into(), )); } } @@ -182,8 +185,11 @@ impl SqlToRel<'_, S> { Some((field, qualifier, _nested_names)) => { // Found an exact match on a qualified name in the outer plan schema, so this is an outer reference column Ok(Expr::OuterReferenceColumn( - field.data_type().clone(), - Column::from((qualifier, field)), + OuterReference { + data_type: field.data_type().clone(), + column: Column::from((qualifier, field)), + } + .into(), )) } // Found no matching field, will return a default diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 4ddd5ccccbbd..245e6dedebff 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -512,7 +512,9 @@ impl Unparser<'_> { Expr::Placeholder(p) => { Ok(ast::Expr::value(ast::Value::Placeholder(p.id.to_string()))) } - Expr::OuterReferenceColumn(_, col) => self.col_to_sql(col), + Expr::OuterReferenceColumn(boxed_orc) => { + self.col_to_sql(&boxed_orc.as_ref().column) + } Expr::Unnest(unnest) => self.unnest_to_sql(unnest), } } diff --git a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs index 42e1f962f1d1..9ef1e0749194 100644 --- a/datafusion/substrait/src/logical_plan/producer/expr/mod.rs +++ b/datafusion/substrait/src/logical_plan/producer/expr/mod.rs @@ -144,7 +144,7 @@ pub fn to_substrait_rex( Expr::Wildcard { .. } => not_impl_err!("Cannot convert {expr:?} to Substrait"), Expr::GroupingSet(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"), Expr::Placeholder(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"), - Expr::OuterReferenceColumn(_, _) => { + Expr::OuterReferenceColumn(_) => { not_impl_err!("Cannot convert {expr:?} to Substrait") } Expr::Unnest(expr) => not_impl_err!("Cannot convert {expr:?} to Substrait"),