diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index dbba0f2914a6d..854e907d68b1a 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -25,7 +25,7 @@ use crate::type_coercion::functions::fields_with_udf; use crate::udf::ReturnFieldArgs; use crate::{LogicalPlan, Projection, Subquery, WindowFunctionDefinition, utils}; use arrow::compute::can_cast_types; -use arrow::datatypes::{DataType, Field}; +use arrow::datatypes::{DataType, Field, FieldRef}; use datafusion_common::datatype::FieldExt; use datafusion_common::metadata::FieldMetadata; use datafusion_common::{ @@ -156,9 +156,10 @@ impl ExprSchemable for Expr { let return_type = self.to_field(schema)?.1.data_type().clone(); Ok(return_type) } - Expr::WindowFunction(window_function) => self - .data_type_and_nullable_with_window_function(schema, window_function) - .map(|(return_type, _)| return_type), + Expr::WindowFunction(window_function) => Ok(self + .window_function_field(schema, window_function)? + .data_type() + .clone()), Expr::AggregateFunction(AggregateFunction { func, params: AggregateFunctionParams { args, .. }, @@ -357,12 +358,9 @@ impl ExprSchemable for Expr { Expr::AggregateFunction(AggregateFunction { func, .. }) => { Ok(func.is_nullable()) } - Expr::WindowFunction(window_function) => self - .data_type_and_nullable_with_window_function( - input_schema, - window_function, - ) - .map(|(_, nullable)| nullable), + Expr::WindowFunction(window_function) => Ok(self + .window_function_field(input_schema, window_function)? + .is_nullable()), Expr::ScalarVariable(field, _) => Ok(field.is_nullable()), Expr::TryCast { .. } | Expr::Unnest(_) | Expr::Placeholder(_) => Ok(true), Expr::IsNull(_) @@ -458,7 +456,7 @@ impl ExprSchemable for Expr { /// with the default implementation returning empty field metadata /// - **Aggregate functions**: Generate metadata via function's [`return_field`] method, /// with the default implementation returning empty field metadata - /// - **Window functions**: field metadata is empty + /// - **Window functions**: field metadata follows the function's return field /// /// ## Table Reference Scoping /// - Establishes proper qualified field references when columns belong to specific tables @@ -534,11 +532,7 @@ impl ExprSchemable for Expr { ))) } Expr::WindowFunction(window_function) => { - let (dt, nullable) = self.data_type_and_nullable_with_window_function( - schema, - window_function, - )?; - Ok(Arc::new(Field::new(&schema_name, dt, nullable))) + self.window_function_field(schema, window_function) } Expr::AggregateFunction(aggregate_function) => { let AggregateFunction { @@ -698,11 +692,11 @@ impl Expr { /// /// Otherwise, returns an error if there's a type mismatch between /// the window function's signature and the provided arguments. - fn data_type_and_nullable_with_window_function( + fn window_function_field( &self, schema: &dyn ExprSchema, window_function: &WindowFunction, - ) -> Result<(DataType, bool)> { + ) -> Result { let WindowFunction { fun, params: WindowFunctionParams { args, .. }, @@ -738,9 +732,7 @@ impl Expr { .into_iter() .collect::>(); - let return_field = udaf.return_field(&new_fields)?; - - Ok((return_field.data_type().clone(), return_field.is_nullable())) + udaf.return_field(&new_fields) } WindowFunctionDefinition::WindowUDF(udwf) => { let data_types = fields @@ -769,7 +761,6 @@ impl Expr { let field_args = WindowUDFFieldArgs::new(&new_fields, &function_name); udwf.field(field_args) - .map(|field| (field.data_type().clone(), field.is_nullable())) } } } diff --git a/datafusion/sqllogictest/test_files/metadata.slt b/datafusion/sqllogictest/test_files/metadata.slt index 41a511b5fa09d..6ed461debb3b4 100644 --- a/datafusion/sqllogictest/test_files/metadata.slt +++ b/datafusion/sqllogictest/test_files/metadata.slt @@ -24,6 +24,22 @@ ## in the test harness as there is no way to define schema ## with metadata in SQL. +query ITTPT +select * from table_with_metadata; +---- +1 NULL NULL 2020-09-08T13:42:29.190855123 no_foo +NULL bar l_bar 2020-09-08T13:42:29.190855123 no_bar +3 baz l_baz 2020-09-08T13:42:29.190855123 no_baz + +query TTT +describe table_with_metadata; +---- +id Int32 YES +name Utf8 YES +l_name Utf8 YES +ts Timestamp(ns) NO +nonnull_name Utf8 NO + query IT select id, name from table_with_metadata; ---- @@ -235,6 +251,28 @@ order by 1 asc nulls last; 3 1 NULL 1 +# Reproducer for https://github.com/apache/datafusion/issues/18337 +# this query should not get an internal error +query TI +SELECT + 'foo' AS name, + COUNT( + CASE + WHEN prev_value = 'no_bar' AND value = 'no_baz' THEN 1 + ELSE NULL + END + ) AS count_rises +FROM + ( + SELECT + nonnull_name as value, + LAG(nonnull_name) OVER (ORDER BY ts) AS prev_value + FROM + table_with_metadata +); +---- +foo 1 + # Regression test: first_value should preserve metadata query IT select first_value(id order by id asc nulls last), arrow_metadata(first_value(id order by id asc nulls last), 'metadata_key')