Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 13 additions & 22 deletions datafusion/expr/src/expr_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::type_coercion::functions::fields_with_udf;
use crate::udf::ReturnFieldArgs;
use crate::{LogicalPlan, Projection, Subquery, WindowFunctionDefinition, utils};
use arrow::compute::can_cast_types;
use arrow::datatypes::{DataType, Field};
use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::datatype::FieldExt;
use datafusion_common::metadata::FieldMetadata;
use datafusion_common::{
Expand Down Expand Up @@ -156,9 +156,10 @@ impl ExprSchemable for Expr {
let return_type = self.to_field(schema)?.1.data_type().clone();
Ok(return_type)
}
Expr::WindowFunction(window_function) => self
.data_type_and_nullable_with_window_function(schema, window_function)
.map(|(return_type, _)| return_type),
Expr::WindowFunction(window_function) => Ok(self
.window_function_field(schema, window_function)?
.data_type()
.clone()),
Expr::AggregateFunction(AggregateFunction {
func,
params: AggregateFunctionParams { args, .. },
Expand Down Expand Up @@ -357,12 +358,9 @@ impl ExprSchemable for Expr {
Expr::AggregateFunction(AggregateFunction { func, .. }) => {
Ok(func.is_nullable())
}
Expr::WindowFunction(window_function) => self
.data_type_and_nullable_with_window_function(
input_schema,
window_function,
)
.map(|(_, nullable)| nullable),
Expr::WindowFunction(window_function) => Ok(self
.window_function_field(input_schema, window_function)?
.is_nullable()),
Expr::ScalarVariable(field, _) => Ok(field.is_nullable()),
Expr::TryCast { .. } | Expr::Unnest(_) | Expr::Placeholder(_) => Ok(true),
Expr::IsNull(_)
Expand Down Expand Up @@ -458,7 +456,7 @@ impl ExprSchemable for Expr {
/// with the default implementation returning empty field metadata
/// - **Aggregate functions**: Generate metadata via function's [`return_field`] method,
/// with the default implementation returning empty field metadata
/// - **Window functions**: field metadata is empty
/// - **Window functions**: field metadata follows the function's return field
///
/// ## Table Reference Scoping
/// - Establishes proper qualified field references when columns belong to specific tables
Expand Down Expand Up @@ -534,11 +532,7 @@ impl ExprSchemable for Expr {
)))
}
Expr::WindowFunction(window_function) => {
let (dt, nullable) = self.data_type_and_nullable_with_window_function(
schema,
window_function,
)?;
Ok(Arc::new(Field::new(&schema_name, dt, nullable)))
self.window_function_field(schema, window_function)
}
Expr::AggregateFunction(aggregate_function) => {
let AggregateFunction {
Expand Down Expand Up @@ -698,11 +692,11 @@ impl Expr {
///
/// Otherwise, returns an error if there's a type mismatch between
/// the window function's signature and the provided arguments.
fn data_type_and_nullable_with_window_function(
fn window_function_field(
&self,
schema: &dyn ExprSchema,
window_function: &WindowFunction,
) -> Result<(DataType, bool)> {
) -> Result<FieldRef> {
Copy link
Contributor Author

@alamb alamb Jan 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the point is to return the entire field, rather than just the DataType and nullability so that any metadata attached to the field is also properly returned

I think the prior construction is from a time before Fields

let WindowFunction {
fun,
params: WindowFunctionParams { args, .. },
Expand Down Expand Up @@ -738,9 +732,7 @@ impl Expr {
.into_iter()
.collect::<Vec<_>>();

let return_field = udaf.return_field(&new_fields)?;

Ok((return_field.data_type().clone(), return_field.is_nullable()))
udaf.return_field(&new_fields)
}
WindowFunctionDefinition::WindowUDF(udwf) => {
let data_types = fields
Expand Down Expand Up @@ -769,7 +761,6 @@ impl Expr {
let field_args = WindowUDFFieldArgs::new(&new_fields, &function_name);

udwf.field(field_args)
.map(|field| (field.data_type().clone(), field.is_nullable()))
}
}
}
Expand Down
38 changes: 38 additions & 0 deletions datafusion/sqllogictest/test_files/metadata.slt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,22 @@
## in the test harness as there is no way to define schema
## with metadata in SQL.

query ITTPT
select * from table_with_metadata;
----
1 NULL NULL 2020-09-08T13:42:29.190855123 no_foo
NULL bar l_bar 2020-09-08T13:42:29.190855123 no_bar
3 baz l_baz 2020-09-08T13:42:29.190855123 no_baz

query TTT
describe table_with_metadata;
----
id Int32 YES
name Utf8 YES
l_name Utf8 YES
ts Timestamp(ns) NO
nonnull_name Utf8 NO

query IT
select id, name from table_with_metadata;
----
Expand Down Expand Up @@ -235,6 +251,28 @@ order by 1 asc nulls last;
3 1
NULL 1

# Reproducer for https://github.com/apache/datafusion/issues/18337
# this query should not get an internal error
query TI
SELECT
'foo' AS name,
COUNT(
CASE
WHEN prev_value = 'no_bar' AND value = 'no_baz' THEN 1
ELSE NULL
END
) AS count_rises
FROM
(
SELECT
nonnull_name as value,
LAG(nonnull_name) OVER (ORDER BY ts) AS prev_value
FROM
table_with_metadata
);
----
foo 1

# Regression test: first_value should preserve metadata
query IT
select first_value(id order by id asc nulls last), arrow_metadata(first_value(id order by id asc nulls last), 'metadata_key')
Expand Down