Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions

import scala.annotation.tailrec

import org.apache.spark.sql.catalyst.analysis.MultiAlias
import org.apache.spark.sql.catalyst.analysis.{MultiAlias, UnresolvedFunction}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Project}
import org.apache.spark.sql.catalyst.trees.CurrentOrigin
Expand Down Expand Up @@ -112,6 +112,10 @@ trait AliasHelper {
}

protected def trimAliases(e: Expression): Expression = e match {
// SPARK-48091: Do not descend into unresolved function calls. Aliases inside them
// (e.g., UnresolvedFunction("struct", Seq(Alias(x, "data")))) carry semantic information
// that ResolveFunctions -> CreateStruct.apply consumes to produce field names.
case u: UnresolvedFunction => u
// The children of `CreateNamedStruct` may use `Alias` to carry metadata and we should not
// trim them.
case c: CreateNamedStruct => c.mapChildren {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.trees.LeafLike
import org.apache.spark.sql.functions._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{IntegerType, StructType}
import org.apache.spark.sql.types.{ArrayType, IntegerType, StructType}

class GeneratorFunctionSuite extends SharedSparkSession {
import testImplicits._
Expand Down Expand Up @@ -765,6 +765,37 @@ class GeneratorFunctionSuite extends SharedSparkSession {
Seq(Row(0, 10, 0, 10), Row(1, 20, 1, 20))
)
}

test("SPARK-48091: explode with transform should preserve struct field aliases") {
val df = spark.createDataFrame(Seq((1, Array(1, 2, 3), Array(4, 5, 6))))
.toDF("id", "my_array", "my_array2")

// Without explode - aliases should work (baseline)
val good = df.select(
transform(col("my_array2"), x => struct(x.as("data"))).as("my_struct")
)
assert(good.schema("my_struct").dataType.asInstanceOf[ArrayType]
.elementType.asInstanceOf[StructType].fieldNames.toSeq === Seq("data"))

// With explode in same select - aliases should still be preserved
val result = df.select(
explode(col("my_array")).as("exploded"),
transform(col("my_array2"), x => struct(x.as("data"))).as("my_struct")
)
assert(result.schema("my_struct").dataType.asInstanceOf[ArrayType]
.elementType.asInstanceOf[StructType].fieldNames.toSeq === Seq("data"))

// Multiple aliases inside struct
val result2 = df.select(
explode(col("my_array")).as("exploded"),
transform(col("my_array2"),
x => struct(x.as("value"), col("id").as("key"))
).as("my_struct")
)
val fields2 = result2.schema("my_struct").dataType.asInstanceOf[ArrayType]
.elementType.asInstanceOf[StructType].fieldNames.toSeq
assert(fields2 === Seq("value", "key"))
}
}

case class EmptyGenerator() extends Generator with LeafLike[Expression] {
Expand Down