Skip to content

Commit c229245

Browse files
yaooqinn authored and dongjoon-hyun committed
[SPARK-52583][SQL] Add a Developer API for stringifying values in UserDefinedType
### What changes were proposed in this pull request? This PR proposes to add a Developer API for stringifying values in UserDefinedType. When an instance of a class X that a UserDefinedType represents is cast to a string in the Catalyst layer, this API is called to get the string representation. The default implementation calls `obj.toString`, which preserves the existing behavior. ### Why are the changes needed? Class X's toString method doesn't always meet users' needs, and in some circumstances it is difficult for them to override it, for example when the class is part of the JVM or comes from another dependent project. This stringifyValue API gives a user or developer an extra opportunity to do the override. ### Does this PR introduce _any_ user-facing change? No, it's a developer API addition and the existing behavior remains unchanged. ### How was this patch tested? New test cases. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51289 from yaooqinn/SPARK-52583. Authored-by: Kent Yao <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 3f6cefb commit c229245

File tree

3 files changed

+31
-2
lines changed

3 files changed

+31
-2
lines changed

sql/api/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,22 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa
9494
}
9595

9696
override def catalogString: String = sqlType.simpleString
97+
98+
/**
99+
* This method is used to convert the value of a UDT to a string representation.
100+
*
101+
* By default, it simply calls `toString` on the object.
102+
*
103+
* @param obj
104+
* The object to convert to a string.
105+
* @return
106+
* A string representation of the object.
107+
* @since 4.1.0
108+
*/
109+
@Since("4.1.0")
110+
def stringifyValue(obj: Any): String = {
111+
obj.toString
112+
}
97113
}
98114

99115
private[spark] object UserDefinedType {

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
165165
})
166166
case pudt: PythonUserDefinedType => castToString(pudt.sqlType)
167167
case udt: UserDefinedType[_] =>
168-
o => UTF8String.fromString(udt.deserialize(o).toString)
168+
o => UTF8String.fromString(udt.stringifyValue(udt.deserialize(o)))
169169
case YearMonthIntervalType(startField, endField) =>
170170
acceptAny[Int](i => UTF8String.fromString(
171171
IntervalUtils.toYearMonthIntervalString(i, ANSI_STYLE, startField, endField)))
@@ -274,7 +274,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
274274
case udt: UserDefinedType[_] =>
275275
val udtRef = JavaCode.global(ctx.addReferenceObj("udt", udt), udt.sqlType)
276276
(c, evPrim) =>
277-
code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());"
277+
code"$evPrim = UTF8String.fromString($udtRef.stringifyValue($udtRef.deserialize($c)));"
278278
case i: YearMonthIntervalType =>
279279
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
280280
val iss = IntervalStringStyles.getClass.getName.stripSuffix("$")

sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,19 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque
245245
checkEvaluation(ret, "(1.0, 3.0, 5.0, 7.0, 9.0)")
246246
}
247247

248+
test("SPARK-52583: Cast UserDefinedType to string with custom stringifyValue") {
249+
val udt = new TestUDT.MyDenseVectorUDT() {
250+
override def stringifyValue(obj: Any): String = {
251+
val v = obj.asInstanceOf[TestUDT.MyDenseVector]
252+
v.toString.stripPrefix("(").stripSuffix(")")
253+
}
254+
}
255+
val vector = new TestUDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0))
256+
val data = udt.serialize(vector)
257+
val ret = Cast(Literal(data, udt), StringType, None)
258+
checkEvaluation(ret, "1.0, 3.0, 5.0, 7.0, 9.0")
259+
}
260+
248261
test("SPARK-28497 Can't up cast UserDefinedType to string") {
249262
val udt = new TestUDT.MyDenseVectorUDT()
250263
assert(!Cast.canUpCast(udt, StringType))

0 commit comments

Comments
 (0)