Skip to content

[SPARK-52583][SQL] Add a Developer API for stringifying values in UserDefinedType #51289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,22 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa
}

override def catalogString: String = sqlType.simpleString

/**
 * Converts a user-space value of this UDT to its string representation. This is the
 * hook used when casting a UDT column to `StringType`.
 *
 * By default, it renders the object with `toString`; concrete UDTs may override this
 * to customize the textual form.
 *
 * @param obj
 *   The user-space object (of `UserType`) to convert to a string.
 * @return
 *   A string representation of the object; the literal `"null"` if `obj` is `null`.
 * @since 4.1.0
 */
@Since("4.1.0")
def stringifyValue(obj: Any): String = {
  // String.valueOf is null-safe: since UserType >: Null, a deserialized value may be
  // null, and `obj.toString` would throw a NullPointerException in that case.
  String.valueOf(obj)
}
}

private[spark] object UserDefinedType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
})
case pudt: PythonUserDefinedType => castToString(pudt.sqlType)
case udt: UserDefinedType[_] =>
o => UTF8String.fromString(udt.deserialize(o).toString)
o => UTF8String.fromString(udt.stringifyValue(udt.deserialize(o)))
case YearMonthIntervalType(startField, endField) =>
acceptAny[Int](i => UTF8String.fromString(
IntervalUtils.toYearMonthIntervalString(i, ANSI_STYLE, startField, endField)))
Expand Down Expand Up @@ -274,7 +274,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
case udt: UserDefinedType[_] =>
val udtRef = JavaCode.global(ctx.addReferenceObj("udt", udt), udt.sqlType)
(c, evPrim) =>
code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());"
code"$evPrim = UTF8String.fromString($udtRef.stringifyValue($udtRef.deserialize($c)));"
case i: YearMonthIntervalType =>
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
val iss = IntervalStringStyles.getClass.getName.stripSuffix("$")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,19 @@ class UserDefinedTypeSuite extends QueryTest with SharedSparkSession with Parque
checkEvaluation(ret, "(1.0, 3.0, 5.0, 7.0, 9.0)")
}

test("SPARK-52583: Cast UserDefinedType to string with custom stringifyValue") {
  // A UDT whose string form strips the surrounding parentheses produced by
  // MyDenseVector.toString, exercising the stringifyValue override path.
  val parenFreeUdt = new TestUDT.MyDenseVectorUDT() {
    override def stringifyValue(obj: Any): String =
      obj.asInstanceOf[TestUDT.MyDenseVector].toString.stripPrefix("(").stripSuffix(")")
  }
  val denseVector = new TestUDT.MyDenseVector(Array(1.0, 3.0, 5.0, 7.0, 9.0))
  val serialized = parenFreeUdt.serialize(denseVector)
  // Casting the serialized value to string must go through the custom stringifier.
  checkEvaluation(
    Cast(Literal(serialized, parenFreeUdt), StringType, None),
    "1.0, 3.0, 5.0, 7.0, 9.0")
}

test("SPARK-28497 Can't up cast UserDefinedType to string") {
val udt = new TestUDT.MyDenseVectorUDT()
assert(!Cast.canUpCast(udt, StringType))
Expand Down