Skip to content

Commit 164dbfe

Browse files
committed
feat: support concat for strings
1 parent a9ca651 commit 164dbfe

File tree

4 files changed

+48
-25
lines changed

4 files changed

+48
-25
lines changed

fuzz-testing/src/main/scala/org/apache/comet/fuzz/Meta.scala

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -126,21 +126,13 @@ object Meta {
126126
SparkTypeOneOf(
127127
Seq(
128128
SparkStringType,
129-
SparkNumericType,
130-
SparkBinaryType,
131129
SparkArrayType(
132130
SparkTypeOneOf(Seq(SparkStringType, SparkNumericType, SparkBinaryType))))),
133131
SparkTypeOneOf(
134132
Seq(
135133
SparkStringType,
136-
SparkNumericType,
137-
SparkBinaryType,
138134
SparkArrayType(
139135
SparkTypeOneOf(Seq(SparkStringType, SparkNumericType, SparkBinaryType))))))),
140-
createFunctionWithInputTypes(
141-
"concat",
142-
Seq(SparkStringType, SparkStringType)
143-
), // TODO: variadic
144136
createFunctionWithInputTypes("concat_ws", Seq(SparkStringType, SparkStringType)),
145137
createFunctionWithInputTypes("contains", Seq(SparkStringType, SparkStringType)),
146138
createFunctionWithInputTypes("ends_with", Seq(SparkStringType, SparkStringType)),

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
164164
classOf[BitLength] -> CometScalarFunction("bit_length"),
165165
classOf[Chr] -> CometScalarFunction("char"),
166166
classOf[ConcatWs] -> CometScalarFunction("concat_ws"),
167-
classOf[Concat] -> CometScalarFunction("concat"),
167+
classOf[Concat] -> CometConcat,
168168
classOf[Contains] -> CometScalarFunction("contains"),
169169
classOf[EndsWith] -> CometScalarFunction("ends_with"),
170170
classOf[InitCap] -> CometInitCap,

spark/src/main/scala/org/apache/comet/serde/strings.scala

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ package org.apache.comet.serde
2121

2222
import java.util.Locale
2323

24-
import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression, InitCap, Length, Like, Literal, Lower, RegExpReplace, RLike, StringLPad, StringRepeat, StringRPad, Substring, Upper}
24+
import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, Expression, InitCap, Length, Like, Literal, Lower, RegExpReplace, RLike, StringLPad, StringRepeat, StringRPad, Substring, Upper}
2525
import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType}
2626

2727
import org.apache.comet.CometConf
@@ -113,6 +113,18 @@ object CometSubstring extends CometExpressionSerde[Substring] {
113113
}
114114
}
115115

116+
object CometConcat extends CometScalarFunction[Concat]("concat") {
117+
val unsupportedReason = "CONCAT supports only string input parameters"
118+
119+
override def getSupportLevel(expr: Concat): SupportLevel = {
120+
if (expr.children.forall(_.dataType == DataTypes.StringType)) {
121+
Compatible()
122+
} else {
123+
Incompatible(Some(unsupportedReason))
124+
}
125+
}
126+
}
127+
116128
object CometLike extends CometExpressionSerde[Like] {
117129

118130
override def convert(expr: Like, inputs: Seq[Attribute], binding: Boolean): Option[Expr] = {

spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
4242
import org.apache.spark.sql.types._
4343

4444
import org.apache.comet.CometSparkSessionExtensions.isSpark40Plus
45+
import org.apache.comet.serde.CometConcat
4546
import org.apache.comet.testing.{DataGenOptions, FuzzDataGenerator}
4647

4748
class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
@@ -3233,30 +3234,48 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
32333234
}
32343235

32353236
// https://github.com/apache/datafusion-comet/issues/2647
3236-
ignore("test concat function - arrays") {
3237+
test("test concat function - arrays") {
32373238
withTable("t1") {
32383239
sql(
3239-
"create table t1 using parquet as select array(id, id+1) c1, array(id+2, id+3) c2, array() c3, array(null) c4, cast(null as array<int>) c5 from range(10)")
3240-
checkSparkAnswerAndOperator("select concat(c1, c2) AS x FROM t1")
3241-
checkSparkAnswerAndOperator("select concat(c1, c1) AS x FROM t1")
3242-
checkSparkAnswerAndOperator("select concat(c1, c2, c3) AS x FROM t1")
3243-
checkSparkAnswerAndOperator("select concat(c1, c2, c3, c5) AS x FROM t1")
3244-
checkSparkAnswerAndOperator(
3245-
"select concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM t1")
3240+
"create table t1 using parquet as select array(id, id+1) c1, array(id+2, id+3) c2, CAST(array() AS array<int>) c3, CAST(array(null) as array<int>) c4, cast(null as array<int>) c5 from range(10)")
3241+
checkSparkAnswerAndFallbackReason(
3242+
"select concat(c1, c2) AS x FROM t1",
3243+
CometConcat.unsupportedReason)
3244+
checkSparkAnswerAndFallbackReason(
3245+
"select concat(c1, c1) AS x FROM t1",
3246+
CometConcat.unsupportedReason)
3247+
checkSparkAnswerAndFallbackReason(
3248+
"select concat(c1, c2, c3) AS x FROM t1",
3249+
CometConcat.unsupportedReason)
3250+
checkSparkAnswerAndFallbackReason(
3251+
"select concat(c1, c2, c3, c5) AS x FROM t1",
3252+
CometConcat.unsupportedReason)
3253+
checkSparkAnswerAndFallbackReason(
3254+
"select concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM t1",
3255+
CometConcat.unsupportedReason)
32463256
}
32473257
}
32483258

32493259
// https://github.com/apache/datafusion-comet/issues/2647
3250-
ignore("test concat function - binary") {
3260+
test("test concat function - binary") {
32513261
withTable("t1") {
32523262
sql(
32533263
"create table t1 using parquet as select cast(uuid() as binary) c1, cast(uuid() as binary) c2, cast(uuid() as binary) c3, cast(uuid() as binary) c4, cast(null as binary) c5 from range(10)")
3254-
checkSparkAnswerAndOperator("select concat(c1, c2) AS x FROM t1")
3255-
checkSparkAnswerAndOperator("select concat(c1, c1) AS x FROM t1")
3256-
checkSparkAnswerAndOperator("select concat(c1, c2, c3) AS x FROM t1")
3257-
checkSparkAnswerAndOperator("select concat(c1, c2, c3, c5) AS x FROM t1")
3258-
checkSparkAnswerAndOperator(
3259-
"select concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM t1")
3264+
checkSparkAnswerAndFallbackReason(
3265+
"select concat(c1, c2) AS x FROM t1",
3266+
CometConcat.unsupportedReason)
3267+
checkSparkAnswerAndFallbackReason(
3268+
"select concat(c1, c1) AS x FROM t1",
3269+
CometConcat.unsupportedReason)
3270+
checkSparkAnswerAndFallbackReason(
3271+
"select concat(c1, c2, c3) AS x FROM t1",
3272+
CometConcat.unsupportedReason)
3273+
checkSparkAnswerAndFallbackReason(
3274+
"select concat(c1, c2, c3, c5) AS x FROM t1",
3275+
CometConcat.unsupportedReason)
3276+
checkSparkAnswerAndFallbackReason(
3277+
"select concat(concat(c1, c2, c3), concat(c1, c3)) AS x FROM t1",
3278+
CometConcat.unsupportedReason)
32603279
}
32613280
}
32623281
}

0 commit comments

Comments
 (0)