Skip to content

Commit c00bedb

Browse files
authored
chore: Refactor Cast serde to avoid code duplication (apache#2242)
1 parent 636ce22 commit c00bedb

File tree

17 files changed

+156
-188
lines changed

17 files changed

+156
-188
lines changed

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -624,14 +624,6 @@ object CometConf extends ShimCometConf {
624624
.booleanConf
625625
.createWithDefault(false)
626626

627-
val COMET_CAST_ALLOW_INCOMPATIBLE: ConfigEntry[Boolean] =
628-
conf("spark.comet.cast.allowIncompatible")
629-
.doc(
630-
"Comet is not currently fully compatible with Spark for all cast operations. " +
631-
s"Set this config to true to allow them anyway. $COMPAT_GUIDE.")
632-
.booleanConf
633-
.createWithDefault(false)
634-
635627
val COMET_REGEXP_ALLOW_INCOMPATIBLE: ConfigEntry[Boolean] =
636628
conf("spark.comet.regexp.allowIncompatible")
637629
.doc(

docs/source/contributor-guide/benchmarking_aws_ec2.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ $SPARK_HOME/bin/spark-submit \
208208
--conf spark.plugins=org.apache.spark.CometPlugin \
209209
--conf spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager \
210210
--conf spark.comet.enabled=true \
211-
--conf spark.comet.cast.allowIncompatible=true \
211+
--conf spark.comet.expression.allowIncompatible=true \
212212
--conf spark.comet.exec.replaceSortMergeJoin=true \
213213
--conf spark.comet.exec.shuffle.enabled=true \
214214
--conf spark.comet.exec.shuffle.fallbackToColumnar=true \

docs/source/contributor-guide/benchmarking_macos.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ $SPARK_HOME/bin/spark-submit \
144144
--conf spark.comet.exec.shuffle.enableFastEncoding=true \
145145
--conf spark.comet.exec.shuffle.fallbackToColumnar=true \
146146
--conf spark.comet.exec.replaceSortMergeJoin=true \
147-
--conf spark.comet.cast.allowIncompatible=true \
147+
--conf spark.comet.expression.allowIncompatible=true \
148148
/path/to/datafusion-benchmarks/runners/datafusion-comet/tpcbench.py \
149149
--benchmark tpch \
150150
--data /path/to/tpch-data/ \

docs/source/user-guide/compatibility.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ The `native_datafusion` scan has some additional limitations:
7575

7676
### S3 Support with `native_iceberg_compat`
7777

78-
- When using the default AWS S3 endpoint (no custom endpoint configured), a valid region is required. Comet
78+
- When using the default AWS S3 endpoint (no custom endpoint configured), a valid region is required. Comet
7979
will attempt to resolve the region if it is not provided.
8080

8181
## ANSI Mode
@@ -130,7 +130,7 @@ Cast operations in Comet fall into three levels of support:
130130
- **Compatible**: The results match Apache Spark
131131
- **Incompatible**: The results may match Apache Spark for some inputs, but there are known issues where some inputs
132132
will result in incorrect results or exceptions. The query stage will fall back to Spark by default. Setting
133-
`spark.comet.cast.allowIncompatible=true` will allow all incompatible casts to run natively in Comet, but this is not
133+
`spark.comet.expression.allowIncompatible=true` will allow all incompatible casts to run natively in Comet, but this is not
134134
recommended for production use.
135135
- **Unsupported**: Comet does not provide a native version of this cast expression and the query stage will fall back to
136136
Spark.

docs/source/user-guide/configs.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ Comet provides the following configuration settings.
2828
|--------|-------------|---------------|
2929
| spark.comet.batchSize | The columnar batch size, i.e., the maximum number of rows that a batch can contain. | 8192 |
3030
| spark.comet.caseConversion.enabled | Java uses locale-specific rules when converting strings to upper or lower case and Rust does not, so we disable upper and lower by default. | false |
31-
| spark.comet.cast.allowIncompatible | Comet is not currently fully compatible with Spark for all cast operations. Set this config to true to allow them anyway. For more information, refer to the Comet Compatibility Guide (https://datafusion.apache.org/comet/user-guide/compatibility.html). | false |
3231
| spark.comet.columnar.shuffle.async.enabled | Whether to enable asynchronous shuffle for Arrow-based shuffle. | false |
3332
| spark.comet.columnar.shuffle.async.max.thread.num | Maximum number of threads on an executor used for Comet async columnar shuffle. This is the upper bound of total number of shuffle threads per executor. In other words, if the number of cores * the number of shuffle threads per task `spark.comet.columnar.shuffle.async.thread.num` is larger than this config. Comet will use this config as the number of shuffle threads per executor instead. | 100 |
3433
| spark.comet.columnar.shuffle.async.thread.num | Number of threads used for Comet async columnar shuffle per shuffle task. Note that more threads means more memory requirement to buffer shuffle data before flushing to disk. Also, more threads may not always improve performance, and should be set based on the number of cores available. | 3 |

docs/source/user-guide/expressions.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ The following Spark expressions are currently available. Any known compatibility
3535

3636
## Binary Arithmetic
3737

38-
| Expression | Notes |
39-
|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
40-
| Add (`+`) | |
41-
| Subtract (`-`) | |
42-
| Multiply (`*`) | |
43-
| Divide (`/`) | |
44-
| IntegralDivide (`div`) | All operands are cast to DecimalType (in case the input type is not already decima type) with precision 19 and scale 0. Please set `spark.comet.cast.allowIncompatible` to `true` to enable DataFusion’s cast operation for LongType inputs. |
45-
| Remainder (`%`) | |
38+
| Expression | Notes |
39+
|------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
40+
| Add (`+`) | |
41+
| Subtract (`-`) | |
42+
| Multiply (`*`) | |
43+
| Divide (`/`) | |
44+
| IntegralDivide (`div`) | All operands are cast to DecimalType (in case the input type is not already decimal type) with precision 19 and scale 0. Please set `spark.comet.expression.allowIncompatible` to `true` to enable DataFusion’s cast operation for LongType inputs. |
45+
| Remainder (`%`) | |
4646

4747
## Binary Try Arithmetic
4848

docs/source/user-guide/kubernetes.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ spec:
7676
"spark.plugins": "org.apache.spark.CometPlugin"
7777
"spark.comet.enabled": "true"
7878
"spark.comet.exec.enabled": "true"
79-
"spark.comet.cast.allowIncompatible": "true"
79+
"spark.comet.expression.allowIncompatible": "true"
8080
"spark.comet.exec.shuffle.enabled": "true"
8181
"spark.comet.exec.shuffle.mode": "auto"
8282
"spark.shuffle.manager": "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager"

spark/src/main/scala/org/apache/comet/expressions/CometCast.scala

Lines changed: 73 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,17 @@
1919

2020
package org.apache.comet.expressions
2121

22+
import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression}
2223
import org.apache.spark.sql.types.{ArrayType, DataType, DataTypes, DecimalType, NullType, StructType}
2324

24-
import org.apache.comet.serde.{Compatible, Incompatible, SupportLevel, Unsupported}
25+
import org.apache.comet.CometConf
26+
import org.apache.comet.CometSparkSessionExtensions.withInfo
27+
import org.apache.comet.serde.{CometExpressionSerde, Compatible, ExprOuterClass, Incompatible, SupportLevel, Unsupported}
28+
import org.apache.comet.serde.ExprOuterClass.Expr
29+
import org.apache.comet.serde.QueryPlanSerde.{evalModeToProto, exprToProtoInternal, serializeDataType}
30+
import org.apache.comet.shims.CometExprShim
2531

26-
object CometCast {
32+
object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
2733

2834
def supportedTypes: Seq[DataType] =
2935
Seq(
@@ -42,6 +48,51 @@ object CometCast {
4248
// TODO add DataTypes.TimestampNTZType for Spark 3.4 and later
4349
// https://github.com/apache/datafusion-comet/issues/378
4450

51+
override def getSupportLevel(cast: Cast): SupportLevel = {
52+
isSupported(cast.child.dataType, cast.dataType, cast.timeZoneId, evalMode(cast))
53+
}
54+
55+
override def convert(
56+
cast: Cast,
57+
inputs: Seq[Attribute],
58+
binding: Boolean): Option[ExprOuterClass.Expr] = {
59+
val childExpr = exprToProtoInternal(cast.child, inputs, binding)
60+
if (childExpr.isDefined) {
61+
castToProto(cast, cast.timeZoneId, cast.dataType, childExpr.get, evalMode(cast))
62+
} else {
63+
withInfo(cast, cast.child)
64+
None
65+
}
66+
}
67+
68+
/**
69+
* Wrap an already serialized expression in a cast.
70+
*/
71+
def castToProto(
72+
expr: Expression,
73+
timeZoneId: Option[String],
74+
dt: DataType,
75+
childExpr: Expr,
76+
evalMode: CometEvalMode.Value): Option[Expr] = {
77+
serializeDataType(dt) match {
78+
case Some(dataType) =>
79+
val castBuilder = ExprOuterClass.Cast.newBuilder()
80+
castBuilder.setChild(childExpr)
81+
castBuilder.setDatatype(dataType)
82+
castBuilder.setEvalMode(evalModeToProto(evalMode))
83+
castBuilder.setAllowIncompat(CometConf.COMET_EXPR_ALLOW_INCOMPATIBLE.get())
84+
castBuilder.setTimezone(timeZoneId.getOrElse("UTC"))
85+
Some(
86+
ExprOuterClass.Expr
87+
.newBuilder()
88+
.setCast(castBuilder)
89+
.build())
90+
case _ =>
91+
withInfo(expr, s"Unsupported datatype in castToProto: $dt")
92+
None
93+
}
94+
}
95+
4596
def isSupported(
4697
fromType: DataType,
4798
toType: DataType,
@@ -62,7 +113,7 @@ object CometCast {
62113
case DataTypes.TimestampType | DataTypes.DateType | DataTypes.StringType =>
63114
Incompatible()
64115
case _ =>
65-
Unsupported
116+
unsupported(fromType, toType)
66117
}
67118
case (_: DecimalType, _: DecimalType) =>
68119
Compatible()
@@ -98,7 +149,7 @@ object CometCast {
98149
}
99150
}
100151
Compatible()
101-
case _ => Unsupported
152+
case _ => unsupported(fromType, toType)
102153
}
103154
}
104155

@@ -136,7 +187,7 @@ object CometCast {
136187
// https://github.com/apache/datafusion-comet/issues/328
137188
Incompatible(Some("Not all valid formats are supported"))
138189
case _ =>
139-
Unsupported
190+
unsupported(DataTypes.StringType, toType)
140191
}
141192
}
142193

@@ -171,13 +222,13 @@ object CometCast {
171222
isSupported(field.dataType, DataTypes.StringType, timeZoneId, evalMode) match {
172223
case s: Incompatible =>
173224
return s
174-
case Unsupported =>
175-
return Unsupported
225+
case u: Unsupported =>
226+
return u
176227
case _ =>
177228
}
178229
}
179230
Compatible()
180-
case _ => Unsupported
231+
case _ => unsupported(fromType, DataTypes.StringType)
181232
}
182233
}
183234

@@ -187,21 +238,21 @@ object CometCast {
187238
DataTypes.IntegerType =>
188239
// https://github.com/apache/datafusion-comet/issues/352
189240
// this seems like an edge case that isn't important for us to support
190-
Unsupported
241+
unsupported(DataTypes.TimestampType, toType)
191242
case DataTypes.LongType =>
192243
// https://github.com/apache/datafusion-comet/issues/352
193244
Compatible()
194245
case DataTypes.StringType => Compatible()
195246
case DataTypes.DateType => Compatible()
196-
case _ => Unsupported
247+
case _ => unsupported(DataTypes.TimestampType, toType)
197248
}
198249
}
199250

200251
private def canCastFromBoolean(toType: DataType): SupportLevel = toType match {
201252
case DataTypes.ByteType | DataTypes.ShortType | DataTypes.IntegerType | DataTypes.LongType |
202253
DataTypes.FloatType | DataTypes.DoubleType =>
203254
Compatible()
204-
case _ => Unsupported
255+
case _ => unsupported(DataTypes.BooleanType, toType)
205256
}
206257

207258
private def canCastFromByte(toType: DataType): SupportLevel = toType match {
@@ -212,7 +263,7 @@ object CometCast {
212263
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
213264
Compatible()
214265
case _ =>
215-
Unsupported
266+
unsupported(DataTypes.ByteType, toType)
216267
}
217268

218269
private def canCastFromShort(toType: DataType): SupportLevel = toType match {
@@ -223,7 +274,7 @@ object CometCast {
223274
case DataTypes.FloatType | DataTypes.DoubleType | _: DecimalType =>
224275
Compatible()
225276
case _ =>
226-
Unsupported
277+
unsupported(DataTypes.ShortType, toType)
227278
}
228279

229280
private def canCastFromInt(toType: DataType): SupportLevel = toType match {
@@ -236,7 +287,7 @@ object CometCast {
236287
case _: DecimalType =>
237288
Incompatible(Some("No overflow check"))
238289
case _ =>
239-
Unsupported
290+
unsupported(DataTypes.IntegerType, toType)
240291
}
241292

242293
private def canCastFromLong(toType: DataType): SupportLevel = toType match {
@@ -249,7 +300,7 @@ object CometCast {
249300
case _: DecimalType =>
250301
Incompatible(Some("No overflow check"))
251302
case _ =>
252-
Unsupported
303+
unsupported(DataTypes.LongType, toType)
253304
}
254305

255306
private def canCastFromFloat(toType: DataType): SupportLevel = toType match {
@@ -259,7 +310,8 @@ object CometCast {
259310
case _: DecimalType =>
260311
// https://github.com/apache/datafusion-comet/issues/1371
261312
Incompatible(Some("There can be rounding differences"))
262-
case _ => Unsupported
313+
case _ =>
314+
unsupported(DataTypes.FloatType, toType)
263315
}
264316

265317
private def canCastFromDouble(toType: DataType): SupportLevel = toType match {
@@ -269,14 +321,17 @@ object CometCast {
269321
case _: DecimalType =>
270322
// https://github.com/apache/datafusion-comet/issues/1371
271323
Incompatible(Some("There can be rounding differences"))
272-
case _ => Unsupported
324+
case _ => unsupported(DataTypes.DoubleType, toType)
273325
}
274326

275327
private def canCastFromDecimal(toType: DataType): SupportLevel = toType match {
276328
case DataTypes.FloatType | DataTypes.DoubleType | DataTypes.ByteType | DataTypes.ShortType |
277329
DataTypes.IntegerType | DataTypes.LongType =>
278330
Compatible()
279-
case _ => Unsupported
331+
case _ => Unsupported(Some(s"Cast from DecimalType to $toType is not supported"))
280332
}
281333

334+
private def unsupported(fromType: DataType, toType: DataType): Unsupported = {
335+
Unsupported(Some(s"Cast from $fromType to $toType is not supported"))
336+
}
282337
}

0 commit comments

Comments
 (0)