Skip to content

Commit 620ade6

Browse files
committed
Support TIMESTAMP_NTZ type
1 parent 3091aaf commit 620ade6

File tree

29 files changed

+343
-38
lines changed

29 files changed

+343
-38
lines changed

backends-velox/src-delta33/test/scala/org/apache/spark/sql/delta/DeltaInsertIntoTableSuite.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1423,7 +1423,8 @@ abstract class DeltaInsertIntoTests(
14231423
}
14241424
}
14251425

1426-
test("insertInto: Timestamp No Timezone round trips across timezones") {
1426+
// Casting from TIMESTAMP_NTZ to TIMESTAMP is not yet supported.
1427+
ignore("insertInto: Timestamp No Timezone round trips across timezones") {
14271428
val t1 = "timestamp_ntz"
14281429
withTable(t1) {
14291430
withTimeZone("GMT-8") {

backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ object VeloxValidatorApi {
109109
StringType | BinaryType | _: DecimalType | DateType | TimestampType |
110110
YearMonthIntervalType.DEFAULT | NullType =>
111111
true
112+
case other if other.typeName == "timestamp_ntz" => true
112113
case _ => false
113114
}
114115
}

backends-velox/src/main/scala/org/apache/gluten/execution/VeloxColumnarToRowExec.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ case class VeloxColumnarToRowExec(child: SparkPlan) extends ColumnarToRowExecBas
5050
case _: DoubleType =>
5151
case _: StringType =>
5252
case _: TimestampType =>
53+
case other if other.typeName == "timestamp_ntz" =>
5354
case _: DateType =>
5455
case _: BinaryType =>
5556
case _: DecimalType =>

backends-velox/src/test/scala/org/apache/gluten/execution/FallbackSuite.scala

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ class FallbackSuite extends VeloxWholeStageTransformerSuite with AdaptiveSparkPl
272272
}
273273
}
274274

275-
test("fallback with index based schema evolution") {
275+
testWithMinSparkVersion("fallback with index based schema evolution", "3.4") {
276276
val query = "SELECT c2 FROM test"
277277
Seq("parquet", "orc").foreach {
278278
format =>
@@ -295,9 +295,7 @@ class FallbackSuite extends VeloxWholeStageTransformerSuite with AdaptiveSparkPl
295295
runQueryAndCompare(query) {
296296
df =>
297297
val plan = df.queryExecution.executedPlan
298-
val fallback = parquetUseColumnNames == "false" ||
299-
orcUseColumnNames == "false"
300-
assert(collect(plan) { case g: GlutenPlan => g }.isEmpty == fallback)
298+
assert(collect(plan) { case g: GlutenPlan => g }.nonEmpty)
301299
}
302300
}
303301
}

backends-velox/src/test/scala/org/apache/gluten/execution/VeloxParquetDataTypeValidationSuite.scala

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ package org.apache.gluten.execution
1919
import org.apache.gluten.config.GlutenConfig
2020

2121
import org.apache.spark.SparkConf
22+
import org.apache.spark.sql.Row
23+
import org.apache.spark.sql.types.{DataType, StructType}
2224

2325
import java.io.File
2426

@@ -465,17 +467,27 @@ class VeloxParquetDataTypeValidationSuite extends VeloxWholeStageTransformerSuit
465467
}
466468
}
467469

468-
testWithMinSparkVersion("Fallback for TimestampNTZ type scan", "3.4") {
470+
testWithMinSparkVersion("TimestampNTZ type scan", "3.4") {
469471
withTempDir {
470472
dir =>
471473
val path = new File(dir, "ntz_data").toURI.getPath
472474
val inputDf =
473475
spark.sql("SELECT CAST('2024-01-01 00:00:00' AS TIMESTAMP_NTZ) AS ts_ntz")
474476
inputDf.write.format("parquet").save(path)
475-
val df = spark.read.format("parquet").load(path)
477+
478+
// TODO: The Parquet writer creates TIMESTAMP(MICROS,true), but for timestamp_ntz type,
479+
// the 'isAdjustedToUTC' should be false. Without explicitly specifying the read schema,
480+
// file data will be read as Timestamp.
481+
val dataType = Class
482+
.forName("org.apache.spark.sql.types.TimestampNTZType$")
483+
.getField("MODULE$")
484+
.get(null)
485+
.asInstanceOf[DataType]
486+
val schema = new StructType().add("ts_ntz", dataType)
487+
val df = spark.read.schema(schema).parquet(path)
476488
val executedPlan = getExecutedPlan(df)
477-
assert(!executedPlan.exists(plan => plan.isInstanceOf[BatchScanExecTransformer]))
478-
checkAnswer(df, inputDf)
489+
assert(executedPlan.exists(plan => plan.isInstanceOf[BatchScanExecTransformer]))
490+
checkAnswer(df, Seq(Row(java.time.LocalDateTime.of(2024, 1, 1, 0, 0, 0, 0))))
479491
}
480492
}
481493

backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
*/
1717
package org.apache.gluten.functions
1818

19-
import org.apache.gluten.execution.ProjectExecTransformer
19+
import org.apache.gluten.execution.{BatchScanExecTransformer, ProjectExecTransformer}
2020

2121
import org.apache.spark.sql.execution.ProjectExec
22-
import org.apache.spark.sql.types.Decimal
22+
import org.apache.spark.sql.internal.SQLConf.TimestampTypes
23+
import org.apache.spark.sql.types.{DataType, Decimal, StructType}
2324

2425
import java.sql.Timestamp
2526

@@ -489,4 +490,52 @@ class DateFunctionsValidateSuite extends FunctionsValidateSuite {
489490
}
490491
}
491492
}
493+
494+
testWithMinSparkVersion("read as timestamp_ntz", "3.4") {
495+
val inputs: Seq[String] = Seq(
496+
"1970-01-01",
497+
"1970-01-01 00:00:00-02:00",
498+
"1970-01-01 00:00:00 +02:00",
499+
"2000-01-01",
500+
"1970-01-01 00:00:00",
501+
"2000-01-01 12:21:56",
502+
"2015-03-18T12:03:17Z",
503+
"2015-03-18 12:03:17",
504+
"2015-03-18T12:03:17",
505+
"2015-03-18 12:03:17.123",
506+
"2015-03-18T12:03:17.123",
507+
"2015-03-18T12:03:17.456",
508+
"2015-03-18 12:03:17.456"
509+
)
510+
511+
withTempPath {
512+
dir =>
513+
withSQLConf("spark.sql.timestampType" -> TimestampTypes.TIMESTAMP_NTZ.toString) {
514+
val path = dir.getAbsolutePath
515+
val inputDF = spark.createDataset(inputs).toDF("input")
516+
val df = inputDF.selectExpr("cast(input as timestamp_ntz) as ts")
517+
// TODO: The Parquet writer creates TIMESTAMP(MICROS,true), but for timestamp_ntz type,
518+
// the 'isAdjustedToUTC' should be false. Spark will fail to read this file as
519+
// timestamp_ntz values.
520+
df.coalesce(1).write.mode("overwrite").parquet(path)
521+
522+
val dataType = Class
523+
.forName("org.apache.spark.sql.types.TimestampNTZType$")
524+
.getField("MODULE$")
525+
.get(null)
526+
.asInstanceOf[DataType]
527+
val schema = new StructType().add("ts", dataType)
528+
val readDf = spark.read.schema(schema).parquet(path)
529+
readDf.collect()
530+
assert(
531+
readDf.queryExecution.executedPlan.exists(
532+
f => f.isInstanceOf[BatchScanExecTransformer]))
533+
534+
// Ensures the fallback of unsupported function works.
535+
readDf.createOrReplaceTempView("view")
536+
val testDf = spark.sql("select hour(ts) from view")
537+
testDf.collect()
538+
}
539+
}
540+
}
492541
}

cpp/velox/compute/VeloxBackend.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "jni/JniFileSystem.h"
4040
#include "memory/GlutenBufferedInputBuilder.h"
4141
#include "operators/functions/SparkExprToSubfieldFilterParser.h"
42+
#include "operators/plannodes/RowVectorStream.h"
4243
#include "shuffle/ArrowShuffleDictionaryWriter.h"
4344
#include "udf/UdfLoader.h"
4445
#include "utils/Exception.h"
@@ -47,7 +48,6 @@
4748
#include "velox/connectors/hive/BufferedInputBuilder.h"
4849
#include "velox/connectors/hive/HiveConnector.h"
4950
#include "velox/connectors/hive/HiveDataSource.h"
50-
#include "operators/plannodes/RowVectorStream.h"
5151
#include "velox/connectors/hive/storage_adapters/abfs/RegisterAbfsFileSystem.h" // @manual
5252
#include "velox/connectors/hive/storage_adapters/gcs/RegisterGcsFileSystem.h" // @manual
5353
#include "velox/connectors/hive/storage_adapters/hdfs/HdfsFileSystem.h"
@@ -56,6 +56,7 @@
5656
#include "velox/dwio/orc/reader/OrcReader.h"
5757
#include "velox/dwio/parquet/RegisterParquetReader.h"
5858
#include "velox/dwio/parquet/RegisterParquetWriter.h"
59+
#include "velox/functions/sparksql/types/TimestampNTZRegistration.h"
5960
#include "velox/serializers/PrestoSerializer.h"
6061

6162
DECLARE_bool(velox_exception_user_stacktrace_enabled);
@@ -195,6 +196,7 @@ void VeloxBackend::init(
195196
velox::orc::registerOrcReaderFactory();
196197
velox::exec::ExprToSubfieldFilterParser::registerParser(std::make_unique<SparkExprToSubfieldFilterParser>());
197198
velox::connector::hive::BufferedInputBuilder::registerBuilder(std::make_shared<GlutenBufferedInputBuilder>());
199+
velox::functions::sparksql::registerTimestampNTZType();
198200

199201
// Register Velox functions
200202
registerAllFunctions();
@@ -318,13 +320,13 @@ void VeloxBackend::initConnector(const std::shared_ptr<velox::config::ConfigBase
318320
}
319321
velox::connector::registerConnector(
320322
std::make_shared<velox::connector::hive::HiveConnector>(kHiveConnectorId, hiveConf, ioExecutor_.get()));
321-
323+
322324
// Register value-stream connector for runtime iterator-based inputs
323325
auto valueStreamDynamicFilterEnabled =
324326
backendConf_->get<bool>(kValueStreamDynamicFilterEnabled, kValueStreamDynamicFilterEnabledDefault);
325327
velox::connector::registerConnector(
326328
std::make_shared<ValueStreamConnector>(kIteratorConnectorId, hiveConf, valueStreamDynamicFilterEnabled));
327-
329+
328330
#ifdef GLUTEN_ENABLE_GPU
329331
if (backendConf_->get<bool>(kCudfEnableTableScan, kCudfEnableTableScanDefault) &&
330332
backendConf_->get<bool>(kCudfEnabled, kCudfEnabledDefault)) {

cpp/velox/substrait/SubstraitParser.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717

1818
#include "SubstraitParser.h"
1919
#include "TypeUtils.h"
20-
#include "velox/common/base/Exceptions.h"
21-
2220
#include "VeloxSubstraitSignature.h"
21+
#include "velox/common/base/Exceptions.h"
22+
#include "velox/functions/sparksql/types/TimestampNTZType.h"
2323

2424
namespace gluten {
2525

@@ -78,6 +78,8 @@ TypePtr SubstraitParser::parseType(const ::substrait::Type& substraitType, bool
7878
return DATE();
7979
case ::substrait::Type::KindCase::kTimestampTz:
8080
return TIMESTAMP();
81+
case ::substrait::Type::KindCase::kTimestamp:
82+
return facebook::velox::functions::sparksql::TIMESTAMP_NTZ();
8183
case ::substrait::Type::KindCase::kDecimal: {
8284
auto precision = substraitType.decimal().precision();
8385
auto scale = substraitType.decimal().scale();
@@ -356,6 +358,9 @@ int64_t SubstraitParser::getLiteralValue(const ::substrait::Expression::Literal&
356358
memcpy(&decimalValue, decimal.c_str(), 16);
357359
return static_cast<int64_t>(decimalValue);
358360
}
361+
if (literal.has_timestamp()) {
362+
return literal.timestamp();
363+
}
359364
return literal.i64();
360365
}
361366

cpp/velox/substrait/SubstraitToVeloxExpr.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717

1818
#include "SubstraitToVeloxExpr.h"
1919
#include "TypeUtils.h"
20+
#include "velox/functions/sparksql/types/TimestampNTZType.h"
21+
#include "velox/type/Timestamp.h"
2022
#include "velox/vector/FlatVector.h"
2123
#include "velox/vector/VariantToVector.h"
2224

23-
#include "velox/type/Timestamp.h"
24-
2525
using namespace facebook::velox;
2626

2727
namespace {
@@ -133,6 +133,8 @@ TypePtr getScalarType(const ::substrait::Expression::Literal& literal) {
133133
return DATE();
134134
case ::substrait::Expression_Literal::LiteralTypeCase::kTimestampTz:
135135
return TIMESTAMP();
136+
case ::substrait::Expression_Literal::LiteralTypeCase::kTimestamp:
137+
return facebook::velox::functions::sparksql::TIMESTAMP_NTZ();
136138
case ::substrait::Expression_Literal::LiteralTypeCase::kString:
137139
return VARCHAR();
138140
case ::substrait::Expression_Literal::LiteralTypeCase::kVarChar:

cpp/velox/substrait/SubstraitToVeloxPlan.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,12 @@
2424
#include "operators/plannodes/RowVectorStream.h"
2525
#include "velox/connectors/hive/HiveDataSink.h"
2626
#include "velox/exec/TableWriter.h"
27+
#include "velox/functions/sparksql/types/TimestampNTZType.h"
2728
#include "velox/type/Type.h"
2829

2930
#include "utils/ConfigExtractor.h"
3031
#include "utils/ObjectStore.h"
32+
#include "utils/VeloxArrowUtils.h"
3133
#include "utils/VeloxWriterUtils.h"
3234

3335
#include "config.pb.h"
@@ -1497,6 +1499,11 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
14971499
// The columns present in the table, if not available default to the baseSchema.
14981500
auto tableSchema = splitInfo->tableSchema ? splitInfo->tableSchema : baseSchema;
14991501

1502+
// Spark's TimestampNTZ type is stored as TIMESTAMP in the file.
1503+
if (tableSchema) {
1504+
tableSchema = asRowType(replaceTimestampNTZ(tableSchema, TIMESTAMP()));
1505+
}
1506+
15001507
connector::ConnectorTableHandlePtr tableHandle;
15011508
auto remainingFilter = readRel.has_filter() ? exprConverter_->toVeloxExpr(readRel.filter(), baseSchema) : nullptr;
15021509
auto connectorId = kHiveConnectorId;

0 commit comments

Comments (0)