dremio · telemenar · Mar 18, 2026
diff --git a/cpp/src/gandiva/function_registry_common.h b/cpp/src/gandiva/function_registry_common.h
@@ -55,6 +55,13 @@ inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); }
 inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); }
 
 inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); }
+
+// Precision-specific timestamp types for explicit time unit handling
+inline DataTypePtr timestamp_sec() { return arrow::timestamp(arrow::TimeUnit::SECOND); }
+inline DataTypePtr timestamp_ms() { return arrow::timestamp(arrow::TimeUnit::MILLI); }
+inline DataTypePtr timestamp_us() { return arrow::timestamp(arrow::TimeUnit::MICRO); }
+inline DataTypePtr timestamp_ns() { return arrow::timestamp(arrow::TimeUnit::NANO); }
+
 inline DataTypePtr decimal128() { return arrow::decimal128(38, 0); }
 
 struct KeyHash {
@@ -289,6 +296,14 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
 // Iterate the inner macro over all time types
 #define TIME_TYPES(INNER, NAME, ALIASES) INNER(NAME, ALIASES, time32)
 
+// Iterate the inner macro over all timestamp precision types
+// These generate precision-specific function registrations
+#define TIMESTAMP_PRECISION_TYPES(INNER, NAME, ALIASES)  \
+  INNER(NAME, ALIASES, timestamp_sec),                   \
+  INNER(NAME, ALIASES, timestamp_ms),                    \
+  INNER(NAME, ALIASES, timestamp_us),                    \
+  INNER(NAME, ALIASES, timestamp_ns)
+
 // Iterate the inner macro over all data types
 #define VAR_LEN_TYPES(INNER, NAME, ALIASES) \
   INNER(NAME, ALIASES, utf8), INNER(NAME, ALIASES, binary)

diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
@@ -47,6 +47,12 @@ namespace gandiva {
 
 #define NEXT_DAY_FNS(name) DATE_TYPES(NEXT_DAY_SAFE_NULL_IF_NULL, name, {})
 
+// Precision-aware extraction function for timestamp types
+// Maps to extractYear_timestamp_sec, extractYear_timestamp_ms, etc.
+#define EXTRACT_TIMESTAMP_PRECISION(NAME, ALIASES, TYPE)                       \
+  NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
+                 int64(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))
+
 std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
   static std::vector<NativeFunction> date_time_fn_registry_ = {
       UNARY_SAFE_NULL_NEVER_BOOL(isnull, {}, day_time_interval),
@@ -62,6 +68,9 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
 
       NEXT_DAY_FNS(next_day),
 
+      // Precision-specific extractYear for all timestamp time units
+      TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractYear, {"extract_year"}),
+
       NativeFunction("castDATE", {}, DataTypeVector{utf8()}, date64(), kResultNullIfNull,
                      "castDATE_utf8",
                      NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),

diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc
@@ -23,6 +23,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/hash_util.h"
 #include "arrow/util/logging.h"
@@ -80,16 +81,58 @@ bool FunctionSignature::operator==(const FunctionSignature& other) const {
   return true;
 }
 
+namespace {
+
+// Helper to get the time unit from temporal types for hashing
+// Returns -1 for non-temporal types
+int GetTemporalTypeUnit(const DataTypePtr& type) {
+  switch (type->id()) {
+    case arrow::Type::TIMESTAMP: {
+      auto ts_type = checked_cast<const arrow::TimestampType*>(type.get());
+      return static_cast<int>(ts_type->unit());
+    }
+    case arrow::Type::TIME32: {
+      auto t32_type = checked_cast<const arrow::Time32Type*>(type.get());
+      return static_cast<int>(t32_type->unit());
+    }
+    case arrow::Type::TIME64: {
+      auto t64_type = checked_cast<const arrow::Time64Type*>(type.get());
+      return static_cast<int>(t64_type->unit());
+    }
+    case arrow::Type::DURATION: {
+      auto dur_type = checked_cast<const arrow::DurationType*>(type.get());
+      return static_cast<int>(dur_type->unit());
+    }
+    default:
+      return -1;
+  }
+}
+
+}  // namespace
+
 /// calculated based on name, datatype id of parameters and datatype id
-/// of return type.
+/// of return type. For temporal types (TIMESTAMP, TIME32, TIME64, DURATION),
+/// also includes the time unit to distinguish different precisions.
 std::size_t FunctionSignature::Hash() const {
   static const size_t kSeedValue = 17;
   size_t result = kSeedValue;
   hash_combine(result, AsciiToLower(base_name_));
   hash_combine(result, static_cast<size_t>(ret_type_->id()));
+
+  // Include time unit for temporal return types
+  int ret_unit = GetTemporalTypeUnit(ret_type_);
+  if (ret_unit >= 0) {
+    hash_combine(result, static_cast<size_t>(ret_unit));
+  }
+
   // not using hash_range since we only want to include the id from the data type
   for (auto& param_type : param_types_) {
     hash_combine(result, static_cast<size_t>(param_type->id()));
+    // Include time unit for temporal parameter types
+    int param_unit = GetTemporalTypeUnit(param_type);
+    if (param_unit >= 0) {
+      hash_combine(result, static_cast<size_t>(param_unit));
+    }
   }
   return result;
 }

diff --git a/cpp/src/gandiva/function_signature_test.cc b/cpp/src/gandiva/function_signature_test.cc
@@ -110,4 +110,91 @@ TEST_F(TestFunctionSignature, TestHash) {
   EXPECT_EQ(f3.Hash(), f4.Hash());
 }
 
+TEST_F(TestFunctionSignature, TestTimestampPrecisionHash) {
+  // Different timestamp precisions should have different hashes
+  FunctionSignature ts_sec("extractYear",
+                           {arrow::timestamp(arrow::TimeUnit::SECOND)},
+                           arrow::int64());
+  FunctionSignature ts_ms("extractYear",
+                          {arrow::timestamp(arrow::TimeUnit::MILLI)},
+                          arrow::int64());
+  FunctionSignature ts_us("extractYear",
+                          {arrow::timestamp(arrow::TimeUnit::MICRO)},
+                          arrow::int64());
+  FunctionSignature ts_ns("extractYear",
+                          {arrow::timestamp(arrow::TimeUnit::NANO)},
+                          arrow::int64());
+
+  // All should have different hashes
+  EXPECT_NE(ts_sec.Hash(), ts_ms.Hash());
+  EXPECT_NE(ts_sec.Hash(), ts_us.Hash());
+  EXPECT_NE(ts_sec.Hash(), ts_ns.Hash());
+  EXPECT_NE(ts_ms.Hash(), ts_us.Hash());
+  EXPECT_NE(ts_ms.Hash(), ts_ns.Hash());
+  EXPECT_NE(ts_us.Hash(), ts_ns.Hash());
+
+  // Same precision should have same hash
+  FunctionSignature ts_sec2("extractYear",
+                            {arrow::timestamp(arrow::TimeUnit::SECOND)},
+                            arrow::int64());
+  EXPECT_EQ(ts_sec.Hash(), ts_sec2.Hash());
+}
+
+TEST_F(TestFunctionSignature, TestTimestampPrecisionEquals) {
+  // Different timestamp precisions should NOT be equal
+  FunctionSignature ts_sec("extractYear",
+                           {arrow::timestamp(arrow::TimeUnit::SECOND)},
+                           arrow::int64());
+  FunctionSignature ts_ms("extractYear",
+                          {arrow::timestamp(arrow::TimeUnit::MILLI)},
+                          arrow::int64());
+
+  EXPECT_FALSE(ts_sec == ts_ms);
+
+  // Same precision should be equal
+  FunctionSignature ts_sec2("extractYear",
+                            {arrow::timestamp(arrow::TimeUnit::SECOND)},
+                            arrow::int64());
+  EXPECT_EQ(ts_sec, ts_sec2);
+}
+
+TEST_F(TestFunctionSignature, TestTimestampReturnTypeHash) {
+  // Return type precision should also be distinguished
+  FunctionSignature ret_ms("castTimestamp",
+                           {arrow::utf8()},
+                           arrow::timestamp(arrow::TimeUnit::MILLI));
+  FunctionSignature ret_ns("castTimestamp",
+                           {arrow::utf8()},
+                           arrow::timestamp(arrow::TimeUnit::NANO));
+
+  EXPECT_NE(ret_ms.Hash(), ret_ns.Hash());
+  EXPECT_FALSE(ret_ms == ret_ns);
+}
+
+TEST_F(TestFunctionSignature, TestTime32PrecisionHash) {
+  // Time32 types should also have precision-aware hashing
+  FunctionSignature t32_sec("extractHour",
+                            {arrow::time32(arrow::TimeUnit::SECOND)},
+                            arrow::int64());
+  FunctionSignature t32_ms("extractHour",
+                           {arrow::time32(arrow::TimeUnit::MILLI)},
+                           arrow::int64());
+
+  EXPECT_NE(t32_sec.Hash(), t32_ms.Hash());
+  EXPECT_FALSE(t32_sec == t32_ms);
+}
+
+TEST_F(TestFunctionSignature, TestTime64PrecisionHash) {
+  // Time64 types should also have precision-aware hashing
+  FunctionSignature t64_us("extractHour",
+                           {arrow::time64(arrow::TimeUnit::MICRO)},
+                           arrow::int64());
+  FunctionSignature t64_ns("extractHour",
+                           {arrow::time64(arrow::TimeUnit::NANO)},
+                           arrow::int64());
+
+  EXPECT_NE(t64_us.Hash(), t64_ns.Hash());
+  EXPECT_FALSE(t64_us == t64_ns);
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/epoch_time_point.h b/cpp/src/gandiva/precompiled/epoch_time_point.h
@@ -24,14 +24,20 @@ bool is_leap_year(int yy);
 bool did_days_overflow(arrow_vendored::date::year_month_day ymd);
 int last_possible_day_in_month(int month, int year);
 
-// A point of time measured in millis since epoch.
-class EpochTimePoint {
+// Template class for precision-aware time point operations.
+// Duration should be one of: std::chrono::seconds, milliseconds, microseconds, nanoseconds
+template <typename Duration>
+class EpochTimePointT {
  public:
-  explicit EpochTimePoint(std::chrono::milliseconds millis_since_epoch)
-      : tp_(millis_since_epoch) {}
+  using duration_type = Duration;
+  using time_point_type =
+      std::chrono::time_point<std::chrono::system_clock, Duration>;
 
-  explicit EpochTimePoint(int64_t millis_since_epoch)
-      : EpochTimePoint(std::chrono::milliseconds(millis_since_epoch)) {}
+  explicit EpochTimePointT(Duration duration_since_epoch)
+      : tp_(duration_since_epoch) {}
+
+  explicit EpochTimePointT(int64_t value_since_epoch)
+      : EpochTimePointT(Duration(value_since_epoch)) {}
 
   int TmYear() const { return static_cast<int>(YearMonthDay().year()) - 1900; }
 
@@ -62,19 +68,29 @@ class EpochTimePoint {
     return static_cast<int>(TimeOfDay().seconds().count());
   }
 
-  EpochTimePoint AddYears(int num_years) const {
+  // Returns sub-second component in the native duration unit
+  // For milliseconds: returns 0-999
+  // For microseconds: returns 0-999999
+  // For nanoseconds: returns 0-999999999
+  int64_t SubSeconds() const {
+    auto since_midnight = tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_);
+    auto secs = std::chrono::duration_cast<std::chrono::seconds>(since_midnight);
+    return (since_midnight - secs).count();
+  }
+
+  EpochTimePointT AddYears(int num_years) const {
     auto ymd = YearMonthDay() + arrow_vendored::date::years(num_years);
-    return EpochTimePoint((arrow_vendored::date::sys_days{ymd} +  // NOLINT
-                           TimeOfDay().to_duration())
-                              .time_since_epoch());
+    return EpochTimePointT(
+        std::chrono::duration_cast<Duration>(
+            (arrow_vendored::date::sys_days{ymd} + TimeOfDayDuration()).time_since_epoch()));
   }
 
-  EpochTimePoint AddMonths(int num_months) const {
+  EpochTimePointT AddMonths(int num_months) const {
     auto ymd = YearMonthDay() + arrow_vendored::date::months(num_months);
 
-    EpochTimePoint tp = EpochTimePoint((arrow_vendored::date::sys_days{ymd} +  // NOLINT
-                                        TimeOfDay().to_duration())
-                                           .time_since_epoch());
+    EpochTimePointT tp(
+        std::chrono::duration_cast<Duration>(
+            (arrow_vendored::date::sys_days{ymd} + TimeOfDayDuration()).time_since_epoch()));
 
     if (did_days_overflow(ymd)) {
       int days_to_offset =
@@ -86,26 +102,36 @@ class EpochTimePoint {
     return tp;
   }
 
-  EpochTimePoint AddDays(int num_days) const {
-    auto days_since_epoch = arrow_vendored::date::sys_days{YearMonthDay()} +  // NOLINT
+  EpochTimePointT AddDays(int num_days) const {
+    auto days_since_epoch = arrow_vendored::date::sys_days{YearMonthDay()} +
                             arrow_vendored::date::days(num_days);
-    return EpochTimePoint(
-        (days_since_epoch + TimeOfDay().to_duration()).time_since_epoch());
+    return EpochTimePointT(
+        std::chrono::duration_cast<Duration>(
+            (days_since_epoch + TimeOfDayDuration()).time_since_epoch()));
   }
 
-  EpochTimePoint ClearTimeOfDay() const {
-    return EpochTimePoint((tp_ - TimeOfDay().to_duration()).time_since_epoch());
+  EpochTimePointT ClearTimeOfDay() const {
+    return EpochTimePointT(
+        std::chrono::duration_cast<Duration>(
+            (tp_ - TimeOfDayDuration()).time_since_epoch()));
   }
 
-  bool operator==(const EpochTimePoint& other) const { return tp_ == other.tp_; }
+  bool operator==(const EpochTimePointT& other) const { return tp_ == other.tp_; }
 
-  int64_t MillisSinceEpoch() const { return tp_.time_since_epoch().count(); }
+  // Returns the value in the native duration unit
+  int64_t ValueSinceEpoch() const { return tp_.time_since_epoch().count(); }
+
+  // For backward compatibility with existing code expecting milliseconds
+  int64_t MillisSinceEpoch() const {
+    return std::chrono::duration_cast<std::chrono::milliseconds>(
+               tp_.time_since_epoch())
+        .count();
+  }
 
-  arrow_vendored::date::time_of_day<std::chrono::milliseconds> TimeOfDay() const {
-    auto millis_since_midnight =
+  arrow_vendored::date::time_of_day<Duration> TimeOfDay() const {
+    auto duration_since_midnight =
         tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_);
-    return arrow_vendored::date::time_of_day<std::chrono::milliseconds>(
-        millis_since_midnight);
+    return arrow_vendored::date::time_of_day<Duration>(duration_since_midnight);
   }
 
  private:
@@ -114,5 +140,19 @@ class EpochTimePoint {
         arrow_vendored::date::floor<arrow_vendored::date::days>(tp_)};  // NOLINT
   }
 
-  std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds> tp_;
+  // Returns time of day as a duration for arithmetic operations
+  Duration TimeOfDayDuration() const {
+    return tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_);
+  }
+
+  time_point_type tp_;
 };
+
+// Type aliases for each precision level
+using EpochTimePointSec = EpochTimePointT<std::chrono::seconds>;
+using EpochTimePointMilli = EpochTimePointT<std::chrono::milliseconds>;
+using EpochTimePointMicro = EpochTimePointT<std::chrono::microseconds>;
+using EpochTimePointNano = EpochTimePointT<std::chrono::nanoseconds>;
+
+// Backward compatibility: existing code uses EpochTimePoint with milliseconds
+using EpochTimePoint = EpochTimePointMilli;