Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions cpp/src/gandiva/function_registry_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); }
inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); }

inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); }

// Precision-specific timestamp types for explicit time unit handling
inline DataTypePtr timestamp_sec() { return arrow::timestamp(arrow::TimeUnit::SECOND); }
inline DataTypePtr timestamp_ms() { return arrow::timestamp(arrow::TimeUnit::MILLI); }
inline DataTypePtr timestamp_us() { return arrow::timestamp(arrow::TimeUnit::MICRO); }
inline DataTypePtr timestamp_ns() { return arrow::timestamp(arrow::TimeUnit::NANO); }

inline DataTypePtr decimal128() { return arrow::decimal128(38, 0); }

struct KeyHash {
Expand Down Expand Up @@ -289,6 +296,14 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
// Iterate the inner macro over all time types
#define TIME_TYPES(INNER, NAME, ALIASES) INNER(NAME, ALIASES, time32)

// Iterate the inner macro over all timestamp precision types
// These generate precision-specific function registrations
#define TIMESTAMP_PRECISION_TYPES(INNER, NAME, ALIASES) \
INNER(NAME, ALIASES, timestamp_sec), \
INNER(NAME, ALIASES, timestamp_ms), \
INNER(NAME, ALIASES, timestamp_us), \
INNER(NAME, ALIASES, timestamp_ns)

// Iterate the inner macro over all data types
#define VAR_LEN_TYPES(INNER, NAME, ALIASES) \
INNER(NAME, ALIASES, utf8), INNER(NAME, ALIASES, binary)
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/gandiva/function_registry_datetime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ namespace gandiva {

#define NEXT_DAY_FNS(name) DATE_TYPES(NEXT_DAY_SAFE_NULL_IF_NULL, name, {})

// Precision-aware extraction function for timestamp types
// Maps to extractYear_timestamp_sec, extractYear_timestamp_ms, etc.
#define EXTRACT_TIMESTAMP_PRECISION(NAME, ALIASES, TYPE) \
NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
int64(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))

std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
static std::vector<NativeFunction> date_time_fn_registry_ = {
UNARY_SAFE_NULL_NEVER_BOOL(isnull, {}, day_time_interval),
Expand All @@ -62,6 +68,9 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {

NEXT_DAY_FNS(next_day),

// Precision-specific extractYear for all timestamp time units
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractYear, {"extract_year"}),

NativeFunction("castDATE", {}, DataTypeVector{utf8()}, date64(), kResultNullIfNull,
"castDATE_utf8",
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
Expand Down
45 changes: 44 additions & 1 deletion cpp/src/gandiva/function_signature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <utility>
#include <vector>

#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/hash_util.h"
#include "arrow/util/logging.h"
Expand Down Expand Up @@ -80,16 +81,58 @@ bool FunctionSignature::operator==(const FunctionSignature& other) const {
return true;
}

namespace {

// Helper to get the time unit from temporal types for hashing
// Returns -1 for non-temporal types
int GetTemporalTypeUnit(const DataTypePtr& type) {
switch (type->id()) {
case arrow::Type::TIMESTAMP: {
auto ts_type = checked_cast<const arrow::TimestampType*>(type.get());
return static_cast<int>(ts_type->unit());
}
case arrow::Type::TIME32: {
auto t32_type = checked_cast<const arrow::Time32Type*>(type.get());
return static_cast<int>(t32_type->unit());
}
case arrow::Type::TIME64: {
auto t64_type = checked_cast<const arrow::Time64Type*>(type.get());
return static_cast<int>(t64_type->unit());
}
case arrow::Type::DURATION: {
auto dur_type = checked_cast<const arrow::DurationType*>(type.get());
return static_cast<int>(dur_type->unit());
}
default:
return -1;
}
}

} // namespace

/// calculated based on name, datatype id of parameters and datatype id
/// of return type.
/// of return type. For temporal types (TIMESTAMP, TIME32, TIME64, DURATION),
/// also includes the time unit to distinguish different precisions.
std::size_t FunctionSignature::Hash() const {
static const size_t kSeedValue = 17;
size_t result = kSeedValue;
hash_combine(result, AsciiToLower(base_name_));
hash_combine(result, static_cast<size_t>(ret_type_->id()));

// Include time unit for temporal return types
int ret_unit = GetTemporalTypeUnit(ret_type_);
if (ret_unit >= 0) {
hash_combine(result, static_cast<size_t>(ret_unit));
}

// not using hash_range since we only want to include the id from the data type
for (auto& param_type : param_types_) {
hash_combine(result, static_cast<size_t>(param_type->id()));
// Include time unit for temporal parameter types
int param_unit = GetTemporalTypeUnit(param_type);
if (param_unit >= 0) {
hash_combine(result, static_cast<size_t>(param_unit));
}
}
return result;
}
Expand Down
87 changes: 87 additions & 0 deletions cpp/src/gandiva/function_signature_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,91 @@ TEST_F(TestFunctionSignature, TestHash) {
EXPECT_EQ(f3.Hash(), f4.Hash());
}

TEST_F(TestFunctionSignature, TestTimestampPrecisionHash) {
// Different timestamp precisions should have different hashes
FunctionSignature ts_sec("extractYear",
{arrow::timestamp(arrow::TimeUnit::SECOND)},
arrow::int64());
FunctionSignature ts_ms("extractYear",
{arrow::timestamp(arrow::TimeUnit::MILLI)},
arrow::int64());
FunctionSignature ts_us("extractYear",
{arrow::timestamp(arrow::TimeUnit::MICRO)},
arrow::int64());
FunctionSignature ts_ns("extractYear",
{arrow::timestamp(arrow::TimeUnit::NANO)},
arrow::int64());

// All should have different hashes
EXPECT_NE(ts_sec.Hash(), ts_ms.Hash());
EXPECT_NE(ts_sec.Hash(), ts_us.Hash());
EXPECT_NE(ts_sec.Hash(), ts_ns.Hash());
EXPECT_NE(ts_ms.Hash(), ts_us.Hash());
EXPECT_NE(ts_ms.Hash(), ts_ns.Hash());
EXPECT_NE(ts_us.Hash(), ts_ns.Hash());

// Same precision should have same hash
FunctionSignature ts_sec2("extractYear",
{arrow::timestamp(arrow::TimeUnit::SECOND)},
arrow::int64());
EXPECT_EQ(ts_sec.Hash(), ts_sec2.Hash());
}

TEST_F(TestFunctionSignature, TestTimestampPrecisionEquals) {
// Different timestamp precisions should NOT be equal
FunctionSignature ts_sec("extractYear",
{arrow::timestamp(arrow::TimeUnit::SECOND)},
arrow::int64());
FunctionSignature ts_ms("extractYear",
{arrow::timestamp(arrow::TimeUnit::MILLI)},
arrow::int64());

EXPECT_FALSE(ts_sec == ts_ms);

// Same precision should be equal
FunctionSignature ts_sec2("extractYear",
{arrow::timestamp(arrow::TimeUnit::SECOND)},
arrow::int64());
EXPECT_EQ(ts_sec, ts_sec2);
}

TEST_F(TestFunctionSignature, TestTimestampReturnTypeHash) {
// Return type precision should also be distinguished
FunctionSignature ret_ms("castTimestamp",
{arrow::utf8()},
arrow::timestamp(arrow::TimeUnit::MILLI));
FunctionSignature ret_ns("castTimestamp",
{arrow::utf8()},
arrow::timestamp(arrow::TimeUnit::NANO));

EXPECT_NE(ret_ms.Hash(), ret_ns.Hash());
EXPECT_FALSE(ret_ms == ret_ns);
}

TEST_F(TestFunctionSignature, TestTime32PrecisionHash) {
// Time32 types should also have precision-aware hashing
FunctionSignature t32_sec("extractHour",
{arrow::time32(arrow::TimeUnit::SECOND)},
arrow::int64());
FunctionSignature t32_ms("extractHour",
{arrow::time32(arrow::TimeUnit::MILLI)},
arrow::int64());

EXPECT_NE(t32_sec.Hash(), t32_ms.Hash());
EXPECT_FALSE(t32_sec == t32_ms);
}

TEST_F(TestFunctionSignature, TestTime64PrecisionHash) {
// Time64 types should also have precision-aware hashing
FunctionSignature t64_us("extractHour",
{arrow::time64(arrow::TimeUnit::MICRO)},
arrow::int64());
FunctionSignature t64_ns("extractHour",
{arrow::time64(arrow::TimeUnit::NANO)},
arrow::int64());

EXPECT_NE(t64_us.Hash(), t64_ns.Hash());
EXPECT_FALSE(t64_us == t64_ns);
}

} // namespace gandiva
94 changes: 67 additions & 27 deletions cpp/src/gandiva/precompiled/epoch_time_point.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,20 @@ bool is_leap_year(int yy);
bool did_days_overflow(arrow_vendored::date::year_month_day ymd);
int last_possible_day_in_month(int month, int year);

// A point of time measured in millis since epoch.
class EpochTimePoint {
// Template class for precision-aware time point operations.
// Duration should be one of: std::chrono::seconds, milliseconds, microseconds, nanoseconds
template <typename Duration>
class EpochTimePointT {
public:
explicit EpochTimePoint(std::chrono::milliseconds millis_since_epoch)
: tp_(millis_since_epoch) {}
using duration_type = Duration;
using time_point_type =
std::chrono::time_point<std::chrono::system_clock, Duration>;

explicit EpochTimePoint(int64_t millis_since_epoch)
: EpochTimePoint(std::chrono::milliseconds(millis_since_epoch)) {}
explicit EpochTimePointT(Duration duration_since_epoch)
: tp_(duration_since_epoch) {}

explicit EpochTimePointT(int64_t value_since_epoch)
: EpochTimePointT(Duration(value_since_epoch)) {}

int TmYear() const { return static_cast<int>(YearMonthDay().year()) - 1900; }

Expand Down Expand Up @@ -62,19 +68,29 @@ class EpochTimePoint {
return static_cast<int>(TimeOfDay().seconds().count());
}

EpochTimePoint AddYears(int num_years) const {
// Returns sub-second component in the native duration unit
// For milliseconds: returns 0-999
// For microseconds: returns 0-999999
// For nanoseconds: returns 0-999999999
int64_t SubSeconds() const {
auto since_midnight = tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_);
auto secs = std::chrono::duration_cast<std::chrono::seconds>(since_midnight);
return (since_midnight - secs).count();
}

EpochTimePointT AddYears(int num_years) const {
auto ymd = YearMonthDay() + arrow_vendored::date::years(num_years);
return EpochTimePoint((arrow_vendored::date::sys_days{ymd} + // NOLINT
TimeOfDay().to_duration())
.time_since_epoch());
return EpochTimePointT(
std::chrono::duration_cast<Duration>(
(arrow_vendored::date::sys_days{ymd} + TimeOfDayDuration()).time_since_epoch()));
}

EpochTimePoint AddMonths(int num_months) const {
EpochTimePointT AddMonths(int num_months) const {
auto ymd = YearMonthDay() + arrow_vendored::date::months(num_months);

EpochTimePoint tp = EpochTimePoint((arrow_vendored::date::sys_days{ymd} + // NOLINT
TimeOfDay().to_duration())
.time_since_epoch());
EpochTimePointT tp(
std::chrono::duration_cast<Duration>(
(arrow_vendored::date::sys_days{ymd} + TimeOfDayDuration()).time_since_epoch()));

if (did_days_overflow(ymd)) {
int days_to_offset =
Expand All @@ -86,26 +102,36 @@ class EpochTimePoint {
return tp;
}

EpochTimePoint AddDays(int num_days) const {
auto days_since_epoch = arrow_vendored::date::sys_days{YearMonthDay()} + // NOLINT
EpochTimePointT AddDays(int num_days) const {
auto days_since_epoch = arrow_vendored::date::sys_days{YearMonthDay()} +
arrow_vendored::date::days(num_days);
return EpochTimePoint(
(days_since_epoch + TimeOfDay().to_duration()).time_since_epoch());
return EpochTimePointT(
std::chrono::duration_cast<Duration>(
(days_since_epoch + TimeOfDayDuration()).time_since_epoch()));
}

EpochTimePoint ClearTimeOfDay() const {
return EpochTimePoint((tp_ - TimeOfDay().to_duration()).time_since_epoch());
EpochTimePointT ClearTimeOfDay() const {
return EpochTimePointT(
std::chrono::duration_cast<Duration>(
(tp_ - TimeOfDayDuration()).time_since_epoch()));
}

bool operator==(const EpochTimePoint& other) const { return tp_ == other.tp_; }
bool operator==(const EpochTimePointT& other) const { return tp_ == other.tp_; }

int64_t MillisSinceEpoch() const { return tp_.time_since_epoch().count(); }
// Returns the value in the native duration unit
int64_t ValueSinceEpoch() const { return tp_.time_since_epoch().count(); }

// For backward compatibility with existing code expecting milliseconds
int64_t MillisSinceEpoch() const {
return std::chrono::duration_cast<std::chrono::milliseconds>(
tp_.time_since_epoch())
.count();
}

arrow_vendored::date::time_of_day<std::chrono::milliseconds> TimeOfDay() const {
auto millis_since_midnight =
arrow_vendored::date::time_of_day<Duration> TimeOfDay() const {
auto duration_since_midnight =
tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_);
return arrow_vendored::date::time_of_day<std::chrono::milliseconds>(
millis_since_midnight);
return arrow_vendored::date::time_of_day<Duration>(duration_since_midnight);
}

private:
Expand All @@ -114,5 +140,19 @@ class EpochTimePoint {
arrow_vendored::date::floor<arrow_vendored::date::days>(tp_)}; // NOLINT
}

std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds> tp_;
// Returns time of day as a duration for arithmetic operations
Duration TimeOfDayDuration() const {
return tp_ - arrow_vendored::date::floor<arrow_vendored::date::days>(tp_);
}

time_point_type tp_;
};

// Type aliases for each precision level
using EpochTimePointSec = EpochTimePointT<std::chrono::seconds>;
using EpochTimePointMilli = EpochTimePointT<std::chrono::milliseconds>;
using EpochTimePointMicro = EpochTimePointT<std::chrono::microseconds>;
using EpochTimePointNano = EpochTimePointT<std::chrono::nanoseconds>;

// Backward compatibility: existing code uses EpochTimePoint with milliseconds
using EpochTimePoint = EpochTimePointMilli;
Loading
Loading