Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
15 changes: 15 additions & 0 deletions cpp/src/gandiva/function_registry_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); }
inline DataTypePtr time64() { return arrow::time64(arrow::TimeUnit::MICRO); }

inline DataTypePtr timestamp() { return arrow::timestamp(arrow::TimeUnit::MILLI); }

// Precision-specific timestamp types for explicit time unit handling.
// Each helper returns arrow::timestamp() built with the TimeUnit named by its
// suffix: sec = SECOND, ms = MILLI, us = MICRO, ns = NANO.
// NOTE(review): timestamp_ms() is built with the same unit (MILLI) as the
// timestamp() helper defined just above. Confirm that registering a function
// for both helpers does not yield two registry entries with the same signature.
inline DataTypePtr timestamp_sec() { return arrow::timestamp(arrow::TimeUnit::SECOND); }
inline DataTypePtr timestamp_ms() { return arrow::timestamp(arrow::TimeUnit::MILLI); }
inline DataTypePtr timestamp_us() { return arrow::timestamp(arrow::TimeUnit::MICRO); }
inline DataTypePtr timestamp_ns() { return arrow::timestamp(arrow::TimeUnit::NANO); }

inline DataTypePtr decimal128() { return arrow::decimal128(38, 0); }

struct KeyHash {
Expand Down Expand Up @@ -289,6 +296,14 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH
// Iterate the inner macro over all time types
#define TIME_TYPES(INNER, NAME, ALIASES) INNER(NAME, ALIASES, time32)

// Iterate the inner macro over all timestamp precision types.
// Expands to four comma-separated INNER(NAME, ALIASES, <type>) invocations, one
// each for timestamp_sec, timestamp_ms, timestamp_us and timestamp_ns, in that
// order and without a trailing comma, so the result can be dropped straight
// into a braced initializer list.
// <type> is forwarded as a bare identifier; INNER decides how to use it
// (for example calling it, or pasting it into another name).
#define TIMESTAMP_PRECISION_TYPES(INNER, NAME, ALIASES) \
INNER(NAME, ALIASES, timestamp_sec), \
INNER(NAME, ALIASES, timestamp_ms), \
INNER(NAME, ALIASES, timestamp_us), \
INNER(NAME, ALIASES, timestamp_ns)

// Iterate the inner macro over all data types
#define VAR_LEN_TYPES(INNER, NAME, ALIASES) \
INNER(NAME, ALIASES, utf8), INNER(NAME, ALIASES, binary)
Expand Down
125 changes: 125 additions & 0 deletions cpp/src/gandiva/function_registry_datetime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ namespace gandiva {

#define NEXT_DAY_FNS(name) DATE_TYPES(NEXT_DAY_SAFE_NULL_IF_NULL, name, {})

// Precision-aware extraction function for timestamp types.
// Builds one NativeFunction whose name is the stringified NAME, with the given
// ALIASES, a single parameter of type TYPE(), return type int64() and
// kResultNullIfNull.
// The last argument is the string "<NAME>_<TYPE>", formed by token pasting,
// e.g. "extractYear_timestamp_sec" or "extractYear_timestamp_ms".
// NOTE(review): presumably that string names the precompiled implementation;
// verify a matching symbol exists for every NAME/TYPE pair that is registered.
#define EXTRACT_TIMESTAMP_PRECISION(NAME, ALIASES, TYPE) \
NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
int64(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))

// Precision-aware date truncation function for timestamp types.
// Builds one NativeFunction whose name is the stringified NAME, with the given
// ALIASES, a single parameter of type TYPE() and kResultNullIfNull.
// The return type is also TYPE(), i.e. the result keeps the input's precision.
// The last argument is the string "<NAME>_<TYPE>", formed by token pasting,
// e.g. "date_trunc_Day_timestamp_us".
// NOTE(review): presumably that string names the precompiled implementation;
// verify a matching symbol exists for every NAME/TYPE pair that is registered.
#define DATE_TRUNC_TIMESTAMP_PRECISION(NAME, ALIASES, TYPE) \
NativeFunction(#NAME, std::vector<std::string> ALIASES, DataTypeVector{TYPE()}, \
TYPE(), kResultNullIfNull, ARROW_STRINGIFY(NAME##_##TYPE))

std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
static std::vector<NativeFunction> date_time_fn_registry_ = {
UNARY_SAFE_NULL_NEVER_BOOL(isnull, {}, day_time_interval),
Expand All @@ -62,6 +74,119 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {

NEXT_DAY_FNS(next_day),

// Precision-specific extract functions for all timestamp time units
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractYear, {"extract_year"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractMonth, {"extract_month"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractDay, {"extract_day"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractHour, {"extract_hour"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractMinute, {"extract_minute"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractSecond, {"extract_second"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractDoy, {"extract_doy"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractDow, {"extract_dow"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractWeek, {"extract_week"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractQuarter, {"extract_quarter"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractEpoch, {"extract_epoch"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractMillennium, {"extract_millennium"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractCentury, {"extract_century"}),
TIMESTAMP_PRECISION_TYPES(EXTRACT_TIMESTAMP_PRECISION, extractDecade, {"extract_decade"}),

// Precision-specific date_trunc functions for all timestamp time units
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Second, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Minute, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Hour, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Day, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Week, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Month, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Quarter, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Year, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Decade, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Century, {}),
TIMESTAMP_PRECISION_TYPES(DATE_TRUNC_TIMESTAMP_PRECISION, date_trunc_Millennium, {}),

// Truncation to millisecond / microsecond granularity. Registered only for
// units finer than the target: us and ns for Millisecond, ns for Microsecond.
NativeFunction("date_trunc_Millisecond", {}, DataTypeVector{timestamp_us()},
timestamp_us(), kResultNullIfNull, "date_trunc_Millisecond_timestamp_us"),
NativeFunction("date_trunc_Millisecond", {}, DataTypeVector{timestamp_ns()},
timestamp_ns(), kResultNullIfNull, "date_trunc_Millisecond_timestamp_ns"),
NativeFunction("date_trunc_Microsecond", {}, DataTypeVector{timestamp_ns()},
timestamp_ns(), kResultNullIfNull, "date_trunc_Microsecond_timestamp_ns"),

// Precision-specific cast between timestamp types
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_sec()},
timestamp_ms(), kResultNullIfNull, "castTIMESTAMP_ms_timestamp_sec"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_sec()},
timestamp_us(), kResultNullIfNull, "castTIMESTAMP_us_timestamp_sec"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_sec()},
timestamp_ns(), kResultNullIfNull, "castTIMESTAMP_ns_timestamp_sec"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_ms()},
timestamp_sec(), kResultNullIfNull, "castTIMESTAMP_sec_timestamp_ms"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_ms()},
timestamp_us(), kResultNullIfNull, "castTIMESTAMP_us_timestamp_ms"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_ms()},
timestamp_ns(), kResultNullIfNull, "castTIMESTAMP_ns_timestamp_ms"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_us()},
timestamp_sec(), kResultNullIfNull, "castTIMESTAMP_sec_timestamp_us"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_us()},
timestamp_ms(), kResultNullIfNull, "castTIMESTAMP_ms_timestamp_us"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_us()},
timestamp_ns(), kResultNullIfNull, "castTIMESTAMP_ns_timestamp_us"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_ns()},
timestamp_sec(), kResultNullIfNull, "castTIMESTAMP_sec_timestamp_ns"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_ns()},
timestamp_ms(), kResultNullIfNull, "castTIMESTAMP_ms_timestamp_ns"),
NativeFunction("castTIMESTAMP", {}, DataTypeVector{timestamp_ns()},
timestamp_us(), kResultNullIfNull, "castTIMESTAMP_us_timestamp_ns"),

// Precision-specific castDATE for all timestamp types
NativeFunction("castDATE", {}, DataTypeVector{timestamp_sec()},
date64(), kResultNullIfNull, "castDATE_timestamp_sec"),
NativeFunction("castDATE", {}, DataTypeVector{timestamp_ms()},
date64(), kResultNullIfNull, "castDATE_timestamp_ms"),
NativeFunction("castDATE", {}, DataTypeVector{timestamp_us()},
date64(), kResultNullIfNull, "castDATE_timestamp_us"),
NativeFunction("castDATE", {}, DataTypeVector{timestamp_ns()},
date64(), kResultNullIfNull, "castDATE_timestamp_ns"),

// Precision-specific castTIME for all timestamp types
NativeFunction("castTIME", {}, DataTypeVector{timestamp_sec()},
time32(), kResultNullIfNull, "castTIME_timestamp_sec"),
NativeFunction("castTIME", {}, DataTypeVector{timestamp_ms()},
time32(), kResultNullIfNull, "castTIME_timestamp_ms"),
NativeFunction("castTIME", {}, DataTypeVector{timestamp_us()},
time32(), kResultNullIfNull, "castTIME_timestamp_us"),
NativeFunction("castTIME", {}, DataTypeVector{timestamp_ns()},
time32(), kResultNullIfNull, "castTIME_timestamp_ns"),

// Precision-specific datediff
NativeFunction("datediff", {}, DataTypeVector{timestamp_sec(), timestamp_sec()},
int32(), kResultNullIfNull, "datediff_timestamp_sec_timestamp_sec"),
NativeFunction("datediff", {}, DataTypeVector{timestamp_ms(), timestamp_ms()},
int32(), kResultNullIfNull, "datediff_timestamp_ms_timestamp_ms"),
NativeFunction("datediff", {}, DataTypeVector{timestamp_us(), timestamp_us()},
int32(), kResultNullIfNull, "datediff_timestamp_us_timestamp_us"),
NativeFunction("datediff", {}, DataTypeVector{timestamp_ns(), timestamp_ns()},
int32(), kResultNullIfNull, "datediff_timestamp_ns_timestamp_ns"),

// Precision-specific months_between
NativeFunction("months_between", {}, DataTypeVector{timestamp_sec(), timestamp_sec()},
float64(), kResultNullIfNull, "months_between_timestamp_sec_timestamp_sec"),
NativeFunction("months_between", {}, DataTypeVector{timestamp_ms(), timestamp_ms()},
float64(), kResultNullIfNull, "months_between_timestamp_ms_timestamp_ms"),
NativeFunction("months_between", {}, DataTypeVector{timestamp_us(), timestamp_us()},
float64(), kResultNullIfNull, "months_between_timestamp_us_timestamp_us"),
NativeFunction("months_between", {}, DataTypeVector{timestamp_ns(), timestamp_ns()},
float64(), kResultNullIfNull, "months_between_timestamp_ns_timestamp_ns"),

// Precision-specific last_day
NativeFunction("last_day", {}, DataTypeVector{timestamp_sec()},
date64(), kResultNullIfNull, "last_day_timestamp_sec"),
NativeFunction("last_day", {}, DataTypeVector{timestamp_ms()},
date64(), kResultNullIfNull, "last_day_timestamp_ms"),
NativeFunction("last_day", {}, DataTypeVector{timestamp_us()},
date64(), kResultNullIfNull, "last_day_timestamp_us"),
NativeFunction("last_day", {}, DataTypeVector{timestamp_ns()},
date64(), kResultNullIfNull, "last_day_timestamp_ns"),

NativeFunction("castDATE", {}, DataTypeVector{utf8()}, date64(), kResultNullIfNull,
"castDATE_utf8",
NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors),
Expand Down
40 changes: 39 additions & 1 deletion cpp/src/gandiva/function_registry_timestamp_arithmetic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,34 @@ namespace gandiva {
BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, date64, int64, date64), \
BINARY_GENERIC_SAFE_NULL_IF_NULL(name, ALIASES, timestamp, int64, timestamp)

// Precision-specific timestamp add functions taking an int32 amount.
// Expands to two comma-separated NativeFunction entries for NAME, one per
// argument order, both returning TYPE() with kResultNullIfNull:
//   (int32, TYPE) -> "<NAME>_int32_<TYPE>", e.g. timestampaddSecond_int32_timestamp_sec
//   (TYPE, int32) -> "<NAME>_<TYPE>_int32", e.g. timestampaddSecond_timestamp_sec_int32
#define TIMESTAMP_ADD_PRECISION_INT32(NAME, ALIASES, TYPE) \
NativeFunction(#NAME, std::vector<std::string> ALIASES, \
DataTypeVector{int32(), TYPE()}, TYPE(), kResultNullIfNull, \
ARROW_STRINGIFY(NAME##_int32_##TYPE)), \
NativeFunction(#NAME, std::vector<std::string> ALIASES, \
DataTypeVector{TYPE(), int32()}, TYPE(), kResultNullIfNull, \
ARROW_STRINGIFY(NAME##_##TYPE##_int32))

// Precision-specific timestamp add functions taking an int64 amount.
// Expands to two comma-separated NativeFunction entries for NAME, one per
// argument order, both returning TYPE() with kResultNullIfNull:
//   (int64, TYPE) -> "<NAME>_int64_<TYPE>", e.g. timestampaddSecond_int64_timestamp_sec
//   (TYPE, int64) -> "<NAME>_<TYPE>_int64", e.g. timestampaddSecond_timestamp_sec_int64
#define TIMESTAMP_ADD_PRECISION_INT64(NAME, ALIASES, TYPE) \
NativeFunction(#NAME, std::vector<std::string> ALIASES, \
DataTypeVector{int64(), TYPE()}, TYPE(), kResultNullIfNull, \
ARROW_STRINGIFY(NAME##_int64_##TYPE)), \
NativeFunction(#NAME, std::vector<std::string> ALIASES, \
DataTypeVector{TYPE(), int64()}, TYPE(), kResultNullIfNull, \
ARROW_STRINGIFY(NAME##_##TYPE##_int64))

// Registers NAME for every timestamp precision and both integer widths.
// Applies TIMESTAMP_ADD_PRECISION_INT32 and TIMESTAMP_ADD_PRECISION_INT64 to
// timestamp_sec, timestamp_ms, timestamp_us and timestamp_ns. Each of those
// eight invocations yields two entries (one per argument order), so a single
// use expands to 16 comma-separated NativeFunction entries with no trailing
// comma.
#define TIMESTAMP_ADD_PRECISION_FNS(NAME, ALIASES) \
TIMESTAMP_ADD_PRECISION_INT32(NAME, ALIASES, timestamp_sec), \
TIMESTAMP_ADD_PRECISION_INT64(NAME, ALIASES, timestamp_sec), \
TIMESTAMP_ADD_PRECISION_INT32(NAME, ALIASES, timestamp_ms), \
TIMESTAMP_ADD_PRECISION_INT64(NAME, ALIASES, timestamp_ms), \
TIMESTAMP_ADD_PRECISION_INT32(NAME, ALIASES, timestamp_us), \
TIMESTAMP_ADD_PRECISION_INT64(NAME, ALIASES, timestamp_us), \
TIMESTAMP_ADD_PRECISION_INT32(NAME, ALIASES, timestamp_ns), \
TIMESTAMP_ADD_PRECISION_INT64(NAME, ALIASES, timestamp_ns)

std::vector<NativeFunction> GetDateTimeArithmeticFunctionRegistry() {
static std::vector<NativeFunction> datetime_fn_registry_ = {
BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, {}, date64, date64, float64),
Expand Down Expand Up @@ -81,7 +109,17 @@ std::vector<NativeFunction> GetDateTimeArithmeticFunctionRegistry() {

DATE_DIFF_FNS(date_sub, {}),
DATE_DIFF_FNS(subtract, {}),
DATE_DIFF_FNS(date_diff, {})};
DATE_DIFF_FNS(date_diff, {}),

// Precision-specific timestampadd functions
TIMESTAMP_ADD_PRECISION_FNS(timestampaddSecond, {}),
TIMESTAMP_ADD_PRECISION_FNS(timestampaddMinute, {}),
TIMESTAMP_ADD_PRECISION_FNS(timestampaddHour, {}),
TIMESTAMP_ADD_PRECISION_FNS(timestampaddDay, {}),
TIMESTAMP_ADD_PRECISION_FNS(timestampaddWeek, {}),
TIMESTAMP_ADD_PRECISION_FNS(timestampaddMonth, {}),
TIMESTAMP_ADD_PRECISION_FNS(timestampaddQuarter, {}),
TIMESTAMP_ADD_PRECISION_FNS(timestampaddYear, {})};

return datetime_fn_registry_;
}
Expand Down
45 changes: 44 additions & 1 deletion cpp/src/gandiva/function_signature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <utility>
#include <vector>

#include "arrow/type.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/hash_util.h"
#include "arrow/util/logging.h"
Expand Down Expand Up @@ -80,16 +81,58 @@ bool FunctionSignature::operator==(const FunctionSignature& other) const {
return true;
}

namespace {

// Helper to get the time unit from temporal types for hashing
// Returns -1 for non-temporal types
int GetTemporalTypeUnit(const DataTypePtr& type) {
switch (type->id()) {
case arrow::Type::TIMESTAMP: {
auto ts_type = checked_cast<const arrow::TimestampType*>(type.get());
return static_cast<int>(ts_type->unit());
}
case arrow::Type::TIME32: {
auto t32_type = checked_cast<const arrow::Time32Type*>(type.get());
return static_cast<int>(t32_type->unit());
}
case arrow::Type::TIME64: {
auto t64_type = checked_cast<const arrow::Time64Type*>(type.get());
return static_cast<int>(t64_type->unit());
}
case arrow::Type::DURATION: {
auto dur_type = checked_cast<const arrow::DurationType*>(type.get());
return static_cast<int>(dur_type->unit());
}
default:
return -1;
}
}

} // namespace

/// Hash of the signature, built from the lower-cased base name, the return
/// type and every parameter type, in that order.
/// A type contributes its type id and, for temporal types that carry a time
/// unit (TIMESTAMP, TIME32, TIME64, DURATION), also that unit, so the
/// precision of such types is reflected in the hash.
std::size_t FunctionSignature::Hash() const {
  static const size_t kSeedValue = 17;

  size_t digest = kSeedValue;

  // Folds one data type into the digest: always its id, and its time unit
  // only when GetTemporalTypeUnit reports one (a non-negative value).
  const auto mix_type = [&digest](const DataTypePtr& type) {
    hash_combine(digest, static_cast<size_t>(type->id()));
    const int unit = GetTemporalTypeUnit(type);
    if (unit >= 0) {
      hash_combine(digest, static_cast<size_t>(unit));
    }
  };

  hash_combine(digest, AsciiToLower(base_name_));
  mix_type(ret_type_);

  // hash_range is deliberately not used: only the id (and unit, if any) of
  // each parameter type should feed the hash, not the whole data type.
  for (const auto& param_type : param_types_) {
    mix_type(param_type);
  }
  return digest;
}
Expand Down
Loading