diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 4cc8d80ba80..894ee7e10e5 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -109,7 +109,6 @@ #include "gandiva/decimal_ir.h" #include "gandiva/exported_funcs.h" #include "gandiva/exported_funcs_registry.h" -#include "gandiva/timestamp_ir.h" namespace gandiva { @@ -350,7 +349,6 @@ Status Engine::LoadFunctionIRs() { if (!functions_loaded_) { ARROW_RETURN_NOT_OK(LoadPreCompiledIR()); ARROW_RETURN_NOT_OK(DecimalIR::AddFunctions(this)); - ARROW_RETURN_NOT_OK(TimestampIR::AddFunctions(this)); ARROW_RETURN_NOT_OK(LoadExternalPreCompiledIR()); functions_loaded_ = true; } diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 7df1f96e31b..bf5207efe19 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -1290,16 +1290,8 @@ Result LLVMGenerator::ResolveTimestampPcName(const std::string& pc_ if (TimestampIR::IsTimestampIRFunction(remapped)) { return remapped; } - // No TimestampIR variant exists for this unit. Return an error so the caller - // fails loudly (wrong-result-silent is worse than a build-time error). - // The Java sieve (GandivaPushdownSieve.TIMESTAMP_IR_EXPR_NAMES) should have - // already routed unsupported functions to Java before reaching this point. - return Status::Invalid( - "Gandiva: no TimestampIR variant for '", pc_name, - "' with timestamp TimeUnit=", - (ts_unit == arrow::TimeUnit::MICRO ? "MICRO" : "NANO"), - ". Add the function to TIMESTAMP_IR_EXPR_NAMES in GandivaPushdownSieve" - " or add a TimestampIR variant in timestamp_ir.cc."); + // No precompiled _us/_ns variant registered — pass through unchanged so + // normal JIT resolution can handle it (e.g. the milli variant is acceptable). 
} return pc_name; } diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index e1427e25fb6..77c85bde9d9 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -26,6 +26,7 @@ set(PRECOMPILED_SRCS string_ops.cc time.cc timestamp_arithmetic.cc + timestamp_unit_ops.cc ../../arrow/util/basic_decimal.cc) set(GANDIVA_PRECOMPILED_BC_FILES) foreach(SOURCE ${PRECOMPILED_SRCS}) diff --git a/cpp/src/gandiva/precompiled/timestamp_unit_ops.cc b/cpp/src/gandiva/precompiled/timestamp_unit_ops.cc new file mode 100644 index 00000000000..09c76d62941 --- /dev/null +++ b/cpp/src/gandiva/precompiled/timestamp_unit_ops.cc @@ -0,0 +1,519 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Precompiled _us and _ns wrappers for timestamp functions. +// +// Each function scales a microsecond or nanosecond timestamp to milliseconds +// (using floor division to handle negative timestamps correctly), calls the +// existing precompiled millisecond-scale function, then scales the result back. +// +// This replaces the dynamic LLVM IR builders in timestamp_ir.cc, which built +// the same wrappers programmatically at JIT initialization time. 
+ +#include +#include + +#include "./types.h" + +// Forward declarations for precompiled milli-scale functions not in types.h. +extern "C" { + +gdv_int64 extractEpoch_timestamp(gdv_timestamp millis); + +gdv_int64 date_trunc_Millennium_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Century_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Decade_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Year_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Quarter_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Month_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Day_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Hour_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Minute_timestamp(gdv_timestamp millis); +gdv_int64 date_trunc_Second_timestamp(gdv_timestamp millis); + +gdv_int32 timestampdiffSecond_timestamp_timestamp(gdv_timestamp ts1, gdv_timestamp ts2); +gdv_int32 timestampdiffMinute_timestamp_timestamp(gdv_timestamp ts1, gdv_timestamp ts2); +gdv_int32 timestampdiffHour_timestamp_timestamp(gdv_timestamp ts1, gdv_timestamp ts2); +gdv_int32 timestampdiffDay_timestamp_timestamp(gdv_timestamp ts1, gdv_timestamp ts2); +gdv_int32 timestampdiffWeek_timestamp_timestamp(gdv_timestamp ts1, gdv_timestamp ts2); +gdv_int32 timestampdiffQuarter_timestamp_timestamp(gdv_timestamp ts1, gdv_timestamp ts2); +gdv_int32 timestampdiffYear_timestamp_timestamp(gdv_timestamp ts1, gdv_timestamp ts2); + +gdv_int64 to_utc_timezone_timestamp(gdv_int64 ctx, gdv_int64 millis, const char* tz, + gdv_int32 tz_len); +gdv_int64 from_utc_timezone_timestamp(gdv_int64 ctx, gdv_int64 millis, const char* tz, + gdv_int32 tz_len); + +} // end forward declarations + +// Floor division: rounds toward -inf (unlike C's truncation-toward-zero). +// e.g., ts_floor_div(-1001, 1000) == -2, not -1. +static FORCE_INLINE gdv_int64 ts_floor_div(gdv_int64 ts, gdv_int64 divisor) { + gdv_int64 q = ts / divisor; + gdv_int64 r = ts % divisor; + return (ts < 0 && r != 0) ? 
q - 1 : q; +} + +// Floor remainder: always non-negative, satisfies ts = floor_div*divisor + floor_rem. +static FORCE_INLINE gdv_int64 ts_floor_rem(gdv_int64 ts, gdv_int64 divisor) { + gdv_int64 r = ts % divisor; + return (ts < 0 && r != 0) ? r + divisor : r; +} + +extern "C" { + +// ───────────────────────────────────────────────────────────────────────────── +// MICROSECOND variants (units per millisecond = 1000) +// ───────────────────────────────────────────────────────────────────────────── + +// Fixed-unit timestampadd: ts ± count * constant (pure arithmetic). +// Four arg-order variants per base name: (int32,ts), (int64,ts), (ts,int32), (ts,int64). +#define FIXED_ADD_US(FN, UNITS_PER_SECOND) \ + FORCE_INLINE gdv_int64 FN##_int32_timestamp_us(gdv_int32 cnt, gdv_timestamp ts) { \ + return ts + (gdv_int64)cnt * ((UNITS_PER_SECOND)*1000000LL); \ + } \ + FORCE_INLINE gdv_int64 FN##_int64_timestamp_us(gdv_int64 cnt, gdv_timestamp ts) { \ + return ts + cnt * ((UNITS_PER_SECOND)*1000000LL); \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int32_us(gdv_timestamp ts, gdv_int32 cnt) { \ + return ts + (gdv_int64)cnt * ((UNITS_PER_SECOND)*1000000LL); \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int64_us(gdv_timestamp ts, gdv_int64 cnt) { \ + return ts + cnt * ((UNITS_PER_SECOND)*1000000LL); \ + } + +FIXED_ADD_US(timestampaddSecond, 1) +FIXED_ADD_US(timestampaddMinute, 60) +FIXED_ADD_US(timestampaddHour, 3600) +FIXED_ADD_US(timestampaddDay, 86400) +FIXED_ADD_US(timestampaddWeek, 604800) + +// Calendar-based timestampadd: floor-split ts into (millis, sub-ms remainder), +// call the precompiled milli function, then reassemble. +// All four arg-order variants share the same (int32, millis) base function. 
+#define CALENDAR_ADD_US(FN) \ + FORCE_INLINE gdv_int64 FN##_int32_timestamp_us(gdv_int32 cnt, gdv_timestamp ts) { \ + gdv_int64 ms = ts_floor_div(ts, 1000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000LL); \ + return FN##_int32_timestamp(cnt, ms) * 1000LL + rem; \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int32_us(gdv_timestamp ts, gdv_int32 cnt) { \ + gdv_int64 ms = ts_floor_div(ts, 1000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000LL); \ + return FN##_int32_timestamp(cnt, ms) * 1000LL + rem; \ + } \ + FORCE_INLINE gdv_int64 FN##_int64_timestamp_us(gdv_int64 cnt, gdv_timestamp ts) { \ + gdv_int64 ms = ts_floor_div(ts, 1000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000LL); \ + return FN##_int32_timestamp((gdv_int32)cnt, ms) * 1000LL + rem; \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int64_us(gdv_timestamp ts, gdv_int64 cnt) { \ + gdv_int64 ms = ts_floor_div(ts, 1000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000LL); \ + return FN##_int32_timestamp((gdv_int32)cnt, ms) * 1000LL + rem; \ + } + +CALENDAR_ADD_US(timestampaddMonth) +CALENDAR_ADD_US(timestampaddQuarter) +CALENDAR_ADD_US(timestampaddYear) + +// Extract: scale ts to millis, call precompiled extractor, return scalar. +#define EXTRACT_US(FN) \ + FORCE_INLINE gdv_int64 FN##_timestamp_us(gdv_timestamp ts) { \ + return FN##_timestamp(ts_floor_div(ts, 1000LL)); \ + } + +EXTRACT_US(extractMillennium) +EXTRACT_US(extractCentury) +EXTRACT_US(extractDecade) +EXTRACT_US(extractYear) +EXTRACT_US(extractQuarter) +EXTRACT_US(extractMonth) +EXTRACT_US(extractWeek) +EXTRACT_US(extractDay) +EXTRACT_US(extractHour) +EXTRACT_US(extractMinute) +EXTRACT_US(extractSecond) +EXTRACT_US(extractDoy) +EXTRACT_US(extractDow) +EXTRACT_US(extractEpoch) + +// date_trunc: scale to millis, truncate, scale back (sub-ms remainder is zeroed). 
+#define TRUNC_US(FN) \ + FORCE_INLINE gdv_int64 FN##_timestamp_us(gdv_timestamp ts) { \ + return FN##_timestamp(ts_floor_div(ts, 1000LL)) * 1000LL; \ + } + +TRUNC_US(date_trunc_Millennium) +TRUNC_US(date_trunc_Century) +TRUNC_US(date_trunc_Decade) +TRUNC_US(date_trunc_Year) +TRUNC_US(date_trunc_Quarter) +TRUNC_US(date_trunc_Month) +TRUNC_US(date_trunc_Week) +TRUNC_US(date_trunc_Day) +TRUNC_US(date_trunc_Hour) +TRUNC_US(date_trunc_Minute) +TRUNC_US(date_trunc_Second) + +// timestampdiff: scale both inputs to millis, call precompiled, return int32. +#define DIFF_US(FN) \ + FORCE_INLINE gdv_int32 FN##_timestamp_timestamp_us(gdv_timestamp ts1, \ + gdv_timestamp ts2) { \ + return FN##_timestamp_timestamp(ts_floor_div(ts1, 1000LL), \ + ts_floor_div(ts2, 1000LL)); \ + } + +DIFF_US(timestampdiffSecond) +DIFF_US(timestampdiffMinute) +DIFF_US(timestampdiffHour) +DIFF_US(timestampdiffDay) +DIFF_US(timestampdiffWeek) +DIFF_US(timestampdiffMonth) +DIFF_US(timestampdiffQuarter) +DIFF_US(timestampdiffYear) + +FORCE_INLINE gdv_float64 +months_between_timestamp_timestamp_us(gdv_timestamp ts1, gdv_timestamp ts2) { + return months_between_timestamp_timestamp((gdv_uint64)ts_floor_div(ts1, 1000LL), + (gdv_uint64)ts_floor_div(ts2, 1000LL)); +} + +FORCE_INLINE gdv_int32 +datediff_timestamp_timestamp_us(gdv_timestamp ts1, gdv_timestamp ts2) { + return datediff_timestamp_timestamp(ts_floor_div(ts1, 1000LL), + ts_floor_div(ts2, 1000LL)); +} + +// Cast from timestamp (scale to millis, call precompiled cast). +FORCE_INLINE gdv_date64 castDATE_timestamp_us(gdv_timestamp ts) { + return castDATE_timestamp(ts_floor_div(ts, 1000LL)); +} +FORCE_INLINE gdv_time32 castTIME_timestamp_us(gdv_timestamp ts) { + return castTIME_timestamp(ts_floor_div(ts, 1000LL)); +} +FORCE_INLINE gdv_date64 last_day_from_timestamp_us(gdv_timestamp ts) { + return last_day_from_timestamp(ts_floor_div(ts, 1000LL)); +} + +// Date arithmetic: add or subtract whole days (pure arithmetic, 86400 s/day). 
+// Positive delta for add, negative for sub.
+// One day is 86400000000 microseconds. The date_add_* (count-first) and
+// date_sub_timestamp_* functions hold the arithmetic; every other name below
+// is an alias that forwards to one of them with the arguments reordered.
+FORCE_INLINE gdv_int64 date_add_int32_timestamp_us(gdv_int32 cnt, gdv_timestamp ts) {
+  const gdv_int64 days = cnt;  // widen before multiplying
+  return ts + days * 86400000000LL;
+}
+FORCE_INLINE gdv_int64 date_add_int64_timestamp_us(gdv_int64 cnt, gdv_timestamp ts) {
+  const gdv_int64 delta_us = cnt * 86400000000LL;
+  return ts + delta_us;
+}
+FORCE_INLINE gdv_int64 add_int32_timestamp_us(gdv_int32 cnt, gdv_timestamp ts) {
+  return date_add_int32_timestamp_us(cnt, ts);
+}
+FORCE_INLINE gdv_int64 add_int64_timestamp_us(gdv_int64 cnt, gdv_timestamp ts) {
+  return date_add_int64_timestamp_us(cnt, ts);
+}
+FORCE_INLINE gdv_int64 date_add_timestamp_int32_us(gdv_timestamp ts, gdv_int32 cnt) {
+  return date_add_int32_timestamp_us(cnt, ts);
+}
+FORCE_INLINE gdv_int64 date_add_timestamp_int64_us(gdv_timestamp ts, gdv_int64 cnt) {
+  return date_add_int64_timestamp_us(cnt, ts);
+}
+FORCE_INLINE gdv_int64 add_timestamp_int32_us(gdv_timestamp ts, gdv_int32 cnt) {
+  return date_add_int32_timestamp_us(cnt, ts);
+}
+FORCE_INLINE gdv_int64 add_timestamp_int64_us(gdv_timestamp ts, gdv_int64 cnt) {
+  return date_add_int64_timestamp_us(cnt, ts);
+}
+FORCE_INLINE gdv_int64 date_sub_timestamp_int32_us(gdv_timestamp ts, gdv_int32 cnt) {
+  const gdv_int64 days = cnt;  // widen before multiplying
+  return ts - days * 86400000000LL;
+}
+FORCE_INLINE gdv_int64 date_sub_timestamp_int64_us(gdv_timestamp ts, gdv_int64 cnt) {
+  const gdv_int64 delta_us = cnt * 86400000000LL;
+  return ts - delta_us;
+}
+FORCE_INLINE gdv_int64 subtract_timestamp_int32_us(gdv_timestamp ts, gdv_int32 cnt) {
+  return date_sub_timestamp_int32_us(ts, cnt);
+}
+FORCE_INLINE gdv_int64 subtract_timestamp_int64_us(gdv_timestamp ts, gdv_int64 cnt) {
+  return date_sub_timestamp_int64_us(ts, cnt);
+}
+FORCE_INLINE gdv_int64 date_diff_timestamp_int32_us(gdv_timestamp ts, gdv_int32 cnt) {
+  return date_sub_timestamp_int32_us(ts, cnt);
+}
+FORCE_INLINE gdv_int64 date_diff_timestamp_int64_us(gdv_timestamp ts, gdv_int64 cnt) {
+  return date_sub_timestamp_int64_us(ts, cnt);
+}
+
+// Timezone: floor-split ts, apply milli-scale tz conversion, reassemble.
+// The UTC offset is a whole-second delta so sub-ms precision survives unchanged.
+FORCE_INLINE gdv_int64 to_utc_timezone_timestamp_us(gdv_int64 ctx, gdv_int64 ts,
+                                                    const char* tz, gdv_int32 tz_len) {
+  gdv_int64 ms = ts_floor_div(ts, 1000LL);
+  gdv_int64 rem = ts_floor_rem(ts, 1000LL);
+  return to_utc_timezone_timestamp(ctx, ms, tz, tz_len) * 1000LL + rem;
+}
+
+FORCE_INLINE gdv_int64 from_utc_timezone_timestamp_us(gdv_int64 ctx, gdv_int64 ts,
+                                                      const char* tz,
+                                                      gdv_int32 tz_len) {
+  gdv_int64 ms = ts_floor_div(ts, 1000LL);
+  gdv_int64 rem = ts_floor_rem(ts, 1000LL);
+  return from_utc_timezone_timestamp(ctx, ms, tz, tz_len) * 1000LL + rem;
+}
+
+// castVARCHAR(timestamp_us, int64): call the milli formatter for the base string,
+// then append 3 sub-millisecond digits (the microseconds-within-the-millisecond).
+//
+//   ctx      execution context handle, forwarded to the formatter and allocator
+//   ts       timestamp in microseconds
+//   len      maximum number of characters to produce
+//   out_len  receives the length of the returned string
+//
+// If the milli formatter already filled `len` characters its buffer is returned
+// unchanged. Otherwise a new arena buffer is allocated holding the base string
+// followed by as many of the 3 extra digits as fit within `len`. On allocation
+// failure an error is recorded on the context, *out_len is set to 0 and "" is
+// returned.
+const char* castVARCHAR_timestamp_int64_us(gdv_int64 ctx, gdv_timestamp ts,
+                                           gdv_int64 len, gdv_int32* out_len) {
+  gdv_int64 ms = ts_floor_div(ts, 1000LL);
+  const char* base_buf = castVARCHAR_timestamp_int64(ctx, ms, len, out_len);
+  gdv_int32 base_len = *out_len;
+  if ((gdv_int64)base_len >= len) return base_buf;
+
+  // Clamp in 64 bits. Narrowing `len` to int32 before comparing would wrap for
+  // len > INT32_MAX and could yield a buffer smaller than base_len.
+  gdv_int64 full_len = (gdv_int64)base_len + 3;
+  gdv_int32 clamped_len = (gdv_int32)(full_len < len ? full_len : len);
+
+  char* new_buf =
+      reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ctx, clamped_len));
+  if (new_buf == nullptr) {
+    gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string");
+    *out_len = 0;
+    return "";
+  }
+  memcpy(new_buf, base_buf, base_len);
+
+  // microseconds within the millisecond [0,999], written most significant first
+  gdv_int64 sub = ts_floor_rem(ts, 1000LL);
+  gdv_int64 place = 100;
+  for (gdv_int32 pos = base_len; pos < clamped_len; ++pos) {
+    new_buf[pos] = (char)('0' + (sub / place) % 10);
+    place /= 10;
+  }
+
+  *out_len = clamped_len;
+  return new_buf;
+}
+
+// next_day(timestamp_us, string): scale to millis, call precompiled, return date64.
+FORCE_INLINE gdv_int64 next_day_from_timestamp_us(gdv_int64 ctx, gdv_timestamp ts, + const char* day, gdv_int32 day_len) { + return next_day_from_timestamp(ctx, ts_floor_div(ts, 1000LL), day, day_len); +} + +// ───────────────────────────────────────────────────────────────────────────── +// NANOSECOND variants (units per millisecond = 1000000) +// ───────────────────────────────────────────────────────────────────────────── + +#define FIXED_ADD_NS(FN, UNITS_PER_SECOND) \ + FORCE_INLINE gdv_int64 FN##_int32_timestamp_ns(gdv_int32 cnt, gdv_timestamp ts) { \ + return ts + (gdv_int64)cnt * ((UNITS_PER_SECOND)*1000000000LL); \ + } \ + FORCE_INLINE gdv_int64 FN##_int64_timestamp_ns(gdv_int64 cnt, gdv_timestamp ts) { \ + return ts + cnt * ((UNITS_PER_SECOND)*1000000000LL); \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int32_ns(gdv_timestamp ts, gdv_int32 cnt) { \ + return ts + (gdv_int64)cnt * ((UNITS_PER_SECOND)*1000000000LL); \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int64_ns(gdv_timestamp ts, gdv_int64 cnt) { \ + return ts + cnt * ((UNITS_PER_SECOND)*1000000000LL); \ + } + +FIXED_ADD_NS(timestampaddSecond, 1) +FIXED_ADD_NS(timestampaddMinute, 60) +FIXED_ADD_NS(timestampaddHour, 3600) +FIXED_ADD_NS(timestampaddDay, 86400) +FIXED_ADD_NS(timestampaddWeek, 604800) + +#define CALENDAR_ADD_NS(FN) \ + FORCE_INLINE gdv_int64 FN##_int32_timestamp_ns(gdv_int32 cnt, gdv_timestamp ts) { \ + gdv_int64 ms = ts_floor_div(ts, 1000000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000000LL); \ + return FN##_int32_timestamp(cnt, ms) * 1000000LL + rem; \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int32_ns(gdv_timestamp ts, gdv_int32 cnt) { \ + gdv_int64 ms = ts_floor_div(ts, 1000000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000000LL); \ + return FN##_int32_timestamp(cnt, ms) * 1000000LL + rem; \ + } \ + FORCE_INLINE gdv_int64 FN##_int64_timestamp_ns(gdv_int64 cnt, gdv_timestamp ts) { \ + gdv_int64 ms = ts_floor_div(ts, 1000000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000000LL); 
\ + return FN##_int32_timestamp((gdv_int32)cnt, ms) * 1000000LL + rem; \ + } \ + FORCE_INLINE gdv_int64 FN##_timestamp_int64_ns(gdv_timestamp ts, gdv_int64 cnt) { \ + gdv_int64 ms = ts_floor_div(ts, 1000000LL); \ + gdv_int64 rem = ts_floor_rem(ts, 1000000LL); \ + return FN##_int32_timestamp((gdv_int32)cnt, ms) * 1000000LL + rem; \ + } + +CALENDAR_ADD_NS(timestampaddMonth) +CALENDAR_ADD_NS(timestampaddQuarter) +CALENDAR_ADD_NS(timestampaddYear) + +#define EXTRACT_NS(FN) \ + FORCE_INLINE gdv_int64 FN##_timestamp_ns(gdv_timestamp ts) { \ + return FN##_timestamp(ts_floor_div(ts, 1000000LL)); \ + } + +EXTRACT_NS(extractMillennium) +EXTRACT_NS(extractCentury) +EXTRACT_NS(extractDecade) +EXTRACT_NS(extractYear) +EXTRACT_NS(extractQuarter) +EXTRACT_NS(extractMonth) +EXTRACT_NS(extractWeek) +EXTRACT_NS(extractDay) +EXTRACT_NS(extractHour) +EXTRACT_NS(extractMinute) +EXTRACT_NS(extractSecond) +EXTRACT_NS(extractDoy) +EXTRACT_NS(extractDow) +EXTRACT_NS(extractEpoch) + +#define TRUNC_NS(FN) \ + FORCE_INLINE gdv_int64 FN##_timestamp_ns(gdv_timestamp ts) { \ + return FN##_timestamp(ts_floor_div(ts, 1000000LL)) * 1000000LL; \ + } + +TRUNC_NS(date_trunc_Millennium) +TRUNC_NS(date_trunc_Century) +TRUNC_NS(date_trunc_Decade) +TRUNC_NS(date_trunc_Year) +TRUNC_NS(date_trunc_Quarter) +TRUNC_NS(date_trunc_Month) +TRUNC_NS(date_trunc_Week) +TRUNC_NS(date_trunc_Day) +TRUNC_NS(date_trunc_Hour) +TRUNC_NS(date_trunc_Minute) +TRUNC_NS(date_trunc_Second) + +#define DIFF_NS(FN) \ + FORCE_INLINE gdv_int32 FN##_timestamp_timestamp_ns(gdv_timestamp ts1, \ + gdv_timestamp ts2) { \ + return FN##_timestamp_timestamp(ts_floor_div(ts1, 1000000LL), \ + ts_floor_div(ts2, 1000000LL)); \ + } + +DIFF_NS(timestampdiffSecond) +DIFF_NS(timestampdiffMinute) +DIFF_NS(timestampdiffHour) +DIFF_NS(timestampdiffDay) +DIFF_NS(timestampdiffWeek) +DIFF_NS(timestampdiffMonth) +DIFF_NS(timestampdiffQuarter) +DIFF_NS(timestampdiffYear) + +FORCE_INLINE gdv_float64 +months_between_timestamp_timestamp_ns(gdv_timestamp ts1, 
gdv_timestamp ts2) { + return months_between_timestamp_timestamp((gdv_uint64)ts_floor_div(ts1, 1000000LL), + (gdv_uint64)ts_floor_div(ts2, 1000000LL)); +} + +FORCE_INLINE gdv_int32 +datediff_timestamp_timestamp_ns(gdv_timestamp ts1, gdv_timestamp ts2) { + return datediff_timestamp_timestamp(ts_floor_div(ts1, 1000000LL), + ts_floor_div(ts2, 1000000LL)); +} + +FORCE_INLINE gdv_date64 castDATE_timestamp_ns(gdv_timestamp ts) { + return castDATE_timestamp(ts_floor_div(ts, 1000000LL)); +} +FORCE_INLINE gdv_time32 castTIME_timestamp_ns(gdv_timestamp ts) { + return castTIME_timestamp(ts_floor_div(ts, 1000000LL)); +} +FORCE_INLINE gdv_date64 last_day_from_timestamp_ns(gdv_timestamp ts) { + return last_day_from_timestamp(ts_floor_div(ts, 1000000LL)); +} + +FORCE_INLINE gdv_int64 date_add_int32_timestamp_ns(gdv_int32 cnt, gdv_timestamp ts) { + return ts + (gdv_int64)cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 date_add_int64_timestamp_ns(gdv_int64 cnt, gdv_timestamp ts) { + return ts + cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 add_int32_timestamp_ns(gdv_int32 cnt, gdv_timestamp ts) { + return ts + (gdv_int64)cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 add_int64_timestamp_ns(gdv_int64 cnt, gdv_timestamp ts) { + return ts + cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 date_add_timestamp_int32_ns(gdv_timestamp ts, gdv_int32 cnt) { + return ts + (gdv_int64)cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 date_add_timestamp_int64_ns(gdv_timestamp ts, gdv_int64 cnt) { + return ts + cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 add_timestamp_int32_ns(gdv_timestamp ts, gdv_int32 cnt) { + return ts + (gdv_int64)cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 add_timestamp_int64_ns(gdv_timestamp ts, gdv_int64 cnt) { + return ts + cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 date_sub_timestamp_int32_ns(gdv_timestamp ts, gdv_int32 cnt) { + return ts - (gdv_int64)cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 
date_sub_timestamp_int64_ns(gdv_timestamp ts, gdv_int64 cnt) { + return ts - cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 subtract_timestamp_int32_ns(gdv_timestamp ts, gdv_int32 cnt) { + return ts - (gdv_int64)cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 subtract_timestamp_int64_ns(gdv_timestamp ts, gdv_int64 cnt) { + return ts - cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 date_diff_timestamp_int32_ns(gdv_timestamp ts, gdv_int32 cnt) { + return ts - (gdv_int64)cnt * 86400000000000LL; +} +FORCE_INLINE gdv_int64 date_diff_timestamp_int64_ns(gdv_timestamp ts, gdv_int64 cnt) { + return ts - cnt * 86400000000000LL; +} + +FORCE_INLINE gdv_int64 to_utc_timezone_timestamp_ns(gdv_int64 ctx, gdv_int64 ts, + const char* tz, gdv_int32 tz_len) { + gdv_int64 ms = ts_floor_div(ts, 1000000LL); + gdv_int64 rem = ts_floor_rem(ts, 1000000LL); + return to_utc_timezone_timestamp(ctx, ms, tz, tz_len) * 1000000LL + rem; +} + +FORCE_INLINE gdv_int64 from_utc_timezone_timestamp_ns(gdv_int64 ctx, gdv_int64 ts, + const char* tz, + gdv_int32 tz_len) { + gdv_int64 ms = ts_floor_div(ts, 1000000LL); + gdv_int64 rem = ts_floor_rem(ts, 1000000LL); + return from_utc_timezone_timestamp(ctx, ms, tz, tz_len) * 1000000LL + rem; +} + +// castVARCHAR(timestamp_ns, int64): append 6 sub-millisecond digits. +const char* castVARCHAR_timestamp_int64_ns(gdv_int64 ctx, gdv_timestamp ts, + gdv_int64 len, gdv_int32* out_len) { + gdv_int64 ms = ts_floor_div(ts, 1000000LL); + const char* base_buf = castVARCHAR_timestamp_int64(ctx, ms, len, out_len); + gdv_int32 base_len = *out_len; + if ((gdv_int64)base_len >= len) return base_buf; + + gdv_int32 full_len = base_len + 6; + gdv_int32 clamped_len = (full_len < (gdv_int32)len) ? 
full_len : (gdv_int32)len; + char* new_buf = + reinterpret_cast(gdv_fn_context_arena_malloc(ctx, clamped_len)); + memcpy(new_buf, base_buf, base_len); + + gdv_int64 sub = ts_floor_rem(ts, 1000000LL); // nanoseconds within the millisecond [0,999999] + if (base_len + 0 < clamped_len) new_buf[base_len + 0] = '0' + (char)((sub / 100000) % 10); + if (base_len + 1 < clamped_len) new_buf[base_len + 1] = '0' + (char)((sub / 10000) % 10); + if (base_len + 2 < clamped_len) new_buf[base_len + 2] = '0' + (char)((sub / 1000) % 10); + if (base_len + 3 < clamped_len) new_buf[base_len + 3] = '0' + (char)((sub / 100) % 10); + if (base_len + 4 < clamped_len) new_buf[base_len + 4] = '0' + (char)((sub / 10) % 10); + if (base_len + 5 < clamped_len) new_buf[base_len + 5] = '0' + (char)(sub % 10); + + *out_len = clamped_len; + return new_buf; +} + +FORCE_INLINE gdv_int64 next_day_from_timestamp_ns(gdv_int64 ctx, gdv_timestamp ts, + const char* day, gdv_int32 day_len) { + return next_day_from_timestamp(ctx, ts_floor_div(ts, 1000000LL), day, day_len); +} + +} // extern "C" diff --git a/cpp/src/gandiva/timestamp_ir.cc b/cpp/src/gandiva/timestamp_ir.cc index 49af53deb0d..dadc7e4a9a3 100644 --- a/cpp/src/gandiva/timestamp_ir.cc +++ b/cpp/src/gandiva/timestamp_ir.cc @@ -17,9 +17,7 @@ #include "gandiva/timestamp_ir.h" -#include "arrow/status.h" -#include "arrow/util/logging.h" -#include "gandiva/engine.h" +#include namespace gandiva { @@ -49,106 +47,52 @@ namespace gandiva { } } -// Unit suffix appended to precompiled function names. 
-static const char* UnitSuffix(arrow::TimeUnit::type unit) { - switch (unit) { - case arrow::TimeUnit::MICRO: - return "_us"; - case arrow::TimeUnit::NANO: - return "_ns"; - default: - return ""; - } -} - -// Fixed-unit timestampadd: ts + count * constant (pure IR) -struct FixedAdd { - const char* name; - int64_t seconds; -}; -static const FixedAdd kFixedAdds[] = { - {"timestampaddSecond", 1}, {"timestampaddMinute", 60}, {"timestampaddHour", 3600}, - {"timestampaddDay", 86400}, {"timestampaddWeek", 604800}, -}; - -// Calendar-based timestampadd: split/recombine around precompiled millis fn -static const char* kCalendarAdds[] = { - "timestampaddMonth", - "timestampaddQuarter", - "timestampaddYear", -}; - -// Extract functions: convert ts to millis, call precompiled, return int64 -// pc_name pattern: {name}_timestamp -static const char* kExtracts[] = { - "extractMillennium", "extractCentury", "extractDecade", "extractYear", - "extractQuarter", "extractMonth", "extractWeek", "extractDay", - "extractHour", "extractMinute", "extractSecond", "extractDoy", - "extractDow", "extractEpoch", -}; - -// date_trunc functions: convert ts to millis, truncate, scale back (zero remainder) -// pc_name pattern: date_trunc_{Level}_timestamp -static const char* kTruncs[] = { - "date_trunc_Millennium", "date_trunc_Century", "date_trunc_Decade", "date_trunc_Year", - "date_trunc_Quarter", "date_trunc_Month", "date_trunc_Week", "date_trunc_Day", - "date_trunc_Hour", "date_trunc_Minute", "date_trunc_Second", -}; - -// timestampdiff: convert both inputs to millis, return int32 -// pc_name pattern: {name}_timestamp_timestamp -static const char* kDiffs[] = { - "timestampdiffSecond", "timestampdiffMinute", "timestampdiffHour", - "timestampdiffDay", "timestampdiffWeek", "timestampdiffMonth", - "timestampdiffQuarter", "timestampdiffYear", -}; - -// Two-timestamp functions returning scalar -// months_between(ts,ts)->float64, datediff(ts,ts)->int32 -struct TwoTsScalar { - const char* name; - bool 
returns_float; // true=float64, false=int32 -}; -static const TwoTsScalar kTwoTsScalars[] = { - {"months_between", true}, - {"datediff", false}, -}; - -// Cast functions from timestamp -struct CastFromTs { - const char* name; - bool returns_i32; // true=int32 (castTIME), false=int64 (castDATE) -}; -static const CastFromTs kCastsFromTs[] = { - {"castDATE", false}, - {"castTIME", true}, - {"last_day_from", false}, // last_day_from_timestamp(ts) -> date64 -}; - -// date_add/add/date_sub/subtract/date_diff with timestamp: -// These are fixed-unit (1 day) arithmetic with varying arg orders and signs. -struct DateArith { - const char* name; - bool count_first; // true=(int,ts), false=(ts,int) - int64_t sign; // +1 for add, -1 for sub -}; -static const DateArith kDateArithEntries[] = { - {"date_add", true, 1}, {"add", true, 1}, {"date_add", false, 1}, - {"add", false, 1}, {"date_sub", false, -1}, {"subtract", false, -1}, - {"date_diff", false, -1}, -}; - -// Units to generate functions for. -static const arrow::TimeUnit::type kUnits[] = { - arrow::TimeUnit::MICRO, - arrow::TimeUnit::NANO, -}; - -// Build the deterministic set of all IR function names that AddFunctions will create. -// These names depend only on the static tables above, not on any engine state. +// The complete set of function names that exist in the precompiled bitcode +// (precompiled/timestamp_unit_ops.cc), used to validate remapped names in +// LLVMGenerator::ResolveTimestampPcName(). 
static std::unordered_set BuildAllFunctionNames() { std::unordered_set names; const char* suffixes[] = {"_us", "_ns"}; + + // Fixed-unit timestampadd (4 arg-order variants each) + static const struct { const char* name; } kFixedAdds[] = { + {"timestampaddSecond"}, {"timestampaddMinute"}, {"timestampaddHour"}, + {"timestampaddDay"}, {"timestampaddWeek"}, + }; + // Calendar-based timestampadd (4 arg-order variants each) + static const char* kCalendarAdds[] = { + "timestampaddMonth", "timestampaddQuarter", "timestampaddYear", + }; + // Extract functions + static const char* kExtracts[] = { + "extractMillennium", "extractCentury", "extractDecade", "extractYear", + "extractQuarter", "extractMonth", "extractWeek", "extractDay", + "extractHour", "extractMinute", "extractSecond", "extractDoy", + "extractDow", "extractEpoch", + }; + // date_trunc functions + static const char* kTruncs[] = { + "date_trunc_Millennium", "date_trunc_Century", "date_trunc_Decade", + "date_trunc_Year", "date_trunc_Quarter", "date_trunc_Month", + "date_trunc_Week", "date_trunc_Day", "date_trunc_Hour", + "date_trunc_Minute", "date_trunc_Second", + }; + // timestampdiff functions + static const char* kDiffs[] = { + "timestampdiffSecond", "timestampdiffMinute", "timestampdiffHour", + "timestampdiffDay", "timestampdiffWeek", "timestampdiffMonth", + "timestampdiffQuarter", "timestampdiffYear", + }; + // Date arithmetic (all count_first=false except date_add/add which have both) + static const struct { + const char* name; + bool count_first; + } kDateArith[] = { + {"date_add", true}, {"add", true}, {"date_add", false}, + {"add", false}, {"date_sub", false}, {"subtract", false}, + {"date_diff", false}, + }; + for (const auto* sfx : suffixes) { for (const auto& fa : kFixedAdds) { names.insert(std::string(fa.name) + "_int32_timestamp" + sfx); @@ -159,8 +103,8 @@ static std::unordered_set BuildAllFunctionNames() { for (const auto* ca : kCalendarAdds) { names.insert(std::string(ca) + "_int32_timestamp" + 
sfx); names.insert(std::string(ca) + "_timestamp_int32" + sfx); - names.insert(std::string(ca) + "_timestamp_int64" + sfx); names.insert(std::string(ca) + "_int64_timestamp" + sfx); + names.insert(std::string(ca) + "_timestamp_int64" + sfx); } for (const auto* ex : kExtracts) { names.insert(std::string(ex) + "_timestamp" + sfx); @@ -171,13 +115,12 @@ static std::unordered_set BuildAllFunctionNames() { for (const auto* di : kDiffs) { names.insert(std::string(di) + "_timestamp_timestamp" + sfx); } - for (const auto& ts2 : kTwoTsScalars) { - names.insert(std::string(ts2.name) + "_timestamp_timestamp" + sfx); - } - for (const auto& c : kCastsFromTs) { - names.insert(std::string(c.name) + "_timestamp" + sfx); - } - for (const auto& da : kDateArithEntries) { + names.insert(std::string("months_between_timestamp_timestamp") + sfx); + names.insert(std::string("datediff_timestamp_timestamp") + sfx); + names.insert(std::string("castDATE_timestamp") + sfx); + names.insert(std::string("castTIME_timestamp") + sfx); + names.insert(std::string("last_day_from_timestamp") + sfx); + for (const auto& da : kDateArith) { if (da.count_first) { names.insert(std::string(da.name) + "_int32_timestamp" + sfx); names.insert(std::string(da.name) + "_int64_timestamp" + sfx); @@ -194,7 +137,6 @@ static std::unordered_set BuildAllFunctionNames() { return names; } -// Thread-safe const set: initialized once via C++11 static local guarantee. 
static const std::unordered_set& AllFunctionNames() { static const std::unordered_set names = BuildAllFunctionNames(); return names; @@ -204,643 +146,4 @@ static const std::unordered_set& AllFunctionNames() { return AllFunctionNames().count(name) != 0; } -Status TimestampIR::BuildTimestampaddFixed(const std::string& function_name, - int64_t seconds_per_unit, - arrow::TimeUnit::type time_unit) { - auto i32 = types()->i32_type(); - auto i64 = types()->i64_type(); - auto function = BuildFunction(function_name, i64, {{"count", i32}, {"ts", i64}}); - - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto arg_iter = function->arg_begin(); - auto count = &arg_iter[0]; - auto ts = &arg_iter[1]; - - // result = ts + (int64)count * units_per_fixed_unit - int64_t units_per_fixed_unit = seconds_per_unit * UnitsPerSecond(time_unit); - auto count_i64 = ir_builder()->CreateSExt(count, i64); - auto delta = ir_builder()->CreateMul( - count_i64, llvm::ConstantInt::get(i64, units_per_fixed_unit)); - auto result = ir_builder()->CreateAdd(ts, delta); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildDateArithFixed(const std::string& function_name, - bool count_first, - int64_t seconds_per_count, - arrow::TimeUnit::type time_unit, - llvm::Type* count_type) { - auto i64 = types()->i64_type(); - llvm::Function* function; - if (count_first) { - function = BuildFunction(function_name, i64, {{"count", count_type}, {"ts", i64}}); - } else { - function = BuildFunction(function_name, i64, {{"ts", i64}, {"count", count_type}}); - } - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto arg_iter = function->arg_begin(); - llvm::Value* ts = count_first ? &arg_iter[1] : &arg_iter[0]; - llvm::Value* count = count_first ? 
&arg_iter[0] : &arg_iter[1]; - - int64_t units_per_count = seconds_per_count * UnitsPerSecond(time_unit); - auto count_i64 = (count_type == i64) ? count : ir_builder()->CreateSExt(count, i64); - auto delta = ir_builder()->CreateMul( - count_i64, llvm::ConstantInt::get(i64, units_per_count)); - auto result = ir_builder()->CreateAdd(ts, delta); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildTimestampaddCalendar(const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit) { - auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i32 = types()->i32_type(); - auto i64 = types()->i64_type(); - auto function = BuildFunction(function_name, i64, {{"count", i32}, {"ts", i64}}); - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto arg_iter = function->arg_begin(); - auto count = &arg_iter[0]; - auto ts = &arg_iter[1]; - - int64_t upm = UnitsPerMilli(time_unit); - auto upm_const = llvm::ConstantInt::get(i64, upm); - auto [millis, remainder] = FloorDivRem(ts, upm_const); - auto result_millis = ir_builder()->CreateCall(precompiled_fn, {count, millis}); - auto result_scaled = ir_builder()->CreateMul(result_millis, upm_const); - auto result = ir_builder()->CreateAdd(result_scaled, remainder); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildTimestampaddCalendarGeneric( - const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit, - bool count_first, - llvm::Type* count_type) { - auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); 
- llvm::Function* function; - if (count_first) { - function = BuildFunction(function_name, i64, {{"count", count_type}, {"ts", i64}}); - } else { - function = BuildFunction(function_name, i64, {{"ts", i64}, {"count", count_type}}); - } - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto arg_iter = function->arg_begin(); - llvm::Value* ts = count_first ? &arg_iter[1] : &arg_iter[0]; - llvm::Value* count = count_first ? &arg_iter[0] : &arg_iter[1]; - - // Convert count to i32 if needed (precompiled fn takes int32 count for millis version) - auto i32 = types()->i32_type(); - auto count_i32 = (count_type == i32) ? count - : ir_builder()->CreateTrunc(count, i32); - - int64_t upm = UnitsPerMilli(time_unit); - auto upm_const = llvm::ConstantInt::get(i64, upm); - auto [millis, remainder] = FloorDivRem(ts, upm_const); - - // Precompiled millis fn always takes (int32 count, int64 millis) - auto result_millis = ir_builder()->CreateCall(precompiled_fn, {count_i32, millis}); - auto result_scaled = ir_builder()->CreateMul(result_millis, upm_const); - auto result = ir_builder()->CreateAdd(result_scaled, remainder); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildExtractWrapper(const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit) { - auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); - auto function = BuildFunction(function_name, i64, {{"ts", i64}}); - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto ts = &function->arg_begin()[0]; - int64_t upm = UnitsPerMilli(time_unit); - auto millis = FloorDiv(ts, llvm::ConstantInt::get(i64, upm)); - auto result = 
ir_builder()->CreateCall(precompiled_fn, {millis}); - - ir_builder()->CreateRet(result); - - return Status::OK(); -} - -Status TimestampIR::BuildTruncWrapper(const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit) { - auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); - auto function = BuildFunction(function_name, i64, {{"ts", i64}}); - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto ts = &function->arg_begin()[0]; - int64_t upm = UnitsPerMilli(time_unit); - auto upm_const = llvm::ConstantInt::get(i64, upm); - auto millis = FloorDiv(ts, upm_const); - auto result_millis = ir_builder()->CreateCall(precompiled_fn, {millis}); - auto result = ir_builder()->CreateMul(result_millis, upm_const); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildDiffWrapper(const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit, - llvm::Type* return_type) { - auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); - auto function = BuildFunction(function_name, return_type, - {{"ts1", i64}, {"ts2", i64}}); - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto arg_iter = function->arg_begin(); - int64_t upm = UnitsPerMilli(time_unit); - auto upm_const = llvm::ConstantInt::get(i64, upm); - auto millis1 = FloorDiv(&arg_iter[0], upm_const); - auto millis2 = FloorDiv(&arg_iter[1], upm_const); - auto result = ir_builder()->CreateCall(precompiled_fn, {millis1, millis2}); - - 
ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildCastFromTimestampWrapper( - const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit, - llvm::Type* return_type) { - auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); - auto function = BuildFunction(function_name, return_type, {{"ts", i64}}); - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto ts = &function->arg_begin()[0]; - int64_t upm = UnitsPerMilli(time_unit); - auto millis = FloorDiv(ts, llvm::ConstantInt::get(i64, upm)); - auto result = ir_builder()->CreateCall(precompiled_fn, {millis}); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildTimezoneWrapper(const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit) { - // fn(context, ts, tz_str, tz_len) -> ts - // Split-recombine: the timezone offset is a whole-second delta, so sub-ms survives. 
- auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); - auto i32 = types()->i32_type(); - auto i8ptr = llvm::PointerType::get(*context(), 0); - auto function = BuildFunction(function_name, i64, - {{"ctx", i64}, {"ts", i64}, {"tz", i8ptr}, {"tz_len", i32}}); - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto arg_iter = function->arg_begin(); - auto ctx = &arg_iter[0]; - auto ts = &arg_iter[1]; - auto tz = &arg_iter[2]; - auto tz_len = &arg_iter[3]; - - int64_t upm = UnitsPerMilli(time_unit); - auto upm_const = llvm::ConstantInt::get(i64, upm); - auto [millis, remainder] = FloorDivRem(ts, upm_const); - - auto result_millis = ir_builder()->CreateCall(precompiled_fn, {ctx, millis, tz, tz_len}); - auto result_scaled = ir_builder()->CreateMul(result_millis, upm_const); - auto result = ir_builder()->CreateAdd(result_scaled, remainder); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildNextDayWrapper(const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit) { - // fn(context, ts, day_str, day_len) -> date64 - // next_day returns the date of the next weekday (e.g. 'MO' for Monday) at midnight, - // so sub-ms precision is not relevant in the result — just scale the input to millis. 
- auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); - auto i32 = types()->i32_type(); - auto i8ptr = llvm::PointerType::get(*context(), 0); - auto function = BuildFunction( - function_name, i64, - {{"ctx", i64}, {"ts", i64}, {"day", i8ptr}, {"day_len", i32}}); - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - - auto arg_iter = function->arg_begin(); - auto ctx = &arg_iter[0]; - auto ts = &arg_iter[1]; - auto day = &arg_iter[2]; - auto day_len = &arg_iter[3]; - - int64_t upm = UnitsPerMilli(time_unit); - auto millis = FloorDiv(ts, llvm::ConstantInt::get(i64, upm)); - auto result = ir_builder()->CreateCall(precompiled_fn, {ctx, millis, day, day_len}); - - ir_builder()->CreateRet(result); - return Status::OK(); -} - -Status TimestampIR::BuildCastVARCHARWrapper(const std::string& function_name, - const std::string& precompiled_millis_fn, - arrow::TimeUnit::type time_unit) { - // fn(context, ts, len, &out_len) -> const char* - // For MICRO/NANO: call the millis formatter for the first 23 chars, then append - // sub-millisecond digits (3 for us, 6 for ns) into a new arena-allocated buffer. - auto precompiled_fn = module()->getFunction(precompiled_millis_fn); - if (!precompiled_fn) { - return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); - } - - auto i64 = types()->i64_type(); - auto i32 = types()->i32_type(); - auto i8 = llvm::Type::getInt8Ty(*context()); - auto i8ptr = llvm::PointerType::get(*context(), 0); - auto i32ptr = llvm::PointerType::get(*context(), 0); - auto function = BuildFunction(function_name, i8ptr, - {{"ctx", i64}, {"ts", i64}, {"len", i64}, {"out_len", i32ptr}}); - - int64_t upm = UnitsPerMilli(time_unit); - int extra_digits = (time_unit == arrow::TimeUnit::MICRO) ? 
3 - : (time_unit == arrow::TimeUnit::NANO) ? 6 - : 0; - - if (extra_digits == 0) { - // MILLI: pass through directly - auto entry = llvm::BasicBlock::Create(*context(), "entry", function); - ir_builder()->SetInsertPoint(entry); - auto arg_iter = function->arg_begin(); - auto millis = FloorDiv(&arg_iter[1], llvm::ConstantInt::get(i64, upm)); - auto result = ir_builder()->CreateCall( - precompiled_fn, {&arg_iter[0], millis, &arg_iter[2], &arg_iter[3]}); - ir_builder()->CreateRet(result); - return Status::OK(); - } - - // --- MICRO or NANO path --- - auto bb_entry = llvm::BasicBlock::Create(*context(), "entry", function); - auto bb_append = llvm::BasicBlock::Create(*context(), "append", function); - auto bb_done = llvm::BasicBlock::Create(*context(), "done", function); - - // Entry: call precompiled millis formatter - ir_builder()->SetInsertPoint(bb_entry); - auto arg_iter = function->arg_begin(); - auto ctx = &arg_iter[0]; - auto ts = &arg_iter[1]; - auto len = &arg_iter[2]; - auto out_len_ptr = &arg_iter[3]; - - auto upm_const = llvm::ConstantInt::get(i64, upm); - auto millis = FloorDiv(ts, upm_const); - auto base_buf = ir_builder()->CreateCall(precompiled_fn, {ctx, millis, len, out_len_ptr}); - auto base_len = ir_builder()->CreateLoad(i32, out_len_ptr); - - // Check if length allows extra digits - auto base_len_i64 = ir_builder()->CreateSExt(base_len, i64); - auto has_room = ir_builder()->CreateICmpSLT(base_len_i64, len); - ir_builder()->CreateCondBr(has_room, bb_append, bb_done); - - // Append: allocate new buffer, copy prefix, write sub-ms digits - ir_builder()->SetInsertPoint(bb_append); - - auto extra_const = llvm::ConstantInt::get(i32, extra_digits); - auto full_len = ir_builder()->CreateAdd(base_len, extra_const); - - // Clamp to len - auto len_i32 = ir_builder()->CreateTrunc(len, i32); - auto clamped_len = ir_builder()->CreateSelect( - ir_builder()->CreateICmpSLT(full_len, len_i32), full_len, len_i32); - - // arena_malloc(ctx, clamped_len) - auto arena_fn = 
module()->getFunction("gdv_fn_context_arena_malloc"); - auto new_buf = ir_builder()->CreateCall(arena_fn, {ctx, clamped_len}); - - // memcpy(new_buf, base_buf, base_len) - ir_builder()->CreateMemCpy( - new_buf, llvm::MaybeAlign(1), base_buf, llvm::MaybeAlign(1), base_len); - - // Compute the non-negative sub-ms remainder consistent with floor division. - // FloorDivRem guarantees remainder is in [0, upm), even for negative timestamps. - auto [millis_dup, sub_ms_rem] = FloorDivRem(ts, upm_const); - (void)millis_dup; - auto abs_rem = sub_ms_rem; - - // Write digits from most significant to least significant - // For MICRO: divisors are 100, 10, 1 - // For NANO: divisors are 100000, 10000, 1000, 100, 10, 1 - int64_t divisor = 1; - for (int i = 0; i < extra_digits - 1; ++i) divisor *= 10; - - llvm::BasicBlock* last_append_bb = nullptr; - for (int i = 0; i < extra_digits; ++i) { - auto idx = ir_builder()->CreateAdd(base_len, - llvm::ConstantInt::get(i32, i)); - auto write_pos = ir_builder()->CreateICmpSLT(idx, clamped_len); - - // digit = (abs_rem / divisor) % 10 + '0' - auto d = ir_builder()->CreateSDiv(abs_rem, - llvm::ConstantInt::get(i64, divisor)); - auto digit = ir_builder()->CreateSRem(d, llvm::ConstantInt::get(i64, 10)); - auto ch = ir_builder()->CreateAdd( - ir_builder()->CreateTrunc(digit, i8), llvm::ConstantInt::get(i8, '0')); - - auto gep = ir_builder()->CreateGEP(i8, new_buf, idx); - - auto bb_store = llvm::BasicBlock::Create(*context(), "store", function); - auto bb_next = llvm::BasicBlock::Create(*context(), "next", function); - ir_builder()->CreateCondBr(write_pos, bb_store, bb_next); - - ir_builder()->SetInsertPoint(bb_store); - ir_builder()->CreateStore(ch, gep); - ir_builder()->CreateBr(bb_next); - - ir_builder()->SetInsertPoint(bb_next); - last_append_bb = bb_next; - divisor /= 10; - } - - // Store final out_len - ir_builder()->CreateStore(clamped_len, out_len_ptr); - ir_builder()->CreateBr(bb_done); - - // Done: phi to select return value - 
ir_builder()->SetInsertPoint(bb_done); - auto phi = ir_builder()->CreatePHI(i8ptr, 2, "result"); - phi->addIncoming(base_buf, bb_entry); - phi->addIncoming(new_buf, last_append_bb); - - ir_builder()->CreateRet(phi); - return Status::OK(); -} - -llvm::Value* TimestampIR::FloorDiv(llvm::Value* ts, llvm::Value* divisor) { - auto i64 = types()->i64_type(); - auto zero = llvm::ConstantInt::get(i64, 0); - auto one = llvm::ConstantInt::get(i64, 1); - auto quotient = ir_builder()->CreateSDiv(ts, divisor); - auto remainder = ir_builder()->CreateSRem(ts, divisor); - auto has_remainder = ir_builder()->CreateICmpNE(remainder, zero); - auto is_negative = ir_builder()->CreateICmpSLT(ts, zero); - auto needs_adjust = ir_builder()->CreateAnd(is_negative, has_remainder); - return ir_builder()->CreateSub(quotient, - ir_builder()->CreateSelect(needs_adjust, one, zero)); -} - -std::pair TimestampIR::FloorDivRem( - llvm::Value* ts, llvm::Value* divisor) { - auto i64 = types()->i64_type(); - auto zero = llvm::ConstantInt::get(i64, 0); - auto one = llvm::ConstantInt::get(i64, 1); - auto quotient = ir_builder()->CreateSDiv(ts, divisor); - auto remainder = ir_builder()->CreateSRem(ts, divisor); - auto has_remainder = ir_builder()->CreateICmpNE(remainder, zero); - auto is_negative = ir_builder()->CreateICmpSLT(ts, zero); - auto needs_adjust = ir_builder()->CreateAnd(is_negative, has_remainder); - auto adj = ir_builder()->CreateSelect(needs_adjust, one, zero); - auto floor_q = ir_builder()->CreateSub(quotient, adj); - auto floor_r = ir_builder()->CreateAdd( - remainder, ir_builder()->CreateSelect(needs_adjust, divisor, zero)); - return {floor_q, floor_r}; -} - -/*static*/ Status TimestampIR::AddFunctions(Engine* engine) { - auto ts_ir = std::make_shared(engine); - auto i32 = ts_ir->types()->i32_type(); - auto i64 = ts_ir->types()->i64_type(); - auto f64 = ts_ir->types()->double_type(); - - for (auto unit : kUnits) { - auto sfx = UnitSuffix(unit); - - // Helper: skip functions whose 
precompiled base is missing, but warn on - // unexpected errors (type mismatch, LLVM failure, etc.). - auto try_build = [](const std::string& ir_name, Status status) { - if (status.ok() || status.IsInvalid()) { - return; // OK or precompiled function not found — expected - } - ARROW_LOG(DEBUG) << "TimestampIR: unexpected error building " << ir_name - << ": " << status.ToString(); - }; - - // Fixed-unit: pure IR (always succeeds) - for (const auto& fa : kFixedAdds) { - auto ir_name = std::string(fa.name) + "_int32_timestamp" + sfx; - ARROW_RETURN_NOT_OK(ts_ir->BuildTimestampaddFixed(ir_name, fa.seconds, unit)); - } - - // Calendar-based: precompiled wrapper - for (const auto* ca : kCalendarAdds) { - auto ir_name = std::string(ca) + "_int32_timestamp" + sfx; - try_build(ir_name, - ts_ir->BuildTimestampaddCalendar(ir_name, - std::string(ca) + "_int32_timestamp", unit)); - } - - // Extract functions - for (const auto* ex : kExtracts) { - auto ir_name = std::string(ex) + "_timestamp" + sfx; - try_build(ir_name, - ts_ir->BuildExtractWrapper(ir_name, - std::string(ex) + "_timestamp", unit)); - } - - // date_trunc functions - for (const auto* tr : kTruncs) { - auto ir_name = std::string(tr) + "_timestamp" + sfx; - try_build(ir_name, - ts_ir->BuildTruncWrapper(ir_name, - std::string(tr) + "_timestamp", unit)); - } - - // timestampdiff functions (two ts -> int32) - for (const auto* di : kDiffs) { - auto ir_name = std::string(di) + "_timestamp_timestamp" + sfx; - try_build(ir_name, - ts_ir->BuildDiffWrapper(ir_name, - std::string(di) + "_timestamp_timestamp", unit, i32)); - } - - // months_between / datediff - for (const auto& ts2 : kTwoTsScalars) { - auto ir_name = std::string(ts2.name) + "_timestamp_timestamp" + sfx; - try_build(ir_name, - ts_ir->BuildDiffWrapper(ir_name, - std::string(ts2.name) + "_timestamp_timestamp", unit, - ts2.returns_float ? 
f64 : i32)); - } - - // Cast from timestamp - for (const auto& c : kCastsFromTs) { - auto ir_name = std::string(c.name) + "_timestamp" + sfx; - try_build(ir_name, - ts_ir->BuildCastFromTimestampWrapper(ir_name, - std::string(c.name) + "_timestamp", unit, - c.returns_i32 ? i32 : i64)); - } - - // date_add/add/date_sub/subtract/date_diff with int32 and int64 - for (const auto& da : kDateArithEntries) { - for (auto* count_type : {i32, i64}) { - const char* type_name = (count_type == i32) ? "int32" : "int64"; - std::string ir_name; - if (da.count_first) { - ir_name = std::string(da.name) + "_" + type_name + "_timestamp" + sfx; - } else { - ir_name = std::string(da.name) + "_timestamp_" + type_name + sfx; - } - try_build(ir_name, - ts_ir->BuildDateArithFixed(ir_name, da.count_first, - da.sign * 86400LL, unit, count_type)); - } - } - - // int64 variants of timestampadd (pure IR arithmetic) - for (const auto& fa : kFixedAdds) { - auto ir_name = std::string(fa.name) + "_int64_timestamp" + sfx; - try_build(ir_name, - ts_ir->BuildDateArithFixed(ir_name, /*count_first=*/true, - fa.seconds, unit, i64)); - } - - // Reversed-arg variants: timestampaddX(timestamp, int32/int64) -> timestamp - for (const auto& fa : kFixedAdds) { - auto ir32 = std::string(fa.name) + "_timestamp_int32" + sfx; - try_build(ir32, - ts_ir->BuildDateArithFixed(ir32, /*count_first=*/false, - fa.seconds, unit, i32)); - auto ir64 = std::string(fa.name) + "_timestamp_int64" + sfx; - try_build(ir64, - ts_ir->BuildDateArithFixed(ir64, /*count_first=*/false, - fa.seconds, unit, i64)); - } - - // Reversed-arg calendar: timestampaddMonth/Quarter/Year(timestamp, int32/int64) - // and int64 calendar: timestampaddMonth/Quarter/Year(int64, timestamp) - // All use the precompiled (int32, timestamp) millis function as the base. 
- for (const auto* ca : kCalendarAdds) { - auto millis_fn = std::string(ca) + "_int32_timestamp"; - // (timestamp, int32) variant - auto rev32 = std::string(ca) + "_timestamp_int32" + sfx; - try_build(rev32, - ts_ir->BuildTimestampaddCalendarGeneric(rev32, millis_fn, unit, - /*count_first=*/false, i32)); - // (timestamp, int64) variant - auto rev64 = std::string(ca) + "_timestamp_int64" + sfx; - try_build(rev64, - ts_ir->BuildTimestampaddCalendarGeneric(rev64, millis_fn, unit, - /*count_first=*/false, i64)); - // (int64, timestamp) variant - auto fwd64 = std::string(ca) + "_int64_timestamp" + sfx; - try_build(fwd64, - ts_ir->BuildTimestampaddCalendarGeneric(fwd64, millis_fn, unit, - /*count_first=*/true, i64)); - } - - // Timezone functions: to_utc/from_utc (split-recombine) - { - std::string ir_to = std::string("to_utc_timezone_timestamp") + sfx; - try_build(ir_to, - ts_ir->BuildTimezoneWrapper(ir_to, "to_utc_timezone_timestamp", unit)); - std::string ir_from = std::string("from_utc_timezone_timestamp") + sfx; - try_build(ir_from, - ts_ir->BuildTimezoneWrapper(ir_from, "from_utc_timezone_timestamp", - unit)); - } - - // castVARCHAR(timestamp, int64): scale to millis - { - std::string ir_name = std::string("castVARCHAR_timestamp_int64") + sfx; - try_build(ir_name, - ts_ir->BuildCastVARCHARWrapper(ir_name, "castVARCHAR_timestamp_int64", - unit)); - } - - // next_day(timestamp, utf8): scale to millis, return date64 - { - std::string ir_name = std::string("next_day_from_timestamp") + sfx; - try_build(ir_name, - ts_ir->BuildNextDayWrapper(ir_name, "next_day_from_timestamp", unit)); - } - } - - // Validate that the set of functions we tried to build matches AllFunctionNames(). - // This catches drift between AddFunctions() and BuildAllFunctionNames(). 
- const auto& expected = AllFunctionNames(); - for (const auto& name : expected) { - if (!ts_ir->module()->getFunction(name)) { - ARROW_LOG(DEBUG) << "TimestampIR: " << name - << " in AllFunctionNames() but not created (precompiled base " - "likely missing — OK if intentional)"; - } - } - for (auto& fn : *ts_ir->module()) { - auto name = fn.getName().str(); - // Only check functions with unit suffixes that we generate - if ((name.find("_us") != std::string::npos || name.find("_ns") != std::string::npos) && - expected.find(name) == expected.end()) { - ARROW_LOG(WARNING) << "TimestampIR: function " << name - << " was created but is not in AllFunctionNames() — " - "it will not be remapped during code generation"; - } - } - - return Status::OK(); -} - } // namespace gandiva diff --git a/cpp/src/gandiva/timestamp_ir.h b/cpp/src/gandiva/timestamp_ir.h index 2891664b17b..e61e22d88ce 100644 --- a/cpp/src/gandiva/timestamp_ir.h +++ b/cpp/src/gandiva/timestamp_ir.h @@ -18,118 +18,24 @@ #pragma once #include -#include #include "arrow/type.h" -#include "gandiva/function_ir_builder.h" namespace gandiva { -/// @brief Timestamp IR functions for unit-aware timestamp operations. +/// @brief Registry of timestamp functions that have precompiled _us / _ns variants. /// -/// Follows the DecimalIR pattern: builds LLVM IR functions that handle -/// timestamp values in their native TimeUnit (ms, us, ns) without lossy -/// conversion. +/// The _us and _ns wrapper functions are now generated at build time as precompiled +/// bitcode (precompiled/timestamp_unit_ops.cc) rather than dynamically via the LLVM +/// IR builder at JIT initialization time. /// -/// Patterns: -/// - Pure IR: arithmetic generated entirely in IR (e.g., timestampaddSecond). -/// - Calendar wrapper: split into millis + remainder, call precompiled, recombine -/// (e.g., timestampaddMonth). -/// - Extract wrapper: convert to millis, call precompiled, return scalar -/// (e.g., extractMonth). 
-/// - Trunc wrapper: convert to millis, call precompiled truncation, scale back, -/// zero sub-milli remainder (e.g., date_trunc_Day). -/// - Diff wrapper: convert both inputs to millis, call precompiled, return scalar -/// (e.g., timestampdiffDay, months_between). -/// - Cast wrapper: convert to millis, call precompiled cast -/// (e.g., castDATE, castVARCHAR). -class TimestampIR : public FunctionIRBuilder { +/// IsTimestampIRFunction() is still used by LLVMGenerator::ResolveTimestampPcName() +/// to validate that a remapped function name exists before returning it. +class TimestampIR { public: - explicit TimestampIR(Engine* engine) : FunctionIRBuilder(engine) {} - - static Status AddFunctions(Engine* engine); static bool IsTimestampIRFunction(const std::string& function_name); static int64_t UnitsPerSecond(arrow::TimeUnit::type unit); static int64_t UnitsPerMilli(arrow::TimeUnit::type unit); - - private: - // ts + count * fixed_constant (pure IR) - Status BuildTimestampaddFixed(const std::string& fn, int64_t seconds_per_unit, - arrow::TimeUnit::type unit); - - // date_add/add/date_sub/subtract/date_diff and int64 timestampadd (pure IR) - // seconds_per_count: positive for add, negative for sub - Status BuildDateArithFixed(const std::string& fn, bool count_first, - int64_t seconds_per_count, arrow::TimeUnit::type unit, - llvm::Type* count_type); - - // ts + count * months via precompiled calendar math (split/recombine) - Status BuildTimestampaddCalendar(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit); - - // Generic calendar wrapper: handles both arg orders and int32/int64 count - Status BuildTimestampaddCalendarGeneric(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit, - bool count_first, - llvm::Type* count_type); - - // Extract: convert ts to millis, call precompiled, return int64 - // fn(int64 ts) -> int64 - Status BuildExtractWrapper(const std::string& fn, - const std::string& 
precompiled_fn, - arrow::TimeUnit::type unit); - - // date_trunc: convert ts to millis, call precompiled trunc, scale back - // The truncation zeroes sub-unit data, so no remainder recombination. - // fn(int64 ts) -> int64 - Status BuildTruncWrapper(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit); - - // Diff: convert both ts inputs to millis, call precompiled, return scalar - // fn(int64 ts1, int64 ts2) -> int32 or float64 - Status BuildDiffWrapper(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit, - llvm::Type* return_type); - - // Cast: convert ts to millis, call precompiled cast (variable signatures) - Status BuildCastFromTimestampWrapper(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit, - llvm::Type* return_type); - - // Timezone: split-recombine wrapper for to_utc/from_utc - // fn(context, ts, tz_str, tz_len) -> ts - Status BuildTimezoneWrapper(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit); - - // castVARCHAR: scale ts to millis before formatting - // fn(context, ts, len, &out_len) -> const char* - Status BuildCastVARCHARWrapper(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit); - - // next_day: scale ts to millis, call precompiled, return date64 - // fn(context, ts, day_str, day_len) -> int64 - Status BuildNextDayWrapper(const std::string& fn, - const std::string& precompiled_fn, - arrow::TimeUnit::type unit); - - // Floor division: ts / divisor rounded toward negative infinity. - // C/LLVM SDiv truncates toward zero, which gives wrong millis for negative - // timestamps with non-zero sub-ms components (e.g., SDiv(-456, 1000) = 0 - // instead of -1). This helper corrects the quotient. 
- llvm::Value* FloorDiv(llvm::Value* ts, llvm::Value* divisor); - - // Floor division with remainder: returns {quotient, remainder} where - // quotient * divisor + remainder == ts and 0 <= remainder < divisor. - // Used by split-recombine wrappers (timezone, calendar add). - std::pair FloorDivRem(llvm::Value* ts, - llvm::Value* divisor); }; } // namespace gandiva