diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 15ae37573e6e..39fa824ed1c5 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -61,6 +61,7 @@ set(SRC_FILES context_helper.cc decimal_ir.cc decimal_type_util.cc + timestamp_ir.cc decimal_xlarge.cc engine.cc date_utils.cc diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 8c7beb67b04b..ba8c8d63cbeb 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -109,6 +109,7 @@ #include "gandiva/decimal_ir.h" #include "gandiva/exported_funcs.h" #include "gandiva/exported_funcs_registry.h" +#include "gandiva/timestamp_ir.h" namespace gandiva { @@ -228,7 +229,11 @@ Result> BuildJIT( #endif jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); +#if LLVM_VERSION_MAJOR >= 17 jit_builder.setDataLayout(std::make_optional(data_layout)); +#else + jit_builder.setDataLayout(llvm::Optional(data_layout)); +#endif if (object_cache.has_value()) { jit_builder.setCompileFunctionCreator( @@ -325,6 +330,7 @@ Status Engine::LoadFunctionIRs() { if (!functions_loaded_) { ARROW_RETURN_NOT_OK(LoadPreCompiledIR()); ARROW_RETURN_NOT_OK(DecimalIR::AddFunctions(this)); + ARROW_RETURN_NOT_OK(TimestampIR::AddFunctions(this)); ARROW_RETURN_NOT_OK(LoadExternalPreCompiledIR()); functions_loaded_ = true; } diff --git a/cpp/src/gandiva/function_signature.cc b/cpp/src/gandiva/function_signature.cc index 6dc6416178e1..c2856295d5cf 100644 --- a/cpp/src/gandiva/function_signature.cc +++ b/cpp/src/gandiva/function_signature.cc @@ -45,6 +45,15 @@ bool DataTypeEquals(const DataTypePtr& left, const DataTypePtr& right) { return (dleft != NULL) && (dright != NULL) && (dleft->byte_width() == dright->byte_width()); } + case arrow::Type::TIMESTAMP: { + // For timestamp types, the TimeUnit isn't part of the signature + // (conversion is handled at codegen time by TimestampIR). 
+ // However, timezone IS significant — a function registered for + // timestamp(null tz) should not match timestamp("America/New_York"). + auto tleft = checked_cast(left.get()); + auto tright = checked_cast(right.get()); + return tleft->timezone() == tright->timezone(); + } default: return left->Equals(right); } diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index ae5c6f1728f0..305e0280ee6b 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -21,6 +21,7 @@ #include #include +#include "arrow/type.h" #include "gandiva/bitmap_accumulator.h" #include "gandiva/decimal_ir.h" #include "gandiva/dex.h" @@ -28,6 +29,7 @@ #include "gandiva/expression.h" #include "gandiva/llvm_types.h" #include "gandiva/lvalue.h" +#include "gandiva/timestamp_ir.h" namespace gandiva { @@ -384,6 +386,7 @@ Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, int buffer_count, Visitor visitor(this, fn, loop_entry, arg_addrs, arg_local_bitmaps, arg_holder_ptrs, slice_offsets, arg_context_ptr, position_var); value_expr->Accept(visitor); + ARROW_RETURN_NOT_OK(visitor.status()); LValuePtr output_value = visitor.result(); // The "current" block may have changed due to code generation in the visitor. @@ -813,7 +816,8 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) { auto then_lambda = [&] { ADD_VISITOR_TRACE("fn " + function_name + " can return errors : all args valid, invoke fn"); - return BuildFunctionCall(native_function, arrow_return_type, ¶ms); + return BuildFunctionCall(native_function, arrow_return_type, ¶ms, + dex.func_descriptor()); }; // else block @@ -831,7 +835,9 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex& dex) { result_ = BuildIfElse(is_valid, then_lambda, else_lambda, arrow_return_type); } else { // fast path : invoke function without computing validities. 
- result_ = BuildFunctionCall(native_function, arrow_return_type, ¶ms); + result_ = BuildFunctionCall(native_function, arrow_return_type, ¶ms, + dex.func_descriptor()); + if (!status_.ok()) return; } } @@ -844,7 +850,8 @@ void LLVMGenerator::Visitor::Visit(const NullableNeverFuncDex& dex) { native_function->NeedsContext()); auto arrow_return_type = dex.func_descriptor()->return_type(); - result_ = BuildFunctionCall(native_function, arrow_return_type, ¶ms); + result_ = BuildFunctionCall(native_function, arrow_return_type, ¶ms, + dex.func_descriptor()); } void LLVMGenerator::Visitor::Visit(const NullableInternalFuncDex& dex) { @@ -1084,6 +1091,9 @@ void LLVMGenerator::Visitor::VisitInExpression(const InExprDexBase& dex) { for (auto& pair : dex.args()) { DexPtr value_expr = pair->value_expr(); value_expr->Accept(*this); + if (!status_.ok()) { + return; + } LValue& result_ref = *result(); params.push_back(result_ref.data()); @@ -1235,6 +1245,9 @@ LValuePtr LLVMGenerator::Visitor::BuildValueAndValidity(const ValueValidityPair& // generate code for value auto value_expr = pair.value_expr(); value_expr->Accept(*this); + if (!status_.ok()) { + return nullptr; + } auto value = result()->data(); auto length = result()->length(); @@ -1246,12 +1259,44 @@ LValuePtr LLVMGenerator::Visitor::BuildValueAndValidity(const ValueValidityPair& LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, DataTypePtr arrow_return_type, - std::vector* params) { + std::vector* params, + const FuncDescriptorPtr& descriptor) { auto types = generator_->types(); auto arrow_return_type_id = arrow_return_type->id(); auto llvm_return_type = types->IRType(arrow_return_type_id); DecimalIR decimalIR(generator_->engine_.get()); + // Resolve the function name — may remap to a TimestampIR-built variant + // based on the actual TimeUnit from the expression tree. 
+ std::string pc_name = func->pc_name(); + if (descriptor != nullptr) { + arrow::TimeUnit::type ts_unit = arrow::TimeUnit::MILLI; + bool found_ts = false; + for (auto& param : descriptor->params()) { + if (param->id() == arrow::Type::TIMESTAMP) { + auto unit = + arrow::internal::checked_cast(*param).unit(); + if (!found_ts) { + ts_unit = unit; + found_ts = true; + } else if (unit != ts_unit) { + status_ = Status::Invalid( + "Gandiva cannot compile expression: mixed timestamp units in function '", + pc_name, "'. All timestamp arguments must have the same TimeUnit."); + return nullptr; + } + } + } + if (found_ts && ts_unit != arrow::TimeUnit::MILLI) { + std::string suffix = (ts_unit == arrow::TimeUnit::MICRO) ? "_us" : "_ns"; + std::string remapped = pc_name + suffix; + ARROW_LOG(DEBUG) << "TimestampIR remap: " << pc_name << " -> " << remapped; + if (TimestampIR::IsTimestampIRFunction(remapped)) { + pc_name = remapped; + } + } + } + if (arrow_return_type_id == arrow::Type::DECIMAL) { // For decimal fns, the output precision/scale are passed along as parameters. // @@ -1266,7 +1311,7 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, params->push_back(ret_lvalue->scale()); // Make the function call - auto out = decimalIR.CallDecimalFunction(func->pc_name(), llvm_return_type, *params); + auto out = decimalIR.CallDecimalFunction(pc_name, llvm_return_type, *params); ret_lvalue->set_data(out); return ret_lvalue; } else { @@ -1287,10 +1332,14 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, // Make the function call llvm::IRBuilder<>* builder = ir_builder(); - auto value = - isDecimalFunction - ? 
decimalIR.CallDecimalFunction(func->pc_name(), llvm_return_type, *params) - : generator_->AddFunctionCall(func->pc_name(), llvm_return_type, *params); + llvm::Value* value; + if (isDecimalFunction) { + value = decimalIR.CallDecimalFunction(pc_name, llvm_return_type, *params); + } else if (auto* ir_fn = generator_->engine_->module()->getFunction(pc_name)) { + value = ir_builder()->CreateCall(ir_fn, *params); + } else { + value = generator_->AddFunctionCall(pc_name, llvm_return_type, *params); + } auto value_len = (result_len_ptr == nullptr) ? nullptr diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index a60e2bf6b29e..9e30e5118518 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -135,6 +135,8 @@ class GANDIVA_EXPORT LLVMGenerator { bool has_arena_allocs() { return has_arena_allocs_; } + const Status& status() const { return status_; } + private: enum BufferType { kBufferTypeValidity = 0, kBufferTypeData, kBufferTypeOffsets }; @@ -158,7 +160,8 @@ class GANDIVA_EXPORT LLVMGenerator { // Generate code to invoke a function call. LValuePtr BuildFunctionCall(const NativeFunction* func, DataTypePtr arrow_return_type, - std::vector* params); + std::vector* params, + const FuncDescriptorPtr& descriptor = nullptr); // Generate code for an if-else condition. 
LValuePtr BuildIfElse(llvm::Value* condition, std::function then_func, @@ -179,6 +182,7 @@ class GANDIVA_EXPORT LLVMGenerator { LLVMGenerator* generator_; LValuePtr result_; + Status status_; llvm::Function* function_; llvm::BasicBlock* entry_block_; llvm::Value* arg_addrs_; diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc index ecfff4fe72a0..5ab2e20faa9a 100644 --- a/cpp/src/gandiva/precompiled/time.cc +++ b/cpp/src/gandiva/precompiled/time.cc @@ -442,10 +442,17 @@ EXTRACT_MINUTE_TIME(time32) EXTRACT_HOUR_TIME(time32) -#define DATE_TRUNC_FIXED_UNIT(NAME, TYPE, NMILLIS_IN_UNIT) \ - FORCE_INLINE \ - gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \ - return ((millis / NMILLIS_IN_UNIT) * NMILLIS_IN_UNIT); \ +#define DATE_TRUNC_FIXED_UNIT(NAME, TYPE, NMILLIS_IN_UNIT) \ + FORCE_INLINE \ + gdv_##TYPE NAME##_##TYPE(gdv_##TYPE millis) { \ + /* Use floor division to correctly handle negative timestamps (pre-epoch). */ \ + /* C++ integer division truncates toward zero; we need toward negative inf. */ \ + gdv_##TYPE q = millis / NMILLIS_IN_UNIT; \ + gdv_##TYPE r = millis % NMILLIS_IN_UNIT; \ + if (r != 0 && (millis ^ NMILLIS_IN_UNIT) < 0) { \ + --q; \ + } \ + return q * NMILLIS_IN_UNIT; \ } #define DATE_TRUNC_WEEK(TYPE) \ @@ -927,7 +934,9 @@ const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in, gdv_int64 hour = extractHour_timestamp(in); gdv_int64 minute = extractMinute_timestamp(in); gdv_int64 second = extractSecond_timestamp(in); + // Use non-negative remainder for sub-second millis (pre-epoch safe). 
gdv_int64 millis = in % MILLIS_IN_SEC; + if (millis < 0) millis += MILLIS_IN_SEC; static const int kTimeStampStringLen = 23; const int char_buffer_length = kTimeStampStringLen + 1; // snprintf adds \0 diff --git a/cpp/src/gandiva/tests/date_time_test.cc b/cpp/src/gandiva/tests/date_time_test.cc index 6208f1ecba9b..ecacd7f1074b 100644 --- a/cpp/src/gandiva/tests/date_time_test.cc +++ b/cpp/src/gandiva/tests/date_time_test.cc @@ -828,4 +828,617 @@ TEST_F(DateTimeTestProjector, TestFromUtcTimestamp) { // Validate results EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0)); } + +// ---------- Timestamp precision tests (ms, us, ns) ---------- + +// 2021-06-15T14:30:45.123Z in millis since epoch +static const int64_t kTestMillis = 1623767445123LL; +static const int64_t kSubMs = 456; // sub-millisecond micros +static const int64_t kSubUs = 789; // sub-microsecond nanos +static const int64_t kTestMicros = kTestMillis * 1000 + kSubMs; +static const int64_t kTestNanos = kTestMillis * 1000000 + kSubMs * 1000 + kSubUs; + +// Helper: evaluate a unary timestamp function returning int64 +static int64_t EvalExtract(const std::string& func_name, arrow::TimeUnit::type unit, + int64_t ts_value, arrow::MemoryPool* pool) { + auto ts_type = timestamp(unit); + auto f0 = field("f0", ts_type); + auto schema = arrow::schema({f0}); + auto result_field = field("result", int64()); + auto expr = TreeExprBuilder::MakeExpression(func_name, {f0}, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto in_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {in_array}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool, &outputs); + EXPECT_TRUE(status.ok()); + + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); +} + +// Helper: evaluate a unary timestamp 
function returning timestamp +static int64_t EvalTrunc(const std::string& func_name, arrow::TimeUnit::type unit, + int64_t ts_value, arrow::MemoryPool* pool) { + auto ts_type = timestamp(unit); + auto f0 = field("f0", ts_type); + auto schema = arrow::schema({f0}); + auto result_field = field("result", ts_type); + auto expr = TreeExprBuilder::MakeExpression(func_name, {f0}, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto in_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {in_array}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool, &outputs); + EXPECT_TRUE(status.ok()); + + // Result is a TimestampArray, not Int64Array — use raw values buffer. + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); +} + +TEST_F(DateTimeTestProjector, TestExtractAcrossPrecisions) { + // extractMonth must return 6 for all precisions + EXPECT_EQ(6, EvalExtract("extractMonth", arrow::TimeUnit::MILLI, kTestMillis, pool_)); + EXPECT_EQ(6, EvalExtract("extractMonth", arrow::TimeUnit::MICRO, kTestMicros, pool_)); + EXPECT_EQ(6, EvalExtract("extractMonth", arrow::TimeUnit::NANO, kTestNanos, pool_)); + + // extractDay must return 15 + EXPECT_EQ(15, EvalExtract("extractDay", arrow::TimeUnit::MILLI, kTestMillis, pool_)); + EXPECT_EQ(15, EvalExtract("extractDay", arrow::TimeUnit::MICRO, kTestMicros, pool_)); + EXPECT_EQ(15, EvalExtract("extractDay", arrow::TimeUnit::NANO, kTestNanos, pool_)); + + // extractHour must return 14 + EXPECT_EQ(14, EvalExtract("extractHour", arrow::TimeUnit::MILLI, kTestMillis, pool_)); + EXPECT_EQ(14, EvalExtract("extractHour", arrow::TimeUnit::MICRO, kTestMicros, pool_)); + EXPECT_EQ(14, EvalExtract("extractHour", arrow::TimeUnit::NANO, kTestNanos, pool_)); + + // extractYear must return 2021 + EXPECT_EQ(2021, 
EvalExtract("extractYear", arrow::TimeUnit::MILLI, kTestMillis, pool_)); + EXPECT_EQ(2021, EvalExtract("extractYear", arrow::TimeUnit::MICRO, kTestMicros, pool_)); + EXPECT_EQ(2021, EvalExtract("extractYear", arrow::TimeUnit::NANO, kTestNanos, pool_)); +} + +TEST_F(DateTimeTestProjector, TestDateTruncAcrossPrecisions) { + // 2021-06-15T00:00:00Z in millis + int64_t day_millis = 1623715200000LL; + + // date_trunc_Day: millis + EXPECT_EQ(day_millis, + EvalTrunc("date_trunc_Day", arrow::TimeUnit::MILLI, kTestMillis, pool_)); + // date_trunc_Day: micros — sub-ms data zeroed + EXPECT_EQ(day_millis * 1000, + EvalTrunc("date_trunc_Day", arrow::TimeUnit::MICRO, kTestMicros, pool_)); + // date_trunc_Day: nanos — sub-ms data zeroed + EXPECT_EQ(day_millis * 1000000, + EvalTrunc("date_trunc_Day", arrow::TimeUnit::NANO, kTestNanos, pool_)); + + // 2021-06-15T14:00:00Z in millis + int64_t hour_millis = 1623765600000LL; + EXPECT_EQ(hour_millis * 1000, + EvalTrunc("date_trunc_Hour", arrow::TimeUnit::MICRO, kTestMicros, pool_)); + EXPECT_EQ(hour_millis * 1000000, + EvalTrunc("date_trunc_Hour", arrow::TimeUnit::NANO, kTestNanos, pool_)); +} + +// Helper: evaluate timestampadd(int32, timestamp) -> timestamp +static int64_t EvalTimestampadd(const std::string& func_name, arrow::TimeUnit::type unit, + int32_t count, int64_t ts_value, + arrow::MemoryPool* pool) { + auto ts_type = timestamp(unit); + auto f_count = field("count", int32()); + auto f_ts = field("ts", ts_type); + auto schema = arrow::schema({f_count, f_ts}); + auto result_field = field("result", ts_type); + + auto count_node = TreeExprBuilder::MakeField(f_count); + auto ts_node = TreeExprBuilder::MakeField(f_ts); + auto func_node = + TreeExprBuilder::MakeFunction(func_name, {count_node, ts_node}, ts_type); + auto expr = TreeExprBuilder::MakeExpression(func_node, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto 
count_array = + MakeArrowTypeArray(int32(), {count}, {true}); + auto ts_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {count_array, ts_array}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool, &outputs); + EXPECT_TRUE(status.ok()); + + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); +} + +TEST_F(DateTimeTestProjector, TestTimestampaddSecondPreservesSubMs) { + // Add 10 seconds to a timestamp with sub-ms precision. + // Sub-ms data must survive. + + // Micros: 10 seconds = 10_000_000 us. Sub-ms 456 preserved. + int64_t r = EvalTimestampadd("timestampaddSecond", arrow::TimeUnit::MICRO, 10, + kTestMicros, pool_); + EXPECT_EQ(kTestMicros + 10LL * 1000000, r); + EXPECT_EQ(kSubMs, r % 1000); + + // Nanos: 10 seconds = 10_000_000_000 ns. Sub-us 789 preserved. + r = EvalTimestampadd("timestampaddSecond", arrow::TimeUnit::NANO, 10, kTestNanos, + pool_); + EXPECT_EQ(kTestNanos + 10LL * 1000000000, r); + EXPECT_EQ(kSubUs, r % 1000); +} + +TEST_F(DateTimeTestProjector, TestTimestampaddDayPreservesSubMs) { + // Add 1 day. Sub-ms must survive. + int64_t r = + EvalTimestampadd("timestampaddDay", arrow::TimeUnit::MICRO, 1, kTestMicros, pool_); + EXPECT_EQ(kTestMicros + 86400LL * 1000000, r); + EXPECT_EQ(kSubMs, r % 1000); + + r = EvalTimestampadd("timestampaddDay", arrow::TimeUnit::NANO, 1, kTestNanos, pool_); + EXPECT_EQ(kTestNanos + 86400LL * 1000000000, r); + EXPECT_EQ(kSubUs, r % 1000); +} + +TEST_F(DateTimeTestProjector, TestTimestampaddMonthPreservesSubMs) { + // Add 2 months (calendar math). Sub-ms data must survive. + // Use millis result as ground truth — micros/nanos must match with sub-ms appended. 
+ int64_t base_millis = EvalTimestampadd("timestampaddMonth", arrow::TimeUnit::MILLI, 2, + kTestMillis, pool_); + + int64_t r = EvalTimestampadd("timestampaddMonth", arrow::TimeUnit::MICRO, 2, + kTestMicros, pool_); + EXPECT_EQ(base_millis * 1000 + kSubMs, r); + + r = EvalTimestampadd("timestampaddMonth", arrow::TimeUnit::NANO, 2, kTestNanos, pool_); + EXPECT_EQ(base_millis * 1000000 + kSubMs * 1000 + kSubUs, r); +} + +// Helper: evaluate a two-timestamp function returning int32 +static int32_t EvalDiff(const std::string& func_name, arrow::TimeUnit::type unit, + int64_t ts1, int64_t ts2, arrow::MemoryPool* pool) { + auto ts_type = timestamp(unit); + auto f1 = field("f1", ts_type); + auto f2 = field("f2", ts_type); + auto schema = arrow::schema({f1, f2}); + auto result_field = field("result", int32()); + + auto n1 = TreeExprBuilder::MakeField(f1); + auto n2 = TreeExprBuilder::MakeField(f2); + auto func_node = TreeExprBuilder::MakeFunction(func_name, {n1, n2}, int32()); + auto expr = TreeExprBuilder::MakeExpression(func_node, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto a1 = MakeArrowTypeArray(ts_type, {ts1}, {true}); + auto a2 = MakeArrowTypeArray(ts_type, {ts2}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {a1, a2}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool, &outputs); + EXPECT_TRUE(status.ok()); + + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); +} + +TEST_F(DateTimeTestProjector, TestTimestampdiffAcrossPrecisions) { + // timestampdiffDay between kTestMillis and kTestMillis + 3 days + int64_t three_days_later_ms = kTestMillis + 3 * 86400000LL; + EXPECT_EQ(3, EvalDiff("timestampdiffDay", arrow::TimeUnit::MILLI, kTestMillis, + three_days_later_ms, pool_)); + + int64_t three_days_later_us = kTestMicros + 3 * 86400000000LL; + EXPECT_EQ(3, 
EvalDiff("timestampdiffDay", arrow::TimeUnit::MICRO, kTestMicros, + three_days_later_us, pool_)); + + int64_t three_days_later_ns = kTestNanos + 3 * 86400000000000LL; + EXPECT_EQ(3, EvalDiff("timestampdiffDay", arrow::TimeUnit::NANO, kTestNanos, + three_days_later_ns, pool_)); +} + +// Helper: evaluate months_between(ts1, ts2) -> float64 +static double EvalMonthsBetween(arrow::TimeUnit::type unit, int64_t ts1, int64_t ts2, + arrow::MemoryPool* pool) { + auto ts_type = timestamp(unit); + auto f1 = field("f1", ts_type); + auto f2 = field("f2", ts_type); + auto schema = arrow::schema({f1, f2}); + auto result_field = field("result", float64()); + + auto n1 = TreeExprBuilder::MakeField(f1); + auto n2 = TreeExprBuilder::MakeField(f2); + auto func_node = TreeExprBuilder::MakeFunction("months_between", {n1, n2}, float64()); + auto expr = TreeExprBuilder::MakeExpression(func_node, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto a1 = MakeArrowTypeArray(ts_type, {ts1}, {true}); + auto a2 = MakeArrowTypeArray(ts_type, {ts2}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {a1, a2}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool, &outputs); + EXPECT_TRUE(status.ok()); + + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); +} + +TEST_F(DateTimeTestProjector, TestMonthsBetweenAcrossPrecisions) { + // months_between returns the same value regardless of input precision. + // Use millis as baseline. 
+ double base = EvalMonthsBetween(arrow::TimeUnit::MILLI, kTestMillis + 5270400000LL, + kTestMillis, pool_); + EXPECT_NEAR(base, + EvalMonthsBetween(arrow::TimeUnit::MICRO, kTestMicros + 5270400000000LL, + kTestMicros, pool_), + 0.001); + EXPECT_NEAR(base, + EvalMonthsBetween(arrow::TimeUnit::NANO, kTestNanos + 5270400000000000LL, + kTestNanos, pool_), + 0.001); +} + +// Helper: evaluate date_add/subtract(timestamp, int32) -> timestamp +static int64_t EvalDateArith(const std::string& func_name, arrow::TimeUnit::type unit, + int64_t ts_value, int32_t count, arrow::MemoryPool* pool) { + auto ts_type = timestamp(unit); + auto f_ts = field("ts", ts_type); + auto f_count = field("count", int32()); + auto schema = arrow::schema({f_ts, f_count}); + auto result_field = field("result", ts_type); + + auto ts_node = TreeExprBuilder::MakeField(f_ts); + auto count_node = TreeExprBuilder::MakeField(f_count); + auto func_node = + TreeExprBuilder::MakeFunction(func_name, {ts_node, count_node}, ts_type); + auto expr = TreeExprBuilder::MakeExpression(func_node, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto a_ts = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto a_count = MakeArrowTypeArray(int32(), {count}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {a_ts, a_count}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool, &outputs); + EXPECT_TRUE(status.ok()); + + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); +} + +TEST_F(DateTimeTestProjector, TestDateAddSubtractAcrossPrecisions) { + // date_add(ts, 3) adds 3 days. Sub-ms data must survive. 
+ int64_t three_days_us = 3 * 86400LL * 1000000; + EXPECT_EQ(kTestMicros + three_days_us, + EvalDateArith("date_add", arrow::TimeUnit::MICRO, kTestMicros, 3, pool_)); + + int64_t three_days_ns = 3 * 86400LL * 1000000000; + EXPECT_EQ(kTestNanos + three_days_ns, + EvalDateArith("date_add", arrow::TimeUnit::NANO, kTestNanos, 3, pool_)); + + // subtract(ts, 1) subtracts 1 day. + int64_t one_day_us = 86400LL * 1000000; + EXPECT_EQ(kTestMicros - one_day_us, + EvalDateArith("subtract", arrow::TimeUnit::MICRO, kTestMicros, 1, pool_)); +} + +TEST_F(DateTimeTestProjector, TestTimestampaddMonthReversedArgMicros) { + // timestampaddMonth(timestamp, int32) with micros — reversed arg order. + // Use millis as ground truth. + auto millis_result = EvalTimestampadd("timestampaddMonth", arrow::TimeUnit::MILLI, 2, + kTestMillis, pool_); + auto micros_result = EvalTimestampadd("timestampaddMonth", arrow::TimeUnit::MICRO, 2, + kTestMicros, pool_); + // Sub-ms data must survive. + EXPECT_EQ(millis_result * 1000 + kSubMs, micros_result); +} + +// castDATE: timestamp(us/ns) -> date64 (millis at midnight) +TEST_F(DateTimeTestProjector, TestCastDateAcrossPrecisions) { + // 2021-06-15 14:30:45.123456789 -> should yield 2021-06-15 00:00:00 as date64 millis + int64_t expected_date_millis = 1623715200000LL; // 2021-06-15T00:00:00Z in millis + + auto eval_castdate = [this](arrow::TimeUnit::type unit, int64_t ts_value) -> int64_t { + auto ts_type = timestamp(unit); + auto f0 = field("f0", ts_type); + auto schema = arrow::schema({f0}); + auto result_field = field("result", date64()); + auto expr = TreeExprBuilder::MakeExpression("castDATE", {f0}, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto in_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {in_array}); + + arrow::ArrayVector outputs; + status = 
projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()); + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); + }; + + EXPECT_EQ(expected_date_millis, eval_castdate(arrow::TimeUnit::MILLI, kTestMillis)); + EXPECT_EQ(expected_date_millis, eval_castdate(arrow::TimeUnit::MICRO, kTestMicros)); + EXPECT_EQ(expected_date_millis, eval_castdate(arrow::TimeUnit::NANO, kTestNanos)); +} + +// castTIME: timestamp(us/ns) -> time32(millis) — sub-ms truncated to millis +TEST_F(DateTimeTestProjector, TestCastTimeAcrossPrecisions) { + // 2021-06-15 14:30:45.123456789 -> time-of-day = 14:30:45.123 = 52245123 ms + int32_t expected_time_millis = 52245123; + + auto eval_casttime = [this](arrow::TimeUnit::type unit, int64_t ts_value) -> int32_t { + auto ts_type = timestamp(unit); + auto f0 = field("f0", ts_type); + auto schema = arrow::schema({f0}); + auto result_field = field("result", time32(arrow::TimeUnit::MILLI)); + auto expr = TreeExprBuilder::MakeExpression("castTIME", {f0}, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()); + + auto in_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {in_array}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()); + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->Value(0); + }; + + EXPECT_EQ(expected_time_millis, eval_casttime(arrow::TimeUnit::MILLI, kTestMillis)); + EXPECT_EQ(expected_time_millis, eval_casttime(arrow::TimeUnit::MICRO, kTestMicros)); + EXPECT_EQ(expected_time_millis, eval_casttime(arrow::TimeUnit::NANO, kTestNanos)); +} + +// Negative (pre-epoch) timestamps: verify extract and date_trunc produce correct +// results and that us/ns precisions match the millis baseline. 
+TEST_F(DateTimeTestProjector, TestNegativeTimestampPrecisions) { + time_t epoch = Epoch(); + // 1960-03-15 06:30:00.000 (pre-epoch) + int64_t neg_millis = MillisSince(epoch, 1960, 3, 15, 6, 30, 0, 0); + int64_t neg_sub_ms = 456; // sub-ms micros + int64_t neg_sub_us = 789; // sub-us nanos + int64_t neg_micros = neg_millis * 1000 - neg_sub_ms; + int64_t neg_nanos = neg_millis * 1000000 - neg_sub_ms * 1000 - neg_sub_us; + + // extractYear: all precisions must agree + auto year_ms = EvalExtract("extractYear", arrow::TimeUnit::MILLI, neg_millis, pool_); + EXPECT_EQ(year_ms, + EvalExtract("extractYear", arrow::TimeUnit::MICRO, neg_micros, pool_)); + EXPECT_EQ(year_ms, EvalExtract("extractYear", arrow::TimeUnit::NANO, neg_nanos, pool_)); + + // extractMonth: all precisions must agree + auto month_ms = EvalExtract("extractMonth", arrow::TimeUnit::MILLI, neg_millis, pool_); + EXPECT_EQ(month_ms, + EvalExtract("extractMonth", arrow::TimeUnit::MICRO, neg_micros, pool_)); + EXPECT_EQ(month_ms, + EvalExtract("extractMonth", arrow::TimeUnit::NANO, neg_nanos, pool_)); + + // date_trunc_Day: us/ns results must equal millis result scaled up + auto day_ms = EvalTrunc("date_trunc_Day", arrow::TimeUnit::MILLI, neg_millis, pool_); + EXPECT_EQ(day_ms * 1000, + EvalTrunc("date_trunc_Day", arrow::TimeUnit::MICRO, neg_micros, pool_)); + EXPECT_EQ(day_ms * 1000000, + EvalTrunc("date_trunc_Day", arrow::TimeUnit::NANO, neg_nanos, pool_)); +} + +// Negative timestamps at calendar boundaries: SDiv truncates toward zero, so +// -456 us / 1000 = 0 millis (wrong, should be -1). This causes the precompiled +// function to see millis=0 (epoch) instead of millis=-1 (just before epoch), +// crossing a second/hour/day/year boundary. 
+TEST_F(DateTimeTestProjector, TestNegativeTimestampBoundaryCrossing) { + // -456 us = 1969-12-31 23:59:59.999544 + // SDiv(-456, 1000) = 0 → precompiled sees epoch (1970-01-01 00:00:00) + // Floor(-456 / 1000) = -1 → correct millis (1969-12-31 23:59:59.999) + int64_t boundary_micros = -456; + int64_t boundary_nanos = -456000 - 789; // -456789 ns + + // extractHour: should be 23 (not 0) + EXPECT_EQ(23, + EvalExtract("extractHour", arrow::TimeUnit::MICRO, boundary_micros, pool_)); + EXPECT_EQ(23, EvalExtract("extractHour", arrow::TimeUnit::NANO, boundary_nanos, pool_)); + + // extractYear: should be 1969 (not 1970) + EXPECT_EQ(1969, + EvalExtract("extractYear", arrow::TimeUnit::MICRO, boundary_micros, pool_)); + EXPECT_EQ(1969, + EvalExtract("extractYear", arrow::TimeUnit::NANO, boundary_nanos, pool_)); + + // Verify millis-level baseline: precompiled functions with millis = -1 + // (1969-12-31 23:59:59.999) + int64_t boundary_millis = -1; + auto day_millis = + EvalExtract("extractDay", arrow::TimeUnit::MILLI, boundary_millis, pool_); + auto hour_millis = + EvalExtract("extractHour", arrow::TimeUnit::MILLI, boundary_millis, pool_); + auto year_millis = + EvalExtract("extractYear", arrow::TimeUnit::MILLI, boundary_millis, pool_); + auto trunc_s_millis = + EvalTrunc("date_trunc_Second", arrow::TimeUnit::MILLI, boundary_millis, pool_); + auto trunc_d_millis = + EvalTrunc("date_trunc_Day", arrow::TimeUnit::MILLI, boundary_millis, pool_); + ARROW_LOG(WARNING) << "millis baseline: day=" << day_millis << " hour=" << hour_millis + << " year=" << year_millis << " trunc_s=" << trunc_s_millis + << " trunc_d=" << trunc_d_millis; + + // extractDay: should be 31 (not 1) + EXPECT_EQ(31, + EvalExtract("extractDay", arrow::TimeUnit::MICRO, boundary_micros, pool_)); + EXPECT_EQ(31, EvalExtract("extractDay", arrow::TimeUnit::NANO, boundary_nanos, pool_)); + + // date_trunc_Second: 1969-12-31 23:59:59.000000 = -1000 ms = -1000000 us + EXPECT_EQ(-1000000, EvalTrunc("date_trunc_Second", 
arrow::TimeUnit::MICRO, + boundary_micros, pool_)); + EXPECT_EQ(-1000000000, + EvalTrunc("date_trunc_Second", arrow::TimeUnit::NANO, boundary_nanos, pool_)); + + // date_trunc_Day: 1969-12-31 00:00:00 = -86400000 ms = -86400000000 us + EXPECT_EQ(-86400000000LL, + EvalTrunc("date_trunc_Day", arrow::TimeUnit::MICRO, boundary_micros, pool_)); + EXPECT_EQ(-86400000000000LL, + EvalTrunc("date_trunc_Day", arrow::TimeUnit::NANO, boundary_nanos, pool_)); +} + +// castVARCHAR: verify sub-ms digits are included in the output string. +TEST_F(DateTimeTestProjector, TestCastVARCHARAcrossPrecisions) { + auto eval_castVARCHAR = [this](arrow::TimeUnit::type unit, + int64_t ts_value) -> std::string { + auto ts_type = timestamp(unit); + auto f0 = field("f0", ts_type); + auto len_field = field("len", int64()); + auto schema = arrow::schema({f0, len_field}); + auto result_field = field("result", utf8()); + auto expr = + TreeExprBuilder::MakeExpression("castVARCHAR", {f0, len_field}, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()) << status.ToString(); + + auto in_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto len_array = + MakeArrowTypeArray(int64(), {100}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {in_array, len_array}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()) << status.ToString(); + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->GetString(0); + }; + + // kTest* = 2021-06-15 14:30:45.123456789 + std::string expected_ms = "2021-06-15 14:30:45.123"; + std::string expected_us = "2021-06-15 14:30:45.123456"; + std::string expected_ns = "2021-06-15 14:30:45.123456789"; + + EXPECT_EQ(expected_ms, eval_castVARCHAR(arrow::TimeUnit::MILLI, kTestMillis)); + EXPECT_EQ(expected_us, eval_castVARCHAR(arrow::TimeUnit::MICRO, kTestMicros)); 
+ EXPECT_EQ(expected_ns, eval_castVARCHAR(arrow::TimeUnit::NANO, kTestNanos)); +} + +// castVARCHAR with length truncation: sub-ms digits should be omitted when the +// length limit doesn't allow them. +TEST_F(DateTimeTestProjector, TestCastVARCHARTruncation) { + auto eval_truncated = [this](arrow::TimeUnit::type unit, int64_t ts_value, + int64_t max_len) -> std::string { + auto ts_type = timestamp(unit); + auto f0 = field("f0", ts_type); + auto len_field = field("len", int64()); + auto schema = arrow::schema({f0, len_field}); + auto result_field = field("result", utf8()); + auto expr = + TreeExprBuilder::MakeExpression("castVARCHAR", {f0, len_field}, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()) << status.ToString(); + + auto in_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto len_array = + MakeArrowTypeArray(int64(), {max_len}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {in_array, len_array}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()) << status.ToString(); + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->GetString(0); + }; + + // Full output for ns = "2021-06-15 14:30:45.123456789" (29 chars) + // Truncate to 23 chars → millis only: "2021-06-15 14:30:45.123" + EXPECT_EQ("2021-06-15 14:30:45.123", + eval_truncated(arrow::TimeUnit::NANO, kTestNanos, 23)); + // Truncate to 26 chars → us precision: "2021-06-15 14:30:45.123456" + EXPECT_EQ("2021-06-15 14:30:45.123456", + eval_truncated(arrow::TimeUnit::NANO, kTestNanos, 26)); + // Truncate to 20 chars → no fractional seconds + EXPECT_EQ("2021-06-15 14:30:45.", + eval_truncated(arrow::TimeUnit::NANO, kTestNanos, 20)); +} + +// Negative (pre-epoch) castVARCHAR: floor-division remainder must produce +// correct sub-millisecond digits even for negative timestamps. 
+TEST_F(DateTimeTestProjector, TestCastVARCHARNegativeTimestamp) { + auto eval_castVARCHAR = [this](arrow::TimeUnit::type unit, + int64_t ts_value) -> std::string { + auto ts_type = timestamp(unit); + auto f0 = field("f0", ts_type); + auto len_field = field("len", int64()); + auto schema = arrow::schema({f0, len_field}); + auto result_field = field("result", utf8()); + auto expr = + TreeExprBuilder::MakeExpression("castVARCHAR", {f0, len_field}, result_field); + + std::shared_ptr projector; + auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()) << status.ToString(); + + auto in_array = + MakeArrowTypeArray(ts_type, {ts_value}, {true}); + auto len_array = + MakeArrowTypeArray(int64(), {100}, {true}); + auto in_batch = arrow::RecordBatch::Make(schema, 1, {in_array, len_array}); + + arrow::ArrayVector outputs; + status = projector->Evaluate(*in_batch, pool_, &outputs); + EXPECT_TRUE(status.ok()) << status.ToString(); + auto result_array = std::dynamic_pointer_cast(outputs.at(0)); + return result_array->GetString(0); + }; + + // -1 microsecond = 1969-12-31 23:59:59.999999 + EXPECT_EQ("1969-12-31 23:59:59.999999", eval_castVARCHAR(arrow::TimeUnit::MICRO, -1)); + // -1 nanosecond = 1969-12-31 23:59:59.999999999 + EXPECT_EQ("1969-12-31 23:59:59.999999999", eval_castVARCHAR(arrow::TimeUnit::NANO, -1)); + // -456 microseconds = 1969-12-31 23:59:59.999544 + EXPECT_EQ("1969-12-31 23:59:59.999544", eval_castVARCHAR(arrow::TimeUnit::MICRO, -456)); + // -456789 nanoseconds = 1969-12-31 23:59:59.999543211 + EXPECT_EQ("1969-12-31 23:59:59.999543211", + eval_castVARCHAR(arrow::TimeUnit::NANO, -456789)); +} + +// NOTE: TIME type handling +// time32 uses TimeUnit::SECOND or MILLISECOND. +// time64 uses TimeUnit::MICROSECOND or NANOSECOND. +// Gandiva's time functions (extractHour/Minute/Second on time32) operate on millis. 
+// time64[us]/time64[ns] will need the same TimestampIR treatment: convert to millis +// before calling precompiled functions. The pattern is identical to the extract wrappers. + } // namespace gandiva diff --git a/cpp/src/gandiva/timestamp_ir.cc b/cpp/src/gandiva/timestamp_ir.cc new file mode 100644 index 000000000000..e386f929310f --- /dev/null +++ b/cpp/src/gandiva/timestamp_ir.cc @@ -0,0 +1,798 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/timestamp_ir.h" + +#include "arrow/status.h" +#include "arrow/util/logging.h" +#include "gandiva/engine.h" + +namespace gandiva { + +/*static*/ int64_t TimestampIR::UnitsPerSecond(arrow::TimeUnit::type unit) { + switch (unit) { + case arrow::TimeUnit::MILLI: + return 1000; + case arrow::TimeUnit::MICRO: + return 1000000; + case arrow::TimeUnit::NANO: + return 1000000000; + default: + return 1; + } +} + +/*static*/ int64_t TimestampIR::UnitsPerMilli(arrow::TimeUnit::type unit) { + switch (unit) { + case arrow::TimeUnit::MILLI: + return 1; + case arrow::TimeUnit::MICRO: + return 1000; + case arrow::TimeUnit::NANO: + return 1000000; + default: + return 1; + } +} + +// Unit suffix appended to precompiled function names. 
+static const char* UnitSuffix(arrow::TimeUnit::type unit) { + switch (unit) { + case arrow::TimeUnit::MICRO: + return "_us"; + case arrow::TimeUnit::NANO: + return "_ns"; + default: + return ""; + } +} + +// Fixed-unit timestampadd: ts + count * constant (pure IR) +struct FixedAdd { + const char* name; + int64_t seconds; +}; +static const FixedAdd kFixedAdds[] = { + {"timestampaddSecond", 1}, {"timestampaddMinute", 60}, {"timestampaddHour", 3600}, + {"timestampaddDay", 86400}, {"timestampaddWeek", 604800}, +}; + +// Calendar-based timestampadd: split/recombine around precompiled millis fn +static const char* kCalendarAdds[] = { + "timestampaddMonth", + "timestampaddQuarter", + "timestampaddYear", +}; + +// Extract functions: convert ts to millis, call precompiled, return int64 +// pc_name pattern: {name}_timestamp +static const char* kExtracts[] = { + "extractMillennium", "extractCentury", "extractDecade", "extractYear", + "extractQuarter", "extractMonth", "extractWeek", "extractDay", + "extractHour", "extractMinute", "extractSecond", "extractDoy", + "extractDow", "extractEpoch", +}; + +// date_trunc functions: convert ts to millis, truncate, scale back (zero remainder) +// pc_name pattern: date_trunc_{Level}_timestamp +static const char* kTruncs[] = { + "date_trunc_Millennium", "date_trunc_Century", "date_trunc_Decade", "date_trunc_Year", + "date_trunc_Quarter", "date_trunc_Month", "date_trunc_Week", "date_trunc_Day", + "date_trunc_Hour", "date_trunc_Minute", "date_trunc_Second", +}; + +// timestampdiff: convert both inputs to millis, return int32 +// pc_name pattern: {name}_timestamp_timestamp +static const char* kDiffs[] = { + "timestampdiffSecond", "timestampdiffMinute", "timestampdiffHour", + "timestampdiffDay", "timestampdiffWeek", "timestampdiffMonth", + "timestampdiffQuarter", "timestampdiffYear", +}; + +// Two-timestamp functions returning scalar +// months_between(ts,ts)->float64, datediff(ts,ts)->int32 +struct TwoTsScalar { + const char* name; + bool 
returns_float; // true=float64, false=int32 +}; +static const TwoTsScalar kTwoTsScalars[] = { + {"months_between", true}, + {"datediff", false}, +}; + +// Cast functions from timestamp +struct CastFromTs { + const char* name; + bool returns_i32; // true=int32 (castTIME), false=int64 (castDATE) +}; +static const CastFromTs kCastsFromTs[] = { + {"castDATE", false}, + {"castTIME", true}, + {"last_day_from", false}, // last_day_from_timestamp(ts) -> date64 +}; + +// date_add/add/date_sub/subtract/date_diff with timestamp: +// These are fixed-unit (1 day) arithmetic with varying arg orders and signs. +struct DateArith { + const char* name; + bool count_first; // true=(int,ts), false=(ts,int) + int64_t sign; // +1 for add, -1 for sub +}; +static const DateArith kDateArithEntries[] = { + {"date_add", true, 1}, {"add", true, 1}, {"date_add", false, 1}, + {"add", false, 1}, {"date_sub", false, -1}, {"subtract", false, -1}, + {"date_diff", false, -1}, +}; + +// Units to generate functions for. +static const arrow::TimeUnit::type kUnits[] = { + arrow::TimeUnit::MICRO, + arrow::TimeUnit::NANO, +}; + +// Build the deterministic set of all IR function names that AddFunctions will create. +// These names depend only on the static tables above, not on any engine state. 
+static std::unordered_set BuildAllFunctionNames() { + std::unordered_set names; + const char* suffixes[] = {"_us", "_ns"}; + for (const auto* sfx : suffixes) { + for (const auto& fa : kFixedAdds) { + names.insert(std::string(fa.name) + "_int32_timestamp" + sfx); + names.insert(std::string(fa.name) + "_int64_timestamp" + sfx); + names.insert(std::string(fa.name) + "_timestamp_int32" + sfx); + names.insert(std::string(fa.name) + "_timestamp_int64" + sfx); + } + for (const auto* ca : kCalendarAdds) { + names.insert(std::string(ca) + "_int32_timestamp" + sfx); + names.insert(std::string(ca) + "_timestamp_int32" + sfx); + names.insert(std::string(ca) + "_timestamp_int64" + sfx); + names.insert(std::string(ca) + "_int64_timestamp" + sfx); + } + for (const auto* ex : kExtracts) { + names.insert(std::string(ex) + "_timestamp" + sfx); + } + for (const auto* tr : kTruncs) { + names.insert(std::string(tr) + "_timestamp" + sfx); + } + for (const auto* di : kDiffs) { + names.insert(std::string(di) + "_timestamp_timestamp" + sfx); + } + for (const auto& ts2 : kTwoTsScalars) { + names.insert(std::string(ts2.name) + "_timestamp_timestamp" + sfx); + } + for (const auto& c : kCastsFromTs) { + names.insert(std::string(c.name) + "_timestamp" + sfx); + } + for (const auto& da : kDateArithEntries) { + if (da.count_first) { + names.insert(std::string(da.name) + "_int32_timestamp" + sfx); + names.insert(std::string(da.name) + "_int64_timestamp" + sfx); + } else { + names.insert(std::string(da.name) + "_timestamp_int32" + sfx); + names.insert(std::string(da.name) + "_timestamp_int64" + sfx); + } + } + names.insert(std::string("to_utc_timezone_timestamp") + sfx); + names.insert(std::string("from_utc_timezone_timestamp") + sfx); + names.insert(std::string("castVARCHAR_timestamp_int64") + sfx); + } + return names; +} + +// Thread-safe const set: initialized once via C++11 static local guarantee. 
+static const std::unordered_set& AllFunctionNames() { + static const std::unordered_set names = BuildAllFunctionNames(); + return names; +} + +/*static*/ bool TimestampIR::IsTimestampIRFunction(const std::string& name) { + return AllFunctionNames().count(name) != 0; +} + +Status TimestampIR::BuildTimestampaddFixed(const std::string& function_name, + int64_t seconds_per_unit, + arrow::TimeUnit::type time_unit) { + auto i32 = types()->i32_type(); + auto i64 = types()->i64_type(); + auto function = BuildFunction(function_name, i64, {{"count", i32}, {"ts", i64}}); + + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto arg_iter = function->arg_begin(); + auto count = &arg_iter[0]; + auto ts = &arg_iter[1]; + + // result = ts + (int64)count * units_per_fixed_unit + int64_t units_per_fixed_unit = seconds_per_unit * UnitsPerSecond(time_unit); + auto count_i64 = ir_builder()->CreateSExt(count, i64); + auto delta = ir_builder()->CreateMul(count_i64, + llvm::ConstantInt::get(i64, units_per_fixed_unit)); + auto result = ir_builder()->CreateAdd(ts, delta); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildDateArithFixed(const std::string& function_name, + bool count_first, int64_t seconds_per_count, + arrow::TimeUnit::type time_unit, + llvm::Type* count_type) { + auto i64 = types()->i64_type(); + llvm::Function* function; + if (count_first) { + function = BuildFunction(function_name, i64, {{"count", count_type}, {"ts", i64}}); + } else { + function = BuildFunction(function_name, i64, {{"ts", i64}, {"count", count_type}}); + } + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto arg_iter = function->arg_begin(); + llvm::Value* ts = count_first ? &arg_iter[1] : &arg_iter[0]; + llvm::Value* count = count_first ? 
&arg_iter[0] : &arg_iter[1]; + + int64_t units_per_count = seconds_per_count * UnitsPerSecond(time_unit); + auto count_i64 = (count_type == i64) ? count : ir_builder()->CreateSExt(count, i64); + auto delta = + ir_builder()->CreateMul(count_i64, llvm::ConstantInt::get(i64, units_per_count)); + auto result = ir_builder()->CreateAdd(ts, delta); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildTimestampaddCalendar(const std::string& function_name, + const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit) { + auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i32 = types()->i32_type(); + auto i64 = types()->i64_type(); + auto function = BuildFunction(function_name, i64, {{"count", i32}, {"ts", i64}}); + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto arg_iter = function->arg_begin(); + auto count = &arg_iter[0]; + auto ts = &arg_iter[1]; + + int64_t upm = UnitsPerMilli(time_unit); + auto upm_const = llvm::ConstantInt::get(i64, upm); + auto [millis, remainder] = FloorDivRem(ts, upm_const); + auto result_millis = ir_builder()->CreateCall(precompiled_fn, {count, millis}); + auto result_scaled = ir_builder()->CreateMul(result_millis, upm_const); + auto result = ir_builder()->CreateAdd(result_scaled, remainder); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildTimestampaddCalendarGeneric( + const std::string& function_name, const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit, bool count_first, llvm::Type* count_type) { + auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + 
llvm::Function* function; + if (count_first) { + function = BuildFunction(function_name, i64, {{"count", count_type}, {"ts", i64}}); + } else { + function = BuildFunction(function_name, i64, {{"ts", i64}, {"count", count_type}}); + } + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto arg_iter = function->arg_begin(); + llvm::Value* ts = count_first ? &arg_iter[1] : &arg_iter[0]; + llvm::Value* count = count_first ? &arg_iter[0] : &arg_iter[1]; + + // Convert count to i32 if needed (precompiled fn takes int32 count for millis version) + auto i32 = types()->i32_type(); + auto count_i32 = (count_type == i32) ? count : ir_builder()->CreateTrunc(count, i32); + + int64_t upm = UnitsPerMilli(time_unit); + auto upm_const = llvm::ConstantInt::get(i64, upm); + auto [millis, remainder] = FloorDivRem(ts, upm_const); + + // Precompiled millis fn always takes (int32 count, int64 millis) + auto result_millis = ir_builder()->CreateCall(precompiled_fn, {count_i32, millis}); + auto result_scaled = ir_builder()->CreateMul(result_millis, upm_const); + auto result = ir_builder()->CreateAdd(result_scaled, remainder); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildExtractWrapper(const std::string& function_name, + const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit) { + auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + auto function = BuildFunction(function_name, i64, {{"ts", i64}}); + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto ts = &function->arg_begin()[0]; + int64_t upm = UnitsPerMilli(time_unit); + auto millis = FloorDiv(ts, llvm::ConstantInt::get(i64, upm)); + auto result = 
ir_builder()->CreateCall(precompiled_fn, {millis}); + + ir_builder()->CreateRet(result); + + return Status::OK(); +} + +Status TimestampIR::BuildTruncWrapper(const std::string& function_name, + const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit) { + auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + auto function = BuildFunction(function_name, i64, {{"ts", i64}}); + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto ts = &function->arg_begin()[0]; + int64_t upm = UnitsPerMilli(time_unit); + auto upm_const = llvm::ConstantInt::get(i64, upm); + auto millis = FloorDiv(ts, upm_const); + auto result_millis = ir_builder()->CreateCall(precompiled_fn, {millis}); + auto result = ir_builder()->CreateMul(result_millis, upm_const); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildDiffWrapper(const std::string& function_name, + const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit, + llvm::Type* return_type) { + auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + auto function = BuildFunction(function_name, return_type, {{"ts1", i64}, {"ts2", i64}}); + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto arg_iter = function->arg_begin(); + int64_t upm = UnitsPerMilli(time_unit); + auto upm_const = llvm::ConstantInt::get(i64, upm); + auto millis1 = FloorDiv(&arg_iter[0], upm_const); + auto millis2 = FloorDiv(&arg_iter[1], upm_const); + auto result = ir_builder()->CreateCall(precompiled_fn, {millis1, millis2}); + + 
ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildCastFromTimestampWrapper( + const std::string& function_name, const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit, llvm::Type* return_type) { + auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + auto function = BuildFunction(function_name, return_type, {{"ts", i64}}); + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto ts = &function->arg_begin()[0]; + int64_t upm = UnitsPerMilli(time_unit); + auto millis = FloorDiv(ts, llvm::ConstantInt::get(i64, upm)); + auto result = ir_builder()->CreateCall(precompiled_fn, {millis}); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildTimezoneWrapper(const std::string& function_name, + const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit) { + // fn(context, ts, tz_str, tz_len) -> ts + // Split-recombine: the timezone offset is a whole-second delta, so sub-ms survives. 
+ auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + auto i32 = types()->i32_type(); + auto i8ptr = types()->i8_ptr_type(); + auto function = BuildFunction( + function_name, i64, {{"ctx", i64}, {"ts", i64}, {"tz", i8ptr}, {"tz_len", i32}}); + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto arg_iter = function->arg_begin(); + auto ctx = &arg_iter[0]; + auto ts = &arg_iter[1]; + auto tz = &arg_iter[2]; + auto tz_len = &arg_iter[3]; + + int64_t upm = UnitsPerMilli(time_unit); + auto upm_const = llvm::ConstantInt::get(i64, upm); + auto [millis, remainder] = FloorDivRem(ts, upm_const); + + auto result_millis = + ir_builder()->CreateCall(precompiled_fn, {ctx, millis, tz, tz_len}); + auto result_scaled = ir_builder()->CreateMul(result_millis, upm_const); + auto result = ir_builder()->CreateAdd(result_scaled, remainder); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + +Status TimestampIR::BuildCastVARCHARWrapper(const std::string& function_name, + const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit) { + // fn(context, ts, len, &out_len) -> const char* + // For MICRO/NANO: call the millis formatter for the first 23 chars, then append + // sub-millisecond digits (3 for us, 6 for ns) into a new arena-allocated buffer. 
+ auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + auto i32 = types()->i32_type(); + auto i8 = types()->i8_type(); + auto i8ptr = types()->i8_ptr_type(); + auto i32ptr = types()->i32_ptr_type(); + auto function = + BuildFunction(function_name, i8ptr, + {{"ctx", i64}, {"ts", i64}, {"len", i64}, {"out_len", i32ptr}}); + + int64_t upm = UnitsPerMilli(time_unit); + int extra_digits = (time_unit == arrow::TimeUnit::MICRO) ? 3 + : (time_unit == arrow::TimeUnit::NANO) ? 6 + : 0; + + if (extra_digits == 0) { + // MILLI: pass through directly + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + auto arg_iter = function->arg_begin(); + auto millis = FloorDiv(&arg_iter[1], llvm::ConstantInt::get(i64, upm)); + auto result = ir_builder()->CreateCall( + precompiled_fn, {&arg_iter[0], millis, &arg_iter[2], &arg_iter[3]}); + ir_builder()->CreateRet(result); + return Status::OK(); + } + + // --- MICRO or NANO path --- + auto bb_entry = llvm::BasicBlock::Create(*context(), "entry", function); + auto bb_append = llvm::BasicBlock::Create(*context(), "append", function); + auto bb_done = llvm::BasicBlock::Create(*context(), "done", function); + + // Entry: call precompiled millis formatter + ir_builder()->SetInsertPoint(bb_entry); + auto arg_iter = function->arg_begin(); + auto ctx = &arg_iter[0]; + auto ts = &arg_iter[1]; + auto len = &arg_iter[2]; + auto out_len_ptr = &arg_iter[3]; + + auto upm_const = llvm::ConstantInt::get(i64, upm); + auto millis = FloorDiv(ts, upm_const); + auto base_buf = + ir_builder()->CreateCall(precompiled_fn, {ctx, millis, len, out_len_ptr}); + auto base_len = ir_builder()->CreateLoad(i32, out_len_ptr); + + // Check if length allows extra digits + auto base_len_i64 = ir_builder()->CreateSExt(base_len, i64); + auto has_room = 
ir_builder()->CreateICmpSLT(base_len_i64, len); + ir_builder()->CreateCondBr(has_room, bb_append, bb_done); + + // Append: allocate new buffer, copy prefix, write sub-ms digits + ir_builder()->SetInsertPoint(bb_append); + + auto extra_const = llvm::ConstantInt::get(i32, extra_digits); + auto full_len = ir_builder()->CreateAdd(base_len, extra_const); + + // Clamp to len + auto len_i32 = ir_builder()->CreateTrunc(len, i32); + auto clamped_len = ir_builder()->CreateSelect( + ir_builder()->CreateICmpSLT(full_len, len_i32), full_len, len_i32); + + // arena_malloc(ctx, clamped_len) + auto arena_fn = module()->getFunction("gdv_fn_context_arena_malloc"); + auto new_buf = ir_builder()->CreateCall(arena_fn, {ctx, clamped_len}); + + // memcpy(new_buf, base_buf, base_len) + ir_builder()->CreateMemCpy(new_buf, llvm::MaybeAlign(1), base_buf, llvm::MaybeAlign(1), + base_len); + + // Compute the non-negative sub-ms remainder consistent with floor division. + // FloorDivRem guarantees remainder is in [0, upm), even for negative timestamps. 
+ auto [millis_dup, sub_ms_rem] = FloorDivRem(ts, upm_const); + (void)millis_dup; + auto abs_rem = sub_ms_rem; + + // Write digits from most significant to least significant + // For MICRO: divisors are 100, 10, 1 + // For NANO: divisors are 100000, 10000, 1000, 100, 10, 1 + int64_t divisor = 1; + for (int i = 0; i < extra_digits - 1; ++i) divisor *= 10; + + llvm::BasicBlock* last_append_bb = nullptr; + for (int i = 0; i < extra_digits; ++i) { + auto idx = ir_builder()->CreateAdd(base_len, llvm::ConstantInt::get(i32, i)); + auto write_pos = ir_builder()->CreateICmpSLT(idx, clamped_len); + + // digit = (abs_rem / divisor) % 10 + '0' + auto d = ir_builder()->CreateSDiv(abs_rem, llvm::ConstantInt::get(i64, divisor)); + auto digit = ir_builder()->CreateSRem(d, llvm::ConstantInt::get(i64, 10)); + auto ch = ir_builder()->CreateAdd(ir_builder()->CreateTrunc(digit, i8), + llvm::ConstantInt::get(i8, '0')); + + auto gep = ir_builder()->CreateGEP(i8, new_buf, idx); + + auto bb_store = llvm::BasicBlock::Create(*context(), "store", function); + auto bb_next = llvm::BasicBlock::Create(*context(), "next", function); + ir_builder()->CreateCondBr(write_pos, bb_store, bb_next); + + ir_builder()->SetInsertPoint(bb_store); + ir_builder()->CreateStore(ch, gep); + ir_builder()->CreateBr(bb_next); + + ir_builder()->SetInsertPoint(bb_next); + last_append_bb = bb_next; + divisor /= 10; + } + + // Store final out_len + ir_builder()->CreateStore(clamped_len, out_len_ptr); + ir_builder()->CreateBr(bb_done); + + // Done: phi to select return value + ir_builder()->SetInsertPoint(bb_done); + auto phi = ir_builder()->CreatePHI(i8ptr, 2, "result"); + phi->addIncoming(base_buf, bb_entry); + phi->addIncoming(new_buf, last_append_bb); + + ir_builder()->CreateRet(phi); + return Status::OK(); +} + +llvm::Value* TimestampIR::FloorDiv(llvm::Value* ts, llvm::Value* divisor) { + auto i64 = types()->i64_type(); + auto zero = llvm::ConstantInt::get(i64, 0); + auto one = llvm::ConstantInt::get(i64, 1); + auto 
quotient = ir_builder()->CreateSDiv(ts, divisor); + auto remainder = ir_builder()->CreateSRem(ts, divisor); + auto has_remainder = ir_builder()->CreateICmpNE(remainder, zero); + auto is_negative = ir_builder()->CreateICmpSLT(ts, zero); + auto needs_adjust = ir_builder()->CreateAnd(is_negative, has_remainder); + return ir_builder()->CreateSub(quotient, + ir_builder()->CreateSelect(needs_adjust, one, zero)); +} + +std::pair TimestampIR::FloorDivRem(llvm::Value* ts, + llvm::Value* divisor) { + auto i64 = types()->i64_type(); + auto zero = llvm::ConstantInt::get(i64, 0); + auto one = llvm::ConstantInt::get(i64, 1); + auto quotient = ir_builder()->CreateSDiv(ts, divisor); + auto remainder = ir_builder()->CreateSRem(ts, divisor); + auto has_remainder = ir_builder()->CreateICmpNE(remainder, zero); + auto is_negative = ir_builder()->CreateICmpSLT(ts, zero); + auto needs_adjust = ir_builder()->CreateAnd(is_negative, has_remainder); + auto adj = ir_builder()->CreateSelect(needs_adjust, one, zero); + auto floor_q = ir_builder()->CreateSub(quotient, adj); + auto floor_r = ir_builder()->CreateAdd( + remainder, ir_builder()->CreateSelect(needs_adjust, divisor, zero)); + return {floor_q, floor_r}; +} + +/*static*/ Status TimestampIR::AddFunctions(Engine* engine) { + auto ts_ir = std::make_shared(engine); + auto i32 = ts_ir->types()->i32_type(); + auto i64 = ts_ir->types()->i64_type(); + auto f64 = ts_ir->types()->double_type(); + + for (auto unit : kUnits) { + auto sfx = UnitSuffix(unit); + + // Helper: skip functions whose precompiled base is missing, but warn on + // unexpected errors (type mismatch, LLVM failure, etc.). 
+ auto try_build = [](const std::string& ir_name, Status status) { + if (status.ok() || status.IsInvalid()) { + return; // OK or precompiled function not found — expected + } + ARROW_LOG(DEBUG) << "TimestampIR: unexpected error building " << ir_name << ": " + << status.ToString(); + }; + + // Fixed-unit: pure IR (always succeeds) + for (const auto& fa : kFixedAdds) { + auto ir_name = std::string(fa.name) + "_int32_timestamp" + sfx; + ARROW_RETURN_NOT_OK(ts_ir->BuildTimestampaddFixed(ir_name, fa.seconds, unit)); + } + + // Calendar-based: precompiled wrapper + for (const auto* ca : kCalendarAdds) { + auto ir_name = std::string(ca) + "_int32_timestamp" + sfx; + try_build(ir_name, + ts_ir->BuildTimestampaddCalendar(ir_name, + std::string(ca) + "_int32_timestamp", + unit)); + } + + // Extract functions + for (const auto* ex : kExtracts) { + auto ir_name = std::string(ex) + "_timestamp" + sfx; + try_build(ir_name, + ts_ir->BuildExtractWrapper(ir_name, std::string(ex) + "_timestamp", + unit)); + } + + // date_trunc functions + for (const auto* tr : kTruncs) { + auto ir_name = std::string(tr) + "_timestamp" + sfx; + try_build(ir_name, + ts_ir->BuildTruncWrapper(ir_name, std::string(tr) + "_timestamp", unit)); + } + + // timestampdiff functions (two ts -> int32) + for (const auto* di : kDiffs) { + auto ir_name = std::string(di) + "_timestamp_timestamp" + sfx; + try_build(ir_name, + ts_ir->BuildDiffWrapper(ir_name, + std::string(di) + "_timestamp_timestamp", unit, + i32)); + } + + // months_between / datediff + for (const auto& ts2 : kTwoTsScalars) { + auto ir_name = std::string(ts2.name) + "_timestamp_timestamp" + sfx; + try_build(ir_name, + ts_ir->BuildDiffWrapper(ir_name, + std::string(ts2.name) + "_timestamp_timestamp", + unit, ts2.returns_float ? 
f64 : i32)); + } + + // Cast from timestamp + for (const auto& c : kCastsFromTs) { + auto ir_name = std::string(c.name) + "_timestamp" + sfx; + try_build(ir_name, + ts_ir->BuildCastFromTimestampWrapper(ir_name, + std::string(c.name) + "_timestamp", + unit, c.returns_i32 ? i32 : i64)); + } + + // date_add/add/date_sub/subtract/date_diff with int32 and int64 + for (const auto& da : kDateArithEntries) { + for (auto* count_type : {i32, i64}) { + const char* type_name = (count_type == i32) ? "int32" : "int64"; + std::string ir_name; + if (da.count_first) { + ir_name = std::string(da.name) + "_" + type_name + "_timestamp" + sfx; + } else { + ir_name = std::string(da.name) + "_timestamp_" + type_name + sfx; + } + try_build(ir_name, + ts_ir->BuildDateArithFixed(ir_name, da.count_first, + da.sign * 86400LL, unit, count_type)); + } + } + + // int64 variants of timestampadd (pure IR arithmetic) + for (const auto& fa : kFixedAdds) { + auto ir_name = std::string(fa.name) + "_int64_timestamp" + sfx; + try_build(ir_name, + ts_ir->BuildDateArithFixed(ir_name, /*count_first=*/true, + fa.seconds, unit, i64)); + } + + // Reversed-arg variants: timestampaddX(timestamp, int32/int64) -> timestamp + for (const auto& fa : kFixedAdds) { + auto ir32 = std::string(fa.name) + "_timestamp_int32" + sfx; + try_build(ir32, + ts_ir->BuildDateArithFixed(ir32, /*count_first=*/false, + fa.seconds, unit, i32)); + auto ir64 = std::string(fa.name) + "_timestamp_int64" + sfx; + try_build(ir64, + ts_ir->BuildDateArithFixed(ir64, /*count_first=*/false, + fa.seconds, unit, i64)); + } + + // Reversed-arg calendar: timestampaddMonth/Quarter/Year(timestamp, int32/int64) + // and int64 calendar: timestampaddMonth/Quarter/Year(int64, timestamp) + // All use the precompiled (int32, timestamp) millis function as the base. 
+ for (const auto* ca : kCalendarAdds) { + auto millis_fn = std::string(ca) + "_int32_timestamp"; + // (timestamp, int32) variant + auto rev32 = std::string(ca) + "_timestamp_int32" + sfx; + try_build(rev32, + ts_ir->BuildTimestampaddCalendarGeneric(rev32, millis_fn, unit, + /*count_first=*/false, i32)); + // (timestamp, int64) variant + auto rev64 = std::string(ca) + "_timestamp_int64" + sfx; + try_build(rev64, + ts_ir->BuildTimestampaddCalendarGeneric(rev64, millis_fn, unit, + /*count_first=*/false, i64)); + // (int64, timestamp) variant + auto fwd64 = std::string(ca) + "_int64_timestamp" + sfx; + try_build(fwd64, + ts_ir->BuildTimestampaddCalendarGeneric(fwd64, millis_fn, unit, + /*count_first=*/true, i64)); + } + + // Timezone functions: to_utc/from_utc (split-recombine) + { + std::string ir_to = std::string("to_utc_timezone_timestamp") + sfx; + try_build(ir_to, + ts_ir->BuildTimezoneWrapper(ir_to, "to_utc_timezone_timestamp", unit)); + std::string ir_from = std::string("from_utc_timezone_timestamp") + sfx; + try_build(ir_from, + ts_ir->BuildTimezoneWrapper(ir_from, "from_utc_timezone_timestamp", unit)); + } + + // castVARCHAR(timestamp, int64): scale to millis + { + std::string ir_name = std::string("castVARCHAR_timestamp_int64") + sfx; + try_build(ir_name, + ts_ir->BuildCastVARCHARWrapper(ir_name, "castVARCHAR_timestamp_int64", + unit)); + } + } + + // Validate that the set of functions we tried to build matches AllFunctionNames(). + // This catches drift between AddFunctions() and BuildAllFunctionNames(). 
+ const auto& expected = AllFunctionNames(); + for (const auto& name : expected) { + if (!ts_ir->module()->getFunction(name)) { + ARROW_LOG(DEBUG) << "TimestampIR: " << name + << " in AllFunctionNames() but not created (precompiled base " + "likely missing — OK if intentional)"; + } + } + for (auto& fn : *ts_ir->module()) { + auto name = fn.getName().str(); + // Only check functions with unit suffixes that we generate + if ((name.find("_us") != std::string::npos || + name.find("_ns") != std::string::npos) && + expected.find(name) == expected.end()) { + ARROW_LOG(WARNING) << "TimestampIR: function " << name + << " was created but is not in AllFunctionNames() — " + "it will not be remapped during code generation"; + } + } + + return Status::OK(); +} + +} // namespace gandiva diff --git a/cpp/src/gandiva/timestamp_ir.h b/cpp/src/gandiva/timestamp_ir.h new file mode 100644 index 000000000000..4f0916fefbb1 --- /dev/null +++ b/cpp/src/gandiva/timestamp_ir.h @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "arrow/type.h" +#include "gandiva/function_ir_builder.h" + +namespace gandiva { + +/// @brief Timestamp IR functions for unit-aware timestamp operations. 
+///
+/// Follows the DecimalIR pattern: builds LLVM IR functions that handle
+/// timestamp values in their native TimeUnit (ms, us, ns) without lossy
+/// conversion.
+///
+/// Patterns:
+/// - Pure IR: arithmetic generated entirely in IR (e.g., timestampaddSecond).
+/// - Calendar wrapper: split into millis + remainder, call precompiled, recombine
+///   (e.g., timestampaddMonth).
+/// - Extract wrapper: convert to millis, call precompiled, return scalar
+///   (e.g., extractMonth).
+/// - Trunc wrapper: convert to millis, call precompiled truncation, scale back,
+///   zero sub-milli remainder (e.g., date_trunc_Day).
+/// - Diff wrapper: convert both inputs to millis, call precompiled, return scalar
+///   (e.g., timestampdiffDay, months_between).
+/// - Cast wrapper: convert to millis, call precompiled cast
+///   (e.g., castDATE, castVARCHAR).
+class TimestampIR : public FunctionIRBuilder {
+ public:
+  /// Forwards `engine` to the FunctionIRBuilder base.
+  explicit TimestampIR(Engine* engine) : FunctionIRBuilder(engine) {}
+
+  /// Builds the unit-specific timestamp IR functions for `engine`.
+  /// Invoked from Engine::LoadFunctionIRs() right after DecimalIR::AddFunctions().
+  static Status AddFunctions(Engine* engine);
+
+  /// NOTE(review): presumably reports whether `function_name` is one of the
+  /// names generated by AddFunctions(); the definition is not visible here.
+  static bool IsTimestampIRFunction(const std::string& function_name);
+
+  /// NOTE(review): presumably the number of `unit` ticks in one second
+  /// (definition not visible here; confirm).
+  static int64_t UnitsPerSecond(arrow::TimeUnit::type unit);
+
+  /// NOTE(review): presumably the number of `unit` ticks in one millisecond
+  /// (definition not visible here; confirm).
+  static int64_t UnitsPerMilli(arrow::TimeUnit::type unit);
+
+ private:
+  // ts + count * fixed_constant (pure IR)
+  // seconds_per_unit: presumably the length in seconds of one count step
+  // (TODO confirm against the definition).
+  Status BuildTimestampaddFixed(const std::string& fn, int64_t seconds_per_unit,
+                                arrow::TimeUnit::type unit);
+
+  // date_add/add/date_sub/subtract/date_diff and the int64 / reversed-argument
+  // timestampadd variants (pure IR).
+  // count_first: true for fn(count, ts), false for fn(ts, count).
+  // seconds_per_count: positive for add, negative for sub
+  // (AddFunctions passes sign * 86400 for the date functions).
+  // count_type: LLVM integer type of the count argument (int32 or int64).
+  Status BuildDateArithFixed(const std::string& fn, bool count_first,
+                             int64_t seconds_per_count, arrow::TimeUnit::type unit,
+                             llvm::Type* count_type);
+
+  // ts + count * months via precompiled calendar math (split/recombine)
+  Status BuildTimestampaddCalendar(const std::string& fn,
+                                   const std::string& precompiled_fn,
+                                   arrow::TimeUnit::type unit);
+
+  // Generic calendar wrapper: handles both arg orders and int32/int64 count.
+  // AddFunctions always passes the precompiled (int32, timestamp) millis
+  // function as precompiled_fn, whatever count_first / count_type are.
+  Status BuildTimestampaddCalendarGeneric(const std::string& fn,
+                                          const std::string& precompiled_fn,
+                                          arrow::TimeUnit::type unit, bool count_first,
+                                          llvm::Type* count_type);
+
+  // Extract: convert ts to millis, call precompiled, return int64
+  // fn(int64 ts) -> int64
+  Status BuildExtractWrapper(const std::string& fn, const std::string& precompiled_fn,
+                             arrow::TimeUnit::type unit);
+
+  // date_trunc: convert ts to millis, call precompiled trunc, scale back
+  // The truncation zeroes sub-unit data, so no remainder recombination.
+  // fn(int64 ts) -> int64
+  Status BuildTruncWrapper(const std::string& fn, const std::string& precompiled_fn,
+                           arrow::TimeUnit::type unit);
+
+  // Diff: convert both ts inputs to millis, call precompiled, return scalar
+  // fn(int64 ts1, int64 ts2) -> int32 or float64 (selected by return_type)
+  Status BuildDiffWrapper(const std::string& fn, const std::string& precompiled_fn,
+                          arrow::TimeUnit::type unit, llvm::Type* return_type);
+
+  // Cast: convert ts to millis, call precompiled cast (variable signatures)
+  // return_type: int32 or int64, as chosen per cast by AddFunctions.
+  Status BuildCastFromTimestampWrapper(const std::string& fn,
+                                       const std::string& precompiled_fn,
+                                       arrow::TimeUnit::type unit,
+                                       llvm::Type* return_type);
+
+  // Timezone: split-recombine wrapper for to_utc/from_utc
+  // fn(context, ts, tz_str, tz_len) -> ts
+  Status BuildTimezoneWrapper(const std::string& fn, const std::string& precompiled_fn,
+                              arrow::TimeUnit::type unit);
+
+  // castVARCHAR: scale ts to millis before formatting
+  // fn(context, ts, len, &out_len) -> const char*
+  Status BuildCastVARCHARWrapper(const std::string& fn, const std::string& precompiled_fn,
+                                 arrow::TimeUnit::type unit);
+
+  // Floor division: ts / divisor rounded toward negative infinity.
+  // C/LLVM SDiv truncates toward zero, which gives wrong millis for negative
+  // timestamps with non-zero sub-ms components (e.g., SDiv(-456, 1000) = 0
+  // instead of -1). This helper corrects the quotient.
+  llvm::Value* FloorDiv(llvm::Value* ts, llvm::Value* divisor);
+
+  // Floor division with remainder: returns {quotient, remainder} where
+  // quotient * divisor + remainder == ts and 0 <= remainder < divisor.
+  // Used by split-recombine wrappers (timezone, calendar add).
+  // The pair's element types are spelled out: a bare `std::pair` is not a
+  // valid function return type.
+  std::pair<llvm::Value*, llvm::Value*> FloorDivRem(llvm::Value* ts,
+                                                    llvm::Value* divisor);
+};
+
+}  // namespace gandiva