From 73313a774a728601423303bf0f997b59a8053b86 Mon Sep 17 00:00:00 2001 From: Arkadii Kravchuk Date: Fri, 17 Apr 2026 15:52:27 +0300 Subject: [PATCH] DX-105463: [C++][Gandiva] Add TimestampIR wrapper for next_day MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit next_day(timestamp, utf8) was registered in the C++ function registry for timestamp inputs but was not included in any TimestampIR table, so no _us/_ns IR wrappers were generated. With the relaxed DataTypeEquals (ignoring TimeUnit), calls like next_day(timestamp[us], 'MO') pass validation and are routed to Gandiva, but BuildFunctionCall silently falls through to the precompiled millis function, which interprets microseconds as milliseconds — producing dates ~51,000 years in the future (e.g. +53425-02-28 for a 2021 input). This adds a BuildNextDayWrapper that scales the timestamp input to millis via FloorDiv and calls the precompiled next_day_from_timestamp(context, millis, day_str, day_len) function. Pattern follows the cast-from-timestamp wrapper shape with the additional (context, string, length) args. No remainder recombination is needed: next_day returns date64 (midnight of the next weekday), so sub-millisecond input precision is not meaningful in the result. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cpp/src/gandiva/timestamp_ir.cc | 42 +++++++++++++++++++++++++++++++++ cpp/src/gandiva/timestamp_ir.h | 6 +++++ 2 files changed, 48 insertions(+) diff --git a/cpp/src/gandiva/timestamp_ir.cc b/cpp/src/gandiva/timestamp_ir.cc index 947c377050e5..49af53deb0d1 100644 --- a/cpp/src/gandiva/timestamp_ir.cc +++ b/cpp/src/gandiva/timestamp_ir.cc @@ -189,6 +189,7 @@ static std::unordered_set BuildAllFunctionNames() { names.insert(std::string("to_utc_timezone_timestamp") + sfx); names.insert(std::string("from_utc_timezone_timestamp") + sfx); names.insert(std::string("castVARCHAR_timestamp_int64") + sfx); + names.insert(std::string("next_day_from_timestamp") + sfx); } return names; } @@ -462,6 +463,40 @@ Status TimestampIR::BuildTimezoneWrapper(const std::string& function_name, return Status::OK(); } +Status TimestampIR::BuildNextDayWrapper(const std::string& function_name, + const std::string& precompiled_millis_fn, + arrow::TimeUnit::type time_unit) { + // fn(context, ts, day_str, day_len) -> date64, where ts is expressed in `time_unit`. + // next_day returns the date of the next weekday (e.g. 'MO' for Monday) at midnight, + // so sub-ms precision cannot affect the result: FloorDiv ts to millis, then call. 
+ auto precompiled_fn = module()->getFunction(precompiled_millis_fn); + if (!precompiled_fn) { + return Status::Invalid("Precompiled function not found: ", precompiled_millis_fn); + } + + auto i64 = types()->i64_type(); + auto i32 = types()->i32_type(); + auto i8ptr = llvm::PointerType::get(*context(), 0); + auto function = BuildFunction( + function_name, i64, + {{"ctx", i64}, {"ts", i64}, {"day", i8ptr}, {"day_len", i32}}); + auto entry = llvm::BasicBlock::Create(*context(), "entry", function); + ir_builder()->SetInsertPoint(entry); + + auto arg_iter = function->arg_begin(); + auto ctx = &arg_iter[0]; + auto ts = &arg_iter[1]; + auto day = &arg_iter[2]; + auto day_len = &arg_iter[3]; + + int64_t upm = UnitsPerMilli(time_unit); + auto millis = FloorDiv(ts, llvm::ConstantInt::get(i64, upm)); + auto result = ir_builder()->CreateCall(precompiled_fn, {ctx, millis, day, day_len}); + + ir_builder()->CreateRet(result); + return Status::OK(); +} + Status TimestampIR::BuildCastVARCHARWrapper(const std::string& function_name, const std::string& precompiled_millis_fn, arrow::TimeUnit::type time_unit) { @@ -775,6 +810,13 @@ std::pair TimestampIR::FloorDivRem( ts_ir->BuildCastVARCHARWrapper(ir_name, "castVARCHAR_timestamp_int64", unit)); } + + // next_day(timestamp, utf8): scale to millis, return date64 + { + std::string ir_name = std::string("next_day_from_timestamp") + sfx; + try_build(ir_name, + ts_ir->BuildNextDayWrapper(ir_name, "next_day_from_timestamp", unit)); + } } // Validate that the set of functions we tried to build matches AllFunctionNames(). 
diff --git a/cpp/src/gandiva/timestamp_ir.h b/cpp/src/gandiva/timestamp_ir.h index 923d954f7e02..2891664b17b3 100644 --- a/cpp/src/gandiva/timestamp_ir.h +++ b/cpp/src/gandiva/timestamp_ir.h @@ -113,6 +113,12 @@ class TimestampIR : public FunctionIRBuilder { const std::string& precompiled_fn, arrow::TimeUnit::type unit); + // next_day: floor-divide ts to millis, call the precompiled millis fn, return date64. + // fn(context, ts, day_str, day_len) -> date64 (emitted as an LLVM i64) + Status BuildNextDayWrapper(const std::string& fn, + const std::string& precompiled_fn, + arrow::TimeUnit::type unit); + // Floor division: ts / divisor rounded toward negative infinity. // C/LLVM SDiv truncates toward zero, which gives wrong millis for negative // timestamps with non-zero sub-ms components (e.g., SDiv(-456, 1000) = 0