From f693e80e3d3f95488d5c6ffe4093aef01c22ea6e Mon Sep 17 00:00:00 2001 From: "joey.ljy" Date: Sat, 23 Mar 2024 17:59:29 +0800 Subject: [PATCH] nanos in timestamp INT96 can be negative --- velox/dwio/parquet/reader/PageReader.cpp | 6 +++--- velox/type/Timestamp.h | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/velox/dwio/parquet/reader/PageReader.cpp b/velox/dwio/parquet/reader/PageReader.cpp index 7bf278329d87..46e11ec07db5 100644 --- a/velox/dwio/parquet/reader/PageReader.cpp +++ b/velox/dwio/parquet/reader/PageReader.cpp @@ -388,15 +388,15 @@ void PageReader::prepareDictionary(const PageHeader& pageHeader) { for (auto i = dictionary_.numValues - 1; i >= 0; --i) { // Convert the timestamp into seconds and nanos since the Unix epoch, // 00:00:00.000000 on 1 January 1970. - uint64_t nanos; + int64_t nanos; memcpy( &nanos, parquetValues + i * sizeof(Int96Timestamp), - sizeof(uint64_t)); + sizeof(int64_t)); int32_t days; memcpy( &days, - parquetValues + i * sizeof(Int96Timestamp) + sizeof(uint64_t), + parquetValues + i * sizeof(Int96Timestamp) + sizeof(int64_t), sizeof(int32_t)); values[i] = Timestamp::fromDaysAndNanos(days, nanos); diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h index c6565ce1bf41..c31686f644a0 100644 --- a/velox/type/Timestamp.h +++ b/velox/type/Timestamp.h @@ -101,7 +101,7 @@ struct Timestamp { constexpr Timestamp() : seconds_(0), nanos_(0) {} - Timestamp(int64_t seconds, uint64_t nanos) + Timestamp(int64_t seconds, int64_t nanos) : seconds_(seconds), nanos_(nanos) { VELOX_USER_DCHECK_GE( seconds, kMinSeconds, "Timestamp seconds out of range"); @@ -110,16 +110,19 @@ struct Timestamp { VELOX_USER_DCHECK_LE(nanos, kMaxNanos, "Timestamp nanos out of range"); } - static Timestamp fromDaysAndNanos(int32_t days, uint64_t nanos) { + static Timestamp fromDaysAndNanos(int32_t days, int64_t nanos) { static constexpr int64_t kJulianToUnixEpochDays = 2440588LL; static constexpr int64_t kSecondsPerDay = 86400LL; static 
constexpr int64_t kNanosPerSecond = Timestamp::kNanosecondsInMillisecond * Timestamp::kMillisecondsInSecond; int64_t seconds = (days - kJulianToUnixEpochDays) * kSecondsPerDay; - if (nanos > Timestamp::kMaxNanos) { + if (nanos > static_cast<int64_t>(Timestamp::kMaxNanos)) { seconds += nanos / kNanosPerSecond; nanos -= (nanos / kNanosPerSecond) * kNanosPerSecond; + } else if (nanos < 0) { + seconds += (nanos / kNanosPerSecond - 1); + nanos -= (nanos / kNanosPerSecond - 1) * kNanosPerSecond; } return Timestamp(seconds, nanos);