diff --git a/velox/functions/lib/DateTimeFormatter.cpp b/velox/functions/lib/DateTimeFormatter.cpp
index 3a104a6f1d98..10f030735c10 100644
--- a/velox/functions/lib/DateTimeFormatter.cpp
+++ b/velox/functions/lib/DateTimeFormatter.cpp
@@ -214,27 +214,6 @@ std::string padContent(
   }
 }
 
-void validateTimePoint(const std::chrono::time_point<
-    std::chrono::system_clock,
-    std::chrono::milliseconds>& timePoint) {
-  // Due to the limit of std::chrono we can only represent time in
-  // [-32767-01-01, 32767-12-31] date range
-  const auto minTimePoint = date::sys_days{
-      date::year_month_day(date::year::min(), date::month(1), date::day(1))};
-  const auto maxTimePoint = date::sys_days{
-      date::year_month_day(date::year::max(), date::month(12), date::day(31))};
-  if (timePoint < minTimePoint || timePoint > maxTimePoint) {
-    VELOX_USER_FAIL(
-        "Cannot format time out of range of [{}-{}-{}, {}-{}-{}]",
-        (int)date::year::min(),
-        "01",
-        "01",
-        (int)date::year::max(),
-        "12",
-        "31");
-  }
-}
-
 size_t countOccurence(const std::string_view& base, const std::string& target) {
   int occurrences = 0;
   std::string::size_type pos = 0;
@@ -952,10 +931,13 @@ void parseFromPattern(
 std::string DateTimeFormatter::format(
     const Timestamp& timestamp,
     const date::time_zone* timezone) const {
-  const std::chrono::
-      time_point<std::chrono::system_clock, std::chrono::milliseconds>
-          timePoint(std::chrono::milliseconds(timestamp.toMillis()));
-  validateTimePoint(timePoint);
+  // Format wall-clock time in 'timezone' when one is given; otherwise format
+  // the timestamp as is. toTimePoint() throws for out-of-range timestamps.
+  Timestamp t = timestamp;
+  if (timezone != nullptr) {
+    t.toTimezone(*timezone);
+  }
+  const auto timePoint = t.toTimePoint();
   const auto daysTimePoint = date::floor<date::days>(timePoint);
   const auto durationInTheDay = date::make_time(timePoint - daysTimePoint);
 
diff --git a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
index 6a7677d007f2..e49294e94d4e 100644
--- a/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
+++ b/velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
@@ -696,7 +696,9 @@ TEST_F(DateTimeFunctionsTest, hour) {
 
   EXPECT_EQ(std::nullopt, hour(std::nullopt));
   EXPECT_EQ(13, hour(Timestamp(0, 0)));
+  // One nanosecond before the epoch still belongs to the previous hour. Guards
+  // against truncating negative sub-second timestamps toward zero.
   EXPECT_EQ(12, hour(Timestamp(-1, Timestamp::kMaxNanos)));
   // Disabled for now because the TZ for Pacific/Apia in 2096 varies between
   // systems.
   // EXPECT_EQ(21, hour(Timestamp(4000000000, 0)));
@@ -2529,12 +2531,12 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {
 
   // Multi-specifier and literal formats
   EXPECT_EQ(
-      "AD 19 1970 4 Thu 1970 1 1 1 AM 2 2 2 2 33 11 5 Asia/Kolkata",
+      "AD 19 1970 4 Thu 1970 1 1 1 AM 8 8 8 8 3 11 5 Asia/Kolkata",
       formatDatetime(
           fromTimestampString("1970-01-01 02:33:11.5"),
           "G C Y e E y D M d a K h H k m s S zzzz"));
   EXPECT_EQ(
-      "AD 19 1970 4 asdfghjklzxcvbnmqwertyuiop Thu ' 1970 1 1 1 AM 2 2 2 2 33 11 5 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ Asia/Kolkata",
+      "AD 19 1970 4 asdfghjklzxcvbnmqwertyuiop Thu ' 1970 1 1 1 AM 8 8 8 8 3 11 5 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ Asia/Kolkata",
       formatDatetime(
           fromTimestampString("1970-01-01 02:33:11.5"),
           "G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ zzzz"));
@@ -2787,21 +2789,46 @@ TEST_F(DateTimeFunctionsTest, dateFormat) {
   EXPECT_EQ("z", dateFormat(fromTimestampString("1970-01-01"), "%z"));
   EXPECT_EQ("g", dateFormat(fromTimestampString("1970-01-01"), "%g"));
 
-  // With timezone
+  // With timezone. Indian Standard Time (IST) UTC+5:30.
   setQueryTimeZone("Asia/Kolkata");
+
   EXPECT_EQ(
       "1970-01-01", dateFormat(fromTimestampString("1970-01-01"), "%Y-%m-%d"));
   EXPECT_EQ(
-      "2000-02-29 12:00:00 AM",
+      "2000-02-29 05:30:00 AM",
       dateFormat(
           fromTimestampString("2000-02-29 00:00:00.987"), "%Y-%m-%d %r"));
   EXPECT_EQ(
-      "2000-02-29 00:00:00.987000",
+      "2000-02-29 05:30:00.987000",
       dateFormat(
           fromTimestampString("2000-02-29 00:00:00.987"),
           "%Y-%m-%d %H:%i:%s.%f"));
+  // Before standard time was adopted the zone uses local mean time,
+  // UTC+5:53:28.
   EXPECT_EQ(
-      "-2000-02-29 00:00:00.987000",
+      "-2000-02-29 05:53:28.987000",
+      dateFormat(
+          fromTimestampString("-2000-02-29 00:00:00.987"),
+          "%Y-%m-%d %H:%i:%s.%f"));
+
+  // Same timestamps with a different timezone. Pacific Standard Time (North
+  // America) PST UTC-8:00.
+  setQueryTimeZone("America/Los_Angeles");
+
+  EXPECT_EQ(
+      "1969-12-31", dateFormat(fromTimestampString("1970-01-01"), "%Y-%m-%d"));
+  EXPECT_EQ(
+      "2000-02-28 04:00:00 PM",
+      dateFormat(
+          fromTimestampString("2000-02-29 00:00:00.987"), "%Y-%m-%d %r"));
+  EXPECT_EQ(
+      "2000-02-28 16:00:00.987000",
+      dateFormat(
+          fromTimestampString("2000-02-29 00:00:00.987"),
+          "%Y-%m-%d %H:%i:%s.%f"));
+  // Local mean time for this zone is UTC-7:52:58.
+  EXPECT_EQ(
+      "-2000-02-28 16:07:02.987000",
       dateFormat(
           fromTimestampString("-2000-02-29 00:00:00.987"),
           "%Y-%m-%d %H:%i:%s.%f"));
diff --git a/velox/functions/sparksql/DateTimeFunctions.h b/velox/functions/sparksql/DateTimeFunctions.h
index d68246709818..7588bc043109 100644
--- a/velox/functions/sparksql/DateTimeFunctions.h
+++ b/velox/functions/sparksql/DateTimeFunctions.h
@@ -152,6 +152,52 @@ struct UnixTimestampParseWithFormatFunction
   bool invalidFormat_{false};
 };
 
+/// from_unixtime(unixtime, format) -> varchar
+///
+/// Formats 'unixtime', the number of seconds since 1970-01-01 00:00:00 UTC, as
+/// a string in the session time zone using a Joda-style 'format' pattern.
+template <typename T>
+struct FromUnixtimeFunction {
+  VELOX_DEFINE_FUNCTION_TYPES(T);
+
+  const date::time_zone* sessionTimeZone_ = nullptr;
+  std::shared_ptr<DateTimeFormatter> formatter_;
+  bool isConstantTimeFormat_ = false;
+
+  FOLLY_ALWAYS_INLINE void initialize(
+      const core::QueryConfig& config,
+      const arg_type<int64_t>* /*unixtime*/,
+      const arg_type<Varchar>* timeFormat) {
+    sessionTimeZone_ = getTimeZoneFromConfig(config);
+    // A non-null 'timeFormat' means the pattern is constant, so build the
+    // formatter once here instead of once per row in call().
+    if (timeFormat != nullptr) {
+      isConstantTimeFormat_ = true;
+      formatter_ = buildJodaDateTimeFormatter(
+          std::string_view(timeFormat->data(), timeFormat->size()));
+    }
+  }
+
+  FOLLY_ALWAYS_INLINE void call(
+      out_type<Varchar>& result,
+      const arg_type<int64_t> second,
+      const arg_type<Varchar> timeFormat) {
+    if (!isConstantTimeFormat_) {
+      formatter_ = buildJodaDateTimeFormatter(
+          std::string_view(timeFormat.data(), timeFormat.size()));
+    }
+    // Build the timestamp from seconds directly; 1000 * second would overflow
+    // int64_t for large inputs.
+    const Timestamp timestamp(second, 0);
+    const auto formatted = formatter_->format(timestamp, sessionTimeZone_);
+    const auto resultSize = formatted.size();
+    result.resize(resultSize);
+    if (resultSize != 0) {
+      std::memcpy(result.data(), formatted.data(), resultSize);
+    }
+  }
+};
+
 template <typename T>
 struct MakeDateFunction {
   VELOX_DEFINE_FUNCTION_TYPES(T);
diff --git a/velox/functions/sparksql/Register.cpp b/velox/functions/sparksql/Register.cpp
index 8fa92cefa117..a43a27a2cf30 100644
--- a/velox/functions/sparksql/Register.cpp
+++ b/velox/functions/sparksql/Register.cpp
@@ -206,6 +206,9 @@ void registerFunctions(const std::string& prefix) {
 
   registerFunction<LastDayFunction, Date, Date>({prefix + "last_day"});
 
+  registerFunction<FromUnixtimeFunction, Varchar, int64_t, Varchar>(
+      {prefix + "from_unixtime"});
+
   // Register bloom filter function
   registerFunction<BloomFilterMightContainFunction, bool, Varbinary, int64_t>(
       {prefix + "might_contain"});
diff --git a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
index 03bed76d4816..9932fe3e156c 100644
--- a/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
+++ b/velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
@@ -271,5 +271,17 @@ TEST_F(DateTimeFunctionsTest, dateDiff) {
   EXPECT_EQ(-366, dateDiff(parseDate("2020-02-29"), parseDate("2019-02-28")));
 }
 
+TEST_F(DateTimeFunctionsTest, fromUnixTime) {
+  const auto fromUnixTime = [&](std::optional<int64_t> unixTime,
+                                std::optional<std::string> timeFormat) {
+    return evaluateOnce<std::string>(
+        "from_unixtime(c0, c1)", unixTime, timeFormat);
+  };
+
+  EXPECT_EQ(fromUnixTime(100, "yyyy-MM-dd"), "1970-01-01");
+  EXPECT_EQ(fromUnixTime(120, "yyyy-MM-dd HH:mm"), "1970-01-01 00:02");
+  EXPECT_EQ(fromUnixTime(100, "yyyy-MM-dd HH:mm:ss"), "1970-01-01 00:01:40");
+}
+
 } // namespace
 } // namespace facebook::velox::functions::sparksql::test
diff --git a/velox/type/Timestamp.cpp b/velox/type/Timestamp.cpp
index 563289bc68aa..7758d36b8ad0 100644
--- a/velox/type/Timestamp.cpp
+++ b/velox/type/Timestamp.cpp
@@ -69,9 +69,45 @@ void Timestamp::toGMT(int16_t tzID) {
   }
 }
 
+namespace {
+/// Throws a user error unless 'timePoint' lies between the first day of
+/// date::year::min() and the last day of date::year::max().
+void validateTimePoint(const std::chrono::time_point<
+    std::chrono::system_clock,
+    std::chrono::milliseconds>& timePoint) {
+  // Due to the limit of std::chrono we can only represent time in
+  // [-32767-01-01, 32767-12-31] date range
+  const auto minTimePoint = date::sys_days{
+      date::year_month_day(date::year::min(), date::month(1), date::day(1))};
+  const auto maxTimePoint = date::sys_days{
+      date::year_month_day(date::year::max(), date::month(12), date::day(31))};
+  if (timePoint < minTimePoint || timePoint > maxTimePoint) {
+    VELOX_USER_FAIL(
+        "Timestamp is outside of supported range of [{}-{}-{}, {}-{}-{}]",
+        (int)date::year::min(),
+        "01",
+        "01",
+        (int)date::year::max(),
+        "12",
+        "31");
+  }
+}
+} // namespace
+
+std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>
+Timestamp::toTimePoint() const {
+  auto tp = std::chrono::
+      time_point<std::chrono::system_clock, std::chrono::milliseconds>(
+          std::chrono::milliseconds(toMillis()));
+  validateTimePoint(tp);
+  return tp;
+}
+
 void Timestamp::toTimezone(const date::time_zone& zone) {
-  auto tp = std::chrono::time_point<std::chrono::system_clock>(
-      std::chrono::seconds(seconds_));
+  auto tp = toTimePoint();
   auto epoch = zone.to_local(tp).time_since_epoch();
-  seconds_ = std::chrono::duration_cast<std::chrono::seconds>(epoch).count();
+  // 'epoch' has millisecond precision, so round toward negative infinity to
+  // get the whole seconds. duration_cast truncates toward zero, which moves
+  // timestamps with a negative fractional part one second into the future.
+  seconds_ = std::chrono::floor<std::chrono::seconds>(epoch).count();
 }
diff --git a/velox/type/Timestamp.h b/velox/type/Timestamp.h
index c3b7c04ff4ee..0dbf889b6236 100644
--- a/velox/type/Timestamp.h
+++ b/velox/type/Timestamp.h
@@ -117,6 +117,12 @@ struct Timestamp {
     }
   }
 
+  /// Returns this timestamp as a system_clock time point with millisecond
+  /// precision. Due to the limit of std::chrono, throws if timestamp is
+  /// outside of [-32767-01-01, 32767-12-31] range.
+  std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>
+  toTimePoint() const;
+
   static Timestamp fromMillis(int64_t millis) {
     if (millis >= 0 || millis % 1'000 == 0) {
       return Timestamp(millis / 1'000, (millis % 1'000) * 1'000'000);
diff --git a/velox/type/tests/TimestampTest.cpp b/velox/type/tests/TimestampTest.cpp
index c157da6f5ff5..08287dad3972 100644
--- a/velox/type/tests/TimestampTest.cpp
+++ b/velox/type/tests/TimestampTest.cpp
@@ -17,6 +17,7 @@
 #include <gtest/gtest.h>
 
 #include "velox/common/base/tests/GTestUtils.h"
+#include "velox/external/date/tz.h"
 #include "velox/type/Timestamp.h"
 
 namespace facebook::velox {
@@ -158,5 +159,24 @@ TEST(TimestampTest, toString) {
   EXPECT_EQ("-292275055-05-16T16:47:04.000000000", kMin.toString());
   EXPECT_EQ("292278994-08-17T07:12:55.999999999", kMax.toString());
 }
+
+TEST(TimestampTest, outOfRange) {
+  auto* timezone = date::locate_zone("GMT");
+  Timestamp t(-3217830796800, 0);
+
+  VELOX_ASSERT_THROW(
+      t.toTimePoint(), "Timestamp is outside of supported range");
+  VELOX_ASSERT_THROW(
+      t.toTimezone(*timezone), "Timestamp is outside of supported range");
+}
+
+TEST(TimestampTest, toTimezoneBeforeEpoch) {
+  auto* timezone = date::locate_zone("GMT");
+  // One nanosecond before the epoch. GMT has a zero offset, so converting must
+  // not change the timestamp.
+  Timestamp t(-1, Timestamp::kMaxNanos);
+  t.toTimezone(*timezone);
+  EXPECT_EQ(Timestamp(-1, Timestamp::kMaxNanos).toString(), t.toString());
+}
 } // namespace
 } // namespace facebook::velox