Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support from_unixtime function #406

Merged
merged 4 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 5 additions & 25 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,27 +214,6 @@ std::string padContent(
}
}

void validateTimePoint(const std::chrono::time_point<
std::chrono::system_clock,
std::chrono::milliseconds>& timePoint) {
// Due to the limit of std::chrono we can only represent time in
// [-32767-01-01, 32767-12-31] date range
const auto minTimePoint = date::sys_days{
date::year_month_day(date::year::min(), date::month(1), date::day(1))};
const auto maxTimePoint = date::sys_days{
date::year_month_day(date::year::max(), date::month(12), date::day(31))};
if (timePoint < minTimePoint || timePoint > maxTimePoint) {
VELOX_USER_FAIL(
"Cannot format time out of range of [{}-{}-{}, {}-{}-{}]",
(int)date::year::min(),
"01",
"01",
(int)date::year::max(),
"12",
"31");
}
}

size_t countOccurence(const std::string_view& base, const std::string& target) {
int occurrences = 0;
std::string::size_type pos = 0;
Expand Down Expand Up @@ -952,10 +931,11 @@ void parseFromPattern(
std::string DateTimeFormatter::format(
const Timestamp& timestamp,
const date::time_zone* timezone) const {
const std::chrono::
time_point<std::chrono::system_clock, std::chrono::milliseconds>
timePoint(std::chrono::milliseconds(timestamp.toMillis()));
validateTimePoint(timePoint);
Timestamp t = timestamp;
if (timezone != nullptr) {
t.toTimezone(*timezone);
}
const auto timePoint = t.toTimePoint();
const auto daysTimePoint = date::floor<date::days>(timePoint);

const auto durationInTheDay = date::make_time(timePoint - daysTimePoint);
Expand Down
36 changes: 29 additions & 7 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ TEST_F(DateTimeFunctionsTest, hour) {

EXPECT_EQ(std::nullopt, hour(std::nullopt));
EXPECT_EQ(13, hour(Timestamp(0, 0)));
EXPECT_EQ(12, hour(Timestamp(-1, Timestamp::kMaxNanos)));
EXPECT_EQ(13, hour(Timestamp(-1, Timestamp::kMaxNanos)));
// Disabled for now because the TZ for Pacific/Apia in 2096 varies between
// systems.
// EXPECT_EQ(21, hour(Timestamp(4000000000, 0)));
Expand Down Expand Up @@ -2529,12 +2529,12 @@ TEST_F(DateTimeFunctionsTest, formatDateTime) {

// Multi-specifier and literal formats
EXPECT_EQ(
"AD 19 1970 4 Thu 1970 1 1 1 AM 2 2 2 2 33 11 5 Asia/Kolkata",
"AD 19 1970 4 Thu 1970 1 1 1 AM 8 8 8 8 3 11 5 Asia/Kolkata",
formatDatetime(
fromTimestampString("1970-01-01 02:33:11.5"),
"G C Y e E y D M d a K h H k m s S zzzz"));
EXPECT_EQ(
"AD 19 1970 4 asdfghjklzxcvbnmqwertyuiop Thu ' 1970 1 1 1 AM 2 2 2 2 33 11 5 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ Asia/Kolkata",
"AD 19 1970 4 asdfghjklzxcvbnmqwertyuiop Thu ' 1970 1 1 1 AM 8 8 8 8 3 11 5 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ Asia/Kolkata",
formatDatetime(
fromTimestampString("1970-01-01 02:33:11.5"),
"G C Y e 'asdfghjklzxcvbnmqwertyuiop' E '' y D M d a K h H k m s S 1234567890\\\"!@#$%^&*()-+`~{}[];:,./ zzzz"));
Expand Down Expand Up @@ -2787,21 +2787,43 @@ TEST_F(DateTimeFunctionsTest, dateFormat) {
EXPECT_EQ("z", dateFormat(fromTimestampString("1970-01-01"), "%z"));
EXPECT_EQ("g", dateFormat(fromTimestampString("1970-01-01"), "%g"));

// With timezone
// With timezone. Indian Standard Time (IST) UTC+5:30.
setQueryTimeZone("Asia/Kolkata");

EXPECT_EQ(
"1970-01-01", dateFormat(fromTimestampString("1970-01-01"), "%Y-%m-%d"));
EXPECT_EQ(
"2000-02-29 12:00:00 AM",
"2000-02-29 05:30:00 AM",
dateFormat(
fromTimestampString("2000-02-29 00:00:00.987"), "%Y-%m-%d %r"));
EXPECT_EQ(
"2000-02-29 00:00:00.987000",
"2000-02-29 05:30:00.987000",
dateFormat(
fromTimestampString("2000-02-29 00:00:00.987"),
"%Y-%m-%d %H:%i:%s.%f"));
EXPECT_EQ(
"-2000-02-29 00:00:00.987000",
"-2000-02-29 05:53:29.987000",
dateFormat(
fromTimestampString("-2000-02-29 00:00:00.987"),
"%Y-%m-%d %H:%i:%s.%f"));

// Same timestamps with a different timezone. Pacific Daylight Time (North
// America) PDT UTC-8:00.
setQueryTimeZone("America/Los_Angeles");

EXPECT_EQ(
"1969-12-31", dateFormat(fromTimestampString("1970-01-01"), "%Y-%m-%d"));
EXPECT_EQ(
"2000-02-28 04:00:00 PM",
dateFormat(
fromTimestampString("2000-02-29 00:00:00.987"), "%Y-%m-%d %r"));
EXPECT_EQ(
"2000-02-28 16:00:00.987000",
dateFormat(
fromTimestampString("2000-02-29 00:00:00.987"),
"%Y-%m-%d %H:%i:%s.%f"));
EXPECT_EQ(
"-2000-02-28 16:07:03.987000",
dateFormat(
fromTimestampString("-2000-02-29 00:00:00.987"),
"%Y-%m-%d %H:%i:%s.%f"));
Expand Down
39 changes: 39 additions & 0 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,45 @@ struct UnixTimestampParseWithFormatFunction
bool invalidFormat_{false};
};

/// Parse unix time in seconds to a string in given time format.
template <typename T>
struct FromUnixtimeFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

const date::time_zone* sessionTimeZone_ = nullptr;
std::shared_ptr<DateTimeFormatter> mysqlDateTime_;
bool isConstantTimeFormat = false;

FOLLY_ALWAYS_INLINE void initialize(
const core::QueryConfig& config,
const arg_type<int64_t>* /*unixtime*/,
const arg_type<Varchar>* timeFormat) {
sessionTimeZone_ = getTimeZoneFromConfig(config);
if (timeFormat != nullptr) {
isConstantTimeFormat = true;
mysqlDateTime_ = buildJodaDateTimeFormatter(
std::string_view(timeFormat->data(), timeFormat->size()));
}
}

FOLLY_ALWAYS_INLINE void call(
out_type<Varchar>& result,
const arg_type<int64_t> second,
const arg_type<Varchar> timeFormat) {
if (!isConstantTimeFormat) {
mysqlDateTime_ = buildJodaDateTimeFormatter(
std::string_view(timeFormat.data(), timeFormat.size()));
}
Timestamp timestamp = Timestamp::fromMillis(1000 * second);
auto formattedResult = mysqlDateTime_->format(timestamp, sessionTimeZone_);
auto resultSize = formattedResult.size();
result.resize(resultSize);
if (resultSize != 0) {
std::memcpy(result.data(), formattedResult.data(), resultSize);
}
}
};

template <typename T>
struct MakeDateFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand Down
3 changes: 3 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ void registerFunctions(const std::string& prefix) {

registerFunction<LastDayFunction, Date, Date>({prefix + "last_day"});

registerFunction<FromUnixtimeFunction, Varchar, int64_t, Varchar>(
{prefix + "from_unixtime"});

// Register bloom filter function
registerFunction<BloomFilterMightContainFunction, bool, Varbinary, int64_t>(
{prefix + "might_contain"});
Expand Down
12 changes: 12 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,5 +271,17 @@ TEST_F(DateTimeFunctionsTest, dateDiff) {
EXPECT_EQ(-366, dateDiff(parseDate("2020-02-29"), parseDate("2019-02-28")));
}

TEST_F(DateTimeFunctionsTest, fromUnixTime) {
const auto fromUnixTime = [&](std::optional<int64_t> unixTime,
std::optional<std::string> timeFormat) {
return evaluateOnce<std::string>(
"from_unixtime(c0, c1)", unixTime, timeFormat);
};

EXPECT_EQ(fromUnixTime(100, "yyyy-MM-dd"), "1970-01-01");
EXPECT_EQ(fromUnixTime(120, "yyyy-MM-dd HH:mm"), "1970-01-01 00:02");
EXPECT_EQ(fromUnixTime(100, "yyyy-MM-dd HH:mm:ss"), "1970-01-01 00:01:40");
}

} // namespace
} // namespace facebook::velox::functions::sparksql::test
35 changes: 33 additions & 2 deletions velox/type/Timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,40 @@ void Timestamp::toGMT(int16_t tzID) {
}
}

namespace {
void validateTimePoint(const std::chrono::time_point<
std::chrono::system_clock,
std::chrono::milliseconds>& timePoint) {
// Due to the limit of std::chrono we can only represent time in
// [-32767-01-01, 32767-12-31] date range
const auto minTimePoint = date::sys_days{
date::year_month_day(date::year::min(), date::month(1), date::day(1))};
const auto maxTimePoint = date::sys_days{
date::year_month_day(date::year::max(), date::month(12), date::day(31))};
if (timePoint < minTimePoint || timePoint > maxTimePoint) {
VELOX_USER_FAIL(
"Timestamp is outside of supported range of [{}-{}-{}, {}-{}-{}]",
(int)date::year::min(),
"01",
"01",
(int)date::year::max(),
"12",
"31");
}
}
} // namespace

std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>
Timestamp::toTimePoint() const {
auto tp = std::chrono::
time_point<std::chrono::system_clock, std::chrono::milliseconds>(
std::chrono::milliseconds(toMillis()));
validateTimePoint(tp);
return tp;
}

void Timestamp::toTimezone(const date::time_zone& zone) {
auto tp = std::chrono::time_point<std::chrono::system_clock>(
std::chrono::seconds(seconds_));
auto tp = toTimePoint();
auto epoch = zone.to_local(tp).time_since_epoch();
seconds_ = std::chrono::duration_cast<std::chrono::seconds>(epoch).count();
}
Expand Down
5 changes: 5 additions & 0 deletions velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ struct Timestamp {
}
}

/// Due to the limit of std::chrono, throws if timestamp is outside of
/// [-32767-01-01, 32767-12-31] range.
std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>
toTimePoint() const;

static Timestamp fromMillis(int64_t millis) {
if (millis >= 0 || millis % 1'000 == 0) {
return Timestamp(millis / 1'000, (millis % 1'000) * 1'000'000);
Expand Down
11 changes: 11 additions & 0 deletions velox/type/tests/TimestampTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <gtest/gtest.h>

#include "velox/common/base/tests/GTestUtils.h"
#include "velox/external/date/tz.h"
#include "velox/type/Timestamp.h"

namespace facebook::velox {
Expand Down Expand Up @@ -158,5 +159,15 @@ TEST(TimestampTest, toString) {
EXPECT_EQ("-292275055-05-16T16:47:04.000000000", kMin.toString());
EXPECT_EQ("292278994-08-17T07:12:55.999999999", kMax.toString());
}

TEST(TimestampTest, outOfRange) {
auto* timezone = date::locate_zone("GMT");
Timestamp t(-3217830796800, 0);

VELOX_ASSERT_THROW(
t.toTimePoint(), "Timestamp is outside of supported range");
VELOX_ASSERT_THROW(
t.toTimezone(*timezone), "Timestamp is outside of supported range");
}
} // namespace
} // namespace facebook::velox