Skip to content

Commit 406a3f2

Browse files
committed
fix review
1 parent 483fa0f commit 406a3f2

File tree

2 files changed

+245
-30
lines changed

2 files changed

+245
-30
lines changed

src/iceberg/expression/literal.cc

Lines changed: 131 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include <chrono>
2323
#include <cmath>
2424
#include <concepts>
25+
#include <cstdint>
26+
#include <iomanip>
2527

2628
#include "iceberg/exception.h"
2729

@@ -35,6 +37,14 @@ int32_t MicrosToDays(int64_t micros_since_epoch) {
3537
return static_cast<int32_t>(days_duration.count());
3638
}
3739

40+
/// \brief Convert a broken-down UTC time to seconds since 1970-01-01T00:00:00Z.
///
/// Portable replacement for the non-standard `timegm` / `_mkgmtime` pair. The value is
/// computed arithmetically from the calendar fields, so it does not depend on the
/// platform C library and is correct for instants before the epoch (MSVC's
/// `_mkgmtime` reports -1 for those, which broke pre-1970 dates on Windows).
///
/// Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec are read. Unlike
/// `timegm`, `*tm` is left untouched (no normalization, tm_wday/tm_yday not filled).
///
/// \param tm Broken-down time interpreted as UTC; must not be null.
/// \return Seconds since the Unix epoch (negative for instants before it).
time_t timegm_custom(std::tm* tm) {
  // Fold an out-of-range month into the year so the calendar math only sees 1..12.
  int64_t year = static_cast<int64_t>(tm->tm_year) + 1900;
  int64_t month_index = tm->tm_mon;  // 0-based, as stored in std::tm
  year += month_index / 12;
  month_index %= 12;
  if (month_index < 0) {
    month_index += 12;
    --year;
  }
  const int64_t month = month_index + 1;

  // Days-from-civil on the proleptic Gregorian calendar. Years are shifted to start
  // in March so that the leap day is the last day of the (shifted) year.
  const int64_t shifted_year = month <= 2 ? year - 1 : year;
  const int64_t era = (shifted_year >= 0 ? shifted_year : shifted_year - 399) / 400;
  const int64_t year_of_era = shifted_year - era * 400;  // [0, 399]
  const int64_t day_of_year =
      (153 * (month + (month > 2 ? -3 : 9)) + 2) / 5 + tm->tm_mday - 1;
  const int64_t day_of_era =
      year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
  // 719468 is the number of days from 0000-03-01 to 1970-01-01.
  const int64_t days_since_epoch = era * 146097 + day_of_era - 719468;

  const int64_t seconds_in_day = tm->tm_hour * 3600LL + tm->tm_min * 60LL + tm->tm_sec;
  return static_cast<time_t>(days_since_epoch * 86400 + seconds_in_day);
}
47+
3848
} // namespace
3949

4050
/// \brief LiteralCaster handles type casting operations for Literal.
@@ -111,6 +121,7 @@ Result<Literal> LiteralCaster::CastFromInt(
111121
return Literal::Double(static_cast<double>(int_val));
112122
case TypeId::kDate:
113123
return Literal::Date(int_val);
124+
// TODO(Li Feiyang): Implement cast from Int to decimal
114125
default:
115126
return NotSupported("Cast from Int to {} is not implemented",
116127
target_type->ToString());
@@ -137,10 +148,10 @@ Result<Literal> LiteralCaster::CastFromLong(
137148
case TypeId::kDouble:
138149
return Literal::Double(static_cast<double>(long_val));
139150
case TypeId::kDate: {
140-
if (long_val > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
151+
if (long_val > std::numeric_limits<int32_t>::max()) {
141152
return AboveMaxLiteral(target_type);
142153
}
143-
if (long_val < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
154+
if (long_val < std::numeric_limits<int32_t>::min()) {
144155
return BelowMinLiteral(target_type);
145156
}
146157
return Literal::Date(static_cast<int32_t>(long_val));
@@ -151,6 +162,7 @@ Result<Literal> LiteralCaster::CastFromLong(
151162
return Literal::Timestamp(long_val);
152163
case TypeId::kTimestampTz:
153164
return Literal::TimestampTz(long_val);
165+
// TODO(Li Feiyang): Implement cast from Long to decimal, TimestampNs and TimestampTzNs
154166
default:
155167
return NotSupported("Cast from Long to {} is not supported",
156168
target_type->ToString());
@@ -164,6 +176,7 @@ Result<Literal> LiteralCaster::CastFromFloat(
164176
switch (target_type->type_id()) {
165177
case TypeId::kDouble:
166178
return Literal::Double(static_cast<double>(float_val));
179+
// TODO(Li Feiyang): Implement cast from Float to decimal
167180
default:
168181
return NotSupported("Cast from Float to {} is not supported",
169182
target_type->ToString());
@@ -192,30 +205,129 @@ Result<Literal> LiteralCaster::CastFromDouble(
192205

193206
Result<Literal> LiteralCaster::CastFromString(
194207
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
208+
const auto& str_val = std::get<std::string>(literal.value_);
209+
std::istringstream in{str_val};
210+
std::tm tm = {};
211+
195212
switch (target_type->type_id()) {
196213
case TypeId::kDate: {
197-
// TODO(Li Feiyang): Implement parsing for "YYYY-MM-DD" using std::chrono::parse
198-
// once it becomes available in the target libc++.
199-
return NotImplemented("Cast from String to Date is not yet implemented.");
214+
// Parse "YYYY-MM-DD" into days since 1970-01-01 epoch.
215+
in >> std::get_time(&tm, "%Y-%m-%d");
216+
217+
if (in.fail() || tm.tm_mday == 0 || in.peek() != EOF) {
218+
return NotSupported("Failed to parse '{}' as a valid Date (expected YYYY-MM-DD)",
219+
str_val);
220+
}
221+
222+
auto time_point = std::chrono::system_clock::from_time_t(timegm_custom(&tm));
223+
auto days_since_epoch = std::chrono::floor<std::chrono::days>(time_point);
224+
return Literal::Date(
225+
static_cast<int32_t>(days_since_epoch.time_since_epoch().count()));
200226
}
201227

202228
case TypeId::kTime: {
203-
// TODO(Li Feiyang): Implement parsing for "HH:MM:SS.ffffff" using
204-
// std::chrono::parse once it becomes available in the target libc++.
205-
return NotImplemented("Cast from String to Time is not yet implemented.");
206-
}
229+
// Parse "HH:MM:SS.ffffff" into microseconds since midnight.
230+
in >> std::get_time(&tm, "%H:%M:%S");
207231

208-
case TypeId::kTimestamp: {
209-
// TODO(Li Feiyang): Implement parsing for "YYYY-MM-DDTHH:MM:SS.ffffff" using
210-
// std::chrono::parse once it becomes available in the target libc++.
211-
return NotImplemented("Cast from String to Timestamp is not yet implemented.");
232+
if (in.fail()) {
233+
return NotSupported(
234+
"Failed to parse '{}' as a valid Time (expected HH:MM:SS.ffffff)", str_val);
235+
}
236+
237+
int64_t total_micros =
238+
(tm.tm_hour * 3600LL + tm.tm_min * 60LL + tm.tm_sec) * 1000000LL;
239+
240+
if (in.peek() == '.') {
241+
in.ignore();
242+
std::string fractional_str;
243+
char c;
244+
while (in.get(c) && isdigit(c)) {
245+
fractional_str += c;
246+
}
247+
if (in) {
248+
in.unget();
249+
}
250+
251+
if (fractional_str.length() > 6) {
252+
fractional_str.resize(6);
253+
}
254+
try {
255+
if (!fractional_str.empty()) {
256+
fractional_str.append(6 - fractional_str.length(), '0');
257+
total_micros += std::stoll(fractional_str);
258+
}
259+
} catch (const std::exception&) {
260+
return NotSupported("Failed to parse fractional part of Time '{}'", str_val);
261+
}
262+
}
263+
264+
if (in.peek() != EOF) {
265+
return NotSupported("Unconsumed characters found after parsing Time '{}'",
266+
str_val);
267+
}
268+
269+
return Literal::Time(total_micros);
212270
}
213271

272+
case TypeId::kTimestamp:
214273
case TypeId::kTimestampTz: {
215-
// TODO(Li Feiyang): Implement parsing for "YYYY-MM-DDTHH:MM:SS.ffffffZ" using
216-
// std::chrono::parse once it becomes available in the target libc++.
217-
return NotImplemented("Cast from String to TimestampTz is not yet implemented.");
274+
// Parse "YYYY-MM-DDTHH:MM:SS.ffffff" and optional 'Z'
275+
in >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%S");
276+
277+
if (in.fail()) {
278+
return NotSupported(
279+
"Failed to parse '{}' as a valid Timestamp (expected YYYY-MM-DDTHH:MM:SS...)",
280+
str_val);
281+
}
282+
283+
auto seconds_since_epoch = timegm_custom(&tm);
284+
int64_t total_micros = seconds_since_epoch * 1000000LL;
285+
286+
if (in.peek() == '.') {
287+
in.ignore();
288+
std::string fractional_str;
289+
char c;
290+
while (in.get(c) && isdigit(c)) {
291+
fractional_str += c;
292+
}
293+
if (in) {
294+
in.unget();
295+
}
296+
297+
if (fractional_str.length() > 6) {
298+
fractional_str.resize(6);
299+
}
300+
try {
301+
if (!fractional_str.empty()) {
302+
fractional_str.append(6 - fractional_str.length(), '0');
303+
total_micros += std::stoll(fractional_str);
304+
}
305+
} catch (const std::exception&) {
306+
return NotSupported("Failed to parse fractional part of Timestamp '{}'",
307+
str_val);
308+
}
309+
}
310+
311+
if (target_type->type_id() == TypeId::kTimestampTz) {
312+
// NOTE: This implementation DOES NOT support timezone offsets like
313+
// '+08:00' or '-07:00'. It only supports the UTC designator 'Z'.
314+
if (in.peek() == 'Z') {
315+
in.ignore(); // Consume 'Z'
316+
}
317+
}
318+
319+
if (in.peek() != EOF) {
320+
return NotSupported("Unconsumed characters found after parsing Timestamp '{}'",
321+
str_val);
322+
}
323+
324+
if (target_type->type_id() == TypeId::kTimestamp) {
325+
return Literal::Timestamp(total_micros);
326+
} else {
327+
return Literal::TimestampTz(total_micros);
328+
}
218329
}
330+
// TODO(Li Feiyang): Implement cast from String to uuid and decimal
219331

220332
default:
221333
return NotSupported("Cast from String to {} is not supported",
@@ -243,12 +355,10 @@ Result<Literal> LiteralCaster::CastFromTimestampTz(
243355
auto micros = std::get<int64_t>(literal.value_);
244356

245357
switch (target_type->type_id()) {
246-
case TypeId::kDate: {
358+
case TypeId::kDate:
247359
return Literal::Date(MicrosToDays(micros));
248-
}
249-
case TypeId::kTimestamp: {
360+
case TypeId::kTimestamp:
250361
return Literal::Timestamp(micros);
251-
}
252362
default:
253363
return NotSupported("Cast from TimestampTz to {} is not supported",
254364
target_type->ToString());
@@ -278,17 +388,8 @@ Result<Literal> LiteralCaster::CastFromFixed(
278388
const auto& fixed_val = std::get<std::vector<uint8_t>>(literal.value_);
279389

280390
switch (target_type->type_id()) {
281-
case TypeId::kBinary: {
391+
case TypeId::kBinary:
282392
return Literal::Binary(fixed_val);
283-
}
284-
case TypeId::kFixed: {
285-
auto target_fixed_type = std::dynamic_pointer_cast<FixedType>(target_type);
286-
if (fixed_val.size() == target_fixed_type->length()) {
287-
return literal;
288-
}
289-
return NotSupported("Cannot cast Fixed({}) to Fixed({}) due to mismatched lengths",
290-
fixed_val.size(), target_fixed_type->length());
291-
}
292393
default:
293394
return NotSupported("Cast from Fixed to {} is not supported",
294395
target_type->ToString());

test/literal_test.cc

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,120 @@ TEST(LiteralTest, StringComparison) {
290290
EXPECT_EQ(string2 <=> string1, std::partial_ordering::greater);
291291
}
292292

293+
TEST(LiteralTest, StringCastToDate) {
  // A well-formed "YYYY-MM-DD" string becomes a Date literal whose printed value is
  // the day offset from 1970-01-01.
  {
    auto source = Literal::String("2023-05-15");
    auto casted = source.CastTo(iceberg::date());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->type()->type_id(), TypeId::kDate);
    EXPECT_EQ(casted->ToString(), "19492");
  }

  // The epoch day itself and the day immediately before it.
  struct Expectation {
    const char* text;
    const char* days;
  };
  const Expectation around_epoch[] = {
      {"1970-01-01", "0"},
      {"1969-12-31", "-1"},
  };
  for (const auto& item : around_epoch) {
    auto source = Literal::String(item.text);
    auto casted = source.CastTo(iceberg::date());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->ToString(), item.days);
  }

  // Wrong separator, trailing characters, and a missing day component are rejected.
  const char* const malformed_inputs[] = {
      "2023/05/15",
      "2023-05-15 extra",
      "2023-05",
  };
  for (const char* text : malformed_inputs) {
    auto source = Literal::String(text);
    EXPECT_THAT(source.CastTo(iceberg::date()), IsError(ErrorKind::kNotSupported));
  }
}
323+
324+
TEST(LiteralTest, StringCastToTime) {
  // Whole-second input: the result is a Time literal printed as microseconds
  // since midnight (12h == 43'200'000'000 us).
  {
    auto source = Literal::String("12:00:00");
    auto casted = source.CastTo(iceberg::time());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->type()->type_id(), TypeId::kTime);
    EXPECT_EQ(casted->ToString(), "43200000000");
  }

  // Fractional seconds: exactly six digits, fewer than six (zero-padded on the
  // right), and more than six (extra digits dropped).
  struct Expectation {
    const char* text;
    const char* micros;
  };
  const Expectation fractional_cases[] = {
      {"12:34:56.123456", "45296123456"},
      {"01:02:03.123", "3723123000"},
      {"23:59:59.987654321", "86399987654"},
  };
  for (const auto& item : fractional_cases) {
    auto source = Literal::String(item.text);
    auto casted = source.CastTo(iceberg::time());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->ToString(), item.micros);
  }

  // Wrong separator, trailing characters, and an out-of-range hour are rejected.
  const char* const malformed_inputs[] = {
      "12-00-00",
      "12:00:00 extra",
      "25:00:00",
  };
  for (const char* text : malformed_inputs) {
    auto source = Literal::String(text);
    EXPECT_THAT(source.CastTo(iceberg::time()), IsError(ErrorKind::kNotSupported));
  }
}
360+
361+
TEST(LiteralTest, StringCastToTimestamp) {
  // Whole-second input yields a Timestamp literal printed as microseconds since
  // the epoch.
  {
    auto source = Literal::String("2023-05-15T12:00:00");
    auto casted = source.CastTo(iceberg::timestamp());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->type()->type_id(), TypeId::kTimestamp);
    EXPECT_EQ(casted->ToString(), "1684152000000000");
  }

  // Six fractional digits are carried through as microseconds.
  {
    auto source = Literal::String("2023-05-15T12:34:56.123456");
    auto casted = source.CastTo(iceberg::timestamp());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->ToString(), "1684154096123456");
  }

  // A space instead of 'T', and a trailing 'Z' on a zone-less timestamp, are
  // both rejected.
  const char* const malformed_inputs[] = {
      "2023-05-15 12:00:00",
      "2023-05-15T12:00:00Z",
  };
  for (const char* text : malformed_inputs) {
    auto source = Literal::String(text);
    EXPECT_THAT(source.CastTo(iceberg::timestamp()), IsError(ErrorKind::kNotSupported));
  }
}
382+
383+
TEST(LiteralTest, StringCastToTimestampTz) {
  // Fractional seconds followed by the 'Z' designator.
  {
    auto source = Literal::String("2023-05-15T12:34:56.123456Z");
    auto casted = source.CastTo(iceberg::timestamp_tz());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->type()->type_id(), TypeId::kTimestampTz);
    EXPECT_EQ(casted->ToString(), "1684154096123456");
  }

  // No 'Z' suffix: the text is still accepted and read as UTC.
  {
    auto source = Literal::String("2023-05-15T12:00:00");
    auto casted = source.CastTo(iceberg::timestamp_tz());
    ASSERT_THAT(casted, IsOk());
    EXPECT_EQ(casted->ToString(), "1684152000000000");
  }

  // A numeric zone offset and trailing characters after 'Z' are both rejected.
  const char* const rejected_inputs[] = {
      "2023-05-15T12:00:00+08:00",
      "2023-05-15T12:00:00Z oops",
  };
  for (const char* text : rejected_inputs) {
    auto source = Literal::String(text);
    EXPECT_THAT(source.CastTo(iceberg::timestamp_tz()),
                IsError(ErrorKind::kNotSupported));
  }
}
406+
293407
// Binary type tests
294408
TEST(LiteralTest, BinaryBasics) {
295409
std::vector<uint8_t> data = {0x01, 0x02, 0x03, 0xFF};

0 commit comments

Comments
 (0)