Skip to content

Commit b2358c6

Browse files
committed
feat: Implement Type Casting and toString for Literals
1 parent b717675 commit b2358c6

File tree

8 files changed

+1165
-46
lines changed

8 files changed

+1165
-46
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,10 @@ set(ICEBERG_SOURCES
4949
manifest_reader_internal.cc
5050
manifest_writer.cc
5151
arrow_c_data_guard_internal.cc
52+
util/date_time_util.cc
53+
util/gzip_internal.cc
5254
util/murmurhash3_internal.cc
53-
util/timepoint.cc
54-
util/gzip_internal.cc)
55+
util/timepoint.cc)
5556

5657
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
5758
set(ICEBERG_SHARED_BUILD_INTERFACE_LIBS)

src/iceberg/expression/literal.cc

Lines changed: 208 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919

2020
#include "iceberg/expression/literal.h"
2121

22-
#include <cmath>
2322
#include <concepts>
23+
#include <cstdint>
24+
25+
#include <iceberg/result.h>
2426

2527
#include "iceberg/exception.h"
28+
#include "iceberg/util/date_time_util.h"
2629

2730
namespace iceberg {
2831

@@ -52,6 +55,30 @@ class LiteralCaster {
5255
/// Cast from Float type to target type.
5356
static Result<Literal> CastFromFloat(const Literal& literal,
5457
const std::shared_ptr<PrimitiveType>& target_type);
58+
59+
/// Cast from Double type to target type.
60+
static Result<Literal> CastFromDouble(
61+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
62+
63+
/// Cast from String type to target type.
64+
static Result<Literal> CastFromString(
65+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
66+
67+
/// Cast from Timestamp type to target type.
68+
static Result<Literal> CastFromTimestamp(
69+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
70+
71+
/// Cast from TimestampTz type to target type.
72+
static Result<Literal> CastFromTimestampTz(
73+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
74+
75+
/// Cast from Binary type to target type.
76+
static Result<Literal> CastFromBinary(
77+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type);
78+
79+
/// Cast from Fixed type to target type.
80+
static Result<Literal> CastFromFixed(const Literal& literal,
81+
const std::shared_ptr<PrimitiveType>& target_type);
5582
};
5683

5784
Literal LiteralCaster::BelowMinLiteral(std::shared_ptr<PrimitiveType> type) {
@@ -74,6 +101,9 @@ Result<Literal> LiteralCaster::CastFromInt(
74101
return Literal::Float(static_cast<float>(int_val));
75102
case TypeId::kDouble:
76103
return Literal::Double(static_cast<double>(int_val));
104+
case TypeId::kDate:
105+
return Literal::Date(int_val);
106+
// TODO(Li Feiyang): Implement cast from Int to decimal
77107
default:
78108
return NotSupported("Cast from Int to {} is not implemented",
79109
target_type->ToString());
@@ -83,15 +113,14 @@ Result<Literal> LiteralCaster::CastFromInt(
83113
Result<Literal> LiteralCaster::CastFromLong(
84114
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
85115
auto long_val = std::get<int64_t>(literal.value_);
86-
auto target_type_id = target_type->type_id();
87116

88-
switch (target_type_id) {
117+
switch (target_type->type_id()) {
89118
case TypeId::kInt: {
90119
// Check for overflow
91-
if (long_val >= std::numeric_limits<int32_t>::max()) {
120+
if (long_val > std::numeric_limits<int32_t>::max()) {
92121
return AboveMaxLiteral(target_type);
93122
}
94-
if (long_val <= std::numeric_limits<int32_t>::min()) {
123+
if (long_val < std::numeric_limits<int32_t>::min()) {
95124
return BelowMinLiteral(target_type);
96125
}
97126
return Literal::Int(static_cast<int32_t>(long_val));
@@ -100,6 +129,22 @@ Result<Literal> LiteralCaster::CastFromLong(
100129
return Literal::Float(static_cast<float>(long_val));
101130
case TypeId::kDouble:
102131
return Literal::Double(static_cast<double>(long_val));
132+
case TypeId::kDate: {
133+
if (long_val > std::numeric_limits<int32_t>::max()) {
134+
return AboveMaxLiteral(target_type);
135+
}
136+
if (long_val < std::numeric_limits<int32_t>::min()) {
137+
return BelowMinLiteral(target_type);
138+
}
139+
return Literal::Date(static_cast<int32_t>(long_val));
140+
}
141+
case TypeId::kTime:
142+
return Literal::Time(long_val);
143+
case TypeId::kTimestamp:
144+
return Literal::Timestamp(long_val);
145+
case TypeId::kTimestampTz:
146+
return Literal::TimestampTz(long_val);
147+
// TODO(Li Feiyang): Implement cast from Long to decimal and TimestampNs
103148
default:
104149
return NotSupported("Cast from Long to {} is not supported",
105150
target_type->ToString());
@@ -109,17 +154,141 @@ Result<Literal> LiteralCaster::CastFromLong(
109154
Result<Literal> LiteralCaster::CastFromFloat(
110155
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
111156
auto float_val = std::get<float>(literal.value_);
112-
auto target_type_id = target_type->type_id();
113157

114-
switch (target_type_id) {
158+
switch (target_type->type_id()) {
115159
case TypeId::kDouble:
116160
return Literal::Double(static_cast<double>(float_val));
161+
// TODO(Li Feiyang): Implement cast from Float to decimal
117162
default:
118163
return NotSupported("Cast from Float to {} is not supported",
119164
target_type->ToString());
120165
}
121166
}
122167

168+
Result<Literal> LiteralCaster::CastFromDouble(
169+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
170+
auto double_val = std::get<double>(literal.value_);
171+
172+
switch (target_type->type_id()) {
173+
case TypeId::kFloat: {
174+
if (double_val > static_cast<double>(std::numeric_limits<float>::max())) {
175+
return AboveMaxLiteral(target_type);
176+
}
177+
if (double_val < static_cast<double>(std::numeric_limits<float>::lowest())) {
178+
return BelowMinLiteral(target_type);
179+
}
180+
return Literal::Float(static_cast<float>(double_val));
181+
}
182+
default:
183+
return NotSupported("Cast from Double to {} is not supported",
184+
target_type->ToString());
185+
}
186+
}
187+
188+
Result<Literal> LiteralCaster::CastFromString(
189+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
190+
const auto& str_val = std::get<std::string>(literal.value_);
191+
192+
switch (target_type->type_id()) {
193+
case TypeId::kDate: {
194+
auto days_result = ParseDateString(str_val);
195+
if (!days_result.has_value()) {
196+
return std::unexpected(days_result.error());
197+
}
198+
return Literal::Date(days_result.value());
199+
}
200+
201+
case TypeId::kTime: {
202+
auto micros_result = ParseTimeString(str_val);
203+
if (!micros_result.has_value()) {
204+
return std::unexpected(micros_result.error());
205+
}
206+
return Literal::Time(micros_result.value());
207+
}
208+
209+
case TypeId::kTimestamp: {
210+
auto micros_result = ParseTimestampString(str_val);
211+
if (!micros_result.has_value()) {
212+
return std::unexpected(micros_result.error());
213+
}
214+
return Literal::Timestamp(micros_result.value());
215+
}
216+
217+
case TypeId::kTimestampTz: {
218+
auto micros_result = ParseTimestampTzString(str_val);
219+
if (!micros_result.has_value()) {
220+
return std::unexpected(micros_result.error());
221+
}
222+
return Literal::TimestampTz(micros_result.value());
223+
}
224+
// TODO(Li Feiyang): Implement cast from String to uuid and decimal
225+
226+
default:
227+
return NotSupported("Cast from String to {} is not supported",
228+
target_type->ToString());
229+
}
230+
}
231+
232+
Result<Literal> LiteralCaster::CastFromTimestamp(
233+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
234+
auto timestamp_val = std::get<int64_t>(literal.value_);
235+
236+
switch (target_type->type_id()) {
237+
case TypeId::kDate:
238+
return Literal::Date(MicrosToDays(timestamp_val));
239+
case TypeId::kTimestampTz:
240+
return Literal::TimestampTz(timestamp_val);
241+
default:
242+
return NotSupported("Cast from Timestamp to {} is not supported",
243+
target_type->ToString());
244+
}
245+
}
246+
247+
Result<Literal> LiteralCaster::CastFromTimestampTz(
248+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
249+
auto micros = std::get<int64_t>(literal.value_);
250+
251+
switch (target_type->type_id()) {
252+
case TypeId::kDate:
253+
return Literal::Date(MicrosToDays(micros));
254+
case TypeId::kTimestamp:
255+
return Literal::Timestamp(micros);
256+
default:
257+
return NotSupported("Cast from TimestampTz to {} is not supported",
258+
target_type->ToString());
259+
}
260+
}
261+
262+
Result<Literal> LiteralCaster::CastFromBinary(
263+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
264+
auto binary_val = std::get<std::vector<uint8_t>>(literal.value_);
265+
switch (target_type->type_id()) {
266+
case TypeId::kFixed: {
267+
auto target_fixed_type = std::static_pointer_cast<FixedType>(target_type);
268+
if (binary_val.size() == target_fixed_type->length()) {
269+
return Literal::Fixed(std::move(binary_val));
270+
}
271+
return InvalidArgument("Failed to cast Binary with length {} to Fixed({})",
272+
binary_val.size(), target_fixed_type->length());
273+
}
274+
default:
275+
return NotSupported("Cast from Binary to {} is not supported",
276+
target_type->ToString());
277+
}
278+
}
279+
280+
Result<Literal> LiteralCaster::CastFromFixed(
281+
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type) {
282+
switch (target_type->type_id()) {
283+
case TypeId::kBinary:
284+
return Literal::Binary(
285+
std::get<std::vector<uint8_t>>(literal.value_)); // copied into the by-value parameter, which Binary() then moves
286+
default:
287+
return NotSupported("Cast from Fixed to {} is not supported",
288+
target_type->ToString());
289+
}
290+
}
291+
123292
// Constructor
124293
Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
125294
: value_(std::move(value)), type_(std::move(type)) {}
@@ -149,6 +318,11 @@ Literal Literal::Binary(std::vector<uint8_t> value) {
149318
return {Value{std::move(value)}, binary()};
150319
}
151320

321+
Literal Literal::Fixed(std::vector<uint8_t> value) {
322+
const auto size = value.size();
323+
return {Value{std::move(value)}, fixed(size)};
324+
}
325+
152326
Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
153327
std::shared_ptr<PrimitiveType> type) {
154328
return NotImplemented("Deserialization of Literal is not implemented yet");
@@ -216,6 +390,7 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
216390
}
217391

218392
case TypeId::kLong:
393+
case TypeId::kTime:
219394
case TypeId::kTimestamp:
220395
case TypeId::kTimestampTz: {
221396
auto this_val = std::get<int64_t>(value_);
@@ -243,7 +418,8 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
243418
return this_val <=> other_val;
244419
}
245420

246-
case TypeId::kBinary: {
421+
case TypeId::kBinary:
422+
case TypeId::kFixed: {
247423
auto& this_val = std::get<std::vector<uint8_t>>(value_);
248424
auto& other_val = std::get<std::vector<uint8_t>>(other.value_);
249425
return this_val <=> other_val;
@@ -285,23 +461,29 @@ std::string Literal::ToString() const {
285461
case TypeId::kString: {
286462
return std::get<std::string>(value_);
287463
}
288-
case TypeId::kBinary: {
464+
case TypeId::kBinary:
465+
case TypeId::kFixed: {
289466
const auto& binary_data = std::get<std::vector<uint8_t>>(value_);
290-
std::string result;
291-
result.reserve(binary_data.size() * 2); // 2 chars per byte
467+
std::string result = "X'";
468+
result.reserve(2 + binary_data.size() * 2 +
469+
1); // 2 hex chars per byte, plus 2 for the X' prefix and 1 for the closing quote
292470
for (const auto& byte : binary_data) {
293471
std::format_to(std::back_inserter(result), "{:02X}", byte);
294472
}
473+
result.push_back('\'');
295474
return result;
296475
}
297-
case TypeId::kDecimal:
298-
case TypeId::kUuid:
299-
case TypeId::kFixed:
300-
case TypeId::kDate:
301476
case TypeId::kTime:
302477
case TypeId::kTimestamp:
303478
case TypeId::kTimestampTz: {
304-
throw IcebergError("Not implemented: ToString for " + type_->ToString());
479+
return std::to_string(std::get<int64_t>(value_));
480+
}
481+
case TypeId::kDate: {
482+
return std::to_string(std::get<int32_t>(value_));
483+
}
484+
case TypeId::kDecimal:
485+
case TypeId::kUuid: {
486+
throw NotImplemented("kDecimal and kUuid are not implemented yet");
305487
}
306488
default: {
307489
throw IcebergError("Unknown type: " + type_->ToString());
@@ -343,10 +525,18 @@ Result<Literal> LiteralCaster::CastTo(const Literal& literal,
343525
case TypeId::kFloat:
344526
return CastFromFloat(literal, target_type);
345527
case TypeId::kDouble:
346-
case TypeId::kBoolean:
528+
return CastFromDouble(literal, target_type);
347529
case TypeId::kString:
530+
return CastFromString(literal, target_type);
348531
case TypeId::kBinary:
349-
break;
532+
return CastFromBinary(literal, target_type);
533+
case TypeId::kFixed:
534+
return CastFromFixed(literal, target_type);
535+
case TypeId::kTimestamp:
536+
return CastFromTimestamp(literal, target_type);
537+
case TypeId::kTimestampTz:
538+
return CastFromTimestampTz(literal, target_type);
539+
case TypeId::kBoolean:
350540
default:
351541
break;
352542
}

src/iceberg/expression/literal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class ICEBERG_EXPORT Literal {
7171
static Literal Double(double value);
7272
static Literal String(std::string value);
7373
static Literal Binary(std::vector<uint8_t> value);
74+
static Literal Fixed(std::vector<uint8_t> value);
7475

7576
/// \brief Create a literal representing a null value.
7677
static Literal Null(std::shared_ptr<PrimitiveType> type) {

0 commit comments

Comments
 (0)