Skip to content

Commit c383808

Browse files
committed
add date/time/timestamp/decimal
1 parent ce2df38 commit c383808

File tree

3 files changed

+58
-8
lines changed

3 files changed

+58
-8
lines changed

src/iceberg/row/arrow_array_wrapper.cc

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
#include "iceberg/row/arrow_array_wrapper.h"
2121

22+
#include <cstring>
23+
2224
#include <nanoarrow/nanoarrow.h>
2325

2426
#include "iceberg/arrow_c_data_guard_internal.h"
@@ -45,8 +47,11 @@ Result<Scalar> ExtractValue(const ArrowSchema* schema, const ArrowArray* array,
4547
case NANOARROW_TYPE_BOOL:
4648
return static_cast<bool>(ArrowArrayViewGetIntUnsafe(array_view, index));
4749
case NANOARROW_TYPE_INT32:
50+
case NANOARROW_TYPE_DATE32:
4851
return static_cast<int32_t>(ArrowArrayViewGetIntUnsafe(array_view, index));
4952
case NANOARROW_TYPE_INT64:
53+
case NANOARROW_TYPE_TIME64:
54+
case NANOARROW_TYPE_TIMESTAMP:
5055
return ArrowArrayViewGetIntUnsafe(array_view, index);
5156
case NANOARROW_TYPE_FLOAT:
5257
return static_cast<float>(ArrowArrayViewGetDoubleUnsafe(array_view, index));
@@ -62,6 +67,21 @@ Result<Scalar> ExtractValue(const ArrowSchema* schema, const ArrowArray* array,
6267
ArrowStringView value = ArrowArrayViewGetStringUnsafe(array_view, index);
6368
return std::string_view(value.data, value.size_bytes);
6469
}
70+
case NANOARROW_TYPE_DECIMAL128: {
71+
ArrowError error;
72+
ArrowSchemaView schema_view;
73+
NANOARROW_RETURN_IF_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, &error));
74+
ArrowDecimal value;
75+
ArrowDecimalInit(&value, schema_view.decimal_bitwidth,
76+
schema_view.decimal_precision, schema_view.decimal_scale);
77+
ArrowArrayViewGetDecimalUnsafe(array_view, index, &value);
78+
if (value.n_words != 2) {
79+
return InvalidArrowData("Unsupported Arrow decimal words: {}", value.n_words);
80+
}
81+
int128_t int_value{0};
82+
std::memcpy(&int_value, value.words, sizeof(int128_t));
83+
return Decimal(int_value);
84+
}
6585
case NANOARROW_TYPE_STRUCT: {
6686
ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<StructLike> struct_like,
6787
ArrowArrayStructLike::Make(*schema, *array, index));
@@ -77,10 +97,6 @@ Result<Scalar> ExtractValue(const ArrowSchema* schema, const ArrowArray* array,
7797
ArrowArrayMapLike::Make(*schema, *array, index));
7898
return map_like;
7999
}
80-
case NANOARROW_TYPE_DATE32:
81-
case NANOARROW_TYPE_TIME64:
82-
case NANOARROW_TYPE_TIMESTAMP:
83-
case NANOARROW_TYPE_DECIMAL128:
84100
case NANOARROW_TYPE_EXTENSION:
85101
// TODO(gangwu): Handle these types properly
86102
default:

src/iceberg/row/struct_like.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@
2929
#include <memory>
3030
#include <string_view>
3131
#include <variant>
32+
#include <vector>
3233

3334
#include "iceberg/result.h"
3435
#include "iceberg/type_fwd.h"
36+
#include "iceberg/util/decimal.h"
3537

3638
namespace iceberg {
3739

@@ -46,6 +48,7 @@ using Scalar = std::variant<std::monostate, // for null
4648
float, // for float
4749
double, // for double
4850
std::string_view, // for non-owned string, binary and fixed
51+
Decimal, // for decimal
4952
std::shared_ptr<StructLike>, // for struct
5053
std::shared_ptr<ArrayLike>, // for list
5154
std::shared_ptr<MapLike>>; // for map

src/iceberg/test/struct_like_test.cc

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <arrow/c/bridge.h>
2121
#include <arrow/json/from_string.h>
2222
#include <arrow/type.h>
23+
#include <arrow/util/decimal.h>
2324

2425
#include "iceberg/arrow_c_data_guard_internal.h"
2526
#include "iceberg/manifest_list.h"
@@ -39,6 +40,15 @@ namespace iceberg {
3940
EXPECT_EQ(std::get<expected_type>(scalar), expected_value); \
4041
} while (0)
4142

43+
// Test helper: checks that `result` is OK and that the scalar it carries holds
// the `Decimal` alternative, then compares `decimal.ToString(scale)` against
// `expected_value`.
//
//   result         - expression yielding a result object; it is expanded twice
//                    below (once for the IsOk() check, once for `.value()`),
//                    so a call expression passed here is evaluated twice.
//   scale          - forwarded unchanged to `Decimal::ToString`.
//   expected_value - value compared with the rendered string via EXPECT_EQ.
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement and its locals (`scalar`, `decimal`) stay scoped to the expansion.
#define EXPECT_DECIMAL_EQ(result, scale, expected_value) \
44+
do { \
45+
ASSERT_THAT(result, IsOk()); \
46+
auto scalar = result.value(); \
47+
ASSERT_TRUE(std::holds_alternative<Decimal>(scalar)); \
48+
auto decimal = std::get<Decimal>(scalar); \
49+
EXPECT_EQ(decimal.ToString(scale), expected_value); \
50+
} while (0)
51+
4252
#define EXPECT_SCALAR_NULL(result) \
4353
do { \
4454
ASSERT_THAT(result, IsOk()); \
@@ -189,12 +199,22 @@ TEST(ArrowArrayStructLike, PrimitiveFields) {
189199
{::arrow::field("id", ::arrow::int64(), /*nullable=*/false),
190200
::arrow::field("name", ::arrow::utf8(), /*nullable=*/true),
191201
::arrow::field("score", ::arrow::float32(), /*nullable=*/true),
192-
::arrow::field("active", ::arrow::boolean(), /*nullable=*/false)});
202+
::arrow::field("active", ::arrow::boolean(), /*nullable=*/false),
203+
::arrow::field("date", ::arrow::date32(), /*nullable=*/false),
204+
::arrow::field("time", ::arrow::time64(::arrow::TimeUnit::MICRO),
205+
/*nullable=*/false),
206+
::arrow::field("timestamp", ::arrow::timestamp(::arrow::TimeUnit::MICRO),
207+
/*nullable=*/false),
208+
::arrow::field("fixed", ::arrow::fixed_size_binary(4), /*nullable=*/false),
209+
::arrow::field("decimal", ::arrow::decimal128(10, 2), /*nullable=*/false)});
193210

194211
auto arrow_array = ::arrow::json::ArrayFromJSONString(struct_type, R"([
195-
{"id": 1, "name": "Alice", "score": 95.5, "active": true},
196-
{"id": 2, "name": "Bob", "score": null, "active": false},
197-
{"id": 3, "name": null, "score": 87.2, "active": true}])")
212+
{"id": 1, "name": "Alice", "score": 95.5, "active": true, "date": 1714396800,
213+
"time": 123456, "timestamp": 1714396800000000, "fixed": "aaaa", "decimal": "1234.56"},
214+
{"id": 2, "name": "Bob", "score": null, "active": false, "date": 1714396801,
215+
"time": 123457, "timestamp": 1714396800000001, "fixed": "bbbb", "decimal": "-1234.56"},
216+
{"id": 3, "name": null, "score": 87.2, "active": true, "date": 1714396802,
217+
"time": 123458, "timestamp": 1714396800000002, "fixed": "cccc", "decimal": "1234.00"}])")
198218
.ValueOrDie();
199219

200220
ArrowSchema c_schema;
@@ -213,6 +233,12 @@ TEST(ArrowArrayStructLike, PrimitiveFields) {
213233
std::array<std::optional<std::string>, kNumRows> names = {"Alice", "Bob", std::nullopt};
214234
std::array<std::optional<float>, kNumRows> scores = {95.5f, std::nullopt, 87.2f};
215235
std::array<bool, kNumRows> actives = {true, false, true};
236+
std::array<int32_t, kNumRows> dates = {1714396800, 1714396801, 1714396802};
237+
std::array<int64_t, kNumRows> times = {123456, 123457, 123458};
238+
std::array<int64_t, kNumRows> timestamps = {1714396800000000, 1714396800000001,
239+
1714396800000002};
240+
std::array<std::string, kNumRows> fixeds = {"aaaa", "bbbb", "cccc"};
241+
std::array<std::string, kNumRows> decimals = {"1234.56", "-1234.56", "1234.00"};
216242

217243
for (int64_t i = 0; i < kNumRows; ++i) {
218244
ASSERT_THAT(struct_like->Reset(i), IsOk());
@@ -228,6 +254,11 @@ TEST(ArrowArrayStructLike, PrimitiveFields) {
228254
EXPECT_SCALAR_NULL(struct_like->GetField(2));
229255
}
230256
EXPECT_SCALAR_EQ(struct_like->GetField(3), bool, actives[i]);
257+
EXPECT_SCALAR_EQ(struct_like->GetField(4), int32_t, dates[i]);
258+
EXPECT_SCALAR_EQ(struct_like->GetField(5), int64_t, times[i]);
259+
EXPECT_SCALAR_EQ(struct_like->GetField(6), int64_t, timestamps[i]);
260+
EXPECT_SCALAR_EQ(struct_like->GetField(7), std::string_view, fixeds[i]);
261+
EXPECT_DECIMAL_EQ(struct_like->GetField(8), /*scale=*/2, decimals[i]);
231262
}
232263
}
233264

0 commit comments

Comments
 (0)