Skip to content

Commit 3f0ae8d

Browse files
committed
add date/time/timestamp/decimal
1 parent 23b27f0 commit 3f0ae8d

File tree

3 files changed

+53
-8
lines changed

3 files changed

+53
-8
lines changed

src/iceberg/row/arrow_array_wrapper.cc

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,11 @@ Result<Scalar> ExtractValue(const ArrowSchema* schema, const ArrowArray* array,
4545
case NANOARROW_TYPE_BOOL:
4646
return static_cast<bool>(ArrowArrayViewGetIntUnsafe(array_view, index));
4747
case NANOARROW_TYPE_INT32:
48+
case NANOARROW_TYPE_DATE32:
4849
return static_cast<int32_t>(ArrowArrayViewGetIntUnsafe(array_view, index));
4950
case NANOARROW_TYPE_INT64:
51+
case NANOARROW_TYPE_TIME64:
52+
case NANOARROW_TYPE_TIMESTAMP:
5053
return ArrowArrayViewGetIntUnsafe(array_view, index);
5154
case NANOARROW_TYPE_FLOAT:
5255
return static_cast<float>(ArrowArrayViewGetDoubleUnsafe(array_view, index));
@@ -62,6 +65,18 @@ Result<Scalar> ExtractValue(const ArrowSchema* schema, const ArrowArray* array,
6265
ArrowStringView value = ArrowArrayViewGetStringUnsafe(array_view, index);
6366
return std::string_view(value.data, value.size_bytes);
6467
}
68+
case NANOARROW_TYPE_DECIMAL128: {
69+
ArrowError error;
70+
ArrowSchemaView schema_view;
71+
NANOARROW_RETURN_IF_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, &error));
72+
ArrowDecimal value;
73+
ArrowDecimalInit(&value, schema_view.decimal_bitwidth,
74+
schema_view.decimal_precision, schema_view.decimal_scale);
75+
ArrowArrayViewGetDecimalUnsafe(array_view, index, &value);
76+
std::vector<uint8_t> bytes(value.n_words * sizeof(uint64_t));
77+
ArrowDecimalGetBytes(&value, bytes.data());
78+
return bytes;
79+
}
6580
case NANOARROW_TYPE_STRUCT: {
6681
ICEBERG_ASSIGN_OR_RAISE(std::shared_ptr<StructLike> struct_like,
6782
ArrowArrayStructLike::Make(*schema, *array, index));
@@ -77,10 +92,6 @@ Result<Scalar> ExtractValue(const ArrowSchema* schema, const ArrowArray* array,
7792
ArrowArrayMapLike::Make(*schema, *array, index));
7893
return map_like;
7994
}
80-
case NANOARROW_TYPE_DATE32:
81-
case NANOARROW_TYPE_TIME64:
82-
case NANOARROW_TYPE_TIMESTAMP:
83-
case NANOARROW_TYPE_DECIMAL128:
8495
case NANOARROW_TYPE_EXTENSION:
8596
// TODO(gangwu): Handle these types properly
8697
default:

src/iceberg/row/struct_like.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <memory>
3030
#include <string_view>
3131
#include <variant>
32+
#include <vector>
3233

3334
#include "iceberg/result.h"
3435
#include "iceberg/type_fwd.h"
@@ -46,6 +47,7 @@ using Scalar = std::variant<std::monostate, // for null
4647
float, // for float
4748
double, // for double
4849
std::string_view, // for non-owned string, binary and fixed
50+
std::vector<uint8_t>, // for decimal
4951
std::shared_ptr<StructLike>, // for struct
5052
std::shared_ptr<ArrayLike>, // for list
5153
std::shared_ptr<MapLike>>; // for map

test/struct_like_test.cc

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <arrow/c/bridge.h>
2121
#include <arrow/json/from_string.h>
2222
#include <arrow/type.h>
23+
#include <arrow/util/decimal.h>
2324

2425
#include "iceberg/arrow_c_data_guard_internal.h"
2526
#include "iceberg/manifest_list.h"
@@ -39,6 +40,16 @@ namespace iceberg {
3940
EXPECT_EQ(std::get<expected_type>(scalar), expected_value); \
4041
} while (0)
4142

43+
// Test helper: checks that `result` is OK, that its value holds the
// std::vector<uint8_t> alternative, and that those bytes, interpreted as an
// ::arrow::Decimal128 and rendered with `ToString(scale)`, equal
// `expected_value`.
//
// The two ASSERT_* steps abort the enclosing test on failure; only the final
// string comparison is a non-fatal EXPECT_EQ.
//
// `result` appears twice in the expansion (once in ASSERT_THAT, once in
// `result.value()`), so an expression argument is evaluated twice.
// The byte vector is copied out of the scalar before decoding.
//
// NOTE(review): the number of bytes in the vector is not checked before it is
// handed to the Decimal128 constructor; presumably that constructor reads 16
// bytes in native byte order -- confirm against the Arrow documentation.
#define EXPECT_DECIMAL_EQ(result, scale, expected_value) \
44+
do { \
45+
ASSERT_THAT(result, IsOk()); \
46+
auto scalar = result.value(); \
47+
ASSERT_TRUE(std::holds_alternative<std::vector<uint8_t>>(scalar)); \
48+
auto bytes = std::get<std::vector<uint8_t>>(scalar); \
49+
::arrow::Decimal128 decimal(bytes.data()); \
50+
EXPECT_EQ(decimal.ToString(scale), expected_value); \
51+
} while (0)
52+
4253
#define EXPECT_SCALAR_NULL(result) \
4354
do { \
4455
ASSERT_THAT(result, IsOk()); \
@@ -189,12 +200,22 @@ TEST(ArrowArrayStructLike, PrimitiveFields) {
189200
{::arrow::field("id", ::arrow::int64(), /*nullable=*/false),
190201
::arrow::field("name", ::arrow::utf8(), /*nullable=*/true),
191202
::arrow::field("score", ::arrow::float32(), /*nullable=*/true),
192-
::arrow::field("active", ::arrow::boolean(), /*nullable=*/false)});
203+
::arrow::field("active", ::arrow::boolean(), /*nullable=*/false),
204+
::arrow::field("date", ::arrow::date32(), /*nullable=*/false),
205+
::arrow::field("time", ::arrow::time64(::arrow::TimeUnit::MICRO),
206+
/*nullable=*/false),
207+
::arrow::field("timestamp", ::arrow::timestamp(::arrow::TimeUnit::MICRO),
208+
/*nullable=*/false),
209+
::arrow::field("fixed", ::arrow::fixed_size_binary(4), /*nullable=*/false),
210+
::arrow::field("decimal", ::arrow::decimal128(10, 2), /*nullable=*/false)});
193211

194212
auto arrow_array = ::arrow::json::ArrayFromJSONString(struct_type, R"([
195-
{"id": 1, "name": "Alice", "score": 95.5, "active": true},
196-
{"id": 2, "name": "Bob", "score": null, "active": false},
197-
{"id": 3, "name": null, "score": 87.2, "active": true}])")
213+
{"id": 1, "name": "Alice", "score": 95.5, "active": true, "date": 1714396800,
214+
"time": 123456, "timestamp": 1714396800000000, "fixed": "aaaa", "decimal": "1234.56"},
215+
{"id": 2, "name": "Bob", "score": null, "active": false, "date": 1714396801,
216+
"time": 123457, "timestamp": 1714396800000001, "fixed": "bbbb", "decimal": "-1234.56"},
217+
{"id": 3, "name": null, "score": 87.2, "active": true, "date": 1714396802,
218+
"time": 123458, "timestamp": 1714396800000002, "fixed": "cccc", "decimal": "1234.00"}])")
198219
.ValueOrDie();
199220

200221
ArrowSchema c_schema;
@@ -213,6 +234,12 @@ TEST(ArrowArrayStructLike, PrimitiveFields) {
213234
std::array<std::optional<std::string>, kNumRows> names = {"Alice", "Bob", std::nullopt};
214235
std::array<std::optional<float>, kNumRows> scores = {95.5f, std::nullopt, 87.2f};
215236
std::array<bool, kNumRows> actives = {true, false, true};
237+
std::array<int32_t, kNumRows> dates = {1714396800, 1714396801, 1714396802};
238+
std::array<int64_t, kNumRows> times = {123456, 123457, 123458};
239+
std::array<int64_t, kNumRows> timestamps = {1714396800000000, 1714396800000001,
240+
1714396800000002};
241+
std::array<std::string, kNumRows> fixeds = {"aaaa", "bbbb", "cccc"};
242+
std::array<std::string, kNumRows> decimals = {"1234.56", "-1234.56", "1234.00"};
216243

217244
for (int64_t i = 0; i < kNumRows; ++i) {
218245
ASSERT_THAT(struct_like->Reset(i), IsOk());
@@ -228,6 +255,11 @@ TEST(ArrowArrayStructLike, PrimitiveFields) {
228255
EXPECT_SCALAR_NULL(struct_like->GetField(2));
229256
}
230257
EXPECT_SCALAR_EQ(struct_like->GetField(3), bool, actives[i]);
258+
EXPECT_SCALAR_EQ(struct_like->GetField(4), int32_t, dates[i]);
259+
EXPECT_SCALAR_EQ(struct_like->GetField(5), int64_t, times[i]);
260+
EXPECT_SCALAR_EQ(struct_like->GetField(6), int64_t, timestamps[i]);
261+
EXPECT_SCALAR_EQ(struct_like->GetField(7), std::string_view, fixeds[i]);
262+
EXPECT_DECIMAL_EQ(struct_like->GetField(8), /*scale=*/2, decimals[i]);
231263
}
232264
}
233265

0 commit comments

Comments
 (0)