Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions src/arrow-util/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1034,13 +1034,21 @@ impl ArrowColumn {
)
}
let dims_builder: &mut ArrowColumn = struct_builder.field_builder(1).unwrap();
if let ColBuilder::UInt8Builder(dims_builder) = &mut dims_builder.inner {
dims_builder.append_value(arr.dims().ndims());
} else {
anyhow::bail!(
"Expected UInt8Builder for StructBuilder with Array datum: {:?}",
match &mut dims_builder.inner {
ColBuilder::UInt8Builder(dims_builder) => {
dims_builder.append_value(arr.dims().ndims());
}
// Iceberg has no narrow integer types, so the synthetic
// `dimensions` field comes back from the Iceberg schema
// widened to Int32. Promote `ndims` (a u8) the same way
// smallint columns widen into an Int32Builder.
ColBuilder::Int32Builder(dims_builder) => {
dims_builder.append_value(i32::from(arr.dims().ndims()));
}
_ => anyhow::bail!(
"Expected UInt8Builder or Int32Builder for StructBuilder with Array datum: {:?}",
struct_builder
)
),
}
struct_builder.append(true)
}
Expand Down
34 changes: 34 additions & 0 deletions test/iceberg/catalog.td
Original file line number Diff line number Diff line change
Expand Up @@ -335,3 +335,37 @@ SELECT id, cardinality(props), list_sort(map_keys(props))::VARCHAR, list_sort(ma
1 2 [a, b] [bar, foo]
2 0 [] []
3 1 [key] [value]

# Test array columns
# Materialize arrays are encoded as an Arrow struct of {items: list, dimensions: uint8}.
# Iceberg has no uint8, so the dimensions field widens to Iceberg int (Int32),
# the same way smallint does above. The array Datum's ndims (a u8) must be
# promoted into that Int32Builder; otherwise the sink stalls while converting the
# row to a RecordBatch with "Expected UInt8Builder for StructBuilder with Array datum".
> CREATE TABLE arrays(id int, vals int[]);

> INSERT INTO arrays VALUES
(1, '{1,2,3}'),
(2, '{}'),
(3, '{42}');

> CREATE SINK array_demo
FROM arrays
INTO ICEBERG CATALOG CONNECTION polaris (
NAMESPACE 'default_namespace',
TABLE 'array_table'
)
USING AWS CONNECTION aws_conn
KEY (id) NOT ENFORCED
MODE UPSERT
WITH (COMMIT INTERVAL '1s');

# Justification for the sleep: the sink is created with COMMIT INTERVAL '1s', and
# the DuckDB query below reads the Iceberg table directly from object storage, so
# the rows must already be committed before it runs.
# NOTE(review): 10s is presumably a safety margin over the 1s commit interval,
# not a guarantee — confirm it is sufficient in CI.
$ sleep-is-probably-flaky-i-have-justified-my-need-with-a-comment duration=10s

# The array lands as a struct {items: list, dimensions: int}. Verify the
# elements and dimension count round-trip.
$ duckdb-query name=iceberg
SELECT id, vals.items::VARCHAR, vals.dimensions FROM iceberg_scan('s3://test-bucket/default_namespace/array_table') ORDER BY id
1 [1, 2, 3] 1
2 [] 0
3 [42] 1
Loading