diff --git a/src/arrow-util/src/builder.rs b/src/arrow-util/src/builder.rs index 398b48947dacb..bdfa9943e175a 100644 --- a/src/arrow-util/src/builder.rs +++ b/src/arrow-util/src/builder.rs @@ -1034,13 +1034,21 @@ impl ArrowColumn { ) } let dims_builder: &mut ArrowColumn = struct_builder.field_builder(1).unwrap(); - if let ColBuilder::UInt8Builder(dims_builder) = &mut dims_builder.inner { - dims_builder.append_value(arr.dims().ndims()); - } else { - anyhow::bail!( - "Expected UInt8Builder for StructBuilder with Array datum: {:?}", + match &mut dims_builder.inner { + ColBuilder::UInt8Builder(dims_builder) => { + dims_builder.append_value(arr.dims().ndims()); + } + // Iceberg has no narrow integer types, so the synthetic + // `dimensions` field comes back from the Iceberg schema + // widened to Int32. Promote `ndims` (a u8) the same way + // smallint columns widen into an Int32Builder. + ColBuilder::Int32Builder(dims_builder) => { + dims_builder.append_value(i32::from(arr.dims().ndims())); + } + _ => anyhow::bail!( + "Expected UInt8Builder or Int32Builder for StructBuilder with Array datum: {:?}", struct_builder - ) + ), } struct_builder.append(true) } diff --git a/test/iceberg/catalog.td b/test/iceberg/catalog.td index fa483395d5be4..e84c3ecbd0da8 100644 --- a/test/iceberg/catalog.td +++ b/test/iceberg/catalog.td @@ -335,3 +335,37 @@ SELECT id, cardinality(props), list_sort(map_keys(props))::VARCHAR, list_sort(ma 1 2 [a, b] [bar, foo] 2 0 [] [] 3 1 [key] [value] + +# Test array columns +# Materialize arrays are an arrow struct of {items: list, dimensions: uint8}. +# Iceberg has no uint8, so the dimensions field widens to Iceberg int (Int32), +# the same way smallint does above. The array Datum's ndims (a u8) must promote +# into that Int32Builder, or the sink stalls converting the row to a recordbatch +# with "Expected UInt8Builder for StructBuilder with Array datum". +> CREATE TABLE arrays(id int, vals int[]); + +> INSERT INTO arrays VALUES + (1, '{1,2,3}'), + (2, '{}'), + (3, '{42}'); + +> CREATE SINK array_demo + FROM arrays + INTO ICEBERG CATALOG CONNECTION polaris ( + NAMESPACE 'default_namespace', + TABLE 'array_table' + ) + USING AWS CONNECTION aws_conn + KEY (id) NOT ENFORCED + MODE UPSERT + WITH (COMMIT INTERVAL '1s'); + +$ sleep-is-probably-flaky-i-have-justified-my-need-with-a-comment duration=10s + +# The array lands as a struct {items: list, dimensions: int}. Verify the +# elements and dimension count round-trip. +$ duckdb-query name=iceberg +SELECT id, vals.items::VARCHAR, vals.dimensions FROM iceberg_scan('s3://test-bucket/default_namespace/array_table') ORDER BY id +1 [1, 2, 3] 1 +2 [] 0 +3 [42] 1