From e702a23190f1b39d85ca261d780a8e0ed259c0f1 Mon Sep 17 00:00:00 2001
From: Liam Bao <liam.zw.bao@gmail.com>
Date: Wed, 4 Mar 2026 20:16:50 -0500
Subject: [PATCH 1/3] [Json] Add benchmarks for list json reader

---
 arrow-json/Cargo.toml                         |  2 +-
 .../{json-reader.rs => json_reader.rs}        | 87 ++++++++++++++++++-
 2 files changed, 87 insertions(+), 2 deletions(-)
 rename arrow-json/benches/{json-reader.rs => json_reader.rs} (73%)
diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
index 5fcde480eb6d..03e3553bc626 100644
--- a/arrow-json/Cargo.toml
+++ b/arrow-json/Cargo.toml
@@ -67,5 +67,5 @@ name = "serde"
 harness = false
 
 [[bench]]
-name = "json-reader"
+name = "json_reader"
 harness = false
diff --git a/arrow-json/benches/json-reader.rs b/arrow-json/benches/json_reader.rs
similarity index 73%
rename from arrow-json/benches/json-reader.rs
rename to arrow-json/benches/json_reader.rs
index 504839f8ffe2..f87ba695eb62 100644
--- a/arrow-json/benches/json-reader.rs
+++ b/arrow-json/benches/json_reader.rs
@@ -32,6 +32,8 @@ const BATCH_SIZE: usize = 1 << 13; // 8K rows per batch
 const WIDE_FIELDS: usize = 64;
 const BINARY_BYTES: usize = 64;
 const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100; // 100 fields total, select only 3
+const LIST_SHORT_ELEMENTS: usize = 5;
+const LIST_LONG_ELEMENTS: usize = 100;
 
 fn decode_and_flush(decoder: &mut Decoder, data: &[u8]) {
     let mut offset = 0;
@@ -240,11 +242,94 @@ fn bench_wide_projection(c: &mut Criterion) {
     );
 }
 
+fn build_list_json(rows: usize, elements: usize) -> Vec<u8> {
+    // Returns `rows` newline-terminated JSON objects, each with a single "list" field
+    // of `elements` integers where element `i` is `row + i`. E.g. rows=2, elements=3:
+    // {"list":[0,1,2]}
+    // {"list":[1,2,3]}
+    let mut out = String::with_capacity(rows * (elements * 6 + 16));
+    for row in 0..rows {
+        out.push_str("{\"list\":[");
+        for i in 0..elements {
+            if i > 0 {
+                out.push(',');
+            }
+            write!(&mut out, "{}", (row + i) as i64).unwrap();
+        }
+        out.push_str("]}\n");
+    }
+    out.into_bytes()
+}
+
+fn build_list_values(rows: usize, elements: usize) -> Vec<Value> {
+    // Same rows as build_list_json, but built as serde_json Values: {"list": [row, row + 1, ..]}.
+    let mut out = Vec::with_capacity(rows);
+    for row in 0..rows {
+        let arr: Vec<Value> = (0..elements)
+            .map(|i| Value::Number(Number::from((row + i) as i64)))
+            .collect();
+        let mut map = Map::with_capacity(1);
+        map.insert("list".to_string(), Value::Array(arr));
+        out.push(Value::Object(map));
+    }
+    out
+}
+
+fn build_list_schema() -> Arc<Schema> {
+    Arc::new(Schema::new(vec![Field::new(
+        "list", // same key that build_list_json / build_list_values write
+        DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
+        false, // nullable = false (third argument of Field::new)
+    )]))
+}
+
+fn bench_decode_list(c: &mut Criterion) {
+    let schema = build_list_schema(); // shared by both cases below
+
+    // Short lists (LIST_SHORT_ELEMENTS per row): exercises per-list handling overhead
+    let short_data = build_list_json(ROWS, LIST_SHORT_ELEMENTS);
+    bench_decode_schema(c, "decode_list_short_i64_json", &short_data, schema.clone());
+
+    // Long lists (LIST_LONG_ELEMENTS per row): exercises child element decode throughput
+    let long_data = build_list_json(ROWS, LIST_LONG_ELEMENTS);
+    bench_decode_schema(c, "decode_list_long_i64_json", &long_data, schema);
+}
+
+fn bench_serialize_list(c: &mut Criterion) {
+    let schema = build_list_schema();
+
+    let short_values = build_list_values(ROWS, LIST_SHORT_ELEMENTS); // built once, outside b.iter
+    c.bench_function("decode_list_short_i64_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone()) // new decoder every iteration
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&short_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {} // loop until no batch is left
+        })
+    });
+
+    let long_values = build_list_values(ROWS, LIST_LONG_ELEMENTS); // built once, outside b.iter
+    c.bench_function("decode_list_long_i64_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone()) // new decoder every iteration
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&long_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {} // loop until no batch is left
+        })
+    });
+}
+
 criterion_group!(
     benches,
     bench_decode_wide_object,
     bench_serialize_wide_object,
     bench_binary_hex,
-    bench_wide_projection
+    bench_wide_projection,
+    bench_decode_list,
+    bench_serialize_list
 );
 criterion_main!(benches);

From cfaea69765b6d845314211e5ed67c6176fbcc6fa Mon Sep 17 00:00:00 2001
From: Liam Bao <liam.zw.bao@gmail.com>
Date: Wed, 4 Mar 2026 20:59:36 -0500
Subject: [PATCH 2/3] Add bench for json-writer

---
 arrow-json/Cargo.toml             |   4 +
 arrow-json/benches/json_writer.rs | 129 ++++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 arrow-json/benches/json_writer.rs

diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
index 03e3553bc626..be1f8d0ccdca 100644
--- a/arrow-json/Cargo.toml
+++ b/arrow-json/Cargo.toml
@@ -69,3 +69,7 @@ harness = false
 [[bench]]
 name = "json_reader"
 harness = false
+
+[[bench]]
+name = "json_writer"
+harness = false
diff --git a/arrow-json/benches/json_writer.rs b/arrow-json/benches/json_writer.rs
new file mode 100644
index 000000000000..628d378b9a50
--- /dev/null
+++ b/arrow-json/benches/json_writer.rs
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::builder::{FixedSizeListBuilder, Int64Builder, ListBuilder};
+use arrow_array::{Array, RecordBatch};
+use arrow_json::LineDelimitedWriter;
+use arrow_schema::{DataType, Field, Schema};
+use criterion::{Criterion, Throughput, criterion_group, criterion_main};
+use std::sync::Arc;
+
+const ROWS: usize = 1 << 17; // 128K rows
+const LIST_SHORT_ELEMENTS: usize = 5;
+const LIST_LONG_ELEMENTS: usize = 100;
+
+fn build_list_batch(rows: usize, elements: usize) -> RecordBatch {
+    let mut list_builder = ListBuilder::new(Int64Builder::new());
+    for row in 0..rows {
+        for i in 0..elements {
+            list_builder.values().append_value((row + i) as i64); // element i of this row
+        }
+        list_builder.append(true); // finish this row's list (true = valid, per ListBuilder::append)
+    }
+    let list_array = list_builder.finish();
+
+    let schema = Arc::new(Schema::new(vec![Field::new(
+        "list",
+        DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
+        false,
+    )]));
+
+    RecordBatch::try_new(schema, vec![Arc::new(list_array)]).unwrap()
+}
+
+fn bench_write_list(c: &mut Criterion) {
+    let short_batch = build_list_batch(ROWS, LIST_SHORT_ELEMENTS); // inputs built once, up front
+    let long_batch = build_list_batch(ROWS, LIST_LONG_ELEMENTS);
+
+    let mut group = c.benchmark_group("write_list_i64");
+    // Short lists: tests per-list overhead (few elements per row)
+    group.throughput(Throughput::Elements(ROWS as u64));
+    group.bench_function("short", |b| {
+        b.iter(|| {
+            let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write(&short_batch).unwrap();
+            writer.finish().unwrap();
+            buf
+        })
+    });
+
+    // Long lists: tests child element encode throughput (many elements per row)
+    group.bench_function("long", |b| {
+        b.iter(|| {
+            let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write(&long_batch).unwrap();
+            writer.finish().unwrap();
+            buf
+        })
+    });
+
+    group.finish();
+}
+
+fn build_fixed_size_list_batch(rows: usize, elements: usize) -> RecordBatch {
+    let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), elements as i32);
+    for row in 0..rows {
+        for i in 0..elements {
+            builder.values().append_value((row + i) as i64); // element i of this row
+        }
+        builder.append(true); // finish this row (true = valid, per FixedSizeListBuilder::append)
+    }
+    let fsl_array = builder.finish();
+
+    let schema = Arc::new(Schema::new(vec![Field::new(
+        "fixed_size_list",
+        fsl_array.data_type().clone(), // take the column type from the built array
+        false,
+    )]));
+
+    RecordBatch::try_new(schema, vec![Arc::new(fsl_array)]).unwrap()
+}
+
+fn bench_write_fixed_size_list(c: &mut Criterion) {
+    let short_batch = build_fixed_size_list_batch(ROWS, LIST_SHORT_ELEMENTS); // built once, up front
+    let long_batch = build_fixed_size_list_batch(ROWS, LIST_LONG_ELEMENTS);
+
+    let mut group = c.benchmark_group("write_fixed_size_list_i64");
+    group.throughput(Throughput::Elements(ROWS as u64));
+
+    group.bench_function("short", |b| {
+        b.iter(|| {
+            let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write(&short_batch).unwrap();
+            writer.finish().unwrap();
+            buf
+        })
+    });
+
+    group.bench_function("long", |b| {
+        b.iter(|| {
+            let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write(&long_batch).unwrap();
+            writer.finish().unwrap();
+            buf
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_write_list, bench_write_fixed_size_list);
+criterion_main!(benches);

From 6789ab466d7c93b5e78cfbd2a1cd9a8ec0af8741 Mon Sep 17 00:00:00 2001
From: Liam Bao <liam.zw.bao@gmail.com>
Date: Tue, 10 Mar 2026 19:12:07 -0400
Subject: [PATCH 3/3] Reuse output buffer across iterations in json_writer benches

---
 arrow-json/benches/json_writer.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arrow-json/benches/json_writer.rs b/arrow-json/benches/json_writer.rs
index 628d378b9a50..055ad5be48b4 100644
--- a/arrow-json/benches/json_writer.rs
+++ b/arrow-json/benches/json_writer.rs
@@ -53,23 +53,23 @@ fn bench_write_list(c: &mut Criterion) {
     // Short lists: tests per-list overhead (few elements per row)
     group.throughput(Throughput::Elements(ROWS as u64));
     group.bench_function("short", |b| {
+        let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
         b.iter(|| {
-            let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
+            buf.clear();
             let mut writer = LineDelimitedWriter::new(&mut buf);
             writer.write(&short_batch).unwrap();
             writer.finish().unwrap();
-            buf
         })
     });
 
     // Long lists: tests child element encode throughput (many elements per row)
     group.bench_function("long", |b| {
+        let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
         b.iter(|| {
-            let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
+            buf.clear();
             let mut writer = LineDelimitedWriter::new(&mut buf);
             writer.write(&long_batch).unwrap();
             writer.finish().unwrap();
-            buf
         })
     });
 
@@ -103,22 +103,22 @@ fn bench_write_fixed_size_list(c: &mut Criterion) {
     group.throughput(Throughput::Elements(ROWS as u64));
 
     group.bench_function("short", |b| {
+        let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
         b.iter(|| {
-            let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8);
+            buf.clear();
             let mut writer = LineDelimitedWriter::new(&mut buf);
             writer.write(&short_batch).unwrap();
             writer.finish().unwrap();
-            buf
         })
     });
 
     group.bench_function("long", |b| {
+        let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
         b.iter(|| {
-            let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8);
+            buf.clear();
             let mut writer = LineDelimitedWriter::new(&mut buf);
             writer.write(&long_batch).unwrap();
             writer.finish().unwrap();
-            buf
         })
     });