apache · Dandandan · Apr 28, 2025 · Apr 29, 2025 · Apr 29, 2025 · Apr 29, 2025
diff --git a/arrow-arith/src/arity.rs b/arrow-arith/src/arity.rs
@@ -20,8 +20,6 @@
 use arrow_array::builder::BufferBuilder;
 use arrow_array::*;
 use arrow_buffer::buffer::NullBuffer;
-use arrow_buffer::ArrowNativeType;
-use arrow_buffer::{Buffer, MutableBuffer};
 use arrow_data::ArrayData;
 use arrow_schema::ArrowError;
 
@@ -124,13 +122,13 @@ where
 
     let nulls = NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref());
 
-    let values = a.values().iter().zip(b.values()).map(|(l, r)| op(*l, *r));
-    // JUSTIFICATION
-    //  Benefit
-    //      ~60% speedup
-    //  Soundness
-    //      `values` is an iterator with a known size from a PrimitiveArray
-    let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
+    let values = a
+        .values()
+        .into_iter()
+        .zip(b.values())
+        .map(|(l, r)| op(*l, *r));
+
+    let buffer: Vec<_> = values.collect();
     Ok(PrimitiveArray::new(buffer.into(), nulls))
 }
 
@@ -251,14 +249,16 @@ where
 ///
 /// Return an error if the arrays have different lengths or
 /// the operation is under erroneous
-pub fn try_binary<A: ArrayAccessor, B: ArrayAccessor, F, O>(
-    a: A,
-    b: B,
+pub fn try_binary<A, B, F, O>(
+    a: &PrimitiveArray<A>,
+    b: &PrimitiveArray<B>,
     op: F,
 ) -> Result<PrimitiveArray<O>, ArrowError>
 where
+    A: ArrowPrimitiveType,
+    B: ArrowPrimitiveType,
     O: ArrowPrimitiveType,
-    F: Fn(A::Item, B::Item) -> Result<O::Native, ArrowError>,
+    F: Fn(A::Native, B::Native) -> Result<O::Native, ArrowError>,
 {
     if a.len() != b.len() {
         return Err(ArrowError::ComputeError(
@@ -271,7 +271,7 @@ where
     let len = a.len();
 
     if a.null_count() == 0 && b.null_count() == 0 {
-        try_binary_no_nulls(len, a, b, op)
+        try_binary_no_nulls(a, b, op)
     } else {
         let nulls =
             NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()).unwrap();
@@ -369,23 +369,25 @@ fn create_union_null_buffer(
 
 /// This intentional inline(never) attribute helps LLVM optimize the loop.
 #[inline(never)]
-fn try_binary_no_nulls<A: ArrayAccessor, B: ArrayAccessor, F, O>(
-    len: usize,
-    a: A,
-    b: B,
+fn try_binary_no_nulls<A, B, F, O>(
+    a: &PrimitiveArray<A>,
+    b: &PrimitiveArray<B>,
     op: F,
 ) -> Result<PrimitiveArray<O>, ArrowError>
 where
+    A: ArrowPrimitiveType,
+    B: ArrowPrimitiveType,
     O: ArrowPrimitiveType,
-    F: Fn(A::Item, B::Item) -> Result<O::Native, ArrowError>,
+    F: Fn(A::Native, B::Native) -> Result<O::Native, ArrowError>,
 {
-    let mut buffer = MutableBuffer::new(len * O::Native::get_byte_width());
-    for idx in 0..len {
-        unsafe {
-            buffer.push_unchecked(op(a.value_unchecked(idx), b.value_unchecked(idx))?);
-        };
-    }
-    Ok(PrimitiveArray::new(buffer.into(), None))
+    // Collecting into `Result<Vec<_>, _>` stops at the first `Err` returned by `op`.
+    let new_values = a
+        .values()
+        .into_iter()
+        .zip(b.values())
+        .map(|(l, r)| op(*l, *r))
+        .collect::<Result<Vec<_>, ArrowError>>()?;
+    Ok(PrimitiveArray::new(new_values.into(), None))
 }
 
 /// This intentional inline(never) attribute helps LLVM optimize the loop.

diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
@@ -729,10 +729,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
 
     /// Creates a PrimitiveArray based on a constant value with `count` elements
     pub fn from_value(value: T::Native, count: usize) -> Self {
-        unsafe {
-            let val_buf = Buffer::from_trusted_len_iter((0..count).map(|_| value));
-            Self::new(val_buf.into(), None)
-        }
+        let val_buf: Vec<_> = vec![value; count];
+        Self::new(val_buf.into(), None)
     }
 
     /// Returns an iterator that returns the values of `array.value(i)` for an iterator with each element `i`
@@ -827,13 +825,8 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         F: Fn(T::Native) -> O::Native,
     {
         let nulls = self.nulls().cloned();
-        let values = self.values().iter().map(|v| op(*v));
-        // JUSTIFICATION
-        //  Benefit
-        //      ~60% speedup
-        //  Soundness
-        //      `values` is an iterator with a known size because arrays are sized.
-        let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
+        let values = self.values().into_iter().map(|v| op(*v));
+        let buffer: Vec<_> = values.collect();
         PrimitiveArray::new(buffer.into(), nulls)
     }
 
@@ -1035,12 +1028,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
         F: FnMut(U::Item) -> T::Native,
     {
         let nulls = left.logical_nulls();
-        let buffer = unsafe {
+        let buffer: Vec<_> = (0..left.len())
             // SAFETY: i in range 0..left.len()
-            let iter = (0..left.len()).map(|i| op(left.value_unchecked(i)));
-            // SAFETY: upper bound is trusted because `iter` is over a range
-            Buffer::from_trusted_len_iter(iter)
-        };
+            .map(|i| op(unsafe { left.value_unchecked(i) }))
+            .collect();
 
         PrimitiveArray::new(buffer.into(), nulls)
     }

diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs
@@ -103,11 +103,13 @@ where
     F: FnMut(u64) -> u64,
 {
     // reserve capacity and set length so we can get a typed view of u64 chunks
-    let mut result =
-        MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
+    let mut result = MutableBuffer::new(ceil(len_in_bits, 8));
 
     let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits);
 
+    // SAFETY: `MutableBuffer::set_len` is sound because it is initialized right after
+    unsafe { result.set_len(left_chunks.iter().len() * 8) };
+
     let result_chunks = result.typed_data_mut::<u64>().iter_mut();
 
     result_chunks