Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions encodings/sparse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ vortex-mask = { workspace = true }
vortex-session = { workspace = true }

[dev-dependencies]
divan = { workspace = true }
itertools = { workspace = true }
rstest = { workspace = true }
vortex-array = { workspace = true, features = ["_test-harness"] }

[[bench]]
name = "sparse_canonical"
harness = false
121 changes: 121 additions & 0 deletions encodings/sparse/benches/sparse_canonical.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#![expect(clippy::cast_possible_truncation)]

use std::sync::Arc;

use divan::Bencher;
use vortex_array::ArrayRef;
use vortex_array::IntoArray;
use vortex_array::arrays::FixedSizeListArray;
use vortex_array::arrays::ListViewArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::dtype::Nullability::NonNullable;
use vortex_array::dtype::PType::I32;
use vortex_array::scalar::Scalar;
use vortex_array::validity::Validity;
use vortex_buffer::Buffer;
use vortex_error::VortexExpect;
use vortex_sparse::Sparse;

/// Benchmark entry point: hands control to divan's runner, which is expected to
/// discover and execute the `#[divan::bench]` functions declared in this file.
fn main() {
divan::main();
}

/// Parameter sets for the variable-size list benchmark.
///
/// Each tuple is `(len, patch_stride, list_size)`.
const LIST_ARGS: &[(usize, usize, usize)] = &[
    (10_000, 7, 8),
    (50_000, 7, 8),
    (50_000, 11, 16),
];

/// Parameter sets for the fixed-size list benchmark.
///
/// Each tuple is `(len, patch_stride, list_size)`; `list_size` is a `u32` here.
const FIXED_SIZE_LIST_ARGS: &[(usize, usize, u32)] = &[
    (10_000, 7, 8),
    (50_000, 7, 8),
    (50_000, 11, 16),
];

/// Builds a sparse array of `len` variable-size `i32` lists to use as benchmark input.
///
/// Positions `0, patch_stride, 2 * patch_stride, ..` below `len` are patched. Patch `k`
/// is the list of `list_size` consecutive values starting at `k * list_size`. The fill
/// value passed to the sparse array is the list `[0, 1, .., list_size - 1]`.
///
/// # Panics
///
/// Panics if `patch_stride` is zero (`Iterator::step_by` rejects a zero step).
/// Presumably also panics through `vortex_expect` if `Sparse::try_new` returns an
/// error (confirm against `vortex_error`).
fn make_sparse_list(len: usize, patch_stride: usize, list_size: usize) -> ArrayRef {
    let indices: Buffer<u32> = (0..len)
        .step_by(patch_stride)
        .map(|position| position as u32)
        .collect();
    let patch_count = indices.len();

    // Patch lists sit back to back inside a single flat elements array, so patch `k`
    // begins at offset `k * list_size` and every patch has the same size.
    let offsets: Buffer<u32> = (0..patch_count)
        .map(|patch| (patch * list_size) as u32)
        .collect();
    let sizes: Buffer<u32> = std::iter::repeat_n(list_size as u32, patch_count).collect();
    let flat_len = (patch_count * list_size) as i32;
    let elements = PrimitiveArray::from_iter(0..flat_len).into_array();

    let values = ListViewArray::new(
        elements,
        offsets.into_array(),
        sizes.into_array(),
        Validity::NonNullable,
    )
    .into_array();

    let fill = Scalar::list(
        Arc::new(I32.into()),
        (0..list_size as i32).map(Scalar::from).collect(),
        NonNullable,
    );

    Sparse::try_new(indices.into_array(), values, len, fill)
        .vortex_expect("sparse list input should be valid")
        .into_array()
}

/// Builds a sparse array of `len` fixed-size `i32` lists to use as benchmark input.
///
/// Positions `0, patch_stride, 2 * patch_stride, ..` below `len` are patched. The patch
/// values are a fixed-size-list array of width `list_size` over the flat elements
/// `0..patch_count * list_size`. The fill value passed to the sparse array is the list
/// `[0, 1, .., list_size - 1]`.
///
/// # Panics
///
/// Panics if `patch_stride` is zero (`Iterator::step_by` rejects a zero step).
/// Presumably also panics through `vortex_expect` if `Sparse::try_new` returns an
/// error (confirm against `vortex_error`).
fn make_sparse_fixed_size_list(len: usize, patch_stride: usize, list_size: u32) -> ArrayRef {
    let indices: Buffer<u32> = (0..len)
        .step_by(patch_stride)
        .map(|position| position as u32)
        .collect();
    let patch_count = indices.len();

    // One flat run of elements, interpreted as `patch_count` lists of `list_size` each.
    let flat_len = (patch_count * list_size as usize) as i32;
    let elements = PrimitiveArray::from_iter(0..flat_len).into_array();
    let values =
        FixedSizeListArray::new(elements, list_size, Validity::NonNullable, patch_count)
            .into_array();

    let fill = Scalar::fixed_size_list(
        Arc::new(I32.into()),
        (0..list_size as i32).map(Scalar::from).collect(),
        NonNullable,
    );

    Sparse::try_new(indices.into_array(), values, len, fill)
        .vortex_expect("sparse fixed-size-list input should be valid")
        .into_array()
}

#[divan::bench(args = LIST_ARGS)]
fn canonicalize_sparse_list(
bencher: Bencher,
(len, patch_stride, list_size): (usize, usize, usize),
) {
let sparse = make_sparse_list(len, patch_stride, list_size);

bencher
.with_inputs(|| sparse.clone())
.bench_values(|array| {
divan::black_box(
array
.to_canonical()
.vortex_expect("sparse list canonicalization"),
)
});
}

#[divan::bench(args = FIXED_SIZE_LIST_ARGS)]
fn canonicalize_sparse_fixed_size_list(
bencher: Bencher,
(len, patch_stride, list_size): (usize, usize, u32),
) {
let sparse = make_sparse_fixed_size_list(len, patch_stride, list_size);

bencher
.with_inputs(|| sparse.clone())
.bench_values(|array| {
divan::black_box(
array
.to_canonical()
.vortex_expect("sparse fixed-size-list canonicalization"),
)
});
}
38 changes: 18 additions & 20 deletions encodings/sparse/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ fn execute_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
total_canonical_values,
len,
);
let patch_values_validity = patch_values.listview_validity_mask();

let mut patch_idx = 0;

Expand All @@ -195,15 +196,17 @@ fn execute_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
== position;

if position_is_patched {
// Set with the patch value.
builder
.append_value(
patch_values
.scalar_at(patch_idx)
.vortex_expect("scalar_at")
.as_list(),
)
.vortex_expect("Failed to append sparse value");
if patch_values_validity.value(patch_idx) {
// Bulk-append the list value to avoid per-element scalar_at.
let patch_list = patch_values
.list_elements_at(patch_idx)
.vortex_expect("list_elements_at");
builder
.append_array_as_list(&patch_list)
.vortex_expect("Failed to append sparse value");
} else {
builder.append_null();
}
patch_idx += 1;
} else {
// Set with the fill value.
Expand Down Expand Up @@ -265,6 +268,10 @@ fn execute_sparse_fixed_size_list_inner<I: IntegerPType>(
let total_elements = array_len * list_size as usize;
let mut builder = builder_with_capacity(element_dtype, total_elements);
let fill_elements = fill_value.elements();
let values_validity = values
.validity()
.vortex_expect("sparse fixed-size-list validity should be derivable")
.to_mask(values.len());

let mut next_index = 0;
let indices = indices
Expand All @@ -281,20 +288,11 @@ fn execute_sparse_fixed_size_list_inner<I: IntegerPType>(
);

// Append the patch value, handling null patches by appending defaults.
if values
.validity()
.vortex_expect("sparse fixed-size-list validity should be derivable")
.is_valid(patch_idx)
.vortex_expect("is_valid")
{
if values_validity.value(patch_idx) {
let patch_list = values
.fixed_size_list_elements_at(patch_idx)
.vortex_expect("fixed_size_list_elements_at");
for i in 0..list_size as usize {
builder
.append_scalar(&patch_list.scalar_at(i).vortex_expect("scalar_at"))
.vortex_expect("element dtype must match");
}
builder.extend_from_array(&patch_list);
} else {
builder.append_defaults(list_size as usize);
}
Expand Down
Loading