Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion rust/lance-index/src/vector/ivf/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl IvfModel {
}

/// Looks up the length stored in `self.lengths` for partition `part` and
/// returns it as a `usize`.
///
/// NOTE(review): this span is a rendered diff hunk, so it shows the removed and
/// the added body line back to back; only the second one exists after the change.
pub fn partition_size(&self, part: usize) -> usize {
// Old side of the diff (presumably the single deletion reported for this file):
// direct indexing, which panics when `part` is not a valid index.
self.lengths[part] as usize
// New side of the diff: checked lookup via `get`, falling back to 0 when
// `part` is out of range instead of panicking.
self.lengths.get(part).cloned().unwrap_or(0) as usize
}

pub fn num_rows(&self) -> u64 {
Expand Down Expand Up @@ -354,4 +354,25 @@ mod tests {
assert_eq!(first_vals.value(0), 1.0);
assert_eq!(first_vals.value(1), 2.0);
}

#[test]
fn test_partition_size_bounds_checking() {
    // Regression test for issue #5312: asking for the size of a partition that
    // does not exist must yield 0 rather than panic, because indices with
    // mismatched partition counts can be encountered during optimization.
    let mut model = IvfModel::empty();
    for len in [20, 50, 30] {
        model.add_partition(len);
    }

    // Every partition that was added reports the length it was added with.
    let in_range: [(usize, usize); 3] = [(0, 20), (1, 50), (2, 30)];
    for (part, expected) in in_range {
        assert_eq!(model.partition_size(part), expected);
    }

    // Anything past the last partition reports 0; 117 is the partition id
    // from the original report in issue #5312.
    let out_of_range: [usize; 3] = [3, 100, 117];
    for part in out_of_range {
        assert_eq!(model.partition_size(part), 0);
    }
}
}
49 changes: 47 additions & 2 deletions rust/lance/src/index/vector/ivf/v2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ mod tests {
use std::{ops::Range, sync::Arc};

use all_asserts::{assert_ge, assert_lt};
use arrow::datatypes::{Float64Type, UInt64Type, UInt8Type};
use arrow::datatypes::{Float64Type, Int32Type, UInt64Type, UInt8Type};
use arrow::{array::AsArray, datatypes::Float32Type};
use arrow_array::{
Array, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, Float32Array, Int64Array,
Expand All @@ -625,13 +625,14 @@ mod tests {
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use itertools::Itertools;
use lance_arrow::FixedSizeListArrayExt;
use lance_datagen::{array, gen_batch, BatchCount, Dimension, RowCount};
use lance_index::vector::bq::RQBuildParams;
use lance_index::vector::storage::VectorStore;

use crate::dataset::{InsertBuilder, UpdateBuilder, WriteMode, WriteParams};
use crate::index::vector::ivf::v2::IvfPq;
use crate::index::DatasetIndexInternalExt;
use crate::utils::test::copy_test_data_to_tmp;
use crate::utils::test::{copy_test_data_to_tmp, DatagenExt, FragmentCount, FragmentRowCount};
use crate::{
dataset::optimize::{compact_files, CompactionOptions},
index::vector::IndexFileVersion,
Expand Down Expand Up @@ -2307,6 +2308,50 @@ mod tests {
.unwrap();
}

#[tokio::test]
async fn test_optimize_with_partition_splits() {
    // Regression test for issue #5312: optimizing indices after an append,
    // a bulk delete and a compaction must complete without panicking.
    let index_params = VectorIndexParams::with_ivf_pq_params(
        DistanceType::L2,
        IvfBuildParams::new(3),
        PQBuildParams::new(2, 8),
    );

    // Seed an in-memory dataset and build the initial vector index on `vec`.
    let mut dataset = gen_batch()
        .col("id", array::step::<Int32Type>())
        .col("vec", array::rand_vec::<Float32Type>(Dimension::from(16)))
        .into_ram_dataset(FragmentCount::from(1), FragmentRowCount::from(30_000))
        .await
        .unwrap();
    dataset
        .create_index_builder(&["vec"], IndexType::Vector, &index_params)
        .await
        .unwrap();

    // Add two extra rows, then optimize in append mode so they are covered by
    // a new index segment.
    dataset
        .append(
            gen_batch()
                .col("id", array::step_custom::<Int32Type>(10_000, 1))
                .col("vec", array::rand_vec::<Float32Type>(Dimension::from(16)))
                .into_reader_rows(RowCount::from(2), BatchCount::from(1)),
            None,
        )
        .await
        .unwrap();
    dataset
        .optimize_indices(&OptimizeOptions::append())
        .await
        .unwrap();

    // Drop all rows with an even id, then compact. The small per-fragment row
    // target keeps compaction from merging fragments together.
    dataset.delete("id % 2 = 0").await.unwrap();
    let stats = compact_files(
        &mut dataset,
        CompactionOptions {
            target_rows_per_fragment: 50,
            ..Default::default()
        },
        None,
    )
    .await
    .unwrap();
    assert_eq!(stats.fragments_removed, 2);

    // The merge-mode optimize is the step under test: it only has to succeed.
    dataset
        .optimize_indices(&OptimizeOptions::merge(1))
        .await
        .unwrap();
}

#[tokio::test]
async fn test_create_index_with_many_invalid_vectors() {
let test_dir = TempStrDir::default();
Expand Down
Loading