From 842022d3f060888fa3bfd25c87c7725c1bca3d1e Mon Sep 17 00:00:00 2001 From: Koki Kato Date: Fri, 31 Jan 2025 09:14:30 +0900 Subject: [PATCH 1/4] Rename existing `suffix_array` module to `sample` --- benches/locate.rs | 2 +- src/builder.rs | 2 +- src/fm_index.rs | 4 ++-- src/lib.rs | 2 +- src/rlfmi.rs | 4 ++-- src/{suffix_array.rs => sample.rs} | 0 src/search.rs | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) rename src/{suffix_array.rs => sample.rs} (100%) diff --git a/benches/locate.rs b/benches/locate.rs index 16b8c48..ae59841 100644 --- a/benches/locate.rs +++ b/benches/locate.rs @@ -1,4 +1,4 @@ -use fm_index::suffix_array::HasPosition; +use fm_index::sample::HasPosition; use fm_index::{FMIndexBackend, SearchIndexBuilder}; use criterion::{criterion_group, criterion_main}; diff --git a/src/builder.rs b/src/builder.rs index b5e9341..229a16a 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -3,7 +3,7 @@ use crate::converter; use crate::{ converter::{Converter, IdConverter}, - suffix_array::SuffixOrderSampledArray, + sample::SuffixOrderSampledArray, Character, FMIndex, RLFMIndex, }; diff --git a/src/fm_index.rs b/src/fm_index.rs index 1e65965..6edab17 100644 --- a/src/fm_index.rs +++ b/src/fm_index.rs @@ -3,7 +3,7 @@ use crate::character::{prepare_text, Character}; use crate::converter; use crate::converter::{Converter, IndexWithConverter}; use crate::iter::FMIndexBackend; -use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray}; +use crate::sample::{self, HasPosition, SuffixOrderSampledArray}; use crate::{sais, seal, HeapSize}; use crate::{util, Search}; @@ -39,7 +39,7 @@ where C: Converter, { pub(crate) fn new(text: Vec, converter: C, level: usize) -> Self { - Self::create(text, converter, |sa| suffix_array::sample(sa, level)) + Self::create(text, converter, |sa| sample::sample(sa, level)) } } diff --git a/src/lib.rs b/src/lib.rs index dc36428..c0b73f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,7 +135,7 @@ #![warn(missing_docs)] pub mod converter; -pub mod suffix_array; +pub mod sample; mod builder; mod character; diff --git a/src/rlfmi.rs b/src/rlfmi.rs index 9ec199b..3f645f4 100644 --- a/src/rlfmi.rs +++ b/src/rlfmi.rs @@ -3,7 +3,7 @@ use crate::character::{prepare_text, Character}; use crate::converter; use crate::converter::{Converter, IndexWithConverter}; use crate::iter::FMIndexBackend; -use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray}; +use crate::sample::{self, HasPosition, SuffixOrderSampledArray}; use crate::{sais, HeapSize, Search}; use crate::{seal, util}; @@ -41,7 +41,7 @@ where C: Converter, { pub(crate) fn new(text: Vec, converter: C, level: usize) -> Self { - Self::create(text, converter, |sa| suffix_array::sample(sa, level)) + Self::create(text, converter, |sa| sample::sample(sa, level)) } } diff --git a/src/suffix_array.rs b/src/sample.rs similarity index 100% rename from src/suffix_array.rs rename to src/sample.rs diff --git a/src/search.rs b/src/search.rs index 6725586..9924b09 100644 --- a/src/search.rs +++ b/src/search.rs @@ -5,8 +5,8 @@ use crate::converter; use crate::converter::IndexWithConverter; use crate::iter::FMIndexBackend; +use crate::sample::HasPosition; use crate::seal; -use crate::suffix_array::HasPosition; /// An object containing the result of a search. /// From 7cd094400f3170cd042676698a87cefea7ba9f0c Mon Sep 17 00:00:00 2001 From: Koki Kato Date: Fri, 31 Jan 2025 09:25:15 +0900 Subject: [PATCH 2/4] Move HasPosition to iter module HasPosition is a property (trait) of a search index, so it might be more intuitive to move this to iter module, where other traits for indexes are located. --- benches/locate.rs | 3 +-- src/fm_index.rs | 4 ++-- src/iter.rs | 10 ++++++++++ src/lib.rs | 2 +- src/rlfmi.rs | 4 ++-- src/sample.rs | 10 +--------- src/search.rs | 3 +-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/benches/locate.rs b/benches/locate.rs index ae59841..2352f66 100644 --- a/benches/locate.rs +++ b/benches/locate.rs @@ -1,5 +1,4 @@ -use fm_index::sample::HasPosition; -use fm_index::{FMIndexBackend, SearchIndexBuilder}; +use fm_index::{FMIndexBackend, HasPosition, SearchIndexBuilder}; use criterion::{criterion_group, criterion_main}; use criterion::{BatchSize, BenchmarkId, Criterion, Throughput}; diff --git a/src/fm_index.rs b/src/fm_index.rs index 6edab17..6090044 100644 --- a/src/fm_index.rs +++ b/src/fm_index.rs @@ -2,8 +2,8 @@ use crate::character::{prepare_text, Character}; #[cfg(doc)] use crate::converter; use crate::converter::{Converter, IndexWithConverter}; -use crate::iter::FMIndexBackend; -use crate::sample::{self, HasPosition, SuffixOrderSampledArray}; +use crate::iter::{FMIndexBackend, HasPosition}; +use crate::sample::{self, SuffixOrderSampledArray}; use crate::{sais, seal, HeapSize}; use crate::{util, Search}; diff --git a/src/iter.rs b/src/iter.rs index 1cb9df0..b241332 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,5 +1,7 @@ use crate::character::Character; use crate::converter::{Converter, IndexWithConverter}; +#[cfg(doc)] +use crate::sample::SuffixOrderSampledArray; use crate::seal; use crate::search::Search; @@ -68,6 +70,14 @@ pub trait HeapSize { fn size(&self) -> usize; } +/// A trait for an index that supports locate queries. +/// +/// This is only supported when [`SuffixOrderSampledArray`] is passed in. +pub trait HasPosition { + #[doc(hidden)] + fn get_sa(&self, i: u64) -> u64; +} + /// An iterator that goes backwards through the text, producing [`Character`]. pub struct BackwardIterator<'a, I> where diff --git a/src/lib.rs b/src/lib.rs index c0b73f9..e55f1f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -152,5 +152,5 @@ pub use crate::rlfmi::RLFMIndex; pub use builder::SearchIndexBuilder; pub use character::Character; -pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator, HeapSize}; +pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator, HasPosition, HeapSize}; pub use search::Search; diff --git a/src/rlfmi.rs b/src/rlfmi.rs index 3f645f4..a80574e 100644 --- a/src/rlfmi.rs +++ b/src/rlfmi.rs @@ -2,8 +2,8 @@ use crate::character::{prepare_text, Character}; #[cfg(doc)] use crate::converter; use crate::converter::{Converter, IndexWithConverter}; -use crate::iter::FMIndexBackend; -use crate::sample::{self, HasPosition, SuffixOrderSampledArray}; +use crate::iter::{FMIndexBackend, HasPosition}; +use crate::sample::{self, SuffixOrderSampledArray}; use crate::{sais, HeapSize, Search}; use crate::{seal, util}; diff --git a/src/sample.rs b/src/sample.rs index 21a3056..40892f7 100644 --- a/src/sample.rs +++ b/src/sample.rs @@ -1,20 +1,12 @@ //! Suffix arrays, used to construct the index. //! //! Can also be used in sampled fashion to perform locate queries. -use crate::{seal, util}; +use crate::util; use std::fmt; use serde::{Deserialize, Serialize}; use vers_vecs::BitVec; -/// A trait for an index that supports locate queries. -/// -/// This is only supported when [`SuffixOrderSampledArray`] is passed in. -pub trait HasPosition { - #[doc(hidden)] - fn get_sa(&self, i: u64) -> u64; -} - /// A sampled suffix array, stored within the index. #[derive(Serialize, Deserialize)] pub struct SuffixOrderSampledArray { diff --git a/src/search.rs b/src/search.rs index 9924b09..a713f68 100644 --- a/src/search.rs +++ b/src/search.rs @@ -4,8 +4,7 @@ use crate::character::Character; use crate::converter; use crate::converter::IndexWithConverter; -use crate::iter::FMIndexBackend; -use crate::sample::HasPosition; +use crate::iter::{FMIndexBackend, HasPosition}; use crate::seal; /// An object containing the result of a search. From f2e72d38031b80d3c7da995f0d3aeadef523110f Mon Sep 17 00:00:00 2001 From: Koki Kato Date: Fri, 31 Jan 2025 09:34:40 +0900 Subject: [PATCH 3/4] Move sample module under suffix_array --- src/builder.rs | 2 +- src/fm_index.rs | 2 +- src/lib.rs | 2 +- src/rlfmi.rs | 2 +- src/suffix_array.rs | 5 +++++ src/{ => suffix_array}/sample.rs | 4 +--- 6 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 src/suffix_array.rs rename src/{ => suffix_array}/sample.rs (95%) diff --git a/src/builder.rs b/src/builder.rs index 229a16a..faf4f0a 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -3,7 +3,7 @@ use crate::converter; use crate::{ converter::{Converter, IdConverter}, - sample::SuffixOrderSampledArray, + suffix_array::sample::SuffixOrderSampledArray, Character, FMIndex, RLFMIndex, }; diff --git a/src/fm_index.rs b/src/fm_index.rs index 6090044..3a4f1fb 100644 --- a/src/fm_index.rs +++ b/src/fm_index.rs @@ -3,7 +3,7 @@ use crate::character::{prepare_text, Character}; use crate::converter; use crate::converter::{Converter, IndexWithConverter}; use crate::iter::{FMIndexBackend, HasPosition}; -use crate::sample::{self, SuffixOrderSampledArray}; +use crate::suffix_array::sample::{self, SuffixOrderSampledArray}; use crate::{sais, seal, HeapSize}; use crate::{util, Search}; diff --git a/src/lib.rs b/src/lib.rs index e55f1f9..4067369 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -135,7 +135,6 @@ #![warn(missing_docs)] pub mod converter; -pub mod sample; mod builder; mod character; @@ -145,6 +144,7 @@ mod rlfmi; mod sais; mod seal; mod search; +mod suffix_array; mod util; pub use crate::fm_index::FMIndex; diff --git a/src/rlfmi.rs b/src/rlfmi.rs index a80574e..0f0d2ff 100644 --- a/src/rlfmi.rs +++ b/src/rlfmi.rs @@ -3,7 +3,7 @@ use crate::character::{prepare_text, Character}; use crate::converter; use crate::converter::{Converter, IndexWithConverter}; use crate::iter::{FMIndexBackend, HasPosition}; -use crate::sample::{self, SuffixOrderSampledArray}; +use crate::suffix_array::sample::{self, SuffixOrderSampledArray}; use crate::{sais, HeapSize, Search}; use crate::{seal, util}; diff --git a/src/suffix_array.rs b/src/suffix_array.rs new file mode 100644 index 0000000..3068146 --- /dev/null +++ b/src/suffix_array.rs @@ -0,0 +1,5 @@ +//! Suffix arrays, used to construct the index. +//! +//! Can also be used in sampled fashion to perform locate queries. + +pub mod sample; diff --git a/src/sample.rs b/src/suffix_array/sample.rs similarity index 95% rename from src/sample.rs rename to src/suffix_array/sample.rs index 40892f7..e3df26b 100644 --- a/src/sample.rs +++ b/src/suffix_array/sample.rs @@ -1,6 +1,4 @@ -//! Suffix arrays, used to construct the index. -//! -//! Can also be used in sampled fashion to perform locate queries. +//! Sampled suffix arrays to perform locate queries. use crate::util; use std::fmt; From b9a31004b637a973510ff1fef1b6a3263ac2e37b Mon Sep 17 00:00:00 2001 From: Koki Kato Date: Fri, 31 Jan 2025 09:43:42 +0900 Subject: [PATCH 4/4] Fix doc --- src/iter.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/iter.rs b/src/iter.rs index b241332..e6f6aa3 100644 --- a/src/iter.rs +++ b/src/iter.rs @@ -1,7 +1,5 @@ use crate::character::Character; use crate::converter::{Converter, IndexWithConverter}; -#[cfg(doc)] -use crate::sample::SuffixOrderSampledArray; use crate::seal; use crate::search::Search; @@ -71,8 +69,6 @@ pub trait HeapSize { } /// A trait for an index that supports locate queries. -/// -/// This is only supported when [`SuffixOrderSampledArray`] is passed in. pub trait HasPosition { #[doc(hidden)] fn get_sa(&self, i: u64) -> u64;