Skip to content

Commit

Permalink
Drop SearchIndexBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
ajalab committed Jan 23, 2025
1 parent 0689333 commit d54da80
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 171 deletions.
52 changes: 0 additions & 52 deletions examples/example_builder.rs

This file was deleted.

2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,4 @@ pub use crate::rlfmi::RLFMIndex;

pub use character::Character;
pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator};
pub use search::{Search, SearchIndex, SearchIndexBuilder};
pub use search::{Search, SearchIndex};
121 changes: 3 additions & 118 deletions src/search.rs
Original file line number Diff line number Diff line change
@@ -1,125 +1,10 @@
#[cfg(doc)]
use crate::converter;

use crate::converter::{Converter, IndexWithConverter};
use crate::converter::IndexWithConverter;
use crate::iter::FMIndexBackend;
use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray};
use crate::{seal, Character, FMIndex, RLFMIndex};

/// A builder that builds [`SearchIndex`].
///
/// The type parameters record the configuration selected so far:
///
/// - `I`: the index type the builder is set up to produce. It is only
///   carried as a marker; no value of this type is stored.
/// - `T`: the character type of the text to be indexed.
/// - `C`: the [`Converter`] that is handed to the index when it is created.
/// - `S`: the type returned by the sampling closure.
pub struct SearchIndexBuilder<I, T: Character, C: Converter<T>, S> {
    /// Converter forwarded to the index constructor.
    converter: C,
    // We avoid extracting parts into another `type` definition.
    // Also, we use dyn trait in order not to add another type variable for this closure type.
    #[allow(clippy::type_complexity)]
    get_sample: Box<dyn Fn(&[u64]) -> S>,
    /// Zero-sized marker for the index type `I`.
    _i: std::marker::PhantomData<I>,
    /// Zero-sized marker for the character type `T`.
    _t: std::marker::PhantomData<T>,
}

impl<T: Character, C: Converter<T>> SearchIndexBuilder<(), T, C, ()> {
    /// Create a new [`SearchIndexBuilder`].
    ///
    /// `converter` is the [`Converter`] used to convert the characters to a
    /// smaller alphabet:
    ///
    /// - Use [`converter::IdConverter`] if you don't need to restrict the
    ///   alphabet.
    /// - Use [`converter::RangeConverter`] if you can constrain characters to
    ///   a particular range.
    ///
    /// See [`converter`] for more details.
    ///
    /// The returned builder targets [`FMIndex`] and samples with level `0`.
    pub fn new(
        converter: C,
    ) -> SearchIndexBuilder<FMIndex<T, C, SuffixOrderSampledArray>, T, C, SuffixOrderSampledArray>
    {
        SearchIndexBuilder {
            converter,
            // Level 0 is the initial sampling level.
            get_sample: Box::new(|suffixes| suffix_array::sample(suffixes, 0)),
            _i: std::marker::PhantomData,
            _t: std::marker::PhantomData,
        }
    }
}

impl<I, T: Character, C: Converter<T>, S> SearchIndexBuilder<I, T, C, S> {
    /// Restrict the index to the count operation.
    ///
    /// The sampling closure is replaced by one that returns `()`, so the
    /// suffix array needed for the locate operation is not kept in the index.
    ///
    /// Note that the returned builder always targets [`FMIndex`], whatever
    /// index type was selected before this call.
    pub fn count_only(self) -> SearchIndexBuilder<FMIndex<T, C, ()>, T, C, ()> {
        let converter = self.converter;
        SearchIndexBuilder {
            converter,
            get_sample: Box::new(|_| ()),
            _i: std::marker::PhantomData,
            _t: std::marker::PhantomData,
        }
    }

    /// Switch the builder to RLFM-Index, which encodes the backing Wavelet
    /// Matrix using run-length encoding.
    ///
    /// The index will be more space-efficient than the FM-Index, but is slower.
    /// The converter and the sampling closure are carried over unchanged.
    pub fn run_length_encoding(self) -> SearchIndexBuilder<RLFMIndex<T, C, S>, T, C, S> {
        let SearchIndexBuilder {
            converter,
            get_sample,
            ..
        } = self;
        SearchIndexBuilder {
            converter,
            get_sample,
            _i: std::marker::PhantomData,
            _t: std::marker::PhantomData,
        }
    }
}

impl<I: FMIndexBackend, T: Character, C: Converter<T>>
    SearchIndexBuilder<I, T, C, SuffixOrderSampledArray>
{
    /// Set the sampling level of the suffix array used for position lookup.
    ///
    /// - Level 0 uses the most memory (a full suffix array is retained) and
    ///   gives the fastest position lookup.
    /// - Level 1 uses half the memory, but looking up positions is slower.
    /// - Each further increase in level halves the memory usage again and
    ///   slows down position lookup.
    pub fn level(self, level: usize) -> Self {
        Self {
            // Only the sampling closure changes; everything else is kept.
            get_sample: Box::new(move |suffixes| suffix_array::sample(suffixes, level)),
            ..self
        }
    }
}

impl<T, C, S> SearchIndexBuilder<FMIndex<T, C, S>, T, C, S>
where
T: Character,
C: Converter<T>,
{
/// Build a new [SearchIndex] backed by [FMIndex].
pub fn build(self, text: Vec<T>) -> SearchIndex<FMIndex<T, C, S>> {
SearchIndex {
index: FMIndex::create(text, self.converter, self.get_sample),
}
}
}

impl<T, C, S> SearchIndexBuilder<RLFMIndex<T, C, S>, T, C, S>
where
T: Character,
C: Converter<T>,
{
/// Build a new [SearchIndex] backed by [RLFMIndex].
pub fn build(self, text: Vec<T>) -> SearchIndex<RLFMIndex<T, C, S>> {
SearchIndex {
index: RLFMIndex::create(text, self.converter, self.get_sample),
}
}
}
use crate::seal;
use crate::suffix_array::HasPosition;

/// A full-text index backed by FM-Index or its variant.
pub struct SearchIndex<I: FMIndexBackend> {
Expand Down

0 comments on commit d54da80

Please sign in to comment.