Skip to content

Commit

Permalink
Introduce a SearchIndex trait and SearchIndexWithLocate trait.
Browse files Browse the repository at this point in the history
These are intended to be the generic public API. FMIndexBackend is no longer
intended to be used through the public API at all, though it is still exposed because
the SearchIndex trait depends on it.

The SearchIndex and SearchIndexWithLocate traits are object-safe, i.e. they're compatible
with dyn. The idea is that we can then use a builder that produces a Box<dyn SearchIndex>, as this is
required to support a dynamic builder that picks the backend based on the builder parameters. I haven't
implemented this new builder yet, but this is a prerequisite for that.

Unfortunately, to make SearchIndex object-safe I can no longer use the generic K argument to `search`. That
argument made it easy to accept any AsRef and was especially useful when passing in a string. I've instead come up
with a solution using dyn, which has some performance overhead, but I suspect it's really tiny so we should be okay.

That solution is not the exact equivalent of AsRef; you have to explicitly borrow the string with `&` for it to
work. I think that's acceptable enough.
  • Loading branch information
faassen committed Jan 30, 2025
1 parent c493db1 commit 4260fb7
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 45 deletions.
14 changes: 5 additions & 9 deletions benches/count.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use fm_index::{FMIndexBackend, SearchIndexBuilder};
use fm_index::{SearchIndex, SearchIndexBuilder};

use criterion::{criterion_group, criterion_main};
use criterion::{AxisScale, BatchSize, BenchmarkId, Criterion, PlotConfiguration, Throughput};

mod common;

fn prepare_fmindex(len: usize, prob: f64, m: usize) -> (impl FMIndexBackend<T = u8>, Vec<String>) {
fn prepare_fmindex(len: usize, prob: f64, m: usize) -> (impl SearchIndex<u8>, Vec<String>) {
let (text, converter) = common::binary_text_set(len, prob);
let patterns = common::binary_patterns(m);
(
Expand All @@ -16,11 +16,7 @@ fn prepare_fmindex(len: usize, prob: f64, m: usize) -> (impl FMIndexBackend<T =
)
}

fn prepare_rlfmindex(
len: usize,
prob: f64,
m: usize,
) -> (impl FMIndexBackend<T = u8>, Vec<String>) {
fn prepare_rlfmindex(len: usize, prob: f64, m: usize) -> (impl SearchIndex<u8>, Vec<String>) {
let (text, converter) = common::binary_text_set(len, prob);
let patterns = common::binary_patterns(m);
(
Expand All @@ -45,7 +41,7 @@ pub fn bench(c: &mut Criterion) {
|| prepare_fmindex(n, prob, m),
|(index, patterns)| {
for pattern in patterns {
index.search(pattern).count();
index.search(&pattern).count();
}
},
BatchSize::SmallInput,
Expand All @@ -57,7 +53,7 @@ pub fn bench(c: &mut Criterion) {
|| prepare_rlfmindex(n, prob, m),
|(index, patterns)| {
for pattern in patterns {
index.search(pattern).count();
index.search(&pattern).count();
}
},
BatchSize::SmallInput,
Expand Down
11 changes: 5 additions & 6 deletions benches/locate.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use fm_index::suffix_array::HasPosition;
use fm_index::{FMIndexBackend, SearchIndexBuilder};
use fm_index::{SearchIndexBuilder, SearchIndexWithLocate};

use criterion::{criterion_group, criterion_main};
use criterion::{BatchSize, BenchmarkId, Criterion, Throughput};
Expand All @@ -11,7 +10,7 @@ fn prepare_fmindex(
prob: f64,
m: usize,
l: usize,
) -> (impl FMIndexBackend<T = u8> + HasPosition, Vec<String>) {
) -> (impl SearchIndexWithLocate<u8>, Vec<String>) {
let (text, converter) = common::binary_text_set(len, prob);
let patterns = common::binary_patterns(m);
(
Expand All @@ -27,7 +26,7 @@ fn prepare_rlfmindex(
prob: f64,
m: usize,
l: usize,
) -> (impl FMIndexBackend<T = u8> + HasPosition, Vec<String>) {
) -> (impl SearchIndexWithLocate<u8>, Vec<String>) {
let (text, converter) = common::binary_text_set(len, prob);
let patterns = common::binary_patterns(m);
(
Expand All @@ -51,7 +50,7 @@ pub fn bench(c: &mut Criterion) {
|| prepare_fmindex(n, prob, m, l),
|(index, patterns)| {
for pattern in patterns {
index.search(pattern).locate();
index.search(&pattern).locate();
}
},
BatchSize::SmallInput,
Expand All @@ -63,7 +62,7 @@ pub fn bench(c: &mut Criterion) {
|| prepare_rlfmindex(n, prob, m, l),
|(index, patterns)| {
for pattern in patterns {
index.search(pattern).locate();
index.search(&pattern).locate();
}
},
BatchSize::SmallInput,
Expand Down
32 changes: 29 additions & 3 deletions src/fm_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ use crate::character::{prepare_text, Character};
#[cfg(doc)]
use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::FMIndexBackend;
use crate::iter::{AsCharacters, FMIndexBackend, SearchIndex};
use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray};
use crate::{sais, seal, HeapSize};
use crate::{sais, seal, HeapSize, SearchIndexWithLocate};
use crate::{util, Search};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -155,7 +155,7 @@ where
{
type T = T;

fn len(&self) -> u64 {
fn len<L: seal::IsLocal>(&self) -> u64 {
self.bw.len() as u64
}

Expand Down Expand Up @@ -207,6 +207,32 @@ where
}
}

/// Count-only search API for [`FMIndex`], implemented for every sampling type `S`.
impl<T, C, S> SearchIndex<T> for FMIndex<T, C, S>
where
    T: Character,
    C: Converter<T>,
{
    type Backend = FMIndex<T, C, S>;

    /// Begin a search over this index for `pattern`.
    ///
    /// The pattern is viewed as a character slice and handed to a freshly
    /// created [`Search`] over `self`.
    fn search(&self, pattern: &dyn AsCharacters<T>) -> Search<Self> {
        let characters = pattern.as_characters();
        Search::new(self).search(characters)
    }

    /// Length of the indexed text, as reported by the index itself.
    fn len(&self) -> u64 {
        // NOTE(review): this relies on method resolution selecting a `len`
        // other than this trait method (presumably an inherent one on
        // `FMIndex`) — confirm it cannot resolve back to itself.
        self.len()
    }
}

/// Search API with locate support for an [`FMIndex`] that carries a
/// [`SuffixOrderSampledArray`].
impl<T, C> SearchIndexWithLocate<T> for FMIndex<T, C, SuffixOrderSampledArray>
where
    T: Character,
    C: Converter<T>,
{
    type Backend = FMIndex<T, C, SuffixOrderSampledArray>;

    /// Begin a search over this index for `pattern`.
    ///
    /// The pattern is viewed as a character slice and handed to a freshly
    /// created [`Search`] over `self`.
    fn search(&self, pattern: &dyn AsCharacters<T>) -> Search<Self::Backend> {
        let characters = pattern.as_characters();
        Search::new(self).search(characters)
    }

    /// Length of the indexed text, as reported by the index itself.
    fn len(&self) -> u64 {
        // NOTE(review): this relies on method resolution selecting a `len`
        // other than this trait method (presumably an inherent one on
        // `FMIndex`) — confirm it cannot resolve back to itself.
        self.len()
    }
}

impl<T, C> HasPosition for FMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
Expand Down
79 changes: 59 additions & 20 deletions src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,61 @@ use crate::character::Character;
use crate::converter::{Converter, IndexWithConverter};
use crate::seal;
use crate::search::Search;
use crate::suffix_array::HasPosition;

/// A borrowed view of a value as a slice of characters.
///
/// `AsCharacters` is the equivalent of `AsRef<[T]>` for the `search`
/// methods of `SearchIndex` and `SearchIndexWithLocate`. Those traits need
/// to be object-safe (dyn-compatible), so `search` takes a
/// `&dyn AsCharacters<T>` instead of a generic pattern parameter.
pub trait AsCharacters<T: Character> {
/// Borrow `self` as a slice of characters.
fn as_characters(&self) -> &[T];
}

// Blanket implementation: every type that is `AsRef<[T]>` can be used as a
// pattern through `AsCharacters<T>`.
impl<T, A> AsCharacters<T> for A
where
    T: Character,
    A: AsRef<[T]>,
{
    fn as_characters(&self) -> &[T] {
        <A as AsRef<[T]>>::as_ref(self)
    }
}

/// A search index that can be used to search for patterns in a text.
///
/// This only supports the count operation for search, not locate.
///
/// NOTE(review): the trait has an associated `Backend` type, so a trait
/// object has to name it (`dyn SearchIndex<T, Backend = B>`) — confirm this
/// fits the planned dynamically-selected backend.
pub trait SearchIndex<T: Character> {
/// The backend type for this search index.
///
/// This is the index type that the [`Search`] returned by `search` is
/// parameterized over.
type Backend: FMIndexBackend<T = T>;

/// Search for a pattern in the text.
///
/// The pattern is passed as `&dyn AsCharacters<T>` rather than through a
/// generic parameter, so callers pass a reference (for example
/// `index.search(&pattern)`).
///
/// Return a [`Search`] object with information about the search
/// result.
fn search(&self, pattern: &dyn AsCharacters<T>) -> Search<Self::Backend>;

/// The size of the text in the index
///
/// Note that this includes an ending \0 (terminator) character
/// so will be one more than the length of the text.
fn len(&self) -> u64;
}

/// A search index that can be used to search for patterns in a text.
///
/// This also supports the locate operation for search.
///
/// NOTE(review): the trait has an associated `Backend` type, so a trait
/// object has to name it (`dyn SearchIndexWithLocate<T, Backend = B>`) —
/// confirm this fits the planned dynamically-selected backend.
pub trait SearchIndexWithLocate<T: Character> {
/// The backend type for this search index.
///
/// Unlike a count-only index, the backend must also implement
/// [`HasPosition`].
type Backend: FMIndexBackend<T = T> + HasPosition;

/// Search for a pattern in the text.
///
/// The pattern is passed as `&dyn AsCharacters<T>` rather than through a
/// generic parameter, so callers pass a reference (for example
/// `index.search(&pattern)`).
///
/// Return a [`Search`] object with information about the search
/// result.
fn search(&self, pattern: &dyn AsCharacters<T>) -> Search<Self::Backend>;

/// The size of the text in the index
///
/// Note that this includes an ending \0 (terminator) character
/// so will be one more than the length of the text.
fn len(&self) -> u64;
}

/// Trait for an FM-Index implementation.
///
Expand Down Expand Up @@ -29,34 +84,18 @@ pub trait FMIndexBackend: Sized + seal::Sealed {

#[doc(hidden)]
fn iter_forward<L: seal::IsLocal>(&self, i: u64) -> ForwardIterator<Self> {
debug_assert!(i < self.len());
debug_assert!(i < self.len::<L>());
ForwardIterator { index: self, i }
}

#[doc(hidden)]
fn iter_backward<L: seal::IsLocal>(&self, i: u64) -> BackwardIterator<Self> {
debug_assert!(i < self.len());
debug_assert!(i < self.len::<L>());
BackwardIterator { index: self, i }
}

// The following methods are public.

/// Search for a pattern in the text.
///
/// Return a [`Search`] object with information about the search
/// result.
fn search<K>(&self, pattern: K) -> Search<Self>
where
K: AsRef<[Self::T]>,
{
Search::new(self).search(pattern)
}

/// The size of the text in the index
///
/// Note that this includes an ending \0 (terminator) character
/// so will be one more than the length of the text.
fn len(&self) -> u64;
#[doc(hidden)]
fn len<L: seal::IsLocal>(&self) -> u64;
}

/// Access the heap size of the structure.
Expand Down
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,7 @@ pub use crate::rlfmi::RLFMIndex;

pub use builder::SearchIndexBuilder;
pub use character::Character;
pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator, HeapSize};
pub use iter::{
BackwardIterator, FMIndexBackend, ForwardIterator, HeapSize, SearchIndex, SearchIndexWithLocate,
};
pub use search::Search;
32 changes: 29 additions & 3 deletions src/rlfmi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ use crate::character::{prepare_text, Character};
#[cfg(doc)]
use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::FMIndexBackend;
use crate::iter::{AsCharacters, FMIndexBackend, SearchIndex};
use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray};
use crate::{sais, HeapSize, Search};
use crate::{sais, HeapSize, Search, SearchIndexWithLocate};
use crate::{seal, util};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -203,7 +203,7 @@ where
{
type T = T;

fn len(&self) -> u64 {
fn len<L: seal::IsLocal>(&self) -> u64 {
self.len
}

Expand Down Expand Up @@ -270,6 +270,32 @@ where
}
}

/// Count-only search API for [`RLFMIndex`], implemented for every sampling type `S`.
impl<T, C, S> SearchIndex<T> for RLFMIndex<T, C, S>
where
    T: Character,
    C: Converter<T>,
{
    type Backend = RLFMIndex<T, C, S>;

    /// Begin a search over this index for `pattern`.
    ///
    /// The pattern is viewed as a character slice and handed to a freshly
    /// created [`Search`] over `self`.
    fn search(&self, pattern: &dyn AsCharacters<T>) -> Search<Self> {
        let characters = pattern.as_characters();
        Search::new(self).search(characters)
    }

    /// Length of the indexed text, as reported by the index itself.
    fn len(&self) -> u64 {
        // NOTE(review): this relies on method resolution selecting a `len`
        // other than this trait method (presumably an inherent one on
        // `RLFMIndex`) — confirm it cannot resolve back to itself.
        self.len()
    }
}

/// Search API with locate support for an [`RLFMIndex`] that carries a
/// [`SuffixOrderSampledArray`].
impl<T, C> SearchIndexWithLocate<T> for RLFMIndex<T, C, SuffixOrderSampledArray>
where
    T: Character,
    C: Converter<T>,
{
    type Backend = RLFMIndex<T, C, SuffixOrderSampledArray>;

    /// Begin a search over this index for `pattern`.
    ///
    /// The pattern is viewed as a character slice and handed to a freshly
    /// created [`Search`] over `self`.
    fn search(&self, pattern: &dyn AsCharacters<T>) -> Search<Self::Backend> {
        let characters = pattern.as_characters();
        Search::new(self).search(characters)
    }

    /// Length of the indexed text, as reported by the index itself.
    fn len(&self) -> u64 {
        // NOTE(review): this relies on method resolution selecting a `len`
        // other than this trait method (presumably an inherent one on
        // `RLFMIndex`) — confirm it cannot resolve back to itself.
        self.len()
    }
}

impl<T, C> HasPosition for RLFMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
Expand Down
2 changes: 1 addition & 1 deletion src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ where
Search {
index,
s: 0,
e: index.len(),
e: index.len::<seal::Local>(),
pattern: vec![],
}
}
Expand Down
4 changes: 2 additions & 2 deletions tests/test_api.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// tests that exercise the public API, especially the traits

use fm_index::{FMIndexBackend, HeapSize};
use fm_index::{HeapSize, SearchIndex};

fn len<T: FMIndexBackend>(index: &T) -> u64 {
fn len<T: SearchIndex<u8>>(index: &T) -> u64 {
index.len()
}

Expand Down

0 comments on commit 4260fb7

Please sign in to comment.