Skip to content

Commit

Permalink
Merge pull request #46 from faassen/trait-docs
Browse files Browse the repository at this point in the history
Improve traits and document them
  • Loading branch information
faassen authored Jan 30, 2025
2 parents 5f1832a + 72a86b2 commit c493db1
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 37 deletions.
36 changes: 32 additions & 4 deletions src/fm_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::FMIndexBackend;
use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray};
use crate::{sais, seal};
use crate::{sais, seal, HeapSize};
use crate::{util, Search};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -95,7 +95,11 @@ where
}
}

impl<T, C> FMIndex<T, C, ()> {
impl<T, C> FMIndex<T, C, ()>
where
T: Character,
C: Converter<T>,
{
/// The size on the heap of the FM-Index.
///
/// No suffix array information is stored in this index.
Expand All @@ -106,7 +110,11 @@ impl<T, C> FMIndex<T, C, ()> {
}
}

impl<T, C> FMIndex<T, C, SuffixOrderSampledArray> {
impl<T, C> FMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
C: Converter<T>,
{
/// The size on the heap of the FM-Index.
///
/// Sampled suffix array data is stored in this index.
Expand All @@ -118,6 +126,26 @@ impl<T, C> FMIndex<T, C, SuffixOrderSampledArray> {
}
}

// Exposes the heap size of the suffix-array-carrying `FMIndex` through the
// `HeapSize` trait, so generic code bounded on `HeapSize` can query it.
impl<T, C> HeapSize for FMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
C: Converter<T>,
{
// Returns whatever the type-qualified `size` call below yields.
fn size(&self) -> usize {
// NOTE(review): the fully spelled-out type path is presumably meant to
// select the inherent `size` of this specialization (its definition is
// not visible here; confirm it exists). Without an inherent method the
// same path would resolve to this trait method and recurse.
FMIndex::<T, C, SuffixOrderSampledArray>::size(self)
}
}

// Exposes the heap size of the `FMIndex` specialization whose third type
// parameter is `()` through the `HeapSize` trait.
impl<T, C> HeapSize for FMIndex<T, C, ()>
where
T: Character,
C: Converter<T>,
{
// Returns whatever the type-qualified `size` call below yields.
fn size(&self) -> usize {
// NOTE(review): the fully spelled-out type path is presumably meant to
// select the inherent `size` of this specialization (its definition is
// not visible here; confirm it exists). Without an inherent method the
// same path would resolve to this trait method and recurse.
FMIndex::<T, C, ()>::size(self)
}
}

// Marker impl with no items: `FMIndexBackend` lists `seal::Sealed` as a
// supertrait, so `FMIndex` needs this before it can implement the backend trait.
impl<T, C, S> seal::Sealed for FMIndex<T, C, S> {}

impl<T, C, S> FMIndexBackend for FMIndex<T, C, S>
Expand All @@ -127,7 +155,7 @@ where
{
type T = T;

fn len<L: seal::IsLocal>(&self) -> u64 {
fn len(&self) -> u64 {
self.bw.len() as u64
}

Expand Down
49 changes: 36 additions & 13 deletions src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@ use crate::converter::{Converter, IndexWithConverter};
use crate::seal;
use crate::search::Search;

/// A FM-Index that can search texts backwards and forwards.
/// Trait for an FM-Index implementation.
///
/// You can use this to write code that is generic over FM-Index implementations.
///
/// You cannot implement this trait yourself.
pub trait FMIndexBackend: Sized + seal::Sealed {
/// A [`Character`] type.
type T: Character;

#[doc(hidden)]
fn len<L: seal::IsLocal>(&self) -> u64;
// We hide all the methods involved in implementation.

#[doc(hidden)]
fn get_l<L: seal::IsLocal>(&self, i: u64) -> Self::T;
Expand All @@ -24,25 +27,45 @@ pub trait FMIndexBackend: Sized + seal::Sealed {
#[doc(hidden)]
fn fl_map2<L: seal::IsLocal>(&self, c: Self::T, i: u64) -> u64;

#[doc(hidden)]
fn search<K>(&self, pattern: K) -> Search<Self>
where
K: AsRef<[Self::T]>,
{
Search::new(self).search(pattern)
}

#[doc(hidden)]
fn iter_forward<L: seal::IsLocal>(&self, i: u64) -> ForwardIterator<Self> {
debug_assert!(i < self.len::<L>());
debug_assert!(i < self.len());
ForwardIterator { index: self, i }
}

#[doc(hidden)]
fn iter_backward<L: seal::IsLocal>(&self, i: u64) -> BackwardIterator<Self> {
debug_assert!(i < self.len::<seal::Local>());
debug_assert!(i < self.len());
BackwardIterator { index: self, i }
}

// The following methods are public.

/// Search for a pattern in the text.
///
/// Return a [`Search`] object with information about the search
/// result.
fn search<K>(&self, pattern: K) -> Search<Self>
where
K: AsRef<[Self::T]>,
{
Search::new(self).search(pattern)
}

/// The size of the text in the index.
///
/// Note that this includes the trailing `\0` (terminator) character,
/// so it is one more than the length of the text.
fn len(&self) -> u64;
}

/// Access the heap size of the structure.
///
/// This can be useful if you want to fine-tune the memory usage of your
/// application.
///
/// The trait lets generic code ask for the heap size without naming the
/// concrete type, for example
/// `fn report<I: HeapSize>(index: &I) -> usize { index.size() }`.
pub trait HeapSize {
/// The size on the heap of this structure, in bytes.
///
/// Takes `&self` only, so querying the size does not consume or
/// mutably borrow the structure.
fn size(&self) -> usize;
}

/// An iterator that goes backwards through the text, producing [`Character`].
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,5 @@ pub use crate::rlfmi::RLFMIndex;

pub use builder::SearchIndexBuilder;
pub use character::Character;
pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator};
pub use search::{Search, SearchIndex};
pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator, HeapSize};
pub use search::Search;
26 changes: 24 additions & 2 deletions src/rlfmi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::FMIndexBackend;
use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray};
use crate::{sais, Search};
use crate::{sais, HeapSize, Search};
use crate::{seal, util};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -142,6 +142,7 @@ where
impl<T, C> RLFMIndex<T, C, ()>
where
T: Character,
C: Converter<T>,
{
/// Heap size of the index.
///
Expand All @@ -158,6 +159,7 @@ where
impl<T, C> RLFMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
C: Converter<T>,
{
/// The size on the heap of the FM-Index.
///
Expand All @@ -172,6 +174,26 @@ where
}
}

// Exposes the heap size of the suffix-array-carrying `RLFMIndex` through the
// `HeapSize` trait, so generic code bounded on `HeapSize` can query it.
impl<T, C> HeapSize for RLFMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
C: Converter<T>,
{
// Returns whatever the type-qualified `size` call below yields.
fn size(&self) -> usize {
// NOTE(review): the fully spelled-out type path is presumably meant to
// select the inherent `size` of this specialization (its definition is
// not visible here; confirm it exists). Without an inherent method the
// same path would resolve to this trait method and recurse.
RLFMIndex::<T, C, SuffixOrderSampledArray>::size(self)
}
}

// Exposes the heap size of the `RLFMIndex` specialization whose third type
// parameter is `()` through the `HeapSize` trait.
impl<T, C> HeapSize for RLFMIndex<T, C, ()>
where
T: Character,
C: Converter<T>,
{
// Returns whatever the type-qualified `size` call below yields.
fn size(&self) -> usize {
// NOTE(review): the fully spelled-out type path is presumably meant to
// select the inherent `size` of this specialization (its definition is
// not visible here; confirm it exists). Without an inherent method the
// same path would resolve to this trait method and recurse.
RLFMIndex::<T, C, ()>::size(self)
}
}

// Marker impl with no items: `FMIndexBackend` lists `seal::Sealed` as a
// supertrait, so `RLFMIndex` needs this before it can implement the backend trait.
impl<T, C, S> seal::Sealed for RLFMIndex<T, C, S> {}

impl<T, C, S> FMIndexBackend for RLFMIndex<T, C, S>
Expand All @@ -181,7 +203,7 @@ where
{
type T = T;

fn len<L: seal::IsLocal>(&self) -> u64 {
fn len(&self) -> u64 {
self.len
}

Expand Down
17 changes: 1 addition & 16 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,6 @@ use crate::iter::FMIndexBackend;
use crate::seal;
use crate::suffix_array::HasPosition;

/// A full-text index backed by FM-Index or its variant.
pub struct SearchIndex<I: FMIndexBackend> {
index: I,
}

impl<I: FMIndexBackend> SearchIndex<I> {
/// Search for a pattern in the text.
///
/// Return a [`Search`] object with information about the search
/// result.
pub fn search<K: AsRef<[I::T]>>(&self, pattern: K) -> Search<I> {
self.index.search(pattern)
}
}

/// An object containing the result of a search.
///
/// This is expanded with a `locate` method if the index is
Expand All @@ -42,7 +27,7 @@ where
Search {
index,
s: 0,
e: index.len::<seal::Local>(),
e: index.len(),
pattern: vec![],
}
}
Expand Down
103 changes: 103 additions & 0 deletions tests/test_api.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// tests that exercise the public API, especially the traits

use fm_index::{FMIndexBackend, HeapSize};

fn len<T: FMIndexBackend>(index: &T) -> u64 {
index.len()
}

fn size<T: HeapSize>(t: &T) -> usize {
t.size()
}

#[test]
fn test_fm_index_backend_trait_fm_index_suffix_array() {
    // Builder used exactly as `new()` returns it, with no extra options.
    let builder = fm_index::SearchIndexBuilder::new();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `len` is callable through the `FMIndexBackend`
    // bound; the value is the 4 input bytes plus the terminator.
    assert_eq!(len(&index), 5);
}

#[test]
fn test_heap_size_trait_fm_index_suffix_array() {
    // Builder used exactly as `new()` returns it, with no extra options.
    let builder = fm_index::SearchIndexBuilder::new();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `size` is callable through the `HeapSize` bound;
    // the exact byte count is deliberately not pinned down.
    assert!(size(&index) > 0);
}

#[test]
fn test_fm_index_backend_trait_fm_index_count_only() {
    // Same as the default build, but with the `count_only` option applied.
    let builder = fm_index::SearchIndexBuilder::new().count_only();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `len` is callable through the `FMIndexBackend`
    // bound; the value is the 4 input bytes plus the terminator.
    assert_eq!(len(&index), 5);
}

#[test]
fn test_heap_size_trait_fm_index_count_only() {
    // Same as the default build, but with the `count_only` option applied.
    let builder = fm_index::SearchIndexBuilder::new().count_only();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `size` is callable through the `HeapSize` bound;
    // the exact byte count is deliberately not pinned down.
    assert!(size(&index) > 0);
}

#[test]
fn test_fm_index_backend_trait_rlfm_index_suffix_array() {
    // Default options plus `run_length_encoding`.
    let builder = fm_index::SearchIndexBuilder::new().run_length_encoding();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `len` is callable through the `FMIndexBackend`
    // bound; the value is the 4 input bytes plus the terminator.
    assert_eq!(len(&index), 5);
}

#[test]
fn test_heap_size_trait_rlfm_index_suffix_array() {
    // Default options plus `run_length_encoding`.
    let builder = fm_index::SearchIndexBuilder::new().run_length_encoding();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `size` is callable through the `HeapSize` bound;
    // the exact byte count is deliberately not pinned down.
    assert!(size(&index) > 0);
}

#[test]
fn test_fm_index_backend_trait_rlfm_index_count_only() {
    // Both options applied, in the same order as before: `count_only` first,
    // then `run_length_encoding`.
    let builder = fm_index::SearchIndexBuilder::new().count_only();
    let builder = builder.run_length_encoding();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `len` is callable through the `FMIndexBackend`
    // bound; the value is the 4 input bytes plus the terminator.
    assert_eq!(len(&index), 5);
}

#[test]
fn test_heap_size_trait_rlfm_index_count_only() {
    // Both options applied, in the same order as before: `count_only` first,
    // then `run_length_encoding`.
    let builder = fm_index::SearchIndexBuilder::new().count_only();
    let builder = builder.run_length_encoding();
    let index = builder.build(b"text".to_vec());

    // The goal is to show `size` is callable through the `HeapSize` bound;
    // the exact byte count is deliberately not pinned down.
    assert!(size(&index) > 0);
}

0 comments on commit c493db1

Please sign in to comment.