Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve traits and document them #46

Merged
merged 2 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions src/fm_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::FMIndexBackend;
use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray};
use crate::{sais, seal};
use crate::{sais, seal, HeapSize};
use crate::{util, Search};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -95,7 +95,11 @@ where
}
}

impl<T, C> FMIndex<T, C, ()> {
impl<T, C> FMIndex<T, C, ()>
where
T: Character,
C: Converter<T>,
{
/// The size on the heap of the FM-Index.
///
/// No suffix array information is stored in this index.
Expand All @@ -106,7 +110,11 @@ impl<T, C> FMIndex<T, C, ()> {
}
}

impl<T, C> FMIndex<T, C, SuffixOrderSampledArray> {
impl<T, C> FMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
C: Converter<T>,
{
/// The size on the heap of the FM-Index.
///
/// Sampled suffix array data is stored in this index.
Expand All @@ -118,6 +126,26 @@ impl<T, C> FMIndex<T, C, SuffixOrderSampledArray> {
}
}

/// Makes the heap footprint of an `FMIndex` carrying a
/// `SuffixOrderSampledArray` available through the `HeapSize` trait.
impl<T: Character, C: Converter<T>> HeapSize for FMIndex<T, C, SuffixOrderSampledArray> {
    /// Forwards to the inherent `size` method of this index type.
    fn size(&self) -> usize {
        // `Self` is the same type the inherent `size` is defined on; inherent
        // associated functions are resolved ahead of trait methods for a
        // `Type::name` path, so this does not call back into the trait.
        Self::size(self)
    }
}

/// Makes the heap footprint of an `FMIndex` without suffix array data
/// (`S = ()`) available through the `HeapSize` trait.
impl<T: Character, C: Converter<T>> HeapSize for FMIndex<T, C, ()> {
    /// Forwards to the inherent `size` method of this index type.
    fn size(&self) -> usize {
        // `Self` is the same type the inherent `size` is defined on; inherent
        // associated functions are resolved ahead of trait methods for a
        // `Type::name` path, so this does not call back into the trait.
        Self::size(self)
    }
}

// `FMIndexBackend` declares `seal::Sealed` as a supertrait, so this empty
// impl is a prerequisite for the `FMIndexBackend` impl of `FMIndex`.
impl<T, C, S> seal::Sealed for FMIndex<T, C, S> {}

impl<T, C, S> FMIndexBackend for FMIndex<T, C, S>
Expand All @@ -127,7 +155,7 @@ where
{
type T = T;

fn len<L: seal::IsLocal>(&self) -> u64 {
fn len(&self) -> u64 {
self.bw.len() as u64
}

Expand Down
49 changes: 36 additions & 13 deletions src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@ use crate::converter::{Converter, IndexWithConverter};
use crate::seal;
use crate::search::Search;

/// A FM-Index that can search texts backwards and forwards.
/// Trait for an FM-Index implementation.
///
/// You can use this to write code that is generic over FM-Index implementations.
///
/// You cannot implement this trait yourself.
pub trait FMIndexBackend: Sized + seal::Sealed {
/// A [`Character`] type.
type T: Character;

#[doc(hidden)]
fn len<L: seal::IsLocal>(&self) -> u64;
// We hide all the methods involved in implementation.

#[doc(hidden)]
fn get_l<L: seal::IsLocal>(&self, i: u64) -> Self::T;
Expand All @@ -24,25 +27,45 @@ pub trait FMIndexBackend: Sized + seal::Sealed {
#[doc(hidden)]
fn fl_map2<L: seal::IsLocal>(&self, c: Self::T, i: u64) -> u64;

#[doc(hidden)]
fn search<K>(&self, pattern: K) -> Search<Self>
where
K: AsRef<[Self::T]>,
{
Search::new(self).search(pattern)
}

#[doc(hidden)]
fn iter_forward<L: seal::IsLocal>(&self, i: u64) -> ForwardIterator<Self> {
debug_assert!(i < self.len::<L>());
debug_assert!(i < self.len());
ForwardIterator { index: self, i }
}

#[doc(hidden)]
fn iter_backward<L: seal::IsLocal>(&self, i: u64) -> BackwardIterator<Self> {
debug_assert!(i < self.len::<seal::Local>());
debug_assert!(i < self.len());
BackwardIterator { index: self, i }
}

// The following methods are public.

/// Search for a pattern in the text.
///
/// Return a [`Search`] object with information about the search
/// result.
fn search<K>(&self, pattern: K) -> Search<Self>
where
K: AsRef<[Self::T]>,
{
Search::new(self).search(pattern)
}

/// The size of the text in the index.
///
/// Note that this includes an ending \0 (terminator) character,
/// so it will be one more than the length of the text.
fn len(&self) -> u64;
}

/// Access the heap size of the structure.
///
/// This can be useful if you want to fine-tune the memory usage of your
/// application.
///
/// Bounding a generic parameter on this trait lets a caller query the heap
/// usage of a value without naming its concrete type.
pub trait HeapSize {
/// The size on the heap of this structure, in bytes.
///
/// Takes `&self`, so the value can be measured without being consumed.
fn size(&self) -> usize;
}

/// An iterator that goes backwards through the text, producing [`Character`].
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,5 @@ pub use crate::rlfmi::RLFMIndex;

pub use builder::SearchIndexBuilder;
pub use character::Character;
pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator};
pub use search::{Search, SearchIndex};
pub use iter::{BackwardIterator, FMIndexBackend, ForwardIterator, HeapSize};
pub use search::Search;
26 changes: 24 additions & 2 deletions src/rlfmi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::FMIndexBackend;
use crate::suffix_array::{self, HasPosition, SuffixOrderSampledArray};
use crate::{sais, Search};
use crate::{sais, HeapSize, Search};
use crate::{seal, util};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -142,6 +142,7 @@ where
impl<T, C> RLFMIndex<T, C, ()>
where
T: Character,
C: Converter<T>,
{
/// Heap size of the index.
///
Expand All @@ -158,6 +159,7 @@ where
impl<T, C> RLFMIndex<T, C, SuffixOrderSampledArray>
where
T: Character,
C: Converter<T>,
{
/// The size on the heap of the FM-Index.
///
Expand All @@ -172,6 +174,26 @@ where
}
}

/// Makes the heap footprint of an `RLFMIndex` carrying a
/// `SuffixOrderSampledArray` available through the `HeapSize` trait.
impl<T: Character, C: Converter<T>> HeapSize for RLFMIndex<T, C, SuffixOrderSampledArray> {
    /// Forwards to the inherent `size` method of this index type.
    fn size(&self) -> usize {
        // `Self` is the same type the inherent `size` is defined on; inherent
        // associated functions are resolved ahead of trait methods for a
        // `Type::name` path, so this does not call back into the trait.
        Self::size(self)
    }
}

/// Makes the heap footprint of an `RLFMIndex` without suffix array data
/// (`S = ()`) available through the `HeapSize` trait.
impl<T: Character, C: Converter<T>> HeapSize for RLFMIndex<T, C, ()> {
    /// Forwards to the inherent `size` method of this index type.
    fn size(&self) -> usize {
        // `Self` is the same type the inherent `size` is defined on; inherent
        // associated functions are resolved ahead of trait methods for a
        // `Type::name` path, so this does not call back into the trait.
        Self::size(self)
    }
}

// `FMIndexBackend` declares `seal::Sealed` as a supertrait, so this empty
// impl is a prerequisite for the `FMIndexBackend` impl of `RLFMIndex`.
impl<T, C, S> seal::Sealed for RLFMIndex<T, C, S> {}

impl<T, C, S> FMIndexBackend for RLFMIndex<T, C, S>
Expand All @@ -181,7 +203,7 @@ where
{
type T = T;

fn len<L: seal::IsLocal>(&self) -> u64 {
fn len(&self) -> u64 {
self.len
}

Expand Down
17 changes: 1 addition & 16 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,6 @@ use crate::iter::FMIndexBackend;
use crate::seal;
use crate::suffix_array::HasPosition;

/// A full-text index backed by FM-Index or its variant.
pub struct SearchIndex<I: FMIndexBackend> {
index: I,
}

impl<I: FMIndexBackend> SearchIndex<I> {
/// Search for a pattern in the text.
///
/// Return a [`Search`] object with information about the search
/// result.
pub fn search<K: AsRef<[I::T]>>(&self, pattern: K) -> Search<I> {
self.index.search(pattern)
}
}

/// An object containing the result of a search.
///
/// This is expanded with a `locate` method if the index is
Expand All @@ -42,7 +27,7 @@ where
Search {
index,
s: 0,
e: index.len::<seal::Local>(),
e: index.len(),
pattern: vec![],
}
}
Expand Down
103 changes: 103 additions & 0 deletions tests/test_api.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// tests that exercise the public API, especially the traits

use fm_index::{FMIndexBackend, HeapSize};

fn len<T: FMIndexBackend>(index: &T) -> u64 {
index.len()
}

fn size<T: HeapSize>(t: &T) -> usize {
t.size()
}

// An index from the default builder can be passed to code that only knows
// about `FMIndexBackend`.
#[test]
fn test_fm_index_backend_trait_fm_index_suffix_array() {
    let index_builder = fm_index::SearchIndexBuilder::new();
    let haystack = b"text".to_vec();

    let built = index_builder.build(haystack);

    // 4 bytes of text plus the terminator; the point is that the call compiles
    // and dispatches through the trait.
    let reported_len = len(&built);
    assert_eq!(reported_len, 5);
}

// An index from the default builder can be passed to code that only knows
// about `HeapSize`.
#[test]
fn test_heap_size_trait_fm_index_suffix_array() {
    let index_builder = fm_index::SearchIndexBuilder::new();
    let haystack = b"text".to_vec();

    let index = index_builder.build(haystack);

    // The exact number is irrelevant here; a non-zero value shows the trait
    // method was reachable and returned something.
    assert!(size(&index) > 0);
}

// An index from a `count_only` builder can be passed to code that only knows
// about `FMIndexBackend`.
#[test]
fn test_fm_index_backend_trait_fm_index_count_only() {
    let index_builder = fm_index::SearchIndexBuilder::new().count_only();
    let haystack = b"text".to_vec();

    let built = index_builder.build(haystack);

    // 4 bytes of text plus the terminator; the point is that the call compiles
    // and dispatches through the trait.
    let reported_len = len(&built);
    assert_eq!(reported_len, 5);
}

// An index from a `count_only` builder can be passed to code that only knows
// about `HeapSize`.
#[test]
fn test_heap_size_trait_fm_index_count_only() {
    let index_builder = fm_index::SearchIndexBuilder::new().count_only();
    let haystack = b"text".to_vec();

    let index = index_builder.build(haystack);

    // The exact number is irrelevant here; a non-zero value shows the trait
    // method was reachable and returned something.
    assert!(size(&index) > 0);
}

// An index from a `run_length_encoding` builder can be passed to code that
// only knows about `FMIndexBackend`.
#[test]
fn test_fm_index_backend_trait_rlfm_index_suffix_array() {
    let index_builder = fm_index::SearchIndexBuilder::new().run_length_encoding();
    let haystack = b"text".to_vec();

    let built = index_builder.build(haystack);

    // 4 bytes of text plus the terminator; the point is that the call compiles
    // and dispatches through the trait.
    let reported_len = len(&built);
    assert_eq!(reported_len, 5);
}

// An index from a `run_length_encoding` builder can be passed to code that
// only knows about `HeapSize`.
#[test]
fn test_heap_size_trait_rlfm_index_suffix_array() {
    let index_builder = fm_index::SearchIndexBuilder::new().run_length_encoding();
    let haystack = b"text".to_vec();

    let index = index_builder.build(haystack);

    // The exact number is irrelevant here; a non-zero value shows the trait
    // method was reachable and returned something.
    assert!(size(&index) > 0);
}

// An index from a `count_only` + `run_length_encoding` builder can be passed
// to code that only knows about `FMIndexBackend`.
#[test]
fn test_fm_index_backend_trait_rlfm_index_count_only() {
    let index_builder = fm_index::SearchIndexBuilder::new()
        .count_only()
        .run_length_encoding();
    let haystack = b"text".to_vec();

    let built = index_builder.build(haystack);

    // 4 bytes of text plus the terminator; the point is that the call compiles
    // and dispatches through the trait.
    let reported_len = len(&built);
    assert_eq!(reported_len, 5);
}

// An index from a `count_only` + `run_length_encoding` builder can be passed
// to code that only knows about `HeapSize`.
#[test]
fn test_heap_size_trait_rlfm_index_count_only() {
    let index_builder = fm_index::SearchIndexBuilder::new()
        .count_only()
        .run_length_encoding();
    let haystack = b"text".to_vec();

    let index = index_builder.build(haystack);

    // The exact number is irrelevant here; a non-zero value shows the trait
    // method was reachable and returned something.
    assert!(size(&index) > 0);
}