Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tidy up sais module #53

Merged
merged 2 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/fm_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ use crate::character::{prepare_text, Character};
use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::{FMIndexBackend, HasPosition};
use crate::suffix_array::sais;
use crate::suffix_array::sample::{self, SuffixOrderSampledArray};
use crate::{sais, seal, HeapSize};
use crate::{seal, HeapSize};
use crate::{util, Search};

use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -52,7 +53,7 @@ where
fn create(text: Vec<T>, converter: C, get_sample: impl Fn(&[u64]) -> S) -> Self {
let text = prepare_text(text);
let cs = sais::get_bucket_start_pos(&sais::count_chars(&text, &converter));
let sa = sais::sais(&text, &converter);
let sa = sais::build_suffix_array(&text, &converter);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is definitely a lot easier to read! It took me a while to figure out that this was a suffix array, and now it's immediately obvious.

let bw = Self::wavelet_matrix(text, &sa, &converter);

FMIndex {
Expand Down
1 change: 0 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ mod character;
mod fm_index;
mod iter;
mod rlfmi;
mod sais;
mod seal;
mod search;
mod suffix_array;
Expand Down
5 changes: 3 additions & 2 deletions src/rlfmi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ use crate::character::{prepare_text, Character};
use crate::converter;
use crate::converter::{Converter, IndexWithConverter};
use crate::iter::{FMIndexBackend, HasPosition};
use crate::suffix_array::sais;
use crate::suffix_array::sample::{self, SuffixOrderSampledArray};
use crate::{sais, HeapSize, Search};
use crate::{seal, util};
use crate::{HeapSize, Search};

use serde::{Deserialize, Serialize};
use vers_vecs::{BitVec, RsVec, WaveletMatrix};
Expand Down Expand Up @@ -55,7 +56,7 @@ where

let n = text.len();
let m = converter.len();
let sa = sais::sais(&text, &converter);
let sa = sais::build_suffix_array(&text, &converter);

let mut c0 = T::zero();
// sequence of run heads
Expand Down
1 change: 1 addition & 0 deletions src/suffix_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
//!
//! Can also be used in sampled fashion to perform locate queries.

pub mod sais;
pub mod sample;
19 changes: 10 additions & 9 deletions src/sais.rs → src/suffix_array/sais.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ where
}
}

pub fn sais<T, C, K>(text: K, converter: &C) -> Vec<u64>
/// Build a suffix array from the given `text` using the SA-IS algorithm.
pub fn build_suffix_array<T, C, K>(text: K, converter: &C) -> Vec<u64>
where
T: Character,
K: AsRef<[T]>,
Expand Down Expand Up @@ -368,37 +369,37 @@ mod tests {
fn test_sais_no_trailing_zero() {
let text = "nozero".to_string().into_bytes();
let converter = RangeConverter::new(b'a', b'z');
sais(&text, &converter);
build_suffix_array(&text, &converter);
}

#[test]
fn test_sais_1() {
let text = &[0u8];
let sa = sais(text, &IdConverter::with_size(4));
let sa = build_suffix_array(text, &IdConverter::with_size(4));
let expected = get_suffix_array(text);
assert_eq!(sa, expected);
}

#[test]
fn test_sais_2() {
let text = &[3u8, 0];
let sa = sais(text, &IdConverter::with_size(4));
let sa = build_suffix_array(text, &IdConverter::with_size(4));
let expected = get_suffix_array(text);
assert_eq!(sa, expected);
}

#[test]
fn test_sais_4() {
let text = &[3u8, 2, 1, 0];
let sa = sais(text, &IdConverter::with_size(4));
let sa = build_suffix_array(text, &IdConverter::with_size(4));
let expected = get_suffix_array(text);
assert_eq!(sa, expected);
}

#[test]
fn test_sais_with_nulls() {
let text = b"mm\0ii\0s\0sii\0ssii\0ppii\0".to_vec();
let sa = sais(&text, &RangeConverter::new(b'a', b'z'));
let sa = build_suffix_array(&text, &RangeConverter::new(b'a', b'z'));
let expected = get_suffix_array(text);
assert_eq!(sa, expected);
}
Expand All @@ -407,7 +408,7 @@ mod tests {
#[ignore]
fn test_sais_with_consecutive_nulls() {
let text = b"mm\0\0ii\0s\0\0\0sii\0ssii\0ppii\0".to_vec();
let sa = sais(&text, &RangeConverter::new(b'a', b'z'));
let sa = build_suffix_array(&text, &RangeConverter::new(b'a', b'z'));
let expected = get_suffix_array(text);
assert_eq!(sa, expected);
}
Expand All @@ -417,7 +418,7 @@ mod tests {
let mut text = "mmiissiissiippii".to_string().into_bytes();
text.push(0);
let converter = RangeConverter::new(b'a', b'z');
let sa = sais(&text, &converter);
let sa = build_suffix_array(&text, &converter);
let ans = get_suffix_array(text);

assert_eq!(sa.len(), ans.len());
Expand All @@ -441,7 +442,7 @@ mod tests {
text.push(0);

let converter = RangeConverter::new(b'a', b'b');
let sa = sais(&text, &converter);
let sa = build_suffix_array(&text, &converter);
let ans = get_suffix_array(&text);
assert_eq!(sa.len(), ans.len());
for (i, (actual, expected)) in sa.into_iter().zip(ans.into_iter()).enumerate() {
Expand Down