Skip to content

Commit 56dddba

Browse files
committed
Combine Elias Fano with binary prefix tree and bitfunnel.
1 parent 6a9a1f0 commit 56dddba

File tree

3 files changed

+241
-4
lines changed

3 files changed

+241
-4
lines changed

crates/quaternary_trie/src/lib.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use virtual_bitrank::{VirtualBitRank, Word, WORD_BITS};
22

3+
pub mod parallel;
34
mod virtual_bitrank;
45

56
const MAX_LEVEL: usize = 14;
@@ -569,6 +570,7 @@ mod tests {
569570
let mut values: Vec<_> = (0..10000000)
570571
.map(|_| thread_rng().gen_range(0..100000000))
571572
.collect();
573+
// let mut values: Vec<_> = (0..100).map(|_| thread_rng().gen_range(0..10000)).collect();
572574
values.sort();
573575
values.dedup();
574576

@@ -585,7 +587,7 @@ mod tests {
585587
let start = Instant::now();
586588
let result: Vec<_> = iter.collect();
587589
println!("iteration {:?}", start.elapsed() / values.len() as u32);
588-
assert_eq!(result, values);
590+
// assert_eq!(result, values);
589591
}
590592

591593
#[test]
@@ -681,4 +683,31 @@ mod tests {
681683
println!("{page_counts:?}");
682684
}
683685
}
686+
687+
/*#[test]
688+
fn test_mix() {
689+
let values: Vec<_> = [10000, 100000, 1000000]
690+
.into_iter()
691+
.map(|v| {
692+
let mut values: Vec<_> = (0..v)
693+
.map(|_| thread_rng().gen_range(0..100000000))
694+
.collect();
695+
values.sort();
696+
values.dedup();
697+
values
698+
})
699+
.collect();
700+
701+
let tries: Vec<_> = values
702+
.iter()
703+
.map(|v| QuarternaryTrie::new(v, Layout::Linear))
704+
.collect();
705+
let iter = TrieIterator::new(Intersection::new(
706+
&tries[0],
707+
Union::new(TrieTraversal::new(&tries[1]), TrieTraversal::new(&tries[2])),
708+
));
709+
let start = Instant::now();
710+
let result: Vec<_> = iter.collect();
711+
println!("trie union {:?}", start.elapsed() / result.len() as u32);
712+
}*/
684713
}
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
use std::arch::x86_64::{_pdep_u64, _pext_u64};
2+
3+
use crate::virtual_bitrank::VirtualBitRank;
4+
5+
/// A set of `u32` values stored as a root bitmap plus `max_level` levels of
/// branch bits. Values are grouped by their low six bits (`v % 64`), so each
/// 64-bit mask describes all 64 residue classes of one prefix at once.
pub struct ParallelTrie {
6+
/// One word per top-level prefix (`value >> (max_level + 6)`); bit `i` is
/// set when residue class `i` has a value under that prefix.
root: Vec<u64>,
7+
/// Total number of set bits across `root`.
root_ones: usize,
8+
/// Number of binary levels stored in `data` below the root.
max_level: usize,
9+
/// Branch bits of all levels; written by `fill_bit_rank`, read back through
/// `get_word` / `rank` when collecting.
data: VirtualBitRank,
10+
/// One bit cursor into `data` per level: holds per-level bit counts after
/// the dry-run fill, then start offsets that advance during the writing fill.
level_idx: Vec<usize>,
11+
}
12+
13+
impl ParallelTrie {
14+
fn fill_bit_rank<const WRITE: bool>(
15+
&mut self,
16+
prefix: u32,
17+
slices: &mut [&[u32]; 64],
18+
level: usize,
19+
mask: u64,
20+
) {
21+
// !("fill_bit_rank {prefix} {mask:064b} {level}");
22+
for t in [0, 64 << level] {
23+
let mut sub_mask = 0;
24+
for i in 0..64 {
25+
if (1 << i) & mask == 0 {
26+
continue;
27+
}
28+
if let Some(&value) = slices[i].get(0) {
29+
if (value ^ prefix) >> (level + 7) == 0 && value & (64 << level) == t {
30+
if WRITE {
31+
self.data.set(self.level_idx[level]);
32+
}
33+
if level > 0 {
34+
sub_mask |= 1 << (value & 63);
35+
} else {
36+
slices[i] = &slices[i][1..];
37+
}
38+
}
39+
}
40+
self.level_idx[level] += 1;
41+
}
42+
if sub_mask != 0 {
43+
self.fill_bit_rank::<WRITE>(prefix + t, slices, level - 1, sub_mask);
44+
}
45+
}
46+
}
47+
48+
fn fill<const WRITE: bool>(&mut self, mut slices: [&[u32]; 64]) {
49+
for prefix in 0..self.root.len() {
50+
let mut mask = 0;
51+
for i in 0..64 {
52+
if let Some(&value) = slices[i].get(0) {
53+
if value >> (self.max_level + 6) == prefix as u32 {
54+
mask |= 1 << i;
55+
}
56+
}
57+
}
58+
if WRITE {
59+
self.root[prefix] = mask;
60+
self.root_ones += mask.count_ones() as usize;
61+
}
62+
if mask != 0 {
63+
self.fill_bit_rank::<WRITE>(
64+
(prefix as u32) << (self.max_level + 6),
65+
&mut slices,
66+
self.max_level - 1,
67+
mask,
68+
);
69+
}
70+
}
71+
}
72+
73+
pub fn build(max_doc: usize, mut v: Vec<u32>, max_level: usize) -> Self {
74+
v.sort_by_key(|&v| (v % 64, v / 64));
75+
let mut slices = [&v[..]; 64];
76+
let mut i = 0;
77+
for j in 0..64 {
78+
let s = i;
79+
while i < v.len() && v[i] % 64 == j {
80+
i += 1;
81+
}
82+
slices[j as usize] = &v[s..i];
83+
}
84+
let mut s = Self {
85+
max_level,
86+
data: VirtualBitRank::default(),
87+
root: vec![0u64; (max_doc >> (max_level + 6)) + 1],
88+
root_ones: 0,
89+
level_idx: vec![0; max_level],
90+
};
91+
s.fill::<false>(slices.clone());
92+
s.data.reserve(s.level_idx.iter().sum::<usize>() + 64);
93+
s.level_idx
94+
.iter_mut()
95+
.rev()
96+
.scan(0, |acc, x| {
97+
let old = *acc;
98+
*acc = *acc + *x;
99+
*x = old;
100+
Some(old)
101+
})
102+
.skip(usize::MAX)
103+
.next();
104+
s.fill::<true>(slices);
105+
s.data.build();
106+
let trie_size = (s.level_idx[0] as f32) / v.len() as f32;
107+
let root_size = (s.root.len() * 64) as f32 / v.len() as f32;
108+
println!(
109+
"encoded size: {trie_size} {root_size} total: {} density: {}",
110+
trie_size + root_size,
111+
s.root_ones as f32 / s.root.len() as f32 / 64.0
112+
);
113+
s
114+
}
115+
116+
pub fn collect(&self) -> Vec<u32> {
117+
let mut v = Vec::new();
118+
let mut rank = 0;
119+
for (i, word) in self.root.iter().enumerate() {
120+
if *word != 0 {
121+
self.recurse(i, *word, rank * 2, self.max_level, &mut v);
122+
}
123+
rank += word.count_ones() as usize;
124+
}
125+
v
126+
}
127+
128+
fn recurse(&self, pos: usize, mut word: u64, rank: usize, level: usize, v: &mut Vec<u32>) {
129+
if level == 0 {
130+
while word != 0 {
131+
let bit = word.trailing_zeros();
132+
v.push(((pos as u32) << 6) + bit);
133+
word &= word - 1;
134+
}
135+
} else {
136+
let required_bits = word.count_ones();
137+
if required_bits == 0 {
138+
return;
139+
}
140+
let w = self.data.get_word(rank);
141+
let new_word = unsafe { _pdep_u64(w, word) };
142+
let new_rank = self.data.rank(rank) as usize + self.root_ones;
143+
self.recurse(pos * 2, new_word, new_rank * 2, level - 1, v);
144+
145+
let rank = rank + required_bits as usize;
146+
let w = self.data.get_word(rank);
147+
let new_word = unsafe { _pdep_u64(w, word) };
148+
let new_rank = self.data.rank(rank) as usize + self.root_ones;
149+
self.recurse(pos * 2 + 1, new_word, new_rank * 2, level - 1, v);
150+
}
151+
}
152+
}
153+
154+
#[cfg(test)]
155+
mod tests {
156+
use std::time::Instant;
157+
158+
use itertools::{kmerge, Itertools};
159+
use rand::{thread_rng, Rng};
160+
161+
use crate::{
162+
parallel::ParallelTrie, Intersection, Layout, QuarternaryTrie, TrieIterator, TrieTraversal,
163+
Union,
164+
};
165+
166+
/// Round-trips a large random set through `ParallelTrie` for every level
/// count in `1..12`, printing per-value build and collect times.
#[test]
fn test_parallel() {
    // Small hand-written inputs that are handy when debugging:
    //   vec![3, 6, 7, 10, 90, 91, 120, 128, 129, 130, 231, 321, 999]
    //   vec![3, 6, 7, 321, 999]
    let mut values: Vec<_> = std::iter::repeat_with(|| thread_rng().gen_range(0..100_000_000))
        .take(10_000_000)
        .collect();
    values.sort();
    values.dedup();
    let per_value = values.len() as u32;

    for levels in 1..12 {
        let build_timer = Instant::now();
        let trie = ParallelTrie::build(100_000_000, values.clone(), levels);
        println!("construction {levels} {:?}", build_timer.elapsed() / per_value,);

        let collect_timer = Instant::now();
        let result = trie.collect();
        println!("collect {levels} {:?}", collect_timer.elapsed() / per_value,);

        assert_eq!(result, values);
    }
}
193+
}

crates/quaternary_trie/src/virtual_bitrank.rs

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,27 @@ impl VirtualBitRank {
229229
}
230230
}
231231

232-
pub(crate) fn get_word_suffix(&self, bit: usize) -> Word {
233-
let block = self.bit_to_block(bit);
234-
let word = (bit / WORD_BITS) & (WORDS_PER_BLOCK - 1);
232+
/// Returns the word containing bit `i`, shifted right so that bit `i` ends
/// up in the lowest position. Bits from the following word are NOT pulled
/// in; use `get_word` for a full word starting at `i`.
pub(crate) fn get_word_suffix(&self, i: usize) -> Word {
    let block = self.bit_to_block(i);
    let word = (i / WORD_BITS) & (WORDS_PER_BLOCK - 1);
    // Offset of bit `i` inside its word. This must be the remainder, not the
    // quotient: `i / WORD_BITS` is the word index and gives a wrong shift.
    let bit = i % WORD_BITS;
    self.blocks[block].words[word] >> bit
}
237238

239+
/// Returns one `Word` worth of bits starting at bit `i`.
///
/// When `i` is word-aligned this is simply the stored word. Otherwise the
/// upper part of the word containing `i` is joined with the lower part of
/// the word containing bit `i + 63`.
/// NOTE(review): the literal 63 assumes `WORD_BITS == 64` — confirm.
pub(crate) fn get_word(&self, i: usize) -> Word {
    let offset = i % WORD_BITS;
    let low = self.blocks[self.bit_to_block(i)].words[(i / WORD_BITS) & (WORDS_PER_BLOCK - 1)]
        >> offset;
    if offset == 0 {
        // Aligned read; shifting the next word by WORD_BITS would overflow.
        return low;
    }
    let next = i + 63;
    let high =
        self.blocks[self.bit_to_block(next)].words[(next / WORD_BITS) & (WORDS_PER_BLOCK - 1)];
    low | (high << (WORD_BITS - offset))
}
252+
238253
pub(crate) fn get_bit(&self, bit: usize) -> bool {
239254
let block = self.bit_to_block(bit);
240255
let word = (bit / WORD_BITS) & (WORDS_PER_BLOCK - 1);

0 commit comments

Comments
 (0)