Skip to content

Commit

Permalink
Clippy warnings
Browse files (browse the repository at this point in the history)
  • Loading branch information
tomfran committed Jan 25, 2024
1 parent 60bacfb commit f2e48bc
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 91 deletions.
6 changes: 6 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ cli:

test:
cargo test --release

clippy:
cargo clippy

clippy-pedantic:
cargo clippy -- -W clippy::pedantic
10 changes: 5 additions & 5 deletions search/src/index/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use super::{
postings::{PostingEntry, PostingList, Postings},
preprocessor::Preprocessor,
vocabulary::Vocabulary,
InMemoryIndex,
InMemory,
};
use indicatif::{ParallelProgressIterator, ProgressStyle};
use rayon::prelude::*;
Expand All @@ -20,16 +20,16 @@ const PROGRESS_CHARS: &str = "=> ";
const CUTOFF_THRESHOLD: f64 = 0.8;

pub fn build_index(input_dir: &str, output_path: &str, preprocessor: &Preprocessor) {
let index: InMemoryIndex = build_in_memory(input_dir, preprocessor);
let index: InMemory = build_in_memory(input_dir, preprocessor);
Postings::write_postings(&index, output_path);
Vocabulary::write_vocabulary(&index, output_path);
Documents::write_documents(&index.documents, output_path);
}

fn build_in_memory(input_dir: &str, preprocessor: &Preprocessor) -> InMemoryIndex {
fn build_in_memory(input_dir: &str, preprocessor: &Preprocessor) -> InMemory {
let files: Vec<fs::DirEntry> = fs::read_dir(input_dir)
.expect("error while retrieving input directory content")
.map(|p| p.unwrap())
.map(std::result::Result::unwrap)
.collect();

// document counter
Expand Down Expand Up @@ -107,7 +107,7 @@ fn build_in_memory(input_dir: &str, preprocessor: &Preprocessor) -> InMemoryInde
.filter(|(_, v)| final_postings[*v].collection_frequency <= frequency_threshold)
.collect();

InMemoryIndex {
InMemory {
term_index_map: sorted_term_index_map,
postings: final_postings,
documents: documents.into_inner().unwrap(),
Expand Down
18 changes: 9 additions & 9 deletions search/src/index/documents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ impl Documents {
pub fn load_documents(input_path: &str) -> Documents {
let mut reader = BitsReader::new(&(input_path.to_string() + DOCUMENTS_EXTENSION));

let mut prev: String = "".to_string();
let mut prev = String::new();
let docs = (0..reader.read_vbyte())
.map(|_| {
let p_len = reader.read_gamma();
Expand All @@ -40,15 +40,15 @@ impl Documents {
let mut prev = "";

writer.write_vbyte(documents.len() as u32);
documents.iter().for_each(|l| {
for l in documents.iter() {
let p_len = utils::get_matching_prefix_len(prev, &l.path);
writer.write_gamma(p_len as u32);
let remaining: String = l.path.chars().skip(p_len).collect();
prev = &l.path;

writer.write_str(&remaining);
writer.write_vbyte(l.lenght);
});
}

writer.flush();
}
Expand Down Expand Up @@ -92,9 +92,9 @@ mod tests {

assert_eq!(loaded_documents.get_num_documents(), documents.len() as u32);

for i in 0..documents.len() {
assert_eq!(loaded_documents.get_doc_path(i as u32), documents[i].path);
assert_eq!(loaded_documents.get_doc_len(i as u32), documents[i].lenght);
for (i, d) in documents.iter().enumerate() {
assert_eq!(loaded_documents.get_doc_path(i as u32), d.path);
assert_eq!(loaded_documents.get_doc_len(i as u32), d.lenght);
}
}

Expand All @@ -117,9 +117,9 @@ mod tests {

assert_eq!(doc_collection.get_num_documents(), documents.len() as u32);

for i in 0..documents.len() {
assert_eq!(doc_collection.get_doc_path(i as u32), documents[i].path);
assert_eq!(doc_collection.get_doc_len(i as u32), documents[i].lenght);
for (i, d) in documents.iter().enumerate() {
assert_eq!(doc_collection.get_doc_path(i as u32), d.path);
assert_eq!(doc_collection.get_doc_len(i as u32), d.lenght);
}
}
}
2 changes: 1 addition & 1 deletion search/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub struct Index {
preprocessor: Preprocessor,
}

pub struct InMemoryIndex {
pub struct InMemory {
term_index_map: BTreeMap<String, usize>,
postings: Vec<PostingList>,
documents: Vec<Document>,
Expand Down
10 changes: 5 additions & 5 deletions search/src/index/postings.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{InMemoryIndex, OFFSETS_EXTENSION, POSTINGS_EXTENSION};
use super::{InMemory, OFFSETS_EXTENSION, POSTINGS_EXTENSION};
use crate::disk::{bits_reader::BitsReader, bits_writer::BitsWriter};

#[derive(Default)]
Expand Down Expand Up @@ -38,7 +38,7 @@ impl Postings {
Postings { reader, offsets }
}

pub fn write_postings(index: &InMemoryIndex, output_path: &str) {
pub fn write_postings(index: &InMemory, output_path: &str) {
let postings_path = output_path.to_string() + POSTINGS_EXTENSION;
let mut postings_writer = BitsWriter::new(&postings_path);

Expand All @@ -50,7 +50,7 @@ impl Postings {

offsets_writer.write_vbyte(index.term_index_map.len() as u32);

for (_, idx) in index.term_index_map.iter() {
for idx in index.term_index_map.values() {
offsets_writer.write_gamma(offset as u32 - prev_offset);
prev_offset = offset as u32;

Expand All @@ -59,13 +59,13 @@ impl Postings {
offset += postings_writer.write_vbyte(postings.documents.len() as u32);

let mut prev_doc_id = 0;
for entry in postings.documents.iter() {
for entry in &postings.documents {
offset += postings_writer.write_gamma(entry.document_id - prev_doc_id);
offset += postings_writer.write_gamma(entry.document_frequency);

let mut prev_pos = 0;
offset += postings_writer.write_vbyte(entry.positions.len() as u32);
for pos in entry.positions.iter() {
for pos in &entry.positions {
offset += postings_writer.write_gamma(*pos - prev_pos);
prev_pos = *pos;
}
Expand Down
2 changes: 1 addition & 1 deletion search/src/index/preprocessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ impl Preprocessor {
self.regex
.replace_all(text, " ")
.split_whitespace()
.map(|t| t.to_lowercase())
.map(str::to_lowercase)
.map(|t| self.stemmer.stem(&t).to_string())
.collect()
}
Expand Down
43 changes: 22 additions & 21 deletions search/src/index/vocabulary.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{utils, InMemoryIndex, VOCABULARY_ALPHA_EXTENSION};
use super::{utils, InMemory, VOCABULARY_ALPHA_EXTENSION};
use crate::disk::{bits_reader::BitsReader, bits_writer::BitsWriter};
use fxhash::FxHashMap;

Expand All @@ -11,7 +11,7 @@ pub struct Vocabulary {
}

impl Vocabulary {
pub fn write_vocabulary(index: &InMemoryIndex, output_path: &str) {
pub fn write_vocabulary(index: &InMemory, output_path: &str) {
let path = output_path.to_string() + VOCABULARY_ALPHA_EXTENSION;
let mut writer = BitsWriter::new(&path);

Expand All @@ -22,14 +22,14 @@ impl Vocabulary {
// write all terms with prefix compression
let mut prev = "";

vocab.keys().for_each(|s| {
for s in vocab.keys() {
let p_len = utils::get_matching_prefix_len(prev, s);
writer.write_gamma(p_len as u32);
let remaining: String = s.chars().skip(p_len).collect();
prev = s;

writer.write_str(&remaining);
});
}

// write all collection frequencies
index.postings.iter().for_each(|p| {
Expand All @@ -46,7 +46,7 @@ impl Vocabulary {
let num_terms: u32 = reader.read_vbyte();

// read prefix compressed terms
let mut prev = "".to_string();
let mut prev = String::new();

let mut index_to_term = Vec::new();

Expand Down Expand Up @@ -95,7 +95,7 @@ impl Vocabulary {
}

pub fn get_term_index(&self, term: &str) -> Option<usize> {
self.term_to_index.get(term).map(|i| *i)
self.term_to_index.get(term).copied()
}

#[allow(dead_code)]
Expand All @@ -109,12 +109,12 @@ impl Vocabulary {
fn get_closest_index(&self, term: &str) -> Option<usize> {
let candidates = (0..term.len() - 2)
.map(|i| term[i..i + 3].to_string())
.flat_map(|t| self.trigram_index.get(&t))
.flat_map(|v| v.into_iter());
.filter_map(|t| self.trigram_index.get(&t))
.flat_map(|v| v.iter());

candidates
.min_by_key(|i| Self::distance(term, &self.index_to_term[**i]))
.map(|i| *i)
.copied()
}

#[allow(unused_variables)]
Expand All @@ -139,19 +139,20 @@ mod tests {
map.insert("hello".to_string(), 0);
map.insert("world".to_string(), 0);

let mut postings = Vec::new();
postings.push(PostingList {
collection_frequency: 1,
documents: Vec::new(),
});
postings.push(PostingList {
collection_frequency: 2,
documents: Vec::new(),
});

let index = InMemoryIndex {
let postings = vec![
PostingList {
collection_frequency: 1,
documents: Vec::new(),
},
PostingList {
collection_frequency: 2,
documents: Vec::new(),
},
];

let index = InMemory {
term_index_map: map,
postings: postings,
postings,
documents: Vec::new(),
};

Expand Down
21 changes: 10 additions & 11 deletions search/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use indicatif::HumanDuration;
use search::index::Index;
use search::query::{QueryProcessor, QueryResult};
use search::query::{Processor, Result};
use std::cmp::min;
use std::env;
use std::io::{self, Write};
Expand All @@ -10,7 +10,7 @@ use std::time::{Duration, Instant};
const NUM_TOP_RESULTS: usize = 10;
const NUM_RESULTS: usize = 1_000_000;

fn print_results(result: QueryResult) {
fn print_results(result: &Result) {
println!("Search tokens: {:?}", result.tokens);

if result.documents.is_empty() {
Expand All @@ -35,7 +35,7 @@ fn print_results(result: QueryResult) {
}

fn read_line(prompt: &str) -> String {
print!("{}", prompt);
print!("{prompt}");
io::stdout().flush().unwrap();

let mut input = String::new();
Expand Down Expand Up @@ -66,16 +66,16 @@ fn main() {
let action = &args[2];
let build_index = action == "build";

let index_path = format!("{}/index/idx", base_path);
let docs_path = format!("{}/docs", base_path);
let index_path = format!("{base_path}/index/idx");
let docs_path = format!("{base_path}/docs");

if build_index {
println!("Start build on directory [{}]\n", docs_path);
println!("Start build on directory [{docs_path}]\n");

let num_threads = args.get(3).map_or(0, |s| s.parse().unwrap_or(0));

if num_threads != 0 {
println!("Setting thread number to {}", num_threads);
println!("Setting thread number to {num_threads}");

rayon::ThreadPoolBuilder::new()
.num_threads(num_threads)
Expand All @@ -96,18 +96,17 @@ fn main() {
exit(0);
}

let mut q = QueryProcessor::build_query_processor(&index_path);
let mut q = Processor::build_query_processor(&index_path);

println!(
"Loaded search engine for directory: [{}]\n\nWrite a query and press enter.\n",
base_path
"Loaded search engine for directory: [{base_path}]\n\nWrite a query and press enter.\n"
);

loop {
let query = read_line("> ");

let result = q.query(&query, NUM_RESULTS);

print_results(result);
print_results(&result);
}
}
6 changes: 3 additions & 3 deletions search/src/query/document_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{cmp::Ordering, collections::BinaryHeap};
#[derive(Debug)]
pub struct Entry {
pub id: u32,
pub score: f32,
pub score: f64,
}

impl PartialEq for Entry {
Expand Down Expand Up @@ -39,7 +39,7 @@ impl DocumentSelector {
}
}

pub fn push(&mut self, id: u32, score: f32) {
pub fn push(&mut self, id: u32, score: f64) {
self.heap.push(Entry { id, score });

if self.heap.len() > self.capacity {
Expand All @@ -48,7 +48,7 @@ impl DocumentSelector {
}

pub fn get_sorted_entries(&mut self) -> Vec<Entry> {
let mut res: Vec<Entry> = (0..self.capacity).flat_map(|_| self.heap.pop()).collect();
let mut res: Vec<Entry> = (0..self.capacity).filter_map(|_| self.heap.pop()).collect();
res.reverse();
res
}
Expand Down
Loading

0 comments on commit f2e48bc

Please sign in to comment.