Skip to content

Commit

Permalink
in-memory index creation with loading
Browse files Browse the repository at this point in the history
  • Loading branch information
tomfran committed Dec 12, 2023
1 parent 61282ee commit c938e62
Show file tree
Hide file tree
Showing 12 changed files with 281 additions and 131 deletions.
2 changes: 0 additions & 2 deletions src/bits/mod.rs

This file was deleted.

18 changes: 9 additions & 9 deletions src/bits/reader.rs → src/disk/bits_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ use std::{

const BUFFER_SIZE: u32 = 128;

pub struct Reader {
pub struct BitsReader {
file: BufReader<File>,
buffer: u128,
byte_buffer: [u8; 16],
read: u32,
}

impl Reader {
pub fn new(filename: &str) -> Reader {
let mut r = Reader {
impl BitsReader {
pub fn new(filename: &str) -> BitsReader {
let mut r = BitsReader {
file: BufReader::new(File::open(filename).expect("can not open input file")),
buffer: 0,
byte_buffer: [0; 16],
Expand Down Expand Up @@ -111,14 +111,14 @@ impl Reader {
mod test {

use super::*;
use crate::bits::writer::Writer;
use crate::disk::bits_writer::BitsWriter;
use std::fs::create_dir_all;

#[test]
fn test_read() {
create_dir_all("data/test/").expect("error while creating test dir");

let mut w = Writer::new("data/test/writer_unit.bin");
let mut w = BitsWriter::new("data/test/writer_unit.bin");

(1..100).for_each(|i| {
w.write_vbyte(i);
Expand All @@ -130,7 +130,7 @@ mod test {

w.flush();

let mut r = Reader::new("data/test/writer_unit.bin");
let mut r = BitsReader::new("data/test/writer_unit.bin");

(1..100).for_each(|i| assert_eq!(i, r.read_vbyte()));
(1..100).for_each(|i| assert_eq!(i, r.read_gamma()));
Expand All @@ -140,14 +140,14 @@ mod test {
fn test_seek() {
create_dir_all("data/test/").expect("error while creating test dir");

let mut w = Writer::new("data/test/writer_seek.bin");
let mut w = BitsWriter::new("data/test/writer_seek.bin");

let offset = (0..1000).map(|i| w.write_gamma(i)).sum();
w.write_gamma(10);

w.flush();

let mut r = Reader::new("data/test/writer_seek.bin");
let mut r = BitsReader::new("data/test/writer_seek.bin");

r.seek(offset);
assert_eq!(r.read_gamma(), 10);
Expand Down
22 changes: 11 additions & 11 deletions src/bits/writer.rs → src/disk/bits_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,23 @@ use std::{
io::{BufWriter, Write},
};

pub struct Writer {
pub struct BitsWriter {
file: BufWriter<File>,
buffer: u128,
written: u32,
}

impl Writer {
pub fn new(filename: &str) -> Writer {
Writer {
impl BitsWriter {
pub fn new(filename: &str) -> BitsWriter {
BitsWriter {
file: BufWriter::new(File::create(filename).expect("Can not create output file")),
buffer: 0,
written: 0,
}
}

pub fn write_gamma(&mut self, n: u32) -> u64 {
let (gamma, len) = Writer::int_to_gamma(n + 1);
let (gamma, len) = BitsWriter::int_to_gamma(n + 1);
self.write_internal(gamma, len)
}

Expand All @@ -31,7 +31,7 @@ impl Writer {
}

pub fn write_vbyte(&mut self, n: u32) -> u64 {
let (vbyte, len) = Writer::int_to_vbyte(n + 1);
let (vbyte, len) = BitsWriter::int_to_vbyte(n + 1);
self.write_internal(vbyte, len)
}

Expand Down Expand Up @@ -98,22 +98,22 @@ mod test {

#[test]
fn test_gamma_coding() {
let (g, l) = Writer::int_to_gamma(1);
let (g, l) = BitsWriter::int_to_gamma(1);
assert_eq!(format!("{g:b}"), "1");
assert_eq!(l, 1);

let (g, l) = Writer::int_to_gamma(7);
let (g, l) = BitsWriter::int_to_gamma(7);
assert_eq!(format!("{g:b}"), "11100");
assert_eq!(l, 5);
}

#[test]
fn test_vbyte_coding() {
let (vb, l) = Writer::int_to_vbyte(1024);
let (vb, l) = BitsWriter::int_to_vbyte(1024);
assert_eq!(format!("{vb:b}"), "1000100000000000");
assert_eq!(l, 16);

let (vb, l) = Writer::int_to_vbyte(1);
let (vb, l) = BitsWriter::int_to_vbyte(1);
assert_eq!(format!("{vb:b}"), "10000001");
assert_eq!(l, 8);
}
Expand All @@ -125,7 +125,7 @@ mod test {
let word = (1 << 10) - 1;
let len = 10;

let mut w = Writer::new("data/test/writer.bin");
let mut w = BitsWriter::new("data/test/writer.bin");
w.written = 125;

w.write_internal(word, len);
Expand Down
4 changes: 4 additions & 0 deletions src/disk/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod bits_reader;
pub mod bits_writer;
pub mod terms_reader;
pub mod terms_writer;
24 changes: 24 additions & 0 deletions src/disk/terms_reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use std::{
fs::File,
io::{BufReader, Read},
};

/// Buffered reader over a terms file stored on disk.
pub struct TermsReader {
    file: BufReader<File>,
}

impl TermsReader {
    /// Opens `filename` for reading and wraps it in a [`BufReader`].
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened.
    pub fn new(filename: &str) -> TermsReader {
        let handle = File::open(filename).expect("can not open input file");

        Self {
            file: BufReader::new(handle),
        }
    }

    /// Reads everything from the current position up to end-of-file and
    /// returns it as an owned `String`.
    ///
    /// # Panics
    ///
    /// Panics if the underlying read fails (this includes content that is
    /// not valid UTF-8, which `Read::read_to_string` reports as an error).
    pub fn read_to_string(&mut self) -> String {
        let mut contents = String::new();
        // The byte count returned on success is not needed by callers.
        Read::read_to_string(&mut self.file, &mut contents)
            .expect("error while reading to string");
        contents
    }
}
28 changes: 28 additions & 0 deletions src/disk/terms_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use std::{
fs::File,
io::{BufWriter, Write},
};

/// Buffered writer that appends raw term text to a file on disk.
pub struct TermsWriter {
    file: BufWriter<File>,
}

impl TermsWriter {
    /// Creates `filename` (via [`File::create`]) and wraps it in a
    /// [`BufWriter`].
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be created.
    pub fn new(filename: &str) -> TermsWriter {
        let handle = File::create(filename).expect("Can not create output file");

        Self {
            file: BufWriter::new(handle),
        }
    }

    /// Writes the UTF-8 bytes of `term` to the buffered output.
    ///
    /// No separator is added between consecutive terms; callers that need
    /// one must include it in `term`.
    ///
    /// # Panics
    ///
    /// Panics if the write fails.
    pub fn write_term(&mut self, term: &str) {
        let bytes = term.as_bytes();
        Write::write_all(&mut self.file, bytes).expect("error while writing term to file");
    }

    /// Forces any buffered bytes out to the underlying file.
    ///
    /// # Panics
    ///
    /// Panics if flushing fails.
    pub fn flush(&mut self) {
        Write::flush(&mut self.file).expect("error while flushing BufWriter buffer");
    }
}
104 changes: 0 additions & 104 deletions src/index.rs

This file was deleted.

Loading

0 comments on commit c938e62

Please sign in to comment.