Skip to content

Commit

Permalink
Merge branch 'master' of github.com:HudsonAlpha/rust-fmlrc
Browse files: browse the repository at this point in the history
  • Loading branch information
holtjma committed Oct 12, 2020
2 parents 7797f6a + cd499bb commit 98ac63e
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 12 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "fmlrc"
version = "0.1.1"
version = "0.1.2"
authors = ["holtjma <[email protected]>"]
edition = "2018"
license = "MIT OR Apache-2.0"
Expand All @@ -21,6 +21,7 @@ flate2 = "1.0.14"
libmath = "0.1.4"
log = "0.4.8"
needletail = "0.3.2"
serde_json = "1.0.58"
subprocess = "0.2.4"
tempfile = "3.1.0"
threadpool = "1.7.1"
Expand Down
59 changes: 48 additions & 11 deletions src/bv_bwt.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

extern crate log;
extern crate serde_json;

use log::info;
use std::io::prelude::*;
Expand Down Expand Up @@ -137,10 +138,10 @@ impl BitVectorBWT {

//read the initial fixed header
let mut file = fs::File::open(&filename)?;
let mut init_header: Vec<u8> = vec![0; 16];
let mut init_header: Vec<u8> = vec![0; 10];
let read_count: usize = file.read(&mut init_header[..])?;
if read_count != 16 {
panic!("Could not read initial 16 bytes of header for file {:?}", filename);
if read_count != 10 {
panic!("Could not read initial 10 bytes of header for file {:?}", filename);
}

//read the dynamic header
Expand All @@ -149,19 +150,55 @@ impl BitVectorBWT {
if skip_bytes % 16 != 0 {
skip_bytes = ((skip_bytes / 16)+1)*16;
}
let mut skip_header: Vec<u8> = vec![0; skip_bytes-16];
let read_count: usize = file.read(&mut skip_header[..])?;
if read_count != skip_bytes-16 {
panic!("Could not read bytes 16-{:?} of header for file {:?}", skip_bytes, filename);
let mut skip_header: Vec<u8> = vec![0; skip_bytes-10];
match file.read_exact(&mut skip_header[..]) {
Ok(()) => {},
Err(e) => {
return Err(
std::io::Error::new(
e.kind(),
format!("Could not read bytes 10-{:?} of header for file {:?}, root-error {:?}", skip_bytes, filename, e)
)
);
}
}

//parse the header string for the expected length; the numpy header is styled as a Python literal rather than JSON, so the string needs a lot of manipulation before serde_json can parse it
let header_string = String::from_utf8(skip_header).unwrap()
.replace("\'", "\"")
.replace("False", "false")
.replace("(", "[")
.replace(")", "]")
.replace(", }", "}")
.replace(", ]", "]")
.replace(",]", "]");
let header_dict: serde_json::Value = serde_json::from_str(&header_string)
.expect(&format!("Error while parsing header string: {:?}", header_string));
let expected_length: u64 = header_dict["shape"][0].as_u64().unwrap();

//check that the disk size matches our expectation
let bwt_disk_size: u64 = full_file_size - skip_bytes as u64;
if expected_length != bwt_disk_size {
return Err(
std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
format!("Header indicates shape of {:?}, but remaining file size is {:?}", expected_length, bwt_disk_size)
)
);
}

//finally read in everything else
let bwt_disk_size: u64 = full_file_size - skip_bytes as u64;
self.bwt = vec![0; bwt_disk_size as usize];
let read_count: usize = file.read(&mut self.bwt[..])?;
self.bwt = Vec::<u8>::with_capacity(bwt_disk_size as usize);//vec![0; bwt_disk_size as usize];
let read_count: usize = file.read_to_end(&mut self.bwt)?;
if read_count as u64 != bwt_disk_size {
panic!("Could not read {:?} bytes of BWT body for file {:?}", bwt_disk_size, filename);
return Err(
std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
format!("Only read {:?} of {:?} bytes of BWT body for file {:?}", read_count, bwt_disk_size, filename)
)
);
}

//TODO: I imagine we want to use the info here somehow?
//printf("loaded bwt with %lu compressed values\n", this->bwt.size());
info!("Loading BWT with {:?} compressed values", bwt_disk_size);
Expand Down

0 comments on commit 98ac63e

Please sign in to comment.