diff --git a/Cargo.toml b/Cargo.toml index 547cb94..6a56586 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libradicl" -version = "0.7.0" +version = "0.8.0" authors = [ "Avi Srivastava ", "Hirak Sarkar ", @@ -15,7 +15,7 @@ readme = "README.md" repository = "https://github.com/COMBINE-lab/alevin-fry" homepage = "https://github.com/COMBINE-lab/alevin-fry" documentation = "https://alevin-fry.readthedocs.io/en/latest/" -include = ["src/*.rs", "/Cargo.toml", "/README.md", "/LICENSE"] +include = ["src/*.rs", "examples/*.rs", "/Cargo.toml", "/README.md", "/LICENSE"] keywords = [ "single-cell", "preprocessing", @@ -29,10 +29,14 @@ categories = ["command-line-utilities", "science"] snap = "1" scroll = "0.12.0" num = "0.4.1" -ahash = "0.8.8" -serde = { version = "1.0.196", features = ["derive"] } +ahash = "0.8.9" +serde = { version = "1.0.197", features = ["derive"] } dashmap = "^5.5.3" bio-types = "1.0.1" smallvec = "1.13.1" noodles-bam = "0.56.0" noodles-sam = "0.53.0" +anyhow = "1.0.80" + +[dev-dependencies] +needletail="0.5.1" diff --git a/examples/read_chunk_bulk.rs b/examples/read_chunk_bulk.rs new file mode 100644 index 0000000..ea21b2f --- /dev/null +++ b/examples/read_chunk_bulk.rs @@ -0,0 +1,32 @@ +use anyhow; +use libradicl; +use libradicl::chunk::Chunk; +use libradicl::record::{PiscemBulkReadRecord, PiscemBulkRecordContext}; +use std::io::BufReader; + +fn main() -> anyhow::Result<()> { + let fname = std::env::args().nth(1).expect("input filename"); + let f = std::fs::File::open(&fname)?; + let mut ifile = BufReader::new(f); + let p = libradicl::header::RadPrelude::from_bytes(&mut ifile)?; + if let Ok(summary) = p.summary(None) { + println!("{}", summary); + } + + let _tag_map = p.file_tags.parse_tags_from_bytes_checked(&mut ifile)?; + + // Any extra context we may need to parse the records. In this case, it's the + // size of the barcode and the umi. + let tag_context = p.get_record_context::()?; + let first_chunk = Chunk::::from_bytes(&mut ifile, &tag_context); + println!( + "Chunk :: nbytes: {}, nrecs: {}", + first_chunk.nbytes, first_chunk.nrec + ); + assert_eq!(first_chunk.nrec as usize, first_chunk.reads.len()); + for (i, r) in first_chunk.reads.iter().take(10).enumerate() { + println!("record {i}: {:?}", r); + } + + Ok(()) +} diff --git a/examples/read_chunk_single_cell.rs b/examples/read_chunk_single_cell.rs new file mode 100644 index 0000000..a0c3260 --- /dev/null +++ b/examples/read_chunk_single_cell.rs @@ -0,0 +1,32 @@ +use anyhow; +use libradicl::chunk::Chunk; +use libradicl::header; +use libradicl::record::{AlevinFryReadRecord, AlevinFryRecordContext}; +use std::io::BufReader; + +fn main() -> anyhow::Result<()> { + let fname = std::env::args().nth(1).expect("input filename"); + let f = std::fs::File::open(&fname)?; + let mut ifile = BufReader::new(f); + let p = header::RadPrelude::from_bytes(&mut ifile)?; + if let Ok(summary) = p.summary(None) { + println!("{}", summary); + } + + let tag_map = p.file_tags.parse_tags_from_bytes_checked(&mut ifile)?; + println!("tag map {:?}\n", tag_map); + // Any extra context we may need to parse the records. In this case, it's the + // size of the barcode and the umi. 
+ let tag_context = p.get_record_context::()?; + let first_chunk = Chunk::::from_bytes(&mut ifile, &tag_context); + println!( + "Chunk :: nbytes: {}, nrecs: {}", + first_chunk.nbytes, first_chunk.nrec + ); + assert_eq!(first_chunk.nrec as usize, first_chunk.reads.len()); + for (i, r) in first_chunk.reads.iter().take(10).enumerate() { + println!("record {i}: {:?}", r); + } + + Ok(()) +} diff --git a/examples/read_header.rs b/examples/read_header.rs new file mode 100644 index 0000000..02ab669 --- /dev/null +++ b/examples/read_header.rs @@ -0,0 +1,48 @@ +use anyhow; +use libradicl::{self, rad_types}; +use std::io::BufReader; + +fn main() -> anyhow::Result<()> { + let fname = std::env::args().nth(1).expect("input filename"); + /* + { + let f = std::fs::File::open(&fname)?; + let mut ifile = BufReader::new(f); + let h = rad_types::RadHeader::from_bytes(&mut ifile)?; + + if let Ok(summary) = h.summary(None) { + println!("{}", summary); + } + + let ts = rad_types::TagSection::from_bytes(&mut ifile)?; + println!("File-level tags: {ts:?}"); + + let ts = rad_types::TagSection::from_bytes(&mut ifile)?; + println!("Read-level tags: {ts:?}"); + + let ts = rad_types::TagSection::from_bytes(&mut ifile)?; + println!("Alignment-level tags: {ts:?}"); + } + */ + + println!("\n"); + + { + let f = std::fs::File::open(&fname)?; + let mut ifile = BufReader::new(f); + let p = libradicl::header::RadPrelude::from_bytes(&mut ifile)?; + if let Ok(summary) = p.summary(None) { + println!("{}", summary); + } + let ftmp = p.file_tags.parse_tags_from_bytes(&mut ifile)?; + if let Some(rad_types::TagValue::ArrayU32(v)) = ftmp.get("ref_lengths") { + println!( + "file tag values: {:?}", + v.iter().take(10).collect::>() + ); + } else { + println!("file-level tags: {:?}", &ftmp); + } + } + Ok(()) +} diff --git a/src/chunk.rs b/src/chunk.rs new file mode 100644 index 0000000..2509ef2 --- /dev/null +++ b/src/chunk.rs @@ -0,0 +1,69 @@ +use crate::{self as libradicl}; +use libradicl::record::MappedRecord; +use scroll::Pread; +use std::io::{Cursor, Read}; + +#[derive(Debug)] +pub struct Chunk { + pub nbytes: u32, + pub nrec: u32, + pub reads: Vec, +} + +#[derive(Debug)] +#[allow(dead_code)] +pub struct CorrectedCbChunk { + pub(crate) remaining_records: u32, + pub(crate) corrected_bc: u64, + pub(crate) nrec: u32, + pub(crate) data: Cursor>, /*, + umis: Vec, + ref_offsets: Vec, + ref_ids: Vec, + */ +} + +pub struct ChunkConfig { + pub num_chunks: u64, + pub bc_type: u8, + pub umi_type: u8, +} + +impl Chunk { + /// Read the header of the next [Chunk] from the provided `reader`. This + /// function returns a tuple representing the number of bytes and number of + /// records, respectively, in the chunk. + #[inline] + pub fn read_header(reader: &mut T) -> (u32, u32) { + let mut buf = [0u8; 8]; + reader.read_exact(&mut buf).unwrap(); + let nbytes = buf.pread::(0).unwrap(); + let nrec = buf.pread::(4).unwrap(); + (nbytes, nrec) + } + + /// Read the next [Chunk] from the provided reader and return it. + #[inline] + pub fn from_bytes(reader: &mut T, ctx: &R::ParsingContext) -> Self { + let (nbytes, nrec) = Self::read_header(reader); + let mut c = Self { + nbytes, + nrec, + reads: Vec::::with_capacity(nrec as usize), + }; + + for _ in 0..(nrec as usize) { + c.reads.push(R::from_bytes_with_context(reader, ctx)); + } + + c + } + + /// Peeks to the first [libradicl::record::AlevinFryReadRecord] in the buffer `buf`, and returns + /// the barcode and umi associated with this record. 
It is assumed + /// that there is at least one [libradicl::record::AlevinFryReadRecord] present in the buffer. + #[inline] + pub fn peek_record(buf: &[u8], ctx: &R::ParsingContext) -> R::PeekResult { + R::peek_record(buf, ctx) + } +} diff --git a/src/constants.rs b/src/constants.rs new file mode 100644 index 0000000..fcae1b3 --- /dev/null +++ b/src/constants.rs @@ -0,0 +1 @@ +pub(crate) const MAX_REF_NAME_LEN: usize = 65536; diff --git a/src/header.rs b/src/header.rs new file mode 100644 index 0000000..e26e984 --- /dev/null +++ b/src/header.rs @@ -0,0 +1,176 @@ +use crate::{self as libradicl, constants}; +use libradicl::rad_types::{TagSection, TagSectionLabel}; +use libradicl::record::RecordContext; +use noodles_sam as sam; +use scroll::Pread; +use std::io::Read; + +/// The [RadPrelude] groups together the [RadHeader] +/// as well as the relevant top-level [TagSection]s of the file. +/// It constitutes everything in the initial file prior to the +/// start of the first [libradicl::chunk::Chunk]. +pub struct RadPrelude { + pub hdr: RadHeader, + pub file_tags: TagSection, + pub read_tags: TagSection, + pub aln_tags: TagSection, +} + +/// The [RadHeader] contains the relevant information about the +/// references against which the reads in this file were mapped and +/// information about the way in which mapping was performed. +pub struct RadHeader { + pub is_paired: u8, + pub ref_count: u64, + pub ref_names: Vec, + pub num_chunks: u64, +} + +impl Default for RadHeader { + fn default() -> Self { + Self::new() + } +} + +impl RadHeader { + /// Create a new empty [RadHeader] + pub fn new() -> Self { + Self { + is_paired: 0, + ref_count: 0, + ref_names: vec![], + num_chunks: 0, + } + } + + /// Create and return a new [RadHeader] by reading the contents of the + /// `reader`. If the reader is positioned such that a valid [RadHeader] comes + /// next, then this function returns [Ok(RadHeader)], otherwise, it returns + /// an [anyhow::Error] explaining the failure to parse the [RadHeader]. + pub fn from_bytes(reader: &mut T) -> anyhow::Result { + let mut rh = RadHeader::new(); + + // size of the longest allowable string. + let mut buf = [0u8; constants::MAX_REF_NAME_LEN]; + reader.read_exact(&mut buf[0..9])?; + rh.is_paired = buf.pread(0)?; + rh.ref_count = buf.pread::(1)?; + + // we know how many names we will read in. + rh.ref_names.reserve_exact(rh.ref_count as usize); + + let mut num_read = 0u64; + while num_read < rh.ref_count { + // the length of the string + reader.read_exact(&mut buf[0..2])?; + let l: usize = buf.pread::(0)? as usize; + // the string itself + reader.read_exact(&mut buf[0..l])?; + rh.ref_names + .push(std::str::from_utf8(&buf[0..l])?.to_string()); + num_read += 1; + } + + reader.read_exact(&mut buf[0..8])?; + rh.num_chunks = buf.pread::(0)?; + Ok(rh) + } + + /// Create and return a [RadHeader] from the provided BAM/SAM header + /// (represented by the noodles [sam::Header] `header`). + /// **Note**: The returned [RadHeader] will *not* have a value for the `num_chunks` + /// field, which will remain set at 0, nor will it set a meaningful value for the + /// `is_paried` flag, since the SAM/BAM header itself doesn't encode this information. + pub fn from_bam_header(header: &sam::Header) -> RadHeader { + let mut rh = RadHeader { + is_paired: 0, + ref_count: 0, + ref_names: vec![], + num_chunks: 0, + }; + + let ref_seqs = header.reference_sequences(); + rh.ref_count = ref_seqs.len() as u64; + // we know how many names we will read in. 
+ rh.ref_names.reserve_exact(rh.ref_count as usize); + for (k, _v) in ref_seqs.iter() { + rh.ref_names.push(k.to_string()); + } + rh + } + + /// Returns the size, in bytes, that this [RadHeader] will take + /// if written to an output stream. + pub fn get_size(&self) -> usize { + let mut tot_size = 0usize; + tot_size += std::mem::size_of_val(&self.is_paired) + std::mem::size_of_val(&self.ref_count); + // each name takes 2 bytes for the length, plus the actual + // number of bytes required by the string itself. + for t in self.ref_names.iter().map(|a| a.len()) { + tot_size += std::mem::size_of::() + t; + } + tot_size += std::mem::size_of_val(&self.num_chunks); + tot_size + } + + pub fn summary(&self, num_refs: Option) -> anyhow::Result { + use std::fmt::Write as _; + let mut s = String::new(); + writeln!(&mut s, "RadHeader {{")?; + writeln!(&mut s, "is_paired: {}", self.is_paired)?; + writeln!(&mut s, "ref_count: {}", self.ref_count)?; + + let refs_to_print = match num_refs { + Some(rcount) => rcount.min(self.ref_count as usize), + None => (self.ref_count as usize).min(10_usize), + }; + + for rn in self.ref_names.iter().take(refs_to_print) { + writeln!(&mut s, " ref: {}", rn)?; + } + writeln!(&mut s, " ...")?; + + writeln!(&mut s, "num_chunks: {}", self.num_chunks)?; + writeln!(&mut s, "}}")?; + Ok(s) + } +} + +impl RadPrelude { + pub fn from_bytes(reader: &mut T) -> anyhow::Result { + let hdr = RadHeader::from_bytes(reader)?; + let file_tags = TagSection::from_bytes_with_label(reader, TagSectionLabel::FileTags)?; + let read_tags = TagSection::from_bytes_with_label(reader, TagSectionLabel::ReadTags)?; + let aln_tags = TagSection::from_bytes_with_label(reader, TagSectionLabel::AlignmentTags)?; + + //let file_tag_vals = file_tags.parse_tags_from_bytes(reader)?; + //println!("file-level tag values: {:?}", file_tag_vals); + + Ok(Self { + hdr, + file_tags, + read_tags, + aln_tags, + }) + } + + pub fn summary(&self, num_refs: Option) -> anyhow::Result { + use std::fmt::Write as _; + let mut s = self.hdr.summary(num_refs)?; + writeln!(&mut s, "[[{:?}]]", self.file_tags)?; + writeln!(&mut s, "[[{:?}]]", self.read_tags)?; + writeln!(&mut s, "[[{:?}]]", self.aln_tags)?; + //writeln!(&mut s, "file-level tag values [{:?}]", self.file_tag_vals)?; + Ok(s) + } + + /// Obtain a [RecordContext] for a record of type `R` from this prelude, by + /// using the associated [TagSection]s. **Note**: Since this function + /// constructs the resulting `R` itself, and doesn't take any `R` parameter, + /// then it must always be invoked with the proper + /// [turbofish](https://doc.rust-lang.org/1.30.0/book/2018-edition/appendix-02-operators.html?highlight=turbofish#non-operator-symbols) + /// notation. 
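A minimal usage sketch of the turbofish call described above, condensing the bundled examples (here `ifile` is assumed to be a reader positioned at the start of a RAD file):

use libradicl::chunk::Chunk;
use libradicl::header::RadPrelude;
use libradicl::record::{AlevinFryReadRecord, AlevinFryRecordContext};

fn read_first_chunk<T: std::io::Read>(ifile: &mut T) -> anyhow::Result<()> {
    let prelude = RadPrelude::from_bytes(ifile)?;
    // consume the file-level tag values so the reader sits at the first chunk
    let _file_tag_vals = prelude.file_tags.parse_tags_from_bytes_checked(ifile)?;
    // the context type cannot be inferred from the arguments, so it is named explicitly
    let ctx = prelude.get_record_context::<AlevinFryRecordContext>()?;
    let chunk = Chunk::<AlevinFryReadRecord>::from_bytes(ifile, &ctx);
    println!("chunk holds {} records in {} bytes", chunk.nrec, chunk.nbytes);
    Ok(())
}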
+ pub fn get_record_context(&self) -> anyhow::Result { + R::get_context_from_tag_section(&self.file_tags, &self.read_tags, &self.aln_tags) + } +} diff --git a/src/io.rs b/src/io.rs new file mode 100644 index 0000000..ca308f6 --- /dev/null +++ b/src/io.rs @@ -0,0 +1,56 @@ +use crate::libradicl::rad_types::RadIntId; +use scroll::Pread; +use std::io::Write; +use std::io::{Cursor, Read}; + +pub fn read_into_u64(reader: &mut T, rt: &RadIntId) -> u64 { + let mut rbuf = [0u8; 8]; + + let v: u64 = match rt { + RadIntId::U8 => { + reader.read_exact(&mut rbuf[0..1]).unwrap(); + rbuf.pread::(0).unwrap() as u64 + } + RadIntId::U16 => { + reader.read_exact(&mut rbuf[0..2]).unwrap(); + rbuf.pread::(0).unwrap() as u64 + } + RadIntId::U32 => { + reader.read_exact(&mut rbuf[0..4]).unwrap(); + rbuf.pread::(0).unwrap() as u64 + } + RadIntId::U64 => { + reader.read_exact(&mut rbuf[0..8]).unwrap(); + rbuf.pread::(0).unwrap() + } + }; + v +} + +pub fn write_str_bin(v: &str, type_id: &RadIntId, owriter: &mut Cursor>) { + match type_id { + RadIntId::U8 => { + owriter + .write_all(&(v.len() as u8).to_le_bytes()) + .expect("coudn't write to output file"); + } + RadIntId::U16 => { + owriter + .write_all(&(v.len() as u16).to_le_bytes()) + .expect("coudn't write to output file"); + } + RadIntId::U32 => { + owriter + .write_all(&(v.len() as u32).to_le_bytes()) + .expect("coudn't write to output file"); + } + RadIntId::U64 => { + owriter + .write_all(&(v.len() as u64).to_le_bytes()) + .expect("coudn't write to output file"); + } + } + owriter + .write_all(v.as_bytes()) + .expect("coudn't write to output file"); +} diff --git a/src/lib.rs b/src/lib.rs index 1a417bb..0da7648 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,9 @@ use crate as libradicl; -use self::libradicl::rad_types::{CorrectedCbChunk, RadIntId, ReadRecord}; +use self::libradicl::chunk::CorrectedCbChunk; +use self::libradicl::rad_types::RadIntId; +use self::libradicl::record::AlevinFryReadRecord; use self::libradicl::schema::TempCellInfo; #[allow(unused_imports)] use ahash::{AHasher, RandomState}; @@ -28,10 +30,17 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use std::vec::Vec; +pub mod chunk; +pub mod constants; pub mod exit_codes; +pub mod header; +pub mod io; pub mod rad_types; +pub mod record; pub mod schema; pub mod utils; +#[macro_use] +mod macros; // Name of the program, to be used in diagnostic messages. static LIB_NAME: &str = "libradicl"; @@ -108,7 +117,7 @@ impl BarcodeLookupMap { end: self.offsets[(query_pref + 1) as usize], }; - let qs = qrange.start as usize; + let qs = qrange.start; // if we can, then we return the found barcode and that there was 1 best hit if let Ok(res) = self.barcodes[qrange].binary_search(&query) { @@ -143,7 +152,7 @@ impl BarcodeLookupMap { end: self.offsets[(query_pref + 1) as usize], }; - let qs = qrange.start as usize; + let qs = qrange.start; if try_exact { // first, we try to find exactly. @@ -164,7 +173,7 @@ impl BarcodeLookupMap { // that are 1 mismatch off in the suffix. 
if !(std::ops::Range::::is_empty(&qrange)) { // the initial offset of suffixes for this prefix - let qs = qrange.start as usize; + let qs = qrange.start; // for each position in the suffix for i in (0..suffix_bits).step_by(2) { @@ -205,7 +214,7 @@ impl BarcodeLookupMap { start: self.offsets[query_pref as usize], end: self.offsets[(query_pref + 1) as usize], }; - let qs = qrange.start as usize; + let qs = qrange.start; if let Ok(res) = self.barcodes[qrange].binary_search(&nquery) { ret = Some(qs + res); num_neighbors += 1; @@ -295,7 +304,7 @@ pub fn dump_chunk(v: &mut CorrectedCbChunk, owriter: &Mutex>) { owriter.lock().unwrap().write_all(v.data.get_ref()).unwrap(); } -/// Given a `BufReader` from which to read a set of records that +/// Given a [BufReader]`` from which to read a set of records that /// should reside in the same collated bucket, this function will /// collate the records by cell barcode, filling them into a chunk of /// memory exactly as they will reside on disk. If `compress` is true @@ -326,7 +335,7 @@ pub fn collate_temporary_bucket_twopass( // read the header of the record // we don't bother reading the whole thing here // because we will just copy later as need be - let tup = ReadRecord::from_bytes_record_header(reader, bct, umit); + let tup = AlevinFryReadRecord::from_bytes_record_header(reader, bct, umit); // get the entry for this chunk, or create a new one let v = cb_byte_map.entry(tup.0).or_insert(TempCellInfo { @@ -344,10 +353,10 @@ pub fn collate_temporary_bucket_twopass( reader.read_exact(&mut tbuf[0..(size_of_u32 * na)]).unwrap(); // compute the total number of bytes this record requires let nbytes = calc_record_bytes(na); - (*v).offset += nbytes as u64; - (*v).nbytes += nbytes as u32; - (*v).nrec += 1; - total_bytes += nbytes as usize; + v.offset += nbytes as u64; + v.nbytes += nbytes as u32; + v.nrec += 1; + total_bytes += nbytes; } // each cell will have a header (8 bytes each) @@ -363,14 +372,14 @@ pub fn collate_temporary_bucket_twopass( // jump to the position where this chunk should start // and write the header output_buffer.set_position(next_offset); - let cell_bytes = (*v).nbytes as u32; - let cell_rec = (*v).nrec as u32; + let cell_bytes = v.nbytes; + let cell_rec = v.nrec; output_buffer.write_all(&cell_bytes.to_le_bytes()).unwrap(); output_buffer.write_all(&cell_rec.to_le_bytes()).unwrap(); // where we will start writing records for this cell - (*v).offset = output_buffer.position(); + v.offset = output_buffer.position(); // the number of bytes allocated to this chunk - let nbytes = (*v).nbytes as u64; + let nbytes = v.nbytes as u64; // the next record will start after this one next_offset += nbytes; } @@ -387,7 +396,7 @@ pub fn collate_temporary_bucket_twopass( // read the header of the record // we don't bother reading the whole thing here // because we will just copy later as need be - let tup = ReadRecord::from_bytes_record_header(reader, bct, umit); + let tup = AlevinFryReadRecord::from_bytes_record_header(reader, bct, umit); // get the entry for this chunk, or create a new one if let Some(v) = cb_byte_map.get_mut(&tup.0) { @@ -403,15 +412,13 @@ pub fn collate_temporary_bucket_twopass( umit.write_to(tup.1, &mut output_buffer).unwrap(); // read the alignment records - reader - .read_exact(&mut tbuf[0..(size_of_u32 as usize * na)]) - .unwrap(); + reader.read_exact(&mut tbuf[0..(size_of_u32 * na)]).unwrap(); // write them output_buffer - .write_all(&tbuf[..(size_of_u32 as usize * na)]) + .write_all(&tbuf[..(size_of_u32 * na)]) .unwrap(); - 
(*v).offset = output_buffer.position(); + v.offset = output_buffer.position(); } else { panic!("should not have any barcodes we can't find"); } @@ -461,7 +468,7 @@ pub fn collate_temporary_bucket( // read the header of the record // we don't bother reading the whole thing here // because we will just copy later as need be - let tup = ReadRecord::from_bytes_record_header(reader, bct, umit); + let tup = AlevinFryReadRecord::from_bytes_record_header(reader, bct, umit); // get the entry for this chunk, or create a new one let v = output_cache @@ -469,17 +476,17 @@ pub fn collate_temporary_bucket( .or_insert_with(|| CorrectedCbChunk::from_label_and_counter(tup.0, est_num_rec)); // keep track of the number of records we're writing - (*v).nrec += 1; + v.nrec += 1; // write the num align let na = tup.2; - (*v).data.write_all(&na.to_le_bytes()).unwrap(); + v.data.write_all(&na.to_le_bytes()).unwrap(); // write the corrected barcode - bct.write_to(tup.0, &mut (*v).data).unwrap(); - umit.write_to(tup.1, &mut (*v).data).unwrap(); + bct.write_to(tup.0, &mut v.data).unwrap(); + umit.write_to(tup.1, &mut v.data).unwrap(); // read the alignment records reader.read_exact(&mut tbuf[0..(4 * na as usize)]).unwrap(); // write them - (*v).data.write_all(&tbuf[..(4 * na as usize)]).unwrap(); + v.data.write_all(&tbuf[..(4 * na as usize)]).unwrap(); } } @@ -502,11 +509,10 @@ pub fn process_corrected_cb_chunk( let nrec = buf.pread::(4).unwrap(); // for each record, read it for _ in 0..(nrec as usize) { - let tup = ReadRecord::from_bytes_record_header(reader, bct, umit); - //let rr = ReadRecord::from_bytes_keep_ori(reader, &bct, &umit, expected_ori); + let tup = AlevinFryReadRecord::from_bytes_record_header(reader, bct, umit); // if this record had a correct or correctable barcode if let Some(corrected_id) = correct_map.get(&tup.0) { - let rr = ReadRecord::from_bytes_with_header_keep_ori( + let rr = AlevinFryReadRecord::from_bytes_with_header_keep_ori( reader, tup.0, tup.1, @@ -562,7 +568,7 @@ impl TempBucket { bucket_id, bucket_writer: Arc::new(Mutex::new(BufWriter::with_capacity( 4096_usize, - File::create(parent.join(&format!("bucket_{}.tmp", bucket_id))).unwrap(), + File::create(parent.join(format!("bucket_{}.tmp", bucket_id))).unwrap(), ))), num_chunks: 0u32, num_records: 0u32, @@ -606,11 +612,11 @@ pub fn dump_corrected_cb_chunk_to_temp_file( // for each record, read it for _ in 0..(nrec as usize) { - let tup = ReadRecord::from_bytes_record_header(reader, bct, umit); + let tup = AlevinFryReadRecord::from_bytes_record_header(reader, bct, umit); // if this record had a correct or correctable barcode if let Some(corrected_id) = correct_map.get(&tup.0) { - let rr = ReadRecord::from_bytes_with_header_keep_ori( + let rr = AlevinFryReadRecord::from_bytes_with_header_keep_ori( reader, tup.0, tup.1, @@ -641,9 +647,7 @@ pub fn dump_corrected_cb_chunk_to_temp_file( // then first flush the buffer to file. 
if len + nb as usize >= flush_limit { let mut filebuf = v.bucket_writer.lock().unwrap(); - filebuf - .write_all(&bcursor.get_ref()[0..len as usize]) - .unwrap(); + filebuf.write_all(&bcursor.get_ref()[0..len]).unwrap(); // and reset the local buffer cursor bcursor.set_position(0); } @@ -684,17 +688,13 @@ pub fn dump_corrected_cb_chunk_to_temp_file( } pub fn as_u8_slice(v: &[u32]) -> &[u8] { - unsafe { - std::slice::from_raw_parts( - v.as_ptr() as *const u8, - v.len() * std::mem::size_of::(), - ) - } + unsafe { std::slice::from_raw_parts(v.as_ptr() as *const u8, std::mem::size_of_val(v)) } } #[cfg(test)] mod tests { use crate::BarcodeLookupMap; + use needletail; #[test] fn test_barcode_lookup_map() { diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 0000000..0e97b53 --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,12 @@ +// macro generalizing solution from +// https://stackoverflow.com/questions/77388769/convert-vecu8-to-vecfloat-in-rust +#[macro_export] +macro_rules! u8_to_vec_of { + ($a:expr, $b:ty) => { + $a.chunks_exact(std::mem::size_of::<$b>()) + .map(TryInto::try_into) + .map(Result::unwrap) + .map(<$b>::from_le_bytes) + .collect() + }; +} diff --git a/src/rad_types.rs b/src/rad_types.rs index ee73093..48a974a 100644 --- a/src/rad_types.rs +++ b/src/rad_types.rs @@ -7,69 +7,47 @@ * License: 3-clause BSD, see https://opensource.org/licenses/BSD-3-Clause */ -use crate as libradicl; - -use self::libradicl::utils; -use bio_types::strand::*; +use crate::{self as libradicl, constants}; +use anyhow::{self, bail}; +use libradicl::u8_to_vec_of; use num::cast::AsPrimitive; -use noodles_sam as sam; use scroll::Pread; +use std::io::Read; use std::io::Write; -use std::io::{Cursor, Read}; use std::mem; -pub struct RadHeader { - pub is_paired: u8, - pub ref_count: u64, - pub ref_names: Vec, - pub num_chunks: u64, -} - +#[derive(Clone, Debug)] pub struct TagDesc { pub name: String, - pub typeid: u8, + pub typeid: RadType, } +#[derive(Clone, Debug)] +pub enum TagSectionLabel { + FileTags, + ReadTags, + AlignmentTags, + Unlabeled, +} + +/// A [TagSection] consists of a series of [TagDesc]s that are +/// logically grouped together as tags for a specific unit +#[derive(Clone, Debug)] pub struct TagSection { + pub label: TagSectionLabel, pub tags: Vec, } -// The below are currently hard-coded -// until we decide how to solve this -// generally +/// The below are currently hard-coded +/// until we decide how to solve this +/// generally #[derive(Debug)] pub struct FileTags { pub bclen: u16, pub umilen: u16, } -#[derive(Debug)] -pub struct ReadRecord { - pub bc: u64, - pub umi: u64, - pub dirs: Vec, - pub refs: Vec, -} -#[derive(Debug)] -pub struct Chunk { - pub nbytes: u32, - pub nrec: u32, - pub reads: Vec, -} -#[derive(Debug)] -#[allow(dead_code)] -pub struct CorrectedCbChunk { - pub(crate) remaining_records: u32, - pub(crate) corrected_bc: u64, - pub(crate) nrec: u32, - pub(crate) data: Cursor>, /*, - umis: Vec, - ref_offsets: Vec, - ref_ids: Vec, - */ -} - -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum RadIntId { U8, U16, @@ -77,6 +55,56 @@ pub enum RadIntId { U64, } +impl RadIntId { + #[inline] + pub fn size_of(&self) -> usize { + match self { + Self::U8 => mem::size_of::(), + Self::U16 => mem::size_of::(), + Self::U32 => mem::size_of::(), + Self::U64 => mem::size_of::(), + } + } +} + +impl From for RadIntId { + fn from(x: u8) -> Self { + match x { + 1 => Self::U8, + 2 => Self::U16, + 3 => Self::U32, + 4 => Self::U64, + _ => panic!("Should not happen"), 
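// A brief illustration (a sketch, not part of this patch) of the `u8_to_vec_of!` macro
// defined in src/macros.rs above: it decodes a little-endian byte buffer into a vector
// of a fixed-width type, and `TagDesc::value_from_bytes` below uses it for array tags.
//
//     let bytes: Vec<u8> = vec![1, 0, 2, 0, 3, 0];
//     let vals: Vec<u16> = u8_to_vec_of!(bytes, u16); // yields vec![1, 2, 3]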
+ } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum RadFloatId { + F32, + F64, +} + +impl RadFloatId { + #[inline] + pub fn size_of(&self) -> usize { + match self { + Self::F32 => mem::size_of::(), + Self::F64 => mem::size_of::(), + } + } +} + +impl From for RadFloatId { + fn from(x: u8) -> Self { + match x { + 5 => Self::F32, + 6 => Self::F64, + _ => panic!("Should not happen"), + } + } +} + pub trait PrimitiveInteger: AsPrimitive + AsPrimitive @@ -107,13 +135,9 @@ impl< } impl RadIntId { + #[inline] pub fn bytes_for_type(&self) -> usize { - match self { - Self::U8 => std::mem::size_of::(), - Self::U16 => std::mem::size_of::(), - Self::U32 => std::mem::size_of::(), - Self::U64 => std::mem::size_of::(), - } + self.size_of() } /// Based on the variant of the current enum, write the value `v` @@ -146,35 +170,80 @@ impl RadIntId { } } } + + pub fn read_into_usize(&self, buf: &[u8]) -> usize { + match self { + Self::U8 => buf.pread::(0).unwrap() as usize, + Self::U16 => buf.pread::(0).unwrap() as usize, + Self::U32 => buf.pread::(0).unwrap() as usize, + Self::U64 => buf.pread::(0).unwrap() as usize, + } + } } -pub struct ChunkConfig { - pub num_chunks: u64, - pub bc_type: u8, - pub umi_type: u8, +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum RadAtomicId { + Int(RadIntId), + Float(RadFloatId), + String, +} + +impl RadAtomicId { + #[inline] + pub fn size_of(&self) -> usize { + match self { + Self::Int(x) => x.size_of(), + Self::Float(x) => x.size_of(), + Self::String => panic!("RadAtomicId::String does not have a fixed type"), + } + } } -#[derive(Copy, Clone)] +impl From for RadAtomicId { + fn from(x: u8) -> Self { + match x { + 1 => Self::Int(RadIntId::U8), + 2 => Self::Int(RadIntId::U16), + 3 => Self::Int(RadIntId::U32), + 4 => Self::Int(RadIntId::U64), + 5 => Self::Float(RadFloatId::F32), + 6 => Self::Float(RadFloatId::F64), + 8 => Self::String, + x => panic!("Should not happen, num is {x}"), + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum RadType { Bool, - U8, - U16, - U32, - U64, - F32, - F64, + Int(RadIntId), + Float(RadFloatId), + // holds length type and value type, but not length + // and data themselves + Array(RadIntId, RadAtomicId), + // does not hold length, just a marker for the type + String, +} + +impl RadType { + #[inline] + pub fn is_int_type(&self) -> bool { + matches!(self, Self::Int(_)) + } } pub fn encode_type_tag(type_tag: RadType) -> Option { match type_tag { RadType::Bool => Some(0), - RadType::U8 => Some(1), - RadType::U16 => Some(2), - RadType::U32 => Some(3), - RadType::U64 => Some(4), - RadType::F32 => Some(5), - RadType::F64 => Some(6), - //_ => None, + RadType::Int(RadIntId::U8) => Some(1), + RadType::Int(RadIntId::U16) => Some(2), + RadType::Int(RadIntId::U32) => Some(3), + RadType::Int(RadIntId::U64) => Some(4), + RadType::Float(RadFloatId::F32) => Some(5), + RadType::Float(RadFloatId::F64) => Some(6), + RadType::Array(_, _) => Some(7), + RadType::String => Some(8), //_ => None, } } @@ -188,210 +257,118 @@ pub fn decode_int_type_tag(type_id: u8) -> Option { } } -fn read_into_u64(reader: &mut T, rt: &RadIntId) -> u64 { - let mut rbuf = [0u8; 8]; - - let v: u64 = match rt { - RadIntId::U8 => { - reader.read_exact(&mut rbuf[0..1]).unwrap(); - rbuf.pread::(0).unwrap() as u64 - } - RadIntId::U16 => { - reader.read_exact(&mut rbuf[0..2]).unwrap(); - rbuf.pread::(0).unwrap() as u64 - } - RadIntId::U32 => { - reader.read_exact(&mut rbuf[0..4]).unwrap(); - rbuf.pread::(0).unwrap() as u64 - } - RadIntId::U64 => { - 
reader.read_exact(&mut rbuf[0..8]).unwrap(); - rbuf.pread::(0).unwrap() - } - }; - v +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum MappingType { + Unmapped, + SingleMapped, + MappedFirstOrphan, + MappedSecondOrphan, + MappedPair, } -impl ReadRecord { - pub fn is_empty(&self) -> bool { - self.refs.is_empty() - } - - pub fn from_bytes(reader: &mut T, bct: &RadIntId, umit: &RadIntId) -> Self { - let mut rbuf = [0u8; 255]; - - reader.read_exact(&mut rbuf[0..4]).unwrap(); - let na = rbuf.pread::(0).unwrap(); - let bc = read_into_u64(reader, bct); - let umi = read_into_u64(reader, umit); - - let mut rec = Self { - bc, - umi, - dirs: Vec::with_capacity(na as usize), - refs: Vec::with_capacity(na as usize), - }; - - //println!("number of records : {:?}",na); - - for _ in 0..(na as usize) { - reader.read_exact(&mut rbuf[0..4]).unwrap(); - let v = rbuf.pread::(0).unwrap(); - let dir = (v & utils::MASK_LOWER_31_U32) != 0; - rec.dirs.push(dir); - rec.refs.push(v & utils::MASK_TOP_BIT_U32); +impl MappingType { + pub fn from_u8(t: u8) -> Self { + match t { + 0 => MappingType::Unmapped, + 1 => MappingType::SingleMapped, + 2 => MappingType::MappedFirstOrphan, + 3 => MappingType::MappedSecondOrphan, + 4 => MappingType::MappedPair, + _ => MappingType::Unmapped, } - - rec } - pub fn from_bytes_record_header( - reader: &mut T, - bct: &RadIntId, - umit: &RadIntId, - ) -> (u64, u64, u32) { - let mut rbuf = [0u8; 4]; - reader.read_exact(&mut rbuf).unwrap(); - let na = u32::from_le_bytes(rbuf); //.pread::(0).unwrap(); - let bc = read_into_u64(reader, bct); - let umi = read_into_u64(reader, umit); - (bc, umi, na) - } - - pub fn from_bytes_with_header_keep_ori( - reader: &mut T, - bc: u64, - umi: u64, - na: u32, - expected_ori: &Strand, - ) -> Self { - let mut rbuf = [0u8; 255]; - let mut rec = Self { - bc, - umi, - dirs: Vec::with_capacity(na as usize), - refs: Vec::with_capacity(na as usize), - }; - - for _ in 0..(na as usize) { - reader.read_exact(&mut rbuf[0..4]).unwrap(); - let v = rbuf.pread::(0).unwrap(); - - // fw if the leftmost bit is 1, otherwise rc - let strand = if (v & utils::MASK_LOWER_31_U32) > 0 { - Strand::Forward - } else { - Strand::Reverse - }; - - if expected_ori.same(&strand) || expected_ori.is_unknown() { - rec.refs.push(v & utils::MASK_TOP_BIT_U32); - } + #[inline] + pub fn get_mask(&self) -> u32 { + match &self { + // if unmapped, we ignore flags all together. + MappingType::Unmapped => 0b00, + // if paired we care about both read and mate. + MappingType::MappedPair => 0b11, + // if orphan or single, we care only about read + // mate flag should be ignored. + _ => 0b10, } - - // make sure these are sorted in this step. 
- rec.refs.sort_unstable(); - rec } - pub fn from_bytes_keep_ori( - reader: &mut T, - bct: &RadIntId, - umit: &RadIntId, - expected_ori: &Strand, - ) -> Self { - let mut rbuf = [0u8; 255]; - - reader.read_exact(&mut rbuf[0..4]).unwrap(); - let na = rbuf.pread::(0).unwrap(); - - let bc = read_into_u64(reader, bct); - let umi = read_into_u64(reader, umit); - - let mut rec = Self { - bc, - umi, - dirs: Vec::with_capacity(na as usize), - refs: Vec::with_capacity(na as usize), - }; + #[inline] + pub fn is_orphan(&self) -> bool { + matches!( + &self, + MappingType::MappedFirstOrphan | MappingType::MappedSecondOrphan + ) + } +} - for _ in 0..(na as usize) { - reader.read_exact(&mut rbuf[0..4]).unwrap(); - let v = rbuf.pread::(0).unwrap(); +#[derive(Debug, Copy, Clone)] +pub enum MappedFragmentOrientation { + Reverse, + Forward, + ReverseReverse, + ReverseForward, + ForwardReverse, + ForwardForward, + Unknown, +} - // fw if the leftmost bit is 1, otherwise rc - let strand = if (v & utils::MASK_LOWER_31_U32) > 0 { - Strand::Forward +impl MappedFragmentOrientation { + pub fn from_u32_paired_status(n: u32, m: MappingType) -> Self { + // if not paired, then we don't care about + // the lowest order bit so shift it off + if matches!( + m, + MappingType::SingleMapped + | MappingType::MappedFirstOrphan + | MappingType::MappedSecondOrphan + ) { + if (n & 0b10) == 2 { + MappedFragmentOrientation::Forward } else { - Strand::Reverse - }; - - if expected_ori.same(&strand) || expected_ori.is_unknown() { - rec.refs.push(v & utils::MASK_TOP_BIT_U32); + MappedFragmentOrientation::Reverse + } + } else { + match n { + 0 => MappedFragmentOrientation::ReverseReverse, + 1 => MappedFragmentOrientation::ReverseForward, + 2 => MappedFragmentOrientation::ForwardReverse, + 3 => MappedFragmentOrientation::ForwardForward, + _ => MappedFragmentOrientation::Unknown, } } - - // make sure these are sorted in this step. - rec.refs.sort_unstable(); - rec } } -impl Chunk { - pub fn read_header(reader: &mut T) -> (u32, u32) { - let mut buf = [0u8; 8]; - - reader.read_exact(&mut buf).unwrap(); - let nbytes = buf.pread::(0).unwrap(); - let nrec = buf.pread::(4).unwrap(); - (nbytes, nrec) - } - - pub fn from_bytes(reader: &mut T, bct: &RadIntId, umit: &RadIntId) -> Self { - let mut buf = [0u8; 8]; - - reader.read_exact(&mut buf).unwrap(); - let nbytes = buf.pread::(0).unwrap(); - let nrec = buf.pread::(4).unwrap(); - let mut c = Self { - nbytes, - nrec, - reads: Vec::with_capacity(nrec as usize), - }; - - for _ in 0..(nrec as usize) { - c.reads.push(ReadRecord::from_bytes(reader, bct, umit)); +impl From for u32 { + fn from(item: MappedFragmentOrientation) -> Self { + match item { + MappedFragmentOrientation::ForwardReverse => 0b011, + MappedFragmentOrientation::ForwardForward => 0b101, + MappedFragmentOrientation::ReverseReverse => 0b110, + MappedFragmentOrientation::ReverseForward => 0b100, + MappedFragmentOrientation::Forward => 0b1, + MappedFragmentOrientation::Reverse => 0b10, + MappedFragmentOrientation::Unknown => 0b0, } - - c } +} - /// peeks to the first record in the buffer `buf`, and returns - /// the barcode and umi associated with this record. It is assumed - /// that there is at least one record present in the buffer. 
- pub fn peek_record(buf: &[u8], bct: &RadIntId, umit: &RadIntId) -> (u64, u64) { - let na_size = mem::size_of::(); - let bc_size = bct.bytes_for_type(); - - let _na = buf.pread::(0).unwrap(); - - let bc = match bct { - RadIntId::U8 => buf.pread::(na_size).unwrap() as u64, - RadIntId::U16 => buf.pread::(na_size).unwrap() as u64, - RadIntId::U32 => buf.pread::(na_size).unwrap() as u64, - RadIntId::U64 => buf.pread::(na_size).unwrap(), - }; - let umi = match umit { - RadIntId::U8 => buf.pread::(na_size + bc_size).unwrap() as u64, - RadIntId::U16 => buf.pread::(na_size + bc_size).unwrap() as u64, - RadIntId::U32 => buf.pread::(na_size + bc_size).unwrap() as u64, - RadIntId::U64 => buf.pread::(na_size + bc_size).unwrap(), - }; - (bc, umi) +impl From for MappedFragmentOrientation { + fn from(item: u32) -> Self { + match item { + 0b011 => MappedFragmentOrientation::ForwardReverse, + 0b101 => MappedFragmentOrientation::ForwardForward, + 0b110 => MappedFragmentOrientation::ReverseReverse, + 0b100 => MappedFragmentOrientation::ReverseForward, + 0b1 => MappedFragmentOrientation::Forward, + 0b10 => MappedFragmentOrientation::Reverse, + _ => MappedFragmentOrientation::Unknown, + } } } impl FileTags { + /// Reads the FileTags from the provided `reader` and return the + /// barcode length and umi length pub fn from_bytes(reader: &mut T) -> Self { let mut buf = [0u8; 4]; reader.read_exact(&mut buf).unwrap(); @@ -403,128 +380,476 @@ impl FileTags { } } +/// Convert from a u8 to the corresponding RadType. +/// This will only work for *non-aggregate* types +/// (i.e. it will not work for the array type, since +/// the type requiers more than a u8 worth of information). +impl From for RadType { + fn from(x: u8) -> Self { + match x { + 0 => RadType::Bool, + 1 => RadType::Int(RadIntId::U8), + 2 => RadType::Int(RadIntId::U16), + 3 => RadType::Int(RadIntId::U32), + 4 => RadType::Int(RadIntId::U64), + 5 => RadType::Float(RadFloatId::F32), + 6 => RadType::Float(RadFloatId::F64), + 7 => panic!("Should not happen"), + 8 => RadType::String, + _ => panic!("Should not happen"), + } + } +} + +/// This enum holds the different values that can be represented +/// in a RAD tag. For the aggregate (i.e. array) types, the type +/// of the element being stored in the array is encoded in the +/// [TagValue] variant, but the length of the array is not. +#[derive(Debug, PartialEq)] +pub enum TagValue { + Bool(bool), + U8(u8), + U16(u16), + U32(u32), + U64(u64), + F32(f32), + F64(f64), + ArrayU8(Vec), + ArrayU16(Vec), + ArrayU32(Vec), + ArrayU64(Vec), + ArrayF32(Vec), + ArrayF64(Vec), + ArrayString(Vec), + String(String), +} + impl TagDesc { - pub fn from_bytes(reader: &mut T) -> TagDesc { + /// Attempts to read a [TagDesc] from the provided `reader`. If the + /// `reader` is positioned at the start of a valid [TagDesc], then this + /// [TagDesc] is returned. Otherwise, a description of the error is returned + /// via an [anyhow::Error]. + pub fn from_bytes(reader: &mut T) -> anyhow::Result { // space for the string length (1 byte) // the longest string possible (255 char) // and the typeid - let mut buf = [0u8; 257]; - reader.read_exact(&mut buf[0..2]).unwrap(); - let str_len = buf.pread::(0).unwrap() as usize; + let mut buf = [0u8; constants::MAX_REF_NAME_LEN]; + reader.read_exact(&mut buf[0..2])?; + let str_len = buf.pread::(0)? 
as usize; // read str_len + 1 to get the type id that follows the string - reader.read_exact(&mut buf[0..str_len + 1]).unwrap(); - TagDesc { - name: std::str::from_utf8(&buf[0..str_len]).unwrap().to_string(), - typeid: buf.pread(str_len).unwrap(), + reader.read_exact(&mut buf[0..str_len + 1])?; + let name = std::str::from_utf8(&buf[0..str_len])?.to_string(); + let typeid = buf.pread(str_len)?; + // if the type id is 7, need to read the types of + // the length and element type, otherwise just turn the + // id into a proper RatType and we're done. + let rad_t = match typeid { + 0..=6 | 8 => typeid.into(), + 7 => { + reader.read_exact(&mut buf[0..2])?; + let t1: RadIntId = buf.pread::(0)?.into(); + let t2: RadAtomicId = buf.pread::(1)?.into(); + RadType::Array(t1, t2) + } + _ => { + bail!("{typeid} is an unrecognized RAD type id") + } + }; + + Ok(TagDesc { + name, + typeid: rad_t, + }) + } + + /// reads a [TagValue] from `reader` based on the type encoded in the + /// current [TagDesc], and returns this [TagValue]. This function + /// will `panic` if it cannot succesfully read the [TagValue] + /// from `reader`. + pub fn value_from_bytes(&self, reader: &mut T) -> TagValue { + let mut small_buf = [0u8; 8]; + match self.typeid { + RadType::Bool => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + TagValue::Bool(small_buf[0] > 1) + } + RadType::Int(RadIntId::U8) => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + TagValue::U8(small_buf[0]) + } + RadType::Int(RadIntId::U16) => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + TagValue::U16(small_buf.pread::(0).unwrap()) + } + RadType::Int(RadIntId::U32) => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + TagValue::U32(small_buf.pread::(0).unwrap()) + } + RadType::Int(RadIntId::U64) => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + TagValue::U64(small_buf.pread::(0).unwrap()) + } + RadType::Float(RadFloatId::F32) => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + TagValue::F32(small_buf.pread::(0).unwrap()) + } + RadType::Float(RadFloatId::F64) => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + TagValue::F64(small_buf.pread::(0).unwrap()) + } + RadType::Array(len_t, val_t) => { + let _ = reader.read_exact(&mut small_buf[0..len_t.size_of()]); + let vec_len = len_t.read_into_usize(&small_buf); + if val_t == RadAtomicId::String { + let mut strings = Vec::with_capacity(vec_len); + let sl: u16 = 0; + let mut buf = [0u8; 65536]; + for _ in 0..vec_len { + let _ = reader.read_exact(&mut sl.to_ne_bytes()); + let l = sl as usize; + let _ = reader.read_exact(&mut buf[0..l]); + unsafe { + strings.push(std::str::from_utf8_unchecked(&buf[0..l]).to_string()); + } + } + TagValue::ArrayString(strings) + } else { + let num_bytes = val_t.size_of() * vec_len; + let mut data = vec![0u8; num_bytes]; + let _ = reader.read_exact(data.as_mut_slice()); + match val_t { + RadAtomicId::Int(RadIntId::U8) => TagValue::ArrayU8(data), + RadAtomicId::Int(RadIntId::U16) => { + TagValue::ArrayU16(u8_to_vec_of!(data, u16)) + } + RadAtomicId::Int(RadIntId::U32) => { + TagValue::ArrayU32(u8_to_vec_of!(data, u32)) + } + RadAtomicId::Int(RadIntId::U64) => { + TagValue::ArrayU64(u8_to_vec_of!(data, u64)) + } + RadAtomicId::Float(RadFloatId::F32) => { + TagValue::ArrayF32(u8_to_vec_of!(data, f32)) + } + RadAtomicId::Float(RadFloatId::F64) => { + TagValue::ArrayF64(u8_to_vec_of!(data, f64)) + } + 
RadAtomicId::String => { + unimplemented!("match of RadAtomicId should not occur in this branch") + } + } + } + } + RadType::String => { + let _ = reader.read_exact(&mut small_buf[0..std::mem::size_of::()]); + let slen = small_buf.pread::(0).unwrap(); + let mut dat: Vec = vec![0_u8; slen as usize]; + let _ = reader.read_exact(dat.as_mut_slice()); + let s = unsafe { String::from_utf8_unchecked(dat) }; + TagValue::String(s) + } } } -} -impl TagSection { - pub fn from_bytes(reader: &mut T) -> TagSection { - let mut buf = [0u8; 2]; - reader.read_exact(&mut buf).unwrap(); - let num_tags = buf.pread::(0).unwrap() as usize; + /// Returns `true` if the [RadType] component of the current [TagDesc] matches + /// the type of the provided [TagValue] of `o` and `false` otherwise. + #[inline] + pub fn matches_value_type(&self, o: &TagValue) -> bool { + match (&self.typeid, o) { + (RadType::Bool, TagValue::Bool(_)) => true, + (RadType::Int(RadIntId::U8), TagValue::U8(_)) => true, + (RadType::Int(RadIntId::U16), TagValue::U16(_)) => true, + (RadType::Int(RadIntId::U32), TagValue::U32(_)) => true, + (RadType::Int(RadIntId::U64), TagValue::U64(_)) => true, + (RadType::Float(RadFloatId::F32), TagValue::F32(_)) => true, + (RadType::Float(RadFloatId::F64), TagValue::F64(_)) => true, + (RadType::Array(_, RadAtomicId::Int(RadIntId::U8)), TagValue::ArrayU8(_)) => true, + (RadType::Array(_, RadAtomicId::Int(RadIntId::U16)), TagValue::ArrayU16(_)) => true, + (RadType::Array(_, RadAtomicId::Int(RadIntId::U32)), TagValue::ArrayU32(_)) => true, + (RadType::Array(_, RadAtomicId::Int(RadIntId::U64)), TagValue::ArrayU64(_)) => true, + (RadType::Array(_, RadAtomicId::Float(RadFloatId::F32)), TagValue::ArrayF32(_)) => true, + (RadType::Array(_, RadAtomicId::Float(RadFloatId::F64)), TagValue::ArrayF64(_)) => true, + (RadType::Array(_, RadAtomicId::String), TagValue::ArrayString(_)) => true, + (RadType::String, TagValue::String(_)) => true, + (_, _) => false, + } + } +} - let mut ts = TagSection { - tags: Vec::with_capacity(num_tags), - }; +/// This type represents a mapping from [TagDesc]s to a corresponding set of +/// values conforming to these descriptions (i.e. in terms of types). The +/// [TagMap] allows you to fetch the value for a specific tag by name or index, or +/// to add values to a corresponding set of descriptions. +#[derive(Debug)] +pub struct TagMap<'a> { + keys: &'a [TagDesc], + dat: Vec, +} - for _ in 0..num_tags { - ts.tags.push(TagDesc::from_bytes(reader)); +impl<'a> TagMap<'a> { + /// Create a new TagMap whose set of keys is determined by + /// the provided `keyset`. This will have one value slot for + /// each provided key. + pub fn with_keyset(keyset: &'a [TagDesc]) -> Self { + Self { + keys: keyset, + dat: Vec::with_capacity(keyset.len()), } + } - ts + /// Try to add the next tag value. If there is space and the type + /// matches, add it and return `true`, otherwise return `false`. + pub fn add_checked(&mut self, val: TagValue) -> bool { + let next_idx = self.dat.len(); + if next_idx >= self.keys.len() || !self.keys[next_idx].matches_value_type(&val) { + false + } else { + self.dat.push(val); + true + } } -} -impl RadHeader { - pub fn from_bytes(reader: &mut T) -> RadHeader { - let mut rh = RadHeader { - is_paired: 0, - ref_count: 0, - ref_names: vec![], - num_chunks: 0, - }; + /// add the next TagValue to the data for this TagMap. + /// This function doesn't check if the type is correct or + /// if too many tag values have been added. 
It should + /// only be used when one is certain that the next tag value + /// appropriately matches the next available key. + pub fn add(&mut self, val: TagValue) { + self.dat.push(val); + } - // size of the longest allowable string. - let mut buf = [0u8; 65536]; - reader.read_exact(&mut buf[0..9]).unwrap(); - rh.is_paired = buf.pread(0).unwrap(); - rh.ref_count = buf.pread::(1).unwrap(); - - // we know how many names we will read in. - rh.ref_names.reserve_exact(rh.ref_count as usize); - - let mut num_read = 0u64; - while num_read < rh.ref_count { - reader.read_exact(&mut buf[0..2]).unwrap(); - let l: usize = buf.pread::(0).unwrap() as usize; - reader.read_exact(&mut buf[0..l]).unwrap(); - rh.ref_names - .push(std::str::from_utf8(&buf[0..l]).unwrap().to_string()); - num_read += 1; + /// get the value for the tag associated with the name `key`, returns + /// Some(&TagValue) for the appropriate tag if it exists, and None otherwise. + pub fn get(&self, key: &str) -> Option<&TagValue> { + for (k, val) in self.keys.iter().zip(self.dat.iter()) { + if k.name == key { + return Some(val); + } } + None + } + + /// get the value for the tag at index `idx` returns Some(&TagValue) if `idx` + /// is in bounds and None otherwise. + pub fn get_at_index(&self, idx: usize) -> Option<&TagValue> { + self.dat.get(idx) + } +} - reader.read_exact(&mut buf[0..8]).unwrap(); - rh.num_chunks = buf.pread::(0).unwrap(); - rh +impl<'a> std::ops::Index for TagMap<'a> { + type Output = TagValue; + /// Returns a reference to the [TagValue] in the [TagMap] at the + /// provided `index`, panics if `index` is out of bounds. + #[inline] + fn index(&self, index: usize) -> &Self::Output { + &self.dat[index] } +} + +impl TagSection { + /// Attempts to read a [TagSection] from the provided `reader`. If the + /// `reader` is positioned at the start of a valid [TagSection], then this + /// [TagSection] is returned. Otherwise, a description of the error is returned + /// via an [anyhow::Error]. + pub fn from_bytes(reader: &mut T) -> anyhow::Result { + Self::from_bytes_with_label(reader, TagSectionLabel::Unlabeled) + } + + /// Attempts to read a [TagSection] from the provided `reader`. If the + /// `reader` is positioned at the start of a valid [TagSection], then this + /// [TagSection] is returned. Otherwise, a description of the error is returned + /// via an [anyhow::Error]. The returned [TagSection] will be labeled with the + /// provided `label`. + pub fn from_bytes_with_label( + reader: &mut T, + label: TagSectionLabel, + ) -> anyhow::Result { + let mut buf = [0u8; 2]; + reader.read_exact(&mut buf)?; + let num_tags = buf.pread::(0)? as usize; - pub fn from_bam_header(header: &sam::Header) -> RadHeader { - let mut rh = RadHeader { - is_paired: 0, - ref_count: 0, - ref_names: vec![], - num_chunks: 0, + let mut ts = Self { + label, + tags: Vec::with_capacity(num_tags), }; - let ref_seqs = header.reference_sequences(); - rh.ref_count = ref_seqs.len() as u64; - // we know how many names we will read in. 
- rh.ref_names.reserve_exact(rh.ref_count as usize); - for (k, _v) in ref_seqs.iter() { - rh.ref_names.push(k.to_string()); + for _ in 0..num_tags { + ts.tags.push(TagDesc::from_bytes(reader)?); } - rh + Ok(ts) } - pub fn get_size(&self) -> usize { - let mut tot_size = 0usize; - tot_size += std::mem::size_of::() + std::mem::size_of::(); - for (_i, t) in self.ref_names.iter().map(|a| a.len()).enumerate() { - tot_size += t; + pub fn parse_tags_from_bytes(&self, reader: &mut T) -> anyhow::Result { + // loop over all of the tag descriptions in this section, and parse a + // tag value for each. + //let mut tv = Vec::::new(); + let mut tm = TagMap::with_keyset(&self.tags); + for tag_desc in &self.tags { + tm.add(tag_desc.value_from_bytes(reader)); } - tot_size += std::mem::size_of::(); - tot_size + Ok(tm) } -} -pub fn write_str_bin(v: &str, type_id: &RadIntId, owriter: &mut Cursor>) { - match type_id { - RadIntId::U8 => { - owriter - .write_all(&(v.len() as u8).to_le_bytes()) - .expect("coudn't write to output file"); - } - RadIntId::U16 => { - owriter - .write_all(&(v.len() as u16).to_le_bytes()) - .expect("coudn't write to output file"); + pub fn parse_tags_from_bytes_checked(&self, reader: &mut T) -> anyhow::Result { + // loop over all of the tag descriptions in this section, and parse a + // tag value for each. + //let mut tv = Vec::::new(); + let mut tm = TagMap::with_keyset(&self.tags); + for tag_desc in &self.tags { + if !tm.add_checked(tag_desc.value_from_bytes(reader)) { + panic!("Tried to read value for non-matching type"); + } } - RadIntId::U32 => { - owriter - .write_all(&(v.len() as u32).to_le_bytes()) - .expect("coudn't write to output file"); + Ok(tm) + } + + pub fn get_tag_type(&self, name: &str) -> Option { + for td in &self.tags { + if name == td.name { + return Some(td.typeid); + } } - RadIntId::U64 => { - owriter - .write_all(&(v.len() as u64).to_le_bytes()) - .expect("coudn't write to output file"); + None + } + + /// return `true` if this [TagSection] has a [TagDesc] with a + /// name matching `name`, and `false` otherwise. 
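As a brief sketch of how these accessors fit together (using the TagSection and RadType types defined in this module), one can check that a tag is present before asking for its type:

fn barcode_tag_type(read_tags: &TagSection) -> anyhow::Result<RadType> {
    // refuse inputs that lack the read-level barcode tag
    anyhow::ensure!(read_tags.has_tag("b"), "expected a 'b' read-level tag");
    Ok(read_tags
        .get_tag_type("b")
        .expect("presence was checked just above"))
}

called, for example, as `barcode_tag_type(&prelude.read_tags)?` on a parsed RadPrelude.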
+ pub fn has_tag(&self, name: &str) -> bool { + for td in &self.tags { + if name == td.name { + return true; + } } + false + } +} + +#[cfg(test)] +mod tests { + use crate::rad_types::RadType; + use crate::rad_types::{RadAtomicId, RadIntId, TagSection, TagSectionLabel, TagValue}; + use std::io::Write; + + use super::TagDesc; + #[test] + fn can_parse_simple_tag_desc() { + let mut buf = Vec::::new(); + let tag_name = b"mytag"; + let _ = buf.write_all(&5_u16.to_ne_bytes()); + let _ = buf.write_all(tag_name); + let tag_type = 4_u8; + let _ = buf.write_all(&tag_type.to_ne_bytes()); + + let desc = TagDesc::from_bytes(&mut buf.as_slice()).unwrap(); + assert_eq!(desc.name, "mytag"); + assert_eq!(desc.typeid, RadType::Int(RadIntId::U64)); + } + + #[test] + fn can_parse_array_tag_desc() { + let mut buf = Vec::::new(); + let tag_name = b"mytag"; + let _ = buf.write_all(&5_u16.to_ne_bytes()); + let _ = buf.write_all(tag_name); + let tag_type = 7_u8; + let _ = buf.write_all(&tag_type.to_ne_bytes()); + // length type + let _ = buf.write_all(&1_u8.to_ne_bytes()); + // element type + let _ = buf.write_all(&2_u8.to_ne_bytes()); + + let desc = TagDesc::from_bytes(&mut buf.as_slice()).unwrap(); + assert_eq!(desc.name, "mytag"); + assert_eq!( + desc.typeid, + RadType::Array(RadIntId::U8, RadAtomicId::Int(RadIntId::U16)) + ); + } + + #[test] + fn can_parse_tag_values_from_section() { + let mut buf = Vec::::new(); + let tag_name = b"mytag"; + let _ = buf.write_all(&5_u16.to_ne_bytes()); + let _ = buf.write_all(tag_name); + let tag_type = 7_u8; + let _ = buf.write_all(&tag_type.to_ne_bytes()); + // length type + let _ = buf.write_all(&1_u8.to_ne_bytes()); + // element type + let _ = buf.write_all(&2_u8.to_ne_bytes()); + + let desc = TagDesc::from_bytes(&mut buf.as_slice()).unwrap(); + assert_eq!(desc.name, "mytag"); + assert_eq!( + desc.typeid, + RadType::Array(RadIntId::U8, RadAtomicId::Int(RadIntId::U16)) + ); + + buf.clear(); + let tag_name = b"stringtag"; + let _ = buf.write_all(&9_u16.to_ne_bytes()); + let _ = buf.write_all(tag_name); + // type id + let _ = buf.write_all(&8_u8.to_ne_bytes()); + let desc_str = TagDesc::from_bytes(&mut buf.as_slice()).unwrap(); + + let tag_sec = TagSection { + label: TagSectionLabel::FileTags, + tags: vec![desc, desc_str], + }; + + buf.clear(); + let _ = buf.write_all(&3_u8.to_ne_bytes()); + let _ = buf.write_all(&1_u16.to_ne_bytes()); + let _ = buf.write_all(&2_u16.to_ne_bytes()); + let _ = buf.write_all(&3_u16.to_ne_bytes()); + + let _ = buf.write_all(&6_u16.to_ne_bytes()); + let _ = buf.write_all(b"hi_rad"); + + let map = tag_sec.parse_tags_from_bytes(&mut buf.as_slice()).unwrap(); + assert_eq!( + map.get("mytag").unwrap(), + &TagValue::ArrayU16(vec![1, 2, 3]) + ); + assert_eq!( + map.get("stringtag").unwrap(), + &TagValue::String(String::from("hi_rad")) + ); + + assert_eq!( + map.get_at_index(0).unwrap(), + &TagValue::ArrayU16(vec![1, 2, 3]) + ); + assert_eq!( + map.get_at_index(1).unwrap(), + &TagValue::String(String::from("hi_rad")) + ); + + let map = tag_sec + .parse_tags_from_bytes_checked(&mut buf.as_slice()) + .unwrap(); + assert_eq!( + map.get("mytag").unwrap(), + &TagValue::ArrayU16(vec![1, 2, 3]) + ); + assert_eq!( + map.get("stringtag").unwrap(), + &TagValue::String(String::from("hi_rad")) + ); + + assert_eq!( + map.get_at_index(0).unwrap(), + &TagValue::ArrayU16(vec![1, 2, 3]) + ); + assert_eq!( + map.get_at_index(1).unwrap(), + &TagValue::String(String::from("hi_rad")) + ); + + assert_eq!(map[0], TagValue::ArrayU16(vec![1, 2, 3])); + assert_eq!(map[1], 
TagValue::String(String::from("hi_rad"))); } - owriter - .write_all(v.as_bytes()) - .expect("coudn't write to output file"); } diff --git a/src/record.rs b/src/record.rs new file mode 100644 index 0000000..1df3c65 --- /dev/null +++ b/src/record.rs @@ -0,0 +1,332 @@ +use crate::{ + io as rad_io, + rad_types::{MappedFragmentOrientation, MappingType, RadIntId, RadType, TagSection}, + utils, +}; +use anyhow::{self, bail}; +use bio_types::strand::*; +use scroll::Pread; +use std::io::Read; +use std::mem; + +/// A concrete struct representing a [MappedRecord] +/// for reads processed upstream with `piscem` (or `salmon alevin`). +/// This represents the set of alignments and relevant information +/// for a basic alevin-fry record. +#[derive(Debug)] +pub struct AlevinFryReadRecord { + pub bc: u64, + pub umi: u64, + pub dirs: Vec, + pub refs: Vec, +} + +#[derive(Debug)] +pub struct PiscemBulkReadRecord { + pub frag_type: u8, + pub dirs: Vec, + pub refs: Vec, + pub positions: Vec, + pub frag_lengths: Vec, +} + +/// This trait represents a mapped read record that should be stored +/// in the [crate::chunk::Chunk] of a RAD file. The [crate::chunk::Chunk] type is parameterized on +/// some concrete struct that must implement this [MappedRecord] trait. +/// This trat defines the necessary functions required to be able to parse +/// the read record from the underlying reader, as well as the associated +/// types that are necessary to provide the context to perform this parsing. +pub trait MappedRecord { + /// the information necessary to be able to correctly + /// parse a concrete instance of a struct implementing + /// [MappedRecord] from an input stream. This should + /// encapsulate any context necessary to perform such + /// parsing. + type ParsingContext; + /// The information that should be returned if one wishes + /// to peek at the next record in the input stream. + type PeekResult; + + fn peek_record(buf: &[u8], ctx: &Self::ParsingContext) -> Self::PeekResult; + fn from_bytes_with_context(reader: &mut T, ctx: &Self::ParsingContext) -> Self; +} + +/// This trait allows obtaining and passing along necessary information that +/// may be required for a [MappedRecord] to be properly parsed from a file. +/// Typically, this information will be relevant information about the tags +/// that are used for parsing these records. It gets information about the +/// file, read, and alignment-level [TagSection]s from the [crate::header::RadPrelude] and +/// can then copy any information that may be later necessary for parsing. +pub trait RecordContext { + fn get_context_from_tag_section( + ft: &TagSection, + rt: &TagSection, + at: &TagSection, + ) -> anyhow::Result + where + Self: Sized; +} + +/// context needed to read an alevin-fry record +/// (the types of the barcode and umi) +#[derive(Debug)] +pub struct AlevinFryRecordContext { + pub bct: RadIntId, + pub umit: RadIntId, +} + +impl RecordContext for AlevinFryRecordContext { + /// Currently, the [AlevinFryRecordContext] only cares about and provides the read tags that + /// correspond to the length of the barcode and the UMI. Here, these are parsed from the + /// corresponding [TagSection]. 
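To make the two traits above concrete, here is a minimal, hypothetical sketch of a custom record type; the `ToyReadRecord`/`ToyRecordContext` names are invented for illustration, and the `<T: Read>` bound on `from_bytes_with_context` is assumed to match the existing implementations:

use anyhow::bail;
use libradicl::io::read_into_u64;
use libradicl::rad_types::{RadIntId, RadType, TagSection};
use libradicl::record::{MappedRecord, RecordContext};
use std::io::Read;

/// A toy record holding only a barcode and its alignment count (hypothetical).
#[derive(Debug)]
pub struct ToyReadRecord {
    pub bc: u64,
    pub num_aln: u32,
}

pub struct ToyRecordContext {
    pub bct: RadIntId,
}

impl RecordContext for ToyRecordContext {
    fn get_context_from_tag_section(
        _ft: &TagSection,
        rt: &TagSection,
        _at: &TagSection,
    ) -> anyhow::Result<Self> {
        // the only context needed here is the barcode width, taken from the read-level tags
        match rt.get_tag_type("b") {
            Some(RadType::Int(bct)) => Ok(Self { bct }),
            _ => bail!("toy record context requires an integer 'b' read-level tag"),
        }
    }
}

impl MappedRecord for ToyReadRecord {
    type ParsingContext = ToyRecordContext;
    type PeekResult = u64;

    fn peek_record(buf: &[u8], ctx: &Self::ParsingContext) -> Self::PeekResult {
        // the barcode sits just after the 4-byte alignment count
        ctx.bct.read_into_usize(&buf[std::mem::size_of::<u32>()..]) as u64
    }

    fn from_bytes_with_context<T: Read>(reader: &mut T, ctx: &Self::ParsingContext) -> Self {
        let mut nbuf = [0u8; 4];
        reader.read_exact(&mut nbuf).unwrap();
        let num_aln = u32::from_le_bytes(nbuf);
        let bc = read_into_u64(reader, &ctx.bct);
        // a real record type would also consume its per-alignment fields here
        Self { bc, num_aln }
    }
}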
+
+/// Context needed to read an alevin-fry record
+/// (the types of the barcode and the UMI).
+#[derive(Debug)]
+pub struct AlevinFryRecordContext {
+    pub bct: RadIntId,
+    pub umit: RadIntId,
+}
+
+impl RecordContext for AlevinFryRecordContext {
+    /// Currently, the [AlevinFryRecordContext] only cares about and provides the read tags that
+    /// correspond to the length of the barcode and the UMI. Here, these are parsed from the
+    /// corresponding [TagSection].
+    fn get_context_from_tag_section(
+        _ft: &TagSection,
+        rt: &TagSection,
+        _at: &TagSection,
+    ) -> anyhow::Result<Self> {
+        let bct = rt
+            .get_tag_type("b")
+            .expect("alevin-fry record context requires a \'b\' read-level tag");
+        let umit = rt
+            .get_tag_type("u")
+            .expect("alevin-fry record context requires a \'u\' read-level tag");
+        if let (RadType::Int(x), RadType::Int(y)) = (bct, umit) {
+            Ok(Self { bct: x, umit: y })
+        } else {
+            bail!("alevin-fry record context requires that b and u tags are of type RadType::Int");
+        }
+    }
+}
+
+impl AlevinFryRecordContext {
+    pub fn from_bct_umit(bct: RadIntId, umit: RadIntId) -> Self {
+        Self { bct, umit }
+    }
+}
+
+#[derive(Debug)]
+pub struct PiscemBulkRecordContext {
+    pub frag_map_t: RadIntId,
+}
+
+impl RecordContext for PiscemBulkRecordContext {
+    fn get_context_from_tag_section(
+        _ft: &TagSection,
+        rt: &TagSection,
+        _at: &TagSection,
+    ) -> anyhow::Result<Self> {
+        let frag_map_t = rt
+            .get_tag_type("frag_map_type")
+            .expect("piscem bulk record context requires a \"frag_map_type\" read-level tag");
+        if let RadType::Int(x) = frag_map_t {
+            Ok(Self { frag_map_t: x })
+        } else {
+            bail!("piscem bulk record context requires that the \"frag_map_type\" tag is of type RadType::Int");
+        }
+    }
+}
+
+impl MappedRecord for PiscemBulkReadRecord {
+    type ParsingContext = PiscemBulkRecordContext;
+    type PeekResult = Option;
+
+    #[inline]
+    fn from_bytes_with_context<T: Read>(reader: &mut T, ctx: &Self::ParsingContext) -> Self {
+        // Each alignment word packs a 2-bit orientation in its upper bits and a
+        // 30-bit reference id in its lower bits.
+        const MASK_UPPER_2_BITS: u32 = 0xC0000000;
+        const MASK_LOWER_30_BITS: u32 = 0x3FFFFFFF;
+        let mut rbuf = [0u8; 255];
+
+        reader.read_exact(&mut rbuf[0..4]).unwrap();
+        let na = rbuf.pread::<u32>(0).unwrap();
+        let fmt = rad_io::read_into_u64(reader, &ctx.frag_map_t);
+        let f = MappingType::from_u8(fmt as u8);
+
+        let mut rec = Self {
+            frag_type: fmt as u8,
+            dirs: Vec::with_capacity(na as usize),
+            refs: Vec::with_capacity(na as usize),
+            positions: Vec::with_capacity(na as usize),
+            frag_lengths: Vec::with_capacity(na as usize),
+        };
+
+        //println!("number of records : {:?}",na);
+
+        for _ in 0..(na as usize) {
+            reader.read_exact(&mut rbuf[0..4]).unwrap();
+            let v = rbuf.pread::<u32>(0).unwrap();
+
+            let dir_int = (v & MASK_UPPER_2_BITS) >> 30;
+            let dir = MappedFragmentOrientation::from_u32_paired_status(dir_int, f);
+            rec.dirs.push(dir);
+            rec.refs.push(v & MASK_LOWER_30_BITS);
+            // position
+            reader.read_exact(&mut rbuf[0..4]).unwrap();
+            let pos = rbuf.pread::<u32>(0).unwrap();
+            rec.positions.push(pos);
+            // length
+            reader.read_exact(&mut rbuf[0..2]).unwrap();
+            let flen = rbuf.pread::<u16>(0).unwrap();
+            rec.frag_lengths.push(flen);
+        }
+
+        rec
+    }
+
+    #[inline]
+    fn peek_record(_buf: &[u8], _ctx: &Self::ParsingContext) -> Self::PeekResult {
+        unimplemented!(
+            "Currently there is no implementation for peek_record for PiscemBulkReadRecord. This should not be needed"
+        );
+    }
+}
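To make the per-alignment layout parsed above concrete: each alignment is stored as a packed `u32` word whose upper 2 bits encode the orientation code and whose lower 30 bits encode the reference id, followed by a `u32` position and a `u16` fragment length. A tiny self-contained sketch, not part of the patch, of packing and unpacking one such word with the same masks:

fn main() {
    // Pack an entry by hand: orientation code 2 in the upper 2 bits,
    // reference id 7 in the lower 30 bits.
    let v: u32 = (2u32 << 30) | 7;

    // Unpack it with the masks used by the parser above.
    let ori_code = (v & 0xC000_0000) >> 30; // upper 2 bits: orientation code
    let ref_id = v & 0x3FFF_FFFF; // lower 30 bits: reference id
    assert_eq!((ori_code, ref_id), (2, 7));
}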
+
+impl MappedRecord for AlevinFryReadRecord {
+    type ParsingContext = AlevinFryRecordContext;
+    type PeekResult = (u64, u64);
+
+    #[inline]
+    fn peek_record(buf: &[u8], ctx: &Self::ParsingContext) -> Self::PeekResult {
+        let na_size = mem::size_of::<u32>();
+        let bc_size = ctx.bct.bytes_for_type();
+
+        let _na = buf.pread::<u32>(0).unwrap();
+
+        let bc = match ctx.bct {
+            RadIntId::U8 => buf.pread::<u8>(na_size).unwrap() as u64,
+            RadIntId::U16 => buf.pread::<u16>(na_size).unwrap() as u64,
+            RadIntId::U32 => buf.pread::<u32>(na_size).unwrap() as u64,
+            RadIntId::U64 => buf.pread::<u64>(na_size).unwrap(),
+        };
+        let umi = match ctx.umit {
+            RadIntId::U8 => buf.pread::<u8>(na_size + bc_size).unwrap() as u64,
+            RadIntId::U16 => buf.pread::<u16>(na_size + bc_size).unwrap() as u64,
+            RadIntId::U32 => buf.pread::<u32>(na_size + bc_size).unwrap() as u64,
+            RadIntId::U64 => buf.pread::<u64>(na_size + bc_size).unwrap(),
+        };
+        (bc, umi)
+    }
+
+    #[inline]
+    fn from_bytes_with_context<T: Read>(reader: &mut T, ctx: &Self::ParsingContext) -> Self {
+        let mut rbuf = [0u8; 255];
+
+        let (bc, umi, na) = Self::from_bytes_record_header(reader, &ctx.bct, &ctx.umit);
+
+        let mut rec = Self {
+            bc,
+            umi,
+            dirs: Vec::with_capacity(na as usize),
+            refs: Vec::with_capacity(na as usize),
+        };
+
+        for _ in 0..(na as usize) {
+            reader.read_exact(&mut rbuf[0..4]).unwrap();
+            let v = rbuf.pread::<u32>(0).unwrap();
+            let dir = (v & utils::MASK_LOWER_31_U32) != 0;
+            rec.dirs.push(dir);
+            rec.refs.push(v & utils::MASK_TOP_BIT_U32);
+        }
+        rec
+    }
+}
+
+impl AlevinFryReadRecord {
+    /// Returns `true` if this [AlevinFryReadRecord] contains no references and
+    /// `false` otherwise.
+    pub fn is_empty(&self) -> bool {
+        self.refs.is_empty()
+    }
+
+    /// Obtains the next [AlevinFryReadRecord] in the stream from the reader `reader`.
+    /// The barcode should be encoded with the [RadIntId] type `bct` and
+    /// the UMI should be encoded with the [RadIntId] type `umit`.
+    pub fn from_bytes<T: Read>(reader: &mut T, bct: &RadIntId, umit: &RadIntId) -> Self {
+        let mut rbuf = [0u8; 255];
+
+        let (bc, umi, na) = Self::from_bytes_record_header(reader, bct, umit);
+
+        let mut rec = Self {
+            bc,
+            umi,
+            dirs: Vec::with_capacity(na as usize),
+            refs: Vec::with_capacity(na as usize),
+        };
+
+        for _ in 0..(na as usize) {
+            reader.read_exact(&mut rbuf[0..4]).unwrap();
+            let v = rbuf.pread::<u32>(0).unwrap();
+            let dir = (v & utils::MASK_LOWER_31_U32) != 0;
+            rec.dirs.push(dir);
+            rec.refs.push(v & utils::MASK_TOP_BIT_U32);
+        }
+        rec
+    }
+
+    #[inline]
+    pub fn from_bytes_record_header<T: Read>(
+        reader: &mut T,
+        bct: &RadIntId,
+        umit: &RadIntId,
+    ) -> (u64, u64, u32) {
+        let mut rbuf = [0u8; 4];
+        reader.read_exact(&mut rbuf).unwrap();
+        let na = u32::from_le_bytes(rbuf); //.pread::<u32>(0).unwrap();
+        let bc = rad_io::read_into_u64(reader, bct);
+        let umi = rad_io::read_into_u64(reader, umit);
+        (bc, umi, na)
+    }
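The record head that `from_bytes_record_header` and `peek_record` both assume is a `u32` alignment count followed by the encoded barcode and then the encoded UMI. A small sketch, not part of the patch, that builds such a head by hand and peeks it; the choice of `RadIntId::U32` for both fields and the little-endian byte order are just for illustration.

use libradicl::rad_types::RadIntId;
use libradicl::record::{AlevinFryReadRecord, AlevinFryRecordContext, MappedRecord};

fn main() {
    // Hand-built record head: 2 alignments, barcode 0xAB, UMI 0xCD, both stored as u32.
    let mut buf = Vec::new();
    buf.extend_from_slice(&2u32.to_le_bytes());
    buf.extend_from_slice(&0xABu32.to_le_bytes());
    buf.extend_from_slice(&0xCDu32.to_le_bytes());

    let ctx = AlevinFryRecordContext::from_bct_umit(RadIntId::U32, RadIntId::U32);
    let (bc, umi) = AlevinFryReadRecord::peek_record(&buf, &ctx);
    assert_eq!((bc, umi), (0xAB, 0xCD));
}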
+
+    /// Read the next [AlevinFryReadRecord] from `reader`, but retain only those
+    /// alignment records that match the prescribed orientation provided in
+    /// `expected_ori` (which is a [Strand]). This function assumes the
+    /// read header has already been parsed, and just reads the raw
+    /// record contents consisting of the references and directions.
+    #[inline]
+    pub fn from_bytes_with_header_keep_ori<T: Read>(
+        reader: &mut T,
+        bc: u64,
+        umi: u64,
+        na: u32,
+        expected_ori: &Strand,
+    ) -> Self {
+        let mut rbuf = [0u8; 255];
+        let mut rec = Self {
+            bc,
+            umi,
+            dirs: Vec::with_capacity(na as usize),
+            refs: Vec::with_capacity(na as usize),
+        };
+
+        for _ in 0..(na as usize) {
+            reader.read_exact(&mut rbuf[0..4]).unwrap();
+            let v = rbuf.pread::<u32>(0).unwrap();
+
+            // fw if the leftmost bit is 1, otherwise rc
+            let strand = if (v & utils::MASK_LOWER_31_U32) > 0 {
+                Strand::Forward
+            } else {
+                Strand::Reverse
+            };
+
+            if expected_ori.same(&strand) || expected_ori.is_unknown() {
+                rec.refs.push(v & utils::MASK_TOP_BIT_U32);
+            }
+        }
+
+        // make sure these are sorted in this step.
+        rec.refs.sort_unstable();
+        rec
+    }
+
+    /// Read the next [AlevinFryReadRecord], including the header, from `reader`, but
+    /// retain only those alignment records that match the prescribed
+    /// orientation provided in `expected_ori` (which is a [Strand]).
+    #[inline]
+    pub fn from_bytes_keep_ori<T: Read>(
+        reader: &mut T,
+        bct: &RadIntId,
+        umit: &RadIntId,
+        expected_ori: &Strand,
+    ) -> Self {
+        let (bc, umi, na) = Self::from_bytes_record_header(reader, bct, umit);
+        Self::from_bytes_with_header_keep_ori(reader, bc, umi, na, expected_ori)
+    }
+}
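Putting the orientation-filtered readers to use: a minimal sketch, not part of the patch, that reads one chunk's records while keeping only alignments to the forward strand. It assumes `reader` is positioned at the start of a chunk (as it would be right after the file-level tag values), and that `bct` and `umit` describe the barcode and UMI encodings, for example as obtained from an `AlevinFryRecordContext`.

use bio_types::strand::Strand;
use libradicl::chunk::Chunk;
use libradicl::rad_types::RadIntId;
use libradicl::record::AlevinFryReadRecord;
use std::io::Read;

fn read_chunk_forward_only<T: Read>(
    reader: &mut T,
    bct: &RadIntId,
    umit: &RadIntId,
) -> Vec<AlevinFryReadRecord> {
    // Consume the chunk header (number of bytes and number of records) ...
    let (_nbytes, nrec) = Chunk::<AlevinFryReadRecord>::read_header(reader);
    // ... then parse each record, dropping alignments that are not forward-stranded.
    (0..nrec)
        .map(|_| AlevinFryReadRecord::from_bytes_keep_ori(reader, bct, umit, &Strand::Forward))
        .collect()
}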