Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ readme = "README.md"
repository = "https://github.com/niklasf/rust-huffman-compress"
categories = ["compression", "encoding", "algorithms"]
description = "Huffman compression given a probability distribution over arbitrary symbols"
edition = "2021"

[[bench]]
name = "benches"
Expand All @@ -18,7 +19,7 @@ bit-vec = "0.6"
num-traits = "0.2"

[dev-dependencies]
bencher = "0.1"
criterion = "0.5"
quickcheck = "1"

[badges]
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ Documentation
Changelog
---------

* to be released
- Switch to 2021 edition.

* 0.6.1
- Fix deprecation warning and remove `#[deny(warnings)]` (a future
compatibility hazard in libraries).
Expand Down
35 changes: 16 additions & 19 deletions benches/benches.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
#[macro_use]
extern crate bencher;
extern crate bit_vec;
extern crate huffman_compress;

use bencher::{black_box, Bencher};
use bit_vec::BitVec;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use huffman_compress::codebook;
use std::collections::HashMap;

fn bench_encode_decode(b: &mut Bencher) {
fn bench_encode_decode(c: &mut Criterion) {
let mut weights = HashMap::new();
weights.insert("CG", 293);
weights.insert("AG", 34);
Expand All @@ -20,19 +15,21 @@ fn bench_encode_decode(b: &mut Bencher) {

let example = black_box(vec!["AT", "CG", "AT", "TG", "AG", "CT", "CT", "AG", "CG"]);

b.iter(|| {
let mut buffer = BitVec::new();
for symbol in &example {
book.encode(&mut buffer, symbol).unwrap();
}

assert!(example
.iter()
.zip(tree.unbounded_decoder(&buffer))
.all(|(l, r)| l == &r));
c.bench_function("encode-decode", |b| {
b.iter(|| {
let mut buffer = BitVec::new();
for symbol in &example {
book.encode(&mut buffer, symbol).unwrap();
}

assert!(example
.iter()
.zip(tree.unbounded_decoder(&buffer))
.all(|(l, r)| l == &r));
})
});
}

benchmark_group!(benches, bench_encode_decode);
criterion_group!(benches, bench_encode_decode);

benchmark_main!(benches);
criterion_main!(benches);
48 changes: 21 additions & 27 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
//!
//! # use std::error::Error;
//! #
//! # fn try_main() -> Result<(), Box<Error>> {
//! # fn try_main() -> Result<(), Box<dyn Error>> {
//! use std::iter::FromIterator;
//! use std::collections::HashMap;
//! use bit_vec::BitVec;
Expand Down Expand Up @@ -60,13 +60,8 @@
#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]

extern crate bit_vec;
extern crate num_traits;

#[cfg(test)]
#[macro_use]
extern crate quickcheck;
#![warn(clippy::pedantic)]
#![allow(clippy::manual_let_else)]

use std::borrow::Borrow;
use std::cmp;
Expand All @@ -80,7 +75,7 @@ use bit_vec::BitVec;

use num_traits::ops::saturating::Saturating;

/// A trie used for decoding.
/// A tree used for decoding.
#[derive(Debug, Clone)]
pub struct Tree<K> {
root: usize,
Expand Down Expand Up @@ -153,19 +148,16 @@ impl<'a, K: Clone, I: IntoIterator<Item = bool>> Iterator for UnboundedDecoder<'
type Item = K;

fn next(&mut self) -> Option<K> {
let mut node = match self.tree.arena.get(self.tree.root) {
Some(root) => root,
None => return None, // empty tree
};
let mut node = self.tree.arena.get(self.tree.root)?;

loop {
match node.data {
NodeData::Leaf { ref symbol } => return Some(symbol.clone()),
NodeData::Branch { left, right } => {
node = match self.iter.next() {
Some(true) => &self.tree.arena[left],
Some(false) => &self.tree.arena[right],
None => return None,
node = if self.iter.next()? {
&self.tree.arena[left]
} else {
&self.tree.arena[right]
};
}
}
Expand All @@ -181,6 +173,7 @@ pub struct Book<K> {

impl<K: Ord + Clone> Book<K> {
/// Returns the underlying B-Tree.
#[must_use]
pub fn into_inner(self) -> BTreeMap<K, BitVec> {
self.book
}
Expand All @@ -196,11 +189,13 @@ impl<K: Ord + Clone> Book<K> {
}

/// Returns the number of symbols in the book.
#[must_use]
pub fn len(&self) -> usize {
self.book.len()
}

/// Returns true if the map has no symbols.
#[must_use]
pub fn is_empty(&self) -> bool {
self.book.is_empty()
}
Expand Down Expand Up @@ -235,12 +230,7 @@ impl<K: Ord + Clone> Book<K> {
K: Borrow<Q>,
Q: Ord,
{
match self.book.get(k) {
Some(code) => buffer.extend(code),
None => return Err(EncodeError {}),
}

Ok(())
self.book.get(k).map(|code| buffer.extend(code)).ok_or(EncodeError {})
}

fn new() -> Book<K> {
Expand Down Expand Up @@ -303,6 +293,7 @@ pub struct CodeBuilder<K: Ord + Clone, W: Saturating + Ord> {

impl<K: Ord + Clone, W: Saturating + Ord> CodeBuilder<K, W> {
/// Creates a new, empty `CodeBuilder<K, W>`.
#[must_use]
pub fn new() -> CodeBuilder<K, W> {
CodeBuilder {
heap: BinaryHeap::new(),
Expand All @@ -312,6 +303,7 @@ impl<K: Ord + Clone, W: Saturating + Ord> CodeBuilder<K, W> {

/// Creates a new, empty `CodeBuilder<K, W>` and preallocates space
/// for `capacity` symbols.
#[must_use]
pub fn with_capacity(capacity: usize) -> CodeBuilder<K, W> {
CodeBuilder {
heap: BinaryHeap::with_capacity(capacity),
Expand All @@ -335,6 +327,7 @@ impl<K: Ord + Clone, W: Saturating + Ord> CodeBuilder<K, W> {

/// Constructs a [book](struct.Book.html) and [tree](struct.Tree.html) pair
/// for encoding and decoding.
#[must_use]
pub fn finish(mut self) -> (Book<K>, Tree<K>) {
let mut book = Book::new();

Expand Down Expand Up @@ -425,7 +418,7 @@ impl<'a, K: Ord + Clone, W: Saturating + Ord + Clone> FromIterator<(&'a K, &'a W
where
T: IntoIterator<Item = (&'a K, &'a W)>,
{
CodeBuilder::from_iter(weights.into_iter().map(|(k, v)| (k.clone(), v.clone())))
weights.into_iter().map(|(k, v)| (k.clone(), v.clone())).collect()
}
}

Expand Down Expand Up @@ -469,6 +462,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use quickcheck::quickcheck;
use std::collections::HashMap;

#[test]
Expand Down Expand Up @@ -500,7 +494,7 @@ mod tests {
#[test]
fn test_uniform_from_static() {
const WEIGHTS: &[(&char, &usize)] = &[(&'a', &1), (&'b', &1), (&'c', &1), (&'d', &1)];
let (book, tree) = codebook(WEIGHTS.iter().cloned());
let (book, tree) = codebook(WEIGHTS.iter().copied());

let mut buffer = BitVec::new();
book.encode(&mut buffer, &'a').unwrap();
Expand Down Expand Up @@ -552,11 +546,11 @@ mod tests {
let (book, _) = builder.finish();

let len = |symbol| {
book.get(symbol).map_or(0, |code| code.len())
book.get(symbol).map_or(0, bit_vec::BitVec::len)
};

at >= ct || len("CT") <= len("AT") ||
ag.saturating_add(at).saturating_add(cg).saturating_add(ct).saturating_add(tg) >= u32::MAX
ag.saturating_add(at).saturating_add(cg).saturating_add(ct).saturating_add(tg) == u32::MAX
}

fn encode_decode_bytes(symbols: Vec<u8>) -> bool {
Expand Down