diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 9a4b98adc7..ee72b70a44 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -3,8 +3,6 @@ name: Rust checks
 on:
   push:
     branches: [master]
-    paths:
-      - 'src/core/**'
   pull_request:
     paths:
       - 'src/core/**'
@@ -150,7 +148,7 @@ jobs:
         uses: actions-rs/cargo@v1
         with:
           command: clippy
-          args: -- -D warnings
+          args: --all -- -D warnings
 
   wasm-pack:
     name: Check if wasm-pack builds a valid package for the sourmash crate
diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml
index 7b93309992..3050438159 100644
--- a/src/core/Cargo.toml
+++ b/src/core/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "sourmash"
-version = "0.7.0"
+version = "0.8.0"
 authors = ["Luiz Irber <luiz.irber@gmail.com>"]
 description = "MinHash sketches for genomic data"
 repository = "https://github.com/dib-lab/sourmash"
@@ -25,6 +25,7 @@ parallel = ["rayon"]
 #cbindgen = "~0.14.2"
 
 [dependencies]
+backtrace = "=0.3.46" # later versions require rust 1.40
 byteorder = "1.3.4"
 cfg-if = "0.1.10"
 failure = "0.1.8"
diff --git a/src/core/src/cmd.rs b/src/core/src/cmd.rs
index 73430e6629..897920bfa7 100644
--- a/src/core/src/cmd.rs
+++ b/src/core/src/cmd.rs
@@ -2,7 +2,7 @@ use failure::Error;
 
 use crate::index::MHBT;
 use crate::signature::Signature;
-use crate::sketch::minhash::{max_hash_for_scaled, HashFunctions, KmerMinHash};
+use crate::sketch::minhash::{max_hash_for_scaled, HashFunctions, KmerMinHashBTree};
 use crate::sketch::Sketch;
 
 pub fn prepare(index_path: &str) -> Result<(), Error> {
@@ -101,15 +101,15 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
             let mut ksigs = vec![];
 
             if params.protein {
-                ksigs.push(Sketch::MinHash(
-                    KmerMinHash::builder()
+                ksigs.push(Sketch::LargeMinHash(
+                    KmerMinHashBTree::builder()
                         .num(params.num_hashes)
                         .ksize(*k)
                         .hash_function(HashFunctions::murmur64_protein)
                         .max_hash(max_hash)
                         .seed(params.seed)
                         .abunds(if params.track_abundance {
-                            Some(vec![])
+                            Some(Default::default())
                         } else {
                             None
                         })
@@ -118,15 +118,15 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
             }
 
             if params.dayhoff {
-                ksigs.push(Sketch::MinHash(
-                    KmerMinHash::builder()
+                ksigs.push(Sketch::LargeMinHash(
+                    KmerMinHashBTree::builder()
                         .num(params.num_hashes)
                         .ksize(*k)
                         .hash_function(HashFunctions::murmur64_dayhoff)
                         .max_hash(max_hash)
                         .seed(params.seed)
                         .abunds(if params.track_abundance {
-                            Some(vec![])
+                            Some(Default::default())
                         } else {
                             None
                         })
@@ -135,15 +135,15 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
             }
 
             if params.hp {
-                ksigs.push(Sketch::MinHash(
-                    KmerMinHash::builder()
+                ksigs.push(Sketch::LargeMinHash(
+                    KmerMinHashBTree::builder()
                         .num(params.num_hashes)
                         .ksize(*k)
                         .hash_function(HashFunctions::murmur64_hp)
                         .max_hash(max_hash)
                         .seed(params.seed)
                         .abunds(if params.track_abundance {
-                            Some(vec![])
+                            Some(Default::default())
                         } else {
                             None
                         })
@@ -152,15 +152,15 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
             }
 
             if params.dna {
-                ksigs.push(Sketch::MinHash(
-                    KmerMinHash::builder()
+                ksigs.push(Sketch::LargeMinHash(
+                    KmerMinHashBTree::builder()
                         .num(params.num_hashes)
                         .ksize(*k)
                         .hash_function(HashFunctions::murmur64_DNA)
                         .max_hash(max_hash)
                         .seed(params.seed)
                         .abunds(if params.track_abundance {
-                            Some(vec![])
+                            Some(Default::default())
                         } else {
                             None
                         })
diff --git a/src/core/src/index/sbt/mhbt.rs b/src/core/src/index/sbt/mhbt.rs
index 738648d7d8..6628b4e799 100644
--- a/src/core/src/index/sbt/mhbt.rs
+++ b/src/core/src/index/sbt/mhbt.rs
@@ -152,7 +152,6 @@ mod test {
     use std::path::PathBuf;
 
     use assert_matches::assert_matches;
-    use tempfile;
 
     use super::Factory;
 
@@ -206,9 +205,7 @@ mod test {
             None,
         )
         .unwrap();
-        let sig_data = sigs[0].clone();
-
-        let leaf = sig_data.into();
+        let leaf = sigs[0].clone();
 
         let results = sbt.find(search_minhashes, &leaf, 0.5).unwrap();
         assert_eq!(results.len(), 1);
diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs
index eb62046cbd..88cbd2829e 100644
--- a/src/core/src/signature.rs
+++ b/src/core/src/signature.rs
@@ -34,6 +34,7 @@ impl SigsTrait for Sketch {
         match *self {
             Sketch::UKHS(ref ukhs) => ukhs.size(),
             Sketch::MinHash(ref mh) => mh.size(),
+            Sketch::LargeMinHash(ref mh) => mh.size(),
         }
     }
 
@@ -41,6 +42,7 @@ impl SigsTrait for Sketch {
         match *self {
             Sketch::UKHS(ref ukhs) => ukhs.to_vec(),
             Sketch::MinHash(ref mh) => mh.to_vec(),
+            Sketch::LargeMinHash(ref mh) => mh.to_vec(),
         }
     }
 
@@ -48,6 +50,7 @@ impl SigsTrait for Sketch {
         match *self {
             Sketch::UKHS(ref ukhs) => ukhs.ksize(),
             Sketch::MinHash(ref mh) => mh.ksize(),
+            Sketch::LargeMinHash(ref mh) => mh.ksize(),
         }
     }
 
@@ -61,12 +64,17 @@ impl SigsTrait for Sketch {
                 Sketch::MinHash(ref ot) => mh.check_compatible(ot),
                 _ => Err(SourmashError::MismatchSignatureType.into()),
             },
+            Sketch::LargeMinHash(ref mh) => match other {
+                Sketch::LargeMinHash(ref ot) => mh.check_compatible(ot),
+                _ => Err(SourmashError::MismatchSignatureType.into()),
+            },
         }
     }
 
     fn add_sequence(&mut self, seq: &[u8], force: bool) -> Result<(), Error> {
         match *self {
             Sketch::MinHash(ref mut mh) => mh.add_sequence(seq, force),
+            Sketch::LargeMinHash(ref mut mh) => mh.add_sequence(seq, force),
             Sketch::UKHS(_) => unimplemented!(),
         }
     }
@@ -74,6 +82,7 @@ impl SigsTrait for Sketch {
     fn add_protein(&mut self, seq: &[u8]) -> Result<(), Error> {
         match *self {
             Sketch::MinHash(ref mut mh) => mh.add_protein(seq),
+            Sketch::LargeMinHash(ref mut mh) => mh.add_protein(seq),
             Sketch::UKHS(_) => unimplemented!(),
         }
     }
@@ -183,6 +192,7 @@ impl Signature {
         if self.signatures.len() == 1 {
             match &self.signatures[0] {
                 Sketch::MinHash(mh) => mh.md5sum(),
+                Sketch::LargeMinHash(mh) => mh.md5sum(),
                 Sketch::UKHS(hs) => hs.md5sum(),
             }
         } else {
@@ -267,6 +277,22 @@ impl Signature {
                                 None => return true, // TODO: match previous behavior
                             };
                         }
+                        Sketch::LargeMinHash(mh) => {
+                            if let Some(k) = ksize {
+                                if k != mh.ksize() as usize {
+                                    return false;
+                                }
+                            };
+
+                            match moltype {
+                                Some(x) => {
+                                    if mh.hash_function() == x {
+                                        return true;
+                                    }
+                                }
+                                None => return true, // TODO: match previous behavior
+                            };
+                        }
                         Sketch::UKHS(hs) => {
                             if let Some(k) = ksize {
                                 if k != hs.ksize() as usize {
diff --git a/src/core/src/sketch/minhash.rs b/src/core/src/sketch/minhash.rs
index 253730af7a..25fecd166e 100644
--- a/src/core/src/sketch/minhash.rs
+++ b/src/core/src/sketch/minhash.rs
@@ -1,5 +1,5 @@
 use std::cmp::Ordering;
-use std::collections::HashMap;
+use std::collections::{BTreeMap, BTreeSet, HashMap};
 use std::convert::TryFrom;
 use std::f64::consts::PI;
 use std::iter::{Iterator, Peekable};
@@ -258,6 +258,10 @@ impl KmerMinHash {
     }
 
     pub fn set_hash_function(&mut self, h: HashFunctions) -> Result<(), Error> {
+        if self.hash_function == h {
+            return Ok(());
+        }
+
         if !self.is_empty() {
             return Err(SourmashError::NonEmptyMinHash {
                 message: "hash_function".into(),
@@ -482,14 +486,22 @@ impl KmerMinHash {
         if merged.len() < (self.num as usize) || (self.num as usize) == 0 {
             self.mins = merged;
             self.abunds = if merged_abunds.is_empty() {
-                None
+                if self.abunds.is_some() {
+                    Some(vec![])
+                } else {
+                    None
+                }
             } else {
                 Some(merged_abunds)
             };
         } else {
             self.mins = merged.into_iter().take(self.num as usize).collect();
             self.abunds = if merged_abunds.is_empty() {
-                None
+                if self.abunds.is_some() {
+                    Some(vec![])
+                } else {
+                    None
+                }
             } else {
                 Some(merged_abunds.into_iter().take(self.num as usize).collect())
             }
@@ -686,7 +698,7 @@ impl KmerMinHash {
     }
 
     // create a downsampled copy of self
-    fn downsample_max_hash(&self, max_hash: u64) -> Result<KmerMinHash, Error> {
+    pub fn downsample_max_hash(&self, max_hash: u64) -> Result<KmerMinHash, Error> {
         let mut new_mh = KmerMinHash::new(
             self.num,
             self.ksize,
@@ -703,7 +715,7 @@ impl KmerMinHash {
         Ok(new_mh)
     }
 
-    fn to_vec_abunds(&self) -> Vec<(u64, u64)> {
+    pub fn to_vec_abunds(&self) -> Vec<(u64, u64)> {
         if let Some(abunds) = &self.abunds {
             self.mins
                 .iter()
@@ -1208,3 +1220,762 @@ const VALID: [bool; 256] = {
     lookup[b'T' as usize] = true;
     lookup
 };
+
+//#############
+// A MinHash implementation for low scaled or large cardinalities
+
+#[cfg_attr(all(target_arch = "wasm32", target_vendor = "unknown"), wasm_bindgen)]
+#[derive(Debug, Clone, PartialEq, TypedBuilder)]
+pub struct KmerMinHashBTree {
+    num: u32,
+    ksize: u32,
+
+    #[builder(default_code = "HashFunctions::murmur64_DNA")]
+    hash_function: HashFunctions,
+
+    #[builder(default_code = "42u64")]
+    seed: u64,
+
+    #[builder(default_code = "u64::max_value()")]
+    max_hash: u64,
+
+    #[builder(default)]
+    mins: BTreeSet<u64>,
+
+    #[builder(default)]
+    abunds: Option<BTreeMap<u64, u64>>,
+
+    #[builder(default_code = "0u64")]
+    current_max: u64,
+}
+
+impl Default for KmerMinHashBTree {
+    fn default() -> KmerMinHashBTree {
+        KmerMinHashBTree {
+            num: 1000,
+            ksize: 21,
+            hash_function: HashFunctions::murmur64_DNA,
+            seed: 42,
+            max_hash: 0,
+            mins: Default::default(),
+            abunds: None,
+            current_max: 0,
+        }
+    }
+}
+
+impl Serialize for KmerMinHashBTree {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let n_fields = match &self.abunds {
+            Some(_) => 8,
+            _ => 7,
+        };
+
+        let mut partial = serializer.serialize_struct("KmerMinHashBTree", n_fields)?;
+        partial.serialize_field("num", &self.num)?;
+        partial.serialize_field("ksize", &self.ksize)?;
+        partial.serialize_field("seed", &self.seed)?;
+        partial.serialize_field("max_hash", &self.max_hash)?;
+        partial.serialize_field("mins", &self.mins)?;
+        partial.serialize_field("md5sum", &self.md5sum())?;
+
+        if let Some(abunds) = &self.abunds {
+            let abs: Vec<u64> = abunds.values().cloned().collect();
+            partial.serialize_field("abundances", &abs)?;
+        }
+
+        partial.serialize_field("molecule", &self.hash_function.to_string())?;
+
+        partial.end()
+    }
+}
+
+impl<'de> Deserialize<'de> for KmerMinHashBTree {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        #[derive(Deserialize)]
+        struct TempSig {
+            num: u32,
+            ksize: u32,
+            seed: u64,
+            max_hash: u64,
+            //md5sum: String,
+            mins: Vec<u64>,
+            abundances: Option<Vec<u64>>,
+            molecule: String,
+        }
+
+        let tmpsig = TempSig::deserialize(deserializer)?;
+
+        let num = if tmpsig.max_hash != 0 { 0 } else { tmpsig.num };
+        let hash_function = match tmpsig.molecule.to_lowercase().as_ref() {
+            "protein" => HashFunctions::murmur64_protein,
+            "dayhoff" => HashFunctions::murmur64_dayhoff,
+            "hp" => HashFunctions::murmur64_hp,
+            "dna" => HashFunctions::murmur64_DNA,
+            _ => unimplemented!(), // TODO: throw error here
+        };
+
+        let current_max;
+        // This shouldn't be necessary, but at some point we
+        // created signatures with unordered mins =(
+        let (mins, abunds) = if let Some(abunds) = tmpsig.abundances {
+            let mut values: Vec<(_, _)> = tmpsig.mins.iter().zip(abunds.iter()).collect();
+            values.sort();
+            let mins: BTreeSet<_> = values.iter().map(|(v, _)| **v).collect();
+            let abunds = values.into_iter().map(|(v, x)| (*v, *x)).collect();
+            current_max = *mins.iter().rev().next().unwrap_or(&0);
+            (mins, Some(abunds))
+        } else {
+            current_max = 0;
+            (tmpsig.mins.into_iter().collect(), None)
+        };
+
+        Ok(KmerMinHashBTree {
+            num,
+            ksize: tmpsig.ksize,
+            seed: tmpsig.seed,
+            max_hash: tmpsig.max_hash,
+            mins,
+            abunds,
+            hash_function,
+            current_max,
+        })
+    }
+}
+
+impl KmerMinHashBTree {
+    pub fn new(
+        num: u32,
+        ksize: u32,
+        hash_function: HashFunctions,
+        seed: u64,
+        max_hash: u64,
+        track_abundance: bool,
+    ) -> KmerMinHashBTree {
+        let mins = Default::default();
+
+        let abunds = if track_abundance {
+            Some(Default::default())
+        } else {
+            None
+        };
+
+        KmerMinHashBTree {
+            num,
+            ksize,
+            hash_function,
+            seed,
+            max_hash,
+            mins,
+            abunds,
+            current_max: 0,
+        }
+    }
+
+    pub fn num(&self) -> u32 {
+        self.num
+    }
+
+    pub fn is_protein(&self) -> bool {
+        self.hash_function == HashFunctions::murmur64_protein
+    }
+
+    fn is_dna(&self) -> bool {
+        self.hash_function == HashFunctions::murmur64_DNA
+    }
+
+    pub fn seed(&self) -> u64 {
+        self.seed
+    }
+
+    pub fn max_hash(&self) -> u64 {
+        self.max_hash
+    }
+
+    pub fn clear(&mut self) {
+        self.mins.clear();
+        if let Some(ref mut abunds) = self.abunds {
+            abunds.clear();
+        }
+        self.current_max = 0;
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.mins.is_empty()
+    }
+
+    pub fn set_hash_function(&mut self, h: HashFunctions) -> Result<(), Error> {
+        if self.hash_function == h {
+            return Ok(());
+        }
+
+        if !self.is_empty() {
+            return Err(SourmashError::NonEmptyMinHash {
+                message: "hash_function".into(),
+            }
+            .into());
+        }
+
+        self.hash_function = h;
+        Ok(())
+    }
+
+    pub fn track_abundance(&self) -> bool {
+        self.abunds.is_some()
+    }
+
+    pub fn enable_abundance(&mut self) -> Result<(), Error> {
+        if !self.mins.is_empty() {
+            return Err(SourmashError::NonEmptyMinHash {
+                message: "track_abundance=True".into(),
+            }
+            .into());
+        }
+
+        self.abunds = Some(Default::default());
+
+        Ok(())
+    }
+
+    pub fn disable_abundance(&mut self) {
+        self.abunds = None;
+    }
+
+    pub fn md5sum(&self) -> String {
+        let mut md5_ctx = md5::Context::new();
+        md5_ctx.consume(self.ksize().to_string());
+        self.mins
+            .iter()
+            .for_each(|x| md5_ctx.consume(x.to_string()));
+        format!("{:x}", md5_ctx.compute())
+    }
+
+    pub fn add_hash(&mut self, hash: u64) {
+        self.add_hash_with_abundance(hash, 1);
+    }
+
+    pub fn add_hash_with_abundance(&mut self, hash: u64, abundance: u64) {
+        if hash > self.max_hash && self.max_hash != 0 {
+            // This is a scaled minhash, and we don't need to add the new hash
+            return;
+        }
+
+        if self.num == 0 && self.max_hash == 0 {
+            // why did you create this minhash? it will always be empty...
+            return;
+        }
+
+        if abundance == 0 {
+            // well, don't add it.
+            return;
+        }
+
+        // From this point on, hash is within scaled (or no scaled specified).
+
+        // empty mins? add it.
+        if self.mins.is_empty() {
+            self.mins.insert(hash);
+            if let Some(ref mut abunds) = self.abunds {
+                abunds.insert(hash, abundance);
+            }
+            self.current_max = hash;
+            return;
+        }
+
+        if hash <= self.max_hash || hash <= self.current_max || (self.mins.len() as u32) < self.num
+        {
+            // "good" hash - within range, smaller than current entry, or
+            // still have space available
+            if self.mins.insert(hash) && hash > self.current_max {
+                self.current_max = hash;
+            }
+            if let Some(ref mut abunds) = self.abunds {
+                *abunds.entry(hash).or_insert(0) += abundance;
+            }
+
+            // is it too big now?
+            if self.num != 0 && self.mins.len() > (self.num as usize) {
+                let last = *self.mins.iter().rev().next().unwrap();
+                self.mins.remove(&last);
+                if let Some(ref mut abunds) = self.abunds {
+                    abunds.remove(&last);
+                }
+                self.current_max = *self.mins.iter().rev().next().unwrap();
+            }
+        }
+    }
+
+    pub fn add_word(&mut self, word: &[u8]) {
+        let hash = _hash_murmur(word, self.seed);
+        self.add_hash(hash);
+    }
+
+    pub fn remove_hash(&mut self, hash: u64) {
+        if self.mins.remove(&hash) {
+            if let Some(ref mut abunds) = self.abunds {
+                abunds.remove(&hash);
+            }
+        }
+        if hash == self.current_max {
+            self.current_max = *self.mins.iter().rev().next().unwrap_or(&0);
+        }
+    }
+
+    pub fn remove_many(&mut self, hashes: &[u64]) -> Result<(), Error> {
+        for min in hashes {
+            self.remove_hash(*min);
+        }
+        Ok(())
+    }
+
+    pub fn merge(&mut self, other: &KmerMinHashBTree) -> Result<(), Error> {
+        self.check_compatible(other)?;
+        let union = self.mins.union(&other.mins);
+
+        let to_take = if self.num == 0 {
+            usize::max_value()
+        } else {
+            self.num as usize
+        };
+
+        self.mins = union.take(to_take).cloned().collect();
+
+        if let Some(abunds) = &self.abunds {
+            if let Some(oabunds) = &other.abunds {
+                let mut new_abunds = BTreeMap::new();
+
+                for hash in &self.mins {
+                    *new_abunds.entry(*hash).or_insert(0) +=
+                        abunds.get(&hash).unwrap_or(&0) + oabunds.get(&hash).unwrap_or(&0);
+                }
+                self.abunds = Some(new_abunds)
+            }
+        }
+
+        Ok(())
+    }
+
+    pub fn add_from(&mut self, other: &KmerMinHashBTree) -> Result<(), Error> {
+        for min in &other.mins {
+            self.add_hash(*min);
+        }
+        Ok(())
+    }
+
+    pub fn add_many(&mut self, hashes: &[u64]) -> Result<(), Error> {
+        for min in hashes {
+            self.add_hash(*min);
+        }
+        Ok(())
+    }
+
+    pub fn add_many_with_abund(&mut self, hashes: &[(u64, u64)]) -> Result<(), Error> {
+        for item in hashes {
+            self.add_hash_with_abundance(item.0, item.1);
+        }
+        Ok(())
+    }
+
+    pub fn count_common(&self, other: &KmerMinHashBTree, downsample: bool) -> Result<u64, Error> {
+        if downsample && self.max_hash != other.max_hash {
+            let (first, second) = if self.max_hash < other.max_hash {
+                (self, other)
+            } else {
+                (other, self)
+            };
+            let downsampled_mh = second.downsample_max_hash(first.max_hash)?;
+            first.count_common(&downsampled_mh, false)
+        } else {
+            self.check_compatible(other)?;
+            let iter = Intersection::new(self.mins.iter(), other.mins.iter());
+
+            Ok(iter.count() as u64)
+        }
+    }
+
+    pub fn intersection(&self, other: &KmerMinHashBTree) -> Result<(Vec<u64>, u64), Error> {
+        self.check_compatible(other)?;
+
+        let mut combined_mh = KmerMinHashBTree::new(
+            self.num,
+            self.ksize,
+            self.hash_function,
+            self.seed,
+            self.max_hash,
+            self.abunds.is_some(),
+        );
+
+        combined_mh.merge(&self)?;
+        combined_mh.merge(&other)?;
+
+        let it1 = Intersection::new(self.mins.iter(), other.mins.iter());
+
+        // TODO: there is probably a way to avoid this Vec here,
+        // and pass the it1 as left in it2.
+        let i1: Vec<u64> = it1.cloned().collect();
+        let i2: Vec<u64> = combined_mh.mins.iter().cloned().collect();
+        let it2 = Intersection::new(i1.iter(), i2.iter());
+
+        let common: Vec<u64> = it2.cloned().collect();
+        Ok((common, combined_mh.mins.len() as u64))
+    }
+
+    pub fn intersection_size(&self, other: &KmerMinHashBTree) -> Result<(u64, u64), Error> {
+        self.check_compatible(other)?;
+
+        let mut combined_mh = KmerMinHashBTree::new(
+            self.num,
+            self.ksize,
+            self.hash_function,
+            self.seed,
+            self.max_hash,
+            self.abunds.is_some(),
+        );
+
+        combined_mh.merge(&self)?;
+        combined_mh.merge(&other)?;
+
+        let it1 = Intersection::new(self.mins.iter(), other.mins.iter());
+
+        // TODO: there is probably a way to avoid this Vec here,
+        // and pass the it1 as left in it2.
+        let i1: Vec<u64> = it1.cloned().collect();
+        let i2: Vec<u64> = combined_mh.mins.iter().cloned().collect();
+        let it2 = Intersection::new(i1.iter(), i2.iter());
+
+        Ok((it2.count() as u64, combined_mh.mins.len() as u64))
+    }
+
+    // calculate Jaccard similarity, ignoring abundance.
+    pub fn jaccard(&self, other: &KmerMinHashBTree) -> Result<f64, Error> {
+        self.check_compatible(other)?;
+        if let Ok((common, size)) = self.intersection_size(other) {
+            Ok(common as f64 / u64::max(1, size) as f64)
+        } else {
+            Ok(0.0)
+        }
+    }
+
+    // compare two minhashes, with abundance;
+    // calculate their angular similarity.
+    pub fn angular_similarity(&self, other: &KmerMinHashBTree) -> Result<f64, Error> {
+        self.check_compatible(other)?;
+
+        if self.abunds.is_none() || other.abunds.is_none() {
+            // TODO: throw error, we need abundance for this
+            unimplemented!() // @CTB fixme
+        }
+
+        let abunds = self.abunds.as_ref().unwrap();
+        let other_abunds = other.abunds.as_ref().unwrap();
+
+        let mut prod = 0;
+        let a_sq: u64 = abunds.values().map(|a| (a * a)).sum();
+        let b_sq: u64 = other_abunds.values().map(|a| (a * a)).sum();
+
+        for (hash, value) in abunds.iter() {
+            if let Some(oa) = other_abunds.get(&hash) {
+                prod += value * oa
+            }
+        }
+
+        let norm_a = (a_sq as f64).sqrt();
+        let norm_b = (b_sq as f64).sqrt();
+
+        if norm_a == 0. || norm_b == 0. {
+            return Ok(0.0);
+        }
+        let prod = f64::min(prod as f64 / (norm_a * norm_b), 1.);
+        let distance = 2. * prod.acos() / PI;
+        Ok(1. - distance)
+    }
+
+    pub fn similarity(
+        &self,
+        other: &KmerMinHashBTree,
+        ignore_abundance: bool,
+        downsample: bool,
+    ) -> Result<f64, Error> {
+        if downsample && self.max_hash != other.max_hash {
+            let (first, second) = if self.max_hash < other.max_hash {
+                (self, other)
+            } else {
+                (other, self)
+            };
+            let downsampled_mh = second.downsample_max_hash(first.max_hash)?;
+            first.similarity(&downsampled_mh, ignore_abundance, false)
+        } else if ignore_abundance || self.abunds.is_none() || other.abunds.is_none() {
+            self.jaccard(&other)
+        } else {
+            self.angular_similarity(&other)
+        }
+    }
+
+    pub fn dayhoff(&self) -> bool {
+        self.hash_function == HashFunctions::murmur64_dayhoff
+    }
+
+    pub fn hp(&self) -> bool {
+        self.hash_function == HashFunctions::murmur64_hp
+    }
+
+    pub fn hash_function(&self) -> HashFunctions {
+        self.hash_function
+    }
+
+    pub fn mins(&self) -> Vec<u64> {
+        self.mins.iter().cloned().collect()
+    }
+
+    pub fn abunds(&self) -> Option<Vec<u64>> {
+        if let Some(abunds) = &self.abunds {
+            Some(abunds.values().cloned().collect())
+        } else {
+            None
+        }
+    }
+
+    // create a downsampled copy of self
+    pub fn downsample_max_hash(&self, max_hash: u64) -> Result<KmerMinHashBTree, Error> {
+        let mut new_mh = KmerMinHashBTree::new(
+            self.num,
+            self.ksize,
+            self.hash_function,
+            self.seed,
+            max_hash, // old max_hash => max_hash arg
+            self.abunds.is_some(),
+        );
+        if self.abunds.is_some() {
+            new_mh.add_many_with_abund(&self.to_vec_abunds())?;
+        } else {
+            new_mh.add_many(&self.mins())?;
+        }
+        Ok(new_mh)
+    }
+
+    pub fn to_vec_abunds(&self) -> Vec<(u64, u64)> {
+        if let Some(abunds) = &self.abunds {
+            abunds.iter().map(|(a, b)| (*a, *b)).collect()
+        } else {
+            self.mins
+                .iter()
+                .cloned()
+                .zip(std::iter::repeat(1))
+                .collect()
+        }
+    }
+}
+
+impl SigsTrait for KmerMinHashBTree {
+    fn size(&self) -> usize {
+        self.mins.len()
+    }
+
+    fn to_vec(&self) -> Vec<u64> {
+        self.mins()
+    }
+
+    fn ksize(&self) -> usize {
+        self.ksize as usize
+    }
+
+    fn check_compatible(&self, other: &KmerMinHashBTree) -> Result<(), Error> {
+        /*
+        if self.num != other.num {
+            return Err(SourmashError::MismatchNum {
+                n1: self.num,
+                n2: other.num,
+            }
+            .into());
+        }
+        */
+        if self.ksize != other.ksize {
+            return Err(SourmashError::MismatchKSizes.into());
+        }
+        if self.hash_function != other.hash_function {
+            // TODO: fix this error
+            return Err(SourmashError::MismatchDNAProt.into());
+        }
+        if self.max_hash != other.max_hash {
+            return Err(SourmashError::MismatchScaled.into());
+        }
+        if self.seed != other.seed {
+            return Err(SourmashError::MismatchSeed.into());
+        }
+        Ok(())
+    }
+
+    fn add_sequence(&mut self, seq: &[u8], force: bool) -> Result<(), Error> {
+        let ksize = self.ksize as usize;
+        let len = seq.len();
+
+        if len < ksize {
+            return Ok(());
+        };
+
+        // Here we convert the sequence to upper case and
+        // pre-calculate the reverse complement for the full sequence...
+        let sequence = seq.to_ascii_uppercase();
+        let rc = revcomp(&sequence);
+
+        if self.is_dna() {
+            let mut last_position_check = 0;
+
+            let mut is_valid_kmer = |i| {
+                for j in std::cmp::max(i, last_position_check)..i + ksize {
+                    if !VALID[sequence[j] as usize] {
+                        return false;
+                    }
+                    last_position_check += 1;
+                }
+                true
+            };
+
+            for i in 0..=len - ksize {
+                // ... and then while moving the k-mer window forward for the sequence
+                // we move another window backwards for the RC.
+                //   For a ksize = 3, and a sequence AGTCGT (len = 6):
+                //                   +-+---------+---------------+-------+
+                //   seq      RC     |i|i + ksize|len - ksize - i|len - i|
+                //  AGTCGT   ACGACT  +-+---------+---------------+-------+
+                //  +->         +->  |0|    2    |       3       |   6   |
+                //   +->       +->   |1|    3    |       2       |   5   |
+                //    +->     +->    |2|    4    |       1       |   4   |
+                //     +->   +->     |3|    5    |       0       |   3   |
+                //                   +-+---------+---------------+-------+
+                // (leaving this table here because I had to draw to
+                //  get the indices correctly)
+
+                let kmer = &sequence[i..i + ksize];
+
+                if !is_valid_kmer(i) {
+                    if !force {
+                        // throw error if DNA is not valid
+                        return Err(SourmashError::InvalidDNA {
+                            message: String::from_utf8(kmer.to_vec()).unwrap(),
+                        }
+                        .into());
+                    }
+
+                    continue; // skip invalid k-mer
+                }
+
+                let krc = &rc[len - ksize - i..len - i];
+                self.add_word(std::cmp::min(kmer, krc));
+            }
+        } else {
+            // protein
+            let aa_ksize = self.ksize / 3;
+
+            for i in 0..3 {
+                let substr: Vec<u8> = sequence
+                    .iter()
+                    .cloned()
+                    .skip(i)
+                    .take(sequence.len() - i)
+                    .collect();
+                let aa = to_aa(&substr, self.dayhoff(), self.hp()).unwrap();
+
+                aa.windows(aa_ksize as usize).for_each(|n| self.add_word(n));
+
+                let rc_substr: Vec<u8> = rc.iter().cloned().skip(i).take(rc.len() - i).collect();
+                let aa_rc = to_aa(&rc_substr, self.dayhoff(), self.hp()).unwrap();
+
+                aa_rc
+                    .windows(aa_ksize as usize)
+                    .for_each(|n| self.add_word(n));
+            }
+        }
+
+        Ok(())
+    }
+
+    fn add_protein(&mut self, seq: &[u8]) -> Result<(), Error> {
+        let ksize = (self.ksize / 3) as usize;
+        let len = seq.len();
+
+        if len < ksize {
+            return Ok(());
+        }
+
+        if let HashFunctions::murmur64_protein = self.hash_function {
+            for aa_kmer in seq.windows(ksize) {
+                self.add_word(&aa_kmer);
+            }
+            return Ok(());
+        }
+
+        let aa_seq: Vec<_> = match self.hash_function {
+            HashFunctions::murmur64_dayhoff => seq.iter().cloned().map(aa_to_dayhoff).collect(),
+            HashFunctions::murmur64_hp => seq.iter().cloned().map(aa_to_hp).collect(),
+            invalid => {
+                return Err(SourmashError::InvalidHashFunction {
+                    function: format!("{}", invalid),
+                }
+                .into())
+            }
+        };
+
+        for aa_kmer in aa_seq.windows(ksize) {
+            self.add_word(aa_kmer);
+        }
+
+        Ok(())
+    }
+}
+
+impl From<KmerMinHashBTree> for KmerMinHash {
+    fn from(other: KmerMinHashBTree) -> KmerMinHash {
+        let mut new_mh = KmerMinHash::new(
+            other.num(),
+            other.ksize() as u32,
+            other.hash_function(),
+            other.seed(),
+            other.max_hash(),
+            other.track_abundance(),
+        );
+
+        let mins = other.mins.into_iter().collect();
+        let abunds = if let Some(abunds) = other.abunds {
+            Some(abunds.values().cloned().collect())
+        } else {
+            None
+        };
+
+        new_mh.mins = mins;
+        new_mh.abunds = abunds;
+
+        new_mh
+    }
+}
+
+impl From<KmerMinHash> for KmerMinHashBTree {
+    fn from(other: KmerMinHash) -> KmerMinHashBTree {
+        let mut new_mh = KmerMinHashBTree::new(
+            other.num(),
+            other.ksize() as u32,
+            other.hash_function(),
+            other.seed(),
+            other.max_hash(),
+            other.track_abundance(),
+        );
+
+        let mins: BTreeSet<u64> = other.mins.into_iter().collect();
+        let abunds = if let Some(abunds) = other.abunds {
+            Some(mins.iter().cloned().zip(abunds.into_iter()).collect())
+        } else {
+            None
+        };
+
+        new_mh.mins = mins;
+        new_mh.abunds = abunds;
+
+        new_mh
+    }
+}
diff --git a/src/core/src/sketch/mod.rs b/src/core/src/sketch/mod.rs
index f02d0f7e1c..1c6ee710ff 100644
--- a/src/core/src/sketch/mod.rs
+++ b/src/core/src/sketch/mod.rs
@@ -5,12 +5,13 @@ pub mod ukhs;
 
 use serde_derive::{Deserialize, Serialize};
 
-use crate::sketch::minhash::KmerMinHash;
+use crate::sketch::minhash::{KmerMinHash, KmerMinHashBTree};
 use crate::sketch::ukhs::FlatUKHS;
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum Sketch {
     MinHash(KmerMinHash),
+    LargeMinHash(KmerMinHashBTree),
     UKHS(FlatUKHS), // FIXME
 }
diff --git a/src/core/src/sketch/nodegraph.rs b/src/core/src/sketch/nodegraph.rs
index 13f26007a6..c85df5b062 100644
--- a/src/core/src/sketch/nodegraph.rs
+++ b/src/core/src/sketch/nodegraph.rs
@@ -388,7 +388,7 @@ mod test {
     // >>> a.save("test.ng")
     // and dumping test.ng with xxd:
     // $ xxd -i test.ng
-    static RAW_DATA: &'static [u8] = &[
+    static RAW_DATA: &[u8] = &[
         0x4f, 0x58, 0x4c, 0x49, 0x04, 0x02, 0x03, 0x00, 0x00, 0x00, 0x06, 0x03, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x01,
         0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x01, 0x0d, 0x00, 0x00, 0x00,
@@ -397,7 +397,7 @@ mod test {
         0x00, 0x00, 0x00, 0x06,
     ];
 
-    static COMPRESSED_RAW_DATA: &'static [u8] = &[
+    static COMPRESSED_RAW_DATA: &[u8] = &[
         0x1f, 0x8b, 0x08, 0x08, 0x73, 0x88, 0x9f, 0x5e, 0x00, 0x03, 0x74, 0x65, 0x73, 0x74, 0x2e,
         0x6e, 0x67, 0x00, 0xf3, 0x8f, 0xf0, 0xf1, 0x64, 0x61, 0x62, 0x66, 0x60, 0x60, 0x60, 0x03,
         0x11, 0x20, 0x20, 0x0c, 0xa5, 0x19, 0x38, 0x19, 0x05, 0x61, 0x4c, 0x1e, 0x46, 0x5e, 0x28,
@@ -565,7 +565,7 @@ mod test {
             leaf5.count(_hash(kmer.as_bytes()));
         }
 
-        let h = _hash("AAAAT".as_bytes());
+        let h = _hash(b"AAAAT");
         for leaf in &[leaf1, leaf2, leaf3, leaf5] {
             assert_eq!(leaf.get(h), 1);
         }
diff --git a/src/core/tests/minhash.rs b/src/core/tests/minhash.rs
index 9d4beab247..344dcd130c 100644
--- a/src/core/tests/minhash.rs
+++ b/src/core/tests/minhash.rs
@@ -1,14 +1,28 @@
-use sourmash::signature::SigsTrait;
-use sourmash::sketch::minhash::{max_hash_for_scaled, HashFunctions, KmerMinHash};
+use std::fs::File;
+use std::io::BufReader;
+use std::path::PathBuf;
+
+use sourmash::signature::{Signature, SigsTrait};
+use sourmash::sketch::minhash::{
+    max_hash_for_scaled, HashFunctions, KmerMinHash, KmerMinHashBTree,
+};
+use sourmash::sketch::Sketch;
+
+use proptest::collection::vec;
+use proptest::num::u64;
+use proptest::proptest;
+
+// TODO: use f64::EPSILON when we bump MSRV
+const EPSILON: f64 = 0.01;
 
 #[test]
 fn throws_error() {
     let mut mh = KmerMinHash::new(1, 4, HashFunctions::murmur64_DNA, 42, 0, false);
 
-    match mh.add_sequence(b"ATGR", false) {
-        Ok(_) => assert!(false, "R is not a valid DNA character"),
-        Err(_) => assert!(true),
-    }
+    assert!(
+        mh.add_sequence(b"ATGR", false).is_err(),
+        "R is not a valid DNA character"
+    );
 }
 
 #[test]
@@ -59,8 +73,8 @@ fn similarity() -> Result<(), Box<dyn std::error::Error>> {
     b.add_hash(1);
     b.add_hash(2);
 
-    assert!((a.similarity(&a, false, false)? - 1.0).abs() < 0.001);
-    assert!((a.similarity(&b, false, false)? - 0.5).abs() < 0.001);
+    assert!((a.similarity(&a, false, false)? - 1.0).abs() < EPSILON);
+    assert!((a.similarity(&b, false, false)? - 0.5).abs() < EPSILON);
 
     Ok(())
 }
@@ -77,7 +91,7 @@ fn similarity_2() -> Result<(), Box<dyn std::error::Error>> {
     b.add_sequence(b"ATGGA", false)?;
 
     assert!(
-        (a.similarity(&b, false, false)? - 0.705).abs() < 0.001,
+        (a.similarity(&b, false, false)? - 0.705).abs() < EPSILON,
         "{}",
         a.similarity(&b, false, false)?
     );
@@ -100,11 +114,11 @@ fn similarity_3() -> Result<(), Box<dyn std::error::Error>> {
     b.add_hash(3);
     b.add_hash(4);
 
-    assert!((a.similarity(&a, false, false)? - 1.0).abs() < 0.001);
-    assert!((a.similarity(&b, false, false)? - 0.23).abs() < 0.001);
+    assert!((a.similarity(&a, false, false)? - 1.0).abs() < EPSILON);
+    assert!((a.similarity(&b, false, false)? - 0.23).abs() < EPSILON);
 
-    assert!((a.similarity(&a, true, false)? - 1.0).abs() < 0.001);
-    assert!((a.similarity(&b, true, false)? - 0.2).abs() < 0.001);
+    assert!((a.similarity(&a, true, false)? - 1.0).abs() < EPSILON);
+    assert!((a.similarity(&b, true, false)? - 0.2).abs() < EPSILON);
 
     Ok(())
 }
@@ -137,3 +151,567 @@ fn hp() {
 fn max_for_scaled() {
     assert_eq!(max_hash_for_scaled(100), Some(184467440737095520));
 }
+
+proptest! {
+#[test]
+fn oracle_mins(hashes in vec(u64::ANY, 1..10000)) {
+    let mut a = KmerMinHash::new(1000, 21, HashFunctions::murmur64_protein, 42, 0, true);
+    let mut b = KmerMinHashBTree::new(1000, 21, HashFunctions::murmur64_protein, 42, 0, true);
+
+    let mut c: KmerMinHash = Default::default();
+    c.set_hash_function(HashFunctions::murmur64_protein).unwrap();
+    c.enable_abundance().unwrap();
+
+    let mut d: KmerMinHashBTree = Default::default();
+    d.set_hash_function(HashFunctions::murmur64_protein).unwrap();
+    d.enable_abundance().unwrap();
+
+    let mut to_remove = vec![];
+    for hash in &hashes {
+        a.add_hash(*hash);
+        b.add_hash(*hash);
+
+        if hash % 2 == 0 {
+            to_remove.push(*hash);
+        }
+    }
+
+    c.add_from(&a).unwrap();
+    c.remove_many(&to_remove).unwrap();
+
+    d.add_from(&b).unwrap();
+    d.remove_many(&to_remove).unwrap();
+
+    assert_eq!(a.mins(), b.mins());
+    assert_eq!(c.mins(), d.mins());
+
+    assert_eq!(a.count_common(&c, false).unwrap(), b.count_common(&d, false).unwrap());
+    assert_eq!(a.count_common(&c, true).unwrap(), b.count_common(&d, true).unwrap());
+
+    assert_eq!(a.abunds(), b.abunds());
+    assert_eq!(c.abunds(), d.abunds());
+
+    assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON);
+}
+}
+
+proptest! {
+#[test]
+fn oracle_mins_scaled(hashes in vec(u64::ANY, 1..10000)) {
+    let max_hash = max_hash_for_scaled(100).unwrap();
+    let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    let mut c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let mut d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    let mut to_remove = vec![];
+    for hash in &hashes {
+        a.add_hash(*hash);
+        b.add_hash(*hash);
+
+        if hash % 2 == 0 {
+            to_remove.push(*hash);
+        }
+    }
+
+    c.add_many(&hashes).unwrap();
+    d.add_many(&hashes).unwrap();
+
+    c.remove_many(&to_remove).unwrap();
+    d.remove_many(&to_remove).unwrap();
+
+    a.remove_hash(hashes[0]);
+    b.remove_hash(hashes[0]);
+
+    assert_eq!(a.mins(), b.mins());
+    assert_eq!(c.mins(), d.mins());
+
+    assert_eq!(a.md5sum(), b.md5sum());
+    assert_eq!(c.md5sum(), d.md5sum());
+
+    assert_eq!(a.is_protein(), b.is_protein());
+    assert_eq!(a.num(), b.num());
+    assert_eq!(a.seed(), b.seed());
+    assert_eq!(a.ksize(), b.ksize());
+    assert_eq!(a.max_hash(), b.max_hash());
+    assert_eq!(a.track_abundance(), b.track_abundance());
+    assert_eq!(a.hash_function(), b.hash_function());
+
+    assert_eq!(a.abunds(), b.abunds());
+    assert_eq!(c.abunds(), d.abunds());
+
+    assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON);
+    assert!((c.similarity(&a, false, false).unwrap() - d.similarity(&b, false, false).unwrap()).abs() < EPSILON);
+    assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON);
+    assert!((c.similarity(&a, true, false).unwrap() - d.similarity(&b, true, false).unwrap()).abs() < EPSILON);
+
+    assert_eq!(a.count_common(&c, false).unwrap(), b.count_common(&d, false).unwrap());
+    assert_eq!(c.count_common(&a, false).unwrap(), d.count_common(&b, false).unwrap());
+    assert_eq!(a.count_common(&c, true).unwrap(), b.count_common(&d, true).unwrap());
+    assert_eq!(c.count_common(&a, true).unwrap(), d.count_common(&b, true).unwrap());
+
+    let mut e = a.downsample_max_hash(100).unwrap();
+    let mut f = b.downsample_max_hash(100).unwrap();
+
+    // Can't compare different scaled without explicit downsample
+    assert!(c.similarity(&e, false, false).is_err());
+    assert!(d.similarity(&f, false, false).is_err());
+    assert!(c.similarity(&e, true, false).is_err());
+    assert!(d.similarity(&f, true, false).is_err());
+
+    assert!((c.similarity(&e, true, true).unwrap() - d.similarity(&f, true, true).unwrap()).abs() < EPSILON);
+    assert!((e.similarity(&c, true, true).unwrap() - f.similarity(&d, true, true).unwrap()).abs() < EPSILON);
+    assert!((c.similarity(&e, false, true).unwrap() - d.similarity(&f, false, true).unwrap()).abs() < EPSILON);
+    assert!((e.similarity(&c, false, true).unwrap() - f.similarity(&d, false, true).unwrap()).abs() < EPSILON);
+
+    // Can't compare different scaled without explicit downsample
+    assert!(e.count_common(&c, false).is_err());
+    assert!(f.count_common(&d, false).is_err());
+
+    assert_eq!(e.count_common(&c, true).unwrap(), f.count_common(&d, true).unwrap());
+    assert_eq!(c.count_common(&e, true).unwrap(), d.count_common(&f, true).unwrap());
+
+    // disable abundances
+    e.disable_abundance();
+    f.disable_abundance();
+
+    // Can't compare different scaled without explicit downsample
+    assert!(c.similarity(&e, false, false).is_err());
+    assert!(d.similarity(&f, false, false).is_err());
+    assert!(c.similarity(&e, true, false).is_err());
+    assert!(d.similarity(&f, true, false).is_err());
+
+    assert!((c.similarity(&e, true, true).unwrap() - d.similarity(&f, true, true).unwrap()).abs() < EPSILON);
+    assert!((e.similarity(&c, true, true).unwrap() - f.similarity(&d, true, true).unwrap()).abs() < EPSILON);
+    assert!((c.similarity(&e, false, true).unwrap() - d.similarity(&f, false, true).unwrap()).abs() < EPSILON);
+    assert!((e.similarity(&c, false, true).unwrap() - f.similarity(&d, false, true).unwrap()).abs() < EPSILON);
+
+    // Can't compare different scaled without explicit downsample
+    assert!(e.count_common(&c, false).is_err());
+    assert!(f.count_common(&d, false).is_err());
+
+    assert_eq!(e.count_common(&c, true).unwrap(), f.count_common(&d, true).unwrap());
+    assert_eq!(c.count_common(&e, true).unwrap(), d.count_common(&f, true).unwrap());
+}
+}
+
+proptest! {
+#[test]
+fn prop_merge(seq1 in "[ACGT]{6,100}", seq2 in "[ACGT]{6,200}") {
+    let max_hash = max_hash_for_scaled(10).unwrap();
+    let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    let mut c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let mut d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    a.add_sequence(seq1.as_bytes(), false).unwrap();
+    b.add_sequence(seq1.as_bytes(), false).unwrap();
+
+    c.add_sequence(seq2.as_bytes(), false).unwrap();
+    d.add_sequence(seq2.as_bytes(), false).unwrap();
+
+    a.merge(&c).unwrap();
+    b.merge(&d).unwrap();
+
+    assert_eq!(a.mins(), b.mins());
+    assert_eq!(c.mins(), d.mins());
+
+    assert_eq!(a.abunds(), b.abunds());
+    assert_eq!(c.abunds(), d.abunds());
+
+    assert_eq!(a.intersection_size(&c).unwrap(), b.intersection_size(&d).unwrap());
+    assert_eq!(c.intersection(&a).unwrap(), d.intersection(&b).unwrap());
+
+    assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON);
+    assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON);
+
+    let mut e = a.downsample_max_hash(100).unwrap();
+    let mut f = b.downsample_max_hash(100).unwrap();
+
+    assert!((e.similarity(&c, false, true).unwrap() - f.similarity(&d, false, true).unwrap()).abs() < EPSILON);
+    assert!((e.similarity(&c, true, true).unwrap() - f.similarity(&d, true, true).unwrap()).abs() < EPSILON);
+
+    e.disable_abundance();
+    f.disable_abundance();
+
+    assert!((e.similarity(&c, false, true).unwrap() - f.similarity(&d, false, true).unwrap()).abs() < EPSILON);
+    assert!((e.similarity(&c, true, true).unwrap() - f.similarity(&d, true, true).unwrap()).abs() < EPSILON);
+
+    e.clear();
+    f.clear();
+
+    assert!(e.is_empty());
+    assert!(f.is_empty());
+}
+}
+
+#[test]
+fn load_save_minhash_sketches() {
+    let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    filename.push("../../tests/test-data/genome-s10+s11.sig");
+
+    let file = File::open(filename).unwrap();
+    let reader = BufReader::new(file);
+    let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
+
+    let sig = sigs.get(0).unwrap();
+    let sketches = sig.sketches();
+    let mut buffer = vec![];
+
+    if let Sketch::MinHash(mh) = &sketches[0] {
+        let bmh: KmerMinHashBTree = mh.clone().into();
+        {
+            serde_json::to_writer(&mut buffer, &bmh).unwrap();
+        }
+
+        let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap();
+        let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap();
+
+        assert_eq!(mh.md5sum(), new_mh.md5sum());
+        assert_eq!(bmh.md5sum(), new_bmh.md5sum());
+        assert_eq!(bmh.md5sum(), new_mh.md5sum());
+        assert_eq!(mh.md5sum(), new_bmh.md5sum());
+
+        assert_eq!(mh.mins(), new_mh.mins());
+        assert_eq!(bmh.mins(), new_bmh.mins());
+        assert_eq!(bmh.mins(), new_mh.mins());
+        assert_eq!(mh.mins(), new_bmh.mins());
+
+        assert_eq!(mh.abunds(), new_mh.abunds());
+        assert_eq!(bmh.abunds(), new_bmh.abunds());
+        assert_eq!(bmh.abunds(), new_mh.abunds());
+        assert_eq!(mh.abunds(), new_bmh.abunds());
+
+        assert!(
+            (mh.similarity(&new_mh, false, false).unwrap()
+                - bmh.similarity(&new_bmh, false, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+
+        assert!(
+            (mh.similarity(&new_mh, true, false).unwrap()
+                - bmh.similarity(&new_bmh, true, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+
+        buffer.clear();
+        let imh: KmerMinHash = bmh.clone().into();
+        {
+            serde_json::to_writer(&mut buffer, &imh).unwrap();
+        }
+
+        let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap();
+        let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap();
+
+        assert_eq!(mh.md5sum(), new_mh.md5sum());
+        assert_eq!(bmh.md5sum(), new_bmh.md5sum());
+        assert_eq!(bmh.md5sum(), new_mh.md5sum());
+        assert_eq!(mh.md5sum(), new_bmh.md5sum());
+
+        assert_eq!(mh.mins(), new_mh.mins());
+        assert_eq!(bmh.mins(), new_bmh.mins());
+        assert_eq!(bmh.mins(), new_mh.mins());
+        assert_eq!(mh.mins(), new_bmh.mins());
+
+        assert_eq!(mh.abunds(), new_mh.abunds());
+        assert_eq!(bmh.abunds(), new_bmh.abunds());
+        assert_eq!(bmh.abunds(), new_mh.abunds());
+        assert_eq!(mh.abunds(), new_bmh.abunds());
+
+        assert_eq!(mh.to_vec(), new_mh.to_vec());
+        assert_eq!(bmh.to_vec(), new_bmh.to_vec());
+        assert_eq!(bmh.to_vec(), new_mh.to_vec());
+        assert_eq!(mh.to_vec(), new_bmh.to_vec());
+
+        assert_eq!(mh.to_vec_abunds(), new_mh.to_vec_abunds());
+        assert_eq!(bmh.to_vec_abunds(), new_bmh.to_vec_abunds());
+        assert_eq!(bmh.to_vec_abunds(), new_mh.to_vec_abunds());
+        assert_eq!(mh.to_vec_abunds(), new_bmh.to_vec_abunds());
+
+        assert!(
+            (mh.similarity(&new_mh, false, false).unwrap()
+                - bmh.similarity(&new_bmh, false, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+
+        assert!(
+            (mh.similarity(&new_mh, true, false).unwrap()
+                - bmh.similarity(&new_bmh, true, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+    }
+}
+
+#[test]
+fn load_save_minhash_sketches_abund() {
+    let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    filename.push("../../tests/test-data/gather-abund/reads-s10-s11.sig");
+
+    let file = File::open(filename).unwrap();
+    let reader = BufReader::new(file);
+    let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
+
+    let sig = sigs.get(0).unwrap();
+    let sketches = sig.sketches();
+    let mut buffer = vec![];
+
+    if let Sketch::MinHash(mh) = &sketches[0] {
+        let bmh: KmerMinHashBTree = mh.clone().into();
+        {
+            serde_json::to_writer(&mut buffer, &bmh).unwrap();
+        }
+
+        let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap();
+        let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap();
+
+        assert_eq!(mh.md5sum(), new_mh.md5sum());
+        assert_eq!(bmh.md5sum(), new_bmh.md5sum());
+        assert_eq!(bmh.md5sum(), new_mh.md5sum());
+        assert_eq!(mh.md5sum(), new_bmh.md5sum());
+
+        assert_eq!(mh.mins(), new_mh.mins());
+        assert_eq!(bmh.mins(), new_bmh.mins());
+        assert_eq!(bmh.mins(), new_mh.mins());
+        assert_eq!(mh.mins(), new_bmh.mins());
+
+        assert_eq!(mh.abunds(), new_mh.abunds());
+        assert_eq!(bmh.abunds(), new_bmh.abunds());
+        assert_eq!(bmh.abunds(), new_mh.abunds());
+        assert_eq!(mh.abunds(), new_bmh.abunds());
+
+        assert_eq!(mh.to_vec(), new_mh.to_vec());
+        assert_eq!(bmh.to_vec(), new_bmh.to_vec());
+        assert_eq!(bmh.to_vec(), new_mh.to_vec());
+        assert_eq!(mh.to_vec(), new_bmh.to_vec());
+
+        assert_eq!(mh.to_vec_abunds(), new_mh.to_vec_abunds());
+        assert_eq!(bmh.to_vec_abunds(), new_bmh.to_vec_abunds());
+        assert_eq!(bmh.to_vec_abunds(), new_mh.to_vec_abunds());
+        assert_eq!(mh.to_vec_abunds(), new_bmh.to_vec_abunds());
+
+        assert!(
+            (mh.similarity(&new_mh, false, false).unwrap()
+                - bmh.similarity(&new_bmh, false, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+
+        assert!(
+            (mh.similarity(&new_mh, true, false).unwrap()
+                - bmh.similarity(&new_bmh, true, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+
+        buffer.clear();
+        let imh: KmerMinHash = bmh.clone().into();
+        {
+            serde_json::to_writer(&mut buffer, &imh).unwrap();
+        }
+
+        let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap();
+        let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap();
+
+        assert_eq!(mh.md5sum(), new_mh.md5sum());
+        assert_eq!(bmh.md5sum(), new_bmh.md5sum());
+        assert_eq!(bmh.md5sum(), new_mh.md5sum());
+        assert_eq!(mh.md5sum(), new_bmh.md5sum());
+
+        assert_eq!(mh.mins(), new_mh.mins());
+        assert_eq!(bmh.mins(), new_bmh.mins());
+        assert_eq!(bmh.mins(), new_mh.mins());
+        assert_eq!(mh.mins(), new_bmh.mins());
+
+        assert_eq!(mh.abunds(), new_mh.abunds());
+        assert_eq!(bmh.abunds(), new_bmh.abunds());
+        assert_eq!(bmh.abunds(), new_mh.abunds());
+        assert_eq!(mh.abunds(), new_bmh.abunds());
+
+        assert!(
+            (mh.similarity(&new_mh, false, false).unwrap()
+                - bmh.similarity(&new_bmh, false, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+
+        assert!(
+            (mh.similarity(&new_mh, true, false).unwrap()
+                - bmh.similarity(&new_bmh, true, false).unwrap())
+            .abs()
+                < EPSILON
+        );
+    }
+}
+
+#[test]
+fn merge_empty_scaled() {
+    let max_hash = max_hash_for_scaled(10).unwrap();
+    let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    let c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    a.merge(&c).unwrap();
+    b.merge(&d).unwrap();
+
+    assert!(a.is_empty());
+    assert!(b.is_empty());
+
+    a.add_hash_with_abundance(0, 0);
+    assert!(a.is_empty());
+    b.add_hash_with_abundance(0, 0);
+    assert!(b.is_empty());
+
+    a.clear();
+    assert!(a.is_empty());
+    b.clear();
+    assert!(b.is_empty());
+}
+
+#[test]
+fn check_errors() {
+    let max_hash = max_hash_for_scaled(10).unwrap();
+    let mut a = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, false);
+    let mut b = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 42, max_hash, false);
+
+    // sequence too short: OK
+    assert!(a.add_sequence(b"AC", false).is_ok());
+    assert!(b.add_sequence(b"AC", false).is_ok());
+
+    // invalid base, throw error
+    assert!(a.add_sequence(b"ACTGNN", false).is_err());
+    assert!(b.add_sequence(b"ACTGNN", false).is_err());
+
+    a.add_hash(1);
+    b.add_hash(1);
+
+    // Can't set abundance after something was inserted
+    assert!(a.enable_abundance().is_err());
+    assert!(b.enable_abundance().is_err());
+
+    // Can't change hash function after insertion
+    assert!(a.set_hash_function(HashFunctions::murmur64_hp).is_err());
+    assert!(b.set_hash_function(HashFunctions::murmur64_hp).is_err());
+
+    // setting to the same hash function is fine
+    assert!(a.set_hash_function(HashFunctions::murmur64_DNA).is_ok());
+    assert!(b.set_hash_function(HashFunctions::murmur64_DNA).is_ok());
+
+    let c = KmerMinHash::new(0, 7, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let d = KmerMinHashBTree::new(0, 7, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    // different ksize
+    assert!(a.check_compatible(&c).is_err());
+    assert!(b.check_compatible(&d).is_err());
+
+    let c = KmerMinHash::new(0, 6, HashFunctions::murmur64_protein, 42, max_hash, true);
+    let d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_protein, 42, max_hash, true);
+
+    // different hash_function
+    assert!(a.check_compatible(&c).is_err());
+    assert!(b.check_compatible(&d).is_err());
+
+    let c = KmerMinHash::new(0, 6, HashFunctions::murmur64_DNA, 31, max_hash, true);
+    let d = KmerMinHashBTree::new(0, 6, HashFunctions::murmur64_DNA, 31, max_hash, true);
+
+    // different seed
+    assert!(a.check_compatible(&c).is_err());
+    assert!(b.check_compatible(&d).is_err());
+}
+
+//fn prop_merge(seq1 in "[ACGT]{6,100}", seq2 in "[ACGT]{6,200}") {
+
+proptest! {
+#[test]
+fn load_save_minhash_dayhoff(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") {
+    let max_hash = max_hash_for_scaled(10).unwrap();
+    let mut a = KmerMinHash::new(0, 3, HashFunctions::murmur64_dayhoff, 42, max_hash, true);
+    let mut b = KmerMinHashBTree::new(0, 3, HashFunctions::murmur64_dayhoff, 42, max_hash, true);
+
+    a.add_protein(seq.as_bytes()).unwrap();
+    b.add_protein(seq.as_bytes()).unwrap();
+
+    let mut buffer_a = vec![];
+    let mut buffer_b = vec![];
+
+    {
+        serde_json::to_writer(&mut buffer_a, &a).unwrap();
+        serde_json::to_writer(&mut buffer_b, &b).unwrap();
+    }
+
+    assert_eq!(buffer_a, buffer_b);
+
+    let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap();
+    let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap();
+
+    assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON);
+    assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON);
+}
+}
+
+proptest! {
+#[test]
+fn load_save_minhash_hp(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") {
+    let max_hash = max_hash_for_scaled(10).unwrap();
+    let mut a = KmerMinHash::new(0, 3, HashFunctions::murmur64_hp, 42, max_hash, true);
+    let mut b = KmerMinHashBTree::new(0, 3, HashFunctions::murmur64_hp, 42, max_hash, true);
+
+    a.add_protein(seq.as_bytes()).unwrap();
+    b.add_protein(seq.as_bytes()).unwrap();
+
+    let mut buffer_a = vec![];
+    let mut buffer_b = vec![];
+
+    {
+        serde_json::to_writer(&mut buffer_a, &a).unwrap();
+        serde_json::to_writer(&mut buffer_b, &b).unwrap();
+    }
+
+    assert_eq!(buffer_a, buffer_b);
+
+    let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap();
+    let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap();
+
+    assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON);
+    assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON);
+}
+}
+
+proptest! {
+#[test]
+fn load_save_minhash_dna(seq in "ACGTN{0,1000}") {
+    let max_hash = max_hash_for_scaled(10).unwrap();
+    let mut a = KmerMinHash::new(0, 21, HashFunctions::murmur64_DNA, 42, max_hash, true);
+    let mut b = KmerMinHashBTree::new(0, 21, HashFunctions::murmur64_DNA, 42, max_hash, true);
+
+    a.add_sequence(seq.as_bytes(), true).unwrap();
+    b.add_sequence(seq.as_bytes(), true).unwrap();
+
+    // Can't add_protein to a DNA sig
+    a.add_protein(seq.as_bytes()).is_err();
+    b.add_protein(seq.as_bytes()).is_err();
+
+    let mut buffer_a = vec![];
+    let mut buffer_b = vec![];
+
+    {
+        serde_json::to_writer(&mut buffer_a, &a).unwrap();
+        serde_json::to_writer(&mut buffer_b, &b).unwrap();
+    }
+
+    assert_eq!(buffer_a, buffer_b);
+
+    let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap();
+    let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap();
+
+    assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON);
+    assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON);
+}
+}
diff --git a/src/core/tests/signature.rs b/src/core/tests/signature.rs
index 60dd07217b..7070de13c6 100644
--- a/src/core/tests/signature.rs
+++ b/src/core/tests/signature.rs
@@ -1,12 +1,13 @@
-use serde_json;
-
 use std::fs::File;
 use std::io::BufReader;
+use std::io::Read;
 use std::path::PathBuf;
 
+use needletail::parse_sequence_reader;
 use sourmash::cmd::ComputeParameters;
 use sourmash::signature::Signature;
 use sourmash::signature::SigsTrait;
+use sourmash::sketch::Sketch;
 
 #[test]
 fn load_signature() {
@@ -26,6 +27,43 @@ fn load_signature() {
     assert_eq!(sig.hash_function(), "0.murmur64");
     assert_eq!(sig.name(), "s10+s11");
     assert_eq!(sig.size(), 4);
+
+    let sketches = sig.sketches();
+    match sketches[0] {
+        Sketch::MinHash(_) => (),
+        Sketch::LargeMinHash(_) => panic!(),
+        Sketch::UKHS(_) => panic!(),
+    }
+}
+
+#[test]
+fn load_signature_2() {
+    let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    filename.push("../../tests/test-data/47.abunds.fa.sig");
+
+    let file = File::open(filename).unwrap();
+    let reader = BufReader::new(file);
+    let sigs: Vec<Signature> = serde_json::from_reader(reader).expect("Loading error");
+
+    assert_eq!(sigs.len(), 1);
+
+    let sig = sigs.get(0).unwrap();
+    assert_eq!(sig.class(), "sourmash_signature");
+    assert_eq!(sig.email(), "");
+    assert_eq!(sig.filename(), "47.fa");
+    assert_eq!(sig.hash_function(), "0.murmur64");
+    assert_eq!(
+        sig.name(),
+        "NC_009665.1 Shewanella baltica OS185, complete genome"
+    );
+    assert_eq!(sig.size(), 1);
+
+    let sketches = sig.sketches();
+    match sketches[0] {
+        Sketch::MinHash(_) => (),
+        Sketch::LargeMinHash(_) => panic!(),
+        Sketch::UKHS(_) => panic!(),
+    }
 }
 
 #[test]
@@ -106,3 +144,33 @@ fn signature_add_protein() {
     assert_eq!(sketches[0].size(), 3);
     assert_eq!(sketches[1].size(), 2);
 }
+
+#[test]
+fn signature_add_sequence_cp() {
+    let mut cp = ComputeParameters::default();
+    cp.dayhoff = true;
+    cp.protein = true;
+    cp.hp = true;
+    cp.dna = true;
+
+    let mut sig = Signature::from_params(&cp);
+
+    let mut data: Vec<u8> = vec![];
+    let mut f = File::open("../../tests/test-data/ecoli.genes.fna").unwrap();
+    let _ = f.read_to_end(&mut data);
+
+    parse_sequence_reader(
+        &data[..],
+        |_| {},
+        |rec| {
+            sig.add_sequence(&rec.seq, false).unwrap();
+        },
+    )
+    .unwrap();
+
+    let sketches = sig.sketches();
+    assert_eq!(sketches.len(), 12);
+    for sk in sketches {
+        assert_eq!(sk.size(), 500);
+    }
+}
diff --git a/src/core/tests/test.rs b/src/core/tests/test.rs
index f6a471df51..1df0e75049 100644
--- a/src/core/tests/test.rs
+++ b/src/core/tests/test.rs
@@ -1,5 +1,3 @@
-use sourmash;
-
 #[test]
 fn test_murmur() {
     assert_eq!(sourmash::_hash_murmur(b"ACG", 42), 1731421407650554201)