diff --git a/README.md b/README.md
index 4ebd933..1f49579 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,15 @@
 # VickyStore
 
-A pure rust implementation of a fast, persistent, in-process key-value store, that relies on a novel sharding
-mechanism.
+A pure Rust implementation of a fast (*blazingly* :tm:, of course), persistent, in-process key-value store that
+relies on a novel sharding algorithm. Just how blazingly? It's over 9000!
+
+| Operation | Time  |
+|-----------|-------|
+| Lookup    | < 1us |
+| Insert    | < 2us |
+| Removal   | < 1us |
+
+See [the benchmark](vicky-perf/README.md).
+
 ## Overview
 Being a hash-table, the key is hashed, producing a 64 bit number. The 16 most significant bits select
@@ -119,8 +128,7 @@ for res in db.iter_collection("mycoll") {
 ```
 
 ## Design Goals
-* Fast and efficient
-* Low memory footprint
+* Fast and efficient, with a very low memory footprint (~0.6% overhead)
 * No heavy/unbounded merges
 * No Write-Ahead Log (WAL) or journalling of any kind
 * Crash safe: you may lose the latest operations, but never be in an inconsistent state
diff --git a/vicky-perf/Cargo.toml b/vicky-perf/Cargo.toml
index a95b9ac..e720039 100644
--- a/vicky-perf/Cargo.toml
+++ b/vicky-perf/Cargo.toml
@@ -5,3 +5,6 @@ edition = "2021"
 
 [dependencies]
 vicky-store={path=".."}
+
+[features]
+use_sink=[]
diff --git a/vicky-perf/README.md b/vicky-perf/README.md
index 5f1e6b3..2d705ca 100644
--- a/vicky-perf/README.md
+++ b/vicky-perf/README.md
@@ -4,59 +4,73 @@ Performance results from my machine
 * Lenovo ThinkPad X1 Carbon Gen 10 (12th Gen Intel® Core™ i7-1260P × 16)
 * RAM: 32.0 GiB
 * SSD: 512 GB
-* Built with `cargo build --release`
+* Built with `cargo build -F use_sink --release` (the `use_sink` feature makes sure the optimizer can't
+  eliminate the code paths under test, at the cost of some per-operation overhead)
 * Running on a local filesystem
 
+### Smallish entries (4-byte keys, 3-byte values)
 ```
 1000000 small entries with pre-split
- Small entries insert: 1.388us
- Small entries get 100% existing: 0.486us
- Small entries get 50% existing: 0.483us
- Small entries removal: 0.514us
- Small entries mixed: 1.837us
+ Small entries insert: 1.347us
+ Small entries get 100% existing: 0.477us
+ Small entries get 50% existing: 0.474us
+ Small entries removal: 0.493us
+ Small entries mixed: 1.822us
 
 1000000 small entries without pre-split
- Small entries insert: 4.332us
- Small entries get 100% existing: 0.524us
- Small entries get 50% existing: 0.527us
- Small entries removal: 0.543us
- Small entries mixed: 4.777us
+ Small entries insert: 4.151us
+ Small entries get 100% existing: 0.517us
+ Small entries get 50% existing: 0.515us
+ Small entries removal: 0.535us
+ Small entries mixed: 4.633us
+```
 
+### Largish entries (100-byte keys, 300-byte values)
+```
 500000 large entries with pre-split
- Large entries insert: 1.703us
- Large entries get 100% existing: 0.634us
- Large entries removal: 0.134us
+ Large entries insert: 1.624us
+ Large entries get 100% existing: 0.618us
+ Large entries removal: 0.128us
 
 500000 large entries without pre-split
- Large entries insert: 5.557us
- Large entries get 100% existing: 0.782us
- Large entries removal: 0.145us
+ Large entries insert: 5.422us
+ Large entries get 100% existing: 0.731us
+ Large entries removal: 0.139us
+```
 
+### Collections (linked-lists)
+```
 10 collections with 100000 items in each
- Inserts: 8.356us
- Updates: 2.704us
- Gets: 0.632us
- Iterations: 0.576us
- Removal 50% of items: 4.192us
- Discards: 0.536us
-
-10 threads accessing 100000 different keys - with pre-split
- Inserts: 3.283us
- Gets: 0.976us
- Removals: 0.886us
-
-10 threads accessing 100000 different keys - without pre-split
- Inserts: 19.353us
- Gets: 1.027us
- Removals: 0.927us
-
-10 threads accessing 1000000 same keys - with pre-split
- Inserts: 12.029us
- Gets: 2.333us
- Removals: 2.989us
-
-10 threads accessing 1000000 same keys - without pre-split
- Inserts: 10.777us
- Gets: 2.586us
- Removals: 2.818us
+ Inserts: 8.104us
+ Updates: 2.593us
+ Gets: 0.612us
+ Iterations: 0.556us
+ Removal of 50% of items: 7.945us
+ Discards: 0.972us
+```
+
+### Threads without contention (different keys)
+```
+No-contention: 10 threads accessing 100000 different keys - with pre-split
+ Inserts: 3.238us
+ Gets: 1.004us
+ Removals: 0.929us
+
+No-contention: 10 threads accessing 100000 different keys - without pre-split
+ Inserts: 19.497us
+ Gets: 1.119us
+ Removals: 1.001us
+```
+
+### Threads with contention (same keys)
+```
+Contention: 10 threads accessing 1000000 same keys - with pre-split
+ Inserts: 4.556us
+ Gets: 1.204us
+ Removals: 1.334us
+
+Contention: 10 threads accessing 1000000 same keys - without pre-split
+ Inserts: 12.167us
+ Gets: 2.195us
+ Removals: 2.257us
 ```
diff --git a/vicky-perf/src/main.rs b/vicky-perf/src/main.rs
index 454d858..17cd0b2 100644
--- a/vicky-perf/src/main.rs
+++ b/vicky-perf/src/main.rs
@@ -5,6 +5,22 @@ use std::{
 
 use vicky_store::{Config, Result, VickyStore};
 
+static SINK: AtomicU64 = AtomicU64::new(0);
+
+// The sink consumes the returned data, making sure the compiler cannot optimize out actually reading it
+#[cfg(feature = "use_sink")]
+fn sink(buf: &[u8]) {
+    if !buf.is_empty() {
+        SINK.fetch_add(
+            buf[0] as u64 + buf[buf.len() - 1] as u64,
+            std::sync::atomic::Ordering::Relaxed,
+        );
+    }
+}
+
+#[cfg(not(feature = "use_sink"))]
+fn sink(_buf: &[u8]) {}
+
 fn run2(msg: &str, iters: u32, mut func: impl FnMut() -> Result<()>) -> Result<()> {
     let t0 = Instant::now();
     func()?;
@@ -50,17 +66,19 @@ fn test_small_keys(num_keys: u32) -> Result<()> {
 
     run(" Small entries get 100% existing", num_keys, |i| {
         let val = db.get(&(i * 2).to_le_bytes())?;
-        debug_assert!(val.is_some());
+        sink(&val.unwrap());
         Ok(())
     })?;
 
     run(" Small entries get 50% existing", num_keys, |i| {
-        db.get(&(i * 2).to_le_bytes())?;
+        let val = db.get(&(i * 2).to_le_bytes())?;
+        sink(&val.unwrap_or_default());
        Ok(())
     })?;
 
     run(" Small entries removal", num_keys, |i| {
-        db.remove(&(i * 2).to_le_bytes())?;
+        let val = db.remove(&(i * 2).to_le_bytes())?;
+        sink(&val.unwrap());
        Ok(())
     })?;
 
@@ -68,7 +86,8 @@ fn test_small_keys(num_keys: u32) -> Result<()> {
 
     run(" Small entries mixed", num_keys, |i| {
         db.set(&(i * 2).to_le_bytes(), "xxx")?;
-        db.get(&(i / 2).to_le_bytes())?;
+        let val = db.get(&(i / 2).to_le_bytes())?;
+        sink(&val.unwrap_or_default());
         if i % 8 == 7 {
             db.remove(&(i / 2).to_le_bytes())?;
         }
@@ -111,14 +130,15 @@ fn test_large_keys(num_keys: u32) -> Result<()> {
         let mut key = [99u8; 100];
         key[0..4].copy_from_slice(&i.to_le_bytes());
         let val = db.get(&key)?;
-        debug_assert!(val.is_some());
+        sink(&val.unwrap_or_default());
         Ok(())
     })?;
 
     run(" Large entries removal", num_keys, |i| {
         let mut key = [99u8; 100];
         key[0..4].copy_from_slice(&i.to_le_bytes());
-        db.remove(&(i * 2).to_le_bytes())?;
+        let val = db.remove(&key)?;
+        sink(&val.unwrap_or_default());
         Ok(())
     })?;
 
@@ -160,7 +180,7 @@ fn test_collections(num_colls: u32, num_items_per_coll: u32) -> Result<()> {
         for coll in 0..num_colls {
             for item in 0..num_items_per_coll {
                let val = db.get_from_collection(&coll.to_le_bytes(), &item.to_le_bytes())?;
-                debug_assert!(val.is_some());
+                sink(&val.unwrap());
             }
         }
         Ok(())
     })?;
@@ -169,13 +189,14 @@ fn test_collections(num_colls: u32, num_items_per_coll: u32) -> Result<()> {
     run2(" Iterations", num_colls * num_items_per_coll, || {
         for coll in 0..num_colls {
             let count = db.iter_collection(&coll.to_le_bytes()).count();
+            sink(&count.to_le_bytes());
             debug_assert_eq!(count, num_items_per_coll as usize);
         }
         Ok(())
     })?;
 
     run2(
-        " Removal 50% of items",
+        " Removal of 50% of items",
         num_colls * num_items_per_coll / 2,
         || {
             for coll in 0..num_colls {
@@ -183,7 +204,7 @@ fn test_collections(num_colls: u32, num_items_per_coll: u32) -> Result<()> {
                if item % 2 == 0 {
                    let val =
                        db.remove_from_collection(&coll.to_le_bytes(), &item.to_le_bytes())?;
-                    debug_assert!(val.is_some());
+                    sink(&val.unwrap());
                }
            }
        }
@@ -219,10 +240,10 @@ fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> {
         db.clear()?;
 
         if pre_split {
-            println!("{num_threads} threads accessing {num_keys} different keys - with pre-split");
+            println!("No-contention: {num_threads} threads accessing {num_keys} different keys - with pre-split");
         } else {
             println!(
-                "{num_threads} threads accessing {num_keys} different keys - without pre-split"
+                "No-contention: {num_threads} threads accessing {num_keys} different keys - without pre-split"
             );
         }
 
@@ -254,6 +275,7 @@ fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> {
                 for i in thd * num_keys..(thd + 1) * num_keys {
                     let val = db.get(&i.to_le_bytes())?;
                     debug_assert_eq!(val, Some(thd.to_le_bytes().to_vec()));
+                    sink(&val.unwrap());
                 }
                 get_time_ns.fetch_add(
                     Instant::now().duration_since(t0).as_nanos() as u64,
@@ -266,6 +288,7 @@ fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> {
                 for i in thd * num_keys..(thd + 1) * num_keys {
                     let val = db.remove(&i.to_le_bytes())?;
                     debug_assert!(val.is_some());
+                    sink(&val.unwrap());
                 }
                 removal_time_ns.fetch_add(
                     Instant::now().duration_since(t0).as_nanos() as u64,
@@ -315,7 +338,8 @@ fn do_gets(num_keys: u32, get_time_ns: &Arc<AtomicU64>, db: &Arc<VickyStore>) -> Result<()> {
     let t0 = Instant::now();
     for i in 0..num_keys {
-        db.get(&i.to_le_bytes())?;
+        let val = db.get(&i.to_le_bytes())?;
+        sink(&val.unwrap_or_default());
     }
     get_time_ns.fetch_add(
         Instant::now().duration_since(t0).as_nanos() as u64,
@@ -331,7 +355,8 @@ fn do_removals(
     num_keys: u32,
     removal_time_ns: &Arc<AtomicU64>,
     db: &Arc<VickyStore>,
 ) -> Result<()> {
     let t0 = Instant::now();
     for i in 0..num_keys {
-        db.remove(&i.to_le_bytes())?;
+        let val = db.remove(&i.to_le_bytes())?;
+        sink(&val.unwrap_or_default());
     }
     removal_time_ns.fetch_add(
         Instant::now().duration_since(t0).as_nanos() as u64,
@@ -356,9 +381,11 @@ fn test_concurrency_with_contention(num_threads: u32, num_keys: u32) -> Result<()> {
         db.clear()?;
 
         if pre_split {
-            println!("{num_threads} threads accessing {num_keys} same keys - with pre-split");
+            println!(
+                "Contention: {num_threads} threads accessing {num_keys} same keys - with pre-split"
+            );
         } else {
-            println!("{num_threads} threads accessing {num_keys} same keys - without pre-split");
+            println!("Contention: {num_threads} threads accessing {num_keys} same keys - without pre-split");
         }
 
         let insert_time_ns = Arc::new(AtomicU64::new(0));
@@ -416,5 +443,7 @@ fn main() -> Result<()> {
     test_concurrency_without_contention(10, 100_000)?;
     test_concurrency_with_contention(10, 1_000_000)?;
 
+    println!("junk={}", SINK.load(std::sync::atomic::Ordering::Relaxed));
+
     Ok(())
 }
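**Note on the `sink` approach.** The patch defeats dead-code elimination by folding the first and last byte of every returned buffer into an `AtomicU64` and printing it at exit. A standard-library alternative is `std::hint::black_box` (stable since Rust 1.66), which avoids the per-call atomic `fetch_add` but is documented as a best-effort hint. A minimal sketch, not part of the patch; timings taken this way would not be directly comparable to the `use_sink` numbers above:

```rust
// Sketch: a black_box-based variant of the patch's sink(). The optimizer must
// assume black_box observes its argument, so the read that produced the buffer
// (e.g. db.get(...)) cannot be optimized away. No atomic traffic is added.
use std::hint::black_box;

fn sink(buf: &[u8]) {
    black_box(buf);
}

fn main() {
    // Stand-in for a value returned by db.get(...).
    let val: Option<Vec<u8>> = Some(vec![1, 2, 3]);
    sink(&val.unwrap_or_default());
}
```

The atomic version has one advantage that may explain the design choice: its effect is observable (the final `junk=` print), so there is no doubt the reads survive optimization; the cost of the relaxed `fetch_add` on every operation is the "performance penalty" the benchmark README mentions.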
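**Note on reading the tables.** The `run2` shown (truncated) in the patch takes a timestamp, runs the closure once over all iterations, and appears to divide the total elapsed time by the iteration count, so loop and closure overhead is included in every per-operation figure. A minimal sketch of that measurement pattern under this assumption; `per_op_micros` is a hypothetical name, not the patch's API:

```rust
// Sketch of the run2-style measurement assumed above: time one batch that
// performs `iters` operations internally, then report microseconds per op.
use std::time::Instant;

fn per_op_micros(iters: u32, batch: impl FnOnce()) -> f64 {
    let t0 = Instant::now();
    batch();
    // Total elapsed nanoseconds divided by iterations, reported in us.
    t0.elapsed().as_nanos() as f64 / iters as f64 / 1000.0
}

fn main() {
    let mut acc = 0u64;
    let us = per_op_micros(1_000_000, || {
        for i in 0..1_000_000u64 {
            acc = acc.wrapping_add(i);
        }
    });
    println!("{us:.4}us per op (acc={acc})");
}
```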
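**Note on the sharding scheme.** The README overview touched by this patch says the key is hashed to a 64 bit number and the 16 most significant bits select the shard. A minimal sketch of that selection step; the hasher here is illustrative only and makes no claim about VickyStore's actual hash function:

```rust
// Sketch of shard selection as described in the README overview: hash the key
// to 64 bits, take the 16 most significant bits as the shard selector.
// DefaultHasher is an assumption for illustration, not VickyStore's hasher.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn shard_selector(key: &[u8]) -> u16 {
    let mut h = DefaultHasher::new();
    key.hash(&mut h);
    (h.finish() >> 48) as u16 // top 16 of the 64 hash bits
}

fn main() {
    println!("shard selector for b\"hello\": {}", shard_selector(b"hello"));
}
```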