diff --git a/README.md b/README.md
index 4ebd933..1f49579 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,15 @@
 # VickyStore
 
-A pure rust implementation of a fast, persistent, in-process key-value store, that relies on a novel sharding
-mechanism.
+A pure Rust implementation of a fast (*blazingly* :tm:, of course), persistent, in-process key-value store that
+relies on a novel sharding algorithm. Just how blazingly? It's over 9000!
+
+| Operation | Time  |
+|-----------|-------|
+| Lookup    | < 1us |
+| Insert    | < 2us |
+| Removal   | < 1us |
+
+See [the benchmark](vicky-perf/README.md).
+
 ## Overview
 Being a hash-table, the key is hashed, producing a 64 bit number. The 16 most significant bits select
@@ -119,8 +128,7 @@ for res in db.iter_collection("mycoll") {
 ```
 
 ## Design Goals
-* Fast and efficient
-* Low memory footprint
+* Fast and efficient, with a very low memory footprint (~0.6% overhead)
 * No heavy/unbounded merges
 * No Write-Ahead Log (WAL) or journalling of any kind
 * Crash safe: you may lose the latest operations, but never be in an inconsistent state
diff --git a/vicky-perf/Cargo.toml b/vicky-perf/Cargo.toml
index a95b9ac..e720039 100644
--- a/vicky-perf/Cargo.toml
+++ b/vicky-perf/Cargo.toml
@@ -5,3 +5,6 @@ edition = "2021"
 
 [dependencies]
 vicky-store={path=".."}
+
+[features]
+use_sink=[]
diff --git a/vicky-perf/README.md b/vicky-perf/README.md
index 5f1e6b3..2d705ca 100644
--- a/vicky-perf/README.md
+++ b/vicky-perf/README.md
@@ -4,59 +4,73 @@ Performance results from my machine
 * Lenovo ThinkPad X1 Carbon Gen 10 (12th Gen Intel® Core™ i7-1260P × 16)
 * RAM: 32.0 GiB
 * SSD: 512 GB
-* Built with `cargo build --release`
+* Built with `cargo build -F use_sink --release` (the `use_sink` feature makes sure the optimizer can't
+  eliminate the code paths under test, at the cost of some per-operation overhead)
 * Running on a local filesystem
 
+### Smallish entries (4-byte keys, 3-byte values)
 ```
 1000000 small entries with pre-split
- Small entries insert: 1.388us
- Small entries get 100% existing: 0.486us
- Small entries get 50% existing: 0.483us
- Small entries removal: 0.514us
- Small entries mixed: 1.837us
+ Small entries insert: 1.347us
+ Small entries get 100% existing: 0.477us
+ Small entries get 50% existing: 0.474us
+ Small entries removal: 0.493us
+ Small entries mixed: 1.822us
 
 1000000 small entries without pre-split
- Small entries insert: 4.332us
- Small entries get 100% existing: 0.524us
- Small entries get 50% existing: 0.527us
- Small entries removal: 0.543us
- Small entries mixed: 4.777us
+ Small entries insert: 4.151us
+ Small entries get 100% existing: 0.517us
+ Small entries get 50% existing: 0.515us
+ Small entries removal: 0.535us
+ Small entries mixed: 4.633us
+```
 
+### Largish entries (100-byte keys, 300-byte values)
+```
 500000 large entries with pre-split
- Large entries insert: 1.703us
- Large entries get 100% existing: 0.634us
- Large entries removal: 0.134us
+ Large entries insert: 1.624us
+ Large entries get 100% existing: 0.618us
+ Large entries removal: 0.128us
 
 500000 large entries without pre-split
- Large entries insert: 5.557us
- Large entries get 100% existing: 0.782us
- Large entries removal: 0.145us
+ Large entries insert: 5.422us
+ Large entries get 100% existing: 0.731us
+ Large entries removal: 0.139us
+```
 
+### Collections (linked-lists)
+```
 10 collections with 100000 items in each
- Inserts: 8.356us
- Updates: 2.704us
- Gets: 0.632us
- Iterations: 0.576us
- Removal 50% of items: 4.192us
- Discards: 0.536us
-
-10 threads accessing 100000 different keys - with pre-split
- Inserts: 3.283us
- Gets: 0.976us
- Removals: 0.886us
-
-10 threads accessing 100000 different keys - without pre-split
- Inserts: 19.353us
- Gets: 1.027us
- Removals: 0.927us
-
-10 threads accessing 1000000 same keys - with pre-split
- Inserts: 12.029us
- Gets: 2.333us
- Removals: 2.989us
-
-10 threads accessing 1000000 same keys - without pre-split
- Inserts: 10.777us
- Gets: 2.586us
- Removals: 2.818us
+ Inserts: 8.104us
+ Updates: 2.593us
+ Gets: 0.612us
+ Iterations: 0.556us
+ Removal of 50% of items: 7.945us
+ Discards: 0.972us
+```
+
+### Threads without contention (different keys)
+```
+No-contention: 10 threads accessing 100000 different keys - with pre-split
+ Inserts: 3.238us
+ Gets: 1.004us
+ Removals: 0.929us
+
+No-contention: 10 threads accessing 100000 different keys - without pre-split
+ Inserts: 19.497us
+ Gets: 1.119us
+ Removals: 1.001us
+```
+
+### Threads with contention (same keys)
+```
+Contention: 10 threads accessing 1000000 same keys - with pre-split
+ Inserts: 4.556us
+ Gets: 1.204us
+ Removals: 1.334us
+
+Contention: 10 threads accessing 1000000 same keys - without pre-split
+ Inserts: 12.167us
+ Gets: 2.195us
+ Removals: 2.257us
 ```
diff --git a/vicky-perf/src/main.rs b/vicky-perf/src/main.rs
index 454d858..17cd0b2 100644
--- a/vicky-perf/src/main.rs
+++ b/vicky-perf/src/main.rs
@@ -5,6 +5,22 @@ use std::{
 
 use vicky_store::{Config, Result, VickyStore};
 
+static SINK: AtomicU64 = AtomicU64::new(0);
+
+// The sink consumes the returned data, making sure the compiler cannot optimize out actually reading it
+#[cfg(feature = "use_sink")]
+fn sink(buf: &[u8]) {
+    if !buf.is_empty() {
+        SINK.fetch_add(
+            buf[0] as u64 + buf[buf.len() - 1] as u64,
+            std::sync::atomic::Ordering::Relaxed,
+        );
+    }
+}
+
+#[cfg(not(feature = "use_sink"))]
+fn sink(_buf: &[u8]) {}
+
 fn run2(msg: &str, iters: u32, mut func: impl FnMut() -> Result<()>) -> Result<()> {
     let t0 = Instant::now();
     func()?;
@@ -50,17 +66,19 @@ fn test_small_keys(num_keys: u32) -> Result<()> {
 
     run(" Small entries get 100% existing", num_keys, |i| {
         let val = db.get(&(i * 2).to_le_bytes())?;
-        debug_assert!(val.is_some());
+        sink(&val.unwrap());
         Ok(())
     })?;
 
     run(" Small entries get 50% existing", num_keys, |i| {
-        db.get(&(i * 2).to_le_bytes())?;
+        let val = db.get(&(i * 2).to_le_bytes())?;
+        sink(&val.unwrap_or_default());
        Ok(())
     })?;
 
     run(" Small entries removal", num_keys, |i| {
-        db.remove(&(i * 2).to_le_bytes())?;
+        let val = db.remove(&(i * 2).to_le_bytes())?;
+        sink(&val.unwrap());
        Ok(())
     })?;
 
@@ -68,7 +86,8 @@ fn test_small_keys(num_keys: u32) -> Result<()> {
 
     run(" Small entries mixed", num_keys, |i| {
         db.set(&(i * 2).to_le_bytes(), "xxx")?;
-        db.get(&(i / 2).to_le_bytes())?;
+        let val = db.get(&(i / 2).to_le_bytes())?;
+        sink(&val.unwrap_or_default());
         if i % 8 == 7 {
             db.remove(&(i / 2).to_le_bytes())?;
         }
@@ -111,14 +130,15 @@ fn test_large_keys(num_keys: u32) -> Result<()> {
         let mut key = [99u8; 100];
         key[0..4].copy_from_slice(&i.to_le_bytes());
         let val = db.get(&key)?;
-        debug_assert!(val.is_some());
+        sink(&val.unwrap_or_default());
         Ok(())
     })?;
 
     run(" Large entries removal", num_keys, |i| {
         let mut key = [99u8; 100];
         key[0..4].copy_from_slice(&i.to_le_bytes());
-        db.remove(&(i * 2).to_le_bytes())?;
+        let val = db.remove(&key)?;
+        sink(&val.unwrap_or_default());
         Ok(())
     })?;
 
@@ -160,7 +180,7 @@ fn test_collections(num_colls: u32, num_items_per_coll: u32) -> Result<()> {
         for coll in 0..num_colls {
             for item in 0..num_items_per_coll {
                let val = db.get_from_collection(&coll.to_le_bytes(), &item.to_le_bytes())?;
-                debug_assert!(val.is_some());
+                sink(&val.unwrap());
             }
         }
         Ok(())
     })?;
@@ -169,13 +189,14 @@ fn test_collections(num_colls: u32, num_items_per_coll: u32) -> Result<()> {
     run2(" Iterations", num_colls * num_items_per_coll, || {
         for coll in 0..num_colls {
             let count = db.iter_collection(&coll.to_le_bytes()).count();
+            sink(&count.to_le_bytes());
             debug_assert_eq!(count, num_items_per_coll as usize);
         }
         Ok(())
     })?;
 
     run2(
-        " Removal 50% of items",
+        " Removal of 50% of items",
         num_colls * num_items_per_coll / 2,
         || {
             for coll in 0..num_colls {
@@ -183,7 +204,7 @@ fn test_collections(num_colls: u32, num_items_per_coll: u32) -> Result<()> {
                if item % 2 == 0 {
                    let val =
                        db.remove_from_collection(&coll.to_le_bytes(), &item.to_le_bytes())?;
-                    debug_assert!(val.is_some());
+                    sink(&val.unwrap());
                }
            }
        }
@@ -219,10 +240,10 @@ fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> {
         db.clear()?;
 
         if pre_split {
-            println!("{num_threads} threads accessing {num_keys} different keys - with pre-split");
+            println!("No-contention: {num_threads} threads accessing {num_keys} different keys - with pre-split");
         } else {
             println!(
-                "{num_threads} threads accessing {num_keys} different keys - without pre-split"
+                "No-contention: {num_threads} threads accessing {num_keys} different keys - without pre-split"
             );
         }
 
@@ -254,6 +275,7 @@ fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> {
                 for i in thd * num_keys..(thd + 1) * num_keys {
                     let val = db.get(&i.to_le_bytes())?;
                     debug_assert_eq!(val, Some(thd.to_le_bytes().to_vec()));
+                    sink(&val.unwrap());
                 }
                 get_time_ns.fetch_add(
                     Instant::now().duration_since(t0).as_nanos() as u64,
@@ -266,6 +288,7 @@ fn test_concurrency_without_contention(num_threads: u32, num_keys: u32) -> Result<()> {
                 for i in thd * num_keys..(thd + 1) * num_keys {
                     let val = db.remove(&i.to_le_bytes())?;
                     debug_assert!(val.is_some());
+                    sink(&val.unwrap());
                 }
                 removal_time_ns.fetch_add(
                     Instant::now().duration_since(t0).as_nanos() as u64,
@@ -315,7 +338,8 @@ fn do_gets(num_keys: u32, get_time_ns: &Arc<AtomicU64>, db: &Arc<VickyStore>) -> Result<()> {
     let t0 = Instant::now();
     for i in 0..num_keys {
-        db.get(&i.to_le_bytes())?;
+        let val = db.get(&i.to_le_bytes())?;
+        sink(&val.unwrap_or_default());
     }
     get_time_ns.fetch_add(
         Instant::now().duration_since(t0).as_nanos() as u64,
@@ -331,7 +355,8 @@ fn do_removals(
     num_keys: u32,
     removal_time_ns: &Arc<AtomicU64>,
     db: &Arc<VickyStore>,
 ) -> Result<()> {
     let t0 = Instant::now();
     for i in 0..num_keys {
-        db.remove(&i.to_le_bytes())?;
+        let val = db.remove(&i.to_le_bytes())?;
+        sink(&val.unwrap_or_default());
     }
     removal_time_ns.fetch_add(
         Instant::now().duration_since(t0).as_nanos() as u64,
@@ -356,9 +381,11 @@ fn test_concurrency_with_contention(num_threads: u32, num_keys: u32) -> Result<()> {
         db.clear()?;
 
         if pre_split {
-            println!("{num_threads} threads accessing {num_keys} same keys - with pre-split");
+            println!(
+                "Contention: {num_threads} threads accessing {num_keys} same keys - with pre-split"
+            );
         } else {
-            println!("{num_threads} threads accessing {num_keys} same keys - without pre-split");
+            println!("Contention: {num_threads} threads accessing {num_keys} same keys - without pre-split");
         }
 
         let insert_time_ns = Arc::new(AtomicU64::new(0));
@@ -416,5 +443,7 @@ fn main() -> Result<()> {
     test_concurrency_without_contention(10, 100_000)?;
     test_concurrency_with_contention(10, 1_000_000)?;
 
+    println!("junk={}", SINK.load(std::sync::atomic::Ordering::Relaxed));
+
     Ok(())
 }
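**Note on the `sink` approach.** The patch defeats dead-code elimination by folding the first and last byte of every returned buffer into an `AtomicU64` and printing it at exit. A standard-library alternative is `std::hint::black_box` (stable since Rust 1.66), which avoids the per-call atomic `fetch_add` but is documented as a best-effort hint. A minimal sketch, not part of the patch; timings taken this way would not be directly comparable to the `use_sink` numbers above:

```rust
// Sketch: a black_box-based variant of the patch's sink(). The optimizer must
// assume black_box observes its argument, so the read that produced the buffer
// (e.g. db.get(...)) cannot be optimized away. No atomic traffic is added.
use std::hint::black_box;

fn sink(buf: &[u8]) {
    black_box(buf);
}

fn main() {
    // Stand-in for a value returned by db.get(...).
    let val: Option<Vec<u8>> = Some(vec![1, 2, 3]);
    sink(&val.unwrap_or_default());
}
```

The atomic version has one advantage that may explain the design choice: its effect is observable (the final `junk=` print), so there is no doubt the reads survive optimization; the cost of the relaxed `fetch_add` on every operation is the "performance penalty" the benchmark README mentions.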
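**Note on reading the tables.** The `run2` shown (truncated) in the patch takes a timestamp, runs the closure once over all iterations, and appears to divide the total elapsed time by the iteration count, so loop and closure overhead is included in every per-operation figure. A minimal sketch of that measurement pattern under this assumption; `per_op_micros` is a hypothetical name, not the patch's API:

```rust
// Sketch of the run2-style measurement assumed above: time one batch that
// performs `iters` operations internally, then report microseconds per op.
use std::time::Instant;

fn per_op_micros(iters: u32, batch: impl FnOnce()) -> f64 {
    let t0 = Instant::now();
    batch();
    // Total elapsed nanoseconds divided by iterations, reported in us.
    t0.elapsed().as_nanos() as f64 / iters as f64 / 1000.0
}

fn main() {
    let mut acc = 0u64;
    let us = per_op_micros(1_000_000, || {
        for i in 0..1_000_000u64 {
            acc = acc.wrapping_add(i);
        }
    });
    println!("{us:.4}us per op (acc={acc})");
}
```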
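**Note on the sharding scheme.** The README overview touched by this patch says the key is hashed to a 64 bit number and the 16 most significant bits select the shard. A minimal sketch of that selection step; the hasher here is illustrative only and makes no claim about VickyStore's actual hash function:

```rust
// Sketch of shard selection as described in the README overview: hash the key
// to 64 bits, take the 16 most significant bits as the shard selector.
// DefaultHasher is an assumption for illustration, not VickyStore's hasher.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn shard_selector(key: &[u8]) -> u16 {
    let mut h = DefaultHasher::new();
    key.hash(&mut h);
    (h.finish() >> 48) as u16 // top 16 of the 64 hash bits
}

fn main() {
    println!("shard selector for b\"hello\": {}", shard_selector(b"hello"));
}
```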