Add examples, document APIs
tomerfiliba committed Aug 4, 2024
1 parent 5c4fef8 commit 9818f68
Showing 15 changed files with 577 additions and 372 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/rust.yml
@@ -19,4 +19,8 @@ jobs:
    - name: Build
      run: cargo build --verbose
    - name: Run tests
      run: cargo test --release --verbose
      run: cargo test --release --verbose -- --nocapture
    - name: Run simple example
      run: cargo run --example simple
    - name: Run multithreaded example
      run: cargo run --example multithreaded
49 changes: 37 additions & 12 deletions README.md
@@ -2,6 +2,40 @@
A pure-Rust implementation of a fast, persistent, in-process key-value store that relies on a novel sharding
mechanism.

## Example
```rust
use vicky_store::{Config, Result, VickyStore};

let db = VickyStore::open("/tmp/vicky-dir", Config::default())?;

db.insert("mykey", "myval")?;
assert_eq!(db.get("mykey")?, Some("myval".into()));

assert_eq!(db.get("yourkey")?, None);

assert_eq!(db.iter().count(), 1);

for res in db.iter() {
    let (k, v) = res?;
    assert_eq!(k, "mykey".into());
    assert_eq!(v, "myval".into());
}

// remove the key so the store is empty again
db.remove("mykey")?;
assert_eq!(db.iter().count(), 0);
```

## Design Goals
* Fast and efficient
* Low memory footprint
* No heavy/unbounded merges
* No Write-Ahead Log (WAL) or journalling of any kind
* Crash safe: you may lose the latest operations, but never be in an inconsistent state
* Splitting/compaction happens per-shard, so there's no global locking
* Suitable for both write-heavy and read-heavy workloads
* Concurrent by design (multiple threads getting/setting/removing keys at the same time)
* The backing store is assumed to be an SSD; it is not optimized for HDDs

## Algorithm
The algorithm is straightforward:
* A key is hashed, producing 64 bits of hash. The most significant 16 bits are taken to be the "shard selector",
  followed by 16 bits of "row selector", followed by 32 bits of "signature" (see the sketch after this list).
@@ -12,7 +46,7 @@ The algorithm is straightforward:
the keys are divided according to their shard selector. This process repeats as needed.
* Inside a shard, we have a header table made of rows, each being an array of signatures. The row selector selects
the key's row, and within the row we use SIMD operations for matching the signature very quickly. This
part of the file is kept mmap'ed.
part of the file is kept `mmap`ed.
* Once we find the correct entry, we get its data offset in the file and read it.
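
A minimal sketch of the hash-splitting step described above — the helper name is hypothetical, but the field widths follow the text:

```rust
/// Hypothetical helper: split a 64-bit hash into the three parts described above.
fn split_hash(hash: u64) -> (u16, u16, u32) {
    let shard_selector = (hash >> 48) as u16; // most significant 16 bits
    let row_selector = (hash >> 32) as u16;   // next 16 bits
    let signature = hash as u32;              // low 32 bits
    (shard_selector, row_selector, signature)
}
```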

The default parameters (chosen by simulations) are shards with 64 rows, each with 512 entries. The chances
@@ -25,20 +59,11 @@ server, followed by the normal sharding mechanism described above.

## Notes
* The file format is not yet stable

## Design Goals
* Fast and efficient
* Low memory footprint
* No heavy/unbounded merges
* No Write-Ahead Log (WAL) or journalling of any kind
* Splitting/compaction happens per-shard, so there's no global locking
* Suitable for both write-heavy/read-heavy workloads
* Concurrent by design (multiple threads getting/setting/removing keys at the same time)
* The backing store is taken to be an SSD, thus it's not optimized for HDDs
* Requires nightly (for `simd_itertools` and BTree cursors), uses very little `unsafe` (required due to `mmap`)

## Roadmap
* Add TTL to keys (easy, but will screw up accounting)
* Add key-prefixes which can be used to implement grouping of keys into "families", i.e. lightweight indexing,
  by storing their parted-hash with an "anti-collision" in a modifiable entry
* Distributed protocol based on file locks (meant to run on a shared network folder)

* Add some schema-like features, maybe using rkyv
57 changes: 57 additions & 0 deletions examples/multithreaded.rs
@@ -0,0 +1,57 @@
use core::str;
use std::{sync::Arc, time::Duration};

use vicky_store::{Config, Result, VickyStore};

fn main() -> Result<()> {
    let db = Arc::new(VickyStore::open("/tmp/vicky-dir-mt", Config::default())?);

    // clear the DB just in case we have something there from before. in real-life scenarios
    // you would probably not clear the DB every time
    db.clear()?;

    // clone db and spawn thread 1
    let db1 = db.clone();
    let h1 = std::thread::spawn(move || -> Result<()> {
        for i in 0..100 {
            db1.insert(&format!("key{i}"), "thread 1")?;
            std::thread::sleep(Duration::from_millis(1));
        }
        Ok(())
    });

    // clone db and spawn thread 2
    let db2 = db.clone();
    let h2 = std::thread::spawn(move || -> Result<()> {
        for i in 0..100 {
            db2.insert(&format!("key{i}"), "thread 2")?;
            std::thread::sleep(Duration::from_millis(1));
        }
        Ok(())
    });

    h1.join().unwrap()?;
    h2.join().unwrap()?;

    for res in db.iter() {
        let (k, v) = res?;
        println!(
            "{} = {}",
            str::from_utf8(&k).unwrap(),
            str::from_utf8(&v).unwrap()
        );
    }

    // key35 = thread 1
    // key41 = thread 1
    // key52 = thread 2
    // key59 = thread 2
    // key48 = thread 2
    // key85 = thread 2
    // key91 = thread 2
    // key26 = thread 1
    // key31 = thread 1
    // ...

    Ok(())
}
35 changes: 35 additions & 0 deletions examples/simple.rs
@@ -0,0 +1,35 @@
use core::str;

use vicky_store::{Config, Result, VickyStore};

fn main() -> Result<()> {
    let db = VickyStore::open("/tmp/vicky-dir", Config::default())?;

    // clear the DB just in case we have something there from before. in real-life scenarios
    // you would probably not clear the DB every time
    db.clear()?;

    println!("{:?}", db.get("mykey")?); // None

    db.insert("mykey", "myval")?;
    println!("{:?}", db.get("mykey")?); // Some([109, 121, 118, 97, 108])

    println!("{:?}", db.remove("mykey")?); // Some([109, 121, 118, 97, 108])
    println!("{:?}", db.remove("mykey")?); // None

    println!("{:?}", db.get("mykey")?); // None

    for i in 0..10 {
        db.insert(&format!("mykey{i}"), &format!("myval{i}"))?;
    }
    for res in db.iter() {
        let (k, v) = res?;
        println!(
            "{} = {}",
            str::from_utf8(&k).unwrap(),
            str::from_utf8(&v).unwrap()
        );
    }

    Ok(())
}
4 changes: 4 additions & 0 deletions src/hashing.rs
@@ -7,9 +7,13 @@ use crate::{Error, Result};
#[derive(Debug, Clone, Copy)]
pub struct SecretKey([u8; 16]);

/// A "nonce" for seeding the hash function (keyed hash). Keeping it secret is only
/// meaningful if you're concerned with DoS attacks.
impl SecretKey {
    pub const LEN: usize = size_of::<Self>();

    /// Constructs a SecretKey from the given byte buffer (must be exactly 16 bytes in length).
    pub fn new<B: AsRef<[u8]> + ?Sized>(key: &B) -> Result<Self> {
        let key = key.as_ref();
        if key.len() != Self::LEN {
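A short usage sketch, assuming the key is wired through `Config` (the 16-byte key below is purely illustrative):

```rust
use vicky_store::{Config, SecretKey};

// Illustrative key -- must be exactly 16 bytes; in practice, load it from a secrets source.
let config = Config {
    secret_key: SecretKey::new(b"0123456789abcdef").unwrap(),
    ..Default::default()
};
```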
12 changes: 4 additions & 8 deletions src/lib.rs
@@ -5,10 +5,7 @@ mod shard;
mod store;

pub use hashing::SecretKey;
use std::{
    fmt::{Display, Formatter},
    path::PathBuf,
};
use std::fmt::{Display, Formatter};
pub use store::{Stats, VickyStore};

#[derive(Debug)]
@@ -42,19 +39,18 @@ impl std::error::Error for Error {}

pub type Result<T> = std::result::Result<T, Error>;

/// The configuration options for VickyStore. Comes with sane defaults; feel free to use them.
#[derive(Debug, Clone)]
pub struct Config {
    pub dir_path: PathBuf,
    pub max_shard_size: u32,
    pub max_shard_size: u32, // we don't want huge shards, because splitting would be expensive
    pub min_compaction_threashold: u32, // should be ~10% of max_shard_size
    pub secret_key: SecretKey,
    pub secret_key: SecretKey, // just some entropy, not so important unless you fear DoS
    pub expected_number_of_keys: usize, // hint for creating the right number of shards up front
}

impl Default for Config {
    fn default() -> Self {
        Self {
            dir_path: PathBuf::new(),
            max_shard_size: 64 * 1024 * 1024,
            min_compaction_threashold: 8 * 1024 * 1024,
            secret_key: SecretKey::new(b"kOYLu0xvq2WtzcKJ").unwrap(),
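A hedged usage sketch for these options — the override values are illustrative, not recommendations; note that, per this diff, the directory path is passed to `VickyStore::open` rather than stored in `Config`:

```rust
use vicky_store::{Config, Result, VickyStore};

fn open_tuned() -> Result<VickyStore> {
    // Illustrative overrides; the defaults are sane for most workloads.
    let config = Config {
        max_shard_size: 32 * 1024 * 1024,           // smaller shards keep splits cheap
        min_compaction_threashold: 4 * 1024 * 1024, // ~10% of max_shard_size, per the comment above
        ..Default::default()
    };
    VickyStore::open("/tmp/vicky-dir", config) // the directory path is passed separately
}
```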
6 changes: 6 additions & 0 deletions src/shard.rs
@@ -155,6 +155,12 @@ impl Shard {
        })
    }

    pub(crate) fn flush(&self) -> Result<()> {
        //self.mmap.flush()?;
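        // sync_data (fdatasync) flushes the file's contents without forcing all metadata
        // updates to disk, which is typically cheaper than sync_all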
        self.file.sync_data()?;
        Ok(())
    }

    #[inline]
    fn extract_offset_and_size(offset_and_size: u64) -> (usize, usize, u64) {
        let klen = (offset_and_size >> 48) as usize;
