From 63a94932755056de5a312a5f76ab96e3d9a5ce04 Mon Sep 17 00:00:00 2001
From: Tomer Filiba <tomer@sweet.security>
Date: Wed, 14 Aug 2024 17:21:48 +0300
Subject: [PATCH] Add size histogram

---
 src/shard.rs        | 58 +++++++++++++++++++++++++++++-
 src/store.rs        | 88 ++++++++++++++++++++++++++++++++++++++++++++-
 tests/test_logic.rs | 38 ++++++++++++++++++++
 3 files changed, 182 insertions(+), 2 deletions(-)
diff --git a/src/shard.rs b/src/shard.rs
index 9dceb60..a27a4be 100644
--- a/src/shard.rs
+++ b/src/shard.rs
@@ -88,12 +88,66 @@ fn test_row_lookup() -> Result<()> {
 #[repr(C, align(4096))]
 pub(crate) struct PageAligned<T>(pub T);
 
+/// Per-shard counters of written entry sizes, kept in 64B, 1KB and 16KB wide buckets
+#[repr(C)]
+pub(crate) struct ShardSizeHistogram {
+    pub counts_64b: [AtomicU32; 16], // sizes in [0, 1KB), one counter per 64 bytes
+    pub counts_1kb: [AtomicU32; 15], // sizes in [1KB, 16KB), one counter per 1KB
+    pub counts_16kb: [AtomicU32; 4], // sizes of 16KB and up, one counter per 16KB
+}
+
+impl ShardSizeHistogram {
+    /// Counts an entry of `sz` bytes; sizes past the last bucket (80KB+) are clamped into it
+    fn insert(&self, sz: usize) {
+        let counter = match sz {
+            0..=1023 => &self.counts_64b[sz / 64],
+            1024..=16383 => &self.counts_1kb[sz / 1024 - 1],
+            _ => &self.counts_16kb[(sz / (16 * 1024) - 1).min(self.counts_16kb.len() - 1)],
+        };
+        counter.fetch_add(1, Ordering::Relaxed);
+    }
+}
+
+#[test]
+fn test_shard_size_histogram() {
+    let hist = ShardSizeHistogram {
+        counts_64b: Default::default(),
+        counts_1kb: Default::default(),
+        counts_16kb: Default::default(),
+    };
+    let count = |counter: &AtomicU32| counter.load(Ordering::Relaxed);
+    for sz in [0, 63, 1022, 1023] {
+        hist.insert(sz);
+    }
+    assert_eq!(count(&hist.counts_64b[0]), 2);
+    assert_eq!(count(&hist.counts_64b[15]), 2);
+
+    for sz in [1024, 1025, 16382, 16383] {
+        hist.insert(sz);
+    }
+    assert_eq!(count(&hist.counts_1kb[0]), 2);
+    assert_eq!(count(&hist.counts_1kb[14]), 2);
+
+    for sz in [16384, 16385, 65534, 65535] {
+        hist.insert(sz);
+    }
+    assert_eq!(count(&hist.counts_16kb[0]), 2);
+    assert_eq!(count(&hist.counts_16kb[2]), 2);
+
+    // sizes of 64KB and a bit above fall into the last 16KB bucket
+    for sz in [65536, 65537] {
+        hist.insert(sz);
+    }
+    assert_eq!(count(&hist.counts_16kb[3]), 2);
+}
+
 #[repr(C)]
 pub(crate) struct ShardHeader {
     pub num_inserted: AtomicU64,
     pub num_removed: AtomicU64,
     pub wasted_bytes: AtomicU64,
     pub write_offset: AtomicU32,
+    pub size_histogram: ShardSizeHistogram, // entry-size counters, bumped by `write_kv`
     pub rows: PageAligned<[ShardRow; NUM_ROWS]>,
 }
 
@@ -237,7 +291,8 @@ impl Shard {
 
     // writing doesn't require holding any locks since we write with an offset
     fn write_kv(&self, key: &[u8], val: &[u8]) -> Result<u64> {
-        let mut buf = vec![0u8; key.len() + val.len()];
+        let entry_size = key.len() + val.len();
+        let mut buf = vec![0u8; entry_size];
         buf[..key.len()].copy_from_slice(key);
         buf[key.len()..].copy_from_slice(val);
 
@@ -250,6 +305,7 @@ impl Shard {
 
         // now writing can be non-atomic (pwrite)
         self.write_raw(&buf, write_offset)?;
+        self.header.size_histogram.insert(entry_size);
 
         Ok(((key.len() as u64) << 48) | ((val.len() as u64) << 32) | write_offset)
     }
diff --git a/src/store.rs b/src/store.rs
index fb016a8..6eb5d29 100644
--- a/src/store.rs
+++ b/src/store.rs
@@ -2,7 +2,7 @@ use anyhow::{anyhow, Context};
 use parking_lot::{Mutex, RwLock};
 use std::{
     collections::BTreeMap,
-    ops::Bound,
+    ops::{Bound, Range},
     path::{Path, PathBuf},
     sync::{
         atomic::{AtomicUsize, Ordering},
@@ -65,6 +65,75 @@ impl Stats {
     }
 }
 
+/// A histogram of inserted entry sizes (key bytes + value bytes), in three bucket sizes:
+/// * below 1KB, we keep 64-byte resolution
+/// * from 1KB up to 16KB, we keep 1KB resolution
+/// * from 16KB up to 80KB, we keep 16KB resolution
+///
+/// Notes:
+/// * Entry sizes are rounded down to the nearest bucket, e.g., 100 goes to the bucket of [64..128)
+/// * Counts are updated on insert, and are unchanged by removals. They represent the entry sizes "seen" by this
+///   store, not the currently existing ones. When a shard is split or compacted, only the existing entries remain
+///   in the histogram.
+/// * Use [Self::iter] to get a user-friendly representation of the histogram
+#[derive(Clone, Debug, Default)]
+pub struct SizeHistogram {
+    pub counts_64b: [usize; 16],
+    pub counts_1kb: [usize; 15],
+    pub counts_16kb: [usize; 4],
+}
+
+impl SizeHistogram {
+    /// Returns the count of the bucket that `sz` falls into.
+    ///
+    /// Sizes beyond the last bucket (80KB and up) are clamped into it, so any `sz` is a valid
+    /// argument (indexing with the unclamped bucket number would panic).
+    pub fn get(&self, sz: usize) -> usize {
+        match sz {
+            0..=1023 => self.counts_64b[sz / 64],
+            1024..=16383 => self.counts_1kb[sz / 1024 - 1],
+            _ => self.counts_16kb[(sz / (16 * 1024) - 1).min(self.counts_16kb.len() - 1)],
+        }
+    }
+
+    /// Iterates over all non-empty buckets, in ascending size order, and returns their
+    /// spans and counts. Every span is half-open: an entry whose size equals `span.end`
+    /// belongs to the next bucket.
+    pub fn iter<'a>(&'a self) -> impl Iterator<Item = (Range<usize>, usize)> + 'a {
+        // all the buckets of one resolution: bucket `i` of a tier whose lowest bucket number
+        // is `first` spans `(first + i) * step..(first + i + 1) * step`
+        fn tier(
+            counts: &[usize],
+            first: usize,
+            step: usize,
+        ) -> impl Iterator<Item = (Range<usize>, usize)> + '_ {
+            counts
+                .iter()
+                .enumerate()
+                .map(move |(i, &count)| ((first + i) * step..(first + i + 1) * step, count))
+        }
+
+        // the 1KB and 16KB tiers begin at their first multiple, hence `first == 1` for them
+        tier(&self.counts_64b, 0, 64)
+            .chain(tier(&self.counts_1kb, 1, 1024))
+            .chain(tier(&self.counts_16kb, 1, 16 * 1024))
+            .filter(|(_, count)| *count != 0)
+    }
+}
+
+impl std::fmt::Display for SizeHistogram {
+    /// Writes every non-empty bucket (in the order of [Self::iter]) on a line of its own,
+    /// formatted as `[start..end): count`
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.iter().try_for_each(|(r, c)| match r.end {
+            // a span that ends at `usize::MAX` is rendered as open-ended,
+            // i.e., without an upper bound
+            usize::MAX => write!(f, "[{}..): {c}\n", r.start),
+            _ => write!(f, "[{}..{}): {c}\n", r.start, r.end),
+        })
+    }
+}
+
 /// The CandyStore object. Note that it's fully sync'ed, so can be shared between threads using `Arc`
 pub struct CandyStore {
     pub(crate) shards: RwLock<BTreeMap<u32, Shard>>,
@@ -494,6 +563,23 @@ impl CandyStore {
         stats
     }
 
+    /// Returns the store-wide entry-size histogram: the element-wise sum of all shards' counters.
+    pub fn size_histogram(&self) -> SizeHistogram {
+        let mut total = SizeHistogram::default();
+        for src in self.shards.read().values().map(|shard| &shard.header.size_histogram) {
+            for (acc, cnt) in total.counts_64b.iter_mut().zip(&src.counts_64b) {
+                *acc += cnt.load(Ordering::Relaxed) as usize;
+            }
+            for (acc, cnt) in total.counts_1kb.iter_mut().zip(&src.counts_1kb) {
+                *acc += cnt.load(Ordering::Relaxed) as usize;
+            }
+            for (acc, cnt) in total.counts_16kb.iter_mut().zip(&src.counts_16kb) {
+                *acc += cnt.load(Ordering::Relaxed) as usize;
+            }
+        }
+        total
+    }
+
     /// Returns an iterator over the whole store (skipping linked lists or typed items)
     pub fn iter(&self) -> CandyStoreIterator {
         CandyStoreIterator::new(self)
diff --git a/tests/test_logic.rs b/tests/test_logic.rs
index 2b13519..07ad72e 100644
--- a/tests/test_logic.rs
+++ b/tests/test_logic.rs
@@ -106,3 +106,41 @@ fn test_logic() -> Result<()> {
         Ok(())
     })
 }
+
+#[test]
+fn test_histogram() -> Result<()> {
+    run_in_tempdir(|dir| {
+        let db = CandyStore::open(
+            dir,
+            Config {
+                expected_number_of_keys: 100_000, // pre-split
+                ..Default::default()
+            },
+        )?;
+        // each `set` below is expected to be counted once, in the bucket of its entry size
+        db.set("k1", "bbb")?;
+        db.set("k2", &vec![b'b'; 100])?;
+        db.set("k3", &vec![b'b'; 500])?;
+        db.set("k4", &vec![b'b'; 5000])?;
+        db.set("k4", &vec![b'b'; 4500])?; // overwriting k4 is counted as another insert
+        db.set("k5", &vec![b'b'; 50000])?;
+        db.set("kkkkkkkkkkkkkkk", &vec![b'b'; 0xffff])?; // expected in the last bucket
+
+        let hist = db.size_histogram();
+        assert_eq!(
+            hist.iter().collect::<Vec<_>>(),
+            vec![
+                (0..64, 1),
+                (64..128, 1),
+                (448..512, 1),
+                (4096..5120, 2), // both writes of k4
+                (49152..65536, 1),
+                (65536..81920, 1)
+            ]
+        );
+
+        assert!(hist.to_string().contains("[64..128): 1")); // `Display`: one line per bucket
+
+        Ok(())
+    })
+}