From b14c8c156e812afa7b72330d13e04ece0e4da147 Mon Sep 17 00:00:00 2001 From: Tomer Filiba Date: Mon, 2 Sep 2024 16:20:06 +0300 Subject: [PATCH] Lists: add num_items and list_len API --- src/lists.rs | 63 +++++++++++++++++++++++++++++---------------- src/shard.rs | 2 +- tests/test_lists.rs | 8 ++++++ 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/src/lists.rs b/src/lists.rs index 5a788c1..8e0facf 100644 --- a/src/lists.rs +++ b/src/lists.rs @@ -16,23 +16,21 @@ struct List { head_idx: u64, // inclusive tail_idx: u64, // exclusive holes: u64, + num_items: u64, } impl std::fmt::Debug for List { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "List(0x{:016x}..0x{:016x} len={} holes={})", - self.head_idx, - self.tail_idx, - self.tail_idx - self.head_idx, - self.holes + "List(0x{:016x}..0x{:016x} items={} holes={})", + self.head_idx, self.tail_idx, self.num_items, self.holes ) } } impl List { - fn len(&self) -> u64 { + fn span_len(&self) -> u64 { self.tail_idx - self.head_idx } fn is_empty(&self) -> bool { @@ -211,6 +209,7 @@ impl CandyStore { head_idx: Self::FIRST_IDX, tail_idx: Self::FIRST_IDX + 1, holes: 0, + num_items: 1, }) .to_owned(), )?; @@ -247,6 +246,7 @@ impl CandyStore { }; // update list + list.num_items += 1; self.set_raw(&list_key, bytes_of(&list))?; // create chain @@ -482,23 +482,26 @@ impl CandyStore { existing_val.truncate(existing_val.len() - size_of::()); // update list, if the item was the head/tail - let list_bytes = self.get_raw(&list_key)?.unwrap(); - let mut list = *from_bytes::(&list_bytes); - - if list.head_idx == item_idx || list.tail_idx == item_idx + 1 { - if list.head_idx == item_idx { - list.head_idx += 1; - } else if list.tail_idx == item_idx + 1 { - list.tail_idx -= 1; - } - if list.is_empty() { - self.remove_raw(&list_key)?; + if let Some(list_bytes) = self.get_raw(&list_key)? 
{ + let mut list = *from_bytes::<List>(&list_bytes); + + list.num_items -= 1; + + if list.head_idx == item_idx || list.tail_idx == item_idx + 1 { + if list.head_idx == item_idx { + list.head_idx += 1; + } else if list.tail_idx == item_idx + 1 { + list.tail_idx -= 1; + } + if list.is_empty() { + self.remove_raw(&list_key)?; + } else { + self.set_raw(&list_key, bytes_of(&list))?; + } } else { + list.holes += 1; self.set_raw(&list_key, bytes_of(&list))?; } - } else { - list.holes += 1; - self.set_raw(&list_key, bytes_of(&list))?; } // remove chain @@ -567,10 +570,10 @@ impl CandyStore { return Ok(false); }; let list = *from_bytes::<List>(&list_bytes); - if list.len() < params.min_length { + if list.span_len() < params.min_length { return Ok(false); } - if (list.holes as f64) < (list.len() as f64) * params.min_holes_ratio { + if (list.holes as f64) < (list.span_len() as f64) * params.min_holes_ratio { return Ok(false); } @@ -618,6 +621,7 @@ impl CandyStore { head_idx: list.tail_idx, tail_idx: new_idx, holes: 0, + num_items: new_idx - list.tail_idx, }), )?; } @@ -814,4 +818,19 @@ impl CandyStore { ) -> Result { self.owned_push_to_list(list_key, val, InsertToListPos::Tail) } + + /// Returns the estimated list length + pub fn list_len<B: AsRef<[u8]> + ?Sized>(&self, list_key: &B) -> Result<usize> { + self.owned_list_len(list_key.as_ref().to_owned()) + } + pub fn owned_list_len(&self, list_key: Vec<u8>) -> Result<usize> { + let (_, list_key) = self.make_list_key(list_key); + + let Some(list_bytes) = self.get_raw(&list_key)? 
else { + return Ok(0); + }; + + let list = *from_bytes::<List>(&list_bytes); + Ok(list.num_items as usize) + } } diff --git a/src/shard.rs b/src/shard.rs index 8242250..69465c6 100644 --- a/src/shard.rs +++ b/src/shard.rs @@ -96,7 +96,7 @@ fn test_row_lookup() -> Result<()> { struct PageAligned<T>(T); pub(crate) const SHARD_FILE_MAGIC: [u8; 8] = *b"CandyStr"; -pub(crate) const SHARD_FILE_VERSION: u64 = 9; +pub(crate) const SHARD_FILE_VERSION: u64 = 10; #[derive(Clone, Copy, Default, Debug, Pod, Zeroable)] #[repr(C)] diff --git a/tests/test_lists.rs b/tests/test_lists.rs index 6fa3aa5..a798402 100644 --- a/tests/test_lists.rs +++ b/tests/test_lists.rs @@ -34,7 +34,9 @@ fn test_lists() -> Result<()> { ); assert_eq!(db.iter_list("texas").count(), 3); + assert_eq!(db.list_len("texas")?, 3); assert_eq!(db.iter_list("arkansas").count(), 0); + assert_eq!(db.list_len("arkansas")?, 0); let items = db .iter_list("texas") @@ -56,19 +58,24 @@ fn test_lists() -> Result<()> { // remove from the middle assert_eq!(db.remove_from_list("xxx", "k3")?, Some("v3".into())); assert_eq!(db.iter_list("xxx").count(), 3); + assert_eq!(db.list_len("xxx")?, 3); // remove first assert_eq!(db.remove_from_list("xxx", "k1")?, Some("v1".into())); assert_eq!(db.iter_list("xxx").count(), 2); + assert_eq!(db.list_len("xxx")?, 2); // remove last assert_eq!(db.remove_from_list("xxx", "k4")?, Some("v4".into())); assert_eq!(db.iter_list("xxx").count(), 1); + assert_eq!(db.list_len("xxx")?, 1); // remove single assert_eq!(db.remove_from_list("xxx", "k2")?, Some("v2".into())); assert_eq!(db.iter_list("xxx").count(), 0); + assert_eq!(db.list_len("xxx")?, 0); for i in 0..10_000 { db.set_in_list("xxx", &format!("my key {i}"), "very long key aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")?; + assert_eq!(db.list_len("xxx")?, i + 1); } // make sure we survive splits @@ -78,6 +85,7 @@ fn test_lists() -> Result<()> { let (k, _) = res?; assert_eq!(k, format!("my key {i}").as_bytes()); 
db.remove_from_list("xxx", &k)?; + assert_eq!(db.list_len("xxx")?, 10_000 - i - 1); } assert_eq!(db.iter_list("xxx").count(), 0);