Skip to content

Commit

Permalink
docs: add comments & hints for day one starter and reference code (#18)
Browse files Browse the repository at this point in the history
* feat(docs): Improve/Add comments & some hints for day one starter code

* feat(docs): Add comments for day one solution code

* feat(docs): Add figure for block storage format in starter code (block.rs)
  • Loading branch information
xzhseh authored Jul 11, 2023
1 parent e13ce4f commit de7f2ec
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 3 deletions.
15 changes: 13 additions & 2 deletions mini-lsm-starter/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,32 @@ mod builder;
mod iterator;

pub use builder::BlockBuilder;
/// You may want to check out `bytes::BufMut` when manipulating contiguous chunks of memory
use bytes::Bytes;
pub use iterator::BlockIterator;

/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted
/// key-value pairs.
/// A block is the smallest unit of read and caching in LSM tree.
/// It is a collection of sorted key-value pairs.
/// The actual storage format (after `Block::encode`) is as follows:
///
/// ----------------------------------------------------------------------------------------------------
/// |             Data Section             |             Offset Section              |      Extra      |
/// ----------------------------------------------------------------------------------------------------
/// | Entry #1 | Entry #2 | ... | Entry #N | Offset #1 | Offset #2 | ... | Offset #N | num_of_elements |
/// ----------------------------------------------------------------------------------------------------
pub struct Block {
/// Raw bytes of the data section: the encoded entries stored back to back.
data: Vec<u8>,
/// One `u16` per entry, forming the offset section.
/// NOTE(review): presumably each value is the byte position in `data` where the
/// corresponding entry starts — confirm against `BlockBuilder`.
offsets: Vec<u16>,
}

impl Block {
/// Encode the internal data to the data layout illustrated in the tutorial.
///
/// Returns the encoded block as a `Bytes` buffer.
///
/// Note: You may want to recheck whether any of the expected fields are missing from your output.
///
/// This is a starter stub: it panics via `unimplemented!()` until you fill it in.
pub fn encode(&self) -> Bytes {
unimplemented!()
}

/// Decode from the data layout, transforming the input `data` into a single `Block`.
///
/// `data` is the raw byte slice to decode; it is expected to follow the same layout that
/// `encode` produces.
///
/// This is a starter stub: it panics via `unimplemented!()` until you fill it in.
pub fn decode(data: &[u8]) -> Self {
unimplemented!()
}
Expand Down
6 changes: 6 additions & 0 deletions mini-lsm-starter/src/block/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ use super::Block;

/// Iterates on a block.
///
/// Holds a shared handle to the block together with owned copies of the key and value the
/// iterator currently points at.
pub struct BlockIterator {
/// The internal `Block`, wrapped by an `Arc`
block: Arc<Block>,
/// The current key; an empty key means the iterator is invalid
key: Vec<u8>,
/// The value corresponding to `key`; may be empty
value: Vec<u8>,
/// Index of the current key-value pair; should be in the range [0, num_of_elements)
idx: usize,
}

Expand Down Expand Up @@ -44,6 +48,7 @@ impl BlockIterator {
}

/// Returns true if the iterator is valid.
///
/// Note: You may want to make use of `key` — an empty `key` represents an invalid iterator.
///
/// This is a starter stub: it panics via `unimplemented!()` until you fill it in.
pub fn is_valid(&self) -> bool {
unimplemented!()
}
Expand All @@ -59,6 +64,7 @@ impl BlockIterator {
}

/// Seek to the first key that is >= `key`.
///
/// Note: You should assume the key-value pairs in the block were added by callers in sorted
/// order.
///
/// This is a starter stub: it panics via `unimplemented!()` until you fill it in.
pub fn seek_to_key(&mut self, key: &[u8]) {
unimplemented!()
}
Expand Down
1 change: 1 addition & 0 deletions mini-lsm/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ impl Block {
for offset in &self.offsets {
buf.put_u16(*offset);
}
// Adds number of elements at the end of the block
buf.put_u16(offsets_len as u16);
buf.into()
}
Expand Down
2 changes: 2 additions & 0 deletions mini-lsm/src/block/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@ impl BlockBuilder {
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
assert!(!key.is_empty(), "key must not be empty");
// The overhead here is `key_len` + `val_len` + `offset`, each of type `u16`
if self.estimated_size() + key.len() + value.len() + SIZEOF_U16 * 3 > self.block_size
&& !self.is_empty()
{
return false;
}
// Record the entry's offset first, before `data` grows, so that it points at the start of this entry
self.offsets.push(self.data.len() as u16);
self.data.put_u16(key.len() as u16);
self.data.put(key);
Expand Down
6 changes: 5 additions & 1 deletion mini-lsm/src/block/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,12 @@ impl BlockIterator {
self.seek_to(self.idx);
}

/// Seek to the specified position and update the current `key` and `value`
/// Index update will be handled by caller
fn seek_to_offset(&mut self, offset: usize) {
let mut entry = &self.block.data[offset..];
// Since `get_u16()` will automatically move the ptr 2 bytes ahead here,
// we don't need to manually advance it
let key_len = entry.get_u16() as usize;
let key = entry[..key_len].to_vec();
entry.advance(key_len);
Expand All @@ -90,7 +94,7 @@ impl BlockIterator {
self.value.extend(value);
}

/// Seek to the first key that >= `key`.
/// Seek to the first key that is >= `key`.
pub fn seek_to_key(&mut self, key: &[u8]) {
let mut low = 0;
let mut high = self.block.offsets.len();
Expand Down

0 comments on commit de7f2ec

Please sign in to comment.