Skip to content

Commit

Permalink
[week1] 1.4 task1
Browse files Browse the repository at this point in the history
  • Loading branch information
HeartLinked committed Sep 24, 2024
1 parent 3f30ccc commit 4ee60ac
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 31 deletions.
28 changes: 24 additions & 4 deletions mini-lsm-starter/src/block.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod

mod builder;
mod iterator;

pub use builder::BlockBuilder;
use bytes::{Buf, BufMut, Bytes, BytesMut};
pub use iterator::BlockIterator;
use std::mem::size_of;

/// A block is the smallest unit of read and caching in LSM tree. It is a collection of sorted key-value pairs.
pub struct Block {
Expand Down Expand Up @@ -52,4 +48,28 @@ impl Block {
offsets,
}
}

/// Returns a copy of the first key stored in this block.
///
/// Yields `None` when the block has no entry offsets. Otherwise the entry at
/// the first offset is read: a 2-byte length prefix (via `Buf::get_u16`)
/// followed by that many key bytes, which are copied into a fresh `Bytes`.
pub fn first_key(&self) -> Option<Bytes> {
    let start = *self.offsets.first()? as usize;

    // `get_u16` advances the slice past the length prefix, so `entry` then
    // begins at the key bytes themselves.
    let mut entry = &self.data[start..];
    let key_len = entry.get_u16() as usize;

    Some(Bytes::copy_from_slice(&entry[..key_len]))
}

/// Returns a copy of the key of the last entry stored in this block.
///
/// Yields `None` when the block has no entry offsets. Otherwise the entry at
/// the last offset is read: a 2-byte length prefix (via `Buf::get_u16`)
/// followed by that many key bytes, which are copied into a fresh `Bytes`.
pub fn last_key(&self) -> Option<Bytes> {
    let start = *self.offsets.last()? as usize;

    // `get_u16` advances the slice past the length prefix, so `entry` then
    // begins at the key bytes themselves.
    let mut entry = &self.data[start..];
    let key_len = entry.get_u16() as usize;

    Some(Bytes::copy_from_slice(&entry[..key_len]))
}
}
1 change: 0 additions & 1 deletion mini-lsm-starter/src/block/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ impl BlockBuilder {
if self.offsets.len() * 2 + self.data.len() + size_add + NUM_OF_ELEMENTS_LEN
> self.block_size
{
println!("---------OVERFLOW!--------------");
return false;
}
}
Expand Down
2 changes: 1 addition & 1 deletion mini-lsm-starter/src/block/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub struct BlockIterator {
}

impl BlockIterator {
fn new(block: Arc<Block>) -> Self {
pub fn new(block: Arc<Block>) -> Self {
let mut iter = Self {
block,
key: KeyVec::new(),
Expand Down
3 changes: 0 additions & 3 deletions mini-lsm-starter/src/lsm_iterator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod

use anyhow::{anyhow, Result};

use crate::{
Expand Down
7 changes: 2 additions & 5 deletions mini-lsm-starter/src/lsm_storage.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#![allow(dead_code)] // REMOVE THIS LINE after fully implementing this functionality

use std::collections::HashMap;
use std::ops::Bound;
use std::path::{Path, PathBuf};
Expand All @@ -16,10 +14,9 @@ use crate::compact::{
SimpleLeveledCompactionController, SimpleLeveledCompactionOptions, TieredCompactionController,
};
use crate::iterators::merge_iterator::MergeIterator;
use crate::iterators::StorageIterator;
use crate::lsm_iterator::{FusedIterator, LsmIterator};
use crate::manifest::Manifest;
use crate::mem_table::{MemTable, MemTableIterator};
use crate::mem_table::MemTable;
use crate::mvcc::LsmMvccInner;
use crate::table::SsTable;

Expand Down Expand Up @@ -439,7 +436,7 @@ impl LsmStorageInner {
}
// Build the merge iterator from the collected vec of iterators.
let lsm_iterator_inner = MergeIterator::create(iters);
let mut iter = LsmIterator::new(lsm_iterator_inner)?;
let iter = LsmIterator::new(lsm_iterator_inner)?;
Ok(FusedIterator::new(iter))
}
}
2 changes: 0 additions & 2 deletions mini-lsm-starter/src/mem_table.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#![allow(dead_code)] // REMOVE THIS LINE after fully implementing this functionality

use std::ops::Bound;
use std::path::Path;
use std::sync::atomic::AtomicUsize;
Expand Down
80 changes: 76 additions & 4 deletions mini-lsm-starter/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ pub(crate) mod bloom;
mod builder;
mod iterator;

use bytes::Bytes;
use std::fs::File;
use std::io::{Read, Seek};
use std::path::Path;
use std::sync::Arc;

use anyhow::Result;
use anyhow::{anyhow, Result};
pub use builder::SsTableBuilder;
use bytes::Buf;
pub use iterator::SsTableIterator;
Expand All @@ -21,6 +23,8 @@ use crate::lsm_storage::BlockCache;
use self::bloom::Bloom;

#[derive(Clone, Debug, PartialEq, Eq)]
/// Metadata describing a single data block of an SSTable:
/// the block's offset plus the first and last keys it contains.
pub struct BlockMeta {
/// Offset of this data block.
pub offset: usize,
Expand All @@ -39,12 +43,47 @@ impl BlockMeta {
#[allow(clippy::ptr_arg)] // remove this allow after you finish
buf: &mut Vec<u8>,
) {
unimplemented!()
for meta in block_meta {
// Encode the offset as an 8-byte little-endian u64.
buf.extend_from_slice(&(meta.offset as u64).to_le_bytes());
// Encode the length of first_key, followed by its bytes.
let first_key_len = meta.first_key.raw_ref().len() as u64;
buf.extend_from_slice(&first_key_len.to_le_bytes());
buf.extend_from_slice(meta.first_key.raw_ref());
// Encode the length of last_key, followed by its bytes.
let last_key_len = meta.last_key.raw_ref().len() as u64;
buf.extend_from_slice(&last_key_len.to_le_bytes());
buf.extend_from_slice(meta.last_key.raw_ref());
}
}

/// Decode block meta from a buffer.
///
/// The buffer is read as a sequence of records, each laid out as
/// `offset (u64 LE) | first_key_len (u64 LE) | first_key | last_key_len (u64 LE) | last_key`,
/// until no bytes remain. One `BlockMeta` is produced per record, in order.
///
/// # Panics
///
/// Panics if the buffer ends in the middle of a record, because the `Buf`
/// getters panic when fewer bytes remain than requested.
pub fn decode_block_meta(mut buf: impl Buf) -> Vec<BlockMeta> {
    let mut block_metas = Vec::new();

    while buf.has_remaining() {
        // Offset of the data block (8 bytes).
        let offset = buf.get_u64_le() as usize;

        // Length-prefixed first key. `copy_to_bytes` checks the length against
        // the remaining bytes before copying, so a corrupt length fails
        // immediately instead of first allocating a zeroed scratch vector of
        // that size.
        let first_key_len = buf.get_u64_le() as usize;
        let first_key = buf.copy_to_bytes(first_key_len);

        // Length-prefixed last key.
        let last_key_len = buf.get_u64_le() as usize;
        let last_key = buf.copy_to_bytes(last_key_len);

        block_metas.push(BlockMeta {
            offset,
            first_key: KeyBytes::from_bytes(first_key),
            last_key: KeyBytes::from_bytes(last_key),
        });
    }

    block_metas
}
}

Expand Down Expand Up @@ -108,7 +147,40 @@ impl SsTable {

/// Open SSTable from a file.
///
/// Reads the whole file into memory, takes the trailing 4 bytes (read with
/// `Buf::get_u32`) as the offset of the block-meta section, decodes the block
/// metadata found between that offset and the trailing 4 bytes, and derives
/// the table's first/last key from the first/last block meta (empty keys when
/// there are no blocks). The bloom filter is left as `None` and `max_ts` as 0.
///
/// # Errors
///
/// Returns an error if the `FileObject` holds no file handle, if reading or
/// rewinding the file fails, if the file is too small to contain the trailing
/// meta offset, or if that offset points past the start of the trailing field.
pub fn open(id: usize, block_cache: Option<Arc<BlockCache>>, file: FileObject) -> Result<Self> {
    const U32_SIZE: usize = size_of::<u32>();

    let (mut file, file_size) = match file.0 {
        Some(f) => (f, file.1),
        None => return Err(anyhow!("file not exists")),
    };

    let mut bytes = vec![0; file_size as usize];
    file.read_exact(&mut bytes)?;

    // Validate the layout up front so a truncated or corrupt file is reported
    // as an error instead of panicking on subtraction underflow or slicing.
    let footer_start = bytes.len().checked_sub(U32_SIZE).ok_or_else(|| {
        anyhow!(
            "sst file too small: {} bytes, need at least {}",
            bytes.len(),
            U32_SIZE
        )
    })?;
    let block_meta_offset = (&bytes[footer_start..]).get_u32() as usize;
    if block_meta_offset > footer_start {
        return Err(anyhow!(
            "invalid block meta offset {} (meta section must end at {})",
            block_meta_offset,
            footer_start
        ));
    }

    let block_meta = BlockMeta::decode_block_meta(&bytes[block_meta_offset..footer_start]);
    let first_key = block_meta
        .first()
        .map(|meta| meta.first_key.clone())
        .unwrap_or_default();
    let last_key = block_meta
        .last()
        .map(|meta| meta.last_key.clone())
        .unwrap_or_default();

    // The full read above moved the file cursor to the end; put it back at
    // the start before handing the handle to the table.
    file.rewind()?;
    Ok(Self {
        file: FileObject(Some(file), file_size),
        block_meta,
        block_meta_offset,
        id,
        block_cache,
        first_key,
        last_key,
        bloom: None,
        max_ts: 0,
    })
}

/// Create a mock SST with only first key + last key metadata
Expand Down
85 changes: 74 additions & 11 deletions mini-lsm-starter/src/table/builder.rs
Original file line number Diff line number Diff line change
@@ -1,44 +1,74 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod

use std::path::Path;
use std::sync::Arc;

use anyhow::Result;
use bytes::BufMut;

use super::{BlockMeta, SsTable};
use super::{BlockMeta, FileObject, SsTable};
use crate::key::KeyBytes;
use crate::{block::BlockBuilder, key::KeySlice, lsm_storage::BlockCache};

/// Builds an SSTable from key-value pairs.
pub struct SsTableBuilder {
builder: BlockBuilder,
first_key: Vec<u8>,
last_key: Vec<u8>,
data: Vec<u8>,
pub(crate) meta: Vec<BlockMeta>,
data: Vec<u8>, // 目前打算只在块满和 build 时将 data block 写入该字段
pub(crate) meta: Vec<BlockMeta>, // 每个块的首尾键(first/last keys)和每个块的偏移量(offsets)
block_size: usize,
}

impl SsTableBuilder {
/// Create a builder based on target block size.
///
/// The builder starts with an empty in-progress block, no sealed block data,
/// no block metadata and empty first/last keys. `block_size` is kept so that
/// every later block is created with the same target size.
pub fn new(block_size: usize) -> Self {
    Self {
        builder: BlockBuilder::new(block_size),
        first_key: Vec::new(),
        last_key: Vec::new(),
        data: Vec::new(),
        meta: Vec::new(),
        block_size,
    }
}

/// Adds a key-value pair to SSTable.
///
/// The pair is appended to the block currently being built. If that block
/// rejects it (it is full), the block is sealed via `split_new_block` and the
/// pair is added to the fresh block instead, so each pair is stored exactly
/// once.
///
/// Note: You should split a new block when the current block is full.(`std::mem::replace` may
/// be helpful here)
pub fn add(&mut self, key: KeySlice, value: &[u8]) {
    if self.first_key.is_empty() {
        self.first_key = key.raw_ref().to_vec();
    }
    self.last_key = key.raw_ref().to_vec();

    // Fast path: the pair fits into the current block. Returning here is
    // essential; retrying after a successful insert would store the pair twice.
    if self.builder.add(key, value) {
        return;
    }

    // The current block is full: seal it and retry on a fresh block.
    self.split_new_block();
    // NOTE(review): the result of the retry is ignored, as before. This
    // presumes an empty block always accepts its first entry; confirm against
    // `BlockBuilder::add`.
    let _ = self.builder.add(key, value);
}

/// Seals the block currently being built and starts a fresh one.
///
/// A `BlockMeta` is recorded for the sealed block (its offset is the current
/// length of `data`; its first/last keys fall back to empty keys when the
/// block reports none), and the encoded block is appended to `data`.
fn split_new_block(&mut self) {
    let finished =
        std::mem::replace(&mut self.builder, BlockBuilder::new(self.block_size)).build();

    // Capture the offset before the encoded block is appended below.
    let offset = self.data.len();
    let first_key = KeyBytes::from_bytes(finished.first_key().unwrap_or_default());
    let last_key = KeyBytes::from_bytes(finished.last_key().unwrap_or_default());

    self.meta.push(BlockMeta {
        offset,
        first_key,
        last_key,
    });
    // `encode()` yields the block's bytes, which are copied onto the end of `data`.
    self.data.extend_from_slice(&finished.encode());
}

/// Get the estimated size of the SSTable.
///
/// Since the data blocks contain much more data than meta blocks, just return the size of data
/// blocks here.
pub fn estimated_size(&self) -> usize {
unimplemented!()
self.data.len()
}

/// Builds the SSTable and writes it to the given path. Use the `FileObject` structure to manipulate the disk objects.
Expand All @@ -48,7 +78,40 @@ impl SsTableBuilder {
block_cache: Option<Arc<BlockCache>>,
path: impl AsRef<Path>,
) -> Result<SsTable> {
unimplemented!()
let mut sst_builder = self;
sst_builder.split_new_block();

let mut bytes = vec![];
bytes.extend_from_slice(&sst_builder.data);

let block_meta_offset = bytes.len();
let first_key = sst_builder
.meta
.first()
.map(|meta| meta.first_key.clone())
.unwrap_or_default();
let last_key = sst_builder
.meta
.last()
.map(|meta| meta.last_key.clone())
.unwrap_or_default();
BlockMeta::encode_block_meta(&sst_builder.meta, &mut bytes);

bytes.put_u32(block_meta_offset as u32);

let file = FileObject::create(path.as_ref(), bytes)?;

Ok(SsTable {
file,
block_meta: sst_builder.meta,
block_meta_offset,
id,
block_cache,
first_key,
last_key,
bloom: None,
max_ts: 0,
})
}

#[cfg(test)]
Expand Down
1 change: 1 addition & 0 deletions mini-lsm-starter/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ mod harness;
mod week1_day1;
mod week1_day2;
mod week1_day3;
mod week1_day4;

0 comments on commit 4ee60ac

Please sign in to comment.