Skip to content

Commit 9355b02

Browse files
authored
[Refactor] Remove data loader to simplify api (#33)
* Feat: Make data loading internal * Update docs for data loading * Ruff format * Update examples and remove unused import
1 parent 5b4da1b commit 9355b02

File tree

11 files changed

+522
-198
lines changed

11 files changed

+522
-198
lines changed

README.md

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -54,33 +54,37 @@ With libcachesim installed, you can start cache simulation for some eviction alg
5454
```python
5555
import libcachesim as lcs
5656

57-
# Step 1: Get one trace from S3 bucket
58-
URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
59-
dl = lcs.DataLoader()
60-
dl.load(URI)
61-
62-
# Step 2: Open trace and process efficiently
57+
# Step 1: Open a trace hosted on S3 (find more via https://github.com/cacheMon/cache_dataset)
58+
URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
6359
reader = lcs.TraceReader(
64-
trace = dl.get_cache_path(URI),
60+
trace = URI,
6561
trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
6662
reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
6763
)
6864

69-
# Step 3: Initialize cache
70-
cache = lcs.S3FIFO(cache_size=1024*1024)
71-
72-
# Step 4: Process entire trace efficiently (C++ backend)
73-
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
74-
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
65+
# Step 2: Initialize cache
66+
cache = lcs.S3FIFO(
67+
cache_size=1024*1024,
68+
# Cache-specific parameters
69+
small_size_ratio=0.2,
70+
ghost_size_ratio=0.8,
71+
move_to_main_threshold=2,
72+
)
7573

76-
# Step 4.1: Process with limited number of requests
77-
cache = lcs.S3FIFO(cache_size=1024*1024)
78-
obj_miss_ratio, byte_miss_ratio = cache.process_trace(
79-
reader,
80-
start_req=0,
81-
max_req=1000
74+
# Step 3: Process entire trace efficiently (C++ backend)
75+
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
76+
print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
77+
78+
# Step 3.1: Re-run the simulation on only the first 1000 requests, using a freshly created cache
79+
cache = lcs.S3FIFO(
80+
cache_size=1024 * 1024,
81+
# Cache-specific parameters
82+
small_size_ratio=0.2,
83+
ghost_size_ratio=0.8,
84+
move_to_main_threshold=2,
8285
)
83-
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
86+
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000)
87+
print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
8488
```
8589

8690
## Plugin System

docs/src/en/getting_started/quickstart.md

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -56,33 +56,37 @@ With libcachesim installed, you can start cache simulation for some eviction alg
5656
```python
5757
import libcachesim as lcs
5858

59-
# Step 1: Get one trace from S3 bucket
60-
URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
61-
dl = lcs.DataLoader()
62-
dl.load(URI)
63-
64-
# Step 2: Open trace and process efficiently
59+
# Step 1: Open a trace hosted on S3 (find more via https://github.com/cacheMon/cache_dataset)
60+
URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
6561
reader = lcs.TraceReader(
66-
trace = dl.get_cache_path(URI),
62+
trace = URI,
6763
trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
6864
reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
6965
)
7066

71-
# Step 3: Initialize cache
72-
cache = lcs.S3FIFO(cache_size=1024*1024)
73-
74-
# Step 4: Process entire trace efficiently (C++ backend)
75-
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
76-
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
67+
# Step 2: Initialize cache
68+
cache = lcs.S3FIFO(
69+
cache_size=1024*1024,
70+
# Cache-specific parameters
71+
small_size_ratio=0.2,
72+
ghost_size_ratio=0.8,
73+
move_to_main_threshold=2,
74+
)
7775

78-
# Step 4.1: Process with limited number of requests
79-
cache = lcs.S3FIFO(cache_size=1024*1024)
80-
obj_miss_ratio, byte_miss_ratio = cache.process_trace(
81-
reader,
82-
start_req=0,
83-
max_req=1000
76+
# Step 3: Process entire trace efficiently (C++ backend)
77+
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
78+
print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
79+
80+
# Step 3.1: Re-run the simulation on only the first 1000 requests, using a freshly created cache
81+
cache = lcs.S3FIFO(
82+
cache_size=1024 * 1024,
83+
# Cache-specific parameters
84+
small_size_ratio=0.2,
85+
ghost_size_ratio=0.8,
86+
move_to_main_threshold=2,
8487
)
85-
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
88+
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000)
89+
print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
8690
```
8791

8892
The above example demonstrates the basic workflow of using `libcachesim` for cache simulation:

examples/basic_usage.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,26 @@
11
import libcachesim as lcs
22

3-
# Step 1: Get one trace from S3 bucket
4-
URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
5-
dl = lcs.DataLoader()
6-
dl.load(URI)
7-
8-
# Step 2: Open trace and process efficiently
3+
# Step 1: Open a trace hosted on S3 (find more via https://github.com/cacheMon/cache_dataset)
4+
URI = "s3://cache-datasets/cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
95
reader = lcs.TraceReader(
10-
trace=dl.get_cache_path(URI),
6+
trace=URI,
117
trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
128
reader_init_params=lcs.ReaderInitParam(ignore_obj_size=False),
139
)
1410

15-
# Step 3: Initialize cache
16-
cache = lcs.S3FIFO(cache_size=1024 * 1024)
11+
# Step 2: Initialize cache
12+
cache = lcs.S3FIFO(
13+
cache_size=1024 * 1024,
14+
# Cache-specific parameters
15+
small_size_ratio=0.2,
16+
ghost_size_ratio=0.8,
17+
move_to_main_threshold=2,
18+
)
1719

18-
# Step 4: Process entire trace efficiently (C++ backend)
19-
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
20-
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
20+
# Step 3: Process entire trace efficiently (C++ backend)
21+
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
22+
print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
2123

22-
# Step 4.1: Process with limited number of requests
23-
cache = lcs.S3FIFO(cache_size=1024 * 1024)
24-
obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000)
25-
print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
24+
# Step 3.1: Process only the first 1000 requests again (note: this reuses the cache object from Step 3 rather than creating a new one)
25+
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000)
26+
print(f"Request miss ratio: {req_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")

libcachesim/__init__.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@
5959
from .trace_analyzer import TraceAnalyzer
6060
from .synthetic_reader import SyntheticReader, create_zipf_requests, create_uniform_requests
6161
from .util import Util
62-
from .data_loader import DataLoader
6362

6463
__all__ = [
6564
# Core classes
@@ -118,8 +117,6 @@
118117
"create_uniform_requests",
119118
# Utilities
120119
"Util",
121-
# Data loader
122-
"DataLoader",
123120
# Metadata
124121
"__doc__",
125122
"__version__",

0 commit comments

Comments
 (0)