Skip to content

Commit e6e170e

Browse files
committed
fix: make _read_hints actually work on open after merge
1 parent c11e7be commit e6e170e

File tree

2 files changed

+22
-24
lines changed

2 files changed

+22
-24
lines changed

src/py_bitcask/bitcask.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
import shutil
33
import uuid
4+
from collections import namedtuple
45
from dataclasses import dataclass
56
from functools import reduce
67
from io import BytesIO
@@ -136,9 +137,9 @@ def _open_with_hints(self, hint_files) -> None:
136137
Returns:
137138
None
138139
"""
139-
for uid, hints in hint_files.items():
140-
file_id = crc32(uid.encode("utf-8"))
141-
file_name = os.path.join(self.__dirname, uid + ".db")
140+
for file_stem, hints in hint_files.items():
141+
file_id = crc32(file_stem.encode("utf-8"))
142+
file_name = os.path.join(self.__dirname, file_stem + ".db")
142143
current = open(file_name, "rb")
143144
self.__datadir[file_id] = current
144145
for hint in hints:
@@ -160,20 +161,19 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
160161
"""
161162
if self.__dirname == ":memory":
162163
return
163-
hint_files = {}
164-
seen = {}
165-
deleted = {}
164+
KeyState = namedtuple("KeyState", "tstamp deleted file_id hint")
165+
keys = {}
166166
files = os.listdir(self.__dirname)
167-
files.sort()
168-
files.reverse()
169167
for file in files:
168+
file_id, ext = os.path.splitext(file)
169+
# TODO: check if hint file is here and read it instead
170+
if ext != ".db":
171+
continue
170172
file_name = os.path.join(self.__dirname, file)
171173
if (
172174
os.path.isfile(file_name)
173175
and os.path.getsize(file_name) >= self.header_size
174176
):
175-
uid, _ = os.path.splitext(file)
176-
# TODO: check if hint file is here and read it instead
177177
current = open(file_name, "rb")
178178
while current.tell() < os.path.getsize(file_name):
179179
data = current.read(self.header_size)
@@ -183,16 +183,18 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
183183
tstamp = uuid.UUID(int=int.from_bytes(ts_bytes, "big"))
184184
key = current.read(key_sz)
185185
value_pos = current.tell()
186-
if value_sz == 0:
187-
deleted[key] = True
188-
continue
189-
if key not in seen and key not in deleted:
190-
seen[key] = True
186+
if key not in keys or keys[key].tstamp < tstamp:
191187
hint = Hint(tstamp, key_sz, value_sz, value_pos, key)
192-
if uid not in hint_files:
193-
hint_files[uid] = []
194-
hint_files[uid].append(hint)
188+
deleted = value_sz == 0
189+
keys[key] = KeyState(tstamp, deleted, file_id, hint)
195190
current.seek(value_sz, 1)
191+
hint_files = {}
192+
for key_state in keys.values():
193+
if key_state.deleted:
194+
continue
195+
if key_state.file_id not in hint_files:
196+
hint_files[key_state.file_id] = []
197+
hint_files[key_state.file_id].append(key_state.hint)
196198
return hint_files
197199

198200
def _reactivate(self) -> None:
@@ -394,8 +396,8 @@ def merge(self) -> bool:
394396
merge_cask._reactivate()
395397
# build and store hint files for merged data files
396398
hint_files = merge_cask._read_hints()
397-
for uid, hints in hint_files.items():
398-
hint_file_name = os.path.join(merge_dir, uid + ".hint")
399+
for file_stem, hints in hint_files.items():
400+
hint_file_name = os.path.join(merge_dir, file_stem + ".hint")
399401
hint_file = open(hint_file_name, "a+b")
400402
for hint in hints:
401403
head = pack(

tests/test_bitcask.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,16 +309,12 @@ def test_check_merge(self, db, randomized):
309309
i += 1
310310

311311
def test_close(self, db):
312-
keys = db.list_keys()
313-
print(f"close: {len(keys)}")
314312
ok = db.close()
315313
assert ok
316314

317315
def test_reopen(self, db, test_dir):
318316
ok = db.open(test_dir)
319317
assert ok
320-
keys = db.list_keys()
321-
print(f"reopen: {len(keys)}")
322318

323319
def test_check_reopen(self, db, randomized):
324320
keys = db.list_keys()

0 commit comments

Comments
 (0)