1
1
import os
2
2
import shutil
3
3
import uuid
4
+ from collections import namedtuple
4
5
from dataclasses import dataclass
5
6
from functools import reduce
6
7
from io import BytesIO
@@ -136,9 +137,9 @@ def _open_with_hints(self, hint_files) -> None:
136
137
Returns:
137
138
None
138
139
"""
139
- for uid , hints in hint_files .items ():
140
- file_id = crc32 (uid .encode ("utf-8" ))
141
- file_name = os .path .join (self .__dirname , uid + ".db" )
140
+ for file_stem , hints in hint_files .items ():
141
+ file_id = crc32 (file_stem .encode ("utf-8" ))
142
+ file_name = os .path .join (self .__dirname , file_stem + ".db" )
142
143
current = open (file_name , "rb" )
143
144
self .__datadir [file_id ] = current
144
145
for hint in hints :
@@ -160,20 +161,19 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
160
161
"""
161
162
if self .__dirname == ":memory" :
162
163
return
163
- hint_files = {}
164
- seen = {}
165
- deleted = {}
164
+ KeyState = namedtuple ("KeyState" , "tstamp deleted file_id hint" )
165
+ keys = {}
166
166
files = os .listdir (self .__dirname )
167
- files .sort ()
168
- files .reverse ()
169
167
for file in files :
168
+ file_id , ext = os .path .splitext (file )
169
+ # TODO: check if hint file is here and read it instead
170
+ if ext != ".db" :
171
+ continue
170
172
file_name = os .path .join (self .__dirname , file )
171
173
if (
172
174
os .path .isfile (file_name )
173
175
and os .path .getsize (file_name ) >= self .header_size
174
176
):
175
- uid , _ = os .path .splitext (file )
176
- # TODO: check if hint file is here and read it instead
177
177
current = open (file_name , "rb" )
178
178
while current .tell () < os .path .getsize (file_name ):
179
179
data = current .read (self .header_size )
@@ -183,16 +183,18 @@ def _read_hints(self) -> Optional[Dict[str, List[Hint]]]:
183
183
tstamp = uuid .UUID (int = int .from_bytes (ts_bytes , "big" ))
184
184
key = current .read (key_sz )
185
185
value_pos = current .tell ()
186
- if value_sz == 0 :
187
- deleted [key ] = True
188
- continue
189
- if key not in seen and key not in deleted :
190
- seen [key ] = True
186
+ if key not in keys or keys [key ].tstamp < tstamp :
191
187
hint = Hint (tstamp , key_sz , value_sz , value_pos , key )
192
- if uid not in hint_files :
193
- hint_files [uid ] = []
194
- hint_files [uid ].append (hint )
188
+ deleted = value_sz == 0
189
+ keys [key ] = KeyState (tstamp , deleted , file_id , hint )
195
190
current .seek (value_sz , 1 )
191
+ hint_files = {}
192
+ for key_state in keys .values ():
193
+ if key_state .deleted :
194
+ continue
195
+ if key_state .file_id not in hint_files :
196
+ hint_files [key_state .file_id ] = []
197
+ hint_files [key_state .file_id ].append (key_state .hint )
196
198
return hint_files
197
199
198
200
def _reactivate (self ) -> None :
@@ -394,8 +396,8 @@ def merge(self) -> bool:
394
396
merge_cask ._reactivate ()
395
397
# build and store hint files for merged data files
396
398
hint_files = merge_cask ._read_hints ()
397
- for uid , hints in hint_files .items ():
398
- hint_file_name = os .path .join (merge_dir , uid + ".hint" )
399
+ for file_stem , hints in hint_files .items ():
400
+ hint_file_name = os .path .join (merge_dir , file_stem + ".hint" )
399
401
hint_file = open (hint_file_name , "a+b" )
400
402
for hint in hints :
401
403
head = pack (
0 commit comments