Skip to content

Commit

Permalink
index: only iterate over trie items once
Browse files: browse the repository at this point in the history
  • Loading branch information
pmrowla committed Nov 30, 2023
1 parent 6cba666 commit f36700c
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 12 deletions.
17 changes: 7 additions & 10 deletions src/dvc_data/index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ class BaseDataIndex(ABC, MutableMapping[DataIndexKey, DataIndexEntry]):
def iteritems(
self,
prefix: Optional[DataIndexKey] = None,
shallow: Optional[bool] = False,
shallow: bool = False,
) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]:
pass

Expand Down Expand Up @@ -687,8 +687,9 @@ def _load(self, key, entry):
self._trie.commit()

def load(self, **kwargs):
for key, entry in self.iteritems(shallow=True, **kwargs):
self._load(key, entry)
kwargs["shallow"] = True
for _ in self.iteritems(**kwargs):
pass

def has_node(self, key: DataIndexKey) -> bool:
return self._trie.has_node(key)
Expand All @@ -710,21 +711,17 @@ def traverse(self, *args, **kwargs) -> Any:
def iteritems(
self,
prefix: Optional[DataIndexKey] = None,
shallow: Optional[bool] = False,
shallow: bool = False,
) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]:
kwargs: Dict[str, Any] = {"shallow": shallow}
if prefix:
kwargs = {"prefix": prefix}
item = self._trie.longest_prefix(prefix)
if item:
key, entry = item
self._load(key, entry)

# FIXME could filter by loaded and/or isdir in sql on sqltrie side
for key, entry in self._trie.items(**kwargs):
for key, entry in self._trie.items(prefix=prefix, shallow=shallow):
self._load(key, entry)

yield from self._trie.items(**kwargs)
yield key, entry

def iterkeys(self, *args, **kwargs):
return self._trie.keys(*args, **kwargs)
Expand Down
4 changes: 2 additions & 2 deletions src/dvc_data/index/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __len__(self):
def _iteritems(
self,
prefix: Optional[DataIndexKey] = None,
shallow: Optional[bool] = False,
shallow: bool = False,
ensure_loaded: bool = False,
) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]:
# NOTE: iteration is implemented using traverse and not iter/iteritems
Expand Down Expand Up @@ -113,7 +113,7 @@ def _load_dir_keys(
def iteritems(
self,
prefix: Optional[DataIndexKey] = None,
shallow: Optional[bool] = False,
shallow: bool = False,
) -> Iterator[Tuple[DataIndexKey, DataIndexEntry]]:
return self._iteritems(prefix=prefix, shallow=shallow, ensure_loaded=True)

Expand Down

0 comments on commit f36700c

Please sign in to comment.