Skip to content

Commit

Permalink
index: md5: handle missing files and cloud versioned files missing version_id
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Dec 22, 2023
1 parent 31a65d7 commit a535b4b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
14 changes: 11 additions & 3 deletions src/dvc_data/index/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,17 @@ def get(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]:
assert entry.key is not None
assert entry.key[: len(self.prefix)] == self.prefix
path = self.fs.join(self.path, *entry.key[len(self.prefix) :])
if self.fs.version_aware and entry.meta and entry.meta.version_id:
path = self.fs.version_path(path, entry.meta.version_id)
return self.fs, path

if not self.fs.version_aware:
return self.fs, path

if not entry.meta or entry.meta.isdir:
return self.fs, path

if entry.meta and entry.meta.version_id:
return self.fs, self.fs.version_path(path, entry.meta.version_id)

raise ValueError(f"Missing version_id for {path}")

def exists(self, entry: "DataIndexEntry", refresh: bool = False) -> bool:
if self.index is None:
Expand Down
18 changes: 15 additions & 3 deletions src/dvc_data/index/save.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ def md5(
if entry.hash_info and entry.hash_info.name in ("md5", "md5-dos2unix"):
continue

fs, path = index.storage_map.get_storage(entry, storage)
try:
fs, path = index.storage_map.get_storage(entry, storage)
except ValueError:
continue

info = None
if check_meta:
Expand All @@ -48,7 +51,11 @@ def md5(
if entry.meta != meta:
continue

meta, hash_info = hash_file(path, fs, name, state=state, info=info)
try:
_, hash_info = hash_file(path, fs, name, state=state, info=info)
except FileNotFoundError:
continue

entries[key] = DataIndexEntry(
key=entry.key,
meta=entry.meta,
Expand Down Expand Up @@ -126,7 +133,12 @@ def save(
if entry.meta and entry.meta.isdir:
dir_entries.append(key)
continue
fs, path = index.storage_map.get_storage(entry, storage)

try:
fs, path = index.storage_map.get_storage(entry, storage)
except ValueError:
continue

if entry.hash_info:
cache = odb or index.storage_map.get_cache_odb(entry)
assert cache
Expand Down

0 comments on commit a535b4b

Please sign in to comment.