Skip to content

Commit

Permalink
add gcsfs.GCSFileSystem with patched find method
Browse files Browse the repository at this point in the history
Workaround until fsspec/gcsfs#488 is merged
  • Loading branch information
dtrifiro committed Aug 26, 2022
1 parent 3250c38 commit 274d578
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 1 deletion.
4 changes: 3 additions & 1 deletion dvc_gs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ def _prepare_credentials(self, **config):
@wrap_prop(threading.Lock())
@cached_property
def fs(self):
    """Return a ``GCSFileSystem`` built from ``self.fs_args``."""
    # NOTE(review): presumably `wrap_prop` guards the cached property with
    # the given lock so it is only built once -- confirm against its source.
    # TODO: Use `gcsfs` when https://github.com/fsspec/gcsfs/pull/488
    # is merged and its version bumped
    from .gcsfs import GCSFileSystem

    return GCSFileSystem(**self.fs_args)

Expand Down
69 changes: 69 additions & 0 deletions dvc_gs/gcsfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# pylint: disable=abstract-method
# TODO: remove this module when https://github.com/fsspec/gcsfs/pull/488
# is merged and version is bumped

from gcsfs import GCSFileSystem as GCSFileSystem_


class GCSFileSystem(GCSFileSystem_):
    """``gcsfs.GCSFileSystem`` with a patched ``_find`` method.

    Temporary workaround until https://github.com/fsspec/gcsfs/pull/488 is
    merged and a release containing it is available.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    async def _find(
        self, path, withdirs=False, detail=False, prefix="", **kwargs
    ):
        """Find all objects below ``path``.

        Objects are fetched with one ``_do_list_objects`` call using an
        empty delimiter; directory entries are then derived from the
        object names.

        Args:
            path: Bucket or ``bucket/key`` location to search under.
            withdirs: Also include the derived directory entries.
            detail: Return a ``{name: info}`` dict instead of a name list.
            prefix: Extra name prefix, appended to the key of ``path``,
                that listed objects must start with.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            A list of names, or a dict mapping each name to its info dict
            when ``detail`` is true.
        """
        path = self._strip_protocol(path)
        bucket, key = self.split_path(path)

        if prefix:
            _path = "" if not key else key.rstrip("/") + "/"
            _prefix = f"{_path}{prefix}"
        else:
            _prefix = key

        objects, _ = await self._do_list_objects(
            bucket, delimiter="", prefix=_prefix
        )

        dirs = {}
        # parent directory -> {child name: child entry}. Keyed by name so
        # that a subdirectory shared by several objects is recorded once
        # instead of once per object beneath it.
        cache_entries = {}

        for obj in objects:
            parent = self._parent(obj["name"])
            previous = obj

            while parent:
                dir_key = self.split_path(parent)[1]
                # Stop at the bucket root, and don't go above the requested
                # level. This must be checked *before* recording the
                # directory, otherwise the parent of ``path`` leaks into
                # the results when ``withdirs`` is set.
                if not dir_key or len(parent) < len(path):
                    break

                dirs[parent] = {
                    "Key": dir_key,
                    "Size": 0,
                    "name": parent,
                    "StorageClass": "DIRECTORY",
                    "type": "directory",
                    "size": 0,
                }

                cache_entries.setdefault(parent, {}).setdefault(
                    previous["name"], previous
                )

                previous = dirs[parent]
                parent = self._parent(parent)

        # A prefix-filtered listing is incomplete for the directories it
        # touches, so only cache listings from unfiltered searches.
        if not prefix:
            self.dircache.update(
                {
                    directory: list(children.values())
                    for directory, children in cache_entries.items()
                }
            )

        if withdirs:
            objects = sorted(
                objects + list(dirs.values()), key=lambda x: x["name"]
            )

        if detail:
            return {o["name"]: o for o in objects}

        return [o["name"] for o in objects]

0 comments on commit 274d578

Please sign in to comment.