Skip to content

Commit

Permalink
Install new VSI plugin handler for every instance of using an opener (#…
Browse files Browse the repository at this point in the history
…1408)

* Set defaults for pyopener registry contextvar get()

Resolves #1406

* Install a new VSI plugin handler for every instance

Plugin handler installation is implicit and hidden from the API
user, but could be exposed in the future if needed.

Resolves #1406

* Add opener support to listdir(), listlayers(), and remove()

This involves adding GDAL callback support for unlinking VSI files
and also a major stat() bug fix.

* Adjust expectation of number of files

* Clean up

* More cleanup and change log update
  • Loading branch information
sgillies authored Jul 9, 2024
1 parent 4509d75 commit 8c0a31e
Show file tree
Hide file tree
Showing 11 changed files with 552 additions and 138 deletions.
5 changes: 5 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ All issue numbers are relative to https://github.com/Toblerity/Fiona/issues.

Bug fixes:

- The Pyopener registry and VSI plugin have been rewritten to avoid filename
conflicts and to be compatible with multithreading. Now, a new plugin handler
is registered for each instance of using an opener (#1408). Before GDAL 3.9.0,
plugin handlers cannot be removed, and so it may be observed that the size
of the Pyopener registry grows during the execution of a program.
- A CSLConstList ctypedef has been added and is used where appropriate (#1404).
- Fiona model objects have an informative, printable representation again (#).

Expand Down
124 changes: 97 additions & 27 deletions fiona/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ def func(*args, **kwds):
log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener)
vsi_path_ctx = _opener_registration(raw_dataset_path, opener)
registered_vsi_path = stack.enter_context(vsi_path_ctx)
log.debug("Registered vsi path: registered_vsi_path%r", registered_vsi_path)
log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path)
path = _UnparsedPath(registered_vsi_path)
else:
if vfs:
Expand Down Expand Up @@ -386,7 +386,7 @@ def func(*args, **kwds):


@ensure_env_with_credentials
def remove(path_or_collection, driver=None, layer=None):
def remove(path_or_collection, driver=None, layer=None, opener=None):
"""Delete an OGR data source or one of its layers.
If no layer is specified, the entire dataset and all of its layers
Expand All @@ -396,6 +396,19 @@ def remove(path_or_collection, driver=None, layer=None):
----------
path_or_collection : str, pathlib.Path, or Collection
The target Collection or its path.
opener : callable or obj, optional
A custom dataset opener which can serve GDAL's virtual
filesystem machinery via Python file-like objects. The
underlying file-like object is obtained by calling *opener* with
(*fp*, *mode*) or (*fp*, *mode* + "b") depending on the format
driver's native mode. *opener* must return a Python file-like
object that provides read, seek, tell, and close methods. Note:
only one opener at a time per fp, mode pair is allowed.
Alternatively, opener may be a filesystem object from a package
like fsspec that provides the following methods: isdir(),
isfile(), ls(), mtime(), open(), and size(). The exact interface
is defined in the fiona._vsiopener._AbstractOpener class.
driver : str, optional
The name of a driver to be used for deletion, optional. Can
usually be detected.
Expand All @@ -414,21 +427,37 @@ def remove(path_or_collection, driver=None, layer=None):
"""
if isinstance(path_or_collection, Collection):
collection = path_or_collection
path = collection.path
raw_dataset_path = collection.path
driver = collection.driver
collection.close()
elif isinstance(path_or_collection, Path):
path = str(path_or_collection)

else:
path = path_or_collection
if layer is None:
_remove(path, driver)
fp = path_or_collection
if hasattr(fp, "path") and hasattr(fp, "fs"):
log.debug("Detected fp is an OpenFile: fp=%r", fp)
raw_dataset_path = fp.path
opener = fp.fs.open
else:
raw_dataset_path = os.fspath(fp)

if opener:
log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener)
with _opener_registration(raw_dataset_path, opener) as registered_vsi_path:
log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path)
if layer is None:
_remove(registered_vsi_path, driver)
else:
_remove_layer(registered_vsi_path, layer, driver)
else:
_remove_layer(path, layer, driver)
pobj = _parse_path(raw_dataset_path)
if layer is None:
_remove(_vsi_path(pobj), driver)
else:
_remove_layer(_vsi_path(pobj), layer, driver)


@ensure_env_with_credentials
def listdir(fp):
def listdir(fp, opener=None):
"""Lists the datasets in a directory or archive file.
Archive files must be prefixed like "zip://" or "tar://".
Expand All @@ -437,6 +466,19 @@ def listdir(fp):
----------
fp : str or pathlib.Path
Directory or archive path.
opener : callable or obj, optional
A custom dataset opener which can serve GDAL's virtual
filesystem machinery via Python file-like objects. The
underlying file-like object is obtained by calling *opener* with
(*fp*, *mode*) or (*fp*, *mode* + "b") depending on the format
driver's native mode. *opener* must return a Python file-like
object that provides read, seek, tell, and close methods. Note:
only one opener at a time per fp, mode pair is allowed.
Alternatively, opener may be a filesystem object from a package
like fsspec that provides the following methods: isdir(),
isfile(), ls(), mtime(), open(), and size(). The exact interface
is defined in the fiona._vsiopener._AbstractOpener class.
Returns
-------
Expand All @@ -449,18 +491,25 @@ def listdir(fp):
If the input is not a str or Path.
"""
if isinstance(fp, Path):
fp = str(fp)

if not isinstance(fp, str):
raise TypeError("invalid path: %r" % fp)
if hasattr(fp, "path") and hasattr(fp, "fs"):
log.debug("Detected fp is an OpenFile: fp=%r", fp)
raw_dataset_path = fp.path
opener = fp.fs.open
else:
raw_dataset_path = os.fspath(fp)

pobj = _parse_path(fp)
return _listdir(_vsi_path(pobj))
if opener:
log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener)
with _opener_registration(raw_dataset_path, opener) as registered_vsi_path:
log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path)
return _listdir(registered_vsi_path)
else:
pobj = _parse_path(raw_dataset_path)
return _listdir(_vsi_path(pobj))


@ensure_env_with_credentials
def listlayers(fp, vfs=None, **kwargs):
def listlayers(fp, opener=None, vfs=None, **kwargs):
"""Lists the layers (collections) in a dataset.
Archive files must be prefixed like "zip://" or "tar://".
Expand All @@ -469,6 +518,19 @@ def listlayers(fp, vfs=None, **kwargs):
----------
fp : str, pathlib.Path, or file-like object
A dataset identifier or file object containing a dataset.
opener : callable or obj, optional
A custom dataset opener which can serve GDAL's virtual
filesystem machinery via Python file-like objects. The
underlying file-like object is obtained by calling *opener* with
(*fp*, *mode*) or (*fp*, *mode* + "b") depending on the format
driver's native mode. *opener* must return a Python file-like
object that provides read, seek, tell, and close methods. Note:
only one opener at a time per fp, mode pair is allowed.
Alternatively, opener may be a filesystem object from a package
like fsspec that provides the following methods: isdir(),
isfile(), ls(), mtime(), open(), and size(). The exact interface
is defined in the fiona._vsiopener._AbstractOpener class.
vfs : str
This is a deprecated parameter. A URI scheme such as "zip://"
should be used instead.
Expand All @@ -486,18 +548,26 @@ def listlayers(fp, vfs=None, **kwargs):
If the input is not a str, Path, or file object.
"""
if vfs and not isinstance(vfs, str):
raise TypeError(f"invalid vfs: {vfs!r}")

if hasattr(fp, 'read'):
with MemoryFile(fp.read()) as memfile:
return _listlayers(memfile.name, **kwargs)
else:
if isinstance(fp, Path):
fp = str(fp)

if not isinstance(fp, str):
raise TypeError(f"invalid path: {fp!r}")
if vfs and not isinstance(vfs, str):
raise TypeError(f"invalid vfs: {vfs!r}")
if hasattr(fp, "path") and hasattr(fp, "fs"):
log.debug("Detected fp is an OpenFile: fp=%r", fp)
raw_dataset_path = fp.path
opener = fp.fs.open
else:
raw_dataset_path = os.fspath(fp)

if opener:
log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener)
with _opener_registration(raw_dataset_path, opener) as registered_vsi_path:
log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path)
return _listlayers(registered_vsi_path, **kwargs)
else:
if vfs:
warnings.warn(
"The vfs keyword argument is deprecated and will be removed in 2.0. "
Expand All @@ -506,10 +576,10 @@ def listlayers(fp, vfs=None, **kwargs):
stacklevel=2,
)
pobj_vfs = _parse_path(vfs)
pobj_path = _parse_path(fp)
pobj_path = _parse_path(raw_dataset_path)
pobj = _ParsedPath(pobj_path.path, pobj_vfs.path, pobj_vfs.scheme)
else:
pobj = _parse_path(fp)
pobj = _parse_path(raw_dataset_path)

return _listlayers(_vsi_path(pobj), **kwargs)

Expand Down
5 changes: 0 additions & 5 deletions fiona/_env.pxd
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
include "gdal.pxi"


cdef extern from "ogr_srs_api.h":
void OSRSetPROJSearchPaths(const char *const *papszPaths)
void OSRGetPROJVersion (int *pnMajor, int *pnMinor, int *pnPatch)


cdef class ConfigEnv(object):
cdef public object options

Expand Down
3 changes: 0 additions & 3 deletions fiona/_env.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import threading

from fiona._err cimport exc_wrap_int, exc_wrap_ogrerr
from fiona._err import CPLE_BaseError
from fiona._vsiopener cimport install_pyopener_plugin
from fiona.errors import EnvError

level_map = {
Expand Down Expand Up @@ -406,10 +405,8 @@ cdef class GDALEnv(ConfigEnv):
if not self._have_registered_drivers:
with threading.Lock():
if not self._have_registered_drivers:

GDALAllRegister()
OGRRegisterAll()
install_pyopener_plugin(pyopener_plugin)

if 'GDAL_DATA' in os.environ:
log.debug("GDAL_DATA found in environment.")
Expand Down
15 changes: 7 additions & 8 deletions fiona/_err.pxd
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from libc.stdio cimport *

cdef extern from "cpl_vsi.h":

ctypedef FILE VSILFILE
include "gdal.pxi"

cdef extern from "ogr_core.h":

ctypedef int OGRErr
from libc.stdio cimport *

cdef get_last_error_msg()
cdef int exc_wrap_int(int retval) except -1
cdef OGRErr exc_wrap_ogrerr(OGRErr retval) except -1
cdef void *exc_wrap_pointer(void *ptr) except NULL
cdef VSILFILE *exc_wrap_vsilfile(VSILFILE *f) except NULL

cdef class StackChecker:
cdef object error_stack
cdef int exc_wrap_int(self, int retval) except -1
cdef void *exc_wrap_pointer(self, void *ptr) except NULL
Loading

0 comments on commit 8c0a31e

Please sign in to comment.