diff --git a/CHANGES.txt b/CHANGES.txt index 95855dac..47af04bb 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -8,6 +8,11 @@ All issue numbers are relative to https://github.com/Toblerity/Fiona/issues. Bug fixes: +- The Pyopener registry and VSI plugin have been rewritten to avoid filename + conflicts and to be compatible with multithreading. Now, a new plugin handler + is registered for each instance of using an opener (#1408). Before GDAL 3.9.0 + plugin handlers cannot not be removed and so it may be observed that the size + of the Pyopener registry grows during the execution of a program. - A CSLConstList ctypedef has been added and is used where appropriate (#1404). - Fiona model objects have a informative, printable representation again (#). diff --git a/fiona/__init__.py b/fiona/__init__.py index 5c3f3161..3197ea6d 100644 --- a/fiona/__init__.py +++ b/fiona/__init__.py @@ -324,7 +324,7 @@ def func(*args, **kwds): log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener) vsi_path_ctx = _opener_registration(raw_dataset_path, opener) registered_vsi_path = stack.enter_context(vsi_path_ctx) - log.debug("Registered vsi path: registered_vsi_path%r", registered_vsi_path) + log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path) path = _UnparsedPath(registered_vsi_path) else: if vfs: @@ -386,7 +386,7 @@ def func(*args, **kwds): @ensure_env_with_credentials -def remove(path_or_collection, driver=None, layer=None): +def remove(path_or_collection, driver=None, layer=None, opener=None): """Delete an OGR data source or one of its layers. If no layer is specified, the entire dataset and all of its layers @@ -396,6 +396,19 @@ def remove(path_or_collection, driver=None, layer=None): ---------- path_or_collection : str, pathlib.Path, or Collection The target Collection or its path. + opener : callable or obj, optional + A custom dataset opener which can serve GDAL's virtual + filesystem machinery via Python file-like objects. The + underlying file-like object is obtained by calling *opener* with + (*fp*, *mode*) or (*fp*, *mode* + "b") depending on the format + driver's native mode. *opener* must return a Python file-like + object that provides read, seek, tell, and close methods. Note: + only one opener at a time per fp, mode pair is allowed. + + Alternatively, opener may be a filesystem object from a package + like fsspec that provides the following methods: isdir(), + isfile(), ls(), mtime(), open(), and size(). The exact interface + is defined in the fiona._vsiopener._AbstractOpener class. driver : str, optional The name of a driver to be used for deletion, optional. Can usually be detected. @@ -414,21 +427,37 @@ def remove(path_or_collection, driver=None, layer=None): """ if isinstance(path_or_collection, Collection): collection = path_or_collection - path = collection.path + raw_dataset_path = collection.path driver = collection.driver collection.close() - elif isinstance(path_or_collection, Path): - path = str(path_or_collection) + else: - path = path_or_collection - if layer is None: - _remove(path, driver) + fp = path_or_collection + if hasattr(fp, "path") and hasattr(fp, "fs"): + log.debug("Detected fp is an OpenFile: fp=%r", fp) + raw_dataset_path = fp.path + opener = fp.fs.open + else: + raw_dataset_path = os.fspath(fp) + + if opener: + log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener) + with _opener_registration(raw_dataset_path, opener) as registered_vsi_path: + log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path) + if layer is None: + _remove(registered_vsi_path, driver) + else: + _remove_layer(registered_vsi_path, layer, driver) else: - _remove_layer(path, layer, driver) + pobj = _parse_path(raw_dataset_path) + if layer is None: + _remove(_vsi_path(pobj), driver) + else: + _remove_layer(_vsi_path(pobj), layer, driver) @ensure_env_with_credentials -def listdir(fp): +def listdir(fp, opener=None): """Lists the datasets in a directory or archive file. Archive files must be prefixed like "zip://" or "tar://". @@ -437,6 +466,19 @@ def listdir(fp): ---------- fp : str or pathlib.Path Directory or archive path. + opener : callable or obj, optional + A custom dataset opener which can serve GDAL's virtual + filesystem machinery via Python file-like objects. The + underlying file-like object is obtained by calling *opener* with + (*fp*, *mode*) or (*fp*, *mode* + "b") depending on the format + driver's native mode. *opener* must return a Python file-like + object that provides read, seek, tell, and close methods. Note: + only one opener at a time per fp, mode pair is allowed. + + Alternatively, opener may be a filesystem object from a package + like fsspec that provides the following methods: isdir(), + isfile(), ls(), mtime(), open(), and size(). The exact interface + is defined in the fiona._vsiopener._AbstractOpener class. Returns ------- @@ -449,18 +491,25 @@ def listdir(fp): If the input is not a str or Path. """ - if isinstance(fp, Path): - fp = str(fp) - - if not isinstance(fp, str): - raise TypeError("invalid path: %r" % fp) + if hasattr(fp, "path") and hasattr(fp, "fs"): + log.debug("Detected fp is an OpenFile: fp=%r", fp) + raw_dataset_path = fp.path + opener = fp.fs.open + else: + raw_dataset_path = os.fspath(fp) - pobj = _parse_path(fp) - return _listdir(_vsi_path(pobj)) + if opener: + log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener) + with _opener_registration(raw_dataset_path, opener) as registered_vsi_path: + log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path) + return _listdir(registered_vsi_path) + else: + pobj = _parse_path(raw_dataset_path) + return _listdir(_vsi_path(pobj)) @ensure_env_with_credentials -def listlayers(fp, vfs=None, **kwargs): +def listlayers(fp, opener=None, vfs=None, **kwargs): """Lists the layers (collections) in a dataset. Archive files must be prefixed like "zip://" or "tar://". @@ -469,6 +518,19 @@ def listlayers(fp, vfs=None, **kwargs): ---------- fp : str, pathlib.Path, or file-like object A dataset identifier or file object containing a dataset. + opener : callable or obj, optional + A custom dataset opener which can serve GDAL's virtual + filesystem machinery via Python file-like objects. The + underlying file-like object is obtained by calling *opener* with + (*fp*, *mode*) or (*fp*, *mode* + "b") depending on the format + driver's native mode. *opener* must return a Python file-like + object that provides read, seek, tell, and close methods. Note: + only one opener at a time per fp, mode pair is allowed. + + Alternatively, opener may be a filesystem object from a package + like fsspec that provides the following methods: isdir(), + isfile(), ls(), mtime(), open(), and size(). The exact interface + is defined in the fiona._vsiopener._AbstractOpener class. vfs : str This is a deprecated parameter. A URI scheme such as "zip://" should be used instead. @@ -486,18 +548,26 @@ def listlayers(fp, vfs=None, **kwargs): If the input is not a str, Path, or file object. """ + if vfs and not isinstance(vfs, str): + raise TypeError(f"invalid vfs: {vfs!r}") + if hasattr(fp, 'read'): with MemoryFile(fp.read()) as memfile: return _listlayers(memfile.name, **kwargs) - else: - if isinstance(fp, Path): - fp = str(fp) - if not isinstance(fp, str): - raise TypeError(f"invalid path: {fp!r}") - if vfs and not isinstance(vfs, str): - raise TypeError(f"invalid vfs: {vfs!r}") + if hasattr(fp, "path") and hasattr(fp, "fs"): + log.debug("Detected fp is an OpenFile: fp=%r", fp) + raw_dataset_path = fp.path + opener = fp.fs.open + else: + raw_dataset_path = os.fspath(fp) + if opener: + log.debug("Registering opener: raw_dataset_path=%r, opener=%r", raw_dataset_path, opener) + with _opener_registration(raw_dataset_path, opener) as registered_vsi_path: + log.debug("Registered vsi path: registered_vsi_path=%r", registered_vsi_path) + return _listlayers(registered_vsi_path, **kwargs) + else: if vfs: warnings.warn( "The vfs keyword argument is deprecated and will be removed in 2.0. " @@ -506,10 +576,10 @@ def listlayers(fp, vfs=None, **kwargs): stacklevel=2, ) pobj_vfs = _parse_path(vfs) - pobj_path = _parse_path(fp) + pobj_path = _parse_path(raw_dataset_path) pobj = _ParsedPath(pobj_path.path, pobj_vfs.path, pobj_vfs.scheme) else: - pobj = _parse_path(fp) + pobj = _parse_path(raw_dataset_path) return _listlayers(_vsi_path(pobj), **kwargs) diff --git a/fiona/_env.pxd b/fiona/_env.pxd index e854031d..4992f9c1 100644 --- a/fiona/_env.pxd +++ b/fiona/_env.pxd @@ -1,11 +1,6 @@ include "gdal.pxi" -cdef extern from "ogr_srs_api.h": - void OSRSetPROJSearchPaths(const char *const *papszPaths) - void OSRGetPROJVersion (int *pnMajor, int *pnMinor, int *pnPatch) - - cdef class ConfigEnv(object): cdef public object options diff --git a/fiona/_env.pyx b/fiona/_env.pyx index ea3f47bd..b4722f12 100644 --- a/fiona/_env.pyx +++ b/fiona/_env.pyx @@ -17,7 +17,6 @@ import threading from fiona._err cimport exc_wrap_int, exc_wrap_ogrerr from fiona._err import CPLE_BaseError -from fiona._vsiopener cimport install_pyopener_plugin from fiona.errors import EnvError level_map = { @@ -406,10 +405,8 @@ cdef class GDALEnv(ConfigEnv): if not self._have_registered_drivers: with threading.Lock(): if not self._have_registered_drivers: - GDALAllRegister() OGRRegisterAll() - install_pyopener_plugin(pyopener_plugin) if 'GDAL_DATA' in os.environ: log.debug("GDAL_DATA found in environment.") diff --git a/fiona/_err.pxd b/fiona/_err.pxd index 8ab23c4e..28229751 100644 --- a/fiona/_err.pxd +++ b/fiona/_err.pxd @@ -1,15 +1,14 @@ -from libc.stdio cimport * - -cdef extern from "cpl_vsi.h": - - ctypedef FILE VSILFILE +include "gdal.pxi" -cdef extern from "ogr_core.h": - - ctypedef int OGRErr +from libc.stdio cimport * cdef get_last_error_msg() cdef int exc_wrap_int(int retval) except -1 cdef OGRErr exc_wrap_ogrerr(OGRErr retval) except -1 cdef void *exc_wrap_pointer(void *ptr) except NULL cdef VSILFILE *exc_wrap_vsilfile(VSILFILE *f) except NULL + +cdef class StackChecker: + cdef object error_stack + cdef int exc_wrap_int(self, int retval) except -1 + cdef void *exc_wrap_pointer(self, void *ptr) except NULL diff --git a/fiona/_err.pyx b/fiona/_err.pyx index 0dff3b3f..aa6965f3 100644 --- a/fiona/_err.pyx +++ b/fiona/_err.pyx @@ -29,23 +29,17 @@ manager raises a more useful and informative error: ValueError: The PNG driver does not support update access to existing datasets. """ -# CPL function declarations. -cdef extern from "cpl_error.h": - - ctypedef enum CPLErr: - CE_None - CE_Debug - CE_Warning - CE_Failure - CE_Fatal +import contextlib +from contextvars import ContextVar +from enum import IntEnum +from itertools import zip_longest +import logging - int CPLGetLastErrorNo() - const char* CPLGetLastErrorMsg() - int CPLGetLastErrorType() - void CPLErrorReset() +log = logging.getLogger(__name__) +_ERROR_STACK = ContextVar("error_stack") +_ERROR_STACK.set([]) -from enum import IntEnum # Python exceptions expressing the CPL error numbers. @@ -132,6 +126,10 @@ class CPLE_AWSSignatureDoesNotMatchError(CPLE_BaseError): pass +class CPLE_AWSError(CPLE_BaseError): + pass + + class FionaNullPointerError(CPLE_BaseError): """ Returned from exc_wrap_pointer when a NULL pointer is passed, but no GDAL @@ -148,6 +146,14 @@ class FionaCPLError(CPLE_BaseError): pass +cdef dict _LEVEL_MAP = { + 0: 0, + 1: logging.DEBUG, + 2: logging.WARNING, + 3: logging.ERROR, + 4: logging.CRITICAL +} + # Map of GDAL error numbers to the Python exceptions. exception_map = { 1: CPLE_AppDefinedError, @@ -168,8 +174,30 @@ exception_map = { 13: CPLE_AWSObjectNotFoundError, 14: CPLE_AWSAccessDeniedError, 15: CPLE_AWSInvalidCredentialsError, - 16: CPLE_AWSSignatureDoesNotMatchError} - + 16: CPLE_AWSSignatureDoesNotMatchError, + 17: CPLE_AWSError +} + +cdef dict _CODE_MAP = { + 0: 'CPLE_None', + 1: 'CPLE_AppDefined', + 2: 'CPLE_OutOfMemory', + 3: 'CPLE_FileIO', + 4: 'CPLE_OpenFailed', + 5: 'CPLE_IllegalArg', + 6: 'CPLE_NotSupported', + 7: 'CPLE_AssertionFailed', + 8: 'CPLE_NoWriteAccess', + 9: 'CPLE_UserInterrupt', + 10: 'ObjectNull', + 11: 'CPLE_HttpResponse', + 12: 'CPLE_AWSBucketNotFound', + 13: 'CPLE_AWSObjectNotFound', + 14: 'CPLE_AWSAccessDenied', + 15: 'CPLE_AWSInvalidCredentials', + 16: 'CPLE_AWSSignatureDoesNotMatch', + 17: 'CPLE_AWSError' +} # CPL Error types as an enum. class GDALError(IntEnum): @@ -305,3 +333,127 @@ cdef VSILFILE *exc_wrap_vsilfile(VSILFILE *f) except NULL: return f cpl_errs = GDALErrCtxManager() + + +cdef class StackChecker: + + def __init__(self, error_stack=None): + self.error_stack = error_stack or {} + + cdef int exc_wrap_int(self, int err) except -1: + """Wrap a GDAL/OGR function that returns CPLErr (int). + + Raises a Rasterio exception if a non-fatal error has be set. + """ + if err: + stack = self.error_stack.get() + for error, cause in zip_longest(stack[::-1], stack[::-1][1:]): + if error is not None and cause is not None: + error.__cause__ = cause + + if stack: + last = stack.pop() + if last is not None: + raise last + + return err + + cdef void *exc_wrap_pointer(self, void *ptr) except NULL: + """Wrap a GDAL/OGR function that returns a pointer. + + Raises a Rasterio exception if a non-fatal error has be set. + """ + if ptr == NULL: + stack = self.error_stack.get() + for error, cause in zip_longest(stack[::-1], stack[::-1][1:]): + if error is not None and cause is not None: + error.__cause__ = cause + + if stack: + last = stack.pop() + if last is not None: + raise last + + return ptr + + +cdef void log_error( + CPLErr err_class, + int err_no, + const char* msg, +) noexcept with gil: + """Send CPL errors to Python's logger. + + Because this function is called by GDAL with no Python context, we + can't propagate exceptions that we might raise here. They'll be + ignored. + + """ + if err_no in _CODE_MAP: + # We've observed that some GDAL functions may emit multiple + # ERROR level messages and yet succeed. We want to see those + # messages in our log file, but not at the ERROR level. We + # turn the level down to INFO. + if err_class == 3: + log.info( + "GDAL signalled an error: err_no=%r, msg=%r", + err_no, + msg.decode("utf-8") + ) + elif err_no == 0: + log.log(_LEVEL_MAP[err_class], "%s", msg.decode("utf-8")) + else: + log.log(_LEVEL_MAP[err_class], "%s:%s", _CODE_MAP[err_no], msg.decode("utf-8")) + else: + log.info("Unknown error number %r", err_no) + + +IF UNAME_SYSNAME == "Windows": + cdef void __stdcall chaining_error_handler( + CPLErr err_class, + int err_no, + const char* msg + ) noexcept with gil: + global _ERROR_STACK + log_error(err_class, err_no, msg) + if err_class == 3: + stack = _ERROR_STACK.get() + stack.append( + exception_map.get(err_no, CPLE_BaseError)(err_class, err_no, msg.decode("utf-8")), + ) + _ERROR_STACK.set(stack) +ELSE: + cdef void chaining_error_handler( + CPLErr err_class, + int err_no, + const char* msg + ) noexcept with gil: + global _ERROR_STACK + log_error(err_class, err_no, msg) + if err_class == 3: + stack = _ERROR_STACK.get() + stack.append( + exception_map.get(err_no, CPLE_BaseError)(err_class, err_no, msg.decode("utf-8")), + ) + _ERROR_STACK.set(stack) + + +@contextlib.contextmanager +def stack_errors(): + # TODO: better name? + # Note: this manager produces one chain of errors and thus assumes + # that no more than one GDAL function is called. + CPLErrorReset() + global _ERROR_STACK + _ERROR_STACK.set([]) + + # chaining_error_handler (better name a TODO) records GDAL errors + # in the order they occur and converts to exceptions. + CPLPushErrorHandlerEx(chaining_error_handler, NULL) + + # Run code in the `with` block. + yield StackChecker(_ERROR_STACK) + + CPLPopErrorHandler() + _ERROR_STACK.set([]) + CPLErrorReset() diff --git a/fiona/_vsiopener.pxd b/fiona/_vsiopener.pxd index 983e23b8..a91d7931 100644 --- a/fiona/_vsiopener.pxd +++ b/fiona/_vsiopener.pxd @@ -1,4 +1 @@ include "gdal.pxi" - -cdef int install_pyopener_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct) -cdef void uninstall_pyopener_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct) diff --git a/fiona/_vsiopener.pyx b/fiona/_vsiopener.pyx index 828e16e6..0babadfe 100644 --- a/fiona/_vsiopener.pyx +++ b/fiona/_vsiopener.pyx @@ -8,19 +8,17 @@ from contextvars import ContextVar import logging import os from pathlib import Path - import stat +from uuid import uuid4 from libc.string cimport memcpy +from fiona._env import get_gdal_version_tuple from fiona.errors import OpenerRegistrationError log = logging.getLogger(__name__) -# Prefix for all in-memory paths used by GDAL's VSI system -# Except for errors and log messages this shouldn't really be seen by the user -cdef str PREFIX = "/vsifiopener/" -cdef bytes PREFIX_BYTES = PREFIX.encode("utf-8") +cdef str VSI_NS_ROOT = "vsifiopener" # This is global state for the Python filesystem plugin. It currently only # contains path -> PyOpenerBase (or subclass) instances. This is used by @@ -33,38 +31,12 @@ _OPEN_FILE_EXIT_STACKS = ContextVar("open_file_exit_stacks") _OPEN_FILE_EXIT_STACKS.set({}) -cdef int install_pyopener_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct): - """Install handlers for python file openers if it isn't already installed.""" - cdef char **registered_prefixes = VSIGetFileSystemsPrefixes() - cdef int prefix_index = CSLFindString(registered_prefixes, PREFIX_BYTES) - CSLDestroy(registered_prefixes) - - if prefix_index < 0: - log.debug("Installing Python opener handler plugin...") - callbacks_struct = VSIAllocFilesystemPluginCallbacksStruct() - callbacks_struct.open = pyopener_open - callbacks_struct.eof = pyopener_eof - callbacks_struct.tell = pyopener_tell - callbacks_struct.seek = pyopener_seek - callbacks_struct.read = pyopener_read - callbacks_struct.write = pyopener_write - callbacks_struct.flush = pyopener_flush - callbacks_struct.close = pyopener_close - callbacks_struct.read_dir = pyopener_read_dir - callbacks_struct.stat = pyopener_stat - callbacks_struct.pUserData = _OPENER_REGISTRY - retval = VSIInstallPluginHandler(PREFIX_BYTES, callbacks_struct) - VSIFreeFilesystemPluginCallbacksStruct(callbacks_struct) - return retval - else: - return 0 - - -cdef void uninstall_pyopener_plugin(VSIFilesystemPluginCallbacksStruct *callbacks_struct): - if callbacks_struct is not NULL: - callbacks_struct.pUserData = NULL - VSIFreeFilesystemPluginCallbacksStruct(callbacks_struct) - callbacks_struct = NULL +# When an opener is registered for a path, this structure captures the +# path and unique registration instance. VSI stat, read_dir, and open +# calls have access to the struct instance. +cdef struct FSData: + char *path + char *uuid cdef int pyopener_stat( @@ -74,14 +46,20 @@ cdef int pyopener_stat( int nFlags ) with gil: """Provides POSIX stat data to GDAL from a Python filesystem.""" - # Convert the given filename to a registry key. - # Reminder: openers are registered by URI scheme, authority, and - # *directory* path. + cdef FSData *fsdata = pUserData + path = fsdata.path.decode("utf-8") + uuid = fsdata.uuid.decode("utf-8") + key = (Path(path), uuid) urlpath = pszFilename.decode("utf-8") - key = Path(urlpath).parent registry = _OPENER_REGISTRY.get() - log.debug("Looking up opener in pyopener_stat: registry=%r, key=%r", registry, key) + log.debug( + "Looking up opener in pyopener_stat: urlpath=%r, registry=%r, key=%r", + urlpath, + registry, + key + ) + try: file_opener = registry[key] except KeyError as err: @@ -91,15 +69,15 @@ cdef int pyopener_stat( try: if file_opener.isfile(urlpath): - fmode = 0o170000 | stat.S_IFREG + fmode = stat.S_IFREG elif file_opener.isdir(urlpath): - fmode = 0o170000 | stat.S_IFDIR + fmode = stat.S_IFDIR else: # No such file or directory. return -1 size = file_opener.size(urlpath) mtime = file_opener.mtime(urlpath) - except (FileNotFoundError, KeyError): + except (FileNotFoundError, KeyError) as err: # No such file or directory. return -1 except Exception as err: @@ -113,17 +91,64 @@ cdef int pyopener_stat( return 0 +cdef int pyopener_unlink( + void *pUserData, + const char *pszFilename, +) with gil: + """Unlink a file from a Python filesystem.""" + cdef FSData *fsdata = pUserData + path = fsdata.path.decode("utf-8") + uuid = fsdata.uuid.decode("utf-8") + key = (Path(path), uuid) + urlpath = pszFilename.decode("utf-8") + + registry = _OPENER_REGISTRY.get() + log.debug( + "Looking up opener in pyopener_unlink: urlpath=%r, registry=%r, key=%r", + urlpath, + registry, + key + ) + + try: + file_opener = registry[key] + except KeyError as err: + errmsg = f"Opener not found: {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return -1 + + try: + file_opener.rm(urlpath) + return 0 + except (FileNotFoundError, KeyError) as err: + # No such file or directory. + return -1 + except Exception as err: + errmsg = f"Opener failed to determine file info: {repr(err)}".encode("utf-8") + CPLError(CE_Failure, 4, "%s", errmsg) + return -1 + + cdef char ** pyopener_read_dir( void *pUserData, const char *pszDirname, int nMaxFiles ) with gil: """Provides a directory listing to GDAL from a Python filesystem.""" + cdef FSData *fsdata = pUserData + path = fsdata.path.decode("utf-8") + uuid = fsdata.uuid.decode("utf-8") + key = (Path(path), uuid) urlpath = pszDirname.decode("utf-8") - key = Path(urlpath) registry = _OPENER_REGISTRY.get() - log.debug("Looking up opener in pyopener_read_dir: registry=%r, key=%r", registry, key) + log.debug( + "Looking up opener in pyopener_read_dir: urlpath=%r, registry=%r, key=%r", + urlpath, + registry, + key + ) + try: file_opener = registry[key] except KeyError as err: @@ -134,8 +159,7 @@ cdef char ** pyopener_read_dir( try: # GDAL wants relative file names. contents = [Path(item).name for item in file_opener.ls(urlpath)] - log.debug("Looking for dir contents: urlpath=%r, contents=%r", urlpath, contents) - except (FileNotFoundError, KeyError): + except (FileNotFoundError, KeyError) as err: # No such file or directory. return NULL except Exception as err: @@ -163,12 +187,24 @@ cdef void* pyopener_open( GDAL may call this function multiple times per filename and each result must be seperately seekable. """ + cdef FSData *fsdata = pUserData + path = fsdata.path.decode("utf-8") + uuid = fsdata.uuid.decode("utf-8") + key = (Path(path), uuid) urlpath = pszFilename.decode("utf-8") + mode = pszAccess.decode("utf-8") - key = Path(urlpath).parent + if not "b" in mode: + mode += "b" registry = _OPENER_REGISTRY.get() - log.debug("Looking up opener in pyopener_open: registry=%r, key=%r", registry, key) + log.debug( + "Looking up opener in pyopener_open: urlpath=%r, registry=%r, key=%r", + urlpath, + registry, + key + ) + try: file_opener = registry[key] except KeyError as err: @@ -199,7 +235,6 @@ cdef void* pyopener_open( try: file_obj = stack.enter_context(file_obj) except (AttributeError, TypeError) as err: - log.error("File object is not a context manager: file_obj=%r", file_obj) errmsg = f"Opener failed to open file with arguments ({repr(urlpath)}, {repr(mode)}): {repr(err)}".encode("utf-8") CPLError(CE_Failure, 4, "%s", errmsg) return NULL @@ -207,10 +242,9 @@ cdef void* pyopener_open( errmsg = "OpenFile didn't resolve".encode("utf-8") return NULL else: - exit_stacks = _OPEN_FILE_EXIT_STACKS.get() + exit_stacks = _OPEN_FILE_EXIT_STACKS.get({}) exit_stacks[file_obj] = stack _OPEN_FILE_EXIT_STACKS.set(exit_stacks) - log.debug("Returning: file_obj=%r", file_obj) return file_obj @@ -222,6 +256,7 @@ cdef int pyopener_eof(void *pFile) with gil: else: return 0 + cdef vsi_l_offset pyopener_tell(void *pFile) with gil: cdef object file_obj = pFile return file_obj.tell() @@ -249,7 +284,11 @@ cdef size_t pyopener_write(void *pFile, void *pBuffer, size_t nSize, size_t nCou cdef object file_obj = pFile buffer_len = nSize * nCount cdef unsigned char [:] buff_view = pBuffer - log.debug("Writing data: file_obj=%r, buff_view=%r, buffer_len=%r", file_obj, buff_view, buffer_len) + log.debug( + "Writing data: file_obj=%r, buff_view=%r, buffer_len=%r", + file_obj, + buff_view, + buffer_len) try: num = file_obj.write(buff_view) except TypeError: @@ -279,32 +318,86 @@ cdef int pyopener_close(void *pFile) with gil: @contextlib.contextmanager def _opener_registration(urlpath, obj): - key = Path(urlpath).parent + cdef char **registered_prefixes = NULL + cdef int prefix_index = 0 + cdef VSIFilesystemPluginCallbacksStruct *callbacks_struct = NULL + cdef FSData fsdata + cdef char *path_c = NULL + cdef char *uuid_c = NULL + + # To resolve issue 1406 we add the opener or filesystem id to the + # registry key. + kpath = Path(urlpath).parent + kid = uuid4().hex + key = (kpath, kid) + + path_b = kpath.as_posix().encode("utf-8") + path_c = path_b + uuid_b = kid.encode("utf-8") + uuid_c = uuid_b + + fsdata = FSData(path_c, uuid_c) + + namespace = f"{VSI_NS_ROOT}_{kid}" + cdef bytes prefix_bytes = f"/{namespace}/".encode("utf-8") # Might raise. opener = _create_opener(obj) - registry = _OPENER_REGISTRY.get() + registry = _OPENER_REGISTRY.get({}) + if key in registry: if registry[key] != opener: raise OpenerRegistrationError(f"Opener already registered for urlpath.") else: try: - yield f"{PREFIX}{urlpath}" + yield f"/{namespace}/{urlpath}" finally: registry = _OPENER_REGISTRY.get() _ = registry.pop(key, None) _OPENER_REGISTRY.set(registry) + else: + # Install handler. + registered_prefixes = VSIGetFileSystemsPrefixes() + prefix_index = CSLFindString(registered_prefixes, prefix_bytes) + CSLDestroy(registered_prefixes) + + if prefix_index < 0: + log.debug("Installing Python opener handler plugin: prefix_bytes=%r", prefix_bytes) + callbacks_struct = VSIAllocFilesystemPluginCallbacksStruct() + callbacks_struct.open = pyopener_open + callbacks_struct.eof = pyopener_eof + callbacks_struct.tell = pyopener_tell + callbacks_struct.seek = pyopener_seek + callbacks_struct.read = pyopener_read + callbacks_struct.write = pyopener_write + callbacks_struct.flush = pyopener_flush + callbacks_struct.close = pyopener_close + callbacks_struct.read_dir = pyopener_read_dir + callbacks_struct.stat = pyopener_stat + callbacks_struct.unlink = pyopener_unlink + callbacks_struct.pUserData = &fsdata + retval = VSIInstallPluginHandler(prefix_bytes, callbacks_struct) + VSIFreeFilesystemPluginCallbacksStruct(callbacks_struct) + + registered_prefixes = VSIGetFileSystemsPrefixes() + prefix_index = CSLFindString(registered_prefixes, prefix_bytes) + CSLDestroy(registered_prefixes) + registry[key] = opener _OPENER_REGISTRY.set(registry) + try: - yield f"{PREFIX}{urlpath}" + yield f"/{namespace}/{urlpath}" finally: registry = _OPENER_REGISTRY.get() _ = registry.pop(key, None) _OPENER_REGISTRY.set(registry) + IF (CTE_GDAL_MAJOR_VERSION, CTE_GDAL_MINOR_VERSION) >= (3, 9): + retval = VSIRemovePluginHandler(prefix_bytes) + class _AbstractOpener: """Adapts a Python object to the opener interface.""" @@ -381,6 +474,19 @@ class _AbstractOpener: Modification timestamp in seconds. """ raise NotImplementedError + def rm(self, path): + """Remove a resource. + + Parameters + ---------- + path : str + The identifier/locator for a resource within a filesystem. + + Returns + ------- + None + """ + raise NotImplementedError def size(self, path): """Get the size, in bytes, of a resource.. @@ -427,14 +533,16 @@ class _FilesystemOpener(_AbstractOpener): def isdir(self, path): return self._obj.isdir(path) def ls(self, path): - return self._obj.ls(path) + # return value of ls() varies between file and zip fsspec filesystems. + return [item if isinstance(item, str) else item["filename"] for item in self._obj.ls(path)] def mtime(self, path): try: mtime = int(self._obj.modified(path).timestamp()) except NotImplementedError: mtime = 0 - log.debug("Modification time: mtime=%r", mtime) return mtime + def rm(self, path): + return self._obj.rm(path) def size(self, path): return self._obj.size(path) @@ -447,6 +555,8 @@ class _AltFilesystemOpener(_FilesystemOpener): return self._obj.is_dir(path) def mtime(self, path): return 0 + def rm(self, path): + self._obj.remove_file(path) def size(self, path): return self._obj.file_size(path) diff --git a/fiona/gdal.pxi b/fiona/gdal.pxi index d06a3dee..9403bb2b 100644 --- a/fiona/gdal.pxi +++ b/fiona/gdal.pxi @@ -51,7 +51,9 @@ cdef extern from "cpl_error.h" nogil: const char* CPLGetLastErrorMsg() CPLErr CPLGetLastErrorType() void CPLPushErrorHandler(CPLErrorHandler handler) + void CPLPushErrorHandlerEx(CPLErrorHandler handler, void *userdata) void CPLPopErrorHandler() + void CPLQuietErrorHandler(CPLErr eErrClass, CPLErrorNum nError, const char *pszErrorMsg) cdef extern from "cpl_vsi.h" nogil: @@ -141,6 +143,11 @@ cdef extern from "cpl_vsi.h" nogil: int VSI_ISDIR(int mode) +IF (CTE_GDAL_MAJOR_VERSION, CTE_GDAL_MINOR_VERSION) >= (3, 9): + cdef extern from "cpl_vsi.h" nogil: + int VSIRemovePluginHandler(const char*) + + cdef extern from "ogr_core.h" nogil: ctypedef int OGRErr char *OGRGeometryTypeToName(int type) @@ -301,7 +308,7 @@ cdef extern from "ogr_srs_api.h" nogil: OGRErr OSRExportToPROJJSON(OGRSpatialReferenceH hSRS, char ** ppszReturn, const char* const* papszOptions) - + void OSRGetPROJVersion (int *pnMajor, int *pnMinor, int *pnPatch) cdef extern from "gdal.h" nogil: diff --git a/fiona/ogrext.pyx b/fiona/ogrext.pyx index 1cb18cea..8ae7f799 100644 --- a/fiona/ogrext.pyx +++ b/fiona/ogrext.pyx @@ -18,11 +18,12 @@ from fiona._geometry cimport ( GeomBuilder, OGRGeomBuilder, geometry_type_code, normalize_geometry_type_code, base_geometry_type_code) from fiona._err cimport exc_wrap_int, exc_wrap_pointer, exc_wrap_vsilfile, get_last_error_msg +from fiona._err cimport StackChecker import fiona from fiona._env import get_gdal_version_num, calc_gdal_version_num, get_gdal_version_tuple from fiona._err import ( - cpl_errs, FionaNullPointerError, CPLE_BaseError, CPLE_AppDefinedError, + cpl_errs, stack_errors, FionaNullPointerError, CPLE_BaseError, CPLE_AppDefinedError, CPLE_OpenFailedError) from fiona._geometry import GEOMETRY_TYPES from fiona import compat @@ -92,6 +93,10 @@ cdef void* gdal_open_vector(const char* path_c, int mode, drivers, options) exce cdef char **drvs = NULL cdef void* drv = NULL cdef char **open_opts = NULL + cdef char **registered_prefixes = NULL + cdef int prefix_index = 0 + cdef VSIFilesystemPluginCallbacksStruct *callbacks_struct = NULL + cdef StackChecker checker flags = GDAL_OF_VECTOR | GDAL_OF_VERBOSE_ERROR if mode == 1: @@ -122,15 +127,13 @@ cdef void* gdal_open_vector(const char* path_c, int mode, drivers, options) exce open_opts = CSLAddNameValue(open_opts, "VALIDATE_OPEN_OPTIONS", "NO") try: - cogr_ds = exc_wrap_pointer( - GDALOpenEx(path_c, flags, drvs, open_opts, NULL) - ) - return cogr_ds - except FionaNullPointerError: - raise DriverError( - f"Failed to open dataset (mode={mode}): {path_c.decode('utf-8')}") + with stack_errors() as checker: + cogr_ds = GDALOpenEx( + path_c, flags, drvs, open_opts, NULL + ) + return checker.exc_wrap_pointer(cogr_ds) except CPLE_BaseError as exc: - raise DriverError(str(exc)) + raise DriverError(f"Failed to open dataset (flags={flags}): {path_c.decode('utf-8')}") from exc finally: CSLDestroy(drvs) CSLDestroy(open_opts) @@ -149,9 +152,7 @@ cdef void* gdal_create(void* cogr_driver, const char *path_c, options) except NU creation_option_keys = option_keys & set(meta.dataset_creation_options(db.decode("utf-8"))) for k, v in options.items(): - if k.upper() in creation_option_keys: - kb = k.upper().encode('utf-8') if isinstance(v, bool): @@ -171,7 +172,6 @@ cdef void* gdal_create(void* cogr_driver, const char *path_c, options) except NU CSLDestroy(creation_opts) - def _explode(coords): """Explode a GeoJSON geometry's coordinates object and yield coordinate tuples. As long as the input is conforming, the type of @@ -193,6 +193,7 @@ def _bounds(geometry): except (KeyError, TypeError): return None + cdef int GDAL_VERSION_NUM = get_gdal_version_num() @@ -2126,10 +2127,8 @@ def _remove_layer(path, layer, driver=None): def _listlayers(path, **kwargs): - """Provides a list of the layers in an OGR data source. """ - cdef void *cogr_ds = NULL cdef void *cogr_layer = NULL cdef const char *path_c diff --git a/tests/test_pyopener.py b/tests/test_pyopener.py index 1ae80ffb..b0914f17 100644 --- a/tests/test_pyopener.py +++ b/tests/test_pyopener.py @@ -1,6 +1,7 @@ """Tests of the Python opener VSI plugin.""" import io +import os import fsspec import pytest @@ -78,3 +79,85 @@ def test_opener_fsspec_fs_write(tmp_path): collection.write(feature) assert len(collection) == 1 assert collection.crs == "OGC:CRS84" + + +def test_threads_context(): + import io + from threading import Thread + + + def target(): + with fiona.open("tests/data/coutwildrnp.shp", opener=io.open) as colxn: + print(colxn.profile) + assert len(colxn) == 67 + + + thread = Thread(target=target) + thread.start() + thread.join() + + +def test_overwrite(data): + """Opener can overwrite data.""" + schema = {"geometry": "Point", "properties": {"zero": "int"}} + feature = Feature.from_dict( + **{ + "geometry": {"type": "Point", "coordinates": (0, 0)}, + "properties": {"zero": "0"}, + } + ) + fs = fsspec.filesystem("file") + outputfile = os.path.join(str(data), "coutwildrnp.shp") + + with fiona.open( + str(outputfile), + "w", + driver="ESRI Shapefile", + schema=schema, + crs="OGC:CRS84", + opener=fs, + ) as collection: + collection.write(feature) + assert len(collection) == 1 + assert collection.crs == "OGC:CRS84" + + +def test_opener_fsspec_zip_fs_listlayers(): + """Use fsspec zip filesystem as opener for listlayers().""" + fs = fsspec.filesystem("zip", fo="tests/data/coutwildrnp.zip") + assert fiona.listlayers("coutwildrnp.shp", opener=fs) == ["coutwildrnp"] + + +def test_opener_fsspec_zip_fs_listdir(): + """Use fsspec zip filesystem as opener for listdir().""" + fs = fsspec.filesystem("zip", fo="tests/data/coutwildrnp.zip") + listing = fiona.listdir("/", opener=fs) + assert len(listing) == 4 + assert set( + ["coutwildrnp.shp", "coutwildrnp.dbf", "coutwildrnp.shx", "coutwildrnp.prj"] + ) & set(listing) + + + +def test_opener_fsspec_file_fs_listdir(): + """Use fsspec file filesystem as opener for listdir().""" + fs = fsspec.filesystem("file") + listing = fiona.listdir("tests/data", opener=fs) + assert len(listing) >= 35 + assert set( + ["coutwildrnp.shp", "coutwildrnp.dbf", "coutwildrnp.shx", "coutwildrnp.prj"] + ) & set(listing) + + +def test_opener_fsspec_file_remove(data): + """Opener can remove data.""" + fs = fsspec.filesystem("file") + listing = fiona.listdir(str(data), opener=fs) + assert len(listing) == 4 + outputfile = os.path.join(str(data), "coutwildrnp.shp") + fiona.remove(outputfile) + listing = fiona.listdir(str(data), opener=fs) + assert len(listing) == 0 + assert not set( + ["coutwildrnp.shp", "coutwildrnp.dbf", "coutwildrnp.shx", "coutwildrnp.prj"] + ) & set(listing)