diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0425452de8d..4024c235ae8 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -42,6 +42,16 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Speed up the duck-array dispatch helpers on ``numpy.ndarray`` inputs: + ``is_chunked_array`` no longer duplicates ``is_duck_array``'s built-in + ``np.ndarray`` short-circuit, and ``is_dask_collection`` now + short-circuits ``np.ndarray`` directly, skipping the dask import and + dispatch on numpy-backed data. The knock-on speedup for + ``is_duck_dask_array`` (~2× on numpy) benefits many hot paths in + ``duck_array_ops``, ``variable``, ``indexing``, ``groupby`` and the + ``dt`` / ``str`` accessors. Roughly 1.4× faster ``isel().load()`` on a + 400-scalar-var dataset; no effect on chunked paths. + .. _whats-new.2026.04.0: diff --git a/xarray/namedarray/pycompat.py b/xarray/namedarray/pycompat.py index 5832f7cc9e7..893967adef3 100644 --- a/xarray/namedarray/pycompat.py +++ b/xarray/namedarray/pycompat.py @@ -8,7 +8,11 @@ from packaging.version import Version from xarray.core.utils import is_scalar -from xarray.namedarray.utils import is_duck_array, is_duck_dask_array +from xarray.namedarray.utils import ( + is_dask_collection, + is_duck_array, + is_duck_dask_array, +) integer_types = (int, np.integer) @@ -89,7 +93,12 @@ def mod_version(mod: ModType) -> Version: def is_chunked_array(x: duckarray[Any, Any]) -> bool: - return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, "chunks")) + # `is_duck_array` already short-circuits np.ndarray via isinstance, so we + # don't repeat that check here. `hasattr("chunks")` runs before + # `is_dask_collection` so the dominant numpy case skips the dask dispatch. + if not is_duck_array(x): + return False + return hasattr(x, "chunks") or is_dask_collection(x) def is_0d_dask_array(x: duckarray[Any, Any]) -> bool: diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 3490a76aa8d..a71ee4b0a50 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -66,6 +66,12 @@ def module_available(module: str, minversion: str | None = None) -> bool: def is_dask_collection(x: object) -> TypeGuard[DaskCollection]: + # Fast path: a plain numpy ndarray never satisfies __dask_graph__; skip + # the dask dispatch. Exact-type check (not isinstance) so any hypothetical + # ndarray subclass that *did* implement __dask_graph__ would still fall + # through to the real check. + if type(x) is np.ndarray: + return False if module_available("dask"): from dask.base import is_dask_collection