Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Internal changes
* Conversion indicators have been split from ``tests/test_atmos.py`` into new ``tests/test_converters.py``. This is to ensure that conversion indicators are tested separately from other atmospheric indicators. (:issue:`1289`, :pull:`2224`).
* ``xclim.testing.utils.show_versions`` now uses the `importlib.metadata` library to more accurately gather dependency information. (:pull:`2229`).
* Replaced the deprecated ``"time.week"`` grouping strings with ``da.time.dt.isocalendar().week`` in ``xclim.indices.stats.standardized_index`` functions. (:pull:`2230`).
* ``xclim.indices.run_length.lazy_indexing`` has been moved to ``xclim.core.utils.lazy_indexing``. (:issue:`2107`, :pull:`2231`).

Bug fixes
^^^^^^^^^
Expand All @@ -58,6 +59,7 @@ Bug fixes
* Fix ``spell_length_statistics`` and related functions for cases where ``thresh`` is a DataArray. (:issue:`2216`, :pull:`2218`).
* Addressed a noisy warning emitted by `numpy` in the ``xclim.indices.stats`` when performing a fit over data with missing values. (:pull:`2224`).
* In the Canadian Forest Fire Weather Index System, values of 0 for both the Duff-Moisture code (DMC) and the Drought code (DC) will yield a 0 Build-Up index (BUI) instead of failing with division by zero error (:issue:`2145`, :pull:`2225`).
* ``xclim.indices.generic.{doymax|doymin}`` now work with dask arrays. (:issue:`2107`, :pull:`2231`).

v0.57.0 (2025-05-22)
--------------------
Expand Down
77 changes: 77 additions & 0 deletions src/xclim/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,83 @@ def _is_dask_array(da):
return any(_is_dask_array(da) for da in das)


def lazy_indexing(da: xr.DataArray, index: xr.DataArray, dim: str | None = None) -> xr.DataArray:
    """
    Get values of `da` at indices `index` in a NaN-aware and lazy manner.

    NaN entries in `index` yield NaN in the output instead of raising, and
    dask-backed inputs are indexed without triggering computation.

    Parameters
    ----------
    da : xr.DataArray
        Input array. If not 1D, `dim` must be given and must not appear in index.
    index : xr.DataArray
        N-d integer indices, if DataArray is not 1D, all dimensions of index must be in DataArray.
    dim : str, optional
        Dimension along which to index, unused if `da` is 1D, should not be present in `index`.

    Returns
    -------
    xr.DataArray
        Values of `da` at indices `index`.

    Raises
    ------
    ValueError
        If `da` is N-D, `dim` is not given and the indexing dimension cannot be
        inferred unambiguously from the dimensions of `da` and `index`.
    """
    if da.ndim == 1:
        # Case where da is 1D and index is N-D
        # Slightly better performance using map_blocks, over an apply_ufunc
        def _index_from_1d_array(indices, array):
            # Plain integer fancy-indexing; runs once per chunk of `index`.
            return array[indices]

        idx_ndim = index.ndim
        if idx_ndim == 0:
            # The 0-D index case, we add a dummy dimension to help dask
            dim = get_temp_dimname(da.dims, "x")
            index = index.expand_dims(dim)
        # Which indexes to mask.
        invalid = index.isnull()
        # NaN-indexing doesn't work, so fill with 0 and cast to int
        # (the filled positions are re-masked to NaN below via `where`).
        index = index.fillna(0).astype(int)

        # No need for coords, we extract by integer index.
        # Renaming with no name to fix bug in xr 2024.01.0
        tmpname = get_temp_dimname(da.dims, "temp")
        da2 = xr.DataArray(da.data, dims=(tmpname,), name=None)
        # Map blocks chunks aux coords. Remove them to avoid the alignment check load in `where`
        index, auxcrd = split_auxiliary_coordinates(index)
        # for each chunk of index, take corresponding values from da
        out = index.map_blocks(_index_from_1d_array, args=(da2,)).rename(da.name)
        # mask where index was NaN. Drop any auxiliary coord, they are already on `out`.
        # Chunked aux coord would have the same name on both sides and xarray will want to check if they are equal,
        # which means loading them making lazy_indexing not lazy. same issue as above
        out = out.where(~invalid.drop_vars([crd for crd in invalid.coords if crd not in invalid.dims]))
        # Re-attach the auxiliary coordinates that were split off the index.
        out = out.assign_coords(auxcrd.coords)
        if idx_ndim == 0:
            # 0-D case, drop useless coords and dummy dim
            out = out.drop_vars(da.dims[0], errors="ignore").squeeze()
        return out.drop_vars(dim or da.dims[0], errors="ignore")

    # Case where index.dims is a subset of da.dims.
    if dim is None:
        # Infer the indexing dimension: it must be the single dimension of
        # `da` that does not appear in `index`.
        diff_dims = set(da.dims) - set(index.dims)
        if len(diff_dims) == 0:
            raise ValueError("da must have at least one dimension more than index for lazy_indexing.")
        if len(diff_dims) > 1:
            raise ValueError(
                "If da has more than one dimension more than index, the indexing dim must be given through `dim`"
            )
        dim = diff_dims.pop()

    def _index_from_nd_array(array, indices):
        # `dim` was moved to the last axis by apply_ufunc's core-dim handling;
        # take one value per position along it, then drop the singleton axis.
        return np.take_along_axis(array, indices[..., np.newaxis], axis=-1)[..., 0]

    return xr.apply_ufunc(
        _index_from_nd_array,
        da,
        index.astype(int),
        input_core_dims=[[dim], []],
        output_core_dims=[[]],
        dask="parallelized",
        output_dtypes=[da.dtype],
    )


def calc_perc(
arr: np.ndarray,
percentiles: Sequence[float] | None = None,
Expand Down
16 changes: 14 additions & 2 deletions src/xclim/indices/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
to_agg_units,
units2pint,
)
from xclim.core.utils import lazy_indexing, uses_dask
from xclim.indices import run_length as rl
from xclim.indices.helpers import resample_map

Expand Down Expand Up @@ -189,7 +190,12 @@ def doymax(da: xr.DataArray) -> xr.DataArray:
The day of year of the maximum value.
"""
i = da.argmax(dim="time")
out = da.time.dt.dayofyear.isel(time=i, drop=True)
doy = da.time.dt.dayofyear

if uses_dask(da):
out = lazy_indexing(doy, i, "time").astype(doy.dtype)
else:
out = doy.isel(time=i)
return to_agg_units(out, da, "doymax")


Expand All @@ -208,7 +214,13 @@ def doymin(da: xr.DataArray) -> xr.DataArray:
The day of year of the minimum value.
"""
i = da.argmin(dim="time")
out = da.time.dt.dayofyear.isel(time=i, drop=True)
doy = da.time.dt.dayofyear

if uses_dask(da):
out = lazy_indexing(doy, i, "time").astype(doy.dtype)
else:
out = doy.isel(time=i)

return to_agg_units(out, da, "doymin")


Expand Down
79 changes: 1 addition & 78 deletions src/xclim/indices/run_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from xclim.core import DateStr, DayOfYearStr
from xclim.core.options import OPTIONS, RUN_LENGTH_UFUNC
from xclim.core.utils import get_temp_dimname, split_auxiliary_coordinates, uses_dask
from xclim.core.utils import lazy_indexing, uses_dask
from xclim.indices.helpers import resample_map

npts_opt = 9000
Expand Down Expand Up @@ -1607,83 +1607,6 @@ def first_run_ufunc(
return ind


def lazy_indexing(da: xr.DataArray, index: xr.DataArray, dim: str | None = None) -> xr.DataArray:
    """
    Get values of `da` at indices `index` in a NaN-aware and lazy manner.

    NaN entries in `index` yield NaN in the output instead of raising, and
    dask-backed inputs are indexed without triggering computation.

    Parameters
    ----------
    da : xr.DataArray
        Input array. If not 1D, `dim` must be given and must not appear in index.
    index : xr.DataArray
        N-d integer indices, if DataArray is not 1D, all dimensions of index must be in DataArray.
    dim : str, optional
        Dimension along which to index, unused if `da` is 1D, should not be present in `index`.

    Returns
    -------
    xr.DataArray
        Values of `da` at indices `index`.

    Raises
    ------
    ValueError
        If `da` is N-D, `dim` is not given and the indexing dimension cannot be
        inferred unambiguously from the dimensions of `da` and `index`.
    """
    if da.ndim == 1:
        # Case where da is 1D and index is N-D
        # Slightly better performance using map_blocks, over an apply_ufunc
        def _index_from_1d_array(indices, array):
            # Plain integer fancy-indexing; runs once per chunk of `index`.
            return array[indices]

        idx_ndim = index.ndim
        if idx_ndim == 0:
            # The 0-D index case, we add a dummy dimension to help dask
            dim = get_temp_dimname(da.dims, "x")
            index = index.expand_dims(dim)
        # Which indexes to mask.
        invalid = index.isnull()
        # NaN-indexing doesn't work, so fill with 0 and cast to int
        # (the filled positions are re-masked to NaN below via `where`).
        index = index.fillna(0).astype(int)

        # No need for coords, we extract by integer index.
        # Renaming with no name to fix bug in xr 2024.01.0
        tmpname = get_temp_dimname(da.dims, "temp")
        da2 = xr.DataArray(da.data, dims=(tmpname,), name=None)
        # Map blocks chunks aux coords. Remove them to avoid the alignment check load in `where`
        index, auxcrd = split_auxiliary_coordinates(index)
        # for each chunk of index, take corresponding values from da
        out = index.map_blocks(_index_from_1d_array, args=(da2,)).rename(da.name)
        # mask where index was NaN. Drop any auxiliary coord, they are already on `out`.
        # Chunked aux coord would have the same name on both sides and xarray will want to check if they are equal,
        # which means loading them making lazy_indexing not lazy. same issue as above
        out = out.where(~invalid.drop_vars([crd for crd in invalid.coords if crd not in invalid.dims]))
        # Re-attach the auxiliary coordinates that were split off the index.
        out = out.assign_coords(auxcrd.coords)
        if idx_ndim == 0:
            # 0-D case, drop useless coords and dummy dim
            out = out.drop_vars(da.dims[0], errors="ignore").squeeze()
        return out.drop_vars(dim or da.dims[0], errors="ignore")

    # Case where index.dims is a subset of da.dims.
    if dim is None:
        # Infer the indexing dimension: it must be the single dimension of
        # `da` that does not appear in `index`.
        diff_dims = set(da.dims) - set(index.dims)
        if len(diff_dims) == 0:
            raise ValueError("da must have at least one dimension more than index for lazy_indexing.")
        if len(diff_dims) > 1:
            raise ValueError(
                "If da has more than one dimension more than index, the indexing dim must be given through `dim`"
            )
        dim = diff_dims.pop()

    def _index_from_nd_array(array, indices):
        # `dim` was moved to the last axis by apply_ufunc's core-dim handling;
        # take one value per position along it, then drop the singleton axis.
        return np.take_along_axis(array, indices[..., np.newaxis], axis=-1)[..., 0]

    return xr.apply_ufunc(
        _index_from_nd_array,
        da,
        index.astype(int),
        input_core_dims=[[dim], []],
        output_core_dims=[[]],
        dask="parallelized",
        output_dtypes=[da.dtype],
    )


def index_of_date(
time: xr.DataArray,
date: DateStr | DayOfYearStr | None,
Expand Down
5 changes: 4 additions & 1 deletion tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,16 @@ def test_simple(self, tas_series):


class TestFlowGeneric:
def test_doyminmax(self, q_series):
@pytest.mark.parametrize("use_dask", [True, False])
def test_doyminmax(self, q_series, use_dask):
a = np.ones(365)
a[9] = 2
a[19] = -2
a[39] = 4
a[49] = -4
q = q_series(a)
if use_dask:
q = q.chunk({"time": 200})
dmx = generic.doymax(q)
dmn = generic.doymin(q)
assert dmx.values == [40]
Expand Down
Loading