Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Internal changes
* Conversion indicators have been split from ``tests/test_atmos.py`` into new ``tests/test_converters.py``. This is to ensure that conversion indicators are tested separately from other atmospheric indicators. (:issue:`1289`, :pull:`2224`).
* ``xclim.testing.utils.show_versions`` now uses the `importlib.metadata` library to more accurately gather dependency information. (:pull:`2229`).
* Replaced the deprecated ``"time.week"`` grouping strings with ``da.time.dt.isocalendar().week`` in ``xclim.indices.stats.standardized_index`` functions. (:pull:`2230`).
* ``xclim.indices.run_length.lazy_indexing`` has been moved to ``xclim.core.utils.lazy_indexing``. (:issue:`2107`, :pull:`2231`).

Bug fixes
^^^^^^^^^
Expand All @@ -58,6 +59,7 @@ Bug fixes
* Fix ``spell_length_statistics`` and related functions for cases where ``thresh`` is a DataArray. (:issue:`2216`, :pull:`2218`).
* Addressed a noisy warning emitted by `numpy` in the ``xclim.indices.stats`` when performing a fit over data with missing values. (:pull:`2224`).
* In the Canadian Forest Fire Weather Index System, values of 0 for both the Duff-Moisture code (DMC) and the Drought code (DC) will yield a 0 Build-Up index (BUI) instead of failing with division by zero error (:issue:`2145`, :pull:`2225`).
* ``xclim.indices.generic.{doymax|doymin}`` now work with dask arrays. (:issue:`2107`, :pull:`2231`).

v0.57.0 (2025-05-22)
--------------------
Expand Down
77 changes: 77 additions & 0 deletions src/xclim/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,83 @@ def _is_dask_array(da):
return any(_is_dask_array(da) for da in das)


def lazy_indexing(da: xr.DataArray, index: xr.DataArray, dim: str | None = None) -> xr.DataArray:
    """
    Get values of `da` at indices `index` in a NaN-aware and lazy manner.

    NaN entries in `index` yield NaN in the output instead of raising, and
    dask-backed inputs are indexed without triggering computation.

    Parameters
    ----------
    da : xr.DataArray
        Input array. If not 1D, `dim` must be given and must not appear in index.
    index : xr.DataArray
        N-d integer indices, if DataArray is not 1D, all dimensions of index must be in DataArray.
    dim : str, optional
        Dimension along which to index, unused if `da` is 1D, should not be present in `index`.

    Returns
    -------
    xr.DataArray
        Values of `da` at indices `index`.

    Raises
    ------
    ValueError
        If `da` is N-D, `dim` is not given and the indexing dimension cannot be
        inferred unambiguously from the dimensions of `da` and `index`.
    """
    if da.ndim == 1:
        # Case where da is 1D and index is N-D
        # Slightly better performance using map_blocks, over an apply_ufunc
        def _index_from_1d_array(indices, array):
            # Plain integer fancy-indexing; runs once per chunk of `index`.
            return array[indices]

        idx_ndim = index.ndim
        if idx_ndim == 0:
            # The 0-D index case, we add a dummy dimension to help dask
            dim = get_temp_dimname(da.dims, "x")
            index = index.expand_dims(dim)
        # Which indexes to mask.
        invalid = index.isnull()
        # NaN-indexing doesn't work, so fill with 0 and cast to int
        # (the filled positions are re-masked to NaN below via `where`).
        index = index.fillna(0).astype(int)

        # No need for coords, we extract by integer index.
        # Renaming with no name to fix bug in xr 2024.01.0
        tmpname = get_temp_dimname(da.dims, "temp")
        da2 = xr.DataArray(da.data, dims=(tmpname,), name=None)
        # Map blocks chunks aux coords. Remove them to avoid the alignment check load in `where`
        index, auxcrd = split_auxiliary_coordinates(index)
        # for each chunk of index, take corresponding values from da
        out = index.map_blocks(_index_from_1d_array, args=(da2,)).rename(da.name)
        # mask where index was NaN. Drop any auxiliary coord, they are already on `out`.
        # Chunked aux coord would have the same name on both sides and xarray will want to check if they are equal,
        # which means loading them making lazy_indexing not lazy. same issue as above
        out = out.where(~invalid.drop_vars([crd for crd in invalid.coords if crd not in invalid.dims]))
        # Re-attach the auxiliary coordinates that were split off the index.
        out = out.assign_coords(auxcrd.coords)
        if idx_ndim == 0:
            # 0-D case, drop useless coords and dummy dim
            out = out.drop_vars(da.dims[0], errors="ignore").squeeze()
        return out.drop_vars(dim or da.dims[0], errors="ignore")

    # Case where index.dims is a subset of da.dims.
    if dim is None:
        # Infer the indexing dimension: it must be the single dimension of
        # `da` that does not appear in `index`.
        diff_dims = set(da.dims) - set(index.dims)
        if len(diff_dims) == 0:
            raise ValueError("da must have at least one dimension more than index for lazy_indexing.")
        if len(diff_dims) > 1:
            raise ValueError(
                "If da has more than one dimension more than index, the indexing dim must be given through `dim`"
            )
        dim = diff_dims.pop()

    def _index_from_nd_array(array, indices):
        # `dim` was moved to the last axis by apply_ufunc's core-dim handling;
        # take one value per position along it, then drop the singleton axis.
        return np.take_along_axis(array, indices[..., np.newaxis], axis=-1)[..., 0]

    return xr.apply_ufunc(
        _index_from_nd_array,
        da,
        index.astype(int),
        input_core_dims=[[dim], []],
        output_core_dims=[[]],
        dask="parallelized",
        output_dtypes=[da.dtype],
    )


def calc_perc(
arr: np.ndarray,
percentiles: Sequence[float] | None = None,
Expand Down
16 changes: 14 additions & 2 deletions src/xclim/indices/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
to_agg_units,
units2pint,
)
from xclim.core.utils import lazy_indexing, uses_dask
from xclim.indices import run_length as rl
from xclim.indices.helpers import resample_map

Expand Down Expand Up @@ -189,7 +190,12 @@ def doymax(da: xr.DataArray) -> xr.DataArray:
The day of year of the maximum value.
"""
i = da.argmax(dim="time")
out = da.time.dt.dayofyear.isel(time=i, drop=True)
doy = da.time.dt.dayofyear

if uses_dask(da):
out = lazy_indexing(doy, i, "time").astype(doy.dtype)
else:
out = doy.isel(time=i)
return to_agg_units(out, da, "doymax")


Expand All @@ -208,7 +214,13 @@ def doymin(da: xr.DataArray) -> xr.DataArray:
The day of year of the minimum value.
"""
i = da.argmin(dim="time")
out = da.time.dt.dayofyear.isel(time=i, drop=True)
doy = da.time.dt.dayofyear

if uses_dask(da):
out = lazy_indexing(doy, i, "time").astype(doy.dtype)
else:
out = doy.isel(time=i)

return to_agg_units(out, da, "doymin")


Expand Down
79 changes: 1 addition & 78 deletions src/xclim/indices/run_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

from xclim.core import DateStr, DayOfYearStr
from xclim.core.options import OPTIONS, RUN_LENGTH_UFUNC
from xclim.core.utils import get_temp_dimname, split_auxiliary_coordinates, uses_dask
from xclim.core.utils import lazy_indexing, uses_dask
from xclim.indices.helpers import resample_map

npts_opt = 9000
Expand Down Expand Up @@ -1607,83 +1607,6 @@ def first_run_ufunc(
return ind


def lazy_indexing(da: xr.DataArray, index: xr.DataArray, dim: str | None = None) -> xr.DataArray:
    """
    Get values of `da` at indices `index` in a NaN-aware and lazy manner.

    NaN entries in `index` yield NaN in the output instead of raising, and
    dask-backed inputs are indexed without triggering computation.

    Parameters
    ----------
    da : xr.DataArray
        Input array. If not 1D, `dim` must be given and must not appear in index.
    index : xr.DataArray
        N-d integer indices, if DataArray is not 1D, all dimensions of index must be in DataArray.
    dim : str, optional
        Dimension along which to index, unused if `da` is 1D, should not be present in `index`.

    Returns
    -------
    xr.DataArray
        Values of `da` at indices `index`.

    Raises
    ------
    ValueError
        If `da` is N-D, `dim` is not given and the indexing dimension cannot be
        inferred unambiguously from the dimensions of `da` and `index`.
    """
    if da.ndim == 1:
        # Case where da is 1D and index is N-D
        # Slightly better performance using map_blocks, over an apply_ufunc
        def _index_from_1d_array(indices, array):
            # Plain integer fancy-indexing; runs once per chunk of `index`.
            return array[indices]

        idx_ndim = index.ndim
        if idx_ndim == 0:
            # The 0-D index case, we add a dummy dimension to help dask
            dim = get_temp_dimname(da.dims, "x")
            index = index.expand_dims(dim)
        # Which indexes to mask.
        invalid = index.isnull()
        # NaN-indexing doesn't work, so fill with 0 and cast to int
        # (the filled positions are re-masked to NaN below via `where`).
        index = index.fillna(0).astype(int)

        # No need for coords, we extract by integer index.
        # Renaming with no name to fix bug in xr 2024.01.0
        tmpname = get_temp_dimname(da.dims, "temp")
        da2 = xr.DataArray(da.data, dims=(tmpname,), name=None)
        # Map blocks chunks aux coords. Remove them to avoid the alignment check load in `where`
        index, auxcrd = split_auxiliary_coordinates(index)
        # for each chunk of index, take corresponding values from da
        out = index.map_blocks(_index_from_1d_array, args=(da2,)).rename(da.name)
        # mask where index was NaN. Drop any auxiliary coord, they are already on `out`.
        # Chunked aux coord would have the same name on both sides and xarray will want to check if they are equal,
        # which means loading them making lazy_indexing not lazy. same issue as above
        out = out.where(~invalid.drop_vars([crd for crd in invalid.coords if crd not in invalid.dims]))
        # Re-attach the auxiliary coordinates that were split off the index.
        out = out.assign_coords(auxcrd.coords)
        if idx_ndim == 0:
            # 0-D case, drop useless coords and dummy dim
            out = out.drop_vars(da.dims[0], errors="ignore").squeeze()
        return out.drop_vars(dim or da.dims[0], errors="ignore")

    # Case where index.dims is a subset of da.dims.
    if dim is None:
        # Infer the indexing dimension: it must be the single dimension of
        # `da` that does not appear in `index`.
        diff_dims = set(da.dims) - set(index.dims)
        if len(diff_dims) == 0:
            raise ValueError("da must have at least one dimension more than index for lazy_indexing.")
        if len(diff_dims) > 1:
            raise ValueError(
                "If da has more than one dimension more than index, the indexing dim must be given through `dim`"
            )
        dim = diff_dims.pop()

    def _index_from_nd_array(array, indices):
        # `dim` was moved to the last axis by apply_ufunc's core-dim handling;
        # take one value per position along it, then drop the singleton axis.
        return np.take_along_axis(array, indices[..., np.newaxis], axis=-1)[..., 0]

    return xr.apply_ufunc(
        _index_from_nd_array,
        da,
        index.astype(int),
        input_core_dims=[[dim], []],
        output_core_dims=[[]],
        dask="parallelized",
        output_dtypes=[da.dtype],
    )


def index_of_date(
time: xr.DataArray,
date: DateStr | DayOfYearStr | None,
Expand Down
5 changes: 4 additions & 1 deletion tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,16 @@ def test_simple(self, tas_series):


class TestFlowGeneric:
def test_doyminmax(self, q_series):
@pytest.mark.parametrize("use_dask", [True, False])
def test_doyminmax(self, q_series, use_dask):
a = np.ones(365)
a[9] = 2
a[19] = -2
a[39] = 4
a[49] = -4
q = q_series(a)
if use_dask:
q = q.chunk({"time": 200})
dmx = generic.doymax(q)
dmn = generic.doymin(q)
assert dmx.values == [40]
Expand Down
Loading