diff --git a/.vscode/xcdat.code-workspace b/.vscode/xcdat.code-workspace index a772b05f..c0b24d0f 100644 --- a/.vscode/xcdat.code-workspace +++ b/.vscode/xcdat.code-workspace @@ -5,6 +5,7 @@ } ], "settings": { - "autoDocstring.docstringFormat": "numpy" + "autoDocstring.docstringFormat": "numpy", + "editor.formatOnSave": true } } diff --git a/docs/api.rst b/docs/api.rst index 4dc480da..a9d0043f 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -104,6 +104,7 @@ Methods Dataset.bounds.add_missing_bounds Dataset.spatial.average Dataset.temporal.average + Dataset.temporal.group_average Dataset.temporal.climatology Dataset.temporal.departures Dataset.temporal.center_times diff --git a/tests/test_temporal.py b/tests/test_temporal.py index 79440c30..15f4c480 100644 --- a/tests/test_temporal.py +++ b/tests/test_temporal.py @@ -1,8 +1,5 @@ -from datetime import datetime - import cftime import numpy as np -import pandas as pd import pytest import xarray as xr from xarray.tests import requires_dask @@ -24,1066 +21,991 @@ def test_decorator(self): class TestAverage: - def test_weighted_annual_avg(self): - ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - - result = ds.temporal.average("ts", "year") - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( + def test_averages_for_yearly_time_series(self): + ds = xr.Dataset( + coords={ + "lat": [-90], + "lon": [0], + "time": xr.DataArray( + data=np.array( + [ + "2000-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2002-01-01T00:00:00.000000000", + "2003-01-01T00:00:00.000000000", + "2004-01-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + } + ) + ds["time_bnds"] = xr.DataArray( + name="time_bnds", data=np.array( - ["2000-01-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000"], + [ + ["2000-01-01T00:00:00.000000000", 
"2001-01-01T00:00:00.000000000"], + ["2001-01-01T00:00:00.000000000", "2002-01-01T00:00:00.000000000"], + ["2002-01-01T00:00:00.000000000", "2003-01-01T00:00:00.000000000"], + ["2003-01-01T00:00:00.000000000", "2004-01-01T00:00:00.000000000"], + ["2004-01-01T00:00:00.000000000", "2005-01-01T00:00:00.000000000"], + ], dtype="datetime64[ns]", ), - coords={ - "time": np.array( - [ - "2000-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ) - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, + coords={"time": ds.time}, + dims=["time", "bnds"], + attrs={"is_generated": "True"}, ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((2, 4, 4)), - coords={ - "lat": ds.lat, - "lon": ds.lon, - "time": time_new, - }, + ds["ts"] = xr.DataArray( + data=np.array([[[2]], [[1]], [[1]], [[1]], [[2]]]), + coords={"lat": ds.lat, "lon": ds.lon, "time": ds.time}, dims=["time", "lat", "lon"], + ) + + # Test averages weighted by year + result = ds.temporal.average("ts") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + data=np.array([[1.4]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], attrs={ "operation": "temporal_avg", - "mode": "time_series", + "mode": "average", "freq": "year", "weighted": "True", "center_times": "False", }, ) - # For some reason, there is a very small floating point difference - # between both for ts so we have to use floating point comparison - xr.testing.assert_allclose(result, expected) - assert result.ts.attrs == expected.ts.attrs + assert result.identical(expected) + # Test unweighted averages + result = ds.temporal.average("ts", weighted=False) + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + data=np.array([[1.4]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], + attrs={ + 
"operation": "temporal_avg", + "mode": "average", + "freq": "year", + "weighted": "False", + "center_times": "False", + }, + ) -class TestClimatology: - def test_weighted_seasonal_climatology_with_DJF(self): - ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) + assert result.identical(expected) - result = ds.temporal.climatology( - "ts", - "season", - season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + def test_averages_for_monthly_time_series(self): + # Set up dataset + ds = xr.Dataset( + coords={ + "lat": [-90], + "lon": [0], + "time": xr.DataArray( + data=np.array( + [ + "2000-01-01T00:00:00.000000000", + "2000-02-01T00:00:00.000000000", + "2000-03-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + "2000-05-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + } ) - expected = ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( + ds["time_bnds"] = xr.DataArray( + name="time_bnds", data=np.array( [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), + ["2000-01-01T00:00:00.000000000", "2000-02-01T00:00:00.000000000"], + ["2000-02-01T00:00:00.000000000", "2000-03-01T00:00:00.000000000"], + ["2000-03-01T00:00:00.000000000", "2000-04-01T00:00:00.000000000"], + ["2000-04-01T00:00:00.000000000", "2000-05-01T00:00:00.000000000"], + ["2000-05-01T00:00:00.000000000", "2000-06-01T00:00:00.000000000"], ], + dtype="datetime64[ns]", ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, + coords={"time": ds.time}, + dims=["time", "bnds"], + attrs={"is_generated": "True"}, ) - expected["ts"] = 
xr.DataArray( - name="ts", - data=np.ones((4, 4, 4)), - coords={"lat": ds.lat, "lon": ds.lon, "time": expected_time}, + ds["ts"] = xr.DataArray( + data=np.array([[[2]], [[1]], [[1]], [[1]], [[1]]]), + coords={"lat": ds.lat, "lon": ds.lon, "time": ds.time}, dims=["time", "lat", "lon"], + ) + + # Test averages weighted by month + result = ds.temporal.average("ts") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + data=np.array([[1.2]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], attrs={ "operation": "temporal_avg", - "mode": "climatology", - "freq": "season", + "mode": "average", + "freq": "month", "weighted": "True", "center_times": "False", - "dec_mode": "DJF", - "drop_incomplete_djf": "True", }, ) assert result.identical(expected) - -class TestDepartures: - # TODO: Update TestDepartures tests to use other numbers rather than 1's for - # better test reliability and accuracy. This may require subsetting. - @pytest.fixture(autouse=True) - def setup(self): - self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - - self.seasons = ["JJA", "MAM", "SON", "DJF"] - - def test_weighted_seasonal_departures_with_DJF(self): - # Create a post-climatology dataset. - ds = self.ds.copy() - # Drop incomplete DJF seasons - ds = ds.isel(time=slice(2, -1)) - - # Compare result of the method against the expected. 
- result = ds.temporal.departures( - "ts", - "season", - season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, - ) + # Test unweighted averages + result = ds.temporal.average("ts", weighted=False) expected = ds.copy() + expected = expected.drop_dims("time") expected["ts"] = xr.DataArray( - data=np.zeros((12, 4, 4)), - coords={ - "lat": ds.lat, - "lon": ds.lon, - "time": ds.time, - }, - dims=["time", "lat", "lon"], + data=np.array([[1.2]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], attrs={ "operation": "temporal_avg", - "mode": "departures", - "freq": "season", - "weighted": "True", + "mode": "average", + "freq": "month", + "weighted": "False", "center_times": "False", - "dec_mode": "DJF", - "drop_incomplete_djf": "True", }, ) - assert result.identical(expected) - def test_unweighted_seasonal_departures_with_DJF(self): - ds = self.ds.copy() - # Drop incomplete DJF seasons - ds = ds.isel(time=slice(2, -1)) - - # Compare result of the method against the expected. 
- result = ds.temporal.departures( - "ts", - "season", - weighted=False, - season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + def test_averages_for_daily_time_series(self): + ds = xr.Dataset( + coords={ + "lat": [-90], + "lon": [0], + "time": xr.DataArray( + data=np.array( + [ + "2000-01-01T00:00:00.000000000", + "2000-01-02T00:00:00.000000000", + "2000-01-03T00:00:00.000000000", + "2000-01-04T00:00:00.000000000", + "2000-01-05T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + } ) + ds["time_bnds"] = xr.DataArray( + name="time_bnds", + data=np.array( + [ + ["2000-01-01T00:00:00.000000000", "2000-01-02T00:00:00.000000000"], + ["2000-01-02T00:00:00.000000000", "2000-01-03T00:00:00.000000000"], + ["2000-01-03T00:00:00.000000000", "2000-01-04T00:00:00.000000000"], + ["2000-01-04T00:00:00.000000000", "2000-01-05T00:00:00.000000000"], + ["2000-01-05T00:00:00.000000000", "2000-01-06T00:00:00.000000000"], + ], + dtype="datetime64[ns]", + ), + coords={"time": ds.time}, + dims=["time", "bnds"], + attrs={"is_generated": "True"}, + ) + ds["ts"] = xr.DataArray( + data=np.array([[[2]], [[1]], [[1]], [[1]], [[1]]]), + coords={"lat": ds.lat, "lon": ds.lon, "time": ds.time}, + dims=["time", "lat", "lon"], + ) + + # Test averages weighted by day + result = ds.temporal.average("ts") expected = ds.copy() + expected = expected.drop_dims("time") expected["ts"] = xr.DataArray( - data=np.zeros((12, 4, 4)), - coords={ - "lat": ds.lat, - "lon": ds.lon, - "time": ds.time, - }, - dims=["time", "lat", "lon"], + data=np.array([[1.2]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], attrs={ "operation": "temporal_avg", - "mode": "departures", - "freq": "season", - "weighted": "False", + "mode": "average", + "freq": "day", + "weighted": "True", "center_times": "False", - "dec_mode": "DJF", - "drop_incomplete_djf": 
"True", }, ) assert result.identical(expected) - def test_unweighted_seasonal_departures_with_JFD(self): - ds = self.ds.copy() - - # Compare result of the method against the expected. - result = ds.temporal.departures( - "ts", - "season", - weighted=False, - season_config={"dec_mode": "JFD"}, - ) + # Test unweighted averages + result = ds.temporal.average("ts", weighted=False) expected = ds.copy() + expected = expected.drop_dims("time") expected["ts"] = xr.DataArray( - data=np.zeros((15, 4, 4)), - coords={ - "lat": ds.lat, - "lon": ds.lon, - "time": ds.time, - }, - dims=["time", "lat", "lon"], + data=np.array([[1.2]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], attrs={ "operation": "temporal_avg", - "mode": "departures", - "freq": "season", + "mode": "average", + "freq": "day", "weighted": "False", "center_times": "False", - "dec_mode": "JFD", }, ) - assert result.identical(expected) - -class TestCenterTimes: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_raises_error_if_time_dimension_does_not_exist_in_dataset(self): - ds = self.ds.copy() - ds = ds.drop_dims("time") - - with pytest.raises(KeyError): - ds.temporal.center_times(ds) - - def test_gets_time_as_the_midpoint_between_time_bounds(self): - ds = self.ds.copy() - - # Make the time coordinates uncentered. 
- uncentered_time = np.array( - [ - "2000-01-31T12:00:00.000000000", - "2000-02-29T12:00:00.000000000", - "2000-03-31T12:00:00.000000000", - "2000-04-30T00:00:00.000000000", - "2000-05-31T12:00:00.000000000", - "2000-06-30T00:00:00.000000000", - "2000-07-31T12:00:00.000000000", - "2000-08-31T12:00:00.000000000", - "2000-09-30T00:00:00.000000000", - "2000-10-16T12:00:00.000000000", - "2000-11-30T00:00:00.000000000", - "2000-12-31T12:00:00.000000000", - "2001-01-31T12:00:00.000000000", - "2001-02-28T00:00:00.000000000", - "2001-12-31T12:00:00.000000000", - ], - dtype="datetime64[ns]", - ) - ds.time.data[:] = uncentered_time - - # Set object attrs required to test the method. - ds.temporal._time_bounds = ds.time_bnds.copy() - - # Compare result of the method against the expected. - expected = ds.copy() - expected_time_data = np.array( - [ - "2000-01-16T12:00:00.000000000", - "2000-02-15T12:00:00.000000000", - "2000-03-16T12:00:00.000000000", - "2000-04-16T00:00:00.000000000", - "2000-05-16T12:00:00.000000000", - "2000-06-16T00:00:00.000000000", - "2000-07-16T12:00:00.000000000", - "2000-08-16T12:00:00.000000000", - "2000-09-16T00:00:00.000000000", - "2000-10-16T12:00:00.000000000", - "2000-11-16T00:00:00.000000000", - "2000-12-16T12:00:00.000000000", - "2001-01-16T12:00:00.000000000", - "2001-02-15T00:00:00.000000000", - "2001-12-16T12:00:00.000000000", - ], - dtype="datetime64[ns]", - ) - expected = expected.assign_coords( - { + def test_averages_for_hourly_time_series(self): + ds = xr.Dataset( + coords={ + "lat": [-90], + "lon": [0], "time": xr.DataArray( - name="time", - data=expected_time_data, - coords={"time": expected_time_data}, - dims="time", + data=np.array( + [ + "2000-01-01T01:00:00.000000000", + "2000-01-01T02:00:00.000000000", + "2000-01-01T03:00:00.000000000", + "2000-01-01T04:00:00.000000000", + "2000-01-01T05:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + dims=["time"], attrs={ + "axis": "T", "long_name": "time", "standard_name": "time", - 
"axis": "T", "bounds": "time_bnds", }, - ) + ), } ) - # Update time bounds with centered time coordinates. - time_bounds = ds.time_bnds.copy() - time_bounds["time"] = expected.time - expected["time_bnds"] = time_bounds + ds["time_bnds"] = xr.DataArray( + name="time_bnds", + data=np.array( + [ + ["2000-01-01T01:00:00.000000000", "2000-01-01T02:00:00.000000000"], + ["2000-01-01T02:00:00.000000000", "2000-01-01T03:00:00.000000000"], + ["2000-01-01T03:00:00.000000000", "2000-01-01T04:00:00.000000000"], + ["2000-01-01T04:00:00.000000000", "2000-01-01T05:00:00.000000000"], + ["2000-01-01T05:00:00.000000000", "2000-01-01T06:00:00.000000000"], + ], + dtype="datetime64[ns]", + ), + coords={"time": ds.time}, + dims=["time", "bnds"], + attrs={"is_generated": "True"}, + ) + ds["ts"] = xr.DataArray( + data=np.array([[[2]], [[1]], [[1]], [[1]], [[1]]]), + coords={"lat": ds.lat, "lon": ds.lon, "time": ds.time}, + dims=["time", "lat", "lon"], + ) + + # Test averages weighted by hour + result = ds.temporal.average("ts") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + data=np.array([[1.2]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "average", + "freq": "hour", + "weighted": "True", + "center_times": "False", + }, + ) - result = ds.temporal.center_times(ds) assert result.identical(expected) + # Test unweighted averages + result = ds.temporal.average("ts", weighted=False) + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + data=np.array([[1.2]]), + coords={"lat": expected.lat, "lon": expected.lon}, + dims=["lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "average", + "freq": "hour", + "weighted": "False", + "center_times": "False", + }, + ) -class TestTemporalAvg: - # TODO: Update TestTimeSeries tests to use other numbers rather than 1's - # for better test reliability and accuracy. 
This may require subsetting. - class TestTimeSeries: - @pytest.fixture(autouse=True) - def setup(self): - # FIXME: Update test this so that it is accurate, rather than 1's - # for averages - # May involve subsetting - self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) + assert result.identical(expected) - def test_weighted_annual_avg(self): - ds = self.ds.copy() - result = ds.temporal._temporal_avg("ts", "time_series", "year") - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - ["2000-01-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000"], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( +class TestGroupAverage: + @pytest.fixture(autouse=True) + def setup(self): + time = xr.DataArray( + data=np.array( + [ + "2000-01-16T12:00:00.000000000", + "2000-03-16T12:00:00.000000000", + "2000-06-16T00:00:00.000000000", + "2000-09-16T00:00:00.000000000", + "2001-02-15T12:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + dims=["time"], + attrs={"axis": "T", "long_name": "time", "standard_name": "time"}, + ) + time_bnds = xr.DataArray( + name="time_bnds", + data=np.array( + [ + ["2000-01-01T00:00:00.000000000", "2000-02-01T00:00:00.000000000"], + ["2000-03-01T00:00:00.000000000", "2000-04-01T00:00:00.000000000"], + ["2000-06-01T00:00:00.000000000", "2000-07-01T00:00:00.000000000"], + ["2000-09-01T00:00:00.000000000", "2000-10-01T00:00:00.000000000"], + ["2001-02-01T00:00:00.000000000", "2001-03-01T00:00:00.000000000"], + ], + dtype="datetime64[ns]", + ), + coords={"time": time}, + dims=["time", "bnds"], + attrs={"is_generated": "True"}, + ) + + self.ds = xr.Dataset( + data_vars={"time_bnds": time_bnds}, + coords={"lat": [-90], "lon": [0], "time": time}, + ) + self.ds.time.attrs["bounds"] = "time_bnds" + + self.ds["ts"] = xr.DataArray( + data=np.array( + [[[2.0]], [[1.0]], [[1.0]], [[1.0]], [[2.0]]], dtype="float64" + ), + coords={"time": self.ds.time, "lat": self.ds.lat, 
"lon": self.ds.lon}, + dims=["time", "lat", "lon"], + ) + + def test_weighted_annual_averages(self): + ds = self.ds.copy() + + result = ds.temporal.group_average("ts", "year") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[1.25409836]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ "2000-01-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000", ], dtype="datetime64[ns]", - ) - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((2, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "year", - "weighted": "True", - "center_times": "False", - }, - ) + ), + coords={ + "time": np.array( + [ + "2000-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ) + }, + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "year", + "weighted": "True", + "center_times": "False", + }, + ) - # For some reason, there is a very small floating point difference - # between both for ts so we have to use floating point comparison - xr.testing.assert_allclose(result, expected) - assert result.ts.attrs == expected.ts.attrs + xr.testing.assert_allclose(result, expected) + assert result.ts.attrs == expected.ts.attrs - @requires_dask - def test_weighted_annual_avg_with_chunking(self): - ds = self.ds.copy().chunk({"time": 2}) + @requires_dask + def test_weighted_annual_averages_with_chunking(self): + ds = self.ds.copy().chunk({"time": 2}) - result = 
ds.temporal._temporal_avg("ts", "time_series", "year") - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - ["2000-01-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000"], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( + result = ds.temporal.group_average("ts", "year") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[1.25409836]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ "2000-01-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000", ], dtype="datetime64[ns]", - ) - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((2, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "year", - "weighted": "True", - "center_times": "False", - }, - ) + ), + coords={ + "time": np.array( + [ + "2000-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ) + }, + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "year", + "weighted": "True", + "center_times": "False", + }, + ) - # For some reason, there is a very small floating point difference - # between both for ts so we have to use floating point comparison - xr.testing.assert_allclose(result, expected) - assert result.ts.attrs == expected.ts.attrs + xr.testing.assert_allclose(result, expected) + assert result.ts.attrs == expected.ts.attrs - def test_weighted_annual_avg_with_centering_time(self): - ds = 
self.ds.copy() + def test_weighted_seasonal_averages_with_DJF_and_drop_incomplete_seasons(self): + ds = self.ds.copy() - result = ds.temporal._temporal_avg( - "ts", "time_series", "year", center_times=True - ) - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - ["2000-01-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000"], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( + result = ds.temporal.group_average( + "ts", + "season", + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + ) + expected = ds.copy() + # Drop the incomplete DJF seasons + expected = expected.isel(time=slice(2, -1)) + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[1]], [[1]], [[1]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ - "2000-01-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + "2000-07-01T00:00:00.000000000", + "2000-10-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000", ], dtype="datetime64[ns]", - ) - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((2, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "year", - "weighted": "True", - "center_times": "True", - }, - ) - - # For some reason, there is a floating point difference between both - # for ts so we have to use floating point comparison - xr.testing.assert_allclose(result, expected) - assert result.ts.attrs == expected.ts.attrs - - def test_weighted_seasonal_avg_with_DJF(self): - ds = self.ds.copy() - - result = ds.temporal._temporal_avg( - "ts", - "time_series", - "season", - season_config={"dec_mode": "DJF", 
"drop_incomplete_djf": True}, - ) - expected = ds.copy() - # Drop the incomplete DJF seasons - expected = expected.isel(time=slice(2, -1)) - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - [ - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, ), - coords={ - "time": np.array( + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "season", + "weighted": "True", + "center_times": "False", + "dec_mode": "DJF", + "drop_incomplete_djf": "True", + }, + ) + + assert result.identical(expected) + + def test_weighted_seasonal_averages_with_DJF_without_dropping_incomplete_seasons( + self, + ): + ds = self.ds.copy() + + result = ds.temporal.group_average( + "ts", + "season", + season_config={"dec_mode": "DJF", "drop_incomplete_djf": False}, + ) + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[2.0]], [[1.0]], [[1.0]], [[1.0]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ + "2000-01-01T00:00:00.000000000", "2000-04-01T00:00:00.000000000", "2000-07-01T00:00:00.000000000", "2000-10-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000", ], dtype="datetime64[ns]", - ) - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((4, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "season", - "weighted": "True", - "center_times": 
"False", - "dec_mode": "DJF", - "drop_incomplete_djf": "True", - }, - ) + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "season", + "weighted": "True", + "center_times": "False", + "dec_mode": "DJF", + "drop_incomplete_djf": "False", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_weighted_seasonal_avg_with_DJF_without_dropping_incomplete_seasons( - self, - ): - ds = self.ds.copy() + def test_weighted_seasonal_averages_with_JFD(self): + ds = self.ds.copy() - result = ds.temporal._temporal_avg( - "ts", - "time_series", - "season", - season_config={"dec_mode": "DJF", "drop_incomplete_djf": False}, - ) - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - [ - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2002-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( + result = ds.temporal.group_average( + "ts", + "season", + season_config={"dec_mode": "JFD"}, + ) + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[2.0]], [[1.0]], [[1.0]], [[1.0]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ "2000-01-01T00:00:00.000000000", "2000-04-01T00:00:00.000000000", "2000-07-01T00:00:00.000000000", "2000-10-01T00:00:00.000000000", "2001-01-01T00:00:00.000000000", - "2002-01-01T00:00:00.000000000", ], dtype="datetime64[ns]", - ) - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = 
xr.DataArray( - name="ts", - data=np.ones((6, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "season", - "weighted": "True", - "center_times": "False", - "dec_mode": "DJF", - "drop_incomplete_djf": "False", - }, - ) - - assert result.identical(expected) + ), + coords={ + "time": np.array( + [ + "2000-01-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + "2000-07-01T00:00:00.000000000", + "2000-10-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ) + }, + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "season", + "weighted": "True", + "center_times": "False", + "dec_mode": "JFD", + }, + ) - def test_weighted_seasonal_avg_with_JFD(self): - ds = self.ds.copy() + assert result.identical(expected) - ds = self.ds.copy() + def test_weighted_custom_seasonal_averages(self): + ds = self.ds.copy() - result = ds.temporal._temporal_avg( - "ts", - "time_series", - "season", - season_config={"dec_mode": "JFD"}, - ) - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - [ - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( + custom_seasons = [ + ["Jan", "Feb", "Mar"], + ["Apr", "May", "Jun"], + ["Jul", "Aug", "Sep"], + ["Oct", "Nov", "Dec"], + ] + result = ds.temporal.group_average( + "ts", + "season", + season_config={"custom_seasons": custom_seasons}, + ) + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + 
name="ts", + data=np.array([[[1.5]], [[1.0]], [[1.0]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", + "2000-02-01T00:00:00.000000000", + "2000-05-01T00:00:00.000000000", + "2000-08-01T00:00:00.000000000", + "2001-02-01T00:00:00.000000000", ], dtype="datetime64[ns]", - ) - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((5, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "season", - "weighted": "True", - "center_times": "False", - "dec_mode": "JFD", - }, - ) + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "season", + "custom_seasons": [ + "JanFebMar", + "AprMayJun", + "JulAugSep", + "OctNovDec", + ], + "weighted": "True", + "center_times": "False", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_weighted_custom_season_avg(self): - ds = self.ds.copy() + def test_raises_error_with_incorrect_custom_seasons_argument(self): + # Test raises error with non-3 letter strings + with pytest.raises(ValueError): + custom_seasons = [ + ["J", "Feb", "Mar"], + ["Apr", "May", "Jun"], + ["Jul", "Aug", "Sep"], + ["Oct", "Nov", "Dec"], + ] + self.ds.temporal.group_average( + "ts", + "season", + season_config={"custom_seasons": custom_seasons}, + ) + # Test raises error with missing month(s) + with pytest.raises(ValueError): 
custom_seasons = [ - ["Jan", "Feb", "Mar"], + ["Feb", "Mar"], ["Apr", "May", "Jun"], ["Jul", "Aug", "Sep"], ["Oct", "Nov", "Dec"], ] - result = ds.temporal._temporal_avg( + self.ds.temporal.group_average( "ts", - "time_series", "season", season_config={"custom_seasons": custom_seasons}, ) - expected = ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( - [ - "2000-02-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", - "2000-08-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2001-02-01T00:00:00.000000000", - "2001-11-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( - [ - "2000-02-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", - "2000-08-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2001-02-01T00:00:00.000000000", - "2001-11-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((6, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "season", - "custom_seasons": [ - "JanFebMar", - "AprMayJun", - "JulAugSep", - "OctNovDec", - ], - "weighted": "True", - "center_times": "False", - }, - ) - - assert result.identical(expected) - - def test_weighted_monthly_avg(self): - ds = self.ds.copy() - - result = ds.temporal._temporal_avg("ts", "time_series", "month") - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - [ - "2000-01-01T00:00:00.000000000", - "2000-02-01T00:00:00.000000000", - "2000-03-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", - "2000-06-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - 
"2000-08-01T00:00:00.000000000", - "2000-09-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2000-12-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-02-01T00:00:00.000000000", - "2001-12-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( + + # Test raises error if duplicate month(s) were found + with pytest.raises(ValueError): + custom_seasons = [ + ["Jan", "Jan", "Mar"], + ["Apr", "May", "Jun"], + ["Jul", "Aug", "Sep"], + ["Oct", "Nov", "Dec"], + ] + self.ds.temporal.group_average( + "ts", + "season", + season_config={"custom_seasons": custom_seasons}, + ) + + def test_weighted_monthly_averages(self): + ds = self.ds.copy() + + result = ds.temporal.group_average("ts", "month") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[2.0]], [[1.0]], [[1.0]], [[1.0]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ "2000-01-01T00:00:00.000000000", - "2000-02-01T00:00:00.000000000", "2000-03-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", "2000-06-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-08-01T00:00:00.000000000", "2000-09-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2000-12-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", "2001-02-01T00:00:00.000000000", - "2001-12-01T00:00:00.000000000", ], dtype="datetime64[ns]", ), - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((15, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - 
"freq": "month", - "weighted": "True", - "center_times": "False", - }, - ) + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "month", + "weighted": "True", + "center_times": "False", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_weighted_daily_avg(self): - ds = self.ds.copy() + def test_weighted_daily_averages(self): + ds = self.ds.copy() - result = ds.temporal._temporal_avg("ts", "time_series", "day") - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - [ - "2000-01-16T00:00:00.000000000", - "2000-02-15T00:00:00.000000000", - "2000-03-16T00:00:00.000000000", - "2000-04-16T00:00:00.000000000", - "2000-05-16T00:00:00.000000000", - "2000-06-16T00:00:00.000000000", - "2000-07-16T00:00:00.000000000", - "2000-08-16T00:00:00.000000000", - "2000-09-16T00:00:00.000000000", - "2000-10-16T00:00:00.000000000", - "2000-11-16T00:00:00.000000000", - "2000-12-16T00:00:00.000000000", - "2001-01-16T00:00:00.000000000", - "2001-02-15T00:00:00.000000000", - "2001-12-16T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( + result = ds.temporal.group_average("ts", "day") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[2.0]], [[1.0]], [[1.0]], [[1.0]], [[2.0]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ "2000-01-16T00:00:00.000000000", - "2000-02-15T00:00:00.000000000", "2000-03-16T00:00:00.000000000", - "2000-04-16T00:00:00.000000000", - "2000-05-16T00:00:00.000000000", "2000-06-16T00:00:00.000000000", - "2000-07-16T00:00:00.000000000", - "2000-08-16T00:00:00.000000000", "2000-09-16T00:00:00.000000000", - 
"2000-10-16T00:00:00.000000000", - "2000-11-16T00:00:00.000000000", - "2000-12-16T00:00:00.000000000", - "2001-01-16T00:00:00.000000000", "2001-02-15T00:00:00.000000000", - "2001-12-16T00:00:00.000000000", ], dtype="datetime64[ns]", ), - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((15, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "day", - "weighted": "True", - "center_times": "False", - }, - ) - - assert result.identical(expected) + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "day", + "weighted": "True", + "center_times": "False", + }, + ) - def test_weighted_hourly_avg(self): - ds = self.ds.copy() - ds.coords["time"].attrs["bounds"] = "time_bnds" + assert result.identical(expected) - result = ds.temporal._temporal_avg("ts", "time_series", "hour") - expected = ds.copy() - expected["ts_original"] = ds.ts.copy() - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((15, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "year_month_day_hour": pd.MultiIndex.from_tuples( - [ - (2000, 1, 16, 12), - (2000, 2, 15, 12), - (2000, 3, 16, 12), - (2000, 4, 16, 0), - (2000, 5, 16, 12), - (2000, 6, 16, 0), - (2000, 7, 16, 12), - (2000, 8, 16, 12), - (2000, 9, 16, 0), - (2000, 10, 16, 12), - (2000, 11, 16, 0), - (2000, 12, 16, 12), - (2001, 1, 16, 12), - (2001, 2, 15, 0), - (2001, 12, 16, 12), - ] - ), - }, - dims=["year_month_day_hour", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "hour", - "weighted": "True", - "center_times": 
"False", - }, - ) + def test_weighted_daily_averages_and_center_times(self): + ds = self.ds.copy() + ds["time"] = xr.DataArray( + data=np.array( + [ + "2000-01-01T12:00:00.000000000", + "2000-03-01T12:00:00.000000000", + "2000-06-01T00:00:00.000000000", + "2000-09-01T00:00:00.000000000", + "2001-02-01T12:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) + ds["time_bnds"] = xr.DataArray( + name="time_bnds", + data=np.array( + [ + ["2000-01-01T00:00:00.000000000", "2000-02-01T00:00:00.000000000"], + ["2000-03-01T00:00:00.000000000", "2000-04-01T00:00:00.000000000"], + ["2000-06-01T00:00:00.000000000", "2000-07-01T00:00:00.000000000"], + ["2000-09-01T00:00:00.000000000", "2000-10-01T00:00:00.000000000"], + ["2001-02-01T00:00:00.000000000", "2001-03-01T00:00:00.000000000"], + ], + dtype="datetime64[ns]", + ), + coords={"time": ds.time}, + dims=["time", "bnds"], + attrs={"is_generated": "True"}, + ) - expected = ds.copy() - expected = expected.drop_dims("time") - time_new = xr.DataArray( - data=np.array( - [ - "2000-01-16T12:00:00.000000000", - "2000-02-15T12:00:00.000000000", - "2000-03-16T12:00:00.000000000", - "2000-04-16T00:00:00.000000000", - "2000-05-16T12:00:00.000000000", - "2000-06-16T00:00:00.000000000", - "2000-07-16T12:00:00.000000000", - "2000-08-16T12:00:00.000000000", - "2000-09-16T00:00:00.000000000", - "2000-10-16T12:00:00.000000000", - "2000-11-16T00:00:00.000000000", - "2000-12-16T12:00:00.000000000", - "2001-01-16T12:00:00.000000000", - "2001-02-15T00:00:00.000000000", - "2001-12-16T12:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={ - "time": np.array( + result = ds.temporal.group_average("ts", "day", center_times=True) + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[2]], [[1]], [[1]], [[1]], [[2]]]), + coords={ + "lat": 
expected.lat, + "lon": expected.lon, + "time": xr.DataArray( + data=np.array( [ - "2000-01-16T12:00:00.000000000", - "2000-02-15T12:00:00.000000000", - "2000-03-16T12:00:00.000000000", - "2000-04-16T00:00:00.000000000", - "2000-05-16T12:00:00.000000000", + "2000-01-16T00:00:00.000000000", + "2000-03-16T00:00:00.000000000", "2000-06-16T00:00:00.000000000", - "2000-07-16T12:00:00.000000000", - "2000-08-16T12:00:00.000000000", "2000-09-16T00:00:00.000000000", - "2000-10-16T12:00:00.000000000", - "2000-11-16T00:00:00.000000000", - "2000-12-16T12:00:00.000000000", - "2001-01-16T12:00:00.000000000", "2001-02-15T00:00:00.000000000", - "2001-12-16T12:00:00.000000000", ], dtype="datetime64[ns]", ), - }, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((15, 4, 4)), - coords={ - "lat": self.ds.lat, - "lon": self.ds.lon, - "time": time_new, - }, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "time_series", - "freq": "hour", - "weighted": "True", - "center_times": "False", - }, - ) + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ), + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "day", + "weighted": "True", + "center_times": "True", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - class TestClimatology: - # TODO: Update TestClimatology tests to use other numbers rather than 1's - # for better test reliability and accuracy. This may require subsetting. 
- @pytest.fixture(autouse=True) - def setup(self): - self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) + def test_weighted_hourly_averages(self): + ds = self.ds.copy() + ds.coords["time"].attrs["bounds"] = "time_bnds" - def test_raises_error_without_time_dimension(self): - ds = self.ds.copy() - ds = ds.drop_dims("time") + result = ds.temporal.group_average("ts", "hour") + expected = ds.copy() + expected = expected.drop_dims("time") + expected["ts"] = xr.DataArray( + name="ts", + data=np.array([[[2]], [[1]], [[1]], [[1]], [[2]]]), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": ds.time, + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "group_average", + "freq": "hour", + "weighted": "True", + "center_times": "False", + }, + ) - with pytest.raises(KeyError): - ds.temporal._temporal_avg("climatology", "season", "ts") + assert result.identical(expected) - def test_raises_error_with_incorrect_freq_arg(self): - with pytest.raises(ValueError): - self.ds.temporal._temporal_avg( - "ts", - "climatology", - "incorrect_freq", - ) - def test_raises_error_with_incorrect_dec_mode_arg(self): - with pytest.raises(ValueError): - self.ds.temporal._temporal_avg( - "ts", - "climatology", - freq="season", - season_config={"dec_mode": "incorrect"}, - ) +class TestClimatology: + # TODO: Update TestClimatology tests to use other numbers rather than 1's + # for better test reliability and accuracy. This may require subsetting. 
+ @pytest.fixture(autouse=True) + def setup(self): + self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - def test_raises_error_if_data_var_does_not_exist_in_dataset(self): - with pytest.raises(KeyError): - self.ds.temporal._temporal_avg( - "nonexistent_var", "climatology", freq="season" - ) + def test_weighted_seasonal_climatology_with_DJF(self): + ds = self.ds.copy() - def test_weighted_seasonal_climatology_with_DJF(self): - ds = self.ds.copy() + result = ds.temporal.climatology( + "ts", + "season", + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + ) - result = ds.temporal._temporal_avg( - "ts", - "climatology", - "season", - season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, - ) - expected = ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + expected = ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 1, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 10, 1), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 1, 1), cftime.datetime(1, 4, 1), @@ -1091,55 +1013,55 @@ def test_weighted_seasonal_climatology_with_DJF(self): cftime.datetime(1, 10, 1), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((4, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "season", - "weighted": "True", - "center_times": "False", - "dec_mode": "DJF", - "drop_incomplete_djf": "True", - }, - ) + }, + attrs={ + "axis": "T", + "long_name": "time", + 
"standard_name": "time", + "bounds": "time_bnds", + }, + ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((4, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "season", + "weighted": "True", + "center_times": "False", + "dec_mode": "DJF", + "drop_incomplete_djf": "True", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - @requires_dask - def test_chunked_weighted_seasonal_climatology_with_DJF(self): - ds = self.ds.copy().chunk({"time": 2}) + @requires_dask + def test_chunked_weighted_seasonal_climatology_with_DJF(self): + ds = self.ds.copy().chunk({"time": 2}) - result = ds.temporal._temporal_avg( - "ts", - "climatology", - "season", - season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, - ) - expected = ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + result = ds.temporal.climatology( + "ts", + "season", + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + ) + + expected = ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 1, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 10, 1), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 1, 1), cftime.datetime(1, 4, 1), @@ -1147,54 +1069,52 @@ def test_chunked_weighted_seasonal_climatology_with_DJF(self): cftime.datetime(1, 10, 1), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((4, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": 
expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "season", - "weighted": "True", - "center_times": "False", - "dec_mode": "DJF", - "drop_incomplete_djf": "True", - }, - ) + }, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((4, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "season", + "weighted": "True", + "center_times": "False", + "dec_mode": "DJF", + "drop_incomplete_djf": "True", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_weighted_seasonal_climatology_with_JFD(self): - ds = self.ds.copy() + def test_weighted_seasonal_climatology_with_JFD(self): + ds = self.ds.copy() - result = ds.temporal._temporal_avg( - "ts", - "climatology", - "season", - season_config={"dec_mode": "JFD"}, - ) - expected = ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + result = ds.temporal.climatology( + "ts", "season", season_config={"dec_mode": "JFD"} + ) + + expected = ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 1, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 10, 1), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 1, 1), cftime.datetime(1, 4, 1), @@ -1202,60 +1122,57 @@ def test_weighted_seasonal_climatology_with_JFD(self): cftime.datetime(1, 10, 1), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - 
expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((4, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "season", - "weighted": "True", - "center_times": "False", - "dec_mode": "JFD", - }, - ) + }, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((4, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "season", + "weighted": "True", + "center_times": "False", + "dec_mode": "JFD", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_weighted_custom_seasonal_climatology(self): - # FIXME: Fix this test - ds = self.ds.copy() + def test_weighted_custom_seasonal_climatology(self): + ds = self.ds.copy() - custom_seasons = [ - ["Jan", "Feb", "Mar"], - ["Apr", "May", "Jun"], - ["Jul", "Aug", "Sep"], - ["Oct", "Nov", "Dec"], - ] - result = ds.temporal._temporal_avg( - "ts", - "climatology", - "season", - season_config={"custom_seasons": custom_seasons}, - ) - expected = ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + custom_seasons = [ + ["Jan", "Feb", "Mar"], + ["Apr", "May", "Jun"], + ["Jul", "Aug", "Sep"], + ["Oct", "Nov", "Dec"], + ] + result = ds.temporal.climatology( + "ts", "season", season_config={"custom_seasons": custom_seasons} + ) + + expected = ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 2, 1), + cftime.datetime(1, 5, 1), + cftime.datetime(1, 8, 1), + cftime.datetime(1, 11, 1), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 2, 1), cftime.datetime(1, 5, 1), @@ -1263,53 +1180,61 
@@ def test_weighted_custom_seasonal_climatology(self): cftime.datetime(1, 11, 1), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 2, 1), - cftime.datetime(1, 5, 1), - cftime.datetime(1, 8, 1), - cftime.datetime(1, 11, 1), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((4, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "season", - "weighted": "True", - "center_times": "False", - "custom_seasons": [ - "JanFebMar", - "AprMayJun", - "JulAugSep", - "OctNovDec", - ], - }, - ) - - assert result.identical(expected) + }, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) - def test_weighted_monthly_climatology(self): - result = self.ds.temporal._temporal_avg("ts", "climatology", "month") + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((4, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "season", + "weighted": "True", + "center_times": "False", + "custom_seasons": [ + "JanFebMar", + "AprMayJun", + "JulAugSep", + "OctNovDec", + ], + }, + ) - expected = self.ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + assert result.identical(expected) + + def test_weighted_monthly_climatology(self): + result = self.ds.temporal.climatology("ts", "month") + + expected = self.ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 1, 1), + cftime.datetime(1, 2, 1), + cftime.datetime(1, 3, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 5, 1), + cftime.datetime(1, 6, 
1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 8, 1), + cftime.datetime(1, 9, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 11, 1), + cftime.datetime(1, 12, 1), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 1, 1), cftime.datetime(1, 2, 1), @@ -1325,57 +1250,55 @@ def test_weighted_monthly_climatology(self): cftime.datetime(1, 12, 1), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 2, 1), - cftime.datetime(1, 3, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 5, 1), - cftime.datetime(1, 6, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 8, 1), - cftime.datetime(1, 9, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 11, 1), - cftime.datetime(1, 12, 1), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) + }, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((12, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "month", - "weighted": "True", - "center_times": "False", - }, - ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((12, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "month", + "weighted": "True", + "center_times": "False", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_unweighted_monthly_climatology(self): - result = self.ds.temporal._temporal_avg( - "ts", "climatology", "month", weighted=False - ) + def test_unweighted_monthly_climatology(self): + result = self.ds.temporal.climatology("ts", "month", weighted=False) - expected = self.ds.copy() - 
expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + expected = self.ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 1, 1), + cftime.datetime(1, 2, 1), + cftime.datetime(1, 3, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 5, 1), + cftime.datetime(1, 6, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 8, 1), + cftime.datetime(1, 9, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 11, 1), + cftime.datetime(1, 12, 1), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 1, 1), cftime.datetime(1, 2, 1), @@ -1391,56 +1314,54 @@ def test_unweighted_monthly_climatology(self): cftime.datetime(1, 12, 1), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 2, 1), - cftime.datetime(1, 3, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 5, 1), - cftime.datetime(1, 6, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 8, 1), - cftime.datetime(1, 9, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 11, 1), - cftime.datetime(1, 12, 1), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((12, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "month", - "weighted": "False", - "center_times": "False", - }, - ) + }, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((12, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "month", + "weighted": "False", + "center_times": "False", + 
}, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_weighted_daily_climatology(self): - result = self.ds.temporal._temporal_avg( - "ts", "climatology", "day", weighted=True - ) + def test_weighted_daily_climatology(self): + result = self.ds.temporal.climatology("ts", "day", weighted=True) - expected = self.ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + expected = self.ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 1, 16), + cftime.datetime(1, 2, 15), + cftime.datetime(1, 3, 16), + cftime.datetime(1, 4, 16), + cftime.datetime(1, 5, 16), + cftime.datetime(1, 6, 16), + cftime.datetime(1, 7, 16), + cftime.datetime(1, 8, 16), + cftime.datetime(1, 9, 16), + cftime.datetime(1, 10, 16), + cftime.datetime(1, 11, 16), + cftime.datetime(1, 12, 16), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 1, 16), cftime.datetime(1, 2, 15), @@ -1456,56 +1377,54 @@ def test_weighted_daily_climatology(self): cftime.datetime(1, 12, 16), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 3, 16), - cftime.datetime(1, 4, 16), - cftime.datetime(1, 5, 16), - cftime.datetime(1, 6, 16), - cftime.datetime(1, 7, 16), - cftime.datetime(1, 8, 16), - cftime.datetime(1, 9, 16), - cftime.datetime(1, 10, 16), - cftime.datetime(1, 11, 16), - cftime.datetime(1, 12, 16), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((12, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "day", - "weighted": "True", - "center_times": "False", - }, - ) + }, + attrs={ + "axis": "T", + "long_name": "time", + 
"standard_name": "time", + "bounds": "time_bnds", + }, + ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((12, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "day", + "weighted": "True", + "center_times": "False", + }, + ) - assert result.identical(expected) + assert result.identical(expected) - def test_unweighted_daily_climatology(self): - result = self.ds.temporal._temporal_avg( - "ts", "climatology", "day", weighted=False - ) + def test_unweighted_daily_climatology(self): + result = self.ds.temporal.climatology("ts", "day", weighted=False) - expected = self.ds.copy() - expected = expected.drop_dims("time") - expected_time = xr.DataArray( - data=np.array( + expected = self.ds.copy() + expected = expected.drop_dims("time") + expected_time = xr.DataArray( + data=np.array( + [ + cftime.datetime(1, 1, 16), + cftime.datetime(1, 2, 15), + cftime.datetime(1, 3, 16), + cftime.datetime(1, 4, 16), + cftime.datetime(1, 5, 16), + cftime.datetime(1, 6, 16), + cftime.datetime(1, 7, 16), + cftime.datetime(1, 8, 16), + cftime.datetime(1, 9, 16), + cftime.datetime(1, 10, 16), + cftime.datetime(1, 11, 16), + cftime.datetime(1, 12, 16), + ], + ), + coords={ + "time": np.array( [ cftime.datetime(1, 1, 16), cftime.datetime(1, 2, 15), @@ -1521,57 +1440,239 @@ def test_unweighted_daily_climatology(self): cftime.datetime(1, 12, 16), ], ), - coords={ - "time": np.array( - [ - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 3, 16), - cftime.datetime(1, 4, 16), - cftime.datetime(1, 5, 16), - cftime.datetime(1, 6, 16), - cftime.datetime(1, 7, 16), - cftime.datetime(1, 8, 16), - cftime.datetime(1, 9, 16), - cftime.datetime(1, 10, 16), - cftime.datetime(1, 11, 16), - cftime.datetime(1, 12, 16), - ], - ), - }, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", 
- }, - ) - expected["ts"] = xr.DataArray( - name="ts", - data=np.ones((12, 4, 4)), - coords={"lat": self.ds.lat, "lon": self.ds.lon, "time": expected_time}, - dims=["time", "lat", "lon"], - attrs={ - "operation": "temporal_avg", - "mode": "climatology", - "freq": "day", - "weighted": "False", - "center_times": "False", - }, - ) + }, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) + expected["ts"] = xr.DataArray( + name="ts", + data=np.ones((12, 4, 4)), + coords={"lat": expected.lat, "lon": expected.lon, "time": expected_time}, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "climatology", + "freq": "day", + "weighted": "False", + "center_times": "False", + }, + ) - assert result.identical(expected) + assert result.identical(expected) -class TestSetObjAttrs: +class TestDepartures: + # TODO: Update TestDepartures tests to use other numbers rather than 1's for + # better test reliability and accuracy. This may require subsetting. @pytest.fixture(autouse=True) def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) + self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - def test_raises_error_if_operation_is_not_supported(self): - with pytest.raises(ValueError): - self.ds.temporal._set_obj_attrs( - "unsupported", + self.seasons = ["JJA", "MAM", "SON", "DJF"] + + def test_weighted_seasonal_departures_with_DJF(self): + # Create a post-climatology dataset. + ds = self.ds.copy() + # Drop incomplete DJF seasons + ds = ds.isel(time=slice(2, -1)) + + # Compare result of the method against the expected. 
+ result = ds.temporal.departures( + "ts", + "season", + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + ) + expected = ds.copy() + expected["ts"] = xr.DataArray( + data=np.zeros((12, 4, 4)), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": ds.time, + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "departures", + "freq": "season", + "weighted": "True", + "center_times": "False", + "dec_mode": "DJF", + "drop_incomplete_djf": "True", + }, + ) + + assert result.identical(expected) + + def test_unweighted_seasonal_departures_with_DJF(self): + ds = self.ds.copy() + # Drop incomplete DJF seasons + ds = ds.isel(time=slice(2, -1)) + + # Compare result of the method against the expected. + result = ds.temporal.departures( + "ts", + "season", + weighted=False, + season_config={"dec_mode": "DJF", "drop_incomplete_djf": True}, + ) + expected = ds.copy() + expected["ts"] = xr.DataArray( + data=np.zeros((12, 4, 4)), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": ds.time, + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "departures", + "freq": "season", + "weighted": "False", + "center_times": "False", + "dec_mode": "DJF", + "drop_incomplete_djf": "True", + }, + ) + + assert result.identical(expected) + + def test_unweighted_seasonal_departures_with_JFD(self): + ds = self.ds.copy() + + # Compare result of the method against the expected. 
+ result = ds.temporal.departures( + "ts", + "season", + weighted=False, + season_config={"dec_mode": "JFD"}, + ) + expected = ds.copy() + expected["ts"] = xr.DataArray( + data=np.zeros((15, 4, 4)), + coords={ + "lat": expected.lat, + "lon": expected.lon, + "time": ds.time, + }, + dims=["time", "lat", "lon"], + attrs={ + "operation": "temporal_avg", + "mode": "departures", + "freq": "season", + "weighted": "False", + "center_times": "False", + "dec_mode": "JFD", + }, + ) + + assert result.identical(expected) + + +class TestCenterTimes: + @pytest.fixture(autouse=True) + def setup(self): + self.ds = generate_dataset(cf_compliant=True, has_bounds=True) + + def test_raises_error_if_time_dimension_does_not_exist_in_dataset(self): + ds = self.ds.copy() + ds = ds.drop_dims("time") + + with pytest.raises(KeyError): + ds.temporal.center_times(ds) + + def test_gets_time_as_the_midpoint_between_time_bounds(self): + ds = self.ds.copy() + + # Make the time coordinates uncentered. + uncentered_time = np.array( + [ + "2000-01-31T12:00:00.000000000", + "2000-02-29T12:00:00.000000000", + "2000-03-31T12:00:00.000000000", + "2000-04-30T00:00:00.000000000", + "2000-05-31T12:00:00.000000000", + "2000-06-30T00:00:00.000000000", + "2000-07-31T12:00:00.000000000", + "2000-08-31T12:00:00.000000000", + "2000-09-30T00:00:00.000000000", + "2000-10-16T12:00:00.000000000", + "2000-11-30T00:00:00.000000000", + "2000-12-31T12:00:00.000000000", + "2001-01-31T12:00:00.000000000", + "2001-02-28T00:00:00.000000000", + "2001-12-31T12:00:00.000000000", + ], + dtype="datetime64[ns]", + ) + ds.time.data[:] = uncentered_time + + # Set object attrs required to test the method. + ds.temporal._time_bounds = ds.time_bnds.copy() + + # Compare result of the method against the expected. 
+ expected = ds.copy() + expected_time_data = np.array( + [ + "2000-01-16T12:00:00.000000000", + "2000-02-15T12:00:00.000000000", + "2000-03-16T12:00:00.000000000", + "2000-04-16T00:00:00.000000000", + "2000-05-16T12:00:00.000000000", + "2000-06-16T00:00:00.000000000", + "2000-07-16T12:00:00.000000000", + "2000-08-16T12:00:00.000000000", + "2000-09-16T00:00:00.000000000", + "2000-10-16T12:00:00.000000000", + "2000-11-16T00:00:00.000000000", + "2000-12-16T12:00:00.000000000", + "2001-01-16T12:00:00.000000000", + "2001-02-15T00:00:00.000000000", + "2001-12-16T12:00:00.000000000", + ], + dtype="datetime64[ns]", + ) + expected = expected.assign_coords( + { + "time": xr.DataArray( + name="time", + data=expected_time_data, + coords={"time": expected_time_data}, + dims="time", + attrs={ + "long_name": "time", + "standard_name": "time", + "axis": "T", + "bounds": "time_bnds", + }, + ) + } + ) + # Update time bounds with centered time coordinates. + time_bounds = ds.time_bnds.copy() + time_bounds["time"] = expected.time + expected["time_bnds"] = time_bounds + + result = ds.temporal.center_times(ds) + assert result.identical(expected) + + +class Test_SetObjAttrs: + # NOTE: Testing this private method directly instead of through the public + # methods because it eliminates redundancy. 
+ @pytest.fixture(autouse=True) + def setup(self): + self.ds = generate_dataset(cf_compliant=True, has_bounds=True) + + def test_raises_error_if_operation_is_not_supported(self): + with pytest.raises(ValueError): + self.ds.temporal._set_obj_attrs( + "unsupported", freq="season", weighted=True, center_times=True, @@ -1587,7 +1688,7 @@ def test_raises_error_if_freq_arg_is_not_supported_by_operation(self): with pytest.raises(ValueError): ds.temporal._set_obj_attrs( - "time_series", + "group_average", freq="unsupported", weighted=True, center_times=True, @@ -1630,7 +1731,7 @@ def test_does_not_raise_error_if_freq_arg_is_supported_by_operation(self): for freq in time_series_freqs: ds.temporal._set_obj_attrs( - "time_series", + "group_average", freq=freq, weighted=True, center_times=True, @@ -1693,1056 +1794,256 @@ def test_raises_error_if_december_mode_is_not_supported(self): }, ) - def test_sets_object_attributes(self): - ds = self.ds.copy() - ds.temporal._set_obj_attrs( - "climatology", - freq="season", - weighted=True, - center_times=True, - season_config={"dec_mode": "JFD"}, - ) - assert ds.temporal._mode == "climatology" - assert ds.temporal._freq == "season" - assert ds.temporal._center_times - assert ds.temporal._weighted - assert ds.temporal._season_config == {"dec_mode": "JFD"} - - ds.temporal._set_obj_attrs( - "climatology", - freq="season", - weighted=True, - center_times=True, - season_config={ - "custom_seasons": [ - ["Jan", "Feb", "Mar"], - ["Apr", "May", "Jun"], - ["Jul", "Aug", "Sep"], - ["Oct", "Nov", "Dec"], - ], - }, - ) - assert ds.temporal._season_config == { - "custom_seasons": { - "JanFebMar": ["Jan", "Feb", "Mar"], - "AprMayJun": ["Apr", "May", "Jun"], - "JulAugSep": ["Jul", "Aug", "Sep"], - "OctNovDec": ["Oct", "Nov", "Dec"], - } - } - - -class TestCustomSeasons: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - self.expected = [ - "Jan", - "Feb", - "Mar", - "Apr", - "May", - 
"Jun", - "Jul", - "Aug", - "Sep", - "Oct", - "Nov", - "Dec", - ] - - def test_raises_error_if_month_str_not_supported(self): - # Incorrect str "J". - with pytest.raises(ValueError): - self.ds.temporal._form_seasons( - custom_seasons=[ - ["J", "Feb", "Mar"], - ["Apr", "May", "Jun"], - ["Jul", "Aug", "Sep"], - ["Oct", "Nov", "Dec"], - ] - ) - # Incorrect str "January". - with pytest.raises(ValueError): - self.ds.temporal._form_seasons( - custom_seasons=[ - ["January", "Feb", "Mar"], - ["Apr", "May", "Jun"], - ["Jul", "Aug", "Sep"], - ["Oct", "Nov", "Dec"], - ] - ) +class Test_GetWeights: + # NOTE: Testing this private method directly instead of through the public + # methods because there is potential for this method to become public. + class TestWeightsForAverageMode: + @pytest.fixture(autouse=True) + def setup(self): + self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - def test_raises_error_if_missing_months(self): - with pytest.raises(ValueError): - # "Jan" is missing. - self.ds.temporal._form_seasons( - custom_seasons=[ - ["Feb", "Mar"], - ["Apr", "May", "Jun"], - ["Jul", "Aug", "Sep"], - ["Oct", "Nov", "Dec"], - ] - ) + def test_weights_for_yearly_averages(self): + ds = self.ds.copy() - def test_raises_error_if_duplicate_months_were_found(self): - with pytest.raises(ValueError): - # "Jan" is duplicated. - self.ds.temporal._form_seasons( - custom_seasons=[ - ["Jan", "Jan", "Feb"], - ["Apr", "May", "Jun"], - ["Jul", "Aug", "Sep"], - ["Oct", "Nov", "Dec"], - ] + # Set object attrs required to test the method. 
+ ds.temporal._time_bounds = ds.time_bnds.copy() + ds.temporal._mode = "average" + ds.temporal._freq = "year" + ds.temporal._weighted = "True" + ds.temporal._labeled_time = xr.DataArray( + name="year", + data=np.array( + [ + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + coords={"time": ds.time}, + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, ) - def test_does_not_raise_error(self): - result = self.ds.temporal._form_seasons( - custom_seasons=[ - ["Jan", "Feb", "Mar"], - ["Apr", "May", "Jun"], - ["Jul", "Aug", "Sep"], - ["Oct", "Nov", "Dec"], - ] - ) - expected = { - "JanFebMar": ["Jan", "Feb", "Mar"], - "AprMayJun": ["Apr", "May", "Jun"], - "JulAugSep": ["Jul", "Aug", "Sep"], - "OctNovDec": ["Oct", "Nov", "Dec"], - } - assert result == expected - - result = self.ds.temporal._form_seasons( - custom_seasons=[ - ["Jan", "Feb", "Mar", "Apr", "May", "Jun"], - ["Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], - ] - ) - expected = { - "JanFebMarAprMayJun": ["Jan", "Feb", "Mar", "Apr", "May", "Jun"], - "JulAugSepOctNovDec": ["Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], - } - assert result == expected - - result = self.ds.temporal._form_seasons( - custom_seasons=[ - ["Jan", "Feb", "Mar"], - ["Apr", "May", "Jun", "Jul"], - ["Aug", "Sep", "Oct", "Nov", "Dec"], - ] - ) - expected = { - "JanFebMar": ["Jan", "Feb", "Mar"], - "AprMayJunJul": ["Apr", "May", "Jun", "Jul"], - "AugSepOctNovDec": ["Aug", "Sep", 
"Oct", "Nov", "Dec"], - } - assert result == expected - - -class TestAverager: - # FIXME: Update test this so that it is accurate, rather than 1's - # for averages - # May involve subsetting - @pytest.fixture(autouse=True) - def setup(self): - self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - self.ds.attrs.update({"operation_type": "climatology"}) - - def test_weighted_by_month_day(self): - ds = self.ds.copy() - - # Set object attrs required to test the method - ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" - ds.temporal._freq = "day" - ds.temporal._weighted = True - ds.temporal._center_times = True - ds.temporal._time_grouped = xr.DataArray( - name="month_day", - data=np.array( - [ - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 3, 16), - cftime.datetime(1, 4, 16), - cftime.datetime(1, 5, 6), - cftime.datetime(1, 6, 16), - cftime.datetime(1, 7, 16), - cftime.datetime(1, 8, 16), - cftime.datetime(1, 9, 16), - cftime.datetime(1, 10, 16), - cftime.datetime(1, 11, 16), - cftime.datetime(1, 12, 16), - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 12, 16), - ], - ), - coords={"time": ds.time}, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. - ts_result = ds.temporal._averager(ds["ts"]) - ts_expected = np.ones((12, 4, 4)) - assert np.allclose(ts_result, ts_expected) - - def test_unweighted_daily_climatology(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. 
- ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" - ds.temporal._freq = "day" - ds.temporal._weighted = False - ds.temporal._center_times = True - ds.temporal._time_grouped = xr.DataArray( - name="month_day", - data=np.array( - [ - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 3, 16), - cftime.datetime(1, 4, 16), - cftime.datetime(1, 5, 6), - cftime.datetime(1, 6, 16), - cftime.datetime(1, 7, 16), - cftime.datetime(1, 8, 16), - cftime.datetime(1, 9, 16), - cftime.datetime(1, 10, 16), - cftime.datetime(1, 11, 16), - cftime.datetime(1, 12, 16), - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 12, 16), - ], - ), - coords={"time": ds.time}, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. - ts_result = ds.temporal._averager(ds["ts"]) - ts_expected = np.ones((12, 4, 4)) - assert np.allclose(ts_result, ts_expected) - - def test_weighted_annual_climatology(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. 
- ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" - ds.temporal._freq = "month" - ds.temporal._weighted = True - ds.temporal._center_times = True - ds.temporal._time_grouped = xr.DataArray( - name="month", - data=np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 2, 1), - cftime.datetime(1, 3, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 5, 1), - cftime.datetime(1, 6, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 8, 1), - cftime.datetime(1, 9, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 11, 1), - cftime.datetime(1, 12, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 2, 1), - cftime.datetime(1, 12, 1), - ], - ), - coords={"time": ds.time}, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. - # Check non-bounds variables were properly grouped and averaged - ts_result = ds.temporal._averager(ds["ts"]) - ts_expected = np.ones((12, 4, 4)) - assert np.allclose(ts_result, ts_expected) - - def test_weighted_seasonal_climatology_with_DJF_and_drop_incomplete_DJF(self): - ds = self.ds.copy() - # Drop the incomplete seasons - ds = ds.isel(time=slice(2, -1)) - - # Set object attrs required to test the method. 
- ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" - ds.temporal._freq = "season" - ds.temporal._weighted = True - ds.temporal._center_times = True - ds.temporal._season_config = { - "dec_mode": "DJF", - "drop_incomplete_djf": True, - } - ds.temporal._time_grouped = xr.DataArray( - name="season", - data=np.array( - [ - # MAM - cftime.datetime(1, 3, 1), - cftime.datetime(1, 3, 1), - cftime.datetime(1, 3, 1), - # JJA - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - # SON - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - # DJF - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - ], - ), - coords={"time": ds.time}, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. - # Check non-bounds variables were properly grouped and averaged - ts_result = ds.temporal._averager(ds["ts"]) - ts_expected = np.ones((4, 4, 4)) - assert np.allclose(ts_result, ts_expected) - - def test_weighted_seasonal_climatology_with_JFD(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. 
- ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" - ds.temporal._freq = "season" - ds.temporal._weighted = True - ds.temporal._center_times = True - ds.temporal._season_config = {"dec_mode": "JFD"} - ds.temporal._time_grouped = xr.DataArray( - name="season", - data=np.array( - [ - # JFD - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - # MAM - cftime.datetime(1, 3, 1), - cftime.datetime(1, 3, 1), - cftime.datetime(1, 3, 1), - # JJA - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - # SON - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - # JFD - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - ], - ), - coords={"time": ds.time}, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. - ts_result = ds.temporal._averager(ds["ts"]) - ts_expected = np.ones((4, 4, 4)) - assert np.allclose(ts_result, ts_expected) - - -class TestDropIncompleteDJF: - @pytest.fixture(autouse=True) - def setup(self): - self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_incomplete_DJF_seasons_are_dropped(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._time_bounds = ds.time_bnds.copy() - - # Define method inputs. - ds["ts"] = xr.DataArray( - data=np.ones(5), - coords={ - "time": [ - datetime(2000, 1, 1), - datetime(2000, 2, 1), - datetime(2000, 3, 1), - datetime(2000, 4, 1), - datetime(2001, 12, 1), - ] - }, - dims=["time"], - ) - - # Compare result of the method against the expected. 
- result = ds.temporal._drop_incomplete_djf(ds) - expected = ds.copy() - # Drop the incomplete DJF seasons - expected = expected.isel(time=slice(2, -1)) - expected["ts"] = xr.DataArray( - data=np.ones(2), - coords={"time": [datetime(2000, 3, 1), datetime(2000, 4, 1)]}, - dims=["time"], - ) - assert result.identical(expected) - - def test_does_not_drop_incomplete_DJF_seasons_since_if_dont_exist(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._time_bounds = ds.time_bnds.copy() - - # Update time coordinate points so that the months don't fall in - # incomplete seasons. - ds.time.values[0] = datetime(1999, 3, 1) - ds.time.values[1] = datetime(1999, 4, 1) - ds.time.values[-1] = datetime(1999, 5, 1) - - # Compare result of the method against the expected. - result = ds.temporal._drop_incomplete_djf(ds) - expected = ds - assert result.identical(expected) - - -class TestGroupTimeCoords: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_groups_time_coords_for_time_series_season_freq(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._mode = "time_series" - ds.temporal._freq = "season" - ds.temporal._season_config = {"dec_mode": "DJF", "drop_incomplete_djf": False} - - # Compare result of the method against the expected. 
- result = ds.temporal._group_time_coords(ds.ts) - expected = xr.DataArray( - name="year_month", - data=np.array( - [ - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2002-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={"time": ds.time}, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - assert result.identical(expected) - - def test_groups_time_coords_for_climatology_season_freq(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._mode = "climatology" - ds.temporal._freq = "season" - ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._season_config = {"dec_mode": "DJF", "drop_incomplete_djf": False} - - # Compare result of the method against the expected. 
- result = ds.temporal._group_time_coords(ds.ts) - expected = xr.DataArray( - name="month", - data=np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - ], - ), - coords={"time": ds.time}, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - assert result.identical(expected) - - -class TestProcessSeasonDataFrame: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - self.df = pd.DataFrame( - data=np.array( - [ - (2000, "DJF", 1), - (2000, "DJF", 2), - (2000, "MAM", 3), - (2000, "MAM", 4), - (2000, "MAM", 5), - (2000, "JJA", 6), - (2000, "JJA", 7), - (2000, "JJA", 8), - (2000, "SON", 9), - (2000, "SON", 10), - (2000, "SON", 11), - (2000, "DJF", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - - def test_maps_custom_seasons_if_custom_seasons_specified_and_drops_columns(self): - ds = self.ds.copy() - df = self.df.copy() - - # Set object attrs required to test the method. - ds.temporal._mode = "time_series" - ds.temporal._season_config = { - "custom_seasons": { - "JanFebMar": ["Jan", "Feb", "Mar"], - "AprMayJun": ["Apr", "May", "Jun"], - "JulAugSep": ["Jul", "Aug", "Sep"], - "OctNovDec": ["Oct", "Nov", "Dec"], - } - } - - # Compare result of the method against the expected. 
- result = ds.temporal._process_season_dataframe(df) - expected = pd.DataFrame( - data=np.array( - [ - (2000, 2), - (2000, 2), - (2000, 2), - (2000, 5), - (2000, 5), - (2000, 5), - (2000, 8), - (2000, 8), - (2000, 8), - (2000, 11), - (2000, 11), - (2000, 11), - ], - dtype=object, - ), - columns=["year", "month"], - ) - expected["month"] = expected.month.astype("int64") - - assert result.equals(expected) - - def test_shifts_decembers_for_DJF_if_DJF_is_specified(self): - ds = self.ds.copy() - df = self.df.copy() - - # Set object attrs required to test the method. - ds.temporal._mode = "climatology" - ds.temporal._season_config = { - "dec_mode": "DJF", - "drop_incomplete_djf": True, - } - - # Compare result of the method against the expected. - result = ds.temporal._process_season_dataframe(df) - expected = pd.DataFrame( - data=np.array( - [1, 1, 4, 4, 4, 7, 7, 7, 10, 10, 10, 1], - dtype="int64", - ), - columns=["month"], - ) - assert result.equals(expected) - - -class TestConvertDFtoDT: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_converts_dataframe_to_datetime_for_seasonal_time_series(self): - ds = self.ds.copy() - df = pd.DataFrame( - data=[(2000, 1), (2000, 4), (2000, 7)], - columns=["year", "month"], - ) - - ds.temporal._mode = "time_series" - result = ds.temporal._convert_df_to_dt(df) - expected = np.array( - [ - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ) - - assert np.array_equal(result, expected) - - def test_converts_dataframe_to_datetime_for_seasonal_climatology(self): - ds = self.ds.copy() - df = pd.DataFrame(data=[1, 4, 7], columns=["month"]) - - ds.temporal._mode = "climatology" - result = ds.temporal._convert_df_to_dt(df) - expected = np.array( - [ - cftime.datetime(1, 1, 1, 0), - cftime.datetime(1, 4, 1, 0), - cftime.datetime(1, 7, 1, 0), - ] - ) - - assert 
np.array_equal(result, expected) - - def test_converts_dataframe_to_datetime_for_seasonal_departures(self): - ds = self.ds.copy() - df = pd.DataFrame(data=[1, 4, 7], columns=["month"]) - - ds.temporal._mode = "departures" - result = ds.temporal._convert_df_to_dt(df) - expected = np.array( - [ - cftime.datetime(1, 1, 1, 0), - cftime.datetime(1, 4, 1, 0), - cftime.datetime(1, 7, 1, 0), - ] - ) - - assert np.array_equal(result, expected) - - -class TestMapMonthsToCustomSeasons: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_raises_error_if_custom_seasons_are_not_mapped(self): - ds = self.ds.copy() - ds.temporal._season_config = {"custom_seasons": None} - df = pd.DataFrame( - data=np.array( - [ - (2000, "DJF", 1), - (2000, "DJF", 2), - (2000, "MAM", 3), - (2000, "MAM", 4), - (2000, "MAM", 5), - (2000, "JJA", 6), - (2000, "JJA", 7), - (2000, "JJA", 8), - (2000, "SON", 9), - (2000, "SON", 10), - (2000, "SON", 11), - (2000, "DJF", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - - with pytest.raises(ValueError): - ds.temporal._map_months_to_custom_seasons(df) - - def test_maps_three_month_custom_seasons(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._season_config = { - "custom_seasons": [ - "JanFebMar", - "AprMayJun", - "JulAugSep", - "OctNovDec", - ] - } - - # Define method inputs. - # Includes default seasons. - df = pd.DataFrame( - data=np.array( - [ - (2000, "DJF", 1), - (2000, "DJF", 2), - (2000, "MAM", 3), - (2000, "MAM", 4), - (2000, "MAM", 5), - (2000, "JJA", 6), - (2000, "JJA", 7), - (2000, "JJA", 8), - (2000, "SON", 9), - (2000, "SON", 10), - (2000, "SON", 11), - (2000, "DJF", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - - # Compare result of the method against the expected. 
- result = ds.temporal._map_months_to_custom_seasons(df) - expected = pd.DataFrame( - data=np.array( - [ - (2000, "JanFebMar", 1), - (2000, "JanFebMar", 2), - (2000, "JanFebMar", 3), - (2000, "AprMayJun", 4), - (2000, "AprMayJun", 5), - (2000, "AprMayJun", 6), - (2000, "JulAugSep", 7), - (2000, "JulAugSep", 8), - (2000, "JulAugSep", 9), - (2000, "OctNovDec", 10), - (2000, "OctNovDec", 11), - (2000, "OctNovDec", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - assert result.equals(expected) - - def test_maps_six_month_custom_seasons(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._season_config = { - "custom_seasons": [ - "JanFebMarAprMayJun", - "JulAugSepOctNovDec", - ] - } - - # Define method inputs. - # Includes default seasons. - df = pd.DataFrame( - data=np.array( - [ - (2000, "DJF", 1), - (2000, "DJF", 2), - (2000, "MAM", 3), - (2000, "MAM", 4), - (2000, "MAM", 5), - (2000, "JJA", 6), - (2000, "JJA", 7), - (2000, "JJA", 8), - (2000, "SON", 9), - (2000, "SON", 10), - (2000, "SON", 11), - (2000, "DJF", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - - # Compare result of the method against the expected. - result = ds.temporal._map_months_to_custom_seasons(df) - expected = pd.DataFrame( - data=np.array( + # Compare result of the method against the expected. 
+ result = ds.temporal._get_weights() + expected = np.array( [ - (2000, "JanFebMarAprMayJun", 1), - (2000, "JanFebMarAprMayJun", 2), - (2000, "JanFebMarAprMayJun", 3), - (2000, "JanFebMarAprMayJun", 4), - (2000, "JanFebMarAprMayJun", 5), - (2000, "JanFebMarAprMayJun", 6), - (2000, "JulAugSepOctNovDec", 7), - (2000, "JulAugSepOctNovDec", 8), - (2000, "JulAugSepOctNovDec", 9), - (2000, "JulAugSepOctNovDec", 10), - (2000, "JulAugSepOctNovDec", 11), - (2000, "JulAugSepOctNovDec", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - assert result.equals(expected) - - def test_maps_three_month_custom_seasons_random_order(self): - ds = self.ds.copy() + 0.08469945, + 0.07923497, + 0.08469945, + 0.08196721, + 0.08469945, + 0.08196721, + 0.08469945, + 0.08469945, + 0.08196721, + 0.08469945, + 0.08196721, + 0.08469945, + 0.34444444, + 0.31111111, + 0.34444444, + ] + ) + assert np.allclose(result, expected) - # Set object attrs required to test the method. - ds.temporal._season_config = { - "custom_seasons": [ - # Swapped Jan and Dec - "DecFebMar", - "AprMayJun", - "JulAugSep", - "OctNovJan", - ] - } + def test_weights_for_monthly_averages(self): + ds = self.ds.copy() - # Define method inputs. - # Includes default seasons. - df = pd.DataFrame( - data=np.array( - [ - (2000, "DJF", 1), - (2000, "DJF", 2), - (2000, "MAM", 3), - (2000, "MAM", 4), - (2000, "MAM", 5), - (2000, "JJA", 6), - (2000, "JJA", 7), - (2000, "JJA", 8), - (2000, "SON", 9), - (2000, "SON", 10), - (2000, "SON", 11), - (2000, "DJF", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) + # Set object attrs required to test the method. 
+ ds.temporal._time_bounds = ds.time_bnds.copy() + ds.temporal._mode = "average" + ds.temporal._freq = "month" + ds.temporal._weighted = "True" + ds.temporal._labeled_time = xr.DataArray( + name="month", + data=np.array( + [ + cftime.datetime(1, 1, 1), + cftime.datetime(1, 2, 1), + cftime.datetime(1, 3, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 5, 1), + cftime.datetime(1, 6, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 8, 1), + cftime.datetime(1, 9, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 11, 1), + cftime.datetime(1, 12, 1), + cftime.datetime(1, 1, 1), + cftime.datetime(1, 2, 1), + cftime.datetime(1, 12, 1), + ], + ), + coords={"time": ds.time}, + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) - # Compare result of the method against the expected. - result = ds.temporal._map_months_to_custom_seasons(df) - expected = pd.DataFrame( - data=np.array( + # Compare result of the method against the expected. 
+ result = ds.temporal._get_weights() + expected = np.array( [ - (2000, "OctNovJan", 1), - (2000, "DecFebMar", 2), - (2000, "DecFebMar", 3), - (2000, "AprMayJun", 4), - (2000, "AprMayJun", 5), - (2000, "AprMayJun", 6), - (2000, "JulAugSep", 7), - (2000, "JulAugSep", 8), - (2000, "JulAugSep", 9), - (2000, "OctNovJan", 10), - (2000, "OctNovJan", 11), - (2000, "DecFebMar", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - assert result.equals(expected) - - -class TestMapSeasonstoMidMonths: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_maps_predefined_seasons_to_middle_months(self): - ds = self.ds.copy() - - ds.temporal._season_config = {"custom_seasons": None} - df = pd.DataFrame({"season": ["DJF", "MAM", "JJA", "SON"]}) - result = ds.temporal._map_seasons_to_mid_months(df) - expected = pd.DataFrame({"month": [1, 4, 7, 10]}) - - assert result.equals(expected) - - def test_maps_custom_seasons_with_odd_months_to_middle_months(self): - ds = self.ds.copy() - ds.temporal._season_config = { - "custom_seasons": { - "FebMarApr": ["Feb", "Mar", "Apr"], - "MayJunJul": ["May", "Jun", "Jul"], - "AugSepOct": ["Aug", "Sep", "Oct"], - "NovDecJan": ["Nov", "Dec", "Jan"], - } - } - - df = pd.DataFrame( - {"season": ["FebMarApr", "MayJunJul", "AugSepOct", "NovDecJan"]} - ) - result = ds.temporal._map_seasons_to_mid_months(df) - expected = pd.DataFrame({"month": [3, 6, 9, 12]}) - - assert result.equals(expected) - - def test_maps_custom_seasons_with_even_months_to_middle_months(self): - ds = self.ds.copy() - ds.temporal._season_config = { - "custom_seasons": { - "FebMarAprMay": ["Feb", "Mar", "Apr", "May"], - "JunJulAugSep": ["Jun", "Jul", "Aug", "Sep"], - "OctNovDecJan": ["Oct", "Nov", "Dec", "Jan"], - } - } - - df = pd.DataFrame({"season": ["FebMarAprMay", "JunJulAugSep", "OctNovDecJan"]}) - result = ds.temporal._map_seasons_to_mid_months(df) - expected = pd.DataFrame({"month": 
[4, 8, 12]}) - - assert result.equals(expected) - + 0.5, + 0.50877193, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 0.5, + 0.5, + # This is a leap year month, so the weight is less. + 0.49122807, + 0.5, + ] + ) + assert np.allclose(result, expected) -class TestShiftDecembers: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) + class TestWeightsForGroupAverageMode: + @pytest.fixture(autouse=True) + def setup(self): + self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - def test_decembers_shift_for_all_years(self): - ds = self.ds.copy() + def test_weights_for_yearly_averages(self): + ds = self.ds.copy() - # Define method inputs. - df = pd.DataFrame( - data=np.array( - [ - (2000, "DJF", 1), - (2000, "DJF", 2), - (2000, "DJF", 12), - (2001, "DJF", 1), - (2001, "DJF", 2), - (2001, "DJF", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) + # Set object attrs required to test the method. 
+ ds.temporal._time_bounds = ds.time_bnds.copy() + ds.temporal._mode = "group_average" + ds.temporal._freq = "year" + ds.temporal._weighted = "True" + ds.temporal._labeled_time = xr.DataArray( + name="year", + data=np.array( + [ + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + coords={"time": ds.time}, + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) - # Compare result of the method against the expected. - result = ds.temporal._shift_decembers(df) - expected = pd.DataFrame( - data=np.array( + # Compare result of the method against the expected. + result = ds.temporal._get_weights() + expected = np.array( [ - (2000, "DJF", 1), - (2000, "DJF", 2), - (2001, "DJF", 12), - (2001, "DJF", 1), - (2001, "DJF", 2), - (2002, "DJF", 12), - ], - dtype=object, - ), - columns=["year", "season", "month"], - ) - - assert result.equals(expected) - - -class TestDropObsoleteColumns: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_drops_month_col_for_time_series_operations(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._mode = "time_series" - - # Define method inputs. - df = pd.DataFrame(columns=["year", "season", "month"]) - - # Compare result of the method against the expected. 
- result = ds.temporal._drop_obsolete_columns(df) - expected = pd.DataFrame(columns=["year", "season"]) - - assert result.equals(expected) - - def test_drops_year_and_month_cols_for_climatology_and_departure_operations(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._mode = "climatology" - - # Define method inputs. - df = pd.DataFrame(columns=["year", "season", "month"]) - - # Compare result of the method against the expected. - result = ds.temporal._drop_obsolete_columns(df) - expected = pd.DataFrame(columns=["season"]) - - assert result.equals(expected) - - def test_raises_error_with_unsupported_operation(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._mode = "unsupported_operation" + 0.08469945, + 0.07923497, + 0.08469945, + 0.08196721, + 0.08469945, + 0.08196721, + 0.08469945, + 0.08469945, + 0.08196721, + 0.08469945, + 0.08196721, + 0.08469945, + 0.34444444, + 0.31111111, + 0.34444444, + ] + ) + assert np.allclose(result, expected) - df = pd.DataFrame(columns=["year", "season", "month"]) - with pytest.raises(ValueError): - ds.temporal._drop_obsolete_columns(df) + def test_weights_for_monthly_averages(self): + ds = self.ds.copy() + # Set object attrs required to test the method. 
+ ds.temporal._time_bounds = ds.time_bnds.copy() + ds.temporal._mode = "group_average" + ds.temporal._freq = "month" + ds.temporal._weighted = "True" + ds.temporal._labeled_time = xr.DataArray( + name="year_month", + data=np.array( + [ + "2000-01-01T00:00:00.000000000", + "2000-02-01T00:00:00.000000000", + "2000-03-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + "2000-05-01T00:00:00.000000000", + "2000-06-01T00:00:00.000000000", + "2000-07-01T00:00:00.000000000", + "2000-08-01T00:00:00.000000000", + "2000-09-01T00:00:00.000000000", + "2000-10-01T00:00:00.000000000", + "2000-11-01T00:00:00.000000000", + "2000-12-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2001-02-01T00:00:00.000000000", + "2001-12-01T00:00:00.000000000", + ], + dtype="datetime64[ns]", + ), + coords={"time": ds.time}, + dims=["time"], + attrs={ + "axis": "T", + "long_name": "time", + "standard_name": "time", + "bounds": "time_bnds", + }, + ) -class TestCalculateWeights: - class TestClimatology: - @pytest.fixture(autouse=True) - def setup(self): - self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) + # Compare result of the method against the expected. + result = ds.temporal._get_weights() + expected = np.ones(15) + assert np.allclose(result, expected) - def test_weights_for_seasonal_climatology_with_DJF(self): + def test_weights_for_seasonal_averages_with_DJF_and_drop_incomplete_seasons( + self, + ): ds = self.ds.copy() # Replace time and time bounds with incomplete seasons removed @@ -2842,27 +2143,32 @@ def test_weights_for_seasonal_climatology_with_DJF(self): # Set object attrs required to test the method. 
ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" + ds.temporal._mode = "group_average" ds.temporal._freq = "season" ds.temporal._weighted = "True" - ds.temporal.season_config = {"dec_mode": "DJF"} - ds.temporal._time_grouped = xr.DataArray( - name="season", + ds.temporal._season_config = {"dec_mode": "DJF"} + ds.temporal._labeled_time = xr.DataArray( + name="year_season", data=np.array( [ - cftime.datetime(1, 4, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), + # 2000 MAM + "2000-04-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + # 2000 JJA + "2000-07-01T00:00:00.000000000", + "2000-07-01T00:00:00.000000000", + "2000-07-01T00:00:00.000000000", + # 2000 SON + "2000-10-01T00:00:00.000000000", + "2000-10-01T00:00:00.000000000", + "2000-10-01T00:00:00.000000000", + # 2001 DJF + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", ], + dtype="datetime64[ns]", ), coords={"time": ds.time}, dims=["time"], @@ -2873,8 +2179,9 @@ def test_weights_for_seasonal_climatology_with_DJF(self): "bounds": "time_bnds", }, ) + # Compare result of the method against the expected. - result = ds.temporal._get_weights(ds["ts"]) + result = ds.temporal._get_weights() expected = np.array( [ 0.33695652, @@ -2891,38 +2198,44 @@ def test_weights_for_seasonal_climatology_with_DJF(self): 0.31111111, ] ) - assert np.allclose(result, expected, equal_nan=True) - def test_weights_for_seasonal_climatology_with_JFD(self): + def test_weights_for_seasonal_averages_with_JFD(self): ds = self.ds.copy() # Set object attrs required to test the method. 
ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" + ds.temporal._mode = "group_average" ds.temporal._freq = "season" ds.temporal._weighted = "True" - ds.temporal.season_config = {"dec_mode": "JDF"} - ds.temporal._time_grouped = xr.DataArray( - name="season", + ds.temporal._season_config = {"dec_mode": "JDF"} + ds.temporal._labeled_time = xr.DataArray( + name="year_season", data=np.array( [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), + # 2000 JFD + "2000-01-01T00:00:00.000000000", + "2000-01-01T00:00:00.000000000", + # 2000 MAM + "2000-04-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + "2000-04-01T00:00:00.000000000", + # 2000 JJA + "2000-07-01T00:00:00.000000000", + "2000-07-01T00:00:00.000000000", + "2000-07-01T00:00:00.000000000", + # 2000 SON + "2000-10-01T00:00:00.000000000", + "2000-10-01T00:00:00.000000000", + "2000-10-01T00:00:00.000000000", + # 2000 JFD + "2000-01-01T00:00:00.000000000", + # 2001 JFD + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", + "2001-01-01T00:00:00.000000000", ], + dtype="datetime64[ns]", ), coords={"time": ds.time}, dims=["time"], @@ -2935,128 +2248,74 @@ def test_weights_for_seasonal_climatology_with_JFD(self): ) # Compare result of the method against the expected. 
- result = ds.temporal._get_weights(ds["ts"]) - expected = np.array( - [ - [ - 0.17127072, - 0.16022099, - 0.33695652, - 0.32608696, - 0.33695652, - 0.32608696, - 0.33695652, - 0.33695652, - 0.32967033, - 0.34065934, - 0.32967033, - 0.17127072, - 0.17127072, - 0.15469613, - 0.17127072, - ] - ] - ) - assert np.allclose(result, expected, equal_nan=True) - - def test_weights_for_annual_climatology(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" - ds.temporal._freq = "month" - ds.temporal._weighted = "True" - ds.temporal.season_config = {"dec_mode": "DJF"} - ds.temporal._time_grouped = xr.DataArray( - name="month", - data=np.array( - [ - cftime.datetime(1, 1, 1), - cftime.datetime(1, 2, 1), - cftime.datetime(1, 3, 1), - cftime.datetime(1, 4, 1), - cftime.datetime(1, 5, 1), - cftime.datetime(1, 6, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 8, 1), - cftime.datetime(1, 9, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 11, 1), - cftime.datetime(1, 12, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 2, 1), - cftime.datetime(1, 12, 1), - ], - ), - coords={"time": ds.time}, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. 
- result = ds.temporal._get_weights(self.ds["ts"]) + result = ds.temporal._get_weights() expected = np.array( [ - [ - 0.5, - 0.50877193, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 0.5, - 0.5, - 0.49122807, - 0.5, - ] + 0.34065934, + 0.31868132, + 0.33695652, + 0.32608696, + 0.33695652, + 0.32608696, + 0.33695652, + 0.33695652, + 0.32967033, + 0.34065934, + 0.32967033, + 0.34065934, + 0.34444444, + 0.31111111, + 0.34444444, ] ) assert np.allclose(result, expected) - def test_weights_for_daily_climatology(self): + def test_custom_season_time_series_weights(self): ds = self.ds.copy() # Set object attrs required to test the method. ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "climatology" - ds.temporal._freq = "day" + ds.temporal._mode = "group_average" + ds.temporal._freq = "season" ds.temporal._weighted = "True" ds.temporal._season_config = { - "dec_mode": "DJF", - "drop_incomplete_djf": True, + "custom_seasons": { + "JanFebMar": ["Jan", "Feb", "Mar"], + "AprMayJun": ["Apr", "May", "Jun"], + "JulAugSep": ["Jul", "Aug", "Sep"], + "OctNovDec": ["Oct", "Nov", "Dec"], + } } - ds.temporal._time_grouped = xr.DataArray( - name="month_day", + + ds.temporal._labeled_time = xr.DataArray( + name="year_season", data=np.array( [ - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 3, 16), - cftime.datetime(1, 4, 16), - cftime.datetime(1, 5, 6), - cftime.datetime(1, 6, 16), - cftime.datetime(1, 7, 16), - cftime.datetime(1, 8, 16), - cftime.datetime(1, 9, 16), - cftime.datetime(1, 10, 16), - cftime.datetime(1, 11, 16), - cftime.datetime(1, 12, 16), - cftime.datetime(1, 1, 16), - cftime.datetime(1, 2, 15), - cftime.datetime(1, 12, 16), + # 2000 JanFebMar + "2000-02-01T00:00:00.000000000", + "2000-02-01T00:00:00.000000000", + "2000-02-01T00:00:00.000000000", + # 2000 AprMayJun + "2000-05-01T00:00:00.000000000", + "2000-05-01T00:00:00.000000000", + "2000-05-01T00:00:00.000000000", + # 2000 JunAugSep + 
"2000-08-01T00:00:00.000000000", + "2000-08-01T00:00:00.000000000", + "2000-08-01T00:00:00.000000000", + # 2000 OctNovDec + "2000-11-01T00:00:00.000000000", + "2000-11-01T00:00:00.000000000", + "2000-11-01T00:00:00.000000000", + # 2001 JanFebMar + "2001-02-01T00:00:00.000000000", + "2001-02-01T00:00:00.000000000", + "2002-02-01T00:00:00.000000000", ], + dtype="datetime64[ns]", ), coords={"time": ds.time}, + dims=["time"], attrs={ "axis": "T", "long_name": "time", @@ -3066,60 +2325,55 @@ def test_weights_for_daily_climatology(self): ) # Compare result of the method against the expected. - result = ds.temporal._get_weights(self.ds["ts"]) + result = ds.temporal._get_weights() expected = np.array( [ - 0.5, - 0.50877193, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 0.5, - 0.5, - 0.49122807, - 0.5, + 0.34065934, + 0.31868132, + 0.34065934, + 0.32967033, + 0.34065934, + 0.32967033, + 0.33695652, + 0.33695652, + 0.32608696, + 0.33695652, + 0.32608696, + 0.33695652, + 0.52542373, + 0.47457627, + 1.0, ] ) assert np.allclose(result, expected) - class TestTimeSeries: - @pytest.fixture(autouse=True) - def setup(self): - self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_weights_for_yearly_averages(self): + def test_weights_for_daily_averages(self): ds = self.ds.copy() # Set object attrs required to test the method. 
ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "time_series" - ds.temporal._freq = "year" + ds.temporal._mode = "group_average" + ds.temporal._freq = "day" ds.temporal._weighted = "True" - ds.temporal._time_grouped = xr.DataArray( - name="year", + ds.temporal._labeled_time = xr.DataArray( + name="year_month_day", data=np.array( [ - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", + "2000-01-16T00:00:00.000000000", + "2000-02-15T00:00:00.000000000", + "2000-03-16T00:00:00.000000000", + "2000-04-16T00:00:00.000000000", + "2000-05-16T00:00:00.000000000", + "2000-06-16T00:00:00.000000000", + "2000-07-16T00:00:00.000000000", + "2000-08-16T00:00:00.000000000", + "2000-09-16T00:00:00.000000000", + "2000-10-16T00:00:00.000000000", + "2000-11-16T00:00:00.000000000", + "2000-12-16T00:00:00.000000000", + "2001-01-16T00:00:00.000000000", + "2001-02-15T00:00:00.000000000", + "2001-12-16T00:00:00.000000000", ], dtype="datetime64[ns]", ), @@ -3134,55 +2388,38 @@ def test_weights_for_yearly_averages(self): ) # Compare result of the method against the expected. 
- result = ds.temporal._get_weights(self.ds["ts"]) - expected = np.array( - [ - 0.08469945, - 0.07923497, - 0.08469945, - 0.08196721, - 0.08469945, - 0.08196721, - 0.08469945, - 0.08469945, - 0.08196721, - 0.08469945, - 0.08196721, - 0.08469945, - 0.34444444, - 0.31111111, - 0.34444444, - ] - ) + result = ds.temporal._get_weights() + expected = np.ones(15) assert np.allclose(result, expected) - def test_weights_for_monthly_averages(self): + def test_weights_for_hourly_averages(self): ds = self.ds.copy() # Set object attrs required to test the method. ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "time_series" - ds.temporal._freq = "month" + ds.temporal._mode = "group_average" + ds.temporal._freq = "hour" ds.temporal._weighted = "True" - ds.temporal._time_grouped = xr.DataArray( - name="year_month", + ds.temporal._season_config = {"dec_mode": "JDF"} + ds.temporal._labeled_time = xr.DataArray( + name="year_month_day_hour", data=np.array( [ - "2000-01-01T00:00:00.000000000", - "2000-02-01T00:00:00.000000000", - "2000-03-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", - "2000-06-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-08-01T00:00:00.000000000", - "2000-09-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2000-12-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-02-01T00:00:00.000000000", - "20012-12-01T00:00:00.000000000", + "2000-01-16T12:00:00.000000000", + "2000-02-15T12:00:00.000000000", + "2000-03-16T12:00:00.000000000", + "2000-04-16T00:00:00.000000000", + "2000-05-16T12:00:00.000000000", + "2000-06-16T00:00:00.000000000", + "2000-07-16T12:00:00.000000000", + "2000-08-16T12:00:00.000000000", + "2000-09-16T00:00:00.000000000", + "2000-10-16T12:00:00.000000000", + "2000-11-16T00:00:00.000000000", + "2000-12-16T12:00:00.000000000", + "2001-01-16T12:00:00.000000000", + "2001-02-15T00:00:00.000000000", + 
"2001-12-16T12:00:00.000000000", ], dtype="datetime64[ns]", ), @@ -3197,13 +2434,16 @@ def test_weights_for_monthly_averages(self): ) # Compare result of the method against the expected. - result = ds.temporal._get_weights(self.ds["ts"]) + result = ds.temporal._get_weights() expected = np.ones(15) assert np.allclose(result, expected) - def test_weights_for_seasonal_averages_with_DJF_and_drop_incomplete_seasons( - self, - ): + class TestWeightsForClimatologyMode: + @pytest.fixture(autouse=True) + def setup(self): + self.ds: xr.Dataset = generate_dataset(cf_compliant=True, has_bounds=True) + + def test_weights_for_seasonal_climatology_with_DJF(self): ds = self.ds.copy() # Replace time and time bounds with incomplete seasons removed @@ -3303,28 +2543,27 @@ def test_weights_for_seasonal_averages_with_DJF_and_drop_incomplete_seasons( # Set object attrs required to test the method. ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "time_series" + ds.temporal._mode = "climatology" ds.temporal._freq = "season" ds.temporal._weighted = "True" - ds.temporal.season_config = {"dec_mode": "DJF"} - ds.temporal._time_grouped = xr.DataArray( - name="year_season", + ds.temporal._season_config = {"dec_mode": "DJF"} + ds.temporal._labeled_time = xr.DataArray( + name="season", data=np.array( [ - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", + cftime.datetime(1, 4, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 10, 1), + 
cftime.datetime(1, 1, 1), + cftime.datetime(1, 1, 1), + cftime.datetime(1, 1, 1), ], - dtype="datetime64[ns]", ), coords={"time": ds.time}, dims=["time"], @@ -3335,9 +2574,8 @@ def test_weights_for_seasonal_averages_with_DJF_and_drop_incomplete_seasons( "bounds": "time_bnds", }, ) - # Compare result of the method against the expected. - result = ds.temporal._get_weights(ds["ts"]) + result = ds.temporal._get_weights() expected = np.array( [ 0.33695652, @@ -3354,39 +2592,38 @@ def test_weights_for_seasonal_averages_with_DJF_and_drop_incomplete_seasons( 0.31111111, ] ) + assert np.allclose(result, expected, equal_nan=True) - def test_weights_for_seasonal_averages_with_JFD(self): + def test_weights_for_seasonal_climatology_with_JFD(self): ds = self.ds.copy() # Set object attrs required to test the method. ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "time_series" + ds.temporal._mode = "climatology" ds.temporal._freq = "season" ds.temporal._weighted = "True" - ds.temporal.season_config = {"dec_mode": "JDF"} - ds.temporal._time_grouped = xr.DataArray( - name="year_season", + ds.temporal._season_config = {"dec_mode": "JDF"} + ds.temporal._labeled_time = xr.DataArray( + name="season", data=np.array( [ - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - # This month is included in the JFD season - "2000-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2002-01-01T00:00:00.000000000", + cftime.datetime(1, 1, 1), + cftime.datetime(1, 1, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 7, 
1), + cftime.datetime(1, 7, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 1, 1), + cftime.datetime(1, 1, 1), + cftime.datetime(1, 1, 1), + cftime.datetime(1, 1, 1), ], - dtype="datetime64[ns]", ), coords={"time": ds.time}, dims=["time"], @@ -3399,132 +2636,60 @@ def test_weights_for_seasonal_averages_with_JFD(self): ) # Compare result of the method against the expected. - result = ds.temporal._get_weights(self.ds["ts"]) + result = ds.temporal._get_weights() expected = np.array( [ - 0.34065934, - 0.31868132, - 0.33695652, - 0.32608696, - 0.33695652, - 0.32608696, - 0.33695652, - 0.33695652, - 0.32967033, - 0.34065934, - 0.32967033, - 0.34065934, - 0.52542373, - 0.47457627, - 1.0, - ] - ) - assert np.allclose(result, expected) - - def test_custom_season_time_series_weights(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "time_series" - ds.temporal._freq = "season" - ds.temporal._weighted = "True" - ds.temporal._season_config = { - "custom_seasons": { - "JanFebMar": ["Jan", "Feb", "Mar"], - "AprMayJun": ["Apr", "May", "Jun"], - "JulAugSep": ["Jul", "Aug", "Sep"], - "OctNovDec": ["Oct", "Nov", "Dec"], - } - } - - ds.temporal._time_grouped = xr.DataArray( - name="year_season", - data=np.array( [ - "2000-02-01T00:00:00.000000000", - "2000-02-01T00:00:00.000000000", - "2000-02-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", - "2000-05-01T00:00:00.000000000", - "2000-08-01T00:00:00.000000000", - "2000-08-01T00:00:00.000000000", - "2000-08-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2000-11-01T00:00:00.000000000", - "2001-02-01T00:00:00.000000000", - "2001-02-01T00:00:00.000000000", - "2002-02-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={"time": ds.time}, - dims=["time"], - attrs={ - 
"axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. - result = ds.temporal._get_weights(self.ds["ts"]) - expected = np.array( - [ - 0.34065934, - 0.31868132, - 0.34065934, - 0.32967033, - 0.34065934, - 0.32967033, - 0.33695652, - 0.33695652, - 0.32608696, - 0.33695652, - 0.32608696, - 0.33695652, - 0.52542373, - 0.47457627, - 1.0, + 0.17127072, + 0.16022099, + 0.33695652, + 0.32608696, + 0.33695652, + 0.32608696, + 0.33695652, + 0.33695652, + 0.32967033, + 0.34065934, + 0.32967033, + 0.17127072, + 0.17127072, + 0.15469613, + 0.17127072, + ] ] ) - assert np.allclose(result, expected) + assert np.allclose(result, expected, equal_nan=True) - def test_weights_for_daily_averages(self): + def test_weights_for_annual_climatology(self): ds = self.ds.copy() # Set object attrs required to test the method. ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "time_series" - ds.temporal._freq = "daily" + ds.temporal._mode = "climatology" + ds.temporal._freq = "month" ds.temporal._weighted = "True" - ds.temporal._time_grouped = xr.DataArray( - name="year_month_day", + ds.temporal._labeled_time = xr.DataArray( + name="month", data=np.array( [ - "2000-01-16T00:00:00.000000000", - "2000-02-15T00:00:00.000000000", - "2000-03-16T00:00:00.000000000", - "2000-04-16T00:00:00.000000000", - "2000-05-16T00:00:00.000000000", - "2000-06-16T00:00:00.000000000", - "2000-07-16T00:00:00.000000000", - "2000-08-16T00:00:00.000000000", - "2000-09-16T00:00:00.000000000", - "2000-10-16T00:00:00.000000000", - "2000-11-16T00:00:00.000000000", - "2000-12-16T00:00:00.000000000", - "2001-01-16T00:00:00.000000000", - "2001-02-15T00:00:00.000000000", - "2001-12-16T00:00:00.000000000", + cftime.datetime(1, 1, 1), + cftime.datetime(1, 2, 1), + cftime.datetime(1, 3, 1), + cftime.datetime(1, 4, 1), + cftime.datetime(1, 5, 1), + cftime.datetime(1, 6, 1), + cftime.datetime(1, 7, 1), + 
cftime.datetime(1, 8, 1), + cftime.datetime(1, 9, 1), + cftime.datetime(1, 10, 1), + cftime.datetime(1, 11, 1), + cftime.datetime(1, 12, 1), + cftime.datetime(1, 1, 1), + cftime.datetime(1, 2, 1), + cftime.datetime(1, 12, 1), ], - dtype="datetime64[ns]", ), coords={"time": ds.time}, - dims=["time"], attrs={ "axis": "T", "long_name": "time", @@ -3534,43 +2699,60 @@ def test_weights_for_daily_averages(self): ) # Compare result of the method against the expected. - result = ds.temporal._get_weights(self.ds["ts"]) - expected = np.ones(15) + result = ds.temporal._get_weights() + expected = np.array( + [ + [ + 0.5, + 0.50877193, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 0.5, + 0.5, + 0.49122807, + 0.5, + ] + ] + ) assert np.allclose(result, expected) - def test_weights_for_hourly_averages(self): + def test_weights_for_daily_climatology(self): ds = self.ds.copy() # Set object attrs required to test the method. ds.temporal._time_bounds = ds.time_bnds.copy() - ds.temporal._mode = "time_series" - ds.temporal._freq = "hour" + ds.temporal._mode = "climatology" + ds.temporal._freq = "day" ds.temporal._weighted = "True" - ds.temporal.season_config = {"dec_mode": "JDF"} - ds.temporal._time_grouped = xr.DataArray( - name="year_month_day_hour", + ds.temporal._labeled_time = xr.DataArray( + name="month_day", data=np.array( [ - "2000-01-16T12:00:00.000000000", - "2000-02-15T12:00:00.000000000", - "2000-03-16T12:00:00.000000000", - "2000-04-16T00:00:00.000000000", - "2000-05-16T12:00:00.000000000", - "2000-06-16T00:00:00.000000000", - "2000-07-16T12:00:00.000000000", - "2000-08-16T12:00:00.000000000", - "2000-09-16T00:00:00.000000000", - "2000-10-16T12:00:00.000000000", - "2000-11-16T00:00:00.000000000", - "2000-12-16T12:00:00.000000000", - "2001-01-16T12:00:00.000000000", - "2001-02-15T00:00:00.000000000", - "2001-12-16T12:00:00.000000000", + cftime.datetime(1, 1, 16), + cftime.datetime(1, 2, 15), + cftime.datetime(1, 3, 16), + cftime.datetime(1, 4, 16), + 
cftime.datetime(1, 5, 6), + cftime.datetime(1, 6, 16), + cftime.datetime(1, 7, 16), + cftime.datetime(1, 8, 16), + cftime.datetime(1, 9, 16), + cftime.datetime(1, 10, 16), + cftime.datetime(1, 11, 16), + cftime.datetime(1, 12, 16), + cftime.datetime(1, 1, 16), + cftime.datetime(1, 2, 15), + cftime.datetime(1, 12, 16), ], - dtype="datetime64[ns]", ), coords={"time": ds.time}, - dims=["time"], attrs={ "axis": "T", "long_name": "time", @@ -3580,235 +2762,24 @@ def test_weights_for_hourly_averages(self): ) # Compare result of the method against the expected. - result = ds.temporal._get_weights(self.ds["ts"]) - expected = np.ones(15) - assert np.allclose(result, expected) - - -class TestGroupByFreq: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_groups_data_var_for_seasonal_averaging_with_JFD(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - time_grouped = xr.DataArray( - name="year_season", - data=np.array( - [ - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2002-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={"time": ds.time}, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - ds.temporal._time_grouped = time_grouped - - ts = ds.ts.copy() - expected = ts.copy() - expected.coords["year_season"] = time_grouped - expected = expected.groupby("year_season") - result = ds.temporal._groupby_freq(ts) - - assert result.groups 
== expected.groups - - def test_groups_data_var_for_seasonal_climatology_with_DJF(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. - time_grouped = xr.DataArray( - name="season", - data=np.array( - [ - # JFD - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - # MAM - cftime.datetime(1, 3, 1), - cftime.datetime(1, 3, 1), - cftime.datetime(1, 3, 1), - # JJA - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - cftime.datetime(1, 7, 1), - # SON - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - cftime.datetime(1, 10, 1), - # JFD - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - cftime.datetime(1, 1, 1), - ], - ), - coords={"time": ds.time}, - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - ds.temporal._time_grouped = time_grouped - - ts = ds.ts.copy() - expected = ts.copy() - expected.coords["season"] = time_grouped - expected = expected.groupby("season") - result = ds.temporal._groupby_freq(ts) - - assert result.groups == expected.groups - - -class TestAddOperationAttributes: - @pytest.fixture(autouse=True) - def setup(self): - self.ds = generate_dataset(cf_compliant=True, has_bounds=True) - - def test_adds_attrs_to_data_var_with_DJF(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. 
- ds.temporal._mode = "climatology" - ds.temporal._freq = "season" - ds.temporal._weighted = True - ds.temporal._center_times = True - ds.temporal._season_config = { - "dec_mode": "DJF", - "drop_incomplete_djf": "True", - } - ds.temporal._time_grouped = xr.DataArray( - name="year_season", - data=np.array( - [ - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2002-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={"time": ds.time}, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - - # Compare result of the method against the expected. - result = ds.temporal._add_operation_attrs(ds.ts) - expected = ds.ts.copy() - expected.attrs.update( - { - "operation": "temporal_avg", - "mode": ds.temporal._mode, - "freq": ds.temporal._freq, - "weighted": "True", - "center_times": "True", - "dec_mode": "DJF", - "drop_incomplete_djf": "True", - } - ) - - assert result.identical(expected) - - def test_adds_attrs_to_data_var_with_custom_seasons(self): - ds = self.ds.copy() - - # Set object attrs required to test the method. 
- ds.temporal._mode = "climatology" - ds.temporal._freq = "season" - ds.temporal._weighted = True - ds.temporal._center_times = True - ds.temporal._season_config = { - "custom_seasons": { - "JanFebMar": ["Jan", "Feb", "Mar"], - "AprMayJun": ["Apr", "May", "Jun"], - "JulAugSep": ["Jul", "Aug", "Sep"], - "OctNovDec": ["Oct", "Nov", "Dec"], - } - } - ds.temporal._time_grouped = xr.DataArray( - name="year_season", - data=np.array( + result = ds.temporal._get_weights() + expected = np.array( [ - "2000-01-01T00:00:00.000000000", - "2000-01-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-04-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-07-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2000-10-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2001-01-01T00:00:00.000000000", - "2002-01-01T00:00:00.000000000", - ], - dtype="datetime64[ns]", - ), - coords={"time": ds.time}, - dims=["time"], - attrs={ - "axis": "T", - "long_name": "time", - "standard_name": "time", - "bounds": "time_bnds", - }, - ) - # Compare result of the method against the expected. 
- result = ds.temporal._add_operation_attrs(ds.ts) - expected = ds.ts.copy() - expected.attrs.update( - { - "operation": "temporal_avg", - "mode": ds.temporal._mode, - "freq": ds.temporal._freq, - "weighted": "True", - "center_times": "True", - "custom_seasons": ["JanFebMar", "AprMayJun", "JulAugSep", "OctNovDec"], - } - ) - - assert result.identical(expected) + 0.5, + 0.50877193, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 0.5, + 0.5, + 0.49122807, + 0.5, + ] + ) + assert np.allclose(result, expected) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index a92116c3..20baec64 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -1,6 +1,6 @@ """Module containing temporal functions.""" from itertools import chain -from typing import Dict, List, Literal, Optional, TypedDict, get_args +from typing import Dict, List, Literal, Optional, Tuple, TypedDict, get_args import cf_xarray # noqa: F401 import cftime @@ -17,14 +17,46 @@ logger = setup_custom_logger(__name__) # Type alias for supported time averaging modes. -Mode = Literal["time_series", "climatology", "departures"] +Mode = Literal["average", "group_average", "climatology", "departures"] +#: Tuple of supported temporal averaging modes. MODES = get_args(Mode) # Type alias for supported grouping frequencies. -Frequency = Literal["hour", "day", "month", "season", "year"] +Frequency = Literal["year", "season", "month", "day", "hour"] #: Tuple of supported grouping frequencies. FREQUENCIES = get_args(Frequency) +# Type alias representing xarray datetime accessor components. +# https://xarray.pydata.org/en/stable/user-guide/time-series.html#datetime-components +DateTimeComponent = Literal["year", "season", "month", "day", "hour"] + +#: A dictionary mapping temporal averaging mode and frequency to the time groups. 
+TIME_GROUPS: Dict[Mode, Dict[Frequency, Tuple[DateTimeComponent, ...]]] = { + "average": { + "year": ("year",), + "month": ("month",), + "day": ("day",), + "hour": ("hour",), + }, + "group_average": { + "year": ("year",), + "season": ("year", "season"), + "month": ("year", "month"), + "day": ("year", "month", "day"), + "hour": ("year", "month", "day", "hour"), + }, + "climatology": { + "season": ("season",), + "month": ("month",), + "day": ("month", "day"), + }, + "departures": { + "season": ("season",), + "month": ("month",), + "day": ("month", "day"), + }, +} + # Configuration specific to the "season" frequency. SeasonConfigInput = TypedDict( "SeasonConfigInput", @@ -46,35 +78,10 @@ total=False, ) -SEASON_CONFIG_KEYS = ["dec_mode", "drop_incomplete_djf", "custom_seasons"] - -# Type alias representing xarray datetime accessor components. -# https://xarray.pydata.org/en/stable/user-guide/time-series.html#datetime-components -DateTimeComponent = Literal["hour", "day", "month", "season", "year"] - -# A dictionary mapping temporal averaging mode and frequency to the xarray -# datetime components used for grouping. Xarray datetime components are -# extracted from the time coordinates of a data variable. The "season" -# frequency involves additional processing that requires the "year" and/or -# "month" components. These components are removed before grouping. 
-DATETIME_COMPONENTS = { - "time_series": { - "year": ("year",), - "season": ("year", "season", "month"), # becomes ("year", "season") - "month": ("year", "month"), - "day": ("year", "month", "day"), - "hour": ("year", "month", "day", "hour"), - }, - "climatology": { - "season": ("year", "season", "month"), # becomes ("season") - "month": ("month",), - "day": ("month", "day"), - }, - "departures": { - "season": ("year", "season", "month"), # becomes ("season") - "month": ("month",), - "day": ("month", "day"), - }, +DEFAULT_SEASON_CONFIG: SeasonConfigInput = { + "dec_mode": "DJF", + "drop_incomplete_djf": False, + "custom_seasons": None, } #: A dictionary mapping month integers to their equivalent 3-letter string. @@ -120,6 +127,15 @@ class TemporalAccessor: >>> ds.temporal. >>> ds.temporal. + Check the 'axis' attribute is set on the time coordinates: + + >>> ds.time.attrs["axis"] + >>> T + + Set the 'axis' attribute for the time coordinates if it isn't: + + >>> ds.time.attrs["axis"] = "T" + Parameters ---------- dataset : xr.Dataset @@ -127,8 +143,10 @@ class TemporalAccessor: """ def __init__(self, dataset: xr.Dataset): + self._dataset: xr.Dataset = dataset + try: - dataset.cf["T"] + self._dim_name = self._dataset.cf["T"].name except KeyError: raise KeyError( "A 'T' axis dimension was not found in the dataset. Make sure the " @@ -136,24 +154,99 @@ def __init__(self, dataset: xr.Dataset): "'T'." ) - self._dataset: xr.Dataset = dataset - # The weights for time coordinates, which are based on a chosen frequency. self._weights: Optional[xr.DataArray] = None def average( + self, + data_var: str, + weighted: bool = True, + center_times: bool = False, + ): + """ + Returns a Dataset with the average of a data variable and the time + dimension removed. 
+
+        This method is particularly useful for calculating the weighted averages
+        of monthly or yearly time series data because the number of days per
+        month/year can vary based on the calendar type, which can affect
+        weighting. For other frequencies, the distribution of weights will be
+        equal so ``weighted=True`` is the same as ``weighted=False``.
+
+        Parameters
+        ----------
+        data_var: str
+            The key of the data variable for calculating averages.
+
+        weighted : bool, optional
+            Calculate averages using weights, by default True.
+
+            Weights are calculated by first determining the length of time for
+            each coordinate point using the difference of its upper and lower
+            bounds. The time lengths are grouped, then each time length is
+            divided by the total sum of the time lengths to get the weight of
+            each coordinate point.
+
+        center_times: bool, optional
+            If True, center time coordinates using the midpoint between its
+            upper and lower bounds. Otherwise, use the provided time
+            coordinates, by default False.
+
+        Returns
+        -------
+        xr.Dataset
+            Dataset with the average of the data variable and the time dimension
+            removed.
+
+        Examples
+        --------
+
+        Get weighted averages for a monthly time series data variable:
+
+        >>> ds_month = ds.temporal.average("ts", center_times=False)
+        >>> ds_month.ts
+        """
+        freq = self._infer_freq()
+
+        return self._averager(data_var, "average", freq, weighted, center_times)
+
+    def _infer_freq(self) -> Frequency:
+        """Infers the time frequency from the coordinates.
+
+        This method infers the time frequency from the coordinates by
+        calculating the minimum delta and comparing it against a set of
+        conditionals.
+
+        The native ``xr.infer_freq()`` method does not work for all cases
+        because the frequency can be irregular (e.g., different hour
+        measurements), which ends up returning None.
+
+        Returns
+        -------
+        Frequency
+            The time frequency.
+ """ + time_coords = self._dataset[self._dim_name] + min_delta = pd.to_timedelta(np.diff(time_coords).min(), unit="ns") + + if min_delta < pd.Timedelta(days=1): + return "hour" + elif min_delta >= pd.Timedelta(days=1) and min_delta < pd.Timedelta(days=28): + return "day" + elif min_delta >= pd.Timedelta(days=28) and min_delta < pd.Timedelta(days=365): + return "month" + else: + return "year" + + def group_average( self, data_var: str, freq: Frequency, weighted: bool = True, center_times: bool = False, - season_config: SeasonConfigInput = { - "dec_mode": "DJF", - "drop_incomplete_djf": False, - "custom_seasons": None, - }, + season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, ): - """Calculates the time series averages for a data variable. + """Returns a Dataset with average of a data variable by time group. Parameters ---------- @@ -171,15 +264,15 @@ def average( weighted : bool, optional Calculate averages using weights, by default True. - To calculate the weights for the time dimension, first the length of - time for each coordinate point is calculated using the difference of - its upper and lower bounds. The time lengths are grouped, then each - time length is divided by the total sum of the time lengths to get - the weights. + Weights are calculated by first determining the length of time for + each coordinate point using the difference of its upper and lower + bounds. The time lengths are grouped, then each time length is + divided by the total sum of the time lengths to get the weight of + each coordinate point. center_times: bool, optional If True, center time coordinates using the midpoint between its - upper and lower bounds. Otherwise, use the provided time coordinates, - by default False. + upper and lower bounds. Otherwise, use the provided time + coordinates, by default False. season_config: SeasonConfigInput, optional A dictionary for "season" frequency configurations. 
If configs for predefined seasons are passed, configs for custom seasons are @@ -191,10 +284,9 @@ def average( The mode for the season that includes December. * "DJF": season includes the previous year December. - * "JFD": season includes the same year December. Xarray - incorrectly labels the season with December as "DJF" when it - should be "JFD". Refer to [1]_ for more information on this - xarray behavior. + * "JFD": season includes the same year December. + Xarray labels the season with December as "DJF", but it is + actually "JFD". * "drop_incomplete_djf" (bool, by default False) If the "dec_mode" is "DJF", this flag drops (True) or keeps @@ -224,31 +316,14 @@ def average( Returns ------- xr.Dataset - Dataset containing the averaged data variable. - - References - ---------- - .. [1] https://github.com/pydata/xarray/issues/810 + Dataset with the average of a data variable by time group. Examples -------- - Check the 'axis' attribute is set on the time coordinates: - - >>> ds.time.attrs["axis"] - >>> T - - Set the 'axis' attribute for the time coordinates if it isn't: - - >>> ds.time.attrs["axis"] = "T" - - Call ``average()`` method: + Get seasonal averages for a data variable: - >>> ds.temporal.average(...) 
- - Get a data variable's seasonal averages: - - >>> ds_season = ds.temporal.average( + >>> ds_season = ds.temporal.group_average( >>> "ts", >>> "season", >>> season_config={ @@ -258,14 +333,14 @@ def average( >>> ) >>> ds_season.ts >>> - >>> ds_season_with_jfd = ds.temporal.average( + >>> ds_season_with_jfd = ds.temporal.group_average( >>> "ts", >>> "season", >>> season_config={"dec_mode": "JFD"} >>> ) >>> ds_season_with_jfd.ts - Get a data variable seasonal averages with custom seasons: + Get seasonal averages with custom seasons for a data variable: >>> custom_seasons = [ >>> ["Jan", "Feb", "Mar"], # "JanFebMar" @@ -274,7 +349,7 @@ def average( >>> ["Oct", "Nov", "Dec"], # "OctNovDec" >>> ] >>> - >>> ds_season_custom = ds.temporal.average( + >>> ds_season_custom = ds.temporal.group_average( >>> "ts", >>> "season", >>> season_config={"custom_seasons": custom_seasons} @@ -293,8 +368,8 @@ def average( 'drop_incomplete_djf': 'False' } """ - return self._temporal_avg( - data_var, "time_series", freq, weighted, center_times, season_config + return self._averager( + data_var, "group_average", freq, weighted, center_times, season_config ) def climatology( @@ -303,18 +378,14 @@ def climatology( freq: Frequency, weighted: bool = True, center_times: bool = False, - season_config: SeasonConfigInput = { - "dec_mode": "DJF", - "drop_incomplete_djf": False, - "custom_seasons": None, - }, + season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, ): - """Calculates the climatology for a data variable. + """Returns a Dataset with the climatology of a data variable. Parameters ---------- data_var: str - The key of the data variable to calculate climatology for. + The key of the data variable for calculating climatology. freq : Frequency The time frequency to group by. @@ -325,15 +396,15 @@ def climatology( weighted : bool, optional Calculate averages using weights, by default True. 
- To calculate the weights for the time dimension, first the length of - time for each coordinate point is calculated using the difference of - its upper and lower bounds. The time lengths are grouped, then each - time length is divided by the total sum of the time lengths to get - the weights. + Weights are calculated by first determining the length of time for + each coordinate point using the difference of its upper and lower + bounds. The time lengths are grouped, then each time length is + divided by the total sum of the time lengths to get the weight of + each coordinate point. center_times: bool, optional If True, center time coordinates using the midpoint between its - upper and lower bounds. Otherwise, use the provided time coordinates, - by default False. + upper and lower bounds. Otherwise, use the provided time + coordinates, by default False. season_config: SeasonConfigInput, optional A dictionary for "season" frequency configurations. If configs for predefined seasons are passed, configs for custom seasons are @@ -345,10 +416,9 @@ def climatology( The mode for the season that includes December. * "DJF": season includes the previous year December. - * "JFD": season includes the same year December. Xarray - incorrectly labels the season with December as "DJF" when it - should be "JFD". Refer to [2]_ for more information on this - xarray behavior. + * "JFD": season includes the same year December. + Xarray labels the season with December as "DJF", but it is + actually "JFD". * "drop_incomplete_djf" (bool, by default False) If the "dec_mode" is "DJF", this flag drops (True) or keeps @@ -378,21 +448,10 @@ def climatology( Returns ------- xr.Dataset - Dataset containing the averaged data variable. - - References - ---------- - .. [2] https://github.com/pydata/xarray/issues/810 + Dataset with the climatology of a data variable. 
Examples -------- - Import TemporalAccessor class: - - >>> import xcdat - - Call ``climatology()`` method: - - >>> ds.temporal.climatology(...) Get a data variable's seasonal climatology: @@ -441,7 +500,7 @@ def climatology( 'drop_incomplete_djf': 'False' } """ - return self._temporal_avg( + return self._averager( data_var, "climatology", freq, weighted, center_times, season_config ) @@ -451,13 +510,11 @@ def departures( freq: Frequency, weighted: bool = True, center_times: bool = False, - season_config: SeasonConfigInput = { - "dec_mode": "DJF", - "drop_incomplete_djf": False, - "custom_seasons": None, - }, + season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, ) -> xr.Dataset: - """Calculates climatological departures ("anomalies"). + """ + Returns a Dataset with the climatological departures (anomalies) for a + data variable. In climatology, “anomalies” refer to the difference between the value during a given time interval (e.g., the January average surface air @@ -467,20 +524,19 @@ def departures( This method uses xarray's grouped arithmetic as a shortcut for mapping over all unique labels. Grouped arithmetic works by assigning a grouping label to each time coordinate of the observation data based on the - grouping frequency. Afterwards, the corresponding climatology is removed - from the observation data at each time coordinate based on the matching - labels. + averaging mode and frequency. Afterwards, the corresponding climatology + is removed from the observation data at each time coordinate based on + the matching labels. xarray's grouped arithmetic operates over each value of the DataArray corresponding to each grouping label without changing the size of the - DataArra. For example,the original monthly time coordinates are + DataArray. For example,the original monthly time coordinates are maintained when calculating seasonal departures on monthly data. - Visit [3]_ to learn more about how xarray's grouped arithmetic works. 
Parameters ---------- data_var: str - The key of the data variable to calculate departures for. + The key of the data variable for calculating departures. freq : Frequency The frequency of time to group by. @@ -492,11 +548,11 @@ def departures( weighted : bool, optional Calculate averages using weights, by default True. - To calculate the weights for the time dimension, first the length of - time for each coordinate point is calculated using the difference of - its upper and lower bounds. The time lengths are grouped, then each - time length is divided by the total sum of the time lengths to get - the weights. + Weights are calculated by first determining the length of time for + each coordinate point using the difference of its upper and lower + bounds. The time lengths are grouped, then each time length is + divided by the total sum of the time lengths to get the weight of + each coordinate point. center_times: bool, optional If True, center time coordinates using the midpoint between its upper and lower bounds. Otherwise, use the provided time coordinates, @@ -512,10 +568,9 @@ def departures( The mode for the season that includes December. * "DJF": season includes the previous year December. - * "JFD": season includes the same year December. Xarray - incorrectly labels the season with December as "DJF" when it - should be "JFD". Refer to [4]_ for more information on this - xarray behavior. + * "JFD": season includes the same year December. + Xarray labels the season with December as "DJF", but it is + actually "JFD". * "drop_incomplete_djf" (bool, by default False) If the "dec_mode" is "DJF", this flag drops (True) or keeps @@ -547,16 +602,16 @@ def departures( xr.Dataset The Dataset containing the departures for a data var's climatology. + Notes + ----- + Refer to [1]_ to learn more about how xarray's grouped arithmetic works. + References ---------- - .. [3] https://xarray.pydata.org/en/stable/user-guide/groupby.html#grouped-arithmetic - .. 
[4] https://github.com/pydata/xarray/issues/810 + .. [1] https://xarray.pydata.org/en/stable/user-guide/groupby.html#grouped-arithmetic Examples -------- - Import TemporalAccessor class: - - >>> import xcdat Get a data variable's annual cycle departures: @@ -576,11 +631,12 @@ def departures( """ ds = self._dataset.copy() - # Calculate the climatology data variable and use its attributes - # to set the object attributes for calculating departures. + # Calculate the climatology data variable and set the object attributes + # using the method arguments. dv_climo = ds.temporal.climatology( data_var, freq, weighted, center_times, season_config )[data_var] + self._set_obj_attrs( "departures", dv_climo.attrs["freq"], @@ -595,11 +651,9 @@ def departures( }, ) - # Get the observation data and group it using the time coordinate - # groups. + # Get the observation data and group it using the time coordinates. dv_obs = _get_data_var(ds, data_var) - self._time_grouped = self._group_time_coords(ds.cf["T"]) - dv_obs_grouped = self._groupby_freq(dv_obs) + dv_obs_grouped = self._group_data(dv_obs) # Rename the climatology data var's time dimension to align with the # grouped observation data var's time dimension so that xarray's @@ -607,7 +661,7 @@ def departures( # is thrown: `ValueError: incompatible dimensions for a grouped # binary operation: the group variable '' is not a # dimension on the other argument` - dv_climo = dv_climo.rename({"time": self._time_grouped.name}) + dv_climo = dv_climo.rename({self._dim_name: self._labeled_time.name}) with xr.set_options(keep_attrs=True): # Use xarray's grouped arithmetic to subtract the climatology @@ -616,9 +670,10 @@ def departures( ds_departs[data_var] = dv_obs_grouped - dv_climo ds_departs[data_var] = self._add_operation_attrs(ds_departs[data_var]) - # Drop the grouped time coordinates from the final output since - # it is no longer needed. 
-        ds_departs = ds_departs.drop_vars(self._time_grouped.name)
+        # The original time coordinates are restored after performing
+        # grouped arithmetic on the data variable. As a result, the grouped
+        # time coordinates are no longer used and are dropped.
+        ds_departs = ds_departs.drop_vars(self._labeled_time.name)

         return ds_departs

@@ -650,7 +705,7 @@ def center_times(self, dataset: xr.Dataset) -> xr.Dataset:
         bounds_diffs: np.timedelta64 = (upper_bounds - lower_bounds) / 2
         bounds_mids: np.ndarray = lower_bounds + bounds_diffs

-        time: xr.DataArray = ds.cf["T"].copy()
+        time: xr.DataArray = ds[self._dim_name].copy()
         time_centered = xr.DataArray(
             name=time.name,
             data=bounds_mids,
@@ -666,43 +721,30 @@ def center_times(self, dataset: xr.Dataset) -> xr.Dataset:
         ds[time_bounds.name] = self._time_bounds

         return ds

-    def _temporal_avg(
+    def _averager(
         self,
         data_var: str,
         mode: Mode,
         freq: Frequency,
         weighted: bool = True,
         center_times: bool = False,
-        season_config: SeasonConfigInput = {
-            "dec_mode": "DJF",
-            "drop_incomplete_djf": False,
-            "custom_seasons": None,
-        },
+        season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG,
     ) -> xr.Dataset:
-        """Calculates the temporal average for a data variable."""
+        """Averages a data variable based on the averaging mode and frequency."""
         self._set_obj_attrs(mode, freq, weighted, center_times, season_config)
-        ds = self._dataset.copy()
-
-        # Perform operations on the Dataset's time coordinates before operating
-        # on the data variable so that these updates cascade down to it.
-        if self._center_times:
-            ds = self.center_times(ds)
+        ds = self._process_dataset()

-        if (
-            self._freq == "season"
-            and self._season_config.get("dec_mode") == "DJF"
-            and self._season_config.get("drop_incomplete_djf") is True
-        ):
-            ds = self._drop_incomplete_djf(ds)
-
-        # Group the time coordinates and average the data variable using them.
- self._time_grouped = self._group_time_coords(ds.cf["T"]) dv = _get_data_var(ds, data_var) - dv = self._averager(dv) - # The dataset's original "time" dimension becomes obsolete after - # calculating the climatology of the data variable, so it is dropped - # and replaced. + if self._mode == "average": + dv = self._average(dv) + elif self._mode in ["group_average", "climatology", "departures"]: + dv = self._group_average(dv) + + # The original time dimension is dropped from the Dataset because + # it becomes obsolete after the data variable is averaged. A new time + # dimension will be added to the Dataset when adding the averaged + # data variable. ds = ds.drop_dims("time") ds[dv.name] = dv @@ -714,7 +756,7 @@ def _set_obj_attrs( freq: Frequency, weighted: bool, center_times: bool, - season_config: SeasonConfigInput, + season_config: SeasonConfigInput = DEFAULT_SEASON_CONFIG, ): """Validates method arguments and sets them as object attributes. @@ -730,10 +772,10 @@ def _set_obj_attrs( If True, center time coordinates using the midpoint between of its upper and lower bounds. Otherwise, use the provided time coordinates, by default False. - season_config: SeasonConfigInput + season_config: Optional[SeasonConfigInput] A dictionary for "season" frequency configurations. If configs for predefined seasons are passed, configs for custom seasons are - ignored and vice versa. + ignored and vice versa, by default DEFAULT_SEASON_CONFIG. Raises ------ @@ -750,7 +792,8 @@ def _set_obj_attrs( raise ValueError( f"Incorrect `mode` argument. Supported modes include: " f"{modes}." ) - freq_keys = DATETIME_COMPONENTS[mode].keys() + + freq_keys = TIME_GROUPS[mode].keys() if freq not in freq_keys and "hour" not in freq: raise ValueError( f"Incorrect `freq` argument. Supported frequencies for {mode} " @@ -765,10 +808,10 @@ def _set_obj_attrs( # "season" frequency specific configuration attributes. 
for key in season_config.keys(): - if key not in SEASON_CONFIG_KEYS: + if key not in DEFAULT_SEASON_CONFIG.keys(): raise KeyError( f"'{key}' is not a supported season config. Supported " - f"configs include: {SEASON_CONFIG_KEYS}." + f"configs include: {DEFAULT_SEASON_CONFIG.keys()}." ) custom_seasons = season_config.get("custom_seasons", None) dec_mode = season_config.get("dec_mode", "DJF") @@ -788,6 +831,28 @@ def _set_obj_attrs( else: self._season_config["custom_seasons"] = self._form_seasons(custom_seasons) + def _process_dataset(self) -> xr.Dataset: + """Processes a dataset based on the set values of the object attributes. + + Returns + ------- + xr.Dataset + The dataset object. + """ + ds = self._dataset.copy() + + if self._center_times: + ds = self.center_times(ds) + + if ( + self._freq == "season" + and self._season_config.get("dec_mode") == "DJF" + and self._season_config.get("drop_incomplete_djf") is True + ): + ds = self._drop_incomplete_djf(ds) + + return ds + def _drop_incomplete_djf(self, dataset: xr.Dataset) -> xr.Dataset: """Drops incomplete DJF seasons within a continuous time series. @@ -800,12 +865,12 @@ def _drop_incomplete_djf(self, dataset: xr.Dataset) -> xr.Dataset: Parameters ---------- data_var : xr.DataArray - The data variable with some incomplete DJF seasons. + The data variable with some possibly incomplete DJF seasons. Returns ------- xr.DataArray - The data variable with all complete DJF seasons. + The data variable with only complete DJF seasons. """ # Separate the dataset into two datasets, one with and one without # the time dimension. 
This is necessary because the xarray .where() @@ -822,7 +887,7 @@ def _drop_incomplete_djf(self, dataset: xr.Dataset) -> xr.Dataset: coord_pt = ds.loc[dict(time=year_month)].time[0] ds_time = ds_time.where(ds_time.time != coord_pt, drop=True) # type: ignore self._time_bounds = ds_time[self._time_bounds.name] - except KeyError: + except (KeyError, IndexError): continue ds_final = xr.merge((ds_time, ds_no_time)) # type: ignore @@ -879,13 +944,34 @@ def _form_seasons(self, custom_seasons: List[List[str]]) -> Dict[str, List[str]] return c_seasons - def _averager(self, data_var: xr.DataArray) -> xr.DataArray: - """Averages a data variable by a grouping frequency. + def _average(self, data_var: xr.DataArray) -> xr.DataArray: + """Averages a data variable with the time dimension removed. + + Parameters + ---------- + data_var : xr.DataArray + The data variable. + + Returns + ------- + xr.DataArray + The averages for a data variable with the time dimension removed. + """ + dv = data_var.copy() + + with xr.set_options(keep_attrs=True): + if self._weighted: + self._weights = self._get_weights() + dv = dv.weighted(self._weights).mean(dim=self._dim_name) + else: + dv = dv.mean(dim=self._dim_name) + + dv = self._add_operation_attrs(dv) + + return dv - This method groups the data variable's values by the time coordinates - and averages them with or without weights. The parameters for - ``self._temporal_average()`` are stored as DataArray attributes in the - averaged data variable. + def _group_average(self, data_var: xr.DataArray) -> xr.DataArray: + """Averages a data variable by time group. Parameters ---------- @@ -895,46 +981,45 @@ def _averager(self, data_var: xr.DataArray) -> xr.DataArray: Returns ------- xr.DataArray - The averaged data variable. + The data variable averaged by time group. 
""" dv = data_var.copy() if self._weighted: - self._weights = self._get_weights(dv) + self._weights = self._get_weights() dv *= self._weights - dv = self._groupby_freq(dv).sum() # type: ignore + dv = self._group_data(dv).sum() # type: ignore else: - dv = self._groupby_freq(dv).mean() # type: ignore + dv = self._group_data(dv).mean() # type: ignore - # After grouping and aggregating on the grouped time coordinates, the + # After grouping and aggregating the data variable values, the # original time dimension is replaced with the grouped time dimension. # For example, grouping on "year_season" replaces the "time" dimension - # with "year_season". This dimension will eventually be renamed back - # to "time" when the data variable as added back to the dataset. - dv = dv.rename({self._time_grouped.name: "time"}) # type: ignore + # with "year_season". This dimension needs to be renamed back to + # the original time dimension name before the data variable is added + # back to the dataset so that the CF compliant name is maintained. + dv = dv.rename({self._labeled_time.name: self._dim_name}) # type: ignore # After grouping and aggregating, the grouped time dimension's - # attributes are removed. Unfortunately, `xr.set_options(keep_attrs=True)`, - # `.sum(keep_attrs=True)`, and `.mean(keep_attrs=True)` only keeps - # attributes for data variables and not their coordinates so they need - # to be restored manually - dv["time"].attrs = self._time_grouped.attrs - dv["time"].encoding = self._time_grouped.encoding + # attributes are removed. Xarray's `keep_attrs=True` option only keeps + # attributes for data variables and not their coordinates, so the + # coordinate attributes have to be restored manually. 
+        dv[self._dim_name].attrs = self._labeled_time.attrs
+        dv[self._dim_name].encoding = self._labeled_time.encoding

         dv = self._add_operation_attrs(dv)

         return dv

-    def _group_time_coords(self, time_coords: xr.DataArray) -> xr.DataArray:
-        """Groups the time coordinates by a frequency.
+    def _label_time_coords(self, time_coords: xr.DataArray) -> xr.DataArray:
+        """Labels time coordinates with a group for grouping.

-        This method extracts xarray datetime components from the time
-        coordinates and stores them as column values in a pandas DataFrame. A
-        pandas DataFrame is the chosen data structure because it simplifies the
-        additional steps for processing the component values, specifically for
-        the "season" frequency. The DataFrame is then converted to a numpy
-        list of cftime.datetime or datetime.datetime that is used as the data
-        for the final xarray DataArray of grouped time coordinates.
+        This method labels time coordinates for grouping by first extracting
+        specific xarray datetime components from time coordinates and storing
+        them in a pandas DataFrame. After processing (if necessary) is performed
+        on the DataFrame, it is converted to a numpy array of datetime
+        objects. This numpy array serves as the data source for the final
+        DataArray of labeled time coordinates.

         Parameters
         ----------
@@ -944,22 +1029,13 @@ def _group_time_coords(self, time_coords: xr.DataArray) -> xr.DataArray:
         Returns
         -------
         xr.DataArray
-            The time coordinates grouped by a frequency.
-
-        Notes
-        -----
-        Refer to [5]_ for information on xarray datetime accessor components.
-
-        References
-        ----------
-        .. [5] https://xarray.pydata.org/en/stable/user-guide/time-series.html#datetime-components
+            The DataArray of labeled time coordinates for grouping.

         Examples
         --------
         Original daily time coordinates:

-        >>> # Original daily time coordinates.
>>> >>> array(['2000-01-01T12:00:00.000000000', >>> '2000-01-31T21:00:00.000000000', @@ -968,13 +1044,8 @@ def _group_time_coords(self, time_coords: xr.DataArray) -> xr.DataArray: >>> dtype='datetime64[ns]') >>> Coordinates: >>> * time (time) datetime64[ns] 2000-01-01T12:00:00 ... 2000-04-01T03:00:00 - >>> Attributes: - >>> long_name: time - >>> standard_name: time - >>> axis: T - >>> bounds: time_bnds - Daily time coordinates grouped by month for time series averaging: + Daily time coordinates labeled by year and month: >>> >>> array(['2000-01-01T00:00:00.000000000', @@ -983,25 +1054,13 @@ def _group_time_coords(self, time_coords: xr.DataArray) -> xr.DataArray: >>> dtype='datetime64[ns]') >>> Coordinates: >>> * time (time) datetime64[ns] 2000-01-01T00:00:00 ... 2000-04-01T00:00:00 - >>> Attributes: - >>> long_name: time - >>> standard_name: time - >>> axis: T - >>> bounds: time_bnds """ - df = pd.DataFrame() - - for component in DATETIME_COMPONENTS[self._mode][self._freq]: - df[component] = time_coords[f"time.{component}"].values - - if self._freq == "season": - df = self._process_season_dataframe(df) - - datetime_objs = self._convert_df_to_dt(df) + df_dt_components: pd.DataFrame = self._get_df_dt_components(time_coords) + dt_objects = self._convert_df_to_dt(df_dt_components) time_grouped = xr.DataArray( - name="_".join(df.columns), - data=datetime_objs, + name="_".join(df_dt_components.columns), + data=dt_objects, coords={"time": time_coords.time}, dims=["time"], attrs=time_coords.time.attrs, @@ -1010,95 +1069,100 @@ def _group_time_coords(self, time_coords: xr.DataArray) -> xr.DataArray: return time_grouped - def _process_season_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: - """Processes a DataFrame of datetime components for the season frequency. + def _get_df_dt_components(self, time_coords: xr.DataArray) -> pd.DataFrame: + """Returns a DataFrame of xarray datetime components. 
- Processing includes: + This method extracts the applicable xarray datetime components from each + time coordinate based on the averaging mode and frequency, and stores + them in a DataFrame. - * Mapping custom seasons to each time coordinate if they are used. + Additional processing is performed for the seasonal frequency, + including: + + * If custom seasons are used, map them to each time coordinate based + on the middle month of the custom season. * If season with December is "DJF", shift Decembers over to the next - year so DJF groups are correctly formed. + year so DJF seasons are correctly grouped using the previous year + December. * Drop obsolete columns after processing is done. Parameters ---------- - df : pd.DataFrame - A DataFrame of xarray datetime components. + time_coords : xr.DataArray + The time coordinates. Returns ------- pd.DataFrame - A DataFrame of processed xarray datetime components. + A DataFrame of datetime components. + + Notes + ----- + Refer to [2]_ for information on xarray datetime accessor components. + + References + ---------- + .. [2] https://xarray.pydata.org/en/stable/user-guide/time-series.html#datetime-components """ - df_new = df.copy() - custom_seasons = self._season_config.get("custom_seasons") - dec_mode = self._season_config.get("dec_mode") + df = pd.DataFrame() - if custom_seasons is None: - if dec_mode == "DJF": - df_new = self._shift_decembers(df_new) - else: - df_new = self._map_months_to_custom_seasons(df_new) + # Use the TIME_GROUPS dictionary to determine which components + # are needed to form the labeled time coordinates. 
+ for component in TIME_GROUPS[self._mode][self._freq]: + df[component] = time_coords[f"{self._dim_name}.{component}"].values - df_new = self._drop_obsolete_columns(df_new) - df_new = self._map_seasons_to_mid_months(df_new) - return df_new + # The season frequency requires additional datetime components for + # processing, which are later removed before time coordinates are + # labeled for grouping. These components weren't included in the + # `TIME_GROUPS` dictionary for the "season" frequency because + # `TIME_GROUPS` represents the final grouping labels. + if self._freq == "season": + if self._mode in ["climatology", "departures"]: + df["year"] = time_coords[f"{self._dim_name}.year"].values + df["month"] = time_coords[f"{self._dim_name}.month"].values - def _convert_df_to_dt(self, df: pd.DataFrame) -> np.ndarray: - """Converts a DataFrame of datetime components to datetime objects. + if self._mode == "group_average": + df["month"] = time_coords[f"{self._dim_name}.month"].values + + df = self._process_season_df(df) + + return df + + def _process_season_df(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Processes a DataFrame of datetime components for the season frequency. Parameters ---------- df : pd.DataFrame - The DataFrame of xarray datetime components. + A DataFrame of xarray datetime components. Returns ------- - np.ndarray - A numpy ndarray of datetime.datetime or cftime.datetime objects. - - Examples - -------- + pd.DataFrame + A DataFrame of processed xarray datetime components. """ df_new = df.copy() + custom_seasons = self._season_config.get("custom_seasons") + dec_mode = self._season_config.get("dec_mode") - # Some time frequencies don't require all of the datetime components - # for grouping, so default values are used for creating the `datetime` - # objects (which require at least a year, month, and day). 
- dt_components_defaults = {"year": 1, "month": 1, "day": 1, "hour": 0} - for component, default_val in dt_components_defaults.items(): - if component not in df_new.columns: - df_new[component] = default_val - - if self._mode == "time_series": - dates = pd.to_datetime(df_new).to_numpy() - elif self._mode in ["climatology", "departures"]: - # The "year" values are not considered when grouping the time - # coordinates for "climatology" and "departures", but are required - # for creating datetime objects. The fallback value of 1 is - # used as a placeholder for the year. However, year 1 is outside the - # Timestamp-valid range so `cftime.datetime` objects are used - # instead of `datetime.datetime`. - # https://docs.xarray.dev/en/stable/user-guide/weather-climate.html#non-standard-calendars-and-dates-outside-the-timestamp-valid-range - # https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations - dates = np.array( - [ - cftime.datetime(year, month, day, hour) - for year, month, day, hour in zip( - df_new.year, df_new.month, df_new.day, df_new.hour - ) - ] - ) + if custom_seasons is not None: + df_new = self._map_months_to_custom_seasons(df_new) + else: + if dec_mode == "DJF": + df_new = self._shift_decembers(df_new) - return dates + df_new = self._drop_obsolete_columns(df_new) + df_new = self._map_seasons_to_mid_months(df_new) + return df_new def _map_months_to_custom_seasons(self, df: pd.DataFrame) -> pd.DataFrame: - """Maps months to custom seasons. + """Maps the month column in the DataFrame to a custom season. This method maps each integer value in the "month" column to its string represention, which then maps to a custom season that is stored in the - "season" column. For example, 1 maps to "Jan" and "Jan" maps to the - "JanFebMar" custom season. + "season" column. For example, the month of 1 maps to "Jan" and "Jan" + maps to the "JanFebMar" custom season. 
Parameters
        ----------
@@ -1111,14 +1175,13 @@ def _map_months_to_custom_seasons(self, df: pd.DataFrame) -> pd.DataFrame:
             The DataFrame of xarray datetime coordinates, with each row mapped
             to a custom season.
         """
-        custom_seasons = self._season_config.get("custom_seasons")
-        if custom_seasons is None:
-            raise ValueError("Custom seasons were not assigned to this object.")
+        custom_seasons = self._season_config["custom_seasons"]
 
-        # Time complexity of O(n^2), but okay with these small data structures.
+        # NOTE: This for loop has a time complexity of O(n^2), but it is fine
+        # because these data structures are small.
         seasons_map = {}
         for mon_int, mon_str in MONTH_INT_TO_STR.items():
-            for season in custom_seasons:
+            for season in custom_seasons:  # type: ignore
                 if mon_str in season:
                     seasons_map[mon_int] = season
 
@@ -1128,12 +1191,13 @@ def _map_months_to_custom_seasons(self, df: pd.DataFrame) -> pd.DataFrame:
         return df_new
 
     def _shift_decembers(self, df_season: pd.DataFrame) -> pd.DataFrame:
-        """Shifts Decembers over to the next year for "DJF" seasons.
+        """Shifts Decembers over to the next year for "DJF" seasons in-place.
 
         For "DJF" seasons, Decembers must be shifted over to the next year in
-        order for the xarray groupby operation to correctly group the time
-        coordinates. Otherwise, grouping is incorrectly performed with the
-        native xarray "DJF" season, which is actually "JFD".
+        order for the xarray groupby operation to correctly label and group the
+        corresponding time coordinates. If they aren't shifted over, grouping is
+        incorrectly performed with the native xarray "DJF" season (which is
+        actually "JFD").
Parameters ---------- @@ -1150,15 +1214,16 @@ def _shift_decembers(self, df_season: pd.DataFrame) -> pd.DataFrame: Examples -------- - Comparison of "DJF" and "JFD" seasons: + Comparison of "JFD" and "DJF" seasons: + + >>> # "JFD" (native xarray behavior) + >>> [(2000, "DJF", 1), (2000, "DJF", 2), (2000, "DJF", 12), + >>> (2001, "DJF", 1), (2001, "DJF", 2)] >>> # "DJF" (shifted Decembers) >>> [(2000, "DJF", 1), (2000, "DJF", 2), (2001, "DJF", 12), - >>> (2001, "DJF", 1), (2001, "DJF", 2), (2002, "DJF", 12)] + >>> (2001, "DJF", 1), (2001, "DJF", 2)] - >>> # "JFD" (native xarray behavior) - >>> [(2000, "DJF", 1), (2000, "DJF", 2), (2000, "DJF", 12), - >>> (2001, "DJF", 1), (2001, "DJF", 2), (2001, "DJF", 12)] """ df_season.loc[df_season["month"] == 12, "year"] = df_season["year"] + 1 @@ -1168,8 +1233,8 @@ def _map_seasons_to_mid_months(self, df: pd.DataFrame) -> pd.DataFrame: """Maps the season column values to the integer of its middle month. DateTime objects don't support storing seasons as strings, so the middle - month is used to represent the season. For example, for the pre-defined - season "DJF", the middle month "J" is mapped to the integer value 1. + months are used to represent the season. For example, for the season + "DJF", the middle month "J" is mapped to the integer value 1. The middle month of a custom season is extracted using the ceiling of the middle index from its list of months. For example, for the custom @@ -1177,7 +1242,8 @@ def _map_seasons_to_mid_months(self, df: pd.DataFrame) -> pd.DataFrame: "May"], the index 3 is used to get the month "Apr". "Apr" is then mapped to the integer value 4. - After mapping, the "season" column is renamed to "month". + After mapping the season to its month, the "season" column is renamed to + "month". Parameters ---------- @@ -1232,19 +1298,68 @@ def _drop_obsolete_columns(self, df_season: pd.DataFrame) -> pd.DataFrame: The DataFrame of time coordinates for the "season" frequency with obsolete columns dropped. 
""" - if self._mode == "time_series": + if self._mode == "group_average": df_season = df_season.drop("month", axis=1) elif self._mode in ["climatology", "departures"]: df_season = df_season.drop(["year", "month"], axis=1) - else: - raise ValueError( - "Unable to drop columns in the datetime components " - f"DataFrame for unsupported mode, '{self._mode}'." - ) return df_season - def _get_weights(self, data_var: xr.DataArray) -> xr.DataArray: + def _convert_df_to_dt(self, df: pd.DataFrame) -> np.ndarray: + """Converts a DataFrame of datetime components to datetime objects. + + datetime objects require at least a year, month, and day value. However, + some modes and time frequencies don't require year, month, and/or day + for grouping. For these cases, use default values of 1 in order to + meet this datetime requirement. + + If the default value of 1 is used for the years, datetime objects + must be created using `cftime.datetime` because year 1 is outside the + Timestamp-valid range. + + Parameters + ---------- + df : pd.DataFrame + The DataFrame of xarray datetime components. + + Returns + ------- + np.ndarray + A numpy ndarray of datetime.datetime or cftime.datetime objects. + + Notes + ----- + Refer to [3]_ and [4]_ for more information on Timestamp-valid range. + + References + ---------- + .. [3] https://docs.xarray.dev/en/stable/user-guide/weather-climate.html#non-standard-calendars-and-dates-outside-the-timestamp-valid-range + + .. 
[4] https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations + """ + df_new = df.copy() + + dt_components_defaults = {"year": 1, "month": 1, "day": 1, "hour": 0} + for component, default_val in dt_components_defaults.items(): + if component not in df_new.columns: + df_new[component] = default_val + + year_is_unused = self._mode in ["climatology", "departures"] or ( + self._mode == "average" and self._freq != "year" + ) + if year_is_unused: + dates = [ + cftime.datetime(year, month, day, hour) + for year, month, day, hour in zip( + df_new.year, df_new.month, df_new.day, df_new.hour + ) + ] + else: + dates = pd.to_datetime(df_new) + + return np.array(dates) + + def _get_weights(self) -> xr.DataArray: """Calculates weights for a data variable using time bounds. This method gets the length of time for each coordinate point by using @@ -1253,15 +1368,12 @@ def _get_weights(self, data_var: xr.DataArray) -> xr.DataArray: coordinates are recorded (e.g., monthly, daily, hourly) and the calendar type used. - The time lengths are grouped by the grouping frequency, then each time - length is divided by the total sum of the time lengths in its group to - get the weights. The sum of the weights for each group is validated to - ensure it equals 1.0 (100%). + The time lengths are labeled and grouped, then each time length is + divided by the total sum of the time lengths in its group to get its + corresponding weight. - Parameters - ------- - data_var : xr.DataArray - The data variable. + The sum of the weights for each group is validated to ensure it equals + 1.0. Returns ------- @@ -1270,11 +1382,11 @@ def _get_weights(self, data_var: xr.DataArray) -> xr.DataArray: Notes ----- - Refer to [6]_ for the supported CF convention calendar types. + Refer to [5]_ for the supported CF convention calendar types. References ---------- - .. [6] https://cfconventions.org/cf-conventions/cf-conventions.html#calendar + .. 
[5] https://cfconventions.org/cf-conventions/cf-conventions.html#calendar """ with xr.set_options(keep_attrs=True): time_lengths: xr.DataArray = ( @@ -1289,41 +1401,34 @@ def _get_weights(self, data_var: xr.DataArray) -> xr.DataArray: # notes). To avoid this warning please use the scalar types # `np.float64`, or string notation.` time_lengths = time_lengths.astype(np.float64) - time_grouped = self._groupby_freq(time_lengths) - weights: xr.DataArray = time_grouped / time_grouped.sum() # type: ignore + grouped_time_lengths = self._group_data(time_lengths) + weights: xr.DataArray = grouped_time_lengths / grouped_time_lengths.sum() # type: ignore + + num_groups = len(grouped_time_lengths.groups) + self._validate_weights(weights, num_groups) - self._validate_weights(data_var, weights) return weights - def _validate_weights(self, data_var: xr.DataArray, weights: xr.DataArray): - """Validates the sums of the weights for each group equals 1.0 (100%). + def _validate_weights(self, weights: xr.DataArray, num_groups: int): + """Validates that the sum of the weights for each group equals 1.0. Parameters ---------- - data_var : xr.DataArray - The data variable. weights : xr.DataArray - The data variable's time coordinates weights. + The weights for the time coordinates. + num_groups : int + The number of groups. """ - freq_groups = self._groupby_freq(data_var).count() # type: ignore - # Sum the frequency group counts by all the dims except the grouped time - # dimension to get a 1D array of counts. - summing_dims = tuple( - x for x in freq_groups.dims if x != self._time_grouped.name - ) - freq_sums = freq_groups.sum(summing_dims) + actual_sum = self._group_data(weights).sum().values # type: ignore + expected_sum = np.ones(num_groups) - # Replace all non-zero counts with 1.0 (total weight of 100%). 
- expected_sum = np.where(freq_sums > 0, 1.0, freq_sums) - actual_sum = self._groupby_freq(weights).sum().values # type: ignore np.testing.assert_allclose(actual_sum, expected_sum) - def _groupby_freq(self, data_var: xr.DataArray) -> DataArrayGroupBy: - """Groups a data variable by a time frequency. + def _group_data(self, data_var: xr.DataArray) -> DataArrayGroupBy: + """Groups a data variable. - This method returning a DataArrayGroupBy object, enabling support for - xarray's grouped arithmetic as a shortcut for mapping over all unique - labels. + This method groups a data variable by a single datetime component for + the "average" mode or labeled time coordinates for all other modes. Parameters ---------- @@ -1333,11 +1438,16 @@ def _groupby_freq(self, data_var: xr.DataArray) -> DataArrayGroupBy: Returns ------- DataArrayGroupBy - A data variable grouped by the frequency. + A data variable grouped by label. """ dv = data_var.copy() - dv.coords[self._time_grouped.name] = self._time_grouped - dv_gb = dv.groupby(self._time_grouped.name) + + if self._mode == "average": + dv_gb = dv.groupby(f"{self._dim_name}.{self._freq}") + else: + self._labeled_time = self._label_time_coords(dv[self._dim_name]) + dv.coords[self._labeled_time.name] = self._labeled_time + dv_gb = dv.groupby(self._labeled_time.name) return dv_gb