From 2bf15f8d29251c94471da89a177814ed09cf8d20 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 19 Jul 2023 12:25:33 -0600 Subject: [PATCH] Allow opening datasets with nD dimension coordinate variables. (#7989) * Warn instead of raising for nD index variable Avoid automatically creating an Index variable when nD variable shares name with one of its dimensions. Closes #2233 * fix tests * Remove warning * Add invariants check Co-authored-by: Benoit Bovy * Add whats-new --------- Co-authored-by: Benoit Bovy --- doc/whats-new.rst | 4 ++++ xarray/core/indexes.py | 2 +- xarray/core/variable.py | 10 ++-------- xarray/testing.py | 15 +++++++-------- xarray/tests/test_dataset.py | 8 ++++++-- xarray/tests/test_variable.py | 6 ++++-- 6 files changed, 24 insertions(+), 21 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 682b8cf3066..0cc18cc7279 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,10 @@ v2023.07.1 (unreleased) New Features ~~~~~~~~~~~~ +- Allow creating Xarray objects where a multidimensional variable shares its name + with a dimension. Examples include output from finite volume models like FVCOM. + (:issue:`2233`, :pull:`7989`) + By `Deepak Cherian `_ and `Benoit Bovy `_. 
Breaking changes diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index bfa8b9da07a..33b9b7bcff9 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1645,7 +1645,7 @@ def default_indexes( coord_names = set(coords) for name, var in coords.items(): - if name in dims: + if name in dims and var.ndim == 1: index, index_vars = create_default_index_implicit(var, coords) if set(index_vars) <= coord_names: indexes.update({k: index for k in index_vars}) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 720701be6f0..9d859c0d8a7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -156,14 +156,8 @@ def as_variable(obj, name=None) -> Variable | IndexVariable: f"explicit list of dimensions: {obj!r}" ) - if name is not None and name in obj.dims: - # convert the Variable into an Index - if obj.ndim != 1: - raise MissingDimensionsError( - f"{name!r} has more than 1-dimension and the same name as one of its " - f"dimensions {obj.dims!r}. xarray disallows such variables because they " - "conflict with the coordinates used to label dimensions." 
- ) + if name is not None and name in obj.dims and obj.ndim == 1: + # automatically convert the Variable into an Index obj = obj.to_index_variable() return obj diff --git a/xarray/testing.py b/xarray/testing.py index dfd84851ac1..47e7dca81ae 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -364,14 +364,13 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool): assert all( ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes ), (ds._dims, {k: v.sizes for k, v in ds._variables.items()}) - assert all( - isinstance(v, IndexVariable) - for (k, v) in ds._variables.items() - if v.dims == (k,) - ), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)} - assert all(v.dims == (k,) for (k, v) in ds._variables.items() if k in ds._dims), { - k: v.dims for k, v in ds._variables.items() if k in ds._dims - } + + if check_default_indexes: + assert all( + isinstance(v, IndexVariable) + for (k, v) in ds._variables.items() + if v.dims == (k,) + ), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)} if ds._indexes is not None: _assert_indexes_invariants_checks( diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 01c26ad6104..f7f91d0e134 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -35,6 +35,7 @@ from xarray.core.indexes import Index, PandasIndex from xarray.core.pycompat import array_type, integer_types from xarray.core.utils import is_scalar +from xarray.testing import _assert_internal_invariants from xarray.tests import ( DuckArrayWrapper, InaccessibleArray, @@ -467,13 +468,16 @@ def test_constructor(self) -> None: with pytest.raises(ValueError, match=r"conflicting sizes"): Dataset({"a": x1, "b": x2}) - with pytest.raises(ValueError, match=r"disallows such variables"): - Dataset({"a": x1, "x": z}) with pytest.raises(TypeError, match=r"tuple of form"): Dataset({"x": (1, 2, 3, 4, 5, 6, 7)}) with pytest.raises(ValueError, match=r"already exists as a 
scalar"): Dataset({"x": 0, "y": ("x", [1, 2, 3])}) + # nD coordinate variable "x" sharing name with dimension + actual = Dataset({"a": x1, "x": z}) + assert "x" not in actual.xindexes + _assert_internal_invariants(actual, check_default_indexes=True) + # verify handling of DataArrays expected = Dataset({"x": x1, "z": z}) actual = Dataset({"z": expected["z"]}) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index a6dffb82660..9b70dcb5464 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1247,8 +1247,10 @@ def test_as_variable(self): expected = Variable(("x", "y"), data) with pytest.raises(ValueError, match=r"without explicit dimension names"): as_variable(data, name="x") - with pytest.raises(ValueError, match=r"has more than 1-dimension"): - as_variable(expected, name="x") + + # name of nD variable matches dimension name + actual = as_variable(expected, name="x") + assert_identical(expected, actual) # test datetime, timedelta conversion dt = np.array([datetime(1999, 1, 1) + timedelta(days=x) for x in range(10)])