diff --git a/.gitignore b/.gitignore index 21c18c17ff7..21011f0eaa7 100644 --- a/.gitignore +++ b/.gitignore @@ -50,7 +50,8 @@ nosetests.xml dask-worker-space/ # asv environments -.asv +asv_bench/.asv +asv_bench/pkgs # Translations *.mo @@ -68,7 +69,7 @@ dask-worker-space/ # xarray specific doc/_build -generated/ +doc/generated/ xarray/tests/data/*.grib.*.idx # Sync tools diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c9a8630a575..76fcac62cd3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1521,42 +1521,6 @@ def save_mfdataset( ) -def _validate_datatypes_for_zarr_append(zstore, dataset): - """If variable exists in the store, confirm dtype of the data to append is compatible with - existing dtype. - """ - - existing_vars = zstore.get_variables() - - def check_dtype(vname, var): - if ( - vname not in existing_vars - or np.issubdtype(var.dtype, np.number) - or np.issubdtype(var.dtype, np.datetime64) - or np.issubdtype(var.dtype, np.bool_) - or var.dtype == object - ): - # We can skip dtype equality checks under two conditions: (1) if the var to append is - # new to the dataset, because in this case there is no existing var to compare it to; - # or (2) if var to append's dtype is known to be easy-to-append, because in this case - # we can be confident appending won't cause problems. Examples of dtypes which are not - # easy-to-append include length-specified strings of type `|S*` or ` None: assert original.chunks == actual.chunks +@requires_zarr +@pytest.mark.skipif(not have_zarr_v3, reason="requires zarr version 3") +class TestInstrumentedZarrStore: + methods = [ + "__iter__", + "__contains__", + "__setitem__", + "__getitem__", + "listdir", + "list_prefix", + ] + + @contextlib.contextmanager + def create_zarr_target(self): + import zarr + + if Version(zarr.__version__) < Version("2.18.0"): + pytest.skip("Instrumented tests only work on latest Zarr.") + + store = KVStoreV3({}) + yield store + + def make_patches(self, store): + from unittest.mock import MagicMock + + return { + method: MagicMock( + f"KVStoreV3.{method}", + side_effect=getattr(store, method), + autospec=True, + ) + for method in self.methods + } + + def summarize(self, patches): + summary = {} + for name, patch_ in patches.items(): + count = 0 + for call in patch_.mock_calls: + if "zarr.json" not in call.args: + count += 1 + summary[name.strip("__")] = count + return summary + + def check_requests(self, expected, patches): + summary = self.summarize(patches) + for k in summary: + assert summary[k] <= expected[k], (k, summary) + + def test_append(self) -> None: + original = Dataset({"foo": ("x", [1])}, coords={"x": [0]}) + modified = Dataset({"foo": ("x", [2])}, coords={"x": [1]}) + with self.create_zarr_target() as store: + expected = { + "iter": 2, + "contains": 9, + "setitem": 9, + "getitem": 6, + "listdir": 2, + "list_prefix": 2, + } + patches = self.make_patches(store) + with patch.multiple(KVStoreV3, **patches): + original.to_zarr(store) + self.check_requests(expected, patches) + + patches = self.make_patches(store) + # v2024.03.0: {'iter': 6, 'contains': 2, 'setitem': 5, 'getitem': 10, 'listdir': 6, 'list_prefix': 0} + # 6057128b: {'iter': 5, 'contains': 2, 'setitem': 5, 'getitem': 10, "listdir": 5, "list_prefix": 0} + expected = { + "iter": 2, + "contains": 2, + "setitem": 5, + "getitem": 6, + "listdir": 2, + "list_prefix": 0, + } + with patch.multiple(KVStoreV3, **patches): + modified.to_zarr(store, mode="a", append_dim="x") + self.check_requests(expected, patches) + + patches = self.make_patches(store) + expected = { + "iter": 2, + "contains": 2, + "setitem": 5, + "getitem": 6, + "listdir": 2, + "list_prefix": 0, + } + with patch.multiple(KVStoreV3, **patches): + modified.to_zarr(store, mode="a-", append_dim="x") + self.check_requests(expected, patches) + + with open_dataset(store, engine="zarr") as actual: + assert_identical( + actual, xr.concat([original, modified, modified], dim="x") + ) + + @requires_dask + def test_region_write(self) -> None: + ds = Dataset({"foo": ("x", [1, 2, 3])}, coords={"x": [0, 1, 2]}).chunk() + with self.create_zarr_target() as store: + expected = { + "iter": 2, + "contains": 7, + "setitem": 8, + "getitem": 6, + "listdir": 2, + "list_prefix": 4, + } + patches = self.make_patches(store) + with patch.multiple(KVStoreV3, **patches): + ds.to_zarr(store, mode="w", compute=False) + self.check_requests(expected, patches) + + # v2024.03.0: {'iter': 5, 'contains': 2, 'setitem': 1, 'getitem': 6, 'listdir': 5, 'list_prefix': 0} + # 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 5, 'listdir': 4, 'list_prefix': 0} + expected = { + "iter": 2, + "contains": 2, + "setitem": 1, + "getitem": 3, + "listdir": 2, + "list_prefix": 0, + } + patches = self.make_patches(store) + with patch.multiple(KVStoreV3, **patches): + ds.to_zarr(store, region={"x": slice(None)}) + self.check_requests(expected, patches) + + # v2024.03.0: {'iter': 6, 'contains': 4, 'setitem': 1, 'getitem': 11, 'listdir': 6, 'list_prefix': 0} + # 6057128b: {'iter': 4, 'contains': 2, 'setitem': 1, 'getitem': 7, 'listdir': 4, 'list_prefix': 0} + expected = { + "iter": 2, + "contains": 2, + "setitem": 1, + "getitem": 5, + "listdir": 2, + "list_prefix": 0, + } + patches = self.make_patches(store) + with patch.multiple(KVStoreV3, **patches): + ds.to_zarr(store, region="auto") + self.check_requests(expected, patches) + + expected = { + "iter": 1, + "contains": 2, + "setitem": 0, + "getitem": 5, + "listdir": 1, + "list_prefix": 0, + } + patches = self.make_patches(store) + with patch.multiple(KVStoreV3, **patches): + with open_dataset(store, engine="zarr") as actual: + assert_identical(actual, ds) + self.check_requests(expected, patches) + + @requires_zarr class TestZarrDictStore(ZarrBase): @contextlib.contextmanager