diff --git a/doc/internals/zarr-encoding-spec.rst b/doc/internals/zarr-encoding-spec.rst index 958dad166e1..83f07929b03 100644 --- a/doc/internals/zarr-encoding-spec.rst +++ b/doc/internals/zarr-encoding-spec.rst @@ -73,3 +73,51 @@ re-open it directly with Zarr: import shutil shutil.rmtree("rasm.zarr") + +Chunk Key Encoding +------------------ + +When writing data to Zarr stores, Xarray supports customizing how chunk keys are encoded +through the ``chunk_key_encoding`` parameter in the variable's encoding dictionary. This +is particularly useful when working with Zarr V2 arrays and you need to control the +dimension separator in chunk keys. + +For example, to specify a custom separator for chunk keys: + +.. ipython:: python + :okwarning: + + import xarray as xr + import numpy as np + from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + + # Create a custom chunk key encoding with "/" as separator + enc = V2ChunkKeyEncoding(separator="/").to_dict() + + # Create and write a dataset with custom chunk key encoding + arr = np.ones((42, 100)) + ds = xr.DataArray(arr, name="var1").to_dataset() + ds.to_zarr( + "example.zarr", + zarr_format=2, + mode="w", + encoding={"var1": {"chunks": (42, 50), "chunk_key_encoding": enc}}, + ) + +The ``chunk_key_encoding`` option accepts a dictionary that specifies the encoding +configuration. For Zarr V2 arrays, you can use the ``V2ChunkKeyEncoding`` class from +``zarr.core.chunk_key_encodings`` to generate this configuration. This is particularly +useful when you need to ensure compatibility with specific Zarr V2 storage layouts or +when working with tools that expect a particular chunk key format. + +.. note:: + The ``chunk_key_encoding`` option is only relevant when writing to Zarr stores. + When reading Zarr arrays, Xarray automatically detects and uses the appropriate + chunk key encoding based on the store's format and configuration. + +.. ipython:: python + :suppress: + + import shutil + + shutil.rmtree("example.zarr") diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c8fbecf82af..7a46406439c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -123,6 +123,8 @@ Bug fixes By `Mathias Hauser `_. - Variables with no temporal dimension are left untouched by :py:meth:`~xarray.Dataset.convert_calendar`. (:issue:`10266`, :pull:`10268`) By `Pascal Bourgault `_. +- Enable ``chunk_key_encoding`` in :py:meth:`~xarray.Dataset.to_zarr` for Zarr v2 Datasets (:pull:`10274`) + By `BrianMichell `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 1a46346dda7..b11646d711a 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -464,6 +464,7 @@ def extract_zarr_variable_encoding( "serializer", "cache_metadata", "write_empty_chunks", + "chunk_key_encoding", } if zarr_format == 3: valid_encodings.add("fill_value") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 95c53786f86..5873c7f40ed 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3691,6 +3691,45 @@ def create_zarr_target(self): else: yield {} + def test_chunk_key_encoding(self) -> None: + try: + from zarr.core.chunk_key_encodings import V2ChunkKeyEncoding + + encoding = V2ChunkKeyEncoding(separator="/").to_dict() + except ImportError: + # Fallback for zarr<3 + encoding = {"name": "v2", "configuration": {"separator": "/"}} + + # Create a dataset with a variable name containing a period + data = np.ones((4, 4)) + original = Dataset({"var1": (("x", "y"), data)}) + + # Set up chunk key encoding with slash separator + encoding = { + "var1": { + "chunk_key_encoding": encoding, + "chunks": (2, 2), + } + } + + # Write to store with custom encoding + with self.create_zarr_target() as store: + original.to_zarr(store, encoding=encoding) + + # Verify the chunk keys in store use the slash separator + if not has_zarr_v3: + chunk_keys = [k for k in store.keys() if k.startswith("var1/")] + assert len(chunk_keys) > 0 + for key in chunk_keys: + assert "/" in key + assert "." not in key.split("/")[1:] # No dots in chunk coordinates + + # Read back and verify data + with xr.open_zarr(store) as actual: + assert_identical(original, actual) + # Verify chunks are preserved + assert actual["var1"].encoding["chunks"] == (2, 2) + @requires_zarr @pytest.mark.skipif(