diff --git a/pyproject.toml b/pyproject.toml index e45ca563..265c337e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ requires-python = ">=3.10" dynamic = ["version"] dependencies = [ - "xarray>=2024.10.0,<2025.0.0", + "xarray>=2025.1.1", "numpy>=2.0.0", "packaging", "universal-pathlib", @@ -39,7 +39,7 @@ hdf_reader = [ "numcodecs" ] icechunk = [ - "icechunk==0.1.0a8", + "icechunk>=0.1.0a10", ] test = [ "codecov", diff --git a/virtualizarr/codecs.py b/virtualizarr/codecs.py index ad2a3d9b..94f6d1aa 100644 --- a/virtualizarr/codecs.py +++ b/virtualizarr/codecs.py @@ -55,7 +55,7 @@ def _get_manifestarray_codecs( ) -> Union[Codec, tuple["ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec", ...]]: """Get codecs for a ManifestArray based on its zarr_format.""" if normalize_to_zarr_v3 or array.zarray.zarr_format == 3: - return array.zarray._v3_codec_pipeline() + return (array.zarray.serializer(),) + array.zarray._v3_codec_pipeline() elif array.zarray.zarr_format == 2: return array.zarray.codec else: diff --git a/virtualizarr/writers/icechunk.py b/virtualizarr/writers/icechunk.py index e6869c6f..eee68162 100644 --- a/virtualizarr/writers/icechunk.py +++ b/virtualizarr/writers/icechunk.py @@ -208,13 +208,13 @@ def write_virtual_variable_to_icechunk( else: append_axis = None # create array if it doesn't already exist - arr = group.require_array( name=name, shape=zarray.shape, - chunk_shape=zarray.chunks, + chunks=zarray.chunks, dtype=encode_dtype(zarray.dtype), - codecs=zarray._v3_codec_pipeline(), + compressors=zarray._v3_codec_pipeline(), # compressors, + serializer=zarray.serializer(), dimension_names=var.dims, fill_value=zarray.fill_value, ) diff --git a/virtualizarr/zarr.py b/virtualizarr/zarr.py index e339a3f4..4c36e7dc 100644 --- a/virtualizarr/zarr.py +++ b/virtualizarr/zarr.py @@ -175,13 +175,6 @@ def _v3_codec_pipeline(self) -> Any: transpose = dict(name="transpose", configuration=dict(order=order)) 
codec_configs.append(transpose) - # https://github.com/zarr-developers/zarr-python/pull/1944#issuecomment-2151994097 - # "If no ArrayBytesCodec is supplied, we can auto-add a BytesCodec" - bytes = dict( - name="bytes", configuration={} - ) # TODO need to handle endianess configuration - codec_configs.append(bytes) - # Noting here that zarr v3 has very few codecs specificed in the official spec, # and that there are far more codecs in `numcodecs`. We take a gamble and assume # that the codec names and configuration are simply mapped into zarrv3 "configurables". @@ -198,6 +191,23 @@ def _v3_codec_pipeline(self) -> Any: return codec_pipeline + def serializer(self) -> Any: + """ + Return the zarr v3 ``bytes`` codec object (empty configuration), built via zarr's ``parse_codecs``; raises ImportError if that import fails. + """ + try: + from zarr.core.metadata.v3 import ( # type: ignore[import-untyped] + parse_codecs, + ) + except ImportError: + raise ImportError("zarr v3 is required to generate v3 codec pipelines") + # https://github.com/zarr-developers/zarr-python/pull/1944#issuecomment-2151994097 + # "If no ArrayBytesCodec is supplied, we can auto-add a BytesCodec" + bytes = dict( + name="bytes", configuration={} + ) # TODO need to handle endianness configuration + return parse_codecs([bytes])[0] + def encode_dtype(dtype: np.dtype) -> str: # TODO not sure if there is a better way to get the '