Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ab/upgrade icechunk #378

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
requires-python = ">=3.10"
dynamic = ["version"]
dependencies = [
"xarray>=2024.10.0,<2025.0.0",
"xarray>=2025.1.1",
"numpy>=2.0.0",
"packaging",
"universal-pathlib",
Expand All @@ -39,7 +39,7 @@ hdf_reader = [
"numcodecs"
]
icechunk = [
"icechunk==0.1.0a8",
"icechunk>=0.1.0a10",
]
test = [
"codecov",
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
) -> Union[Codec, tuple["ArrayArrayCodec | ArrayBytesCodec | BytesBytesCodec", ...]]:
"""Get codecs for a ManifestArray based on its zarr_format."""
if normalize_to_zarr_v3 or array.zarray.zarr_format == 3:
return array.zarray._v3_codec_pipeline()
return (array.zarray.serializer(),) + array.zarray._v3_codec_pipeline()

Check warning on line 58 in virtualizarr/codecs.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/codecs.py#L58

Added line #L58 was not covered by tests
elif array.zarray.zarr_format == 2:
return array.zarray.codec
else:
Expand Down
6 changes: 3 additions & 3 deletions virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,13 @@ def write_virtual_variable_to_icechunk(
else:
append_axis = None
# create array if it doesn't already exist

arr = group.require_array(
name=name,
shape=zarray.shape,
chunk_shape=zarray.chunks,
chunks=zarray.chunks,
dtype=encode_dtype(zarray.dtype),
codecs=zarray._v3_codec_pipeline(),
compressors=zarray._v3_codec_pipeline(), # compressors,
serializer=zarray.serializer(),
dimension_names=var.dims,
fill_value=zarray.fill_value,
)
Expand Down
24 changes: 17 additions & 7 deletions virtualizarr/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,6 @@
transpose = dict(name="transpose", configuration=dict(order=order))
codec_configs.append(transpose)

# https://github.com/zarr-developers/zarr-python/pull/1944#issuecomment-2151994097
# "If no ArrayBytesCodec is supplied, we can auto-add a BytesCodec"
bytes = dict(
name="bytes", configuration={}
) # TODO need to handle endianness configuration
codec_configs.append(bytes)

# Noting here that zarr v3 has very few codecs specified in the official spec,
# and that there are far more codecs in `numcodecs`. We take a gamble and assume
# that the codec names and configuration are simply mapped into zarrv3 "configurables".
Expand All @@ -198,6 +191,23 @@

return codec_pipeline

def serializer(self) -> Any:
"""
Return the zarr v3 ``bytes`` codec, parsed via ``parse_codecs``, for use as the array serializer.
"""
try:
from zarr.core.metadata.v3 import ( # type: ignore[import-untyped]

Check warning on line 199 in virtualizarr/zarr.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/zarr.py#L198-L199

Added lines #L198 - L199 were not covered by tests
parse_codecs,
)
except ImportError:
raise ImportError("zarr v3 is required to generate v3 codec pipelines")

Check warning on line 203 in virtualizarr/zarr.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/zarr.py#L202-L203

Added lines #L202 - L203 were not covered by tests
# https://github.com/zarr-developers/zarr-python/pull/1944#issuecomment-2151994097
# "If no ArrayBytesCodec is supplied, we can auto-add a BytesCodec"
bytes = dict(

Check warning on line 206 in virtualizarr/zarr.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/zarr.py#L206

Added line #L206 was not covered by tests
name="bytes", configuration={}
) # TODO need to handle endianness configuration
return parse_codecs([bytes])[0]

Check warning on line 209 in virtualizarr/zarr.py

View check run for this annotation

Codecov / codecov/patch

virtualizarr/zarr.py#L209

Added line #L209 was not covered by tests


def encode_dtype(dtype: np.dtype) -> str:
# TODO not sure if there is a better way to get the '<i4' style representation of the dtype out
Expand Down
Loading