Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v3] fix: zarr v2 compatibility fixes for Dask #2186

Merged
merged 20 commits into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ async def create(
if meta_array is not None:
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)

mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "w"))
mode = kwargs.pop("mode", cast(AccessModeLiteral, "r" if read_only else "a"))
store_path = await make_store_path(store, mode=mode)
jhamman marked this conversation as resolved.
Show resolved Hide resolved
jhamman marked this conversation as resolved.
Show resolved Hide resolved
if path is not None:
store_path = store_path / path
Expand Down
35 changes: 21 additions & 14 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
import numpy as np
import numpy.typing as npt

from zarr.abc.store import set_or_delete
from zarr.abc.store import Store, set_or_delete
from zarr.codecs import BytesCodec
from zarr.codecs._v2 import V2Compressor, V2Filters
from zarr.core.attributes import Attributes
from zarr.core.buffer import BufferPrototype, NDArrayLike, NDBuffer, default_buffer_prototype
from zarr.core.chunk_grids import RegularChunkGrid, _guess_chunks
from zarr.core.chunk_grids import RegularChunkGrid, normalize_chunks
from zarr.core.chunk_key_encodings import (
ChunkKeyEncoding,
DefaultChunkKeyEncoding,
Expand Down Expand Up @@ -129,7 +129,7 @@ async def create(
fill_value: Any | None = None,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
chunk_shape: ChunkCoords | None = None, # TODO: handle bool and iterable of iterable types
jhamman marked this conversation as resolved.
Show resolved Hide resolved
chunk_key_encoding: (
ChunkKeyEncoding
| tuple[Literal["default"], Literal[".", "/"]]
Expand All @@ -139,7 +139,7 @@ async def create(
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
# v2 only
chunks: ShapeLike | None = None,
chunks: ShapeLike | None = None, # TODO: handle bool and iterable of iterable types
jhamman marked this conversation as resolved.
Show resolved Hide resolved
dimension_separator: Literal[".", "/"] | None = None,
order: Literal["C", "F"] | None = None,
filters: list[dict[str, JSON]] | None = None,
Expand All @@ -152,15 +152,14 @@ async def create(

shape = parse_shapelike(shape)

if chunk_shape is None:
if chunks is None:
chunk_shape = chunks = _guess_chunks(shape=shape, typesize=np.dtype(dtype).itemsize)
else:
chunks = parse_shapelike(chunks)
if chunks is not None and chunk_shape is not None:
raise ValueError("Only one of chunk_shape or chunks can be provided.")
d-v-b marked this conversation as resolved.
Show resolved Hide resolved

chunk_shape = chunks
elif chunks is not None:
raise ValueError("Only one of chunk_shape or chunks must be provided.")
dtype = np.dtype(dtype)
if chunks:
_chunks = normalize_chunks(chunks, shape, dtype.itemsize)
if chunk_shape:
_chunks = normalize_chunks(chunk_shape, shape, dtype.itemsize)

if zarr_format == 3:
if dimension_separator is not None:
Expand All @@ -183,7 +182,7 @@ async def create(
store_path,
shape=shape,
dtype=dtype,
chunk_shape=chunk_shape,
chunk_shape=_chunks,
fill_value=fill_value,
chunk_key_encoding=chunk_key_encoding,
codecs=codecs,
Expand All @@ -206,7 +205,7 @@ async def create(
store_path,
shape=shape,
dtype=dtype,
chunks=chunk_shape,
chunks=_chunks,
dimension_separator=dimension_separator,
fill_value=fill_value,
order=order,
Expand Down Expand Up @@ -393,6 +392,10 @@ async def open(
metadata=ArrayV3Metadata.from_dict(json.loads(zarr_json_bytes.to_bytes())),
)

@property
def store(self) -> Store:
return self.store_path.store

@property
def ndim(self) -> int:
return len(self.metadata.shape)
Expand Down Expand Up @@ -697,6 +700,10 @@ def open(
async_array = sync(AsyncArray.open(store))
return cls(async_array)

@property
def store(self) -> Store:
return self._async_array.store
jhamman marked this conversation as resolved.
Show resolved Hide resolved

@property
def ndim(self) -> int:
return self._async_array.ndim
Expand Down
47 changes: 46 additions & 1 deletion src/zarr/core/chunk_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

import itertools
import math
import numbers
import operator
from abc import abstractmethod
from dataclasses import dataclass
from functools import reduce
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Any

import numpy as np

Expand Down Expand Up @@ -98,6 +99,50 @@ def _guess_chunks(
return tuple(int(x) for x in chunks)


def normalize_chunks(chunks: Any, shape: tuple[int, ...], typesize: int) -> tuple[int, ...]:
"""Convenience function to normalize the `chunks` argument for an array
with the given `shape`."""

# N.B., expect shape already normalized

# handle auto-chunking
if chunks is None or chunks is True:
d-v-b marked this conversation as resolved.
Show resolved Hide resolved
return _guess_chunks(shape, typesize)

# handle no chunking
if chunks is False:
return shape

# handle 1D convenience form
if isinstance(chunks, numbers.Integral):
chunks = tuple(int(chunks) for _ in shape)

# handle dask-style chunks (iterable of iterables)
if all(isinstance(c, (tuple | list)) for c in chunks):
# take first chunk size for each dimension
chunks = (
c[0] for c in chunks
) # TODO: check/error/warn for irregular chunks (e.g. if c[0] != c[1:-1])
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

except for this block, the rest of this function came from 2.x


# handle bad dimensionality
if len(chunks) > len(shape):
raise ValueError("too many dimensions in chunks")

# handle underspecified chunks
if len(chunks) < len(shape):
# assume chunks across remaining dimensions
chunks += shape[len(chunks) :]

# handle None or -1 in chunks
if -1 in chunks or None in chunks:
chunks = tuple(
s if c == -1 or c is None else int(c) for s, c in zip(shape, chunks, strict=False)
)

out = tuple(int(c) for c in chunks)
return out


@dataclass(frozen=True)
class ChunkGrid(Metadata):
@classmethod
Expand Down
4 changes: 3 additions & 1 deletion src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,9 @@ async def open(
# alternatively, we could warn and favor v3
raise ValueError("Both zarr.json and .zgroup objects exist")
if zarr_json_bytes is None and zgroup_bytes is None:
raise FileNotFoundError(store_path)
raise FileNotFoundError(
f"could not find zarr.json or .zgroup objects in {store_path}"
)
# set zarr_format based on which keys were found
if zarr_json_bytes is not None:
zarr_format = 3
Expand Down
8 changes: 4 additions & 4 deletions src/zarr/store/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ async def make_store_path(
store_like: StoreLike | None, *, mode: AccessModeLiteral | None = None
) -> StorePath:
if isinstance(store_like, StorePath):
if mode is not None:
assert AccessMode.from_literal(mode) == store_like.store.mode
if (mode is not None) and (AccessMode.from_literal(mode) != store_like.store.mode):
raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.store.mode})")
return store_like
elif isinstance(store_like, Store):
if mode is not None:
assert AccessMode.from_literal(mode) == store_like.mode
if (mode is not None) and (AccessMode.from_literal(mode) != store_like.mode):
raise ValueError(f"mode mismatch (mode={mode} != store.mode={store_like.mode})")
await store_like._ensure_open()
return StorePath(store_like)
elif store_like is None:
Expand Down
Loading