Skip to content

Commit e790176

Browse files
authored
Merge pull request #95 from d-v-b/fix/no-empty-codecs
Disallow empty codecs, use a sane default in auto_codecs, and allow codecs to be specified by strings
2 parents 3fe4264 + 89e0688 commit e790176

File tree

3 files changed

+59
-10
lines changed

3 files changed

+59
-10
lines changed

src/pydantic_zarr/core.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
Any,
77
Literal,
88
TypeAlias,
9+
TypeVar,
910
overload,
1011
)
1112

@@ -24,6 +25,8 @@
2425

2526
AccessMode: TypeAlias = Literal["w", "w+", "r", "a"]
2627

28+
T = TypeVar("T")
29+
2730

2831
@overload
2932
def tuplify_json(obj: Mapping) -> Mapping: ...
@@ -133,3 +136,12 @@ def maybe_node(
133136
return get_node(spath.store, spath.path, zarr_format=zarr_format)
134137
except FileNotFoundError:
135138
return None
139+
140+
141+
def ensure_multiple(data: Sequence[T]) -> Sequence[T]:
    """
    Ensure that there is at least one element in the sequence.

    Despite the name, a single element is sufficient; only an empty
    sequence is rejected.

    Parameters
    ----------
    data : Sequence[T]
        The sequence to check. Its length is read with ``len``.

    Returns
    -------
    Sequence[T]
        The same object that was passed in, unmodified.

    Raises
    ------
    ValueError
        If ``data`` contains no elements.
    """
    if len(data) < 1:
        raise ValueError("Invalid length. Expected 1 or more, got 0.")
    # Hand the input back untouched so the function can be chained as a
    # pass-through check.
    return data

src/pydantic_zarr/v3.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
IncEx,
3131
StrictBase,
3232
ensure_key_no_path,
33+
ensure_multiple,
3334
maybe_node,
3435
model_like,
3536
tuplify_json,
@@ -94,6 +95,10 @@ class AnyNamedConfig(NamedConfig[str, Mapping[str, object]]):
9495
"""
9596

9697

98+
CodecLike = str | AnyNamedConfig
99+
"""A type modelling the permissible declarations for codecs"""
100+
101+
97102
class RegularChunkingConfig(TypedDict):
98103
chunk_shape: tuple[int, ...]
99104

@@ -160,7 +165,9 @@ def parse_dtype_v3(dtype: npt.DTypeLike | Mapping[str, object]) -> Mapping[str,
160165
raise ValueError(f"Unsupported dtype: {dtype}")
161166

162167

163-
DtypeStr = Annotated[str, BeforeValidator(parse_dtype_v3)]
168+
DTypeStr = Annotated[str, BeforeValidator(parse_dtype_v3)]
169+
DTypeLike = DTypeStr | AnyNamedConfig
170+
CodecTuple = Annotated[tuple[CodecLike, ...], BeforeValidator(ensure_multiple)]
164171

165172

166173
class ArraySpec(NodeSpec, Generic[TAttr]):
@@ -196,11 +203,11 @@ class ArraySpec(NodeSpec, Generic[TAttr]):
196203
node_type: Literal["array"] = "array"
197204
attributes: TAttr = cast(TAttr, {})
198205
shape: tuple[int, ...]
199-
data_type: DtypeStr | AnyNamedConfig
206+
data_type: DTypeLike
200207
chunk_grid: RegularChunking # todo: validate this against shape
201208
chunk_key_encoding: DefaultChunkKeyEncoding # todo: validate this against shape
202209
fill_value: FillValue # todo: validate this against the data type
203-
codecs: tuple[AnyNamedConfig, ...]
210+
codecs: CodecTuple
204211
storage_transformers: tuple[AnyNamedConfig, ...] = ()
205212
dimension_names: tuple[str | None, ...] | None = None # todo: validate this against shape
206213

@@ -252,7 +259,7 @@ def from_array(
252259
chunk_grid: Literal["auto"] | AnyNamedConfig = "auto",
253260
chunk_key_encoding: Literal["auto"] | AnyNamedConfig = "auto",
254261
fill_value: Literal["auto"] | FillValue = "auto",
255-
codecs: Literal["auto"] | Sequence[AnyNamedConfig] = "auto",
262+
codecs: Literal["auto"] | Sequence[CodecLike] = "auto",
256263
storage_transformers: Literal["auto"] | Sequence[AnyNamedConfig] = "auto",
257264
dimension_names: Literal["auto"] | Sequence[str | None] = "auto",
258265
) -> Self:
@@ -293,11 +300,11 @@ def from_array(
293300
else:
294301
fill_value_actual = fill_value
295302

296-
codecs_actual: Sequence[AnyNamedConfig]
303+
codecs_actual: tuple[CodecLike, ...]
297304
if codecs == "auto":
298305
codecs_actual = auto_codecs(array)
299306
else:
300-
codecs_actual = codecs
307+
codecs_actual = tuple(codecs)
301308
storage_transformers_actual: Sequence[AnyNamedConfig]
302309
if storage_transformers == "auto":
303310
storage_transformers_actual = auto_storage_transformers(array)
@@ -1017,10 +1024,14 @@ def auto_fill_value(data: object) -> FillValue:
10171024
raise ValueError("Cannot determine default data type for object without shape attribute.")
10181025

10191026

1020-
def auto_codecs(data: object) -> tuple[AnyNamedConfig, ...]:
1027+
def auto_codecs(data: object) -> tuple[CodecLike, ...]:
1028+
"""
1029+
Automatically create a tuple of codecs from an arbitrary python object.
1030+
"""
10211031
if hasattr(data, "codecs"):
1032+
# todo: type check
10221033
return tuple(data.codecs)
1023-
return ()
1034+
return ({"name": "bytes"},)
10241035

10251036

10261037
def auto_storage_transformers(data: object) -> tuple[AnyNamedConfig, ...]:

tests/test_pydantic_zarr/test_v3.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import numpy as np
77
import pytest
88
import zarr
9+
from pydantic import ValidationError
910

1011
from pydantic_zarr.core import tuplify_json
1112
from pydantic_zarr.v3 import (
@@ -18,6 +19,7 @@
1819
NamedConfig,
1920
RegularChunking,
2021
RegularChunkingConfig,
22+
auto_codecs,
2123
)
2224

2325
from .conftest import DTYPE_EXAMPLES_V3, DTypeExample
@@ -44,7 +46,9 @@ def test_serialize_deserialize() -> None:
4446

4547

4648
def test_from_array() -> None:
47-
array_spec = ArraySpec.from_array(np.arange(10))
49+
array = np.arange(10)
50+
array_spec = ArraySpec.from_array(array)
51+
4852
assert array_spec == ArraySpec(
4953
zarr_format=3,
5054
node_type="array",
@@ -58,10 +62,32 @@ def test_from_array() -> None:
5862
name="default", configuration=DefaultChunkKeyEncodingConfig(separator="/")
5963
),
6064
fill_value=0,
61-
codecs=(),
65+
codecs=auto_codecs(array),
6266
storage_transformers=(),
6367
dimension_names=None,
6468
)
69+
# check that we can write this array to zarr
70+
# TODO: fix type of the store argument in to_zarr
71+
array_spec.to_zarr(store={}, path="") # type: ignore[arg-type]
72+
73+
74+
def test_arrayspec_no_empty_codecs() -> None:
    """
    Ensure that it is not possible to create an ArraySpec with no codecs.

    Constructing an ``ArraySpec`` with ``codecs=[]`` must raise a pydantic
    ``ValidationError`` whose text contains the "Invalid length" message.
    """
    import re

    # pytest applies `match` as a regular expression, so escape the literal
    # message; otherwise each "." would match any character.
    expected_message = re.escape("Value error, Invalid length. Expected 1 or more, got 0.")

    with pytest.raises(ValidationError, match=expected_message):
        ArraySpec(
            shape=(1,),
            data_type="uint8",
            codecs=[],
            attributes={},
            fill_value=0,
            chunk_grid={"name": "regular", "configuration": {"chunk_shape": (1,)}},
            chunk_key_encoding={"name": "default", "configuration": {"separator": "/"}},
        )
6591

6692

6793
@pytest.mark.filterwarnings("ignore:The dtype:UserWarning")

0 commit comments

Comments
 (0)