add explicit support for h5
FynnBe committed Nov 15, 2024
1 parent 4da6766 commit 4cbf9ef
Showing 7 changed files with 125 additions and 8 deletions.
89 changes: 81 additions & 8 deletions bioimageio/core/io.py
@@ -1,7 +1,10 @@
import collections.abc
from pathlib import Path
from typing import Any, Mapping, Optional, Sequence, Union
import warnings
from pathlib import Path, PurePosixPath
from typing import Any, Mapping, Optional, Sequence, Tuple, Union

import h5py
import numpy as np
from imageio.v3 import imread, imwrite
from loguru import logger
from numpy.typing import NDArray
@@ -15,6 +18,8 @@
from .stat_measures import DatasetMeasure, MeasureValue
from .tensor import Tensor

DEFAULT_H5_DATASET_PATH = "data"


def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
    """load a single image as numpy array
@@ -23,9 +28,38 @@ def load_image(path: Path, is_volume: Optional[bool] = None) -> NDArray[Any]:
        path: image path
        is_volume: deprecated
    """
    ext = path.suffix
    if ext == ".npy":
    if is_volume is not None:
        warnings.warn("**is_volume** is deprecated and will be removed soon.")

    file_path, subpath = _split_dataset_path(Path(path))

    if file_path.suffix == ".npy":
        if subpath is not None:
            raise ValueError(f"Unexpected subpath {subpath} for .npy path {path}")
        return load_array(path)
    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
        if subpath is None:
            dataset_path = DEFAULT_H5_DATASET_PATH
        else:
            dataset_path = str(subpath)

        with h5py.File(file_path, "r") as f:
            h5_dataset = f.get(  # pyright: ignore[reportUnknownVariableType]
                dataset_path
            )
            if not isinstance(h5_dataset, h5py.Dataset):
                raise ValueError(
                    f"{path} is not of type {h5py.Dataset}, but has type "
                    + str(
                        type(h5_dataset)  # pyright: ignore[reportUnknownArgumentType]
                    )
                )
            image: NDArray[Any]
            image = h5_dataset[:]  # pyright: ignore[reportUnknownVariableType]
            assert isinstance(image, np.ndarray), type(
                image  # pyright: ignore[reportUnknownArgumentType]
            )
            return image  # pyright: ignore[reportUnknownVariableType]
    else:
        return imread(path)  # pyright: ignore[reportUnknownVariableType]

@@ -37,14 +71,53 @@ def load_tensor(path: Path, axes: Optional[Sequence[AxisLike]] = None) -> Tensor
    return Tensor.from_numpy(array, dims=axes)


def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]:
    """Split off subpath (e.g. internal h5 dataset path)
    from a file path following a file extension.
    Examples:
        >>> _split_dataset_path(Path("my_file.h5/dataset"))
        (Path("my_file.h5"), PurePosixPath("dataset"))
        If no suffix is detected, the path is returned with None as the subpath:
        >>> _split_dataset_path(Path("my_plain_file"))
        (Path("my_plain_file"), None)
    """
    if path.suffix:
        return path, None

    for p in path.parents:
        if p.suffix:
            return p, PurePosixPath(path.relative_to(p))

    return path, None


def save_tensor(path: Path, tensor: Tensor) -> None:
    # TODO: save axis meta data

    data: NDArray[Any] = tensor.data.to_numpy()
    path = Path(path)
    path.parent.mkdir(exist_ok=True, parents=True)
    if path.suffix == ".npy":
        save_array(path, data)
    file_path, subpath = _split_dataset_path(Path(path))
    if not file_path.suffix:
        raise ValueError(f"No suffix (needed to decide file format) found in {path}")

    file_path.parent.mkdir(exist_ok=True, parents=True)
    if file_path.suffix == ".npy":
        if subpath is not None:
            raise ValueError(f"Unexpected subpath {subpath} found in .npy path {path}")
        save_array(file_path, data)
    elif file_path.suffix in (".h5", ".hdf", ".hdf5"):
        if subpath is None:
            dataset_path = DEFAULT_H5_DATASET_PATH
        else:
            dataset_path = str(subpath)

        with h5py.File(file_path, "a") as f:
            if dataset_path in f:
                del f[dataset_path]

            _ = f.create_dataset(dataset_path, data=data, chunks=True)
    else:
        # if singleton_axes := [a for a, s in tensor.tagged_shape.items() if s == 1]:
        #     tensor = tensor[{a: 0 for a in singleton_axes}]
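For context (not part of the commit): a minimal usage sketch of the convention introduced above, assuming made-up file and dataset names; the calls themselves (Tensor.from_numpy, save_tensor, load_tensor, DEFAULT_H5_DATASET_PATH) are the ones added or used in this diff.

from pathlib import Path

import numpy as np

from bioimageio.core import Tensor
from bioimageio.core.io import load_tensor, save_tensor

t = Tensor.from_numpy(np.zeros((4, 5), dtype=np.uint8), dims=None)

# no internal dataset path given: save_tensor falls back to DEFAULT_H5_DATASET_PATH ("data")
save_tensor(Path("example.h5"), t)

# an internal h5 dataset path may be appended after the file suffix
save_tensor(Path("example.h5/my_dataset"), t)

# load_tensor splits the path the same way and reads the named dataset back
reloaded = load_tensor(Path("example.h5/my_dataset"))
assert np.array_equal(reloaded.data.to_numpy(), t.data.to_numpy())

Because save_tensor deletes an existing dataset of the same name before calling create_dataset (the h5py.File(file_path, "a") branch above), repeated saves to the same internal path overwrite rather than fail.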
1 change: 1 addition & 0 deletions dev/env-py38.yaml
@@ -8,6 +8,7 @@ dependencies:
- black
- crick # uncommented
- filelock
- h5py
- imageio>=2.5
- jupyter
- jupyter-black
1 change: 1 addition & 0 deletions dev/env-tf.yaml
@@ -8,6 +8,7 @@ dependencies:
- black
# - crick # currently requires python<=3.9
- filelock
- h5py
- imageio>=2.5
- jupyter
- jupyter-black
1 change: 1 addition & 0 deletions dev/env-wo-python.yaml
@@ -9,6 +9,7 @@ dependencies:
- black
# - crick # currently requires python<=3.9
- filelock
- h5py
- imageio>=2.5
- jupyter
- jupyter-black
1 change: 1 addition & 0 deletions dev/env.yaml
@@ -6,6 +6,7 @@ dependencies:
- black
# - crick # currently requires python<=3.9
- filelock
- h5py
- imageio>=2.5
- jupyter
- jupyter-black
2 changes: 2 additions & 0 deletions setup.py
@@ -26,10 +26,12 @@
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Programming Language :: Python :: 3.13",
    ],
    packages=find_namespace_packages(exclude=["tests"]),
    install_requires=[
        "bioimageio.spec ==0.5.3.3",
        "h5py",
        "imageio>=2.10",
        "loguru",
        "numpy",
38 changes: 38 additions & 0 deletions tests/test_io.py
@@ -0,0 +1,38 @@
from pathlib import Path
from typing import Tuple

import numpy as np
import pytest


@pytest.mark.parametrize(
    "name",
    [
        "img.png",
        "img.tiff",
        "img.h5",
        "img.h5/img",
        "img.npy",
    ],
)
@pytest.mark.parametrize(
    "shape",
    [
        (4, 5),
        (3, 4, 5),
        (1, 4, 5),
        (5, 4, 3),
        (5, 3, 4),
    ],
)
def test_image_io(name: str, shape: Tuple[int, ...], tmp_path: Path):
    from bioimageio.core import Tensor
    from bioimageio.core.io import load_tensor, save_tensor

    path = tmp_path / name
    data = Tensor.from_numpy(
        np.arange(np.prod(shape), dtype=np.uint8).reshape(shape), dims=None
    )
    save_tensor(path, data)
    actual = load_tensor(path)
    assert actual == data
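A rough sketch (also not part of the commit; file names are hypothetical) of what the two h5 test cases above, "img.h5" and "img.h5/img", translate to on disk, checked with plain h5py:

from pathlib import Path

import h5py
import numpy as np

from bioimageio.core import Tensor
from bioimageio.core.io import DEFAULT_H5_DATASET_PATH, save_tensor

t = Tensor.from_numpy(np.arange(20, dtype=np.uint8).reshape(4, 5), dims=None)

save_tensor(Path("plain.h5"), t)      # like the "img.h5" case
save_tensor(Path("named.h5/img"), t)  # like the "img.h5/img" case

with h5py.File("plain.h5", "r") as f:
    assert DEFAULT_H5_DATASET_PATH in f  # written to the default "data" dataset

with h5py.File("named.h5", "r") as f:
    assert "img" in f  # written to the explicit internal path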
