diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c890e8df..3ed0f5df 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -27,6 +27,9 @@ jobs: strategy: matrix: python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + include: + - python-version: '3.12' + run-expensive-tests: true steps: - uses: actions/checkout@v4 - name: Install Conda environment with Micromamba @@ -39,6 +42,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: Install py3.8 environment if: matrix.python-version == '3.8' uses: mamba-org/setup-micromamba@v1 @@ -47,6 +52,8 @@ jobs: cache-environment: true environment-file: dev/env-py38.yaml post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup run: pip install --no-deps -e . - name: Get Date @@ -63,6 +70,8 @@ jobs: run: pytest --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache + RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} + test-spec-main: runs-on: ubuntu-latest @@ -71,7 +80,8 @@ jobs: python-version: ['3.8', '3.12'] include: - python-version: '3.12' - is-dev-version: true + report-coverage: true + run-expensive-tests: true steps: - uses: actions/checkout@v4 - name: Install Conda environment with Micromamba @@ -84,6 +94,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: Install py3.8 environment if: matrix.python-version == '3.8' uses: mamba-org/setup-micromamba@v1 @@ -92,6 +104,8 @@ jobs: cache-environment: true environment-file: dev/env-py38.yaml post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup spec run: | conda remove --yes --force bioimageio.spec || true # allow failure for cached env @@ -112,17 +126,18 @@ jobs: run: pytest --disable-pytest-warnings env: BIOIMAGEIO_CACHE_PATH: bioimageio_cache - - if: matrix.is-dev-version && github.event_name == 'pull_request' + RUN_EXPENSIVE_TESTS: ${{ matrix.run-expensive-tests && 'true' || 'false' }} + - if: matrix.report-coverage && github.event_name == 'pull_request' uses: orgoro/coverage@v3.2 with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - - if: matrix.is-dev-version && github.ref == 'refs/heads/main' + - if: matrix.report-coverage && github.ref == 'refs/heads/main' run: | pip install genbadge[coverage] genbadge coverage --input-file coverage.xml --output-file ./dist/coverage/coverage-badge.svg coverage html -d dist/coverage - - if: matrix.is-dev-version && github.ref == 'refs/heads/main' + - if: matrix.report-coverage && github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: coverage @@ -147,6 +162,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup spec run: | conda remove --yes --force bioimageio.spec || true # allow failure for cached env @@ -184,6 +201,8 @@ jobs: create-args: >- python=${{ matrix.python-version }} post-cleanup: 'all' + env: + PIP_NO_DEPS: true - name: additional setup run: pip install --no-deps -e . 
- name: Get Date diff --git a/README.md b/README.md index 49e6cbbd..9985207c 100644 --- a/README.md +++ b/README.md @@ -375,6 +375,12 @@ The model specification and its validation tools can be found at ValidationSummary: """Test model inference""" return test_description( @@ -113,34 +138,215 @@ def test_model( decimal=decimal, determinism=determinism, expected_type="model", + sha256=sha256, ) +def default_run_command(args: Sequence[str]): + logger.info("running '{}'...", " ".join(args)) + _ = subprocess.run(args, shell=True, text=True, check=True) + + def test_description( source: Union[ResourceDescr, PermissiveFileSource, BioimageioYamlContent], *, format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, devices: Optional[Sequence[str]] = None, - absolute_tolerance: float = 1.5e-4, - relative_tolerance: float = 1e-4, + absolute_tolerance: AbsoluteTolerance = 1.5e-4, + relative_tolerance: RelativeTolerance = 1e-4, decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, + sha256: Optional[Sha256] = None, + runtime_env: Union[ + Literal["currently-active", "as-described"], Path, BioimageioCondaEnv + ] = ("currently-active"), + run_command: Callable[[Sequence[str]], None] = default_run_command, ) -> ValidationSummary: - """Test a bioimage.io resource dynamically, e.g. prediction of test tensors for models""" - rd = load_description_and_test( - source, - format_version=format_version, - weight_format=weight_format, - devices=devices, - absolute_tolerance=absolute_tolerance, - relative_tolerance=relative_tolerance, - decimal=decimal, - determinism=determinism, - expected_type=expected_type, + """Test a bioimage.io resource dynamically, e.g. prediction of test tensors for models. + + Args: + source: model description source. + weight_format: Weight format to test. + Default: All weight formats present in **source**. + devices: Devices to test with, e.g. 'cpu', 'cuda'. + Default (may be weight format dependent): ['cuda'] if available, ['cpu'] otherwise. + absolute_tolerance: Maximum absolute tolerance of reproduced output tensors. + relative_tolerance: Maximum relative tolerance of reproduced output tensors. + determinism: Modes to improve reproducibility of test outputs. + runtime_env: (Experimental feature!) The Python environment to run the tests in + - `"currently-active"`: Use active Python interpreter. + - `"as-described"`: Use `bioimageio.spec.get_conda_env` to generate a conda + environment YAML file based on the model weights description. + - A `BioimageioCondaEnv` or a path to a conda environment YAML file. + Note: The `bioimageio.core` dependency will be added automatically if not present. + run_command: (Experimental feature!) Function to execute (conda) terminal commands in a subprocess + (ignored if **runtime_env** is `"currently-active"`). 
+ """ + if runtime_env == "currently-active": + rd = load_description_and_test( + source, + format_version=format_version, + weight_format=weight_format, + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + decimal=decimal, + determinism=determinism, + expected_type=expected_type, + sha256=sha256, + ) + return rd.validation_summary + + if runtime_env == "as-described": + conda_env = None + elif isinstance(runtime_env, (str, Path)): + conda_env = BioimageioCondaEnv.model_validate(read_yaml(Path(runtime_env))) + elif isinstance(runtime_env, BioimageioCondaEnv): + conda_env = runtime_env + else: + assert_never(runtime_env) + + with TemporaryDirectory(ignore_cleanup_errors=True) as _d: + working_dir = Path(_d) + if isinstance(source, (dict, ResourceDescrBase)): + file_source = save_bioimageio_package( + source, output_path=working_dir / "package.zip" + ) + else: + file_source = source + + return _test_in_env( + file_source, + working_dir=working_dir, + weight_format=weight_format, + conda_env=conda_env, + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + determinism=determinism, + run_command=run_command, + ) + + +def _test_in_env( + source: PermissiveFileSource, + *, + working_dir: Path, + weight_format: Optional[WeightsFormat], + conda_env: Optional[BioimageioCondaEnv], + devices: Optional[Sequence[str]], + absolute_tolerance: AbsoluteTolerance, + relative_tolerance: RelativeTolerance, + determinism: Literal["seed_only", "full"], + run_command: Callable[[Sequence[str]], None], +) -> ValidationSummary: + descr = load_description(source) + + if not isinstance(descr, (v0_4.ModelDescr, v0_5.ModelDescr)): + raise NotImplementedError("Not yet implemented for non-model resources") + + if weight_format is None: + all_present_wfs = [ + wf for wf in get_args(WeightsFormat) if getattr(descr.weights, wf) + ] + ignore_wfs = [wf for wf in all_present_wfs if wf in ["tensorflow_js"]] + logger.info( + "Found weight formats {}. 
Start testing all{}...", + all_present_wfs, + f" (except: {', '.join(ignore_wfs)}) " if ignore_wfs else "", + ) + summary = _test_in_env( + source, + working_dir=working_dir / all_present_wfs[0], + weight_format=all_present_wfs[0], + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + determinism=determinism, + conda_env=conda_env, + run_command=run_command, + ) + for wf in all_present_wfs[1:]: + additional_summary = _test_in_env( + source, + working_dir=working_dir / wf, + weight_format=wf, + devices=devices, + absolute_tolerance=absolute_tolerance, + relative_tolerance=relative_tolerance, + determinism=determinism, + conda_env=conda_env, + run_command=run_command, + ) + for d in additional_summary.details: + # TODO: filter redundant details; group details + summary.add_detail(d) + return summary + + if weight_format == "pytorch_state_dict": + wf = descr.weights.pytorch_state_dict + elif weight_format == "torchscript": + wf = descr.weights.torchscript + elif weight_format == "keras_hdf5": + wf = descr.weights.keras_hdf5 + elif weight_format == "onnx": + wf = descr.weights.onnx + elif weight_format == "tensorflow_saved_model_bundle": + wf = descr.weights.tensorflow_saved_model_bundle + elif weight_format == "tensorflow_js": + raise RuntimeError( + "testing 'tensorflow_js' is not supported by bioimageio.core" + ) + else: + assert_never(weight_format) + + assert wf is not None + if conda_env is None: + conda_env = get_conda_env(entry=wf) + + # remove name as we create a name based on the env description hash value + conda_env.name = None + + dumped_env = conda_env.model_dump(mode="json", exclude_none=True) + if not is_yaml_value(dumped_env): + raise ValueError(f"Failed to dump conda env to valid YAML {conda_env}") + + env_io = StringIO() + write_yaml(dumped_env, file=env_io) + encoded_env = env_io.getvalue().encode() + env_name = hashlib.sha256(encoded_env).hexdigest() + + try: + run_command(["where" if platform.system() == "Windows" else "which", "conda"]) + except Exception as e: + raise RuntimeError("Conda not available") from e + + working_dir.mkdir(parents=True, exist_ok=True) + try: + run_command(["conda", "activate", env_name]) + except Exception: + path = working_dir / "env.yaml" + _ = path.write_bytes(encoded_env) + logger.debug("written conda env to {}", path) + run_command(["conda", "env", "create", f"--file={path}", f"--name={env_name}"]) + run_command(["conda", "activate", env_name]) + + summary_path = working_dir / "summary.json" + run_command( + [ + "conda", + "run", + "-n", + env_name, + "bioimageio", + "test", + str(source), + f"--summary-path={summary_path}", + ] ) - return rd.validation_summary + return ValidationSummary.model_validate_json(summary_path.read_bytes()) def load_description_and_test( @@ -149,11 +355,12 @@ def load_description_and_test( format_version: Union[Literal["discover", "latest"], str] = "discover", weight_format: Optional[WeightsFormat] = None, devices: Optional[Sequence[str]] = None, - absolute_tolerance: float = 1.5e-4, - relative_tolerance: float = 1e-4, + absolute_tolerance: AbsoluteTolerance = 1.5e-4, + relative_tolerance: RelativeTolerance = 1e-4, decimal: Optional[int] = None, determinism: Literal["seed_only", "full"] = "seed_only", expected_type: Optional[str] = None, + sha256: Optional[Sha256] = None, ) -> Union[ResourceDescr, InvalidDescr]: """Test RDF dynamically, e.g.
model inference of test inputs""" if ( @@ -171,7 +378,7 @@ def load_description_and_test( elif isinstance(source, dict): rd = build_description(source, format_version=format_version) else: - rd = load_description(source, format_version=format_version) + rd = load_description(source, format_version=format_version, sha256=sha256) rd.validation_summary.env.add( InstalledPackage(name="bioimageio.core", version=VERSION) @@ -223,7 +430,7 @@ def _test_model_inference( rtol: float, ) -> None: test_name = f"Reproduce test outputs from test inputs ({weight_format})" - logger.info("starting '{}'", test_name) + logger.debug("starting '{}'", test_name) error: Optional[str] = None tb: List[str] = [] @@ -256,6 +463,9 @@ def _test_model_inference( error = f"Output and expected output disagree:\n {e}" break except Exception as e: + if validation_context_var.get().raise_errors: + raise e + error = str(e) tb = traceback.format_tb(e.__traceback__) @@ -311,11 +521,13 @@ def _test_model_inference_parametrized( # no batch axis batch_sizes = {1} - test_cases: Set[Tuple[v0_5.ParameterizedSize_N, BatchSize]] = { - (n, b) for n, b in product(sorted(ns), sorted(batch_sizes)) + test_cases: Set[Tuple[BatchSize, v0_5.ParameterizedSize_N]] = { + (b, n) for b, n in product(sorted(batch_sizes), sorted(ns)) } logger.info( - "Testing inference with {} different input tensor sizes", len(test_cases) + "Testing inference with {} different inputs (B, N): {}", + len(test_cases), + test_cases, ) def generate_test_cases(): @@ -329,7 +541,7 @@ def get_ns(n: int): if isinstance(a.size, v0_5.ParameterizedSize) } - for n, batch_size in sorted(test_cases): + for batch_size, n in sorted(test_cases): input_target_sizes, expected_output_sizes = model.get_axis_sizes( get_ns(n), batch_size=batch_size ) @@ -423,6 +635,9 @@ def get_ns(n: int): ) ) except Exception as e: + if validation_context_var.get().raise_errors: + raise e + error = str(e) tb = traceback.format_tb(e.__traceback__) model.validation_summary.add_detail( diff --git a/bioimageio/core/axis.py b/bioimageio/core/axis.py index 34dfa3e1..30c0d281 100644 --- a/bioimageio/core/axis.py +++ b/bioimageio/core/axis.py @@ -8,19 +8,26 @@ from bioimageio.spec.model import v0_5 -def _get_axis_type(a: Literal["b", "t", "i", "c", "x", "y", "z"]): - if a == "b": +def _guess_axis_type(a: str): + if a in ("b", "batch"): return "batch" - elif a == "t": + elif a in ("t", "time"): return "time" - elif a == "i": + elif a in ("i", "index"): return "index" - elif a == "c": + elif a in ("c", "channel"): return "channel" elif a in ("x", "y", "z"): return "space" else: - return "index" # return most unspecific axis + raise ValueError( + f"Failed to infer axis type for axis id '{a}'." + + " Consider using one of: '" + + "', '".join( + ["b", "batch", "t", "time", "i", "index", "c", "channel", "x", "y", "z"] + ) + + "'. Or creating an `Axis` object instead." 
+ ) S = TypeVar("S", bound=str) @@ -42,16 +49,22 @@ class Axis: id: AxisId type: Literal["batch", "channel", "index", "space", "time"] + def __post_init__(self): + if self.type == "batch": + self.id = AxisId("batch") + elif self.type == "channel": + self.id = AxisId("channel") + @classmethod def create(cls, axis: AxisLike) -> Axis: if isinstance(axis, cls): return axis elif isinstance(axis, Axis): return Axis(id=axis.id, type=axis.type) - elif isinstance(axis, str): - return Axis(id=AxisId(axis), type=_get_axis_type(axis)) elif isinstance(axis, v0_5.AxisBase): return Axis(id=AxisId(axis.id), type=axis.type) + elif isinstance(axis, str): + return Axis(id=AxisId(axis), type=_guess_axis_type(axis)) else: assert_never(axis) diff --git a/bioimageio/core/backends/__init__.py b/bioimageio/core/backends/__init__.py new file mode 100644 index 00000000..c39b58b5 --- /dev/null +++ b/bioimageio/core/backends/__init__.py @@ -0,0 +1,3 @@ +from ._model_adapter import create_model_adapter + +__all__ = ["create_model_adapter"] diff --git a/bioimageio/core/backends/_model_adapter.py b/bioimageio/core/backends/_model_adapter.py new file mode 100644 index 00000000..677a88f7 --- /dev/null +++ b/bioimageio/core/backends/_model_adapter.py @@ -0,0 +1,249 @@ +import warnings +from abc import ABC, abstractmethod +from typing import ( + Any, + List, + Literal, + Optional, + Sequence, + Tuple, + Union, + assert_never, + final, +) + +from numpy.typing import NDArray + +from bioimageio.core.digest_spec import get_axes_infos, get_member_ids +from bioimageio.core.sample import Sample +from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 + +from ..tensor import Tensor + +SupportedWeightsFormat = Literal[ + "keras_hdf5", + "onnx", + "pytorch_state_dict", + "tensorflow_saved_model_bundle", + "torchscript", +] + +# Known weight formats in order of priority +# First match wins +DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER: Tuple[SupportedWeightsFormat, ...] = ( + "pytorch_state_dict", + "tensorflow_saved_model_bundle", + "torchscript", + "onnx", + "keras_hdf5", +) + + +class ModelAdapter(ABC): + """ + Represents model *without* any preprocessing or postprocessing. + + ``` + from bioimageio.core import load_description + + model = load_description(...) + + # option 1: + adapter = ModelAdapter.create(model) + adapter.forward(...) + adapter.unload() + + # option 2: + with ModelAdapter.create(model) as adapter: + adapter.forward(...) 
+ ``` + """ + + def __init__(self, model_description: AnyModelDescr): + super().__init__() + self._model_descr = model_description + self._input_ids = get_member_ids(model_description.inputs) + self._output_ids = get_member_ids(model_description.outputs) + self._input_axes = [ + tuple(a.id for a in get_axes_infos(t)) for t in model_description.inputs + ] + self._output_axes = [ + tuple(a.id for a in get_axes_infos(t)) for t in model_description.outputs + ] + if isinstance(model_description, v0_4.ModelDescr): + self._input_is_optional = [False] * len(model_description.inputs) + else: + self._input_is_optional = [ipt.optional for ipt in model_description.inputs] + + @final + @classmethod + def create( + cls, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + devices: Optional[Sequence[str]] = None, + weight_format_priority_order: Optional[Sequence[SupportedWeightsFormat]] = None, + ): + """ + Creates model adapter based on the passed spec + Note: All specific adapters should happen inside this function to prevent different framework + initializations interfering with each other + """ + if not isinstance(model_description, (v0_4.ModelDescr, v0_5.ModelDescr)): + raise TypeError( + f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_description)}" + ) + + weights = model_description.weights + errors: List[Tuple[SupportedWeightsFormat, Exception]] = [] + weight_format_priority_order = ( + DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER + if weight_format_priority_order is None + else weight_format_priority_order + ) + # limit weight formats to the ones present + weight_format_priority_order_present: Sequence[SupportedWeightsFormat] = [ + w for w in weight_format_priority_order if getattr(weights, w) is not None + ] + if not weight_format_priority_order_present: + raise ValueError( + f"None of the specified weight formats ({weight_format_priority_order}) is present ({weight_format_priority_order_present})" + ) + + for wf in weight_format_priority_order_present: + if wf == "pytorch_state_dict": + assert weights.pytorch_state_dict is not None + try: + from .pytorch_backend import PytorchModelAdapter + + return PytorchModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + elif wf == "tensorflow_saved_model_bundle": + assert weights.tensorflow_saved_model_bundle is not None + try: + from .tensorflow_backend import create_tf_model_adapter + + return create_tf_model_adapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + elif wf == "onnx": + assert weights.onnx is not None + try: + from .onnx_backend import ONNXModelAdapter + + return ONNXModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + elif wf == "torchscript": + assert weights.torchscript is not None + try: + from .torchscript_backend import TorchscriptModelAdapter + + return TorchscriptModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + elif wf == "keras_hdf5": + assert weights.keras_hdf5 is not None + # keras can either be installed as a separate package or used as part of tensorflow + # we try to first import the keras model adapter using the separate package and, + # if it is not available, try to load the one using tf + try: + try: + from .keras_backend import KerasModelAdapter + except Exception: + from .tensorflow_backend import 
KerasModelAdapter + + return KerasModelAdapter( + model_description=model_description, devices=devices + ) + except Exception as e: + errors.append((wf, e)) + else: + assert_never(wf) + + assert errors + if len(weight_format_priority_order) == 1: + assert len(errors) == 1 + wf, e = errors[0] + raise ValueError( + f"The '{wf}' model adapter could not be created" + + f" in this environment:\n{e.__class__.__name__}({e}).\n\n" + ) from e + + else: + error_list = "\n - ".join( + f"{wf}: {e.__class__.__name__}({e})" for wf, e in errors + ) + raise ValueError( + "None of the weight format specific model adapters could be created" + + f" in this environment. Errors are:\n\n{error_list}.\n\n" + ) + + @final + def load(self, *, devices: Optional[Sequence[str]] = None) -> None: + warnings.warn("Deprecated. ModelAdapter is loaded on initialization") + + def forward(self, input_sample: Sample) -> Sample: + """ + Run forward pass of model to get model predictions + + Note: sample id and sample stat attributes are passed through + """ + unexpected = [mid for mid in input_sample.members if mid not in self._input_ids] + if unexpected: + warnings.warn(f"Got unexpected input tensor IDs: {unexpected}") + + input_arrays = [ + ( + None + if (a := input_sample.members.get(in_id)) is None + else a.transpose(in_order).data.data + ) + for in_id, in_order in zip(self._input_ids, self._input_axes) + ] + output_arrays = self._forward_impl(input_arrays) + assert len(output_arrays) <= len(self._output_ids) + output_tensors = [ + None if a is None else Tensor(a, dims=d) + for a, d in zip(output_arrays, self._output_axes) + ] + return Sample( + members={ + tid: out + for tid, out in zip( + self._output_ids, + output_tensors, + ) + if out is not None + }, + stat=input_sample.stat, + id=input_sample.id, + ) + + @abstractmethod + def _forward_impl( + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ) -> Union[List[Optional[NDArray[Any]]], Tuple[Optional[NDArray[Any]]]]: + """framework specific forward implementation""" + + @abstractmethod + def unload(self): + """ + Unload model from any devices, freeing their memory. + The model adapter should be considered unusable afterwards.
+ """ + + def _get_input_args_numpy(self, input_sample: Sample): + """helper to extract tensor args as transposed numpy arrays""" + + +create_model_adapter = ModelAdapter.create diff --git a/bioimageio/core/model_adapters/_keras_model_adapter.py b/bioimageio/core/backends/keras_backend.py similarity index 61% rename from bioimageio/core/model_adapters/_keras_model_adapter.py rename to bioimageio/core/backends/keras_backend.py index e6864ccc..6ca603ad 100644 --- a/bioimageio/core/model_adapters/_keras_model_adapter.py +++ b/bioimageio/core/backends/keras_backend.py @@ -1,39 +1,31 @@ import os -from typing import Any, List, Optional, Sequence, Union +from typing import Any, Optional, Sequence, Union from loguru import logger from numpy.typing import NDArray from bioimageio.spec._internal.io_utils import download +from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.model.v0_5 import Version from .._settings import settings from ..digest_spec import get_axes_infos -from ..tensor import Tensor from ._model_adapter import ModelAdapter os.environ["KERAS_BACKEND"] = settings.keras_backend # by default, we use the keras integrated with tensorflow +# TODO: check if we should prefer keras try: - import tensorflow as tf # pyright: ignore[reportMissingImports] - from tensorflow import ( # pyright: ignore[reportMissingImports] - keras, # pyright: ignore[reportUnknownVariableType] + import tensorflow as tf + from tensorflow import ( + keras, # pyright: ignore[reportUnknownVariableType,reportAttributeAccessIssue] ) - tf_version = Version(tf.__version__) # pyright: ignore[reportUnknownArgumentType] + tf_version = Version(tf.__version__) except Exception: - try: - import keras # pyright: ignore[reportMissingImports] - except Exception as e: - keras = None - keras_error = str(e) - else: - keras_error = None - tf_version = None -else: - keras_error = None + import keras class KerasModelAdapter(ModelAdapter): @@ -43,10 +35,7 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ) -> None: - if keras is None: - raise ImportError(f"failed to import keras: {keras_error}") - - super().__init__() + super().__init__(model_description=model_description) if model_description.weights.keras_hdf5 is None: raise ValueError("model has not keras_hdf5 weights specified") model_tf_version = model_description.weights.keras_hdf5.tensorflow_version @@ -84,22 +73,14 @@ def __init__( for out in model_description.outputs ] - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - _result: Union[Sequence[NDArray[Any]], NDArray[Any]] - _result = self._network.predict( # pyright: ignore[reportUnknownVariableType] - *[None if t is None else t.data.data for t in input_tensors] - ) - if isinstance(_result, (tuple, list)): - result: Sequence[NDArray[Any]] = _result + def _forward_impl( # pyright: ignore[reportUnknownParameterType] + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ): + network_output = self._network.predict(*input_arrays) # type: ignore + if is_list(network_output) or is_tuple(network_output): + return network_output else: - result = [_result] # type: ignore - - assert len(result) == len(self._output_axes) - ret: List[Optional[Tensor]] = [] - ret.extend( - [Tensor(r, dims=axes) for r, axes, in zip(result, self._output_axes)] - ) - return ret + return [network_output] # pyright: ignore[reportUnknownVariableType] def unload(self) -> None: 
logger.warning( diff --git a/bioimageio/core/model_adapters/_onnx_model_adapter.py b/bioimageio/core/backends/onnx_backend.py similarity index 53% rename from bioimageio/core/model_adapters/_onnx_model_adapter.py rename to bioimageio/core/backends/onnx_backend.py index c747de22..858b4cc1 100644 --- a/bioimageio/core/model_adapters/_onnx_model_adapter.py +++ b/bioimageio/core/backends/onnx_backend.py @@ -1,22 +1,16 @@ import warnings from typing import Any, List, Optional, Sequence, Union +import onnxruntime as rt from numpy.typing import NDArray +from bioimageio.spec._internal.type_guards import is_list, is_tuple from bioimageio.spec.model import v0_4, v0_5 from bioimageio.spec.utils import download from ..digest_spec import get_axes_infos +from ..model_adapters import ModelAdapter from ..tensor import Tensor -from ._model_adapter import ModelAdapter - -try: - import onnxruntime as rt -except Exception as e: - rt = None - rt_error = str(e) -else: - rt_error = None class ONNXModelAdapter(ModelAdapter): @@ -26,14 +20,8 @@ def __init__( model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], devices: Optional[Sequence[str]] = None, ): - if rt is None: - raise ImportError(f"failed to import onnxruntime: {rt_error}") + super().__init__(model_description=model_description) - super().__init__() - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] if model_description.weights.onnx is None: raise ValueError("No ONNX weights specified for {model_description.name}") @@ -48,22 +36,18 @@ def __init__( f"Device management is not implemented for onnx yet, ignoring the devices {devices}" ) - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - assert len(input_tensors) == len(self._input_names) - input_arrays = [None if ipt is None else ipt.data.data for ipt in input_tensors] - result: Union[Sequence[Optional[NDArray[Any]]], Optional[NDArray[Any]]] - result = self._session.run( # pyright: ignore[reportUnknownVariableType] + def _forward_impl( + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ) -> List[Optional[NDArray[Any]]]: + result: Any = self._session.run( None, dict(zip(self._input_names, input_arrays)) ) - if isinstance(result, (list, tuple)): - result_seq: Sequence[Optional[NDArray[Any]]] = result + if is_list(result) or is_tuple(result): + result_seq = result else: - result_seq = [result] # type: ignore + result_seq = [result] - return [ - None if r is None else Tensor(r, dims=axes) - for r, axes in zip(result_seq, self._internal_output_axes) - ] + return result_seq # pyright: ignore[reportReturnType] def unload(self) -> None: warnings.warn( diff --git a/bioimageio/core/backends/pytorch_backend.py b/bioimageio/core/backends/pytorch_backend.py new file mode 100644 index 00000000..a7fecfb7 --- /dev/null +++ b/bioimageio/core/backends/pytorch_backend.py @@ -0,0 +1,167 @@ +import gc +import warnings +from contextlib import nullcontext +from io import TextIOWrapper +from pathlib import Path +from typing import Any, List, Literal, Optional, Sequence, Union + +import torch +from loguru import logger +from numpy.typing import NDArray +from torch import nn +from typing_extensions import assert_never + +from bioimageio.spec._internal.type_guards import is_list, is_ndarray, is_tuple +from bioimageio.spec.common import ZipPath +from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 +from bioimageio.spec.utils import download + +from ..digest_spec import import_callable +from ._model_adapter import 
ModelAdapter + + +class PytorchModelAdapter(ModelAdapter): + def __init__( + self, + *, + model_description: AnyModelDescr, + devices: Optional[Sequence[Union[str, torch.device]]] = None, + mode: Literal["eval", "train"] = "eval", + ): + super().__init__(model_description=model_description) + weights = model_description.weights.pytorch_state_dict + if weights is None: + raise ValueError("No `pytorch_state_dict` weights found") + + devices = get_devices(devices) + self._model = load_torch_model(weights, load_state=True, devices=devices) + if mode == "eval": + self._model = self._model.eval() + elif mode == "train": + self._model = self._model.train() + else: + assert_never(mode) + + self._mode: Literal["eval", "train"] = mode + self._primary_device = devices[0] + + def _forward_impl( + self, input_arrays: Sequence[NDArray[Any] | None] + ) -> List[Optional[NDArray[Any]]]: + tensors = [ + None if a is None else torch.from_numpy(a).to(self._primary_device) + for a in input_arrays + ] + + if self._mode == "eval": + ctxt = torch.no_grad + elif self._mode == "train": + ctxt = nullcontext + else: + assert_never(self._mode) + + with ctxt(): + model_out = self._model(*tensors) + + if is_tuple(model_out) or is_list(model_out): + model_out_seq = model_out + else: + model_out_seq = model_out = [model_out] + + result: List[Optional[NDArray[Any]]] = [] + for i, r in enumerate(model_out_seq): + if r is None: + result.append(None) + elif isinstance(r, torch.Tensor): + r_np: NDArray[Any] = r.detach().cpu().numpy() + result.append(r_np) + elif is_ndarray(r): + result.append(r) + else: + raise TypeError(f"Model output[{i}] has unexpected type {type(r)}.") + + return result + + def unload(self) -> None: + del self._model + _ = gc.collect() # deallocate memory + assert torch is not None + torch.cuda.empty_cache() # release reserved memory + + +def load_torch_model( + weight_spec: Union[ + v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr + ], + *, + load_state: bool = False, + devices: Optional[Sequence[Union[str, torch.device]]] = None, +) -> nn.Module: + arch = import_callable( + weight_spec.architecture, + sha256=( + weight_spec.architecture_sha256 + if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) + else weight_spec.sha256 + ), + ) + model_kwargs = ( + weight_spec.kwargs + if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) + else weight_spec.architecture.kwargs + ) + network = arch(**model_kwargs) + if not isinstance(network, nn.Module): + raise ValueError( + f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" + ) + + if load_state or devices: + use_devices = get_devices(devices) + network = network.to(use_devices[0]) + if load_state: + network = load_torch_state_dict( + network, + path=download(weight_spec).path, + devices=use_devices, + ) + return network + + +def load_torch_state_dict( + model: nn.Module, + path: Union[Path, ZipPath], + devices: Sequence[torch.device], +) -> nn.Module: + model = model.to(devices[0]) + with path.open("rb") as f: + assert not isinstance(f, TextIOWrapper) + state = torch.load(f, map_location=devices[0]) + + incompatible = model.load_state_dict(state) + if incompatible is not None and incompatible.missing_keys: + logger.warning("Missing state dict keys: {}", incompatible.missing_keys) + + if incompatible is not None and incompatible.unexpected_keys: + logger.warning("Unexpected state dict keys: {}", incompatible.unexpected_keys) + + return model + + +def get_devices( + devices: 
Optional[Sequence[Union[torch.device, str]]] = None, +) -> List[torch.device]: + if not devices: + torch_devices = [ + (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")) + ] + else: + torch_devices = [torch.device(d) for d in devices] + + if len(torch_devices) > 1: + warnings.warn( + f"Multiple devices for single pytorch model not yet implemented; ignoring {torch_devices[1:]}" + ) + torch_devices = torch_devices[:1] + + return torch_devices diff --git a/bioimageio/core/backends/tensorflow_backend.py b/bioimageio/core/backends/tensorflow_backend.py new file mode 100644 index 00000000..83fa4813 --- /dev/null +++ b/bioimageio/core/backends/tensorflow_backend.py @@ -0,0 +1,212 @@ +from pathlib import Path +from typing import Any, Optional, Sequence, Union + +import numpy as np +import tensorflow as tf +from loguru import logger +from numpy.typing import NDArray + +from bioimageio.core.io import ensure_unzipped +from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 + +from ._model_adapter import ModelAdapter + + +class TensorflowModelAdapter(ModelAdapter): + weight_format = "tensorflow_saved_model_bundle" + + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, + ): + super().__init__(model_description=model_description) + + weight_file = model_description.weights.tensorflow_saved_model_bundle + if model_description.weights.tensorflow_saved_model_bundle is None: + raise ValueError("No `tensorflow_saved_model_bundle` weights found") + + if devices is not None: + logger.warning( + f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" + ) + + # TODO: check how to load tf weights without unzipping + weight_file = ensure_unzipped( + model_description.weights.tensorflow_saved_model_bundle.source, + Path("bioimageio_unzipped_tf_weights"), + ) + self._network = str(weight_file) + + # TODO currently we reload the model every time.
it would be better to keep the graph and session + # alive in between of forward passes (but then the sessions need to be properly opened / closed) + def _forward_impl( # pyright: ignore[reportUnknownParameterType] + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ): + # TODO read from spec + tag = ( # pyright: ignore[reportUnknownVariableType] + tf.saved_model.tag_constants.SERVING # pyright: ignore[reportAttributeAccessIssue] + ) + signature_key = ( # pyright: ignore[reportUnknownVariableType] + tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY # pyright: ignore[reportAttributeAccessIssue] + ) + + graph = tf.Graph() + with graph.as_default(): + with tf.Session( # pyright: ignore[reportAttributeAccessIssue] + graph=graph + ) as sess: # pyright: ignore[reportUnknownVariableType] + # load the model and the signature + graph_def = tf.saved_model.loader.load( # pyright: ignore[reportUnknownVariableType,reportAttributeAccessIssue] + sess, [tag], self._network + ) + signature = ( # pyright: ignore[reportUnknownVariableType] + graph_def.signature_def + ) + + # get the tensors into the graph + in_names = [ # pyright: ignore[reportUnknownVariableType] + signature[signature_key].inputs[key].name for key in self._input_ids + ] + out_names = [ # pyright: ignore[reportUnknownVariableType] + signature[signature_key].outputs[key].name + for key in self._output_ids + ] + in_tf_tensors = [ + graph.get_tensor_by_name( + name # pyright: ignore[reportUnknownArgumentType] + ) + for name in in_names # pyright: ignore[reportUnknownVariableType] + ] + out_tf_tensors = [ + graph.get_tensor_by_name( + name # pyright: ignore[reportUnknownArgumentType] + ) + for name in out_names # pyright: ignore[reportUnknownVariableType] + ] + + # run prediction + res = sess.run( # pyright: ignore[reportUnknownVariableType] + dict( + zip( + out_names, # pyright: ignore[reportUnknownArgumentType] + out_tf_tensors, + ) + ), + dict(zip(in_tf_tensors, input_arrays)), + ) + # from dict to list of tensors + res = [ # pyright: ignore[reportUnknownVariableType] + res[out] + for out in out_names # pyright: ignore[reportUnknownVariableType] + ] + + return res # pyright: ignore[reportUnknownVariableType] + + def unload(self) -> None: + logger.warning( + "Device management is not implemented for tensorflow 1, cannot unload model" + ) + + +class KerasModelAdapter(ModelAdapter): + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, + ): + if model_description.weights.tensorflow_saved_model_bundle is None: + raise ValueError("No `tensorflow_saved_model_bundle` weights found") + + super().__init__(model_description=model_description) + if devices is not None: + logger.warning( + f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" + ) + + # TODO: check how to load tf weights without unzipping + weight_file = ensure_unzipped( + model_description.weights.tensorflow_saved_model_bundle.source, + Path("bioimageio_unzipped_tf_weights"), + ) + + try: + self._network = tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue] + weight_file, + call_endpoint="serve", + ) + except Exception as e: + try: + self._network = tf.keras.layers.TFSMLayer( # pyright: ignore[reportAttributeAccessIssue] + weight_file, call_endpoint="serving_default" + ) + except Exception as ee: + logger.opt(exception=ee).info( + "keras.layers.TFSMLayer error for alternative call_endpoint='serving_default'" + ) + raise e + + 
def _forward_impl( # pyright: ignore[reportUnknownParameterType] + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ): + assert tf is not None + tf_tensor = [ + None if ipt is None else tf.convert_to_tensor(ipt) for ipt in input_arrays + ] + + result = self._network(*tf_tensor) # pyright: ignore[reportUnknownVariableType] + + assert isinstance(result, dict) + + # TODO: Use RDF's `outputs[i].id` here + result = list( # pyright: ignore[reportUnknownVariableType] + result.values() # pyright: ignore[reportUnknownArgumentType] + ) + + return [ # pyright: ignore[reportUnknownVariableType] + (None if r is None else r if isinstance(r, np.ndarray) else r.numpy()) + for r in result # pyright: ignore[reportUnknownVariableType] + ] + + def unload(self) -> None: + logger.warning( + "Device management is not implemented for tensorflow>=2 models" + + f" using `{self.__class__.__name__}`, cannot unload model" + ) + + +def create_tf_model_adapter( + model_description: AnyModelDescr, devices: Optional[Sequence[str]] +): + tf_version = v0_5.Version(tf.__version__) + weights = model_description.weights.tensorflow_saved_model_bundle + if weights is None: + raise ValueError("No `tensorflow_saved_model_bundle` weights found") + + model_tf_version = weights.tensorflow_version + if model_tf_version is None: + logger.warning( + "The model does not specify the tensorflow version. " + + f"Cannot check if it is compatible with installed tensorflow {tf_version}." + ) + elif model_tf_version > tf_version: + logger.warning( + f"The model specifies a newer tensorflow version than installed: {model_tf_version} > {tf_version}." + ) + elif (model_tf_version.major, model_tf_version.minor) != ( + tf_version.major, + tf_version.minor, + ): + logger.warning( + "The tensorflow version specified by the model does not match the installed: " + + f"{model_tf_version} != {tf_version}."
+ ) + + if tf_version.major <= 1: + return TensorflowModelAdapter( + model_description=model_description, devices=devices + ) + else: + return KerasModelAdapter(model_description=model_description, devices=devices) diff --git a/bioimageio/core/backends/torchscript_backend.py b/bioimageio/core/backends/torchscript_backend.py new file mode 100644 index 00000000..26924e3c --- /dev/null +++ b/bioimageio/core/backends/torchscript_backend.py @@ -0,0 +1,71 @@ +import gc +import warnings +from typing import Any, List, Optional, Sequence, Union + +import torch +from numpy.typing import NDArray + +from bioimageio.spec._internal.type_guards import is_list, is_tuple +from bioimageio.spec.model import v0_4, v0_5 +from bioimageio.spec.utils import download + +from ..model_adapters import ModelAdapter + + +class TorchscriptModelAdapter(ModelAdapter): + def __init__( + self, + *, + model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], + devices: Optional[Sequence[str]] = None, + ): + super().__init__(model_description=model_description) + if model_description.weights.torchscript is None: + raise ValueError( + f"No torchscript weights found for model {model_description.name}" + ) + + weight_path = download(model_description.weights.torchscript.source).path + if devices is None: + self.devices = ["cuda" if torch.cuda.is_available() else "cpu"] + else: + self.devices = [torch.device(d) for d in devices] + + if len(self.devices) > 1: + warnings.warn( + "Multiple devices for single torchscript model not yet implemented" + ) + + self._model = torch.jit.load(weight_path) + self._model.to(self.devices[0]) + self._model = self._model.eval() + + def _forward_impl( + self, input_arrays: Sequence[Optional[NDArray[Any]]] + ) -> List[Optional[NDArray[Any]]]: + + with torch.no_grad(): + torch_tensor = [ + None if a is None else torch.from_numpy(a).to(self.devices[0]) + for a in input_arrays + ] + output: Any = self._model.forward(*torch_tensor) + if is_list(output) or is_tuple(output): + output_seq: Sequence[Any] = output + else: + output_seq = [output] + + return [ + ( + None + if r is None + else r.cpu().numpy() if isinstance(r, torch.Tensor) else r + ) + for r in output_seq + ] + + def unload(self) -> None: + self._devices = None + del self._model + _ = gc.collect() # deallocate memory + torch.cuda.empty_cache() # release reserved memory diff --git a/bioimageio/core/cli.py b/bioimageio/core/cli.py index fad44ab3..49700b43 100644 --- a/bioimageio/core/cli.py +++ b/bioimageio/core/cli.py @@ -18,6 +18,7 @@ Dict, Iterable, List, + Literal, Mapping, Optional, Sequence, @@ -113,14 +114,14 @@ def descr_id(self) -> str: class ValidateFormatCmd(CmdBase, WithSource): - """validate the meta data format of a bioimageio resource.""" + """Validate the meta data format of a bioimageio resource.""" def run(self): sys.exit(validate_format(self.descr)) class TestCmd(CmdBase, WithSource): - """Test a bioimageio resource (beyond meta data formatting)""" + """Test a bioimageio resource (beyond meta data formatting).""" weight_format: WeightFormatArgAll = "all" """The weight format to limit testing to. 
@@ -133,6 +134,24 @@ class TestCmd(CmdBase, WithSource): decimal: int = 4 """Precision for numerical comparisons""" + runtime_env: Union[Literal["currently-active", "as-described"], Path] = Field( + "currently-active", alias="runtime-env" + ) + """The python environment to run the tests in + + - `"currently-active"`: use active Python interpreter + - `"as-described"`: generate a conda environment YAML file based on the model + weights description. + - A path to a conda environment YAML. + Note: The `bioimageio.core` dependency will be added automatically if not present. + """ + + summary_path: Optional[Path] = Field(None, alias="summary-path") + """Path to save validation summary as JSON file.""" + + determinism: Literal["seed_only", "full"] = "seed_only" + """Modes to improve reproducibility of test outputs.""" + def run(self): sys.exit( test( @@ -140,12 +159,15 @@ def run(self): weight_format=self.weight_format, devices=self.devices, decimal=self.decimal, + summary_path=self.summary_path, + runtime_env=self.runtime_env, + determinism=self.determinism, ) ) class PackageCmd(CmdBase, WithSource): - """save a resource's metadata with its associated files.""" + """Save a resource's metadata with its associated files.""" path: CliPositionalArg[Path] """The path to write the (zipped) package to. @@ -551,10 +573,10 @@ def input_dataset(stat: Stat): class Bioimageio( BaseSettings, + cli_implicit_flags=True, cli_parse_args=True, cli_prog_name="bioimageio", cli_use_class_docs_for_groups=True, - cli_implicit_flags=True, use_attribute_docstrings=True, ): """bioimageio - CLI for bioimage.io resources 🦒""" diff --git a/bioimageio/core/commands.py b/bioimageio/core/commands.py index c71d495f..92d7ddbc 100644 --- a/bioimageio/core/commands.py +++ b/bioimageio/core/commands.py @@ -26,15 +26,15 @@ def test( weight_format: WeightFormatArgAll = "all", devices: Optional[Union[str, Sequence[str]]] = None, decimal: int = 4, + summary_path: Optional[Path] = None, + runtime_env: Union[ + Literal["currently-active", "as-described"], Path + ] = "currently-active", + determinism: Literal["seed_only", "full"] = "seed_only", ) -> int: - """test a bioimageio resource + """Test a bioimageio resource. 
- Args: - source: Path or URL to the bioimageio resource description file - (bioimageio.yaml or rdf.yaml) or to a zipped resource - weight_format: (model only) The weight format to use - devices: Device(s) to use for testing - decimal: Precision for numerical comparisons + Arguments as described in `bioimageio.core.cli.TestCmd` """ if isinstance(descr, InvalidDescr): descr.validation_summary.display() @@ -45,8 +45,13 @@ def test( weight_format=None if weight_format == "all" else weight_format, devices=[devices] if isinstance(devices, str) else devices, decimal=decimal, + runtime_env=runtime_env, + determinism=determinism, ) summary.display() + if summary_path is not None: + _ = summary_path.write_text(summary.model_dump_json(indent=4)) + return 0 if summary.status == "passed" else 1 diff --git a/bioimageio/core/digest_spec.py b/bioimageio/core/digest_spec.py index edb5a45d..5789f377 100644 --- a/bioimageio/core/digest_spec.py +++ b/bioimageio/core/digest_spec.py @@ -23,8 +23,7 @@ from numpy.typing import NDArray from typing_extensions import Unpack, assert_never -from bioimageio.spec._internal.io import resolve_and_extract -from bioimageio.spec._internal.io_utils import HashKwargs +from bioimageio.spec._internal.io import HashKwargs, resolve_and_extract from bioimageio.spec.common import FileSource from bioimageio.spec.model import AnyModelDescr, v0_4, v0_5 from bioimageio.spec.model.v0_4 import CallableFromDepencency, CallableFromFile @@ -35,7 +34,7 @@ ) from bioimageio.spec.utils import load_array -from .axis import AxisId, AxisInfo, AxisLike, PerAxis +from .axis import Axis, AxisId, AxisInfo, AxisLike, PerAxis from .block_meta import split_multiple_shapes_into_blocks from .common import Halo, MemberId, PerMember, SampleId, TotalNumberOfBlocks from .io import load_tensor @@ -50,7 +49,12 @@ def import_callable( - node: Union[CallableFromDepencency, ArchitectureFromLibraryDescr], + node: Union[ + ArchitectureFromFileDescr, + ArchitectureFromLibraryDescr, + CallableFromDepencency, + CallableFromFile, + ], /, **kwargs: Unpack[HashKwargs], ) -> Callable[..., Any]: @@ -65,7 +69,6 @@ def import_callable( c = _import_from_file_impl(node.source_file, str(node.callable_name), **kwargs) elif isinstance(node, ArchitectureFromFileDescr): c = _import_from_file_impl(node.source, str(node.callable), sha256=node.sha256) - else: assert_never(node) @@ -100,14 +103,15 @@ def get_axes_infos( ], ) -> List[AxisInfo]: """get a unified, simplified axis representation from spec axes""" - return [ - ( - AxisInfo.create("i") - if isinstance(a, str) and a not in ("b", "i", "t", "c", "z", "y", "x") - else AxisInfo.create(a) - ) - for a in io_descr.axes - ] + ret: List[AxisInfo] = [] + for a in io_descr.axes: + if isinstance(a, v0_5.AxisBase): + ret.append(AxisInfo.create(Axis(id=a.id, type=a.type))) + else: + assert a in ("b", "i", "t", "c", "z", "y", "x") + ret.append(AxisInfo.create(a)) + + return ret def get_member_id( @@ -335,7 +339,7 @@ def create_sample_for_model( sample_id: SampleId = None, inputs: Optional[ PerMember[Union[Tensor, xr.DataArray, NDArray[Any], Path]] - ] = None, # TODO: make non-optional + ] = None, # TODO: make non-optional # TODO: accept tuple of tensor sources **kwargs: NDArray[Any], # TODO: deprecate in favor of `inputs` ) -> Sample: """Create a sample from a single set of input(s) for a specific bioimage.io model diff --git a/bioimageio/core/io.py b/bioimageio/core/io.py index ee60a67a..d80c6870 100644 --- a/bioimageio/core/io.py +++ b/bioimageio/core/io.py @@ -1,6 +1,9 @@ import 
collections.abc import warnings +import zipfile +from io import TextIOWrapper from pathlib import Path, PurePosixPath +from shutil import copyfileobj from typing import Any, Mapping, Optional, Sequence, Tuple, Union import h5py @@ -10,7 +13,8 @@ from numpy.typing import NDArray from pydantic import BaseModel, ConfigDict, TypeAdapter -from bioimageio.spec.utils import load_array, save_array +from bioimageio.spec.common import FileSource, ZipPath +from bioimageio.spec.utils import download, load_array, save_array from .axis import AxisLike from .common import PerMember @@ -77,11 +81,11 @@ def _split_dataset_path(path: Path) -> Tuple[Path, Optional[PurePosixPath]]: Examples: >>> _split_dataset_path(Path("my_file.h5/dataset")) - (PosixPath('my_file.h5'), PurePosixPath('dataset')) + (...Path('my_file.h5'), PurePosixPath('dataset')) If no suffix is detected the path is returned with >>> _split_dataset_path(Path("my_plain_file")) - (PosixPath('my_plain_file'), None) + (...Path('my_plain_file'), None) """ if path.suffix: @@ -176,3 +180,27 @@ def save_dataset_stat(stat: Mapping[DatasetMeasure, MeasureValue], path: Path): def load_dataset_stat(path: Path): seq = _stat_adapter.validate_json(path.read_bytes()) return {e.measure: e.value for e in seq} + + +def ensure_unzipped(source: Union[FileSource, ZipPath], folder: Path): + """unzip a (downloaded) **source** to a file in **folder** if source is a zip archive. + Always returns the path to the unzipped source (maybe source itself)""" + local_weights_file = download(source).path + if isinstance(local_weights_file, ZipPath): + # source is inside a zip archive + out_path = folder / local_weights_file.filename + with local_weights_file.open("rb") as src, out_path.open("wb") as dst: + assert not isinstance(src, TextIOWrapper) + copyfileobj(src, dst) + + local_weights_file = out_path + + if zipfile.is_zipfile(local_weights_file): + # source itself is a zipfile + out_path = folder / local_weights_file.with_suffix(".unzipped").name + with zipfile.ZipFile(local_weights_file, "r") as f: + f.extractall(out_path) + + return out_path + else: + return local_weights_file diff --git a/bioimageio/core/model_adapters.py b/bioimageio/core/model_adapters.py new file mode 100644 index 00000000..db92d013 --- /dev/null +++ b/bioimageio/core/model_adapters.py @@ -0,0 +1,22 @@ +"""DEPRECATED""" + +from typing import List + +from .backends._model_adapter import ( + DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER, + ModelAdapter, + create_model_adapter, +) + +__all__ = [ + "ModelAdapter", + "create_model_adapter", + "get_weight_formats", +] + + +def get_weight_formats() -> List[str]: + """ + Return list of supported weight types + """ + return list(DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER) diff --git a/bioimageio/core/model_adapters/__init__.py b/bioimageio/core/model_adapters/__init__.py deleted file mode 100644 index 01899de9..00000000 --- a/bioimageio/core/model_adapters/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from ._model_adapter import ModelAdapter, create_model_adapter, get_weight_formats - -__all__ = [ - "ModelAdapter", - "create_model_adapter", - "get_weight_formats", -] diff --git a/bioimageio/core/model_adapters/_model_adapter.py b/bioimageio/core/model_adapters/_model_adapter.py deleted file mode 100644 index c918603e..00000000 --- a/bioimageio/core/model_adapters/_model_adapter.py +++ /dev/null @@ -1,177 +0,0 @@ -import warnings -from abc import ABC, abstractmethod -from typing import List, Optional, Sequence, Tuple, Union, final - -from bioimageio.spec.model import v0_4, v0_5 
- -from ..tensor import Tensor - -WeightsFormat = Union[v0_4.WeightsFormat, v0_5.WeightsFormat] - -# Known weight formats in order of priority -# First match wins -DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER: Tuple[WeightsFormat, ...] = ( - "pytorch_state_dict", - "tensorflow_saved_model_bundle", - "torchscript", - "onnx", - "keras_hdf5", -) - - -class ModelAdapter(ABC): - """ - Represents model *without* any preprocessing or postprocessing. - - ``` - from bioimageio.core import load_description - - model = load_description(...) - - # option 1: - adapter = ModelAdapter.create(model) - adapter.forward(...) - adapter.unload() - - # option 2: - with ModelAdapter.create(model) as adapter: - adapter.forward(...) - ``` - """ - - @final - @classmethod - def create( - cls, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - *, - devices: Optional[Sequence[str]] = None, - weight_format_priority_order: Optional[Sequence[WeightsFormat]] = None, - ): - """ - Creates model adapter based on the passed spec - Note: All specific adapters should happen inside this function to prevent different framework - initializations interfering with each other - """ - if not isinstance(model_description, (v0_4.ModelDescr, v0_5.ModelDescr)): - raise TypeError( - f"expected v0_4.ModelDescr or v0_5.ModelDescr, but got {type(model_description)}" - ) - - weights = model_description.weights - errors: List[Tuple[WeightsFormat, Exception]] = [] - weight_format_priority_order = ( - DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER - if weight_format_priority_order is None - else weight_format_priority_order - ) - # limit weight formats to the ones present - weight_format_priority_order = [ - w for w in weight_format_priority_order if getattr(weights, w) is not None - ] - - for wf in weight_format_priority_order: - if wf == "pytorch_state_dict" and weights.pytorch_state_dict is not None: - try: - from ._pytorch_model_adapter import PytorchModelAdapter - - return PytorchModelAdapter( - outputs=model_description.outputs, - weights=weights.pytorch_state_dict, - devices=devices, - ) - except Exception as e: - errors.append((wf, e)) - elif ( - wf == "tensorflow_saved_model_bundle" - and weights.tensorflow_saved_model_bundle is not None - ): - try: - from ._tensorflow_model_adapter import TensorflowModelAdapter - - return TensorflowModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - errors.append((wf, e)) - elif wf == "onnx" and weights.onnx is not None: - try: - from ._onnx_model_adapter import ONNXModelAdapter - - return ONNXModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - errors.append((wf, e)) - elif wf == "torchscript" and weights.torchscript is not None: - try: - from ._torchscript_model_adapter import TorchscriptModelAdapter - - return TorchscriptModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - errors.append((wf, e)) - elif wf == "keras_hdf5" and weights.keras_hdf5 is not None: - # keras can either be installed as a separate package or used as part of tensorflow - # we try to first import the keras model adapter using the separate package and, - # if it is not available, try to load the one using tf - try: - from ._keras_model_adapter import ( - KerasModelAdapter, - keras, # type: ignore - ) - - if keras is None: - from ._tensorflow_model_adapter import KerasModelAdapter - - return KerasModelAdapter( - model_description=model_description, devices=devices - ) - except Exception as e: - 
errors.append((wf, e)) - - assert errors - if len(weight_format_priority_order) == 1: - assert len(errors) == 1 - raise ValueError( - f"The '{weight_format_priority_order[0]}' model adapter could not be created" - + f" in this environment:\n{errors[0][1].__class__.__name__}({errors[0][1]}).\n\n" - ) - - else: - error_list = "\n - ".join( - f"{wf}: {e.__class__.__name__}({e})" for wf, e in errors - ) - raise ValueError( - "None of the weight format specific model adapters could be created" - + f" in this environment. Errors are:\n\n{error_list}.\n\n" - ) - - @final - def load(self, *, devices: Optional[Sequence[str]] = None) -> None: - warnings.warn("Deprecated. ModelAdapter is loaded on initialization") - - @abstractmethod - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - """ - Run forward pass of model to get model predictions - """ - # TODO: handle tensor.transpose in here and make _forward_impl the abstract impl - - @abstractmethod - def unload(self): - """ - Unload model from any devices, freeing their memory. - The moder adapter should be considered unusable afterwards. - """ - - -def get_weight_formats() -> List[str]: - """ - Return list of supported weight types - """ - return list(DEFAULT_WEIGHT_FORMAT_PRIORITY_ORDER) - - -create_model_adapter = ModelAdapter.create diff --git a/bioimageio/core/model_adapters/_pytorch_model_adapter.py b/bioimageio/core/model_adapters/_pytorch_model_adapter.py deleted file mode 100644 index a5178d74..00000000 --- a/bioimageio/core/model_adapters/_pytorch_model_adapter.py +++ /dev/null @@ -1,153 +0,0 @@ -import gc -import warnings -from typing import Any, List, Optional, Sequence, Tuple, Union - -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -from ..axis import AxisId -from ..digest_spec import get_axes_infos, import_callable -from ..tensor import Tensor -from ._model_adapter import ModelAdapter - -try: - import torch -except Exception as e: - torch = None - torch_error = str(e) -else: - torch_error = None - - -class PytorchModelAdapter(ModelAdapter): - def __init__( - self, - *, - outputs: Union[ - Sequence[v0_4.OutputTensorDescr], Sequence[v0_5.OutputTensorDescr] - ], - weights: Union[ - v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr - ], - devices: Optional[Sequence[str]] = None, - ): - if torch is None: - raise ImportError(f"failed to import torch: {torch_error}") - - super().__init__() - self.output_dims = [tuple(a.id for a in get_axes_infos(out)) for out in outputs] - self._network = self.get_network(weights) - self._devices = self.get_devices(devices) - self._network = self._network.to(self._devices[0]) - - self._primary_device = self._devices[0] - state: Any = torch.load( - download(weights).path, - map_location=self._primary_device, # pyright: ignore[reportUnknownArgumentType] - ) - self._network.load_state_dict(state) - - self._network = self._network.eval() - - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - if torch is None: - raise ImportError("torch") - with torch.no_grad(): - tensors = [ - None if ipt is None else torch.from_numpy(ipt.data.data) - for ipt in input_tensors - ] - tensors = [ - ( - None - if t is None - else t.to( - self._primary_device # pyright: ignore[reportUnknownArgumentType] - ) - ) - for t in tensors - ] - result: Union[Tuple[Any, ...], List[Any], Any] - result = self._network( # pyright: ignore[reportUnknownVariableType] - *tensors - ) - if not isinstance(result, (tuple, list)): - 
result = [result] - - result = [ - ( - None - if r is None - else r.detach().cpu().numpy() if isinstance(r, torch.Tensor) else r - ) - for r in result # pyright: ignore[reportUnknownVariableType] - ] - if len(result) > len(self.output_dims): - raise ValueError( - f"Expected at most {len(self.output_dims)} outputs, but got {len(result)}" - ) - - return [ - None if r is None else Tensor(r, dims=out) - for r, out in zip(result, self.output_dims) - ] - - def unload(self) -> None: - del self._network - _ = gc.collect() # deallocate memory - assert torch is not None - torch.cuda.empty_cache() # release reserved memory - - @staticmethod - def get_network( # pyright: ignore[reportUnknownParameterType] - weight_spec: Union[ - v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr - ], - ) -> "torch.nn.Module": # pyright: ignore[reportInvalidTypeForm] - if torch is None: - raise ImportError("torch") - arch = import_callable( - weight_spec.architecture, - sha256=( - weight_spec.architecture_sha256 - if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) - else weight_spec.sha256 - ), - ) - model_kwargs = ( - weight_spec.kwargs - if isinstance(weight_spec, v0_4.PytorchStateDictWeightsDescr) - else weight_spec.architecture.kwargs - ) - network = arch(**model_kwargs) - if not isinstance(network, torch.nn.Module): - raise ValueError( - f"calling {weight_spec.architecture.callable} did not return a torch.nn.Module" - ) - - return network - - @staticmethod - def get_devices( # pyright: ignore[reportUnknownParameterType] - devices: Optional[Sequence[str]] = None, - ) -> List["torch.device"]: # pyright: ignore[reportInvalidTypeForm] - if torch is None: - raise ImportError("torch") - if not devices: - torch_devices = [ - ( - torch.device("cuda") - if torch.cuda.is_available() - else torch.device("cpu") - ) - ] - else: - torch_devices = [torch.device(d) for d in devices] - - if len(torch_devices) > 1: - warnings.warn( - f"Multiple devices for single pytorch model not yet implemented; ignoring {torch_devices[1:]}" - ) - torch_devices = torch_devices[:1] - - return torch_devices diff --git a/bioimageio/core/model_adapters/_tensorflow_model_adapter.py b/bioimageio/core/model_adapters/_tensorflow_model_adapter.py deleted file mode 100644 index cfb264f0..00000000 --- a/bioimageio/core/model_adapters/_tensorflow_model_adapter.py +++ /dev/null @@ -1,275 +0,0 @@ -import zipfile -from typing import List, Literal, Optional, Sequence, Union - -import numpy as np -from loguru import logger - -from bioimageio.spec.common import FileSource -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -from ..digest_spec import get_axes_infos -from ..tensor import Tensor -from ._model_adapter import ModelAdapter - -try: - import tensorflow as tf # pyright: ignore[reportMissingImports] -except Exception as e: - tf = None - tf_error = str(e) -else: - tf_error = None - - -class TensorflowModelAdapterBase(ModelAdapter): - weight_format: Literal["keras_hdf5", "tensorflow_saved_model_bundle"] - - def __init__( - self, - *, - devices: Optional[Sequence[str]] = None, - weights: Union[ - v0_4.KerasHdf5WeightsDescr, - v0_4.TensorflowSavedModelBundleWeightsDescr, - v0_5.KerasHdf5WeightsDescr, - v0_5.TensorflowSavedModelBundleWeightsDescr, - ], - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - ): - if tf is None: - raise ImportError(f"failed to import tensorflow: {tf_error}") - - super().__init__() - self.model_description = model_description - tf_version = v0_5.Version( - 
tf.__version__ # pyright: ignore[reportUnknownArgumentType] - ) - model_tf_version = weights.tensorflow_version - if model_tf_version is None: - logger.warning( - "The model does not specify the tensorflow version." - + f"Cannot check if it is compatible with intalled tensorflow {tf_version}." - ) - elif model_tf_version > tf_version: - logger.warning( - f"The model specifies a newer tensorflow version than installed: {model_tf_version} > {tf_version}." - ) - elif (model_tf_version.major, model_tf_version.minor) != ( - tf_version.major, - tf_version.minor, - ): - logger.warning( - "The tensorflow version specified by the model does not match the installed: " - + f"{model_tf_version} != {tf_version}." - ) - - self.use_keras_api = ( - tf_version.major > 1 - or self.weight_format == KerasModelAdapter.weight_format - ) - - # TODO tf device management - if devices is not None: - logger.warning( - f"Device management is not implemented for tensorflow yet, ignoring the devices {devices}" - ) - - weight_file = self.require_unzipped(weights.source) - self._network = self._get_network(weight_file) - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] - - def require_unzipped(self, weight_file: FileSource): - loacl_weights_file = download(weight_file).path - if zipfile.is_zipfile(loacl_weights_file): - out_path = loacl_weights_file.with_suffix(".unzipped") - with zipfile.ZipFile(loacl_weights_file, "r") as f: - f.extractall(out_path) - - return out_path - else: - return loacl_weights_file - - def _get_network( # pyright: ignore[reportUnknownParameterType] - self, weight_file: FileSource - ): - weight_file = self.require_unzipped(weight_file) - assert tf is not None - if self.use_keras_api: - try: - return tf.keras.layers.TFSMLayer( - weight_file, call_endpoint="serve" - ) # pyright: ignore[reportUnknownVariableType] - except Exception as e: - try: - return tf.keras.layers.TFSMLayer( - weight_file, call_endpoint="serving_default" - ) # pyright: ignore[reportUnknownVariableType] - except Exception as ee: - logger.opt(exception=ee).info( - "keras.layers.TFSMLayer error for alternative call_endpoint='serving_default'" - ) - raise e - else: - # NOTE in tf1 the model needs to be loaded inside of the session, so we cannot preload the model - return str(weight_file) - - # TODO currently we relaod the model every time. 
it would be better to keep the graph and session - # alive in between of forward passes (but then the sessions need to be properly opened / closed) - def _forward_tf( # pyright: ignore[reportUnknownParameterType] - self, *input_tensors: Optional[Tensor] - ): - assert tf is not None - input_keys = [ - ipt.name if isinstance(ipt, v0_4.InputTensorDescr) else ipt.id - for ipt in self.model_description.inputs - ] - output_keys = [ - out.name if isinstance(out, v0_4.OutputTensorDescr) else out.id - for out in self.model_description.outputs - ] - # TODO read from spec - tag = ( # pyright: ignore[reportUnknownVariableType] - tf.saved_model.tag_constants.SERVING - ) - signature_key = ( # pyright: ignore[reportUnknownVariableType] - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY - ) - - graph = tf.Graph() # pyright: ignore[reportUnknownVariableType] - with graph.as_default(): - with tf.Session( - graph=graph - ) as sess: # pyright: ignore[reportUnknownVariableType] - # load the model and the signature - graph_def = tf.saved_model.loader.load( # pyright: ignore[reportUnknownVariableType] - sess, [tag], self._network - ) - signature = ( # pyright: ignore[reportUnknownVariableType] - graph_def.signature_def - ) - - # get the tensors into the graph - in_names = [ # pyright: ignore[reportUnknownVariableType] - signature[signature_key].inputs[key].name for key in input_keys - ] - out_names = [ # pyright: ignore[reportUnknownVariableType] - signature[signature_key].outputs[key].name for key in output_keys - ] - in_tensors = [ # pyright: ignore[reportUnknownVariableType] - graph.get_tensor_by_name(name) - for name in in_names # pyright: ignore[reportUnknownVariableType] - ] - out_tensors = [ # pyright: ignore[reportUnknownVariableType] - graph.get_tensor_by_name(name) - for name in out_names # pyright: ignore[reportUnknownVariableType] - ] - - # run prediction - res = sess.run( # pyright: ignore[reportUnknownVariableType] - dict( - zip( - out_names, # pyright: ignore[reportUnknownArgumentType] - out_tensors, # pyright: ignore[reportUnknownArgumentType] - ) - ), - dict( - zip( - in_tensors, # pyright: ignore[reportUnknownArgumentType] - input_tensors, - ) - ), - ) - # from dict to list of tensors - res = [ # pyright: ignore[reportUnknownVariableType] - res[out] - for out in out_names # pyright: ignore[reportUnknownVariableType] - ] - - return res # pyright: ignore[reportUnknownVariableType] - - def _forward_keras( # pyright: ignore[reportUnknownParameterType] - self, *input_tensors: Optional[Tensor] - ): - assert self.use_keras_api - assert not isinstance(self._network, str) - assert tf is not None - tf_tensor = [ # pyright: ignore[reportUnknownVariableType] - None if ipt is None else tf.convert_to_tensor(ipt) for ipt in input_tensors - ] - - result = self._network(*tf_tensor) # pyright: ignore[reportUnknownVariableType] - - assert isinstance(result, dict) - - # TODO: Use RDF's `outputs[i].id` here - result = list(result.values()) - - return [ # pyright: ignore[reportUnknownVariableType] - (None if r is None else r if isinstance(r, np.ndarray) else r.numpy()) - for r in result # pyright: ignore[reportUnknownVariableType] - ] - - def forward(self, *input_tensors: Optional[Tensor]) -> List[Optional[Tensor]]: - data = [None if ipt is None else ipt.data for ipt in input_tensors] - if self.use_keras_api: - result = self._forward_keras( # pyright: ignore[reportUnknownVariableType] - *data - ) - else: - result = self._forward_tf( # pyright: ignore[reportUnknownVariableType] - *data - ) - - 
return [ - None if r is None else Tensor(r, dims=axes) - for r, axes in zip( # pyright: ignore[reportUnknownVariableType] - result, # pyright: ignore[reportUnknownArgumentType] - self._internal_output_axes, - ) - ] - - def unload(self) -> None: - logger.warning( - "Device management is not implemented for keras yet, cannot unload model" - ) - - -class TensorflowModelAdapter(TensorflowModelAdapterBase): - weight_format = "tensorflow_saved_model_bundle" - - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, - ): - if model_description.weights.tensorflow_saved_model_bundle is None: - raise ValueError("missing tensorflow_saved_model_bundle weights") - - super().__init__( - devices=devices, - weights=model_description.weights.tensorflow_saved_model_bundle, - model_description=model_description, - ) - - -class KerasModelAdapter(TensorflowModelAdapterBase): - weight_format = "keras_hdf5" - - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, - ): - if model_description.weights.keras_hdf5 is None: - raise ValueError("missing keras_hdf5 weights") - - super().__init__( - model_description=model_description, - devices=devices, - weights=model_description.weights.keras_hdf5, - ) diff --git a/bioimageio/core/model_adapters/_torchscript_model_adapter.py b/bioimageio/core/model_adapters/_torchscript_model_adapter.py deleted file mode 100644 index 0e9f3aef..00000000 --- a/bioimageio/core/model_adapters/_torchscript_model_adapter.py +++ /dev/null @@ -1,96 +0,0 @@ -import gc -import warnings -from typing import Any, List, Optional, Sequence, Tuple, Union - -import numpy as np -from numpy.typing import NDArray - -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -from ..digest_spec import get_axes_infos -from ..tensor import Tensor -from ._model_adapter import ModelAdapter - -try: - import torch -except Exception as e: - torch = None - torch_error = str(e) -else: - torch_error = None - - -class TorchscriptModelAdapter(ModelAdapter): - def __init__( - self, - *, - model_description: Union[v0_4.ModelDescr, v0_5.ModelDescr], - devices: Optional[Sequence[str]] = None, - ): - if torch is None: - raise ImportError(f"failed to import torch: {torch_error}") - - super().__init__() - if model_description.weights.torchscript is None: - raise ValueError( - f"No torchscript weights found for model {model_description.name}" - ) - - weight_path = download(model_description.weights.torchscript.source).path - if devices is None: - self.devices = ["cuda" if torch.cuda.is_available() else "cpu"] - else: - self.devices = [torch.device(d) for d in devices] - - if len(self.devices) > 1: - warnings.warn( - "Multiple devices for single torchscript model not yet implemented" - ) - - self._model = torch.jit.load(weight_path) - self._model.to(self.devices[0]) - self._model = self._model.eval() - self._internal_output_axes = [ - tuple(a.id for a in get_axes_infos(out)) - for out in model_description.outputs - ] - - def forward(self, *batch: Optional[Tensor]) -> List[Optional[Tensor]]: - assert torch is not None - with torch.no_grad(): - torch_tensor = [ - None if b is None else torch.from_numpy(b.data.data).to(self.devices[0]) - for b in batch - ] - _result: Union[ # pyright: ignore[reportUnknownVariableType] - Tuple[Optional[NDArray[Any]], ...], - List[Optional[NDArray[Any]]], - Optional[NDArray[Any]], - ] = 
self._model.forward(*torch_tensor) - if isinstance(_result, (tuple, list)): - result: Sequence[Optional[NDArray[Any]]] = _result - else: - result = [_result] - - result = [ - ( - None - if r is None - else r.cpu().numpy() if not isinstance(r, np.ndarray) else r - ) - for r in result - ] - - assert len(result) == len(self._internal_output_axes) - return [ - None if r is None else Tensor(r, dims=axes) - for r, axes in zip(result, self._internal_output_axes) - ] - - def unload(self) -> None: - assert torch is not None - self._devices = None - del self._model - _ = gc.collect() # deallocate memory - torch.cuda.empty_cache() # release reserved memory diff --git a/bioimageio/core/proc_ops.py b/bioimageio/core/proc_ops.py index eecf47b1..d6d59092 100644 --- a/bioimageio/core/proc_ops.py +++ b/bioimageio/core/proc_ops.py @@ -16,10 +16,11 @@ import xarray as xr from typing_extensions import Self, assert_never +from bioimageio.core.digest_spec import get_axes_infos from bioimageio.spec.model import v0_4, v0_5 from ._op_base import BlockedOperator, Operator -from .axis import AxisId, PerAxis +from .axis import AxisId, AxisInfo, PerAxis from .block import Block from .common import DTypeStr, MemberId from .sample import Sample, SampleBlock, SampleBlockWithOrigin @@ -299,9 +300,15 @@ def from_proc_descr( member_id: MemberId, ) -> Self: kwargs = descr.kwargs - if isinstance(kwargs, v0_5.ScaleLinearAlongAxisKwargs): + if isinstance(kwargs, v0_5.ScaleLinearKwargs): + axis = None + elif isinstance(kwargs, v0_5.ScaleLinearAlongAxisKwargs): axis = kwargs.axis - elif isinstance(kwargs, (v0_4.ScaleLinearKwargs, v0_5.ScaleLinearKwargs)): + elif isinstance(kwargs, v0_4.ScaleLinearKwargs): + if kwargs.axes is not None: + raise NotImplementedError( + "model.v0_4.ScaleLinearKwargs with axes not implemented, please consider updating the model to v0_5." 
+ ) axis = None else: assert_never(kwargs) diff --git a/bioimageio/core/proc_setup.py b/bioimageio/core/proc_setup.py index b9afb711..89277da5 100644 --- a/bioimageio/core/proc_setup.py +++ b/bioimageio/core/proc_setup.py @@ -1,4 +1,5 @@ from typing import ( + Callable, Iterable, List, Mapping, @@ -45,6 +46,11 @@ class PreAndPostprocessing(NamedTuple): post: List[Processing] +class _ProcessingCallables(NamedTuple): + pre: Callable[[Sample], None] + post: Callable[[Sample], None] + + class _SetupProcessing(NamedTuple): pre: List[Processing] post: List[Processing] @@ -52,6 +58,34 @@ class _SetupProcessing(NamedTuple): post_measures: Set[Measure] +class _ApplyProcs: + def __init__(self, procs: Sequence[Processing]): + super().__init__() + self._procs = procs + + def __call__(self, sample: Sample) -> None: + for op in self._procs: + op(sample) + + +def get_pre_and_postprocessing( + model: AnyModelDescr, + *, + dataset_for_initial_statistics: Iterable[Sample], + keep_updating_initial_dataset_stats: bool = False, + fixed_dataset_stats: Optional[Mapping[DatasetMeasure, MeasureValue]] = None, +) -> _ProcessingCallables: + """Creates callables to apply pre- and postprocessing in-place to a sample""" + + setup = setup_pre_and_postprocessing( + model=model, + dataset_for_initial_statistics=dataset_for_initial_statistics, + keep_updating_initial_dataset_stats=keep_updating_initial_dataset_stats, + fixed_dataset_stats=fixed_dataset_stats, + ) + return _ProcessingCallables(_ApplyProcs(setup.pre), _ApplyProcs(setup.post)) + + def setup_pre_and_postprocessing( model: AnyModelDescr, dataset_for_initial_statistics: Iterable[Sample], diff --git a/bioimageio/core/stat_calculators.py b/bioimageio/core/stat_calculators.py index 41233a5b..6279289b 100644 --- a/bioimageio/core/stat_calculators.py +++ b/bioimageio/core/stat_calculators.py @@ -137,7 +137,7 @@ def compute( else: n = int(np.prod([tensor.sizes[d] for d in self._axes])) - var = xr.dot(c, c, dims=self._axes) / n + var = xr.dot(c, c, dim=self._axes) / n assert isinstance(var, xr.DataArray) std = np.sqrt(var) assert isinstance(std, xr.DataArray) @@ -183,7 +183,7 @@ def finalize( else: assert self._m2 is not None var = self._m2 / self._n - sqrt = np.sqrt(var) + sqrt = var**0.5 if isinstance(sqrt, (int, float)): # var and mean are scalar tensors, let's keep it consistent sqrt = Tensor.from_xarray(xr.DataArray(sqrt)) @@ -306,7 +306,8 @@ def _initialize(self, tensor_sizes: PerAxis[int]): out_sizes[d] = s self._dims, self._shape = zip(*out_sizes.items()) - d = int(np.prod(self._shape[1:])) # type: ignore + assert self._shape is not None + d = int(np.prod(self._shape[1:])) self._digest = [TDigest() for _ in range(d)] self._indices = product(*map(range, self._shape[1:])) diff --git a/bioimageio/core/utils/testing.py b/bioimageio/core/utils/testing.py deleted file mode 100644 index acd65d95..00000000 --- a/bioimageio/core/utils/testing.py +++ /dev/null @@ -1,28 +0,0 @@ -# TODO: move to tests/ -from functools import wraps -from typing import Any, Protocol, Type - - -class test_func(Protocol): - def __call__(*args: Any, **kwargs: Any): ... 
- - -def skip_on(exception: Type[Exception], reason: str): - """adapted from https://stackoverflow.com/a/63522579""" - import pytest - - # Func below is the real decorator and will receive the test function as param - def decorator_func(f: test_func): - @wraps(f) - def wrapper(*args: Any, **kwargs: Any): - try: - # Try to run the test - return f(*args, **kwargs) - except exception: - # If exception of given type happens - # just swallow it and raise pytest.Skip with given reason - pytest.skip(reason) - - return wrapper - - return decorator_func diff --git a/bioimageio/core/weight_converter/__init__.py b/bioimageio/core/weight_converter/__init__.py deleted file mode 100644 index 5f1674c9..00000000 --- a/bioimageio/core/weight_converter/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""coming soon""" diff --git a/bioimageio/core/weight_converter/keras/__init__.py b/bioimageio/core/weight_converter/keras/__init__.py deleted file mode 100644 index 195b42b8..00000000 --- a/bioimageio/core/weight_converter/keras/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: update keras weight converters diff --git a/bioimageio/core/weight_converter/keras/_tensorflow.py b/bioimageio/core/weight_converter/keras/_tensorflow.py deleted file mode 100644 index c901f458..00000000 --- a/bioimageio/core/weight_converter/keras/_tensorflow.py +++ /dev/null @@ -1,151 +0,0 @@ -# type: ignore # TODO: type -import os -import shutil -from pathlib import Path -from typing import no_type_check -from zipfile import ZipFile - -try: - import tensorflow.saved_model -except Exception: - tensorflow = None - -from bioimageio.spec._internal.io_utils import download -from bioimageio.spec.model.v0_5 import ModelDescr - - -def _zip_model_bundle(model_bundle_folder: Path): - zipped_model_bundle = model_bundle_folder.with_suffix(".zip") - - with ZipFile(zipped_model_bundle, "w") as zip_obj: - for root, _, files in os.walk(model_bundle_folder): - for filename in files: - src = os.path.join(root, filename) - zip_obj.write(src, os.path.relpath(src, model_bundle_folder)) - - try: - shutil.rmtree(model_bundle_folder) - except Exception: - print("TensorFlow bundled model was not removed after compression") - - return zipped_model_bundle - - -# adapted from -# https://github.com/deepimagej/pydeepimagej/blob/master/pydeepimagej/yaml/create_config.py#L236 -def _convert_tf1( - keras_weight_path: Path, - output_path: Path, - input_name: str, - output_name: str, - zip_weights: bool, -): - try: - # try to build the tf model with the keras import from tensorflow - from bioimageio.core.weight_converter.keras._tensorflow import ( - keras, # type: ignore - ) - - except Exception: - # if the above fails try to export with the standalone keras - import keras - - @no_type_check - def build_tf_model(): - keras_model = keras.models.load_model(keras_weight_path) - assert tensorflow is not None - builder = tensorflow.saved_model.builder.SavedModelBuilder(output_path) - signature = tensorflow.saved_model.signature_def_utils.predict_signature_def( - inputs={input_name: keras_model.input}, - outputs={output_name: keras_model.output}, - ) - - signature_def_map = { - tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature - } - - builder.add_meta_graph_and_variables( - keras.backend.get_session(), - [tensorflow.saved_model.tag_constants.SERVING], - signature_def_map=signature_def_map, - ) - builder.save() - - build_tf_model() - - if zip_weights: - output_path = _zip_model_bundle(output_path) - print("TensorFlow model exported to", 
output_path) - - return 0 - - -def _convert_tf2(keras_weight_path: Path, output_path: Path, zip_weights: bool): - try: - # try to build the tf model with the keras import from tensorflow - from bioimageio.core.weight_converter.keras._tensorflow import keras - except Exception: - # if the above fails try to export with the standalone keras - import keras - - model = keras.models.load_model(keras_weight_path) - keras.models.save_model(model, output_path) - - if zip_weights: - output_path = _zip_model_bundle(output_path) - print("TensorFlow model exported to", output_path) - - return 0 - - -def convert_weights_to_tensorflow_saved_model_bundle( - model: ModelDescr, output_path: Path -): - """Convert model weights from format 'keras_hdf5' to 'tensorflow_saved_model_bundle'. - - Adapted from - https://github.com/deepimagej/pydeepimagej/blob/5aaf0e71f9b04df591d5ca596f0af633a7e024f5/pydeepimagej/yaml/create_config.py - - Args: - model: The bioimageio model description - output_path: where to save the tensorflow weights. This path must not exist yet. - """ - assert tensorflow is not None - tf_major_ver = int(tensorflow.__version__.split(".")[0]) - - if output_path.suffix == ".zip": - output_path = output_path.with_suffix("") - zip_weights = True - else: - zip_weights = False - - if output_path.exists(): - raise ValueError(f"The ouptut directory at {output_path} must not exist.") - - if model.weights.keras_hdf5 is None: - raise ValueError("Missing Keras Hdf5 weights to convert from.") - - weight_spec = model.weights.keras_hdf5 - weight_path = download(weight_spec.source).path - - if weight_spec.tensorflow_version: - model_tf_major_ver = int(weight_spec.tensorflow_version.major) - if model_tf_major_ver != tf_major_ver: - raise RuntimeError( - f"Tensorflow major versions of model {model_tf_major_ver} is not {tf_major_ver}" - ) - - if tf_major_ver == 1: - if len(model.inputs) != 1 or len(model.outputs) != 1: - raise NotImplementedError( - "Weight conversion for models with multiple inputs or outputs is not yet implemented." 
- ) - return _convert_tf1( - weight_path, - output_path, - model.inputs[0].id, - model.outputs[0].id, - zip_weights, - ) - else: - return _convert_tf2(weight_path, output_path, zip_weights) diff --git a/bioimageio/core/weight_converter/torch/__init__.py b/bioimageio/core/weight_converter/torch/__init__.py deleted file mode 100644 index 1b1ba526..00000000 --- a/bioimageio/core/weight_converter/torch/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# TODO: torch weight converters diff --git a/bioimageio/core/weight_converter/torch/_onnx.py b/bioimageio/core/weight_converter/torch/_onnx.py deleted file mode 100644 index 3935e1d1..00000000 --- a/bioimageio/core/weight_converter/torch/_onnx.py +++ /dev/null @@ -1,108 +0,0 @@ -# type: ignore # TODO: type -import warnings -from pathlib import Path -from typing import Any, List, Sequence, cast - -import numpy as np -from numpy.testing import assert_array_almost_equal - -from bioimageio.spec import load_description -from bioimageio.spec.common import InvalidDescr -from bioimageio.spec.model import v0_4, v0_5 - -from ...digest_spec import get_member_id, get_test_inputs -from ...weight_converter.torch._utils import load_torch_model - -try: - import torch -except ImportError: - torch = None - - -def add_onnx_weights( - model_spec: "str | Path | v0_4.ModelDescr | v0_5.ModelDescr", - *, - output_path: Path, - use_tracing: bool = True, - test_decimal: int = 4, - verbose: bool = False, - opset_version: "int | None" = None, -): - """Convert model weights from format 'pytorch_state_dict' to 'onnx'. - - Args: - source_model: model without onnx weights - opset_version: onnx opset version - use_tracing: whether to use tracing or scripting to export the onnx format - test_decimal: precision for testing whether the results agree - """ - if isinstance(model_spec, (str, Path)): - loaded_spec = load_description(Path(model_spec)) - if isinstance(loaded_spec, InvalidDescr): - raise ValueError(f"Bad resource description: {loaded_spec}") - if not isinstance(loaded_spec, (v0_4.ModelDescr, v0_5.ModelDescr)): - raise TypeError( - f"Path {model_spec} is a {loaded_spec.__class__.__name__}, expected a v0_4.ModelDescr or v0_5.ModelDescr" - ) - model_spec = loaded_spec - - state_dict_weights_descr = model_spec.weights.pytorch_state_dict - if state_dict_weights_descr is None: - raise ValueError( - "The provided model does not have weights in the pytorch state dict format" - ) - - assert torch is not None - with torch.no_grad(): - - sample = get_test_inputs(model_spec) - input_data = [sample[get_member_id(ipt)].data.data for ipt in model_spec.inputs] - input_tensors = [torch.from_numpy(ipt) for ipt in input_data] - model = load_torch_model(state_dict_weights_descr) - - expected_tensors = model(*input_tensors) - if isinstance(expected_tensors, torch.Tensor): - expected_tensors = [expected_tensors] - expected_outputs: List[np.ndarray[Any, Any]] = [ - out.numpy() for out in expected_tensors - ] - - if use_tracing: - torch.onnx.export( - model, - tuple(input_tensors) if len(input_tensors) > 1 else input_tensors[0], - str(output_path), - verbose=verbose, - opset_version=opset_version, - ) - else: - raise NotImplementedError - - try: - import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] - except ImportError: - msg = "The onnx weights were exported, but onnx rt is not available and weights cannot be checked." 
- warnings.warn(msg) - return - - # check the onnx model - sess = rt.InferenceSession(str(output_path)) - onnx_input_node_args = cast( - List[Any], sess.get_inputs() - ) # fixme: remove cast, try using rt.NodeArg instead of Any - onnx_inputs = { - input_name.name: inp - for input_name, inp in zip(onnx_input_node_args, input_data) - } - outputs = cast( - Sequence[np.ndarray[Any, Any]], sess.run(None, onnx_inputs) - ) # FIXME: remove cast - - try: - for exp, out in zip(expected_outputs, outputs): - assert_array_almost_equal(exp, out, decimal=test_decimal) - return 0 - except AssertionError as e: - msg = f"The onnx weights were exported, but results before and after conversion do not agree:\n {str(e)}" - warnings.warn(msg) - return 1 diff --git a/bioimageio/core/weight_converter/torch/_utils.py b/bioimageio/core/weight_converter/torch/_utils.py deleted file mode 100644 index 01df0747..00000000 --- a/bioimageio/core/weight_converter/torch/_utils.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Union - -from bioimageio.core.model_adapters._pytorch_model_adapter import PytorchModelAdapter -from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.utils import download - -try: - import torch -except ImportError: - torch = None - - -# additional convenience for pytorch state dict, eventually we want this in python-bioimageio too -# and for each weight format -def load_torch_model( # pyright: ignore[reportUnknownParameterType] - node: Union[v0_4.PytorchStateDictWeightsDescr, v0_5.PytorchStateDictWeightsDescr], -): - assert torch is not None - model = ( # pyright: ignore[reportUnknownVariableType] - PytorchModelAdapter.get_network(node) - ) - state = torch.load(download(node.source).path, map_location="cpu") - model.load_state_dict(state) # FIXME: check incompatible keys? - return model.eval() # pyright: ignore[reportUnknownVariableType] diff --git a/bioimageio/core/weight_converters/__init__.py b/bioimageio/core/weight_converters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bioimageio/core/weight_converters/_add_weights.py b/bioimageio/core/weight_converters/_add_weights.py new file mode 100644 index 00000000..ed541385 --- /dev/null +++ b/bioimageio/core/weight_converters/_add_weights.py @@ -0,0 +1,92 @@ +from typing import Optional, Sequence + +from loguru import logger +from pydantic import DirectoryPath + +from bioimageio.core._resource_tests import test_model +from bioimageio.spec import load_model_description, save_bioimageio_package_as_folder +from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance +from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat + + +def increase_available_weight_formats( + model_descr: ModelDescr, + *, + output_path: DirectoryPath, + source_format: Optional[WeightsFormat] = None, + target_format: Optional[WeightsFormat] = None, + devices: Sequence[str] = ("cpu",), +) -> ModelDescr: + """Convert model weights to other formats and add them to the model description + + Args: + output_path: Path to save updated model package to. + source_format: convert from a specific weights format. + Default: choose automatically from any available. + target_format: convert to a specific weights format. + Default: attempt to convert to any missing format. + devices: Devices that may be used during conversion. 
+ """ + if not isinstance(model_descr, ModelDescr): + raise TypeError(type(model_descr)) + + # save model to local folder + output_path = save_bioimageio_package_as_folder( + model_descr, output_path=output_path + ) + # reload from local folder to make sure we do not edit the given model + _model_descr = load_model_description(output_path) + assert isinstance(_model_descr, ModelDescr) + model_descr = _model_descr + del _model_descr + + if source_format is None: + available = set(model_descr.weights.available_formats) + else: + available = {source_format} + + if target_format is None: + missing = set(model_descr.weights.missing_formats) + else: + missing = {target_format} + + if "pytorch_state_dict" in available and "onnx" in missing: + from .pytorch_to_onnx import convert + + try: + model_descr.weights.onnx = convert( + model_descr, + output_path=output_path, + use_tracing=False, + ) + except Exception as e: + logger.error(e) + else: + available.add("onnx") + missing.discard("onnx") + + if "pytorch_state_dict" in available and "torchscript" in missing: + from .pytorch_to_torchscript import convert + + try: + model_descr.weights.torchscript = convert( + model_descr, + output_path=output_path, + use_tracing=False, + ) + except Exception as e: + logger.error(e) + else: + available.add("torchscript") + missing.discard("torchscript") + + if missing: + logger.warning( + f"Converting from any of the available weights formats {available} to any" + + f" of {missing} is not yet implemented. Please create an issue at" + + " https://github.com/bioimage-io/core-bioimage-io-python/issues/new/choose" + + " if you would like bioimageio.core to support a particular conversion." + ) + + test_model(model_descr).display() + return model_descr diff --git a/bioimageio/core/weight_converters/keras_to_tensorflow.py b/bioimageio/core/weight_converters/keras_to_tensorflow.py new file mode 100644 index 00000000..083bae5b --- /dev/null +++ b/bioimageio/core/weight_converters/keras_to_tensorflow.py @@ -0,0 +1,184 @@ +import os +import shutil +from pathlib import Path +from typing import Union, no_type_check +from zipfile import ZipFile + +import tensorflow + +from bioimageio.core.io import ensure_unzipped +from bioimageio.spec._internal.io_utils import download +from bioimageio.spec._internal.version_type import Version +from bioimageio.spec.common import ZipPath +from bioimageio.spec.model import v0_4, v0_5 + +try: + # try to build the tf model with the keras import from tensorflow + from tensorflow import keras +except Exception: + # if the above fails try to export with the standalone keras + import keras + + +def convert( + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], *, output_path: Path +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + """ + Convert model weights from the 'keras_hdf5' format to the 'tensorflow_saved_model_bundle' format. + + This method handles the conversion of Keras HDF5 model weights into a TensorFlow SavedModel bundle, + which is the recommended format for deploying TensorFlow models. The method supports both TensorFlow 1.x + and 2.x versions, with appropriate checks to ensure compatibility. + + Adapted from: + https://github.com/deepimagej/pydeepimagej/blob/5aaf0e71f9b04df591d5ca596f0af633a7e024f5/pydeepimagej/yaml/create_config.py + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The bioimage.io model description containing the model's metadata and weights. + output_path (Path): + The directory where the TensorFlow SavedModel bundle will be saved. 
+ This path must not already exist and, if necessary, will be zipped into a .zip file. + + Raises: + ValueError: + - If the specified `output_path` already exists. + - If the Keras HDF5 weights are missing in the model description. + RuntimeError: + If there is a mismatch between the TensorFlow version used by the model and the version installed. + NotImplementedError: + If the model has multiple inputs or outputs and TensorFlow 1.x is being used. + + Returns: + v0_5.TensorflowSavedModelBundleWeightsDescr: + A descriptor object containing information about the converted TensorFlow SavedModel bundle. + """ + tf_major_ver = int(tensorflow.__version__.split(".")[0]) + + if output_path.suffix == ".zip": + output_path = output_path.with_suffix("") + zip_weights = True + else: + zip_weights = False + + if output_path.exists(): + raise ValueError(f"The output directory at {output_path} must not exist.") + + if model_descr.weights.keras_hdf5 is None: + raise ValueError("Missing Keras HDF5 weights to convert from.") + + weight_spec = model_descr.weights.keras_hdf5 + weight_path = download(weight_spec.source).path + + if weight_spec.tensorflow_version: + model_tf_major_ver = int(weight_spec.tensorflow_version.major) + if model_tf_major_ver != tf_major_ver: + raise RuntimeError( + f"The model specifies TensorFlow major version {model_tf_major_ver}, but version {tf_major_ver} is installed." + ) + + if tf_major_ver == 1: + if len(model_descr.inputs) != 1 or len(model_descr.outputs) != 1: + raise NotImplementedError( + "Weight conversion for models with multiple inputs or outputs is not yet implemented." + ) + + input_name = str( + d.id + if isinstance((d := model_descr.inputs[0]), v0_5.InputTensorDescr) + else d.name + ) + output_name = str( + d.id + if isinstance((d := model_descr.outputs[0]), v0_5.OutputTensorDescr) + else d.name + ) + return _convert_tf1( + ensure_unzipped(weight_path, Path("bioimageio_unzipped_tf_weights")), + output_path, + input_name, + output_name, + zip_weights, + ) + else: + return _convert_tf2(weight_path, output_path, zip_weights) + + +def _convert_tf2( + keras_weight_path: Union[Path, ZipPath], output_path: Path, zip_weights: bool +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + model = keras.models.load_model(keras_weight_path) # type: ignore + keras.models.save_model(model, output_path) # type: ignore + + if zip_weights: + output_path = _zip_model_bundle(output_path) + print("TensorFlow model exported to", output_path) + + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + tensorflow_version=Version(tensorflow.__version__), + ) + + +# adapted from + # https://github.com/deepimagej/pydeepimagej/blob/master/pydeepimagej/yaml/create_config.py#L236 +def _convert_tf1( + keras_weight_path: Path, + output_path: Path, + input_name: str, + output_name: str, + zip_weights: bool, +) -> v0_5.TensorflowSavedModelBundleWeightsDescr: + + @no_type_check + def build_tf_model(): + keras_model = keras.models.load_model(keras_weight_path) + assert tensorflow is not None + builder = tensorflow.saved_model.builder.SavedModelBuilder(output_path) + signature = tensorflow.saved_model.signature_def_utils.predict_signature_def( + inputs={input_name: keras_model.input}, + outputs={output_name: keras_model.output}, + ) + + signature_def_map = { + tensorflow.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature + } + +
builder.add_meta_graph_and_variables( + keras.backend.get_session(), + [tensorflow.saved_model.tag_constants.SERVING], + signature_def_map=signature_def_map, + ) + builder.save() + + build_tf_model() + + if zip_weights: + output_path = _zip_model_bundle(output_path) + print("TensorFlow model exported to", output_path) + + return v0_5.TensorflowSavedModelBundleWeightsDescr( + source=output_path, + parent="keras_hdf5", + tensorflow_version=Version(tensorflow.__version__), + ) + + +def _zip_model_bundle(model_bundle_folder: Path): + zipped_model_bundle = model_bundle_folder.with_suffix(".zip") + + with ZipFile(zipped_model_bundle, "w") as zip_obj: + for root, _, files in os.walk(model_bundle_folder): + for filename in files: + src = os.path.join(root, filename) + zip_obj.write(src, os.path.relpath(src, model_bundle_folder)) + + try: + shutil.rmtree(model_bundle_folder) + except Exception: + print("TensorFlow bundled model was not removed after compression") + + return zipped_model_bundle diff --git a/bioimageio/core/weight_converters/pytorch_to_onnx.py b/bioimageio/core/weight_converters/pytorch_to_onnx.py new file mode 100644 index 00000000..a104cb2d --- /dev/null +++ b/bioimageio/core/weight_converters/pytorch_to_onnx.py @@ -0,0 +1,125 @@ +from pathlib import Path +from typing import Any, List, Sequence, Union, cast + +import numpy as np +import torch +from numpy.testing import assert_allclose + +from bioimageio.core.backends.pytorch_backend import load_torch_model +from bioimageio.core.digest_spec import get_member_id, get_test_inputs +from bioimageio.core.proc_setup import get_pre_and_postprocessing +from bioimageio.spec._internal.types import AbsoluteTolerance, RelativeTolerance +from bioimageio.spec.model import v0_4, v0_5 + + +def convert( + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + output_path: Path, + use_tracing: bool = True, + verbose: bool = False, + opset_version: int = 15, + check_reproducibility: bool = True, + relative_tolerance: RelativeTolerance = 1e-07, + absolute_tolerance: AbsoluteTolerance = 0, +) -> v0_5.OnnxWeightsDescr: + """ + Convert model weights from the PyTorch state_dict format to the ONNX format. + + # TODO: update Args + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The model description object that contains the model and its weights. + output_path (Path): + The file path where the ONNX model will be saved. + use_tracing (bool, optional): + Whether to use tracing or scripting to export the ONNX format. Defaults to True. + verbose (bool, optional): + If True, will print out detailed information during the ONNX export process. Defaults to False. + opset_version (int, optional): + The ONNX opset version to use for the export. Defaults to 15. + Raises: + ValueError: + If the provided model does not have weights in the PyTorch state_dict format. + ImportError: + If ONNX Runtime is not available for checking the exported ONNX model. + ValueError: + If the results before and after weights conversion do not agree. + Returns: + v0_5.OnnxWeightsDescr: + A descriptor object that contains information about the exported ONNX weights. 
+ """ + + state_dict_weights_descr = model_descr.weights.pytorch_state_dict + if state_dict_weights_descr is None: + raise ValueError( + "The provided model does not have weights in the pytorch state dict format" + ) + + sample = get_test_inputs(model_descr) + procs = get_pre_and_postprocessing( + model_descr, dataset_for_initial_statistics=[sample] + ) + procs.pre(sample) + inputs_numpy = [ + sample.members[get_member_id(ipt)].data.data for ipt in model_descr.inputs + ] + inputs_torch = [torch.from_numpy(ipt) for ipt in inputs_numpy] + model = load_torch_model(state_dict_weights_descr) + with torch.no_grad(): + outputs_original_torch = model(*inputs_torch) + if isinstance(outputs_original_torch, torch.Tensor): + outputs_original_torch = [outputs_original_torch] + + outputs_original: List[np.ndarray[Any, Any]] = [ + out.numpy() for out in outputs_original_torch + ] + if use_tracing: + _ = torch.onnx.export( + model, + tuple(inputs_torch), + str(output_path), + verbose=verbose, + opset_version=opset_version, + ) + else: + raise NotImplementedError + + if check_reproducibility: + try: + import onnxruntime as rt # pyright: ignore [reportMissingTypeStubs] + except ImportError as e: + raise ImportError( + "The onnx weights were exported, but onnx rt is not available" + + " and weights cannot be checked." + ) from e + + # check the onnx model + sess = rt.InferenceSession(str(output_path)) + onnx_input_node_args = cast( + List[Any], sess.get_inputs() + ) # FIXME: remove cast, try using rt.NodeArg instead of Any + inputs_onnx = { + input_name.name: inp + for input_name, inp in zip(onnx_input_node_args, inputs_numpy) + } + outputs_onnx = cast( + Sequence[np.ndarray[Any, Any]], sess.run(None, inputs_onnx) + ) # FIXME: remove cast + + try: + for out_original, out_onnx in zip(outputs_original, outputs_onnx): + assert_allclose( + out_original, + out_onnx, + rtol=relative_tolerance, + atol=absolute_tolerance, + ) + except AssertionError as e: + raise AssertionError( + "Inference results of original and converted weights do not match." 
+ ) from e + + return v0_5.OnnxWeightsDescr( + source=output_path, parent="pytorch_state_dict", opset_version=opset_version + ) diff --git a/bioimageio/core/weight_converter/torch/_torchscript.py b/bioimageio/core/weight_converters/pytorch_to_torchscript.py similarity index 59% rename from bioimageio/core/weight_converter/torch/_torchscript.py rename to bioimageio/core/weight_converters/pytorch_to_torchscript.py index 5ca16069..a724e5f8 100644 --- a/bioimageio/core/weight_converter/torch/_torchscript.py +++ b/bioimageio/core/weight_converters/pytorch_to_torchscript.py @@ -1,31 +1,85 @@ -# type: ignore # TODO: type from pathlib import Path -from typing import List, Sequence, Union +from typing import Any, List, Sequence, Tuple, Union import numpy as np +import torch from numpy.testing import assert_array_almost_equal -from typing_extensions import Any, assert_never +from torch.jit import ScriptModule +from typing_extensions import assert_never +from bioimageio.core.backends.pytorch_backend import load_torch_model +from bioimageio.spec._internal.version_type import Version from bioimageio.spec.model import v0_4, v0_5 -from bioimageio.spec.model.v0_5 import Version -from ._utils import load_torch_model -try: - import torch -except ImportError: - torch = None +def convert( + model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], + *, + output_path: Path, + use_tracing: bool = True, +) -> v0_5.TorchscriptWeightsDescr: + """ + Convert model weights from the PyTorch `state_dict` format to TorchScript. + + Args: + model_descr (Union[v0_4.ModelDescr, v0_5.ModelDescr]): + The model description object that contains the model and its weights in the PyTorch `state_dict` format. + output_path (Path): + The file path where the TorchScript model will be saved. + use_tracing (bool): + Whether to use tracing or scripting to export the TorchScript format. + - `True`: Use tracing, which is recommended for models with straightforward control flow. + - `False`: Use scripting, which is better for models with dynamic control flow (e.g., loops, conditionals). + + Raises: + ValueError: + If the provided model does not have weights in the PyTorch `state_dict` format. + + Returns: + v0_5.TorchscriptWeightsDescr: + A descriptor object that contains information about the exported TorchScript weights. + """ + state_dict_weights_descr = model_descr.weights.pytorch_state_dict + if state_dict_weights_descr is None: + raise ValueError( + "The provided model does not have weights in the pytorch state dict format" + ) + + input_data = model_descr.get_input_test_arrays() + + with torch.no_grad(): + input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] + model = load_torch_model(state_dict_weights_descr) + scripted_module: Union[ # pyright: ignore[reportUnknownVariableType] + ScriptModule, Tuple[Any, ...] 
+ ] = ( + torch.jit.trace(model, input_data) + if use_tracing + else torch.jit.script(model) + ) + assert not isinstance(scripted_module, tuple), scripted_module + _check_predictions( + model=model, + scripted_model=scripted_module, + model_spec=model_descr, + input_data=input_data, + ) + + scripted_module.save(str(output_path)) + + return v0_5.TorchscriptWeightsDescr( + source=output_path, + pytorch_version=Version(torch.__version__), + parent="pytorch_state_dict", + ) -# FIXME: remove Any def _check_predictions( model: Any, scripted_model: Any, - model_spec: "v0_4.ModelDescr | v0_5.ModelDescr", - input_data: Sequence["torch.Tensor"], + model_spec: Union[v0_4.ModelDescr, v0_5.ModelDescr], + input_data: Sequence[torch.Tensor], ): - assert torch is not None - def _check(input_: Sequence[torch.Tensor]) -> None: expected_tensors = model(*input_) if isinstance(expected_tensors, torch.Tensor): @@ -77,70 +131,24 @@ def _check(input_: Sequence[torch.Tensor]) -> None: else: assert_never(axis.size) - half_step = [st // 2 for st in step] + input_tensor = input_data[0] + max_shape = input_tensor.shape max_steps = 4 # check that input and output agree for decreasing input sizes for step_factor in range(1, max_steps + 1): slice_ = tuple( - slice(None) if st == 0 else slice(step_factor * st, -step_factor * st) - for st in half_step - ) - this_input = [inp[slice_] for inp in input_data] - this_shape = this_input[0].shape - if any(tsh < msh for tsh, msh in zip(this_shape, min_shape)): - raise ValueError( - f"Mismatched shapes: {this_shape}. Expected at least {min_shape}" + ( + slice(None) + if step_dim == 0 + else slice(0, max_dim - step_factor * step_dim, 1) ) - _check(this_input) - - -def convert_weights_to_torchscript( - model_descr: Union[v0_4.ModelDescr, v0_5.ModelDescr], - output_path: Path, - use_tracing: bool = True, -) -> v0_5.TorchscriptWeightsDescr: - """Convert model weights from format 'pytorch_state_dict' to 'torchscript'.
- - Args: - model_descr: location of the resource for the input bioimageio model - output_path: where to save the torchscript weights - use_tracing: whether to use tracing or scripting to export the torchscript format - """ - - state_dict_weights_descr = model_descr.weights.pytorch_state_dict - if state_dict_weights_descr is None: - raise ValueError( - "The provided model does not have weights in the pytorch state dict format" + for max_dim, step_dim in zip(max_shape, step) ) - - input_data = model_descr.get_input_test_arrays() - - with torch.no_grad(): - input_data = [torch.from_numpy(inp.astype("float32")) for inp in input_data] - - model = load_torch_model(state_dict_weights_descr) - - # FIXME: remove Any - if use_tracing: - scripted_model: Any = torch.jit.trace(model, input_data) - else: - scripted_model: Any = torch.jit.script(model) - - _check_predictions( - model=model, - scripted_model=scripted_model, - model_spec=model_descr, - input_data=input_data, - ) - - # save the torchscript model - scripted_model.save( - str(output_path) - ) # does not support Path, so need to cast to str - - return v0_5.TorchscriptWeightsDescr( - source=output_path, - pytorch_version=Version(torch.__version__), - parent="pytorch_state_dict", - ) + sliced_input = input_tensor[slice_] + if any( + sliced_dim < min_dim + for sliced_dim, min_dim in zip(sliced_input.shape, min_shape) + ): + return + _check([sliced_input]) diff --git a/dev/env-py38.yaml b/dev/env-py38.yaml index 22353103..148c9668 100644 --- a/dev/env-py38.yaml +++ b/dev/env-py38.yaml @@ -3,18 +3,22 @@ name: core38 channels: - conda-forge - nodefaults + - pytorch dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black - crick # uncommented - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black - # - keras>=3.0 # removed + - # keras>=3.0,<4 # removed - loguru + - matplotlib - numpy + - onnx - onnxruntime - packaging>=17.0 - pdoc @@ -28,11 +32,12 @@ dependencies: - pytest-cov - pytest-xdist - python=3.8 # changed - - pytorch>=2.1 + - pytorch>=2.1,<3 - requests - rich - ruff - ruyaml + # - tensorflow>=2,<3 removed - torchvision - tqdm - typing-extensions diff --git a/dev/env-tf.yaml b/dev/env-tf.yaml index 0df6fd07..61e00c41 100644 --- a/dev/env-tf.yaml +++ b/dev/env-tf.yaml @@ -3,18 +3,22 @@ name: core-tf # changed channels: - conda-forge - nodefaults + # - pytroch # removed dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black - keras>=2.15 # changed - loguru + - matplotlib - numpy + - onnx - onnxruntime - packaging>=17.0 - pdoc @@ -28,7 +32,7 @@ dependencies: - pytest-cov - pytest-xdist # - python=3.9 # removed - # - pytorch>=2.1 # removed + # - pytorch>=2.1,<3 # removed - requests - rich # - ruff # removed diff --git a/dev/env-wo-python.yaml b/dev/env-wo-python.yaml index d8cba289..cd72571e 100644 --- a/dev/env-wo-python.yaml +++ b/dev/env-wo-python.yaml @@ -3,19 +3,22 @@ name: core channels: - conda-forge - nodefaults - - pytorch # added + - pytorch dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black - - keras>=3.0 + - keras>=3.0,<4 - loguru + - matplotlib - numpy + - onnx - onnxruntime - packaging>=17.0 - pdoc @@ -29,11 +32,12 @@ dependencies: - pytest-cov - pytest-xdist # - python=3.9 # removed - - pytorch>=2.1 + - 
pytorch>=2.1,<3 - requests - rich - ruff - ruyaml + - tensorflow>=2,<3 - torchvision - tqdm - typing-extensions diff --git a/dev/env.yaml b/dev/env.yaml index 20d60a18..8fed3b25 100644 --- a/dev/env.yaml +++ b/dev/env.yaml @@ -1,20 +1,23 @@ -name: core +name: full channels: - conda-forge + - nodefaults + - pytorch dependencies: - - bioimageio.spec>=0.5.3.5 + - bioimageio.spec>=0.5.3.6 - black # - crick # currently requires python<=3.9 - filelock - h5py + - imagecodecs - imageio>=2.5 - jupyter - jupyter-black - - ipykernel - - matplotlib - - keras>=3.0 + - keras>=3.0,<4 - loguru + - matplotlib - numpy + - onnx - onnxruntime - packaging>=17.0 - pdoc @@ -27,12 +30,13 @@ dependencies: - pytest - pytest-cov - pytest-xdist - - python=3.9 - - pytorch>=2.1 + - python=3.12 + - pytorch>=2.1,<3 - requests - rich - ruff - ruyaml + - tensorflow>=2,<3 - torchvision - tqdm - typing-extensions diff --git a/pyproject.toml b/pyproject.toml index 91cd2cbc..98bf386e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.black] line-length = 88 -extend_exclude = "/presentations/" +extend-exclude = "/presentations/" target-version = ["py38", "py39", "py310", "py311", "py312"] preview = true @@ -39,7 +39,7 @@ typeCheckingMode = "strict" useLibraryCodeForTypes = true [tool.pytest.ini_options] -addopts = "--cov=bioimageio --cov-report=xml -n auto --capture=no --doctest-modules --failed-first" +addopts = "--cov=bioimageio --cov-report=xml --cov-append -n 0 --capture=no --doctest-modules --failed-first" [tool.ruff] line-length = 88 diff --git a/setup.py b/setup.py index 99747946..02f39f8d 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,9 @@ ], packages=find_namespace_packages(exclude=["tests"]), install_requires=[ - "bioimageio.spec ==0.5.3.5", + "bioimageio.spec ==0.5.3.6", "h5py", + "imagecodecs", "imageio>=2.10", "loguru", "numpy", @@ -45,28 +46,27 @@ ], include_package_data=True, extras_require={ - "pytorch": ["torch>=1.6", "torchvision", "keras>=3.0"], - "tensorflow": ["tensorflow", "keras>=2.15"], + "pytorch": (pytorch_deps := ["torch>=1.6,<3", "torchvision", "keras>=3.0,<4"]), + "tensorflow": ["tensorflow", "keras>=2.15,<4"], "onnx": ["onnxruntime"], - "dev": [ + "dev": pytorch_deps + + [ "black", # "crick", # currently requires python<=3.9 "filelock", "jupyter", "jupyter-black", "matplotlib", - "keras>=3.0", + "onnx", "onnxruntime", "packaging>=17.0", "pre-commit", "pdoc", "psutil", # parallel pytest with 'pytest -n auto' - "pyright", + "pyright==1.1.391", "pytest-cov", "pytest-xdist", # parallel pytest "pytest", - "torch>=1.6", - "torchvision", ], }, project_urls={ diff --git a/tests/conftest.py b/tests/conftest.py index 253ade2f..cb78bfe9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,6 @@ from itertools import chain from typing import Dict, List -from loguru import logger from pytest import FixtureRequest, fixture from bioimageio.spec import __version__ as bioimageio_spec_version @@ -14,7 +13,7 @@ import torch torch_version = tuple(map(int, torch.__version__.split(".")[:2])) - logger.warning(f"detected torch version {torch_version}.x") + warnings.warn(f"detected torch version {torch.__version__}") except ImportError: torch = None torch_version = None @@ -29,7 +28,7 @@ try: import tensorflow # type: ignore - tf_major_version = int(tensorflow.__version__.split(".")[0]) # type: ignore + tf_major_version = int(tensorflow.__version__.split(".")[0]) except ImportError: tensorflow = None tf_major_version = None @@ -45,9 +44,7 @@ # TODO: use models from new collection on S3 
MODEL_SOURCES: Dict[str, str] = { - "hpa_densenet": ( - "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models/hpa-densenet/rdf.yaml" - ), + "hpa_densenet": "polite-pig/1", "stardist": ( "https://raw.githubusercontent.com/bioimage-io/spec-bioimage-io/main/example_descriptions/models" "/stardist_example_model/v0_4.bioimageio.yaml" diff --git a/tests/weight_converter/test_add_weights.py b/tests/test_add_weights.py similarity index 100% rename from tests/weight_converter/test_add_weights.py rename to tests/test_add_weights.py diff --git a/tests/test_bioimageio_collection.py b/tests/test_bioimageio_collection.py new file mode 100644 index 00000000..2cf9ced0 --- /dev/null +++ b/tests/test_bioimageio_collection.py @@ -0,0 +1,60 @@ +from typing import Any, Collection, Dict, Iterable, Mapping, Tuple + +import pytest +import requests +from pydantic import HttpUrl + +from bioimageio.spec import InvalidDescr +from bioimageio.spec.common import Sha256 +from tests.utils import ParameterSet, expensive_test + +BASE_URL = "https://uk1s3.embassy.ebi.ac.uk/public-datasets/bioimage.io/" + + +def _get_latest_rdf_sources(): + entries: Any = requests.get(BASE_URL + "all_versions.json").json()["entries"] + ret: Dict[str, Tuple[HttpUrl, Sha256]] = {} + for entry in entries: + version = entry["versions"][0] + ret[f"{entry['concept']}/{version['v']}"] = ( + HttpUrl(version["source"]), # pyright: ignore[reportCallIssue] + Sha256(version["sha256"]), + ) + + return ret + + +ALL_LATEST_RDF_SOURCES: Mapping[str, Tuple[HttpUrl, Sha256]] = _get_latest_rdf_sources() + + +def yield_bioimageio_yaml_urls() -> Iterable[ParameterSet]: + for descr_url, sha in ALL_LATEST_RDF_SOURCES.values(): + key = ( + str(descr_url) + .replace(BASE_URL, "") + .replace("/files/rdf.yaml", "") + .replace("/files/bioimageio.yaml", "") + ) + yield pytest.param(descr_url, sha, key, id=key) + + +KNOWN_INVALID: Collection[str] = set() + + +@expensive_test +@pytest.mark.parametrize("descr_url,sha,key", list(yield_bioimageio_yaml_urls())) +def test_rdf( + descr_url: HttpUrl, + sha: Sha256, + key: str, +): + if key in KNOWN_INVALID: + pytest.skip("known failure") + + from bioimageio.core import load_description_and_test + + descr = load_description_and_test(descr_url, sha256=sha) + assert not isinstance(descr, InvalidDescr) + assert ( + descr.validation_summary.status == "passed" + ), descr.validation_summary.format() diff --git a/tests/test_prediction_pipeline_device_management.py b/tests/test_prediction_pipeline_device_management.py index 0e241df1..2dde4120 100644 --- a/tests/test_prediction_pipeline_device_management.py +++ b/tests/test_prediction_pipeline_device_management.py @@ -1,16 +1,12 @@ from pathlib import Path +import pytest from numpy.testing import assert_array_almost_equal -from bioimageio.core.utils.testing import skip_on from bioimageio.spec.model.v0_4 import ModelDescr as ModelDescr04 from bioimageio.spec.model.v0_5 import ModelDescr, WeightsFormat -class TooFewDevicesException(Exception): - pass - - def _test_device_management(model_package: Path, weight_format: WeightsFormat): import torch @@ -19,7 +15,7 @@ def _test_device_management(model_package: Path, weight_format: WeightsFormat): from bioimageio.core.digest_spec import get_test_inputs, get_test_outputs if not hasattr(torch, "cuda") or torch.cuda.device_count() == 0: - raise TooFewDevicesException("Need at least one cuda device for this test") + pytest.skip("Need at least one cuda device for this test") bio_model = 
load_description(model_package) assert isinstance(bio_model, (ModelDescr, ModelDescr04)) @@ -52,26 +48,21 @@ def _test_device_management(model_package: Path, weight_format: WeightsFormat): assert_array_almost_equal(out, exp, decimal=4) -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_torch(any_torch_model: Path): _test_device_management(any_torch_model, "pytorch_state_dict") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_torchscript(any_torchscript_model: Path): _test_device_management(any_torchscript_model, "torchscript") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_onnx(any_onnx_model: Path): _test_device_management(any_onnx_model, "onnx") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_tensorflow(any_tensorflow_model: Path): _test_device_management(any_tensorflow_model, "tensorflow_saved_model_bundle") -@skip_on(TooFewDevicesException, reason="Too few devices") def test_device_management_keras(any_keras_model: Path): _test_device_management(any_keras_model, "keras_hdf5") diff --git a/tests/test_proc_ops.py b/tests/test_proc_ops.py index e408d220..0b93f08b 100644 --- a/tests/test_proc_ops.py +++ b/tests/test_proc_ops.py @@ -105,6 +105,22 @@ def test_zero_mean_unit_variance_fixed(tid: MemberId): xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) +def test_zero_mean_unit_variance_fixed2(tid: MemberId): + from bioimageio.core.proc_ops import FixedZeroMeanUnitVariance + + np_data = np.arange(9).reshape(3, 3) + mean = float(np_data.mean()) + std = float(np_data.mean()) + eps = 1.0e-7 + op = FixedZeroMeanUnitVariance(tid, tid, mean=mean, std=std, eps=eps) + + data = xr.DataArray(np_data, dims=("x", "y")) + sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) + expected = xr.DataArray((np_data - mean) / (std + eps), dims=("x", "y")) + op(sample) + xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) + + def test_zero_mean_unit_across_axes(tid: MemberId): from bioimageio.core.proc_ops import ZeroMeanUnitVariance @@ -126,22 +142,6 @@ def test_zero_mean_unit_across_axes(tid: MemberId): xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) -def test_zero_mean_unit_variance_fixed2(tid: MemberId): - from bioimageio.core.proc_ops import FixedZeroMeanUnitVariance - - np_data = np.arange(9).reshape(3, 3) - mean = float(np_data.mean()) - std = float(np_data.mean()) - eps = 1.0e-7 - op = FixedZeroMeanUnitVariance(tid, tid, mean=mean, std=std, eps=eps) - - data = xr.DataArray(np_data, dims=("x", "y")) - sample = Sample(members={tid: Tensor.from_xarray(data)}, stat={}, id=None) - expected = xr.DataArray((np_data - mean) / (std + eps), dims=("x", "y")) - op(sample) - xr.testing.assert_allclose(expected, sample.members[tid].data, rtol=1e-5, atol=1e-7) - - def test_binarize(tid: MemberId): from bioimageio.core.proc_ops import Binarize diff --git a/tests/test_resource_tests.py b/tests/test_resource_tests.py index 203ca64b..ce5a6fe8 100644 --- a/tests/test_resource_tests.py +++ b/tests/test_resource_tests.py @@ -2,7 +2,7 @@ import pytest -from bioimageio.spec import InvalidDescr +from bioimageio.spec import InvalidDescr, ValidationContext @pytest.mark.parametrize("mode", ["seed_only", "full"]) @@ -38,14 +38,9 @@ def test_error_for_wrong_shape2(stardist_wrong_shape2: str): def test_test_model(any_model: str): from 
bioimageio.core._resource_tests import test_model - summary = test_model(any_model) - assert summary.status == "passed", summary.format() - - -def test_test_resource(any_model: str): - from bioimageio.core._resource_tests import test_description + with ValidationContext(raise_errors=True): + summary = test_model(any_model) - summary = test_description(any_model) assert summary.status == "passed", summary.format() diff --git a/tests/test_stat_calculators.py b/tests/test_stat_calculators.py index 57e86c5a..0efe02d9 100644 --- a/tests/test_stat_calculators.py +++ b/tests/test_stat_calculators.py @@ -1,7 +1,8 @@ -from typing import Tuple, Union +from typing import Tuple import numpy as np import pytest +from typing import Optional from xarray.testing import assert_allclose # pyright: ignore[reportUnknownVariableType] from bioimageio.core.axis import AxisId @@ -31,14 +32,15 @@ def create_random_dataset(tid: MemberId, axes: Tuple[AxisId, ...]): "axes", [ None, - ("x", "y"), - ("channel", "y"), + (AxisId("x"), AxisId("y")), + (AxisId("channel"), AxisId("y")), + (AxisId("batch"), AxisId("channel"), AxisId("x"), AxisId("y")), ], ) -def test_mean_var_std_calculator(axes: Union[None, str, Tuple[str, ...]]): +def test_mean_var_std_calculator(axes: Optional[Tuple[AxisId, ...]]): tid = MemberId("tensor") - axes = tuple(map(AxisId, ("batch", "channel", "x", "y"))) - data, ds = create_random_dataset(tid, axes) + d_axes = tuple(map(AxisId, ("batch", "channel", "x", "y"))) + data, ds = create_random_dataset(tid, d_axes) expected_mean = data.mean(axes) expected_var = data.var(axes) expected_std = data.std(axes) diff --git a/tests/test_tensor.py b/tests/test_tensor.py index 33163077..e00efe04 100644 --- a/tests/test_tensor.py +++ b/tests/test_tensor.py @@ -1,3 +1,5 @@ +from typing import Sequence + import numpy as np import pytest import xarray as xr @@ -8,9 +10,19 @@ @pytest.mark.parametrize( "axes", - ["yx", "xy", "cyx", "yxc", "bczyx", "xyz", "xyzc", "bzyxc"], + [ + "yx", + "xy", + "cyx", + "yxc", + "bczyx", + "xyz", + "xyzc", + "bzyxc", + ("batch", "channel", "x", "y"), + ], ) -def test_transpose_tensor_2d(axes: str): +def test_transpose_tensor_2d(axes: Sequence[str]): tensor = Tensor.from_numpy(np.random.rand(256, 256), dims=None) transposed = tensor.transpose([AxisId(a) for a in axes]) @@ -19,9 +31,18 @@ @pytest.mark.parametrize( "axes", - ["zyx", "cyzx", "yzixc", "bczyx", "xyz", "xyzc", "bzyxtc"], + [ + "zyx", + "cyzx", + "yzixc", + "bczyx", + "xyz", + "xyzc", + "bzyxtc", + ("batch", "channel", "x", "y", "z"), + ], ) -def test_transpose_tensor_3d(axes: str): +def test_transpose_tensor_3d(axes: Sequence[str]): tensor = Tensor.from_numpy(np.random.rand(64, 64, 64), dims=None) transposed = tensor.transpose([AxisId(a) for a in axes]) assert transposed.ndim == len(axes) diff --git a/tests/test_weight_converters.py b/tests/test_weight_converters.py new file mode 100644 index 00000000..24d2b9cb --- /dev/null +++ b/tests/test_weight_converters.py @@ -0,0 +1,118 @@ +# type: ignore # TODO enable type checking +import os +import zipfile +from pathlib import Path + +import pytest + +from bioimageio.core import test_model +from bioimageio.spec import load_description +from bioimageio.spec.model import v0_5 + + +def test_pytorch_to_torchscript(any_torch_model, tmp_path): + from bioimageio.core.weight_converters.pytorch_to_torchscript import convert + + model_descr = load_description(any_torch_model) + out_path = tmp_path / "weights.pt" + ret_val = convert(model_descr, out_path) 
+ assert out_path.exists() + assert isinstance(ret_val, v0_5.TorchscriptWeightsDescr) + assert ret_val.source == out_path + model_descr.weights.torchscript = ret_val + summary = test_model(model_descr, weight_format="torchscript") + assert summary.status == "passed", summary.format() + + +def test_pytorch_to_onnx(convert_to_onnx, tmp_path): + from bioimageio.core.weight_converters.pytorch_to_onnx import convert + + model_descr = load_description(convert_to_onnx, format_version="latest") + out_path = tmp_path / "weights.onnx" + opset_version = 15 + ret_val = convert( + model_descr=model_descr, + output_path=out_path, + opset_version=opset_version, + ) + assert os.path.exists(out_path) + assert isinstance(ret_val, v0_5.OnnxWeightsDescr) + assert ret_val.opset_version == opset_version + assert ret_val.source == out_path + + model_descr.weights.onnx = ret_val + summary = test_model(model_descr, weight_format="onnx") + assert summary.status == "passed", summary.format() + + +def test_keras_to_tensorflow(any_keras_model: Path, tmp_path: Path): + from bioimageio.core.weight_converters.keras_to_tensorflow import convert + + model_descr = load_description(any_keras_model) + out_path = tmp_path / "weights" + ret_val = convert(model_descr, output_path=out_path) + assert out_path.exists() + assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) + assert ret_val.source == out_path + + model_descr.weights.keras = ret_val + summary = test_model(model_descr, weight_format="keras_hdf5") + assert summary.status == "passed", summary.format() + + +@pytest.mark.skip() +def test_keras_to_tensorflow_zipped(any_keras_model: Path, tmp_path: Path): + from bioimageio.core.weight_converters.keras_to_tensorflow import convert + + out_path = tmp_path / "weights.zip" + model_descr = load_description(any_keras_model) + ret_val = convert(model_descr, out_path) + + assert out_path.exists() + assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) + + expected_names = {"saved_model.pb", "variables/variables.index"} + with zipfile.ZipFile(out_path, "r") as f: + names = set([name for name in f.namelist()]) + assert len(expected_names - names) == 0 + + model_descr.weights.keras = ret_val + summary = test_model(model_descr, weight_format="keras_hdf5") + assert summary.status == "passed", summary.format() + + +# TODO: add tensorflow_to_keras converter +# def test_tensorflow_to_keras(any_tensorflow_model: Path, tmp_path: Path): +# from bioimageio.core.weight_converters.tensorflow_to_keras import convert + +# model_descr = load_description(any_tensorflow_model) +# out_path = tmp_path / "weights.h5" +# ret_val = convert(model_descr, output_path=out_path) +# assert out_path.exists() +# assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) +# assert ret_val.source == out_path + +# model_descr.weights.keras = ret_val +# summary = test_model(model_descr, weight_format="keras_hdf5") +# assert summary.status == "passed", summary.format() + + +# @pytest.mark.skip() +# def test_tensorflow_to_keras_zipped(any_tensorflow_model: Path, tmp_path: Path): +# from bioimageio.core.weight_converters.tensorflow_to_keras import convert + +# out_path = tmp_path / "weights.zip" +# model_descr = load_description(any_tensorflow_model) +# ret_val = convert(model_descr, out_path) + +# assert out_path.exists() +# assert isinstance(ret_val, v0_5.TensorflowSavedModelBundleWeightsDescr) + +# expected_names = {"saved_model.pb", "variables/variables.index"} +# with zipfile.ZipFile(out_path, "r") as f: +# names = 
set([name for name in f.namelist()]) +# assert len(expected_names - names) == 0 + +# model_descr.weights.keras = ret_val +# summary = test_model(model_descr, weight_format="keras_hdf5") +# assert summary.status == "passed", summary.format() diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 00000000..f9116fa5 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,21 @@ +"""utils to test bioimageio.core""" + +import os +from typing import Any, Protocol, Sequence + +import pytest + + +class ParameterSet(Protocol): + def __init__(self, values: Sequence[Any], marks: Any, id: str) -> None: + super().__init__() + + +class test_func(Protocol): + def __call__(*args: Any, **kwargs: Any): ... + + +expensive_test = pytest.mark.skipif( + os.getenv("RUN_EXPENSIVE_TESTS") != "true", + reason="Skipping expensive test (enable by RUN_EXPENSIVE_TESTS='true')", +) diff --git a/tests/weight_converter/keras/test_tensorflow.py b/tests/weight_converter/keras/test_tensorflow.py deleted file mode 100644 index 65c93f60..00000000 --- a/tests/weight_converter/keras/test_tensorflow.py +++ /dev/null @@ -1,52 +0,0 @@ -# type: ignore # TODO enable type checking -import zipfile -from pathlib import Path - -import pytest - -from bioimageio.spec import load_description -from bioimageio.spec.model.v0_5 import ModelDescr - - -@pytest.mark.skip( - "tensorflow converter not updated yet" -) # TODO: test tensorflow converter -def test_tensorflow_converter(any_keras_model: Path, tmp_path: Path): - from bioimageio.core.weight_converter.keras import ( - convert_weights_to_tensorflow_saved_model_bundle, - ) - - out_path = tmp_path / "weights" - model = load_description(any_keras_model) - assert isinstance(model, ModelDescr), model.validation_summary.format() - ret_val = convert_weights_to_tensorflow_saved_model_bundle(model, out_path) - assert out_path.exists() - assert (out_path / "variables").exists() - assert (out_path / "saved_model.pb").exists() - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes - - -@pytest.mark.skip( - "tensorflow converter not updated yet" -) # TODO: test tensorflow converter -def test_tensorflow_converter_zipped(any_keras_model: Path, tmp_path: Path): - from bioimageio.core.weight_converter.keras import ( - convert_weights_to_tensorflow_saved_model_bundle, - ) - - out_path = tmp_path / "weights.zip" - model = load_description(any_keras_model) - assert isinstance(model, ModelDescr), model.validation_summary.format() - ret_val = convert_weights_to_tensorflow_saved_model_bundle(model, out_path) - assert out_path.exists() - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes - - # make sure that the zip package was created correctly - expected_names = {"saved_model.pb", "variables/variables.index"} - with zipfile.ZipFile(out_path, "r") as f: - names = set([name for name in f.namelist()]) - assert len(expected_names - names) == 0 diff --git a/tests/weight_converter/torch/test_onnx.py b/tests/weight_converter/torch/test_onnx.py deleted file mode 100644 index 54f2cdf4..00000000 --- a/tests/weight_converter/torch/test_onnx.py +++ /dev/null @@ -1,18 +0,0 @@ -# type: ignore # TODO enable type checking -import os -from pathlib import Path - -import pytest - - -@pytest.mark.skip("onnx converter not updated yet") # TODO: test onnx converter -def test_onnx_converter(convert_to_onnx: Path, tmp_path: Path): - from bioimageio.core.weight_converter.torch._onnx import convert_weights_to_onnx - - out_path = tmp_path / 
"weights.onnx" - ret_val = convert_weights_to_onnx(convert_to_onnx, out_path, test_decimal=3) - assert os.path.exists(out_path) - if not pytest.skip_onnx: - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes diff --git a/tests/weight_converter/torch/test_torchscript.py b/tests/weight_converter/torch/test_torchscript.py deleted file mode 100644 index e0cee3d8..00000000 --- a/tests/weight_converter/torch/test_torchscript.py +++ /dev/null @@ -1,22 +0,0 @@ -# type: ignore # TODO enable type checking -from pathlib import Path - -import pytest - -from bioimageio.spec.model import v0_4, v0_5 - - -@pytest.mark.skip( - "torchscript converter not updated yet" -) # TODO: test torchscript converter -def test_torchscript_converter( - any_torch_model: "v0_4.ModelDescr | v0_5.ModelDescr", tmp_path: Path -): - from bioimageio.core.weight_converter.torch import convert_weights_to_torchscript - - out_path = tmp_path / "weights.pt" - ret_val = convert_weights_to_torchscript(any_torch_model, out_path) - assert out_path.exists() - assert ( - ret_val == 0 - ) # check for correctness is done in converter and returns 0 if it passes