From 9fee675394c0df1ddff835c46ad5c078e76c214a Mon Sep 17 00:00:00 2001 From: fynnbe Date: Mon, 11 Nov 2024 14:56:59 +0100 Subject: [PATCH 1/4] add tool_wo_version --- .../db_structure/compatibility.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bioimageio_collection_backoffice/db_structure/compatibility.py b/bioimageio_collection_backoffice/db_structure/compatibility.py index c04821e9..7b19f384 100644 --- a/bioimageio_collection_backoffice/db_structure/compatibility.py +++ b/bioimageio_collection_backoffice/db_structure/compatibility.py @@ -8,8 +8,13 @@ class CompatiblityReport(Node, frozen=True, extra="allow"): - tool: Annotated[str, Field(exclude=True)] - """toolname (including version)""" + tool: Annotated[str, Field(exclude=True, pattern=r"^[^_]+_[^_]+$")] + """toolname (including version separated by an underscore)""" + + @property + def tool_wo_version(self) -> str: + """assuming a pattern of `{tool name}_{version}`""" + return self.tool.split("_")[0] status: Literal["passed", "failed", "not-applicable"] """status of this tool for this resource""" From 09a07870700cfc3a9e7063976e846e00c2f15f48 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 12 Nov 2024 13:12:30 +0100 Subject: [PATCH 2/4] update tags with compatibility tags --- .../remote_collection.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/bioimageio_collection_backoffice/remote_collection.py b/bioimageio_collection_backoffice/remote_collection.py index 0601b2d1..c2552a04 100644 --- a/bioimageio_collection_backoffice/remote_collection.py +++ b/bioimageio_collection_backoffice/remote_collection.py @@ -1216,12 +1216,28 @@ def create_collection_entries( # ingest compatibility reports links = set(rdf.get("links", [])) + tags = set(rdf.get("tags", [])) compat_reports = record_version.get_all_compatibility_reports() + def get_compat_tag(tool: str): + """make a special, derived tag for the automatic compatibility check result + + of a tool to avoid overwriting 
plain manual tags like 'ilastik'. + """ + return f"{tool}-compatible" + + # remove all version unspecific tool tags + for r in compat_reports: + tags.discard(get_compat_tag(r.tool_wo_version)) + + # update links and tags with compatible tools for r in compat_reports: if r.status == "passed": - # update links to reference compatible tools links.update(r.links) + tags.add(get_compat_tag(r.tool)) # add version unspecific tag + tags.add(get_compat_tag(r.tool_wo_version)) + else: + tags.discard(get_compat_tag(r.tool)) try: thumbnails = rdf["config"]["bioimageio"]["thumbnails"] @@ -1269,7 +1285,7 @@ def create_collection_entries( nickname=nickname, rdf_source=AnyUrl(record_version.rdf_url), root_url=root_url, - tags=rdf.get("tags", []), + tags=list(tags), training_data=rdf["training_data"] if "training_data" in rdf else None, type=rdf["type"], source=rdf.get("source"), From c6ac120acd97a4febec7ed816750f3d85bedafb2 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 12 Nov 2024 13:16:44 +0100 Subject: [PATCH 3/4] document check_compatibility_ilastik_impl --- scripts/check_compatibility_ilastik.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/check_compatibility_ilastik.py b/scripts/check_compatibility_ilastik.py index fba31971..f6f06ffe 100644 --- a/scripts/check_compatibility_ilastik.py +++ b/scripts/check_compatibility_ilastik.py @@ -26,7 +26,14 @@ def check_compatibility_ilastik_impl( rdf_url: str, sha256: str, report_path: Path, -): +) -> None: + """Write `CompatibilityReport` for a resource description. 
+ + Args: + rdf_url: URL to the rdf.yaml file + sha256: SHA-256 value of **rdf_url** content + report_path: path to save `CompatibilityReport` as json to + """ report_path.parent.mkdir(parents=True, exist_ok=True) rdf = download_rdf(rdf_url, sha256) From ffeb55bbcbf8366685f6b2e9db9cbfe226599c84 Mon Sep 17 00:00:00 2001 From: fynnbe Date: Tue, 12 Nov 2024 13:54:12 +0100 Subject: [PATCH 4/4] add check_tool_compatibility helper --- .../db_structure/compatibility.py | 2 +- .../remote_collection.py | 6 +- .../run_dynamic_tests.py | 4 +- scripts/check_compatibility_ilastik.py | 65 +++-------- scripts/script_utils.py | 107 ++++++++++++++++-- 5 files changed, 121 insertions(+), 63 deletions(-) diff --git a/bioimageio_collection_backoffice/db_structure/compatibility.py b/bioimageio_collection_backoffice/db_structure/compatibility.py index 7b19f384..ebc0f3e3 100644 --- a/bioimageio_collection_backoffice/db_structure/compatibility.py +++ b/bioimageio_collection_backoffice/db_structure/compatibility.py @@ -7,7 +7,7 @@ from ..common import Node -class CompatiblityReport(Node, frozen=True, extra="allow"): +class CompatibilityReport(Node, frozen=True, extra="allow"): tool: Annotated[str, Field(exclude=True, pattern=r"^[^_]+_[^_]+$")] """toolname (including version separated by an underscore)""" diff --git a/bioimageio_collection_backoffice/remote_collection.py b/bioimageio_collection_backoffice/remote_collection.py index c2552a04..df657e57 100644 --- a/bioimageio_collection_backoffice/remote_collection.py +++ b/bioimageio_collection_backoffice/remote_collection.py @@ -53,7 +53,7 @@ ) from .db_structure.chat import Chat, Message from .db_structure.compatibility import ( - CompatiblityReport, + CompatibilityReport, TestSummary, TestSummaryEntry, ) @@ -669,7 +669,7 @@ def get_all_compatibility_reports(self, tool: Optional[str] = None): for t in tools } return [ - CompatiblityReport.model_validate({**json.loads(d), "tool": t}) + CompatibilityReport.model_validate({**json.loads(d), 
"tool": t}) for t, d in reports_data.items() if d is not None ] @@ -677,7 +677,7 @@ def get_all_compatibility_reports(self, tool: Optional[str] = None): def get_compatibility_report_path(self, tool: str): return f"{self.folder}compatibility/{tool}.json" - def set_compatibility_report(self, report: CompatiblityReport) -> None: + def set_compatibility_report(self, report: CompatibilityReport) -> None: path = self.get_compatibility_report_path(report.tool) self.client.put_and_cache(path, report.model_dump_json().encode()) diff --git a/bioimageio_collection_backoffice/run_dynamic_tests.py b/bioimageio_collection_backoffice/run_dynamic_tests.py index c5786e72..767541a5 100644 --- a/bioimageio_collection_backoffice/run_dynamic_tests.py +++ b/bioimageio_collection_backoffice/run_dynamic_tests.py @@ -12,7 +12,7 @@ from ruyaml import YAML from bioimageio_collection_backoffice.db_structure.compatibility import ( - CompatiblityReport, + CompatibilityReport, ) from .db_structure.log import LogEntry @@ -61,7 +61,7 @@ def run_dynamic_tests( details_formatted=summary.format(), ) ) - report = CompatiblityReport( + report = CompatibilityReport( tool=f"bioimageio.core_{bioimageio.core.__version__}", status=summary.status, error=( diff --git a/scripts/check_compatibility_ilastik.py b/scripts/check_compatibility_ilastik.py index f6f06ffe..2d610473 100644 --- a/scripts/check_compatibility_ilastik.py +++ b/scripts/check_compatibility_ilastik.py @@ -1,12 +1,8 @@ import argparse -import json -import traceback -import warnings from pathlib import Path from typing import TYPE_CHECKING import bioimageio.core -import requests from typing_extensions import Literal if bioimageio.core.__version__.startswith("0.5."): @@ -14,39 +10,31 @@ else: from bioimageio.core import test_model -from script_utils import CompatiblityReport, download_rdf - -try: - from tqdm import tqdm -except ImportError: - tqdm = list +from script_utils import CompatibilityReportDict, check_tool_compatibility, download_rdf def 
check_compatibility_ilastik_impl( rdf_url: str, sha256: str, - report_path: Path, -) -> None: - """Write `CompatibilityReport` for a resource description. +) -> CompatibilityReportDict: + """Create a `CompatibilityReport` for a resource description. Args: rdf_url: URL to the rdf.yaml file sha256: SHA-256 value of **rdf_url** content - report_path: path to save `CompatibilityReport` as json to """ - report_path.parent.mkdir(parents=True, exist_ok=True) rdf = download_rdf(rdf_url, sha256) if rdf["type"] != "model": - report = CompatiblityReport( + report = CompatibilityReportDict( status="not-applicable", error=None, details="only 'model' resources can be used in ilastik.", ) elif len(rdf["inputs"]) > 1 or len(rdf["outputs"]) > 1: - report = CompatiblityReport( + report = CompatibilityReportDict( status="failed", error=f"ilastik only supports single tensor input/output (found {len(rdf['inputs'])}/{len(rdf['outputs'])})", details=None, @@ -78,15 +66,14 @@ def check_compatibility_ilastik_impl( else summary.format() ) ) - report = CompatiblityReport( + report = CompatibilityReportDict( status=status, error=error, details=details, links=["ilastik/ilastik"], ) - with report_path.open("wt", encoding="utf-8") as f: - json.dump(report, f) + return report def check_compatibility_ilastik( @@ -98,36 +85,14 @@ def check_compatibility_ilastik( # TODO: test with ilastik itself """ - with all_version_path.open() as f: - all_versions = json.load(f)["entries"] - - all_model_versions = [entry for entry in all_versions if entry["type"] == "model"] - - for entry in tqdm(all_model_versions): - for version in entry["versions"]: - rdf_url = version["source"] - sha256 = version["sha256"] - - report_url = ( - "/".join(rdf_url.split("/")[:-2]) - + f"/compatibility/ilastik_{ilastik_version}.yaml" - ) - r = requests.head(report_url) - if r.status_code != 404: - r.raise_for_status() # raises if failed to check if report exists - continue # report already exists - - report_path = ( - 
"/".join(rdf_url.split("/")[-4:-2]) - + f"/compatibility/ilastik_{ilastik_version}.json" - ) - try: - check_compatibility_ilastik_impl( - rdf_url, sha256, output_folder / report_path - ) - except Exception as e: - traceback.print_exc() - warnings.warn(f"failed to check '{rdf_url}': {e}") + check_tool_compatibility( + "ilastik", + ilastik_version, + all_version_path=all_version_path, + output_folder=output_folder, + check_tool_compatibility_impl=check_compatibility_ilastik_impl, + applicable_types={"model"}, + ) if __name__ == "__main__": diff --git a/scripts/script_utils.py b/scripts/script_utils.py index 6ff75784..738c1c53 100644 --- a/scripts/script_utils.py +++ b/scripts/script_utils.py @@ -1,19 +1,37 @@ import hashlib -from io import BytesIO -from typing import Any, Dict, Optional, Sequence, Union +import json +import traceback +import warnings +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Sequence, Set, Union import requests -from typing_extensions import Literal, NotRequired, TypedDict +from typing_extensions import Literal, NotRequired, TypedDict, TypeGuard try: from ruyaml import YAML except ImportError: from ruamel.yaml import YAML +try: + from tqdm import tqdm +except ImportError: + tqdm = list + +if TYPE_CHECKING: + from bioimageio_collection_backoffice.db_structure.compatibility import ( + CompatibilityReport, + ) + yaml = YAML(typ="safe") -class CompatiblityReport(TypedDict): +class CompatibilityReportDict(TypedDict): + """TypedDict version of + `bioimageio_collection_backoffice.db_structure.compatibility.CompatibilityReport` + for environments without `bioimageio_collection_backoffice`. 
+ """ + status: Literal["passed", "failed", "not-applicable"] """status of this tool for this resource""" @@ -27,6 +45,76 @@ class CompatiblityReport(TypedDict): """the checked resource should link these other bioimage.io resources""" +def check_tool_compatibility( + tool_name: str, + tool_version: str, + *, + all_version_path: Path, + output_folder: Path, + check_tool_compatibility_impl: Callable[ + [str, str], Union[CompatibilityReportDict, "CompatibilityReport"] + ], + applicable_types: Set[str], +): + """helper to implement tool compatibility checks + + Args: + tool_name: name of the tool (without version), e.g. "ilastik" + tool_version: version of the tool, e.g. "1.4" + all_version_path: Path to the `all_versions.json` file. + output_folder: Folder to write compatibility reports to. + check_tool_compatibility_impl: + Function accepting two positional arguments: + URL to an rdf.yaml, SHA-256 of that rdf.yaml. + And returning a compatibility report. + applicable_types: Set of resource types + **check_tool_compatibility_impl** is applicable to. 
+ """ + if "_" in tool_name: + raise ValueError("Underscore not allowed in tool_name") + + if "_" in tool_version: + raise ValueError("Underscore not allowed in tool_version") + + with all_version_path.open() as f: + all_versions = json.load(f)["entries"] + + filtered_versions = [ + entry for entry in all_versions if entry["type"] in applicable_types + ] + + for entry in tqdm(filtered_versions): + for version in entry["versions"]: + rdf_url = version["source"] + sha256 = version["sha256"] + + report_url = ( + "/".join(rdf_url.split("/")[:-2]) + + f"/compatibility/ilastik_{tool_version}.yaml" + ) + r = requests.head(report_url) + if r.status_code != 404: + r.raise_for_status() # raises if failed to check if report exists + continue # report already exists + + try: + report = check_tool_compatibility_impl(rdf_url, sha256) + except Exception as e: + traceback.print_exc() + warnings.warn(f"failed to check '{rdf_url}': {e}") + else: + if not isinstance(report, dict): + report = report.model_dump(mode="json") + + report_path = output_folder / ( + "/".join(rdf_url.split("/")[-4:-2]) + + f"/compatibility/{tool_name}_{tool_version}.json" + ) + report_path.parent.mkdir(parents=True, exist_ok=True) + with report_path.open("wt", encoding="utf-8") as f: + json.dump(report, f) + + def download_and_check_hash(url: str, sha256: str) -> bytes: r = requests.get(url) r.raise_for_status() @@ -41,9 +129,14 @@ def download_and_check_hash(url: str, sha256: str) -> bytes: return data +def _is_str_dict(d: Any) -> TypeGuard[Dict[str, Any]]: + return isinstance(d, dict) and all( + isinstance(k, str) for k in d # pyright: ignore[reportUnknownVariableType] + ) + + def download_rdf(rdf_url: str, sha256: str) -> Dict[str, Any]: rdf_data = download_and_check_hash(rdf_url, sha256) - rdf: Union[Any, Dict[Any, Any]] = yaml.load(rdf_data.decode()) - assert isinstance(rdf, dict) - assert all(isinstance(k, str) for k in rdf) + rdf: Any = yaml.load(rdf_data.decode()) + assert _is_str_dict(rdf) return rdf