Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup + add strided patches #218

Merged
merged 15 commits into from
Feb 22, 2024
73 changes: 60 additions & 13 deletions tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from wsinfer.wsi import HAS_OPENSLIDE
from wsinfer.wsi import HAS_TIFFSLIDE


@pytest.fixture
def tiff_image(tmp_path: Path) -> Path:
x = np.empty((4096, 4096, 3), dtype="uint8")
Expand Down Expand Up @@ -82,7 +83,7 @@ def test_cli_run_with_registered_models(
backend: str,
tiff_image: Path,
tmp_path: Path,
):
) -> None:
"""A regression test of the command 'wsinfer run'."""

reference_csv = Path(__file__).parent / "reference" / model / "purple.csv"
Expand Down Expand Up @@ -144,14 +145,14 @@ def test_cli_run_with_registered_models(
geojson_dir = results_dir / "model-outputs-geojson"
# result = runner.invoke(cli, ["togeojson", str(results_dir), str(geojson_dir)])
assert result.exit_code == 0
with open(geojson_dir / "purple.json") as f:
with open(geojson_dir / "purple.geojson") as f:
d: geojsonlib.GeoJSON = geojsonlib.load(f)
assert d.is_valid, "geojson not valid!"
assert len(d["features"]) == len(df_ref)

for geojson_row in d["features"]:
assert geojson_row["type"] == "Feature"
isinstance(geojson_row["id"] , str)
isinstance(geojson_row["id"], str)
assert geojson_row["geometry"]["type"] == "Polygon"
res = []
for i, prob_col in enumerate(prob_cols):
Expand All @@ -178,7 +179,7 @@ def test_cli_run_with_registered_models(
assert [df_coords] == geojson_row["geometry"]["coordinates"]


def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path):
def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path) -> None:
model = "breast-tumor-resnet34.tcga-brca"
reference_csv = Path(__file__).parent / "reference" / model / "purple.csv"
if not reference_csv.exists():
Expand Down Expand Up @@ -246,7 +247,7 @@ def test_cli_run_with_local_model(tmp_path: Path, tiff_image: Path):
), f"Column {prob_col} not allclose at atol=1e-07"


def test_cli_run_no_model_or_config(tmp_path: Path):
def test_cli_run_no_model_or_config(tmp_path: Path) -> None:
"""Test that --model or (--config and --model-path) is required."""
wsi_dir = tmp_path / "slides"
wsi_dir.mkdir()
Expand All @@ -265,7 +266,7 @@ def test_cli_run_no_model_or_config(tmp_path: Path):
assert "one of --model or (--config and --model-path) is required" in result.output


def test_cli_run_model_and_config(tmp_path: Path):
def test_cli_run_model_and_config(tmp_path: Path) -> None:
"""Test that (model and weights) or config is required."""
wsi_dir = tmp_path / "slides"
wsi_dir.mkdir()
Expand Down Expand Up @@ -298,7 +299,7 @@ def test_cli_run_model_and_config(tmp_path: Path):


@pytest.mark.xfail
def test_convert_to_sbu():
def test_convert_to_sbu() -> None:
# TODO: create a synthetic output and then convert it. Check that it is valid.
assert False

Expand Down Expand Up @@ -330,7 +331,7 @@ def test_patch_cli(
backend: str,
tmp_path: Path,
tiff_image: Path,
):
) -> None:
"""Test of 'wsinfer patch'."""
orig_slide_size = 4096
orig_slide_spacing = 0.25
Expand Down Expand Up @@ -380,7 +381,7 @@ def test_patch_cli(


# FIXME: parametrize this test across our models.
def test_jit_compile():
def test_jit_compile() -> None:
w = get_registered_model("breast-tumor-resnet34.tcga-brca")
model = get_pretrained_torch_module(w)

Expand Down Expand Up @@ -411,7 +412,7 @@ def test_jit_compile():
)


def test_issue_89():
def test_issue_89() -> None:
"""Do not fail if 'git' is not installed."""
model_obj = get_registered_model("breast-tumor-resnet34.tcga-brca")
d = _get_info_for_save(model_obj)
Expand All @@ -433,7 +434,7 @@ def test_issue_89():
os.environ["PATH"] = orig_path # reset path


def test_issue_94(tmp_path: Path, tiff_image: Path):
def test_issue_94(tmp_path: Path, tiff_image: Path) -> None:
"""Gracefully handle unreadable slides."""

# We have a valid tiff in 'tiff_image.parent'. We put in an unreadable file too.
Expand Down Expand Up @@ -461,7 +462,7 @@ def test_issue_94(tmp_path: Path, tiff_image: Path):
assert not results_dir.joinpath("model-outputs-csv").joinpath("bad.csv").exists()


def test_issue_97(tmp_path: Path, tiff_image: Path):
def test_issue_97(tmp_path: Path, tiff_image: Path) -> None:
"""Write a run_metadata file per run."""

runner = CliRunner()
Expand Down Expand Up @@ -502,11 +503,57 @@ def test_issue_97(tmp_path: Path, tiff_image: Path):
assert len(metas) == 2


def test_issue_125(tmp_path: Path):
def test_issue_125(tmp_path: Path) -> None:
    """Check that run info is JSON-serializable when ``model_path`` is a Path.

    Regression test: the registered model's ``model_path`` is replaced with a
    ``pathlib.Path`` object, and the info dict built from that model must still
    be writable with ``json.dump``.
    """
    registered = get_registered_model("breast-tumor-resnet34.tcga-brca")
    # Force the attribute to be a Path, which is the condition under test.
    registered.model_path = Path(registered.model_path)  # type: ignore

    save_info = _get_info_for_save(registered)
    json_path = tmp_path / "foo.json"
    with json_path.open("w") as fp:
        json.dump(save_info, fp)


def test_issue_203(tiff_image: Path) -> None:
    """Test that openslide and tiffslide pad an image if an out-of-bounds region
    is requested.

    The region is requested with its top-left corner at ``(width, height)``,
    i.e. starting at the slide's far corner, and the returned 256x256 image
    is required to be entirely zeros for both backends.

    The test is skipped (instead of erroring) when either optional backend
    is not installed.
    """
    # Both backends are optional dependencies; skip cleanly if one is missing.
    tiffslide = pytest.importorskip("tiffslide")
    openslide = pytest.importorskip("openslide")

    with tiffslide.TiffSlide(tiff_image) as tslide:
        w, h = tslide.dimensions
        img = tslide.read_region((w, h), level=0, size=(256, 256))
        assert img.size == (256, 256)
        assert np.allclose(np.array(img), 0)
    # Drop the tiffslide objects before opening the same file with openslide.
    del tslide, img

    with openslide.OpenSlide(tiff_image) as oslide:
        w, h = oslide.dimensions
        img = oslide.read_region((w, h), level=0, size=(256, 256))
        assert img.size == (256, 256)
        assert np.allclose(np.array(img), 0)


def test_issue_214(tmp_path: Path, tiff_image: Path) -> None:
    """Check that 'wsinfer run' works when the slide is a symlink.

    A symlink with an arbitrary name is created in its own directory and used
    as the only entry of ``--wsi-dir``. The command must exit successfully, and
    the patch and CSV outputs must be named after the symlink.
    """
    slides_dir = tmp_path / "forlinks"
    slides_dir.mkdir(parents=True)
    symlinked_slide = slides_dir / "arbitrary-link-name.tiff"
    symlinked_slide.symlink_to(tiff_image)

    results_dir = tmp_path / "inference"
    cli_args = [
        "run",
        "--wsi-dir",
        str(slides_dir),
        "--results-dir",
        str(results_dir),
        "--model",
        "breast-tumor-resnet34.tcga-brca",
    ]
    result = CliRunner().invoke(cli, cli_args)
    assert result.exit_code == 0

    # Output file names are derived from the symlink's name.
    expected_h5 = results_dir / "patches" / symlinked_slide.with_suffix(".h5").name
    expected_csv = (
        results_dir / "model-outputs-csv" / symlinked_slide.with_suffix(".csv").name
    )
    assert expected_h5.exists()
    assert expected_csv.exists()
10 changes: 3 additions & 7 deletions wsinfer/cli/convert_csv_to_sbubmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,21 +249,17 @@ def get_color(row: pd.Series) -> tuple[float, float, float]:
@click.command()
@click.argument(
"results_dir",
type=click.Path(
exists=True, file_okay=False, dir_okay=True, path_type=Path, resolve_path=True
),
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
)
@click.argument(
"output",
type=click.Path(exists=False, path_type=Path, resolve_path=True),
type=click.Path(exists=False, path_type=Path),
)
@click.option(
"--wsi-dir",
required=True,
help="Directory with whole slide images.",
type=click.Path(
exists=True, file_okay=False, dir_okay=True, path_type=Path, resolve_path=True
),
type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
)
@click.option("--execution-id", required=True, help="Unique execution ID for this run.")
@click.option("--study-id", required=True, help="Study ID, like TCGA-BRCA.")
Expand Down
30 changes: 21 additions & 9 deletions wsinfer/cli/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,15 +188,15 @@ def get_stdout(args: list[str]) -> str:
@click.option(
"-i",
"--wsi-dir",
type=click.Path(exists=True, file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, file_okay=False, path_type=Path),
required=True,
help="Directory containing whole slide images. This directory can *only* contain"
" whole slide images.",
)
@click.option(
"-o",
"--results-dir",
type=click.Path(file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(file_okay=False, path_type=Path),
required=True,
help="Directory to store results. If directory exists, will skip"
" whole slides for which outputs exist.",
Expand All @@ -212,7 +212,7 @@ def get_stdout(args: list[str]) -> str:
@click.option(
"-c",
"--config",
type=click.Path(exists=True, dir_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, dir_okay=False, path_type=Path),
help=(
"Path to configuration for the trained model. Use this option if the"
" model weights are not registered in wsinfer. Mutually exclusive with"
Expand All @@ -222,7 +222,7 @@ def get_stdout(args: list[str]) -> str:
@click.option(
"-p",
"--model-path",
type=click.Path(exists=True, dir_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, dir_okay=False, path_type=Path),
help=(
"Path to the pretrained model. Use only when --config is passed. Mutually "
"exclusive with --model."
Expand Down Expand Up @@ -303,6 +303,16 @@ def get_stdout(args: list[str]) -> str:
" area, it is filled with foreground. The default is 190um x 190um. The units of"
" this argument are microns squared.",
)
@click.option(
"--patch-overlap-ratio",
default=0.0,
type=click.FloatRange(min=None, max=1, max_open=True),
help="The ratio of overlap among patches. The default value of 0 produces"
" non-overlapping patches. A value in (0, 1) will produce overlapping patches."
" Negative values will add space between patches. A value of -1 would skip"
" every other patch. A value of 0.5 will provide 50%% of overlap between patches."
" Values must be in (-inf, 1).",
)
def run(
ctx: click.Context,
*,
Expand All @@ -321,6 +331,7 @@ def run(
seg_closing_kernel_size: int,
seg_min_object_size_um2: float,
seg_min_hole_size_um2: float,
patch_overlap_ratio: float = 0.0,
) -> None:
"""Run model inference on a directory of whole slide images.

Expand Down Expand Up @@ -349,9 +360,6 @@ def run(
"--config and --model-path must both be set if one is set."
)

wsi_dir = wsi_dir.resolve()
results_dir = results_dir.resolve()

if not wsi_dir.exists():
raise FileNotFoundError(f"Whole slide image directory not found: {wsi_dir}")

Expand Down Expand Up @@ -401,6 +409,7 @@ def run(
closing_kernel_size=seg_closing_kernel_size,
min_object_size_um2=seg_min_object_size_um2,
min_hole_size_um2=seg_min_hole_size_um2,
overlap=patch_overlap_ratio,
)

if not results_dir.joinpath("patches").exists():
Expand Down Expand Up @@ -438,9 +447,12 @@ def run(
with open(run_metadata_outpath, "w") as f:
json.dump(run_metadata, f, indent=2)

click.secho("Finished.", fg="green")

click.echo("Writing inference results to GeoJSON files")
csvs = list((results_dir / "model-outputs-csv").glob("*.csv"))
write_geojsons(csvs, results_dir, num_workers)

if qupath:
click.echo("Creating QuPath project with results")
make_qupath_project(wsi_dir, results_dir)

click.secho("Finished.", fg="green")
40 changes: 20 additions & 20 deletions wsinfer/cli/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
@click.option(
"-i",
"--wsi-dir",
type=click.Path(exists=True, file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(exists=True, file_okay=False, path_type=Path),
required=True,
help="Directory containing whole slide images. This directory can *only* contain"
" whole slide images.",
)
@click.option(
"-o",
"--results-dir",
type=click.Path(file_okay=False, path_type=Path, resolve_path=True),
type=click.Path(file_okay=False, path_type=Path),
required=True,
help="Directory to store patch results. If directory exists, will skip"
" whole slides for which outputs exist.",
Expand All @@ -32,41 +32,41 @@
help="Physical spacing of the patch in micrometers per pixel.",
)
@click.option(
"--thumbsize",
"--seg-thumbsize",
default=(2048, 2048),
type=(int, int),
help="The size of the slide thumbnail (in pixels) used for tissue segmentation."
" The aspect ratio is preserved, and the longest side will have length"
" max(thumbsize).",
)
@click.option(
"--median-filter-size",
"--seg-median-filter-size",
default=7,
type=click.IntRange(min=3),
help="The kernel size for median filtering. Must be greater than 1 and odd.",
)
@click.option(
"--binary-threshold",
"--seg-binary-threshold",
default=7,
type=click.IntRange(min=1),
help="The threshold for image binarization.",
)
@click.option(
"--closing-kernel-size",
"--seg-closing-kernel-size",
default=6,
type=click.IntRange(min=1),
help="The kernel size for binary closing (morphological operation).",
)
@click.option(
"--min-object-size-um2",
"--seg-min-object-size-um2",
default=200**2,
type=click.FloatRange(min=0),
help="The minimum size of an object to keep during tissue detection. If a"
" contiguous object is smaller than this area, it replaced with background."
" The default is 200um x 200um. The units of this argument are microns squared.",
)
@click.option(
"--min-hole-size-um2",
"--seg-min-hole-size-um2",
default=190**2,
type=click.FloatRange(min=0),
help="The minimum size of a hole to keep as a hole. If a hole is smaller than this"
Expand All @@ -78,23 +78,23 @@ def patch(
results_dir: str,
patch_size_px: int,
patch_spacing_um_px: float,
thumbsize: tuple[int, int],
median_filter_size: int,
binary_threshold: int,
closing_kernel_size: int,
min_object_size_um2: float,
min_hole_size_um2: float,
seg_thumbsize: tuple[int, int],
seg_median_filter_size: int,
seg_binary_threshold: int,
seg_closing_kernel_size: int,
seg_min_object_size_um2: float,
seg_min_hole_size_um2: float,
) -> None:
"""Patch a directory of whole slide images."""
segment_and_patch_directory_of_slides(
wsi_dir=wsi_dir,
save_dir=results_dir,
patch_size_px=patch_size_px,
patch_spacing_um_px=patch_spacing_um_px,
thumbsize=thumbsize,
median_filter_size=median_filter_size,
binary_threshold=binary_threshold,
closing_kernel_size=closing_kernel_size,
min_object_size_um2=min_object_size_um2,
min_hole_size_um2=min_hole_size_um2,
thumbsize=seg_thumbsize,
median_filter_size=seg_median_filter_size,
binary_threshold=seg_binary_threshold,
closing_kernel_size=seg_closing_kernel_size,
min_object_size_um2=seg_min_object_size_um2,
min_hole_size_um2=seg_min_hole_size_um2,
)
Loading
Loading