
Commit a1b8852

Memory layout optimized TIFF reader (#120)
* Add experimental tiff reader
* Add multipage image support for experimental reader
* Dynamically calculate tile scale
* Add unit tests for experimental tiff reader
* Extract logic for finding memory layout of chunks
* Refactor experimental reader
* Update tests
* Address PR comments
* Add missing import
* Remove erroneous clear function call on metadata
* Replace the optimal reader implementation with a caching approach for tiles that are read out of order
* Limit the number of pending tiles for writing to bound memory usage
* Add support for 5D images
* Lint fixes
* Increase CI timeout
* Exclude experimental reader tests from Windows CI
* Add support for depth-ordered images
* Expose experimental reader parameters
* Lint fix
1 parent ec595fb commit a1b8852
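
The commit message describes replacing the previous reader with a caching approach for tiles that are read out of order, together with a cap on pending tiles to bound memory use. That code is not shown on this page; purely as an illustration of the general idea, the sketch below shows one way such a bounded out-of-order tile buffer could look. Every name in it (BoundedTileCache, write_tile, max_pending) is hypothetical and not taken from the actual implementation.

from collections import OrderedDict
from typing import Callable, Hashable

import numpy as np


class BoundedTileCache:
    """Illustrative sketch only (not the reader's actual implementation):
    buffer tiles that arrive out of order and flush the oldest entry
    whenever the number of pending tiles exceeds the cap."""

    def __init__(
        self,
        write_tile: Callable[[Hashable, np.ndarray], None],
        max_pending: int = 16,
    ) -> None:
        self._write_tile = write_tile      # sink that persists one tile
        self._max_pending = max_pending    # bound on tiles held in memory
        self._pending: "OrderedDict[Hashable, np.ndarray]" = OrderedDict()

    def add(self, key: Hashable, tile: np.ndarray) -> None:
        # If the buffer is full, evict and persist the oldest pending tile first.
        if len(self._pending) >= self._max_pending:
            oldest_key, oldest_tile = self._pending.popitem(last=False)
            self._write_tile(oldest_key, oldest_tile)
        self._pending[key] = tile

    def flush(self) -> None:
        # Drain everything that is still buffered once reading finishes.
        while self._pending:
            key, tile = self._pending.popitem(last=False)
            self._write_tile(key, tile)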

File tree

13 files changed: +666 −67 lines


.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@ jobs:
   build:
     name: ${{ matrix.sys.os }}
     runs-on: ${{ matrix.sys.os }}
-    timeout-minutes: 20
+    timeout-minutes: 25
     strategy:
       matrix:
         sys:
@@ -88,7 +88,7 @@ jobs:
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
         run: |
           mkdir test_temp
-          micromamba run -n test pytest --basetemp=test_temp -v --cov=tiledb --cov-report=term-missing --durations=0 tests/
+          micromamba run -n test pytest --basetemp=test_temp -v --cov=tiledb --cov-report=term-missing --durations=0 tests/ --ignore=tests/integration/converters/test_ome_tiff_experimental.py
         if: ${{ matrix.sys.os == 'windows-latest' }}

       - name: Run notebook examples

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
@@ -1,13 +1,13 @@
 repos:
   - repo: https://github.com/ambv/black
-    rev: 23.1.0
+    rev: 24.4.2
     hooks:
       - id: black
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.241
+    rev: v0.4.2
     hooks:
       - id: ruff
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.991
+    rev: v1.10.0
     hooks:
       - id: mypy

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -9,7 +9,7 @@ module = "tests.*"
 ignore_errors = true

 [tool.ruff]
-ignore = ["E501"]
-extend-select = ["I001"]
+lint.ignore = ["E501"]
+lint.extend-select = ["I001"]
 exclude = ["__init__.py"]
 fix = true

setup.cfg

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ classifiers =
 [options]
 zip_safe = False
 packages = find_namespace:
-python_requires = >=3.7
+python_requires = >=3.8


 [options.packages.find]
tests/integration/converters/test_ome_tiff_experimental.py

Lines changed: 174 additions & 0 deletions
@@ -0,0 +1,174 @@
import numpy as np
import pytest
import tifffile

import tiledb
from tests import assert_image_similarity, get_path
from tiledb.bioimg.converters.ome_tiff import OMETiffConverter
from tiledb.bioimg.helpers import open_bioimg
from tiledb.bioimg.openslide import TileDBOpenSlide
from tiledb.cc import WebpInputFormat


# TODO: expand the test files. Most of the current test files are not memory-contiguous,
# and the ones that are, are not RGB, so the different compressors cannot be exercised.
@pytest.mark.parametrize("filename,num_series", [("UTM2GTIF.tiff", 1)])
@pytest.mark.parametrize("preserve_axes", [False, True])
@pytest.mark.parametrize("chunked,max_workers", [(False, 0), (True, 0), (True, 4)])
@pytest.mark.parametrize(
    "compressor",
    [tiledb.ZstdFilter(level=0)],
)
def test_ome_tiff_converter_exclude_original_metadata(
    tmp_path, filename, num_series, preserve_axes, chunked, max_workers, compressor
):
    # The image is not RGB, so the WebP compressor cannot be used.
    if isinstance(compressor, tiledb.WebpFilter) and filename == "UTM2GTIF.tiff":
        pytest.skip(f"WebPFilter cannot be applied to {filename}")

    input_path = get_path(filename)
    tiledb_path = tmp_path / "to_tiledb"
    experimental_path = tmp_path / "experimental"
    OMETiffConverter.to_tiledb(
        input_path,
        str(tiledb_path),
        preserve_axes=preserve_axes,
        chunked=chunked,
        max_workers=max_workers,
        compressor=compressor,
        log=False,
        exclude_metadata=True,
    )

    OMETiffConverter.to_tiledb(
        input_path,
        str(experimental_path),
        preserve_axes=preserve_axes,
        chunked=True,
        max_workers=max_workers,
        compressor=compressor,
        log=False,
        exclude_metadata=True,
        experimental_reader=True,
    )

    with TileDBOpenSlide(str(tiledb_path)) as t:
        with TileDBOpenSlide(str(experimental_path)) as e:
            assert t.level_count == e.level_count

            for level in range(t.level_count):
                np.testing.assert_array_equal(t.read_level(level), e.read_level(level))


@pytest.mark.parametrize("filename,num_series", [("UTM2GTIF.tiff", 1)])
@pytest.mark.parametrize("preserve_axes", [False, True])
@pytest.mark.parametrize("chunked,max_workers", [(True, 0), (True, 4)])
@pytest.mark.parametrize(
    "compressor",
    [tiledb.ZstdFilter(level=0)],
)
def test_ome_tiff_converter_roundtrip(
    tmp_path, filename, num_series, preserve_axes, chunked, max_workers, compressor
):
    if isinstance(compressor, tiledb.WebpFilter) and filename == "UTM2GTIF.tiff":
        pytest.skip(f"WebPFilter cannot be applied to {filename}")

    input_path = get_path(filename)
    tiledb_path = tmp_path / "to_tiledb"
    output_path = tmp_path / "from_tiledb"
    OMETiffConverter.to_tiledb(
        input_path,
        str(tiledb_path),
        preserve_axes=preserve_axes,
        chunked=chunked,
        max_workers=max_workers,
        compressor=compressor,
        log=False,
        experimental_reader=True,
        reader_kwargs=dict(
            extra_tags=(
                "ModelPixelScaleTag",
                "ModelTiepointTag",
                "GeoKeyDirectoryTag",
                "GeoAsciiParamsTag",
            )
        ),
    )
    # Convert back from TileDB to OME-TIFF
    OMETiffConverter.from_tiledb(str(tiledb_path), str(output_path))

    with tifffile.TiffFile(input_path) as t1, tifffile.TiffFile(output_path) as t2:
        compare_tiff(t1, t2, lossless=False)


@pytest.mark.parametrize(
    "filename,dims",
    [
        ("single-channel.ome.tif", "YX"),
        ("z-series.ome.tif", "ZYX"),
        ("multi-channel.ome.tif", "CYX"),
        ("time-series.ome.tif", "TYX"),
        ("multi-channel-z-series.ome.tif", "CZYX"),
        ("multi-channel-time-series.ome.tif", "TCYX"),
        ("4D-series.ome.tif", "TZYX"),
        ("multi-channel-4D-series.ome.tif", "TCZYX"),
    ],
)
@pytest.mark.parametrize("tiles", [{}, {"X": 128, "Y": 128, "Z": 2, "C": 1, "T": 3}])
def test_ome_tiff_converter_artificial_roundtrip(tmp_path, filename, dims, tiles):
    input_path = get_path(f"artificial-ome-tiff/{filename}")
    tiledb_path = tmp_path / "to_tiledb"
    experimental_path = tmp_path / "_experimental"
    output_path = tmp_path / "from_tiledb"

    OMETiffConverter.to_tiledb(input_path, str(tiledb_path), tiles=tiles)
    OMETiffConverter.to_tiledb(
        input_path,
        str(experimental_path),
        tiles=tiles,
        experimental_reader=True,
        chunked=True,
        max_workers=16,
    )

    with TileDBOpenSlide(str(experimental_path)) as t:
        assert len(tiledb.Group(str(experimental_path))) == t.level_count == 1

    with open_bioimg(str(experimental_path / "l_0.tdb")) as A:
        assert "".join(dim.name for dim in A.domain) == dims
        assert A.dtype == np.int8
        assert A.dim("X").tile == tiles.get("X", 439)
        assert A.dim("Y").tile == tiles.get("Y", 167)
        if A.domain.has_dim("Z"):
            assert A.dim("Z").tile == tiles.get("Z", 1)
        if A.domain.has_dim("C"):
            assert A.dim("C").tile == tiles.get("C", 3)
        if A.domain.has_dim("T"):
            assert A.dim("T").tile == tiles.get("T", 1)

    OMETiffConverter.from_tiledb(str(tiledb_path), str(output_path))
    with tifffile.TiffFile(input_path) as t1, tifffile.TiffFile(output_path) as t2:
        compare_tiff(t1, t2, lossless=True)


def compare_tiff(t1: tifffile.TiffFile, t2: tifffile.TiffFile, lossless: bool = True):
    assert len(t1.series[0].levels) == len(t2.series[0].levels)

    for l1, l2 in zip(t1.series[0].levels, t2.series[0].levels):
        assert l1.axes.replace("S", "C") == l2.axes.replace("S", "C")
        assert l1.shape == l2.shape
        assert l1.dtype == l2.dtype
        assert l1.nbytes == l2.nbytes

        if lossless:
            np.testing.assert_array_equal(l1.asarray(), l2.asarray())
        else:
            assert_image_similarity(l1.asarray(), l2.asarray(), channel_axis=0)


compressors = [
    None,
    tiledb.ZstdFilter(level=0),
    tiledb.WebpFilter(WebpInputFormat.WEBP_RGB, lossless=False),
    tiledb.WebpFilter(WebpInputFormat.WEBP_RGB, lossless=True),
]
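
The tests above exercise the newly exposed experimental reader parameters through the converter API. A minimal usage sketch along the same lines, with placeholder input and output paths:

from tiledb.bioimg.converters.ome_tiff import OMETiffConverter

OMETiffConverter.to_tiledb(
    "input.ome.tiff",          # placeholder source image path
    "output_group",            # placeholder TileDB group URI
    experimental_reader=True,  # opt in to the memory-layout-optimized reader
    chunked=True,
    max_workers=4,
    reader_kwargs=dict(        # extra TIFF tags to carry over, as in the roundtrip test
        extra_tags=("ModelPixelScaleTag", "ModelTiepointTag")
    ),
)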

tests/unit/test_tiles.py

Lines changed: 12 additions & 4 deletions
@@ -11,10 +11,18 @@

 def test_dim_range():
     dims = list(domain)
-    assert dim_range(dims[0]) == range(0, 10, 3)
-    assert dim_range(dims[1]) == range(0, 15, 5)
-    assert dim_range(dims[2]) == range(0, 7, 4)
-    assert dim_range(dims[3]) == range(0, 3, 3)
+    assert dim_range(
+        (int(dims[0].domain[0]), int(dims[0].domain[1]), int(dims[0].tile))
+    ) == range(0, 10, 3)
+    assert dim_range(
+        (int(dims[1].domain[0]), int(dims[1].domain[1]), int(dims[1].tile))
+    ) == range(0, 15, 5)
+    assert dim_range(
+        (int(dims[2].domain[0]), int(dims[2].domain[1]), int(dims[2].tile))
+    ) == range(0, 7, 4)
+    assert dim_range(
+        (int(dims[3].domain[0]), int(dims[3].domain[1]), int(dims[3].tile))
+    ) == range(0, 3, 3)


 def test_iter_slices():
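
The rewritten assertions suggest that dim_range now accepts a plain (start, stop, tile) tuple instead of a tiledb dimension object. A minimal sketch of a function consistent with these assertions, assuming an inclusive stop bound (the actual implementation in the package may differ):

from typing import Tuple


def dim_range(bounds: Tuple[int, int, int]) -> range:
    # (start, stop, tile) with an inclusive stop, stepping one tile at a time;
    # e.g. (0, 9, 3) -> range(0, 10, 3), matching the first assertion above.
    start, stop, tile = bounds
    return range(start, stop + 1, tile)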
