From fefde860f62b22516fabb78a045b192207bb6ccb Mon Sep 17 00:00:00 2001 From: David Stirling Date: Fri, 11 Oct 2024 11:40:08 +0100 Subject: [PATCH] Reapply GS-specific modifications --- cellprofiler_core/analysis/_runner.py | 3 +- cellprofiler_core/constants/image.py | 2 +- .../image/abstract_image/file/_file_image.py | 80 ++++- .../abstract_image/file/url/_color_image.py | 4 +- .../abstract_image/file/url/_mask_image.py | 4 +- .../file/url/_monochrome_image.py | 4 + .../abstract_image/file/url/_url_image.py | 12 +- cellprofiler_core/modules/loaddata.py | 27 ++ cellprofiler_core/modules/metadata.py | 8 +- cellprofiler_core/modules/namesandtypes.py | 35 ++- cellprofiler_core/preferences/__init__.py | 7 +- cellprofiler_core/utilities/image.py | 3 + cellprofiler_core/utilities/zarr.py | 279 ++++++++++++++++++ 13 files changed, 452 insertions(+), 16 deletions(-) create mode 100644 cellprofiler_core/utilities/zarr.py diff --git a/cellprofiler_core/analysis/_runner.py b/cellprofiler_core/analysis/_runner.py index d099dabd..712bf80b 100644 --- a/cellprofiler_core/analysis/_runner.py +++ b/cellprofiler_core/analysis/_runner.py @@ -672,7 +672,8 @@ def start_workers(cls, num=None): # closed, the subprocess exits. if hasattr(sys, "frozen"): if sys.platform == "darwin": - executable = os.path.join(os.path.dirname(sys.executable), "cp") + executable = os.path.join(os.path.dirname(sys.executable), + "cellprofilerapp") args = [executable] + aw_args elif sys.platform.startswith("linux"): aw_path = os.path.join(os.path.dirname(__file__), "__init__.py") diff --git a/cellprofiler_core/constants/image.py b/cellprofiler_core/constants/image.py index 23eb9488..a2f42e43 100644 --- a/cellprofiler_core/constants/image.py +++ b/cellprofiler_core/constants/image.py @@ -103,4 +103,4 @@ SUB_ALL = "All" SUB_SOME = "Some" FILE_SCHEME = "file:" -PASSTHROUGH_SCHEMES = ("http", "https", "ftp", "omero", "s3") +PASSTHROUGH_SCHEMES = ("http", "https", "ftp", "omero", "s3", "omero-3d") diff --git a/cellprofiler_core/image/abstract_image/file/_file_image.py b/cellprofiler_core/image/abstract_image/file/_file_image.py index 4b1715d8..e0dee2d0 100644 --- a/cellprofiler_core/image/abstract_image/file/_file_image.py +++ b/cellprofiler_core/image/abstract_image/file/_file_image.py @@ -9,16 +9,23 @@ import numpy import skimage.io +import requests +from PIL import Image as PilImage +from io import BytesIO +from posixpath import join as urljoin + import cellprofiler_core.preferences from .._abstract_image import AbstractImage from ..._image import Image from ....utilities.image import is_numpy_file from ....utilities.image import is_matlab_file +from ....utilities.image import is_omero3d_path from ....utilities.image import loadmat from ....utilities.image import load_data_file from ....utilities.image import generate_presigned_url from ....constants.image import FILE_SCHEME, PASSTHROUGH_SCHEMES from ....utilities.pathname import pathname2url, url2pathname +from ....utilities.zarr import get_zarr_reader class FileImage(AbstractImage): @@ -35,6 +42,8 @@ def __init__( channel=None, volume=False, spacing=None, + z=None, + t=None, ): """ :param name: Name of image to be provided @@ -84,6 +93,8 @@ def __init__( self.__index = index self.__volume = volume self.__spacing = spacing + self.z_index = z if z is not None else 0 + self.t_index = t if t is not None else 0 self.scale = None @property @@ -176,6 +187,8 @@ def cache_file(self): ) finally: os.close(tempfd) + elif url.lower().endswith('.zarr'): + self.__cached_file = url else: from bioformats.formatreader import get_image_reader @@ -185,6 +198,8 @@ def cache_file(self): return True def get_full_name(self): + if is_omero3d_path(self.__url): + return self.get_url() self.cache_file() if self.__is_cached: return self.__cached_file @@ -203,8 +218,12 @@ def get_md5_hash(self, measurements): # # Cache the MD5 hash on the image reader # - if is_matlab_file(self.__filename) or is_numpy_file(self.__filename): + if (is_matlab_file(self.__filename) or + is_numpy_file(self.__filename) or + is_omero3d_path(self.get_url())): rdr = None + elif self.get_url().endswith('.zarr'): + rdr = get_zarr_reader(None, url=self.get_url()) else: from bioformats.formatreader import get_image_reader @@ -273,6 +292,8 @@ def __set_image(self): url = self.get_url() if url.lower().startswith("omero:"): rdr = get_image_reader(self.get_name(), url=url) + elif url.lower().endswith('.zarr'): + rdr = get_zarr_reader(self.get_name(), url=url) else: rdr = get_image_reader(self.get_name(), url=self.get_url()) if numpy.isscalar(self.index) or self.index is None: @@ -283,6 +304,8 @@ def __set_image(self): rescale=self.rescale if isinstance(self.rescale, bool) else False, wants_max_intensity=True, channel_names=channel_names, + z=self.z_index, + t=self.t_index, ) else: # It's a stack @@ -305,6 +328,8 @@ def __set_image(self): rescale=self.rescale if isinstance(self.rescale, bool) else False, wants_max_intensity=True, channel_names=channel_names, + z=self.z_index, + t=self.t_index, ) stack.append(img) img = numpy.dstack(stack) @@ -330,10 +355,61 @@ def provide_image(self, image_set): def __set_image_volume(self): pathname = url2pathname(self.get_url()) - + print("Using pathname: {} for url {}".format(pathname, self.get_url())) # Volume loading is currently limited to tiffs/numpy files only if is_numpy_file(self.__filename): data = numpy.load(pathname) + elif is_omero3d_path(self.__url): + scheme = 'omero-3d:' + url = self.__url.split(scheme)[1] + parsed_url = urllib.parse.urlparse(url) + query_params = urllib.parse.parse_qs(parsed_url.query) + zmin = int(query_params['zmin'][0]) + zmax = int(query_params['zmax'][0]) + width = int(query_params['width'][0]) + height = int(query_params['height'][0]) + image_id = query_params['imageid'][0] + channel = query_params['c'][0] + stack = numpy.ndarray((zmax - zmin + 1, height, width)) + for i in range(zmin, zmax + 1): + path = urljoin('/tile', image_id, str(i), channel, '0') + url = urllib.parse.urlunparse(( + parsed_url.scheme, + parsed_url.netloc, + path, + '', + parsed_url.query, + '' + )) + print("Requesting URL: {}".format(url)) + timeout = 2 + response = None + while timeout < 500: + try: + response = requests.get(url, timeout=timeout) + except Exception: + print('Get %s with timeout %s sec failed' % ( + url, timeout)) + timeout = timeout**2 + else: + break + if response is None: + raise Exception('Failed to retrieve data from URL') + image_bytes = BytesIO(response.content) + image = PilImage.open(image_bytes) + stack[i - zmin, :, :] = image + data = stack + elif pathname.endswith('.zarr'): + rdr = get_zarr_reader(self.get_name(), url=self.get_url()) + data = rdr.read( + c=self.channel, + series=self.series, + index=None, + rescale=False, + wants_max_intensity=False, + z=None, + t=self.t_index, + ) else: data = imageio.volread(pathname) diff --git a/cellprofiler_core/image/abstract_image/file/url/_color_image.py b/cellprofiler_core/image/abstract_image/file/url/_color_image.py index 45a903f2..0a520b5d 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_color_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_color_image.py @@ -7,7 +7,7 @@ class ColorImage(URLImage): """Provide a color image, tripling a monochrome plane if needed""" def __init__( - self, name, url, series, index, rescale=True, volume=False, spacing=None + self, name, url, series, index, rescale=True, volume=False, spacing=None, z=None, t=None, ): URLImage.__init__( self, @@ -18,6 +18,8 @@ def __init__( index=index, volume=volume, spacing=spacing, + z=z, + t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_mask_image.py b/cellprofiler_core/image/abstract_image/file/url/_mask_image.py index 1ac74bf8..e4f7a37a 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_mask_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_mask_image.py @@ -4,7 +4,7 @@ class MaskImage(MonochromeImage): """Provide a boolean image, converting nonzero to True, zero to False if needed""" - def __init__(self, name, url, series, index, channel, volume=False, spacing=None): + def __init__(self, name, url, series, index, channel, volume=False, spacing=None, z=None, t=None): MonochromeImage.__init__( self, name, @@ -15,6 +15,8 @@ def __init__(self, name, url, series, index, channel, volume=False, spacing=None channel=channel, volume=volume, spacing=spacing, + z=z, + t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py b/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py index 188020c3..951d12d2 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_monochrome_image.py @@ -16,6 +16,8 @@ def __init__( rescale=True, volume=False, spacing=None, + z=None, + t=None, ): URLImage.__init__( self, @@ -27,6 +29,8 @@ def __init__( channel=channel, volume=volume, spacing=spacing, + z=z, + t=t, ) def provide_image(self, image_set): diff --git a/cellprofiler_core/image/abstract_image/file/url/_url_image.py b/cellprofiler_core/image/abstract_image/file/url/_url_image.py index 0486caec..fe21b724 100644 --- a/cellprofiler_core/image/abstract_image/file/url/_url_image.py +++ b/cellprofiler_core/image/abstract_image/file/url/_url_image.py @@ -1,6 +1,8 @@ import os from .....utilities.pathname import url2pathname +from .....utilities.image import is_omero3d_path + from .._file_image import FileImage @@ -17,6 +19,8 @@ def __init__( channel=None, volume=False, spacing=None, + z=None, + t=None, ): if url.lower().startswith("file:"): path = url2pathname(url) @@ -25,11 +29,17 @@ def __init__( pathname = "" filename = url super(URLImage, self).__init__( - name, pathname, filename, rescale, series, index, channel, volume, spacing + name, pathname, filename, rescale, series, index, channel, volume, spacing, z=z, t=t, ) self.url = url def get_url(self): + if is_omero3d_path(self.url): + print("OMERO-3D URL: {}".format(self.url)) + url = self.url.split("omero-3d:")[1] + if url is not None: + return url + return self.url if self.cache_file(): return super(URLImage, self).get_url() return self.url diff --git a/cellprofiler_core/modules/loaddata.py b/cellprofiler_core/modules/loaddata.py index 75ee9767..cef6c4cd 100644 --- a/cellprofiler_core/modules/loaddata.py +++ b/cellprofiler_core/modules/loaddata.py @@ -1060,6 +1060,33 @@ def fetch_provider(self, name, measurements, is_image_name=True): frame = measurements["Image", frame_feature] else: frame = None + if url.endswith('.zarr'): + # Zarrs need czt indexing rather than just index. + c, z, t = None, None, None + + if measurements.has_feature("Image", f"Channel_{name}"): + c = measurements["Image", f"Channel_{name}"] + elif measurements.has_feature("Image", "Metadata_C"): + c = measurements["Image", "Metadata_C"] + if measurements.has_feature("Image", f"Z_{name}"): + z = measurements["Image", f"Z_{name}"] + elif measurements.has_feature("Image", "Metadata_Z"): + z = measurements["Image", "Metadata_Z"] + if measurements.has_feature("Image", f"T_{name}"): + t = measurements["Image", f"T_{name}"] + elif measurements.has_feature("Image", "Metadata_T"): + t = measurements["Image", "Metadata_T"] + return FileImage( + name, + path, + filename, + rescale=self.rescale.value and is_image_name, + series=series, + index=frame, + z=z, + channel=c, + t=t, + ) return FileImage( name, path, diff --git a/cellprofiler_core/modules/metadata.py b/cellprofiler_core/modules/metadata.py index 5f9019b3..fd1a0aa8 100644 --- a/cellprofiler_core/modules/metadata.py +++ b/cellprofiler_core/modules/metadata.py @@ -1133,6 +1133,7 @@ def msg(url): from bioformats.formatreader import get_omexml_metadata from typing import Optional, Any, Callable from dataclasses import dataclass + from cellprofiler_core.utilities.zarr import get_zarr_metadata @dataclass class ui_context: @@ -1173,6 +1174,7 @@ def update_all_urls(): for i, url in enumerate(urls): try: if not pbar_context.update_callback(pbar_context.dlg, i, url): + break if group.filter_choice == F_FILTERED_IMAGES: match = group.filter.evaluate( @@ -1186,7 +1188,10 @@ def update_all_urls(): continue metadata = filelist.get_metadata(url) if metadata is None: - metadata = get_omexml_metadata(url=url) + if url.lower().endswith('.zarr'): + metadata = get_zarr_metadata(url=url) + else: + metadata = get_omexml_metadata(url=url) filelist.add_metadata(url, metadata) except Exception as e: import logging @@ -1199,7 +1204,6 @@ def update_all_urls(): pbar_context.err_callback(errmsg) update_all_urls() - group.metadata_autoextracted.value = True def on_activated(self, workspace): diff --git a/cellprofiler_core/modules/namesandtypes.py b/cellprofiler_core/modules/namesandtypes.py index b922c7dc..f5faacf0 100644 --- a/cellprofiler_core/modules/namesandtypes.py +++ b/cellprofiler_core/modules/namesandtypes.py @@ -1981,7 +1981,20 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): javabridge.call(stack, "get", "([I)Ljava/lang/Object;", coords) ) ) + # CZT needed for zarr reading. Provision of index parameter will make bioformats readers ignore these. + if workspace.measurements.has_feature("Image", "Metadata_Z"): + z = workspace.measurements.get_measurement("Image", "Metadata_Z") + if z is not None: + z = int(z) + else: + z = None + if workspace.measurements.has_feature("Image", "Metadata_T"): + t = workspace.measurements.get_measurement("Image", "Metadata_T") + if t is not None: + t = int(t) + else: + t = None if len(ipds) == 1: interleaved = javabridge.get_static_field( "org/cellprofiler/imageset/ImagePlane", "INTERLEAVED", "I" @@ -1994,6 +2007,14 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): series = ipd.series index = ipd.index channel = ipd.channel + if url.lower().endswith('.zarr') and workspace.measurements.has_feature("Image", "Metadata_C"): + # Override channel index with real value if using zarrs + channelM = workspace.measurements.get_measurement("Image", "Metadata_C") + if channelM is not None: + channel = int(channelM) + else: + # Todo: Solve channel ID if mixed channel pipeline + channel = index if channel == monochrome: channel = None elif channel == interleaved: @@ -2001,7 +2022,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): if index == 0: index = None self.add_simple_image( - workspace, name, load_choice, rescale, url, series, index, channel + workspace, name, load_choice, rescale, url, series, index, channel, z=z, t=t, ) elif all([ipd.url == ipds[0].url for ipd in ipds[1:]]): # Can load a simple image with a vector of series/index/channel @@ -2010,7 +2031,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): index = [ipd.index for ipd in ipds] channel = [None if ipd.channel < 0 else ipd.channel for ipd in ipds] self.add_simple_image( - workspace, name, load_choice, rescale, url, series, index, channel + workspace, name, load_choice, rescale, url, series, index, channel, z=z, t=t, ) else: # Different URLs - someone is a clever sadist @@ -2023,7 +2044,7 @@ def add_image_provider(self, workspace, name, load_choice, rescale, stack): ) def add_simple_image( - self, workspace, name, load_choice, rescale, url, series, index, channel + self, workspace, name, load_choice, rescale, url, series, index, channel, z=None, t=None, ): m = workspace.measurements @@ -2035,7 +2056,7 @@ def add_simple_image( if load_choice == LOAD_AS_COLOR_IMAGE: provider = ColorImage( - name, url, series, index, rescale, volume=volume, spacing=spacing + name, url, series, index, rescale, volume=volume, spacing=spacing, z=z, t=t, ) elif load_choice == LOAD_AS_GRAYSCALE_IMAGE: provider = MonochromeImage( @@ -2047,14 +2068,16 @@ def add_simple_image( rescale, volume=volume, spacing=spacing, + z=z, + t=t, ) elif load_choice == LOAD_AS_ILLUMINATION_FUNCTION: provider = MonochromeImage( - name, url, series, index, channel, False, volume=volume, spacing=spacing + name, url, series, index, channel, False, volume=volume, spacing=spacing, z=z, t=t, ) elif load_choice == LOAD_AS_MASK: provider = MaskImage( - name, url, series, index, channel, volume=volume, spacing=spacing + name, url, series, index, channel, volume=volume, spacing=spacing, z=z, t=t, ) workspace.image_set.providers.append(provider) diff --git a/cellprofiler_core/preferences/__init__.py b/cellprofiler_core/preferences/__init__.py index 708401c0..5a5be6d7 100644 --- a/cellprofiler_core/preferences/__init__.py +++ b/cellprofiler_core/preferences/__init__.py @@ -82,7 +82,12 @@ def get_config(): try: config = wx.Config.Get(False) except wx.PyNoAppError: - app = wx.App(0) + try: + app = wx.App(0) + except SystemExit: + # We're probably building on GitHub Actions + print("Python version doesn't support GUI, no app available.") + return __headless_config config = wx.Config.Get(False) if not config: wx.Config.Set( diff --git a/cellprofiler_core/utilities/image.py b/cellprofiler_core/utilities/image.py index 51fa71d1..f2984a4b 100644 --- a/cellprofiler_core/utilities/image.py +++ b/cellprofiler_core/utilities/image.py @@ -115,6 +115,9 @@ def is_numpy_file(filename): def is_matlab_file(filename): return os.path.splitext(filename)[-1].lower() == ".mat" +def is_omero3d_path(url): + return url.lower().startswith('omero-3d:') + def loadmat(path): imgdata = scipy.io.matlab.mio.loadmat(path, struct_as_record=True) diff --git a/cellprofiler_core/utilities/zarr.py b/cellprofiler_core/utilities/zarr.py new file mode 100644 index 00000000..bbf07c60 --- /dev/null +++ b/cellprofiler_core/utilities/zarr.py @@ -0,0 +1,279 @@ +import collections +import os +import re +import sys +import tempfile +import urllib.parse +from urllib.request import urlopen + +import numpy +import zarr +import boto3 +import shutil + +from cellprofiler_core.utilities.pathname import url2pathname + +import logging +logger = logging.getLogger(__name__) + + +def get_zarr_metadata(url): + xmlfile = 'METADATA.ome.xml' + parser = urllib.parse.urlparse(url) + if parser.scheme == 'file': + url = url2pathname(url) + elif parser.scheme == 's3': + client = boto3.client('s3') + bucket_name, key = re.compile('s3://([\w\d\-\.]+)/(.*)').search( + url).groups() + key += "/OME/METADATA.ome.xml" + url = client.generate_presigned_url( + 'get_object', + Params={'Bucket': bucket_name, 'Key': key.replace("+", " ")} + ) + src = urlopen(url) + return src.read().decode() + metadata_path = os.path.join(url, "OME", xmlfile) + if os.path.exists(metadata_path): + with open(metadata_path) as data: + return data.read() + elif os.path.exists(os.path.join(url, xmlfile)): + with open(os.path.join(url, xmlfile)) as data: + return data.read() + else: + logger.warning("Input zarr lacks an OME-XML file. " + "CellProfiler will try to construct metadata, but this feature is experimental") + return make_ome_xml(url) + + +def make_ome_xml(url): + # Prototype zarr parser to construct a fake OME XML. + root = zarr.open(url, mode='r') + queue = collections.deque() + queue.append(root) + xmlstr = """""" + while queue: + subject = queue.popleft() + for loc, group in subject.groups(): + queue.append(group) + for loc, array in subject.arrays(): + t, c, z, y, x = array.shape + dtype = array.dtype.name + if dtype == "int64": + # OME-XML can't handle int64, consider as a float instead. + dtype = "float" + xmlstr += \ + f"""UnknownCellProfiler OME Metadata """ + for i in range(c): + xmlstr += f"""""" + xmlstr += """ """ + # We only want the first resolution + break + return xmlstr + "" + + +def get_zarr_reader(key, path=None, url=None): + logger.debug("Getting image reader for: %s, %s, %s" % (key, path, url)) + from bioformats.formatreader import __image_reader_key_cache, __image_reader_cache, release_image_reader + if key in __image_reader_key_cache: + old_path, old_url = __image_reader_key_cache[key] + old_count, rdr = __image_reader_cache[old_path, old_url] + if old_path == path and old_url == url: + return rdr + release_image_reader(key) + if (path, url) in __image_reader_cache: + old_count, rdr = __image_reader_cache[path, url] + else: + rdr = ZarrReader(path, url) + old_count = 0 + __image_reader_cache[path, url] = (old_count + 1, rdr) + __image_reader_key_cache[key] = (path, url) + + return rdr + + +class ZarrReader(object): + def __init__(self, path=None, url=None, perform_init=True): + + self.stream = None + file_scheme = "file:" + + self.using_temp_file = False + + if url is not None: + url = str(url) + if url.lower().startswith(file_scheme): + url = url2pathname(url) + path = url + elif path is None: + path = url + + self.path = path + if path is None: + if not url.lower().startswith('s3:'): + self.path = self.download(url) + else: + if sys.platform.startswith("win"): + self.path = self.path.replace("/", os.path.sep) + filename = os.path.split(path)[1] + store = zarr.storage.FSStore(self.path) + if path.startswith('s3'): + logger.info("Zarr is stored on S3, will try to read directly.") + if '.zmetadata' in store: + # Zarr has consolidated metadata. + self.reader = zarr.convenience.open_consolidated(store, mode='r') + else: + logging.warning(f"Image is on S3 but lacks consolidated metadata. " + f"This may degrade reading performance. URL: {path}") + self.reader = zarr.open(store, mode='r') + elif not os.path.isdir(self.path): + raise IOError("The file, \"%s\", does not exist." % path) + else: + self.reader = zarr.open(store, mode='r') + self.well_map = self.map_wells() + self.series_list = self.map_series() + + def read(self, c=None, z=None, t=None, series=None, index=None, rescale=True, wants_max_intensity=True, channel_names=None, XYWH=None): + """Read a single plane from the image reader file. + :param c: read from this channel. `None` = read color image if multichannel + or interleaved RGB. + :param z: z-stack index + :param t: time index + :param series: series for ``.flex`` and similar multi-stack formats + :param index: if `None`, fall back to ``zct``, otherwise load the indexed frame + :param rescale: `True` to rescale the intensity scale to 0 and 1; `False` to + return the raw values native to the file. + :param wants_max_intensity: if `False`, only return the image; if `True`, + return a tuple of image and max intensity + :param channel_names: provide the channel names for the OME metadata + :param XYWH: a (x, y, w, h) tuple""" + # Index should always be None, we need ctz to properly index zarrs. + logger.debug(f"Reading {c=}, {z=}, {t=}, {series=}, {index=}, {XYWH=}") + c2 = None if c is None else c + 1 + z2 = None if z is None else z + 1 + t2 = None if t is None else t + 1 + if XYWH is not None: + x, y, w, h = XYWH + x = round(x) + y = round(y) + x2 = x + w + y2 = y + h + else: + y, y2, x, x2 = None, None, None, None + if self.well_map: + series_col, series_row, series_field = self.series_list[series] + base_path = self.well_map[(series_col, series_row)] + seriesreader = self.reader[base_path] + field = seriesreader.attrs['well']['images'][series_field]['path'] + # Hard-coding resolution 0 for now + seriesreader = seriesreader[field][0] + else: + seriesreader = self.reader[self.series_list[series]][0] + # Zarr arrays are indexed as TCZYX + if len(seriesreader.shape) == 5: + image = seriesreader[t:t2, c:c2, z:z2, y:y2, x:x2] + else: + image = seriesreader[c:c2, z:z2, y:y2, x:x2] + # Remove redundant axes + image = numpy.squeeze(image) + # C needs to be the last axis, but z should be first. Thank you CellProfiler. + if len(image.shape) > 2 and z is not None: + image = numpy.moveaxis(image, 0, -1) + elif len(image.shape) > 3: + image = numpy.moveaxis(image, 0, -1) + scale = numpy.iinfo(image.dtype).max + if rescale: + image = image.astype(float) / scale + if wants_max_intensity: + if image.dtype in [numpy.int8, numpy.uint8]: + scale = 255 + elif image.dtype in [numpy.int16, numpy.uint16]: + scale = 65535 + elif image.dtype == numpy.int32: + scale = 2 ** 32 - 1 + elif image.dtype == numpy.uint32: + scale = 2 ** 32 + else: + scale = 1 + return image, scale + return image + + def map_wells(self): + # For HCS zarrs, we construct a dictionary mapping well positions to array directories. + attrs = self.reader.attrs + if 'plate' not in attrs or 'wells' not in attrs['plate']: + return False + well_data = attrs['plate']['wells'] + mapper = {} + if 'column_index' in well_data[0]: + # Standard format + for row in well_data: + mapper[(str(row['column_index']), str(row['row_index']))] = row['path'] + else: + for row in well_data: + path = row['path'] + col, row = path.split('/', 1) + mapper[(str(col), str(row))] = path + return mapper + + def map_series(self): + # If in HCS mode we produce a list of (Row, Column, FieldNum) tuples to use with the well map. + # If in non-HCS mode we just make a list of paths to each series. + series_list = [] + if self.well_map: + metadata = get_zarr_metadata(self.path) + from lxml import etree + import io + context = etree.iterparse(io.BytesIO(metadata.encode()), tag="{*}ImageRef") + for action, node in context: + wellsample = node.getparent() + well = wellsample.getparent() + series_list.append((well.get('Column'), well.get('Row'), well.getchildren().index(wellsample))) + node.clear() + if not series_list: + # No series were found, try constructing from the image tags. + context = etree.iterparse(io.BytesIO(metadata.encode()), tag="{*}Image") + for action, node in context: + imagepath = node.attrib["Name"] + parts = imagepath.split('/') + series_list.append((parts[1], parts[2], int(parts[3]))) + node.clear() + else: + # No well metadata, just fetch series in order. + queue = collections.deque() + queue.append(self.reader) + while queue: + subject = queue.popleft() + for loc, group in subject.groups(): + queue.append(group) + for loc, array in subject.arrays(): + series_list.append(array.name) + # We only want the first resolution + break + return series_list + + def download(self, url): + # Cloned from bioformats' reader. Should temporarily download URLs. + # No idea if this will work since zarr is a directory-based format. + scheme = urllib.parse.urlparse(url)[0] + ext = url[url.rfind("."):] + urlpath = urllib.parse.urlparse(url)[2] + filename = os.path.basename(self.path) + + self.using_temp_file = True + + src = urlopen(url) + dest_fd, self.path = tempfile.mkstemp(suffix=ext) + try: + with os.fdopen(dest_fd, 'wb') as dest: + shutil.copyfileobj(src, dest) + except: + os.remove(self.path) + finally: + src.close() + + return filename + + def close(self): + # Zarr readers don't need to be explicitly closed. + pass