
Commit

initial (working) version of Imaris import
TODO:
  - get metadata from the .ims file itself
  - perhaps make the intermediate zarr its own rule
akhanf committed Jan 13, 2025
1 parent 4562271 commit 8cc8b95
Showing 5 changed files with 181 additions and 5 deletions.
3 changes: 1 addition & 2 deletions config/samples.tsv
@@ -1,3 +1,2 @@
subject sample acq stain_0 stain_1 stain_2 sample_path
-mouse1 brain blaze Lectin PI Abeta .test/dryrun/data
-lifecanvas1 brain prestitched PI Abeta n/a .test/dryrun/data
+brown brain imaris Lectin PI Abeta /cifs/trident/projects/Brown/241129_AMR_1wk_App_tau_E3_34-3_M2_1x1_09-57-23/09-57-23_AMR_1wk_App_tau_E3_34-3_M2_1x1_Blaze_C00_xyz-Table Z0000.ome.ims
3 changes: 2 additions & 1 deletion workflow/rules/common.smk
@@ -164,7 +164,6 @@ def sample_is_remote(wildcards):
def get_input_sample(wildcards):
    """returns path to extracted sample or path to provided input folder"""
    sample_path = Path(get_sample_path(wildcards))

    if is_remote_gcs(sample_path):
        return rules.cp_from_gcs.output.ome_dir.format(**wildcards)

@@ -175,6 +174,8 @@ def get_input_sample(wildcards):
        # sample was a tar file, so point to the extracted folder
        return rules.extract_sample.output.ome_dir.format(**wildcards)

    elif sample_path.suffixes[-1] == '.ims':
        return get_sample_path_remote(wildcards)
    else:
        print(f"unsupported input: {sample_path}")

2 changes: 1 addition & 1 deletion workflow/rules/import.smk
@@ -155,7 +155,7 @@ rule prestitched_to_metadata:
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq,[a-zA-Z0-9]*prestitched[a-zA-Z0-9]*}",
acq="{acq,[a-zA-Z0-9]*(prestitched|imaris)[a-zA-Z0-9]*}",
suffix="SPIM.json",
),
benchmark:
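Snakemake compiles the constraint after the comma in the wildcard braces into a regex that the acq value must fully match, so both prestitched and imaris acquisitions now route through this metadata rule. Roughly equivalent to:

import re

pattern = r"[a-zA-Z0-9]*(prestitched|imaris)[a-zA-Z0-9]*"
for acq in ("prestitched", "imaris", "blazeimaris", "blaze"):
    print(acq, bool(re.fullmatch(pattern, acq)))
# prestitched True, imaris True, blazeimaris True, blaze False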
39 changes: 38 additions & 1 deletion workflow/rules/ome_zarr.smk
@@ -72,7 +72,7 @@ rule tif_stacks_to_ome_zarr:
        bids(
            root="logs",
            subject="{subject}",
-            datatype="zarr_to_ome_zarr",
+            datatype="tif_stacks_to_ome_zarr",
            sample="{sample}",
            acq="{acq}",
            suffix="log.txt",
@@ -172,3 +172,40 @@ rule ome_zarr_to_nii:
config["containers"]["spimprep"]
script:
"../scripts/ome_zarr_to_nii.py"

rule imaris_to_ome_zarr:
    input:
        ims=get_input_sample,
        metadata_json=rules.prestitched_to_metadata.output.metadata_json,
    params:
        max_downsampling_layers=config["ome_zarr"]["max_downsampling_layers"],
        rechunk_size=config["ome_zarr"]["rechunk_size"],
        scaling_method=config["ome_zarr"]["scaling_method"],
        downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"],
        stains=get_stains,
        uri=get_output_ome_zarr_uri(),
        storage_provider_settings=workflow.storage_provider_settings,
    output:
        **get_output_ome_zarr("imaris"),
    log:
        bids(
            root="logs",
            subject="{subject}",
            datatype="imaris_to_ome_zarr",
            sample="{sample}",
            acq="{acq}",
            suffix="log.txt",
        ),
    container:
        config["containers"]["spimprep"]
    group:
        "preproc"
    threads: config["total_cores"]
    resources:
        runtime=360,
        mem_mb=config["total_mem_mb"],
    shadow: 'minimal'
    script:
        "../scripts/imaris_to_ome_zarr.py"


139 changes: 139 additions & 0 deletions workflow/scripts/imaris_to_ome_zarr.py
@@ -0,0 +1,139 @@
import json

import dask.array as da
import h5py
import zarr
from dask.diagnostics import ProgressBar
from lib.cloud_io import get_fsspec, is_remote
from ome_zarr.format import format_from_version
from ome_zarr.scale import Scaler
from ome_zarr.writer import write_image
from upath import UPath as Path


def convert_hdf5_to_zarr(hdf5_path, zarr_path):
    """
    Convert an HDF5 file to Zarr using h5py and zarr.

    Parameters:
        hdf5_path (str): Path to the input HDF5 (.ims) file.
        zarr_path (str): Path to the output Zarr dataset.
    """
    # Open the HDF5 file and create a Zarr root group
    with h5py.File(hdf5_path, "r") as hdf5_file:
        zarr_store = zarr.open_group(zarr_path, mode="w")

        def copy_group(hdf5_group, zarr_group):
            for key, item in hdf5_group.items():
                if isinstance(item, h5py.Group):  # Recursively copy groups
                    new_group = zarr_group.create_group(key)
                    copy_group(item, new_group)
                elif isinstance(item, h5py.Dataset):  # Copy datasets
                    zarr_group.create_dataset(
                        name=key,
                        data=item[()],
                        chunks=item.chunks,
                        dtype=item.dtype,
                        compression="gzip",  # Optional compression
                    )
                    print(f"Copied dataset: {key}")

        # Start copying from the root group
        copy_group(hdf5_file, zarr_store)

    print(f"Converted HDF5 file to Zarr at: {zarr_path}")


# copy imaris (hdf5) to zarr -- TODO: don't need to copy everything (see sketch below)
convert_hdf5_to_zarr(
    hdf5_path=snakemake.input.ims,
    zarr_path='copy_hdf5.zarr',
)
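# A possible follow-up for the TODO above: instead of mirroring the whole
# HDF5 hierarchy, copy only the full-resolution channel datasets. A minimal
# sketch (defined but not called yet), assuming the standard Imaris layout
# used further below (DataSet/ResolutionLevel 0/TimePoint 0/Channel {i}/Data):
def convert_hdf5_to_zarr_selective(hdf5_path, zarr_path, n_channels):
    with h5py.File(hdf5_path, "r") as hdf5_file:
        zarr_store = zarr.open_group(zarr_path, mode="w")
        for c in range(n_channels):
            component = f"DataSet/ResolutionLevel 0/TimePoint 0/Channel {c}/Data"
            dset = hdf5_file[component]
            zarr_store.create_dataset(
                name=component,
                data=dset[()],
                chunks=dset.chunks,
                dtype=dset.dtype,
                compression="gzip",
            )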



in_zarr = 'copy_hdf5.zarr'
metadata_json = snakemake.input.metadata_json
downsampling = snakemake.params.downsampling
max_layer = snakemake.params.max_downsampling_layers  # number of downsamplings by 2 to include in zarr
rechunk_size = snakemake.params.rechunk_size
out_zarr = snakemake.output.zarr
stains = snakemake.params.stains
scaling_method = snakemake.params.scaling_method
uri = snakemake.params.uri


# prepare metadata for ome-zarr
with open(metadata_json) as fp:
    metadata = json.load(fp)
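# Per the commit TODO, voxel sizes could eventually be read from the .ims
# file itself rather than the sidecar JSON. A hedged sketch (unused for
# now), assuming the Imaris 5.5 convention of storing DataSetInfo values as
# byte-array HDF5 attributes on /DataSetInfo/Image, with ExtMin0/ExtMax0
# spanning x, 1 spanning y, and 2 spanning z:
def read_imaris_voxel_size_um(ims_path):
    def attr(grp, name):
        return b"".join(grp.attrs[name]).decode()

    with h5py.File(ims_path, "r") as f:
        img = f["DataSetInfo/Image"]
        sizes = []
        for dim, idx in zip(("Z", "Y", "X"), ("2", "1", "0")):
            n = int(attr(img, dim))
            extent = float(attr(img, "ExtMax" + idx)) - float(attr(img, "ExtMin" + idx))
            sizes.append(extent / n)
    return sizes  # [z, y, x] in micrometres per voxel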

voxdim = [
    float(metadata['physical_size_z']) * float(downsampling) / 1000.0,
    float(metadata['physical_size_y']) * float(downsampling) / 1000.0,
    float(metadata['physical_size_x']) * float(downsampling) / 1000.0,
]
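# e.g. with an illustrative physical_size_x of 4.0 um and downsampling of 2,
# the voxel size becomes 4.0 * 2 / 1000 = 0.008 mm in the output metadata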


coordinate_transformations = []
# for each resolution (dataset), we have a list of dicts, transformations to apply..
# in this case just a single one (scaling by voxel size)
for l in range(max_layer + 1):
    coordinate_transformations.append(
        [
            {
                'scale': [1, voxdim[0], (2**l) * voxdim[1], (2**l) * voxdim[2]],  # image-pyramids in XY only
                'type': 'scale',
            }
        ]
    )
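# e.g. level 0 keeps the native [1, z, y, x] voxel scaling, while level 1
# doubles only the y and x entries -- matching a pyramid that downsamples
# in-plane but never along z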


axes = [{'name': 'c', 'type': 'channel'}] + [
    {'name': ax, 'type': 'space', 'unit': 'millimeter'} for ax in ['z', 'y', 'x']
]


#init omero metadata
omero={key:val for key,val in snakemake.config['ome_zarr']['omero_metadata']['defaults'].items()}
omero['channels']=[]


if is_remote(uri):
    # NOTE: the rule above does not declare a creds input, so this remote
    # branch would fail as written; only the local DirectoryStore path is
    # exercised for now
    fs_args = {'storage_provider_settings': snakemake.params.storage_provider_settings, 'creds': snakemake.input.creds}
    fs = get_fsspec(uri, **fs_args)
    store = zarr.storage.FSStore(Path(uri).path, fs=fs, dimension_separator='/', mode='w')
else:
    store = zarr.DirectoryStore(out_zarr, dimension_separator='/')





darr_list = []
for zarr_i, stain in enumerate(stains):
    # open zarr to get group name
    zi = zarr.open(in_zarr)  # NOTE: currently unused
    # darr_list.append(da.from_zarr(in_zarr,component=f'DataSet/ResolutionLevel 0/TimePoint 0/Channel {zarr_i}/Data',chunks=rechunk_size))
    darr_list.append(
        da.from_zarr(
            in_zarr,
            component=f'DataSet/ResolutionLevel 0/TimePoint 0/Channel {zarr_i}/Data',
        )
    )

    # append to omero metadata
    channel_metadata = {
        key: val
        for key, val in snakemake.config['ome_zarr']['omero_metadata']['channels']['defaults'].items()
    }
    channel_name = stain
    channel_metadata['label'] = channel_name
    default_color = snakemake.config['ome_zarr']['omero_metadata']['channels']['default_color']
    color = snakemake.config['ome_zarr']['omero_metadata']['channels']['color_mapping'].get(
        channel_name, default_color
    )
    channel_metadata['color'] = color
    omero['channels'].append(channel_metadata)


darr_channels = da.stack(darr_list)



group = zarr.group(store,overwrite=True)
scaler = Scaler(max_layer=max_layer,method=scaling_method)


with ProgressBar():
    write_image(
        image=darr_channels,
        group=group,
        scaler=scaler,
        coordinate_transformations=coordinate_transformations,
        axes=axes,
        metadata={'omero': omero},
    )

