Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactored, and added new metadata extraction
Browse files Browse the repository at this point in the history
akhanf committed Jan 13, 2025

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 8cc8b95 commit dcfb77d
Showing 6 changed files with 108 additions and 38 deletions.
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
@@ -43,3 +43,4 @@ include: "rules/bigstitcher.smk"
include: "rules/ome_zarr.smk"
include: "rules/bids.smk"
include: "rules/qc.smk"
include: "rules/imaris.smk"
74 changes: 74 additions & 0 deletions workflow/rules/imaris.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Extract voxel-size / acquisition metadata from an Imaris (.ims) file into a
# BIDS JSON sidecar. Matches only acq entities containing "imaris" (the
# prestitched_to_metadata rule handles acq values containing "prestitched").
rule imaris_to_metadata:
    input:
        ims=get_input_sample,
    output:
        metadata_json=bids(
            root=root,
            subject="{subject}",
            datatype="micr",
            sample="{sample}",
            acq="{acq,[a-zA-Z0-9]*imaris[a-zA-Z0-9]*}",
            suffix="SPIM.json",
        ),
    benchmark:
        bids(
            root="benchmarks",
            datatype="imaris_to_metadata",
            subject="{subject}",
            sample="{sample}",
            acq="{acq}",
            suffix="benchmark.tsv",
        )
    log:
        bids(
            root="logs",
            # was "prestitched_to_metdata" -- stale copy-paste from the
            # prestitched rule (and a "metdata" typo); logs now land under
            # this rule's own name.
            datatype="imaris_to_metadata",
            subject="{subject}",
            sample="{sample}",
            acq="{acq}",
            suffix="log.txt",
        ),
    group:
        "preproc"
    container:
        config["containers"]["spimprep"]
    script:
        "../scripts/imaris_to_metadata.py"


# Convert an Imaris (.ims) file to OME-Zarr, using the JSON sidecar produced
# by imaris_to_metadata for voxel-size information.
rule imaris_to_ome_zarr:
    input:
        ims=get_input_sample,
        # was rules.prestitched_to_metadata.output.metadata_json -- that rule's
        # acq wildcard constraint now only matches *prestitched*, so imaris
        # acquisitions must depend on the new imaris_to_metadata rule instead.
        metadata_json=rules.imaris_to_metadata.output.metadata_json,
    params:
        max_downsampling_layers=config["ome_zarr"]["max_downsampling_layers"],
        rechunk_size=config["ome_zarr"]["rechunk_size"],
        scaling_method=config["ome_zarr"]["scaling_method"],
        downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"],
        stains=get_stains,
        uri=get_output_ome_zarr_uri(),
        storage_provider_settings=workflow.storage_provider_settings,
    output:
        **get_output_ome_zarr("imaris"),
    log:
        bids(
            root="logs",
            subject="{subject}",
            datatype="imaris_to_ome_zarr",
            sample="{sample}",
            acq="{acq}",
            suffix="log.txt",
        ),
    container:
        config["containers"]["spimprep"]
    group:
        "preproc"
    threads: config["total_cores"]
    resources:
        runtime=360,
        mem_mb=config["total_mem_mb"],
    shadow:
        "minimal"
    script:
        "../scripts/imaris_to_ome_zarr.py"


2 changes: 1 addition & 1 deletion workflow/rules/import.smk
Original file line number Diff line number Diff line change
@@ -155,7 +155,7 @@ rule prestitched_to_metadata:
subject="{subject}",
datatype="micr",
sample="{sample}",
acq="{acq,[a-zA-Z0-9]*(prestitched|imaris)[a-zA-Z0-9]*}",
acq="{acq,[a-zA-Z0-9]*prestitched[a-zA-Z0-9]*}",
suffix="SPIM.json",
),
benchmark:
36 changes: 0 additions & 36 deletions workflow/rules/ome_zarr.smk
Original file line number Diff line number Diff line change
@@ -173,39 +173,3 @@ rule ome_zarr_to_nii:
script:
"../scripts/ome_zarr_to_nii.py"

rule imaris_to_ome_zarr:
input:
ims=get_input_sample,
metadata_json=rules.prestitched_to_metadata.output.metadata_json,
params:
max_downsampling_layers=config["ome_zarr"]["max_downsampling_layers"],
rechunk_size=config["ome_zarr"]["rechunk_size"],
scaling_method=config["ome_zarr"]["scaling_method"],
downsampling=config["bigstitcher"]["fuse_dataset"]["downsampling"],
stains=get_stains,
uri=get_output_ome_zarr_uri(),
storage_provider_settings=workflow.storage_provider_settings,
output:
**get_output_ome_zarr("imaris"),
log:
bids(
root="logs",
subject="{subject}",
datatype="imaris_to_ome_zarr",
sample="{sample}",
acq="{acq}",
suffix="log.txt",
),
container:
config["containers"]["spimprep"]
group:
"preproc"
threads: config["total_cores"]
resources:
runtime=360,
mem_mb=config["total_mem_mb"],
shadow: 'minimal'
script:
"../scripts/imaris_to_ome_zarr.py"


31 changes: 31 additions & 0 deletions workflow/scripts/imaris_to_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Extract voxel-size metadata from an Imaris (.ims) file into a JSON sidecar.

Reads the OME XML fragment stored in the IMS HDF5 container, parses the
per-axis physical sizes, and writes them (converted to mm, in ZYX order)
to ``snakemake.output.metadata_json``.
"""
import json

import h5py
import xmltodict

# The OME tags are stored as a raw byte array inside the HDF5 file.
with h5py.File(snakemake.input.ims, "r") as hdf5_file:
    xml_data = hdf5_file['DataSetInfo/OME Image Tags/Image 0'][:]

# Decode the byte array to a string, dropping any undecodable bytes.
xml_str = bytes(xml_data).decode('utf-8', errors='ignore')

# The stored tags are an XML fragment, so wrap them in a synthetic root
# element to make a well-formed document.
try:
    xml_dict = xmltodict.parse(f"<root>{xml_str}</root>", namespace_separator=':')
except Exception as e:
    # Fail the rule here: the original code printed the error and fell
    # through, which guaranteed a NameError on the first use of xml_dict.
    raise ValueError(
        f"Error parsing OME XML metadata from {snakemake.input.ims}"
    ) from e

metadata = {}
# NOTE(review): despite the name, @PhysicalUnit here holds a numeric size
# (it is cast to float) -- confirm against the IMS OME tag layout.
metadata['physical_size_x'] = float(xml_dict['root']['ca:CustomAttributes']['DataAxis0']['@PhysicalUnit'])
metadata['physical_size_y'] = float(xml_dict['root']['ca:CustomAttributes']['DataAxis1']['@PhysicalUnit'])
# NOTE(review): z is read from DataAxis3 (not DataAxis2), and may be
# negative (hence abs) -- verify the axis numbering for these files.
metadata['physical_size_z'] = abs(float(xml_dict['root']['ca:CustomAttributes']['DataAxis3']['@PhysicalUnit']))
# ZYX order, since OME-Zarr stores axes as ZYX; /1000 converts to mm
# (source values presumably in micrometres -- TODO confirm).
metadata['PixelSize'] = [
    metadata['physical_size_z'] / 1000.0,
    metadata['physical_size_y'] / 1000.0,
    metadata['physical_size_x'] / 1000.0,
]
metadata['PixelSizeUnits'] = 'mm'

# Write metadata to the JSON sidecar.
with open(snakemake.output.metadata_json, 'w') as fp:
    json.dump(metadata, fp, indent=4)




2 changes: 1 addition & 1 deletion workflow/scripts/imaris_to_ome_zarr.py
Original file line number Diff line number Diff line change
@@ -34,7 +34,7 @@ def copy_group(hdf5_group, zarr_group):
data=item[()],
chunks=item.chunks,
dtype=item.dtype,
compression="gzip" # Optional compression
compression="blosc" # Optional compression
)
print(f"Copied dataset: {key}")

0 comments on commit dcfb77d

Please sign in to comment.