update docs
Tianming Han committed Dec 10, 2024
1 parent 367d541 commit 7087bd4
Showing 7 changed files with 175 additions and 86 deletions.
11 changes: 11 additions & 0 deletions docs/GettingStarted/beta_amyloid_plaque_detection.rst
@@ -0,0 +1,11 @@
.. _beta_amyloid_plaque_detection:

Beta-Amyloid Plaque Detection
#############################

Below is a figure giving an overview of the pipeline, which processes an image volume into a list of plaque centroids:

.. figure:: ../assets/image_to_list_of_centroids.png
    :alt: Overview of the pipeline from image volume to list of centroids

    Pipeline overview
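
In rough terms, the pipeline thresholds the volume, labels connected components,
and extracts one centroid per component. Below is a minimal illustrative sketch
of that idea using scipy (this is a hedged sketch, not the actual cvpl_tools
implementation):

.. code-block:: python

    import numpy as np
    from scipy import ndimage

    def volume_to_centroids(volume: np.ndarray, threshold: float) -> list:
        # Threshold the volume, label connected components, then return
        # one centroid (z, y, x) per labelled component
        mask = volume > threshold
        labels, nlabels = ndimage.label(mask)
        return ndimage.center_of_mass(mask, labels, range(1, nlabels + 1))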

17 changes: 17 additions & 0 deletions docs/GettingStarted/nnunet.rst
@@ -113,3 +113,20 @@ looking at:

3. The number of voxels covered by brain edge areas above threshold t, and how many of them are correctly annotated
as 1, and how many of them are incorrectly annotated as 0

These metrics are best summarized as IoU or Dice scores. An example segmentation is shown below.
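
As a point of reference, below is a minimal NumPy sketch of how IoU and Dice
could be computed from a pair of binary masks (illustrative only, not a
cvpl_tools API):

.. code-block:: python

    import numpy as np

    def iou_and_dice(pred: np.ndarray, gt: np.ndarray) -> tuple[float, float]:
        # Both inputs are binary masks of the same shape
        pred, gt = pred.astype(bool), gt.astype(bool)
        inter = np.logical_and(pred, gt).sum()
        union = np.logical_or(pred, gt).sum()
        total = pred.sum() + gt.sum()
        iou = inter / union if union else 1.0
        dice = 2 * inter / total if total else 1.0
        return float(iou), float(dice)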

.. figure:: ../assets/mb_unmasked.png
    :alt: Slice of mouse brain, unsegmented

    Slice of the mouse brain, not annotated (without negative masking)


.. figure:: ../assets/mb_masked.png
    :alt: Slice of mouse brain, negative masked

    Slice of the mouse brain, annotated (with negative masking)


Here the algorithm, as intended, marks not only the outer edges of the brain but also some of the brighter inner
structures as edge areas to be removed, since these cannot be plaques. The bright spots in the upper left of the images
are left as is, since they are all plaques. Overall, annotation requires considerable labour, and it is preferable
to obtain one high-quality annotated volume rather than many low-quality ones.
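
For intuition, applying the negative mask before quantification amounts to
zeroing out the flagged voxels. A hedged sketch (the helper below is
hypothetical, not part of cvpl_tools):

.. code-block:: python

    import numpy as np

    def apply_negative_mask(im: np.ndarray, neg_mask: np.ndarray) -> np.ndarray:
        # Voxels where neg_mask > 0 are brain-edge areas that cannot be
        # plaques, so they are zeroed out before plaque detection
        return np.where(neg_mask > 0, 0, im)
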
Binary file added docs/assets/mb_masked.png
Binary file added docs/assets/mb_unmasked.png
2 changes: 2 additions & 0 deletions docs/index.rst
@@ -34,6 +34,8 @@ or on cloud.
Setting Up the Script <GettingStarted/setting_up_the_script>
Defining Segmentation Pipeline <GettingStarted/segmentation_pipeline>
Result Caching <GettingStarted/result_caching>
nn-UNet <GettingStarted/nnunet>
Beta-Amyloid Plaque Detection <GettingStarted/beta_amyloid_plaque_detection>

.. toctree::
:maxdepth: 2
186 changes: 118 additions & 68 deletions src/cvpl_tools/examples/mousebrain_processing.py
@@ -1,56 +1,101 @@
import numpy as np

from cvpl_tools.fsspec import RDirFileSystem
from dataclasses import dataclass

SUBJECT_ID = 'o22' # **CHANGE THIS**
MINIMUM_SUBJECT_ID = None
BA_CHANNEL = None
PLAQUE_THRESHOLD, MAX_THRESHOLD = {

@dataclass
class Subject:
SUBJECT_ID: str = None
MINIMUM_SUBJECT_ID: str = None
BA_CHANNEL = None
PLAQUE_THRESHOLD: float = None
MAX_THRESHOLD: float = None

OME_ZARR_PATH = None
SUBJECT_FOLDER = None
NNUNET_CACHE_DIR = None

FIRST_DOWNSAMPLE_PATH = None
SECOND_DOWNSAMPLE_PATH = None
THIRD_DOWNSAMPLE_PATH = None

THIRD_DOWNSAMPLE_BIAS_PATH = None
SECOND_DOWNSAMPLE_CORR_PATH = None

NNUNET_OUTPUT_TIFF_PATH = None
GCS_NEG_MASK_TGT = None
GCS_BIAS_PATH = None
COILED_CACHE_DIR_PATH = None


subjects4x = ('F4A1Te3Blaze', 'F6A2Te3Blaze', 'M1A1Te3Blaze', 'M1A2Te3Blaze', 'M7A1Te4')
THRESHOLD_TABLE = {
'o22': (400., 1000.), # 1
'o23': (400., 1000.), # 2
'o24': (400., 1000.), # 3
'o24oldBlaze': (2000., 5000.), # 4
# 'o24': (400., 1000.), # 3
# 'o24oldBlaze': (2000., 5000.), # 4
'F1A1Te4Blaze': (3000., 7500.), # 5
'F1A2Te3Blaze': (3000., 7500.),
'F4A1Te3Blaze': (3000., 7500.),
'F6A2Te3Blaze': (3000., 7500.),
'M1A1Te3Blaze': (3000., 7500.),
'M1A2Te3Blaze': (3000., 7500.),
'M3A2Te3Blaze': (3000., 7500.),
'M4A2Te3Blaze': (3000., 7500.),
'M7A1Te4Blaze': (3000., 7500.),
# **ADD MORE SUBJECTS HERE**
}[SUBJECT_ID]
if SUBJECT_ID.endswith('oldBlaze'):
MINIMUM_SUBJECT_ID = SUBJECT_ID[:-len('oldBlaze')]
OME_ZARR_PATH = f'gcs://khanlab-lightsheet/data/mouse_appmaptapoe/bids_oldBlaze/sub-{MINIMUM_SUBJECT_ID}/micr/sub-{MINIMUM_SUBJECT_ID}_sample-brain_acq-blaze_SPIM.ome.zarr'
BA_CHANNEL = np.s_[0]
elif SUBJECT_ID.endswith('Blaze'):
MINIMUM_SUBJECT_ID = SUBJECT_ID[:-len('Blaze')]
OME_ZARR_PATH = f'gcs://khanlab-lightsheet/data/mouse_appmaptapoe/bids/sub-{MINIMUM_SUBJECT_ID}/micr/sub-{MINIMUM_SUBJECT_ID}_sample-brain_acq-blaze_SPIM.ome.zarr'
BA_CHANNEL = np.s_[0]
else:
MINIMUM_SUBJECT_ID = SUBJECT_ID
OME_ZARR_PATH = f'Z:/projects/lightsheet_lifecanvas/bids/sub-{MINIMUM_SUBJECT_ID}/micr/sub-{MINIMUM_SUBJECT_ID}_sample-brain_acq-prestitched_SPIM.ome.zarr'
BA_CHANNEL = np.s_[1]

if SUBJECT_ID == 'F4A1Te3Blaze': # **SOME SUBJECTS DO NOT FOLLOW THE ABOVE FORMAT**
OME_ZARR_PATH = 'gcs://khanlab-lightsheet/data/mouse_appmaptapoe/bids/sub-F4A1Te3/micr/sub-F4A1Te3_sample-brain_acq-blaze4x_SPIM.ome.zarr'

RUN_ON_FULL_IM = True
if not RUN_ON_FULL_IM:
BA_CHANNEL = np.s_[0, 256:512, :, :] # **CHANGE THIS**

SUBJECT_FOLDER = f'C:/Users/than83/Documents/progtools/datasets/subjects/subject_{SUBJECT_ID}' # **CHANGE THIS**
NNUNET_CACHE_DIR = 'C:/Users/than83/Documents/progtools/datasets/nnunet/Cache_250epoch_Run20241126' # **CHANGE THIS**

FIRST_DOWNSAMPLE_PATH = f'{SUBJECT_FOLDER}/first_downsample.ome.zarr'
SECOND_DOWNSAMPLE_PATH = f'{SUBJECT_FOLDER}/second_downsample.ome.zarr'
THIRD_DOWNSAMPLE_PATH = f'{SUBJECT_FOLDER}/third_downsample.ome.zarr'

THIRD_DOWNSAMPLE_BIAS_PATH = f'{SUBJECT_FOLDER}/third_downsample_bias.ome.zarr'
SECOND_DOWNSAMPLE_CORR_PATH = f'{SUBJECT_FOLDER}/second_downsample_corr.ome.zarr'

NNUNET_OUTPUT_TIFF_PATH = f'{SUBJECT_FOLDER}/second_downsample_nnunet.tiff'
GCS_NEG_MASK_TGT = f'gcs://khanlab-scratch/tmp/{SUBJECT_ID}_second_downsample_nnunet.tiff'
GCS_BIAS_PATH = f'gcs://khanlab-scratch/tmp/{SUBJECT_ID}_second_downsample_corr.tiff'
COILED_CACHE_DIR_PATH = f'gcs://khanlab-scratch/tmp/CacheDirectory_{SUBJECT_ID}'


def main(run_nnunet: bool = True, run_coiled_process: bool = True):
}
ALL_SUBJECTS = list(THRESHOLD_TABLE.keys())

def get_subject(SUBJECT_ID):
subject = Subject()
subject.SUBJECT_ID = SUBJECT_ID

subject.PLAQUE_THRESHOLD, subject.MAX_THRESHOLD = THRESHOLD_TABLE[SUBJECT_ID]
if SUBJECT_ID.endswith('oldBlaze'):
MINIMUM_SUBJECT_ID = SUBJECT_ID[:-len('oldBlaze')]
OME_ZARR_PATH = f'gcs://khanlab-lightsheet/data/mouse_appmaptapoe/bids_oldBlaze/sub-{MINIMUM_SUBJECT_ID}/micr/sub-{MINIMUM_SUBJECT_ID}_sample-brain_acq-blaze_SPIM.ome.zarr'
BA_CHANNEL = np.s_[0]
elif SUBJECT_ID.endswith('Blaze'):
MINIMUM_SUBJECT_ID = SUBJECT_ID[:-len('Blaze')]
if SUBJECT_ID in subjects4x: # **SOME SUBJECTS DO NOT FOLLOW THE ABOVE FORMAT**
OME_ZARR_PATH = f'gcs://khanlab-lightsheet/data/mouse_appmaptapoe/bids/sub-{MINIMUM_SUBJECT_ID}/micr/sub-{MINIMUM_SUBJECT_ID}_sample-brain_acq-blaze4x_SPIM.ome.zarr'
else:
OME_ZARR_PATH = f'gcs://khanlab-lightsheet/data/mouse_appmaptapoe/bids/sub-{MINIMUM_SUBJECT_ID}/micr/sub-{MINIMUM_SUBJECT_ID}_sample-brain_acq-blaze_SPIM.ome.zarr'
BA_CHANNEL = np.s_[0]
else:
MINIMUM_SUBJECT_ID = SUBJECT_ID
OME_ZARR_PATH = f'Z:/projects/lightsheet_lifecanvas/bids/sub-{MINIMUM_SUBJECT_ID}/micr/sub-{MINIMUM_SUBJECT_ID}_sample-brain_acq-prestitched_SPIM.ome.zarr'
BA_CHANNEL = np.s_[1]

RUN_ON_FULL_IM = False
if not RUN_ON_FULL_IM:
BA_CHANNEL = np.s_[BA_CHANNEL, 256:512, :, :] # **CHANGE THIS**

subject.MINIMUM_SUBJECT_ID = MINIMUM_SUBJECT_ID
subject.OME_ZARR_PATH = OME_ZARR_PATH
subject.BA_CHANNEL = BA_CHANNEL

subject.SUBJECT_FOLDER = f'C:/Users/than83/Documents/progtools/datasets/subjects/subject_{SUBJECT_ID}' # **CHANGE THIS**
subject.NNUNET_CACHE_DIR = 'C:/Users/than83/Documents/progtools/datasets/nnunet/Cache_250epoch_Run20241126' # **CHANGE THIS**

subject.FIRST_DOWNSAMPLE_PATH = f'{subject.SUBJECT_FOLDER}/first_downsample.ome.zarr'
subject.SECOND_DOWNSAMPLE_PATH = f'{subject.SUBJECT_FOLDER}/second_downsample.ome.zarr'
subject.THIRD_DOWNSAMPLE_PATH = f'{subject.SUBJECT_FOLDER}/third_downsample.ome.zarr'

subject.THIRD_DOWNSAMPLE_BIAS_PATH = f'{subject.SUBJECT_FOLDER}/third_downsample_bias.ome.zarr'
subject.SECOND_DOWNSAMPLE_CORR_PATH = f'{subject.SUBJECT_FOLDER}/second_downsample_corr.ome.zarr'

subject.NNUNET_OUTPUT_TIFF_PATH = f'{subject.SUBJECT_FOLDER}/second_downsample_nnunet.tiff'
subject.GCS_NEG_MASK_TGT = f'gcs://khanlab-scratch/tmp/{SUBJECT_ID}_second_downsample_nnunet.tiff'
subject.GCS_BIAS_PATH = f'gcs://khanlab-scratch/tmp/{SUBJECT_ID}_second_downsample_corr.tiff'
subject.COILED_CACHE_DIR_PATH = f'gcs://khanlab-scratch/tmp/CacheDirectory_{SUBJECT_ID}'

return subject


def main(subject: Subject, run_nnunet: bool = True, run_coiled_process: bool = True):
import numpy as np
import cvpl_tools.nnunet.current_im as current_im_py
import cvpl_tools.nnunet.n4 as n4
@@ -59,33 +104,33 @@ def main(run_nnunet: bool = True, run_coiled_process: bool = True):
import asyncio
import cvpl_tools.nnunet.triplanar as triplanar

print(f'first downsample: from path {OME_ZARR_PATH}')
print(f'first downsample: from path {subject.OME_ZARR_PATH}')
first_downsample = current_im_py.downsample(
OME_ZARR_PATH, reduce_fn=np.max, ndownsample_level=(1, 2, 2), ba_channel=BA_CHANNEL,
write_loc=FIRST_DOWNSAMPLE_PATH
subject.OME_ZARR_PATH, reduce_fn=np.max, ndownsample_level=(1, 2, 2), ba_channel=subject.BA_CHANNEL,
write_loc=subject.FIRST_DOWNSAMPLE_PATH
)
print(f'first downsample done. result is of shape {first_downsample.shape}')

second_downsample = current_im_py.downsample(
first_downsample, reduce_fn=np.max, ndownsample_level=(1,) * 3,
write_loc=SECOND_DOWNSAMPLE_PATH
write_loc=subject.SECOND_DOWNSAMPLE_PATH
)
third_downsample = current_im_py.downsample(
second_downsample, reduce_fn=np.max, ndownsample_level=(1,) * 3,
write_loc=THIRD_DOWNSAMPLE_PATH
write_loc=subject.THIRD_DOWNSAMPLE_PATH
)
print(f'second and third downsample done. second_downsample.shape={second_downsample.shape}, third_downsample.shape={third_downsample.shape}')

third_downsample_bias = n4.obtain_bias(third_downsample,
write_loc=THIRD_DOWNSAMPLE_BIAS_PATH)
write_loc=subject.THIRD_DOWNSAMPLE_BIAS_PATH)
print('third downsample bias done.')

print(f'im.shape={second_downsample.shape}, bias.shape={third_downsample_bias.shape}; applying bias over image to obtain corrected image...')
second_downsample_bias = dask_ndinterp.scale_nearest(third_downsample_bias, scale=(2, 2, 2),
output_shape=second_downsample.shape, output_chunks=(4, 4096, 4096)).persist()

second_downsample_corr = current_im_py.apply_bias(second_downsample, (1,) * 3, second_downsample_bias, (1,) * 3)
asyncio.run(ome_io.write_ome_zarr_image(SECOND_DOWNSAMPLE_CORR_PATH, da_arr=second_downsample_corr, MAX_LAYER=1))
asyncio.run(ome_io.write_ome_zarr_image(subject.SECOND_DOWNSAMPLE_CORR_PATH, da_arr=second_downsample_corr, MAX_LAYER=1))
print('second downsample corrected image done')

# first_downsample_correct_path = f'C:/Users/than83/Documents/progtools/datasets/lightsheet_downsample/sub-{SUBJECT_ID}_corrected.ome.zarr'
@@ -98,12 +143,12 @@ def main(run_nnunet: bool = True, run_coiled_process: bool = True):
if run_nnunet is False:
return

if not RDirFileSystem(NNUNET_OUTPUT_TIFF_PATH).exists(''):
if not RDirFileSystem(subject.NNUNET_OUTPUT_TIFF_PATH).exists(''):
pred_args = {
"cache_url": NNUNET_CACHE_DIR,
"test_im": SECOND_DOWNSAMPLE_CORR_PATH,
"cache_url": subject.NNUNET_CACHE_DIR,
"test_im": subject.SECOND_DOWNSAMPLE_CORR_PATH,
"test_seg": None,
"output": NNUNET_OUTPUT_TIFF_PATH,
"output": subject.NNUNET_OUTPUT_TIFF_PATH,
"dataset_id": 1,
"fold": '0',
"triplanar": False,
@@ -118,31 +163,36 @@ def main(run_nnunet: bool = True, run_coiled_process: bool = True):

import cvpl_tools.nnunet.api as cvpl_nnunet_api

if not RDirFileSystem(NNUNET_OUTPUT_TIFF_PATH).exists(''):
if not RDirFileSystem(subject.GCS_NEG_MASK_TGT).exists(''):
cvpl_nnunet_api.upload_negmask(
NNUNET_OUTPUT_TIFF_PATH,
GCS_NEG_MASK_TGT,
THIRD_DOWNSAMPLE_BIAS_PATH,
f'{SUBJECT_FOLDER}/.temp',
GCS_BIAS_PATH
subject.NNUNET_OUTPUT_TIFF_PATH,
subject.GCS_NEG_MASK_TGT,
subject.THIRD_DOWNSAMPLE_BIAS_PATH,
f'{subject.SUBJECT_FOLDER}/.temp',
subject.GCS_BIAS_PATH
)

ppm_to_im_upscale = (4, 8, 8)
async def fn(dask_worker):
await cvpl_nnunet_api.mousebrain_forward(
dask_worker=dask_worker,
CACHE_DIR_PATH=COILED_CACHE_DIR_PATH,
ORIG_IM_PATH=OME_ZARR_PATH,
NEG_MASK_PATH=GCS_NEG_MASK_TGT,
GCS_BIAS_PATH=GCS_BIAS_PATH,
BA_CHANNEL=BA_CHANNEL,
MAX_THRESHOLD=MAX_THRESHOLD,
CACHE_DIR_PATH=subject.COILED_CACHE_DIR_PATH,
ORIG_IM_PATH=subject.OME_ZARR_PATH,
NEG_MASK_PATH=subject.GCS_NEG_MASK_TGT,
GCS_BIAS_PATH=subject.GCS_BIAS_PATH,
BA_CHANNEL=subject.BA_CHANNEL,
MAX_THRESHOLD=subject.MAX_THRESHOLD,
ppm_to_im_upscale=ppm_to_im_upscale
)
cvpl_nnunet_api.coiled_run(fn=fn, nworkers=10, local_testing=False)


if __name__ == '__main__':
main(run_nnunet=True, run_coiled_process=False)
ID = 'M4A2Te3Blaze'
print(f'Starting prediction on subject {ID}')
subject = get_subject(ID)

main(subject=subject, run_nnunet=True, run_coiled_process=True)
print(f'Finished predicting on subject {ID}')
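
With this refactor, batching over every subject defined in THRESHOLD_TABLE
becomes a short loop. A hypothetical driver (not part of this commit), using
only names defined in the file above:

    # Iterate over all subjects and run the local pipeline on each,
    # skipping the coiled cloud-processing step.
    for subject_id in ALL_SUBJECTS:
        subject = get_subject(subject_id)
        main(subject=subject, run_nnunet=True, run_coiled_process=False)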

