Merge pull request #255 from DigitalSlideArchive/handle-missing-rules

Handle missing rules
DigitalSlideArchive · Oct 28, 2024 · 32f4077 · 32f4077
2 parents 97800f0 + bdaedf7
commit 32f4077
Show file tree

Hide file tree

Showing 7 changed files with 175 additions and 29 deletions.
diff --git a/client/src/HomePage.vue b/client/src/HomePage.vue
@@ -14,6 +14,7 @@ const inputModal = ref(null);
 const outputModal = ref(null);
 const rulesetModal = ref(null);
 const redactionModal = ref();
+const missingRulesModal = ref();
 
 const progress = ref({
   count: 0,
@@ -70,11 +71,31 @@ const redact_images = async () => {
     });
     redactionStateFlags.value.redacting = false;
     redactionModal.value.close();
+    redactionStateFlags.value.showImageTable = false;
     redactionStateFlags.value.redactionComplete =
       !!useRedactionPlan.imageRedactionPlan.total;
     redactionStateFlags.value.redactionSnackbar = true;
   }
 };
+
+// Click handler for the "De-phi Images" button. Nothing happens until both an
+// input and an output directory have been selected. If the current redaction
+// plan reports missing rules, the confirmation dialog is opened instead of
+// redacting; otherwise redaction starts immediately.
+const canRedact = () => {
+  const dirs = selectedDirectories.value;
+  const bothSelected = dirs.inputDirectory && dirs.outputDirectory;
+  if (!bothSelected) {
+    return;
+  }
+  if (!useRedactionPlan.imageRedactionPlan.missing_rules) {
+    redact_images();
+    return;
+  }
+  // Let the user decide whether to continue without the missing rules.
+  missingRulesModal.value.showModal();
+};
+// Handler for the "Continue" button of the missing-rules dialog: the user has
+// chosen to proceed anyway, so dismiss the dialog and start the redaction.
+const forceRedact = () => {
+  const warningDialog = missingRulesModal.value;
+  warningDialog.close();
+  redact_images();
+};
 </script>
 
 <template>
@@ -137,14 +158,49 @@ const redact_images = async () => {
               type="submit"
               :class="`${!selectedDirectories.inputDirectory || !selectedDirectories.outputDirectory ? 'btn btn-block bg-accent text-white uppercase rounded-lg tooltip' : 'btn btn-block btn-accent text-white uppercase rounded-lg'}`"
               data-tip="Please select input and output directories"
-              @click="redact_images()"
+              @click="canRedact()"
             >
               De-phi Images
             </button>
           </div>
         </div>
       </div>
     </div>
+    <dialog id="missingRulesModal" ref="missingRulesModal" class="modal">
+      <div class="modal-box max-w-100">
+        <div class="card max-w-100">
+          <div class="card-body">
+            <h2 class="font-bold text-xl text-center">
+              Missing Redaction Rules
+            </h2>
+            <div class="divider my-1"></div>
+            <p class="indent-8 font-medium">
+              One or more images are missing redaction rules. If you continue
+              these images will not be redacted.
+            </p>
+            <p class="indent-8 text-base font-medium">
+              To add rules, please select a ruleset with the missing redaction
+              rules.
+            </p>
+          </div>
+          <div class="card-actions flex-nowrap justify-between">
+            <button
+              class="btn btn-accent w-1/2 text-white uppercase"
+              @click="forceRedact()"
+            >
+              Continue
+            </button>
+            <button
+              class="btn btn-neutral text-white w-1/2 uppercase"
+              @click="missingRulesModal.close()"
+            >
+              Cancel
+            </button>
+          </div>
+        </div>
+      </div>
+    </dialog>
+
     <dialog id="redactionModal" ref="redactionModal" class="modal">
       <div class="modal-box w-96">
         <div class="card">

diff --git a/client/src/store/types.ts b/client/src/store/types.ts
@@ -18,6 +18,7 @@ export type imagePlanResponse = {
   data: Record<string, Record<string, string>>;
   total: number;
   tags: string[];
+  missing_rules: boolean;
 };
 
 export interface Path {

diff --git a/imagedephi/main.py b/imagedephi/main.py
@@ -122,9 +122,10 @@ def imagedephi(
         set_logging_config(verbose, quiet, log_file)
 
 
-@imagedephi.command
+@imagedephi.command(no_args_is_help=True)
 @global_options
 @click.argument("input-path", type=click.Path(exists=True, readable=True, path_type=Path))
+@click.option("-i", "--index", default=1, help="Starting index of the images to redact.", type=int)
 @click.option(
     "-o",
     "--output-dir",
@@ -146,6 +147,7 @@ def run(
     quiet,
     verbose,
     log_file,
+    index,
 ):
     """Perform the redaction of images."""
     params = _check_parent_params(ctx, profile, override_rules, recursive, quiet, verbose, log_file)
@@ -158,10 +160,11 @@ def run(
         rename=rename,
         recursive=params["recursive"],
         profile=params["profile"],
+        index=index,
     )
 
 
-@imagedephi.command
+@imagedephi.command(no_args_is_help=True)
 @global_options
 @click.argument("input-path", type=click.Path(exists=True, readable=True, path_type=Path))
 @click.pass_context

diff --git a/imagedephi/redact/redact.py b/imagedephi/redact/redact.py
@@ -8,7 +8,8 @@
 import importlib.resources
 import logging
 from pathlib import Path
-from typing import NamedTuple, TypeVar
+from shutil import copy2
+from typing import TYPE_CHECKING, NamedTuple, TypeVar
 
 import tifftools
 import tifftools.constants
@@ -25,11 +26,15 @@
 from .svs import MalformedAperioFileError
 from .tiff import UnsupportedFileTypeError
 
+if TYPE_CHECKING:
+    from .redaction_plan import TagRedactionPlan
+
 tags_used = OrderedDict()
 redaction_plan_report = {}
 unprocessable_image_messages: list[str] = []
 
 T = TypeVar("T")
+missing_rules = False
 
 
 class ProfileChoice(Enum):
@@ -98,16 +103,16 @@ def generator_to_list_with_progress(
     return result
 
 
-def create_redact_dir_and_manifest(base_output_dir: Path) -> tuple[Path, Path]:
+def create_redact_dir_and_manifest(base_output_dir: Path, time_stamp: str) -> tuple[Path, Path]:
     """
     Given a directory, create and return a sub-directory within it.
 
     `time_stamp` is the string used to name the new directory
     (`Redacted_<time_stamp>`) and its manifest file; the caller must supply it.
     """
-    time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
     redact_dir = base_output_dir / f"Redacted_{time_stamp}"
     manifest_file = base_output_dir / f"Redacted_{time_stamp}_manifest.csv"
+
     try:
         redact_dir.mkdir(parents=True)
         manifest_file.touch()
@@ -127,7 +132,11 @@ def redact_images(
     profile: str = "",
     overwrite: bool = False,
     recursive: bool = False,
+    index: int = 1,
 ) -> None:
+
+    time_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
     # Keep track of information about this run to write to a persistent log file (csv?)
     # (original_name, output_name) as bare minimum
     # error message? rule set (base/override)?
@@ -151,7 +160,15 @@ def redact_images(
 
     output_file_counter = 1
     output_file_max = len(images_to_redact)
-    redact_dir, manifest_file = create_redact_dir_and_manifest(output_dir)
+    failed_img_counter = 0
+    failed_images: dict[
+        str, list[dict[str, dict[str, int | str | list[str] | TagRedactionPlan]]]
+    ] = {"failed_images": []}
+    redact_dir, manifest_file = create_redact_dir_and_manifest(output_dir, time_stamp)
+    failed_dir = output_dir / f"Failed_{time_stamp}"
+    failed_manifest_file = (
+        output_dir / f"Failed_{time_stamp}" / f"Failed_{time_stamp}_manifest.yaml"
+    )
 
     dcm_uid_map: dict[str, str] = {}
 
@@ -177,14 +194,48 @@ def redact_images(
                 )
                 continue
             if not redaction_plan.is_comprehensive():
+                nested_failed_dir: Path = Path()
                 logger.info(f"Redaction could not be performed for {image_file.name}.")
+                failed_img_counter += 1
+
+                if failed_img_counter == 1:
+                    failed_dir.mkdir(parents=True)
+                    failed_manifest_file.touch()
+
+                if recursive:
+                    nested_failed_dir = Path(
+                        str(image_file).replace(str(input_path), str(failed_dir), 1)
+                    ).parent
+                    nested_failed_dir.mkdir(parents=True, exist_ok=True)
+
+                # Attempt to hardlink the image to the failed directory
+                # Copy occurs if hardlink fails ie. cross-device
+                if nested_failed_dir.name == image_file.parent.name:
+                    failed_img = nested_failed_dir / image_file.name
+                else:
+                    failed_img = failed_dir / image_file.name
+                try:
+                    failed_img.hardlink_to(image_file)
+                except OSError:
+                    # Using copy2 preserves metadata
+                    # https://docs.python.org/3/library/shutil.html#shutil.copy2
+                    copy2(image_file, failed_img)
+                img_dict = {
+                    image_file.name: {
+                        "missing_tags": redaction_plan.report_plan()[image_file.name].get(
+                            "missing_tags", []
+                        )
+                    }
+                }
+                failed_images["failed_images"].append(img_dict)
                 run_summary.append(
                     {
                         "input_path": image_file,
                         "output_path": "",
                         "detail": "Could not redact with the provided set of rules.",
                     }
                 )
+
             else:
                 redaction_plan.execute_plan()
                 output_parent_dir = redact_dir
@@ -198,7 +249,7 @@ def redact_images(
                         image_file,
                         output_parent_dir,
                         output_file_name_base,
-                        output_file_counter,
+                        index,
                         output_file_max,
                     )
                     if rename
@@ -214,6 +265,30 @@ def redact_images(
                 )
                 if output_file_counter == output_file_max:
                     logger.info("Redactions completed")
+                    if failed_img_counter:
+                        # Ensure that the logged index is the correct starting point
+                        with open(failed_manifest_file, "a") as manifest:
+                            yaml.dump(
+                                failed_images,
+                                manifest,
+                                explicit_start=True,
+                                default_flow_style=False,
+                            )
+                            manifest.write("failed_images_count: " + str(failed_img_counter) + "\n")
+                            index += 1
+
+                            yaml_command = f"""command: imagedephi run {failed_dir} --output-dir {redact_dir.parent} --index {index}"""  # noqa
+                            options = [
+                                f" --override-rules {override_rules}" if override_rules else "",
+                                " --overwrite" if overwrite else "",
+                                f" --profile {profile}" if profile != "default" else "",
+                                " --recursive" if recursive else "",
+                                " --skip-rename" if not rename else "",
+                            ]
+                            yaml_command += " ".join(filter(None, options))
+                            command = yaml.safe_load(yaml_command)
+                            yaml.dump(command, manifest, width=float("inf"))
+                index += 1
             output_file_counter += 1
     logger.info(f"Writing manifest to {manifest_file}")
     with open(manifest_file, "w") as manifest:
@@ -303,6 +378,8 @@ def show_redaction_plan(
 
     def _create_redaction_plan_report():
         global redaction_plan_report
+        global missing_rules
+        missing_rules = False
         global unprocessable_image_messages
         unprocessable_image_messages = []
         with logging_redirect_tqdm(loggers=[logger]):
@@ -330,6 +407,8 @@ def _create_redaction_plan_report():
                     continue
                 logger.info(f"Redaction plan for {image_path.name}:")
                 redaction_plan_report.update(redaction_plan.report_plan())  # type: ignore
+            if not redaction_plan.is_comprehensive():
+                missing_rules = True
 
     if not update:
         global redaction_plan_report
@@ -338,13 +417,14 @@ def _create_redaction_plan_report():
         tags_used = OrderedDict()
         _create_redaction_plan_report()
     else:
+
         _create_redaction_plan_report()
 
     total = len(redaction_plan_report)  # type: ignore
     sorted_dict = _sort_data(redaction_plan_report)  # type: ignore
     if limit is not None and offset is not None:
         sorted_dict = OrderedDict(list(sorted_dict.items())[offset * limit : (offset + 1) * limit])
-    images_plan = namedtuple("images_plan", ["data", "total", "tags"])
+    images_plan = namedtuple("images_plan", ["data", "total", "tags", "missing_rules"])
 
     if input_path.is_dir():
         # Provide a summary if the input path is a directory of images
@@ -378,4 +458,4 @@ def _create_redaction_plan_report():
 
     # Reset logging level if it was changed
     logger.setLevel(starting_logging_level)
-    return images_plan(sorted_dict, total, list(tags_used))
+    return images_plan(sorted_dict, total, list(tags_used), missing_rules)
diff --git a/imagedephi/redact/redaction_plan.py b/imagedephi/redact/redaction_plan.py
@@ -13,7 +13,7 @@
 
     TagRedactionPlan = dict[str, int | float | TagData | ByteInfo]
 
-    RedactionPlanReport = dict[str, dict[str, int | str | TagRedactionPlan]]
+    RedactionPlanReport = dict[str, dict[str, int | str | list[str] | TagRedactionPlan]]
 
 
 class RedactionPlan:

diff --git a/imagedephi/redact/svs.py b/imagedephi/redact/svs.py
@@ -233,23 +233,27 @@ def report_plan(
                     logger.debug(f"SVS Image Description - {key_name}: {operation}")
                     report[self.image_path.name][key_name] = {"action": operation, "value": _data}
                 continue
-            rule = self.metadata_redaction_steps[tag.value]
-            operation = self.determine_redaction_operation(rule, ifd)
-            logger.debug(f"Tiff Tag {tag.value} - {rule.key_name}: {operation}")
-            if ifd["tags"][tag.value]["datatype"] == tifftools.constants.Datatype.UNDEFINED.value:
-                encoded_value: dict[str, str | int] = {
-                    "value": f"0x{binascii.hexlify(ifd['tags'][tag.value]['data'] ).decode('utf-8')}",  # type: ignore # noqa: E501
-                    "bytes": len(ifd["tags"][tag.value]["data"]),
-                }
-                report[self.image_path.name][rule.key_name] = {
-                    "action": operation,
-                    "binary": encoded_value,
-                }
-            else:
-                report[self.image_path.name][rule.key_name] = {
-                    "action": operation,
-                    "value": ifd["tags"][tag.value]["data"],
-                }
+            if tag.value not in self.no_match_tags:
+                rule = self.metadata_redaction_steps[tag.value]
+                operation = self.determine_redaction_operation(rule, ifd)
+                logger.debug(f"Tiff Tag {tag.value} - {rule.key_name}: {operation}")
+                if (
+                    ifd["tags"][tag.value]["datatype"]
+                    == tifftools.constants.Datatype.UNDEFINED.value
+                ):
+                    encoded_value: dict[str, str | int] = {
+                        "value": f"0x{binascii.hexlify(ifd['tags'][tag.value]['data'] ).decode('utf-8')}",  # type: ignore # noqa: E501
+                        "bytes": len(ifd["tags"][tag.value]["data"]),
+                    }
+                    report[self.image_path.name][rule.key_name] = {
+                        "action": operation,
+                        "binary": encoded_value,
+                    }
+                else:
+                    report[self.image_path.name][rule.key_name] = {
+                        "action": operation,
+                        "value": ifd["tags"][tag.value]["data"],
+                    }
         self.report_missing_rules(report)
         logger.debug("Aperio (.svs) Associated Image Redaction Plan\n")
         # Report the number of associated images found in the image that match each associated