Skip to content

Commit

Permalink
Merge pull request #254 from DigitalSlideArchive/248-strict-as-override
Browse files Browse the repository at this point in the history
Add `strict` to rule sets
  • Loading branch information
naglepuff authored Sep 17, 2024
2 parents 9414884 + 1879460 commit da2490f
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 11 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Image redaction is determined by a set of rules. By default, the base set of rul
## Rule Application
All runs of `imagedephi` use the provided base set of rules as a foundation. End users can use the ruleset framework to build custom rulesets that handle additional or custom metadata not covered by the base rules, or override the behavior of the base rule set.

Override rule sets can be specified by using the `-r my_ruleset.yaml` or `--override-rules my_ruleset.yaml` option. This option is available for both the `imagedephi run` and `imagedephi plan` commands. Override rule sets are not provided by `imagedephi`, and must be defined by the end user.
Override rule sets can be specified by using the `-R my_ruleset.yaml` or `--override-rules my_ruleset.yaml` option. This option is available for both the `imagedephi run` and `imagedephi plan` commands. Override rule sets are not provided by `imagedephi`, and must be defined by the end user.

When `imagedephi` determines the steps to redact a file, it checks each piece of metadata in the file. For each piece of metadata found this way, it will first consult the override rule set, if present, for an applicable rule. If the override rule set does not contain a rule for that piece of metadata, the program will check the base ruleset.

Expand Down Expand Up @@ -53,9 +53,15 @@ You can add a description to your custom rulesets. This is not used by the progr
#### `output_file_name`
Specify how the output files should be named here. The base ruleset contains the value `study_slide`. In this case, if the input slides are named: `john_smith_lung.svs` and `john_smith_pancreas.svs`, the redacted output images will be named `study_slide_1.svs` and `study_slide_2.svs`.

### Other Top-level Properties

#### `strict`
The `strict` property of rulesets is used to denote that ALL unspecified tags should be deleted. This is supported for `tiff` and `svs` files. An example of using the strict flag can be seen in the `minimum_rules.yaml` rule set.

### File Format Rules
Redaction behavior is specified per file type. Currently pure `tiff` files, Aperio (`.svs`), and DICOM files are supported. Each image type has its own groups of data that can be redacted. For example, Aperio images have `tiff` metadata, certain associated images, and additional metadata specified in the `ImageDescription` tag. `svs` rulesets take the following shape:


```yaml
svs:
associated_images:
Expand Down
1 change: 1 addition & 0 deletions imagedephi/minimum_rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
name: Minimum Rules
description: A set of rules that defines a minimum amount of metadata for images to be read. Metadata not specified by a rule is deleted (controlled by the metadata_fallback_action).
output_file_name: study_slide
strict: true
tiff:
metadata_fallback_action: delete
associated_images:
Expand Down
14 changes: 10 additions & 4 deletions imagedephi/redact/build_redaction_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ def build_redaction_plan(
base_rules: Ruleset,
override_rules: Ruleset | None = None,
dcm_uid_map: dict[str, str] | None = None,
strict=False,
) -> RedactionPlan:
file_format = get_file_format_from_path(image_path)
strict = override_rules.strict if override_rules else base_rules.strict
if file_format == FileFormat.TIFF:
# Since SVS is a subset of tiff, fall back on file extension
file_extension = (
Expand All @@ -39,14 +39,20 @@ def build_redaction_plan(
if file_extension == FileFormat.TIFF:
merged_rules = base_rules.tiff.copy()
if override_rules:
merged_rules.metadata.update(override_rules.tiff.metadata)
if override_rules.strict:
merged_rules = override_rules.tiff.copy()
else:
merged_rules.metadata.update(override_rules.tiff.metadata)

return TiffRedactionPlan(image_path, merged_rules, strict)
elif file_extension == FileFormat.SVS:
merged_rules = base_rules.svs.copy()
if override_rules:
merged_rules.metadata.update(override_rules.svs.metadata)
merged_rules.image_description.update(override_rules.svs.image_description)
if override_rules.strict:
merged_rules = override_rules.svs.copy()
else:
merged_rules.metadata.update(override_rules.svs.metadata)
merged_rules.image_description.update(override_rules.svs.image_description)
return SvsRedactionPlan(image_path, merged_rules, strict)
else:
raise UnsupportedFileTypeError(f"File format for {image_path} not supported.")
Expand Down
8 changes: 2 additions & 6 deletions imagedephi/redact/redact.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,9 @@ def redact_images(
with logging_redirect_tqdm(loggers=[logger]):
for image_file in tqdm(images_to_redact, desc="Redacting images", position=0, leave=True):
push_progress(output_file_counter, output_file_max, redact_dir)
strict = profile == ProfileChoice.Strict.value
try:
redaction_plan = build_redaction_plan(
image_file, base_rules, override_rules, dcm_uid_map=dcm_uid_map, strict=strict
image_file, base_rules, override_rules, dcm_uid_map=dcm_uid_map
)
# Handle and report other errors without stopping the process
except Exception as e:
Expand Down Expand Up @@ -245,17 +244,14 @@ def show_redaction_plan(
) -> NamedTuple:
image_paths = iter_image_files(input_path, recursive) if input_path.is_dir() else [input_path]
base_rules = get_base_rules(profile)
strict = profile == ProfileChoice.Strict.value

global tags_used

def _create_redaction_plan_report():
global redaction_plan_report
for image_path in image_paths:
try:
redaction_plan = build_redaction_plan(
image_path, base_rules, override_rules, strict=strict
)
redaction_plan = build_redaction_plan(image_path, base_rules, override_rules)
except tifftools.TifftoolsError:
logger.error(f"Could not open {image_path.name} as a tiff.")
continue
Expand Down
1 change: 1 addition & 0 deletions imagedephi/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class Ruleset(BaseModel):
name: str = "My Rules"
description: str = "My rules"
output_file_name: str = "study_slide"
strict: bool = False
tiff: TiffRules = TiffRules()
svs: SvsRules = SvsRules()
dicom: DicomRules = DicomRules()
17 changes: 17 additions & 0 deletions tests/test_redact.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ def override_rule_set(rules_dir: Path):
return Ruleset.parse_obj(yaml.safe_load(rule_stream))


@pytest.fixture
def strict_rule_set():
    """Load the packaged ``minimum_rules.yaml`` file and parse it into a ``Ruleset``.

    The file is read from the installed ``imagedephi`` package resources, so the
    fixture exercises the same rule set that ships with the package.
    """
    rules_resource = importlib.resources.files("imagedephi").joinpath("minimum_rules.yaml")
    with rules_resource.open() as rules_file:
        raw_rules = yaml.safe_load(rules_file)
        return Ruleset.parse_obj(raw_rules)


@pytest.fixture(
params=[PurePath("svs"), PurePath("svs") / "test_svs_image_blank.svs"],
ids=["input_dir", "input_file"],
Expand Down Expand Up @@ -148,6 +155,16 @@ def test_strict(svs_input_path, tmp_path) -> None:
assert b"macro" not in output_file_bytes


@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_override_with_strict_flag(svs_input_path, tmp_path, strict_rule_set) -> None:
    """Redact with ``strict_rule_set`` as the override and check the output bytes.

    The clock is frozen so the name of the output directory is predictable; the
    redacted file must contain neither ``b"Aperio"`` nor ``b"macro"``.
    """
    redact.redact_images(svs_input_path, tmp_path, override_rules=strict_rule_set)

    redacted_dir = tmp_path / "Redacted_2023-05-12_12-12-53"
    redacted_bytes = (redacted_dir / "study_slide_1.svs").read_bytes()

    # Check the markers in the same order as before: "Aperio" first, then "macro".
    for forbidden in (b"Aperio", b"macro"):
        assert forbidden not in redacted_bytes


@freeze_time("2023-05-12 12:12:53")
@pytest.mark.timeout(5)
def test_strict_skip_dcm(dcm_input_path, tmp_path) -> None:
Expand Down

0 comments on commit da2490f

Please sign in to comment.