Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are:
Referenced versions in headers are tagged on Github, in parentheses are for pypi.

## [vxx](https://github.com/pydicom/deid/tree/master) (master)
- Add 'clean-pixel' CLI command [#285](https://github.com/pydicom/deid/pull/285) (0.5.0)
- Fix REMOVE action to respect REPLACE or JITTER priority [#283](https://github.com/pydicom/deid/pull/283) (0.4.6)
- Add enhanced private tag syntax support [#282](https://github.com/pydicom/deid/pull/282) (0.4.5)
- Fix tag specification for KEEP action [#281](https://github.com/pydicom/deid/pull/281) (0.4.4)
Expand Down
71 changes: 52 additions & 19 deletions deid/dicom/pixels/clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def get_figure(self, show=False, image_type="cleaned", title=None):
plt.show()
return plt

def _get_clean_name(self, output_folder, extension="dcm"):
def _get_clean_name(self, output_folder, extension="dcm", prefix="clean-"):
"""
Get path to a cleaned output file.

Expand All @@ -145,6 +145,10 @@ def _get_clean_name(self, output_folder, extension="dcm"):
exist.
extension: the extension of the file to create a name for, should
not start with "."
prefix: string prepended to file's basename
extension: extension to append to basename after removing .dcm or .dicom.
Use empty string to disable: then .dcm|.dicom is not stripped and the basename is not changed.

"""
if output_folder is None:
output_folder = self.output_folder
Expand All @@ -153,8 +157,16 @@ def _get_clean_name(self, output_folder, extension="dcm"):
bot.debug("Creating output folder %s" % output_folder)
os.makedirs(output_folder)

basename = re.sub("[.]dicom|[.]dcm", "", os.path.basename(self.dicom_file))
return "%s/cleaned-%s.%s" % (output_folder, basename, extension)
# do we want to change extension? this might convert .dicom to .dcm
# or would add .dcm to eg. 'MR.*' (MR.12*34.dcm) or '*IMA' (1234.IMA.dcm)
if extension:
basename = re.sub("[.]dicom|[.]dcm", "", os.path.basename(self.dicom_file))
extension = "." + extension
else:
basename = os.path.basename(self.dicom_file)

new_basename = prefix + basename + extension
return os.path.join(output_folder, new_basename)

def save_png(self, output_folder=None, image_type="cleaned", title=None):
"""
Expand Down Expand Up @@ -233,26 +245,47 @@ def animate(i):
else:
bot.warning("use detect() --> clean() before saving is possible.")

def save_dicom(self, output_folder=None, image_type="cleaned"):
def save_dicom(
self, output_folder=None, prefix="clean-", extension="dcm"
) -> str | None:
"""
Save a cleaned dicom to disk.
DicomCleaner object must have already been run through detect() and clean()


Parameters
==========
output_folder: where to save clean dicoms. Will use self.output_folder if None
prefix: passed onto py:meth:`DicomCleaner._get_clean_name`.
Default adds 'clean-' to basename
extension: passed onto py:meth:`DicomCleaner._get_clean_name`.
Default appends .dcm after removing .dcm or .dicom.
use empty string to disable, reuses whole basename


Returns
=======
dicom_name: the file that was written

We expose an option to save an original (change image_type to "original"
to be consistent, although this is not incredibly useful given it would
duplicate the original data.
"""
# Having clean also means has dicom image
if hasattr(self, image_type):
dicom_name = self._get_clean_name(output_folder)
dicom = utils.dcmread(self.dicom_file, force=True)
# If going from compressed, change TransferSyntax
if dicom.file_meta.TransferSyntaxUID.is_compressed is True:
dicom.decompress()
dicom.PixelData = self.cleaned.tobytes()
dicom.save_as(dicom_name)
return dicom_name
else:
bot.warning("use detect() --> clean() before saving is possible.")
if not hasattr(self, "cleaned"):
bot.warning(
"No cleaned data for '%s'. use detect() --> clean() before saving is possible."
% (self.dicom_file)
)
return

dicom_name = self._get_clean_name(
output_folder, prefix=prefix, extension=extension
)
dicom = utils.dcmread(self.dicom_file, force=True)

# If going from compressed, change TransferSyntax
if dicom.file_meta.TransferSyntaxUID.is_compressed is True:
dicom.decompress()
dicom.PixelData = self.cleaned.tobytes()
dicom.save_as(dicom_name)
return dicom_name


def clean_pixel_data(
Expand Down
90 changes: 56 additions & 34 deletions deid/main/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def get_parser():
description="Deid (de-identification, anonymization) command line tool."
)

# Global Variables
# Global Variables: generic options available for all 'actions' command
parser.add_argument(
"--quiet",
"-q",
Expand Down Expand Up @@ -70,22 +70,58 @@ def get_parser():
action="store_true",
)

# Additional args specific to distinct commands (see args.command conditions)
subparsers = parser.add_subparsers(
help="action for deid to perform",
title="actions",
description="actions for deid to perform",
title="commands",
dest="command",
help="command for deid to perform",
description="command for deid to perform",
)

subparsers.add_parser(
"version", help="print version and exit" # pylint: disable=unused-variable
)

# Checks (checks / tests for various services)
inspect = subparsers.add_parser(
"inspect", help="various checks for PHI and quality"
"inspect", help="Various checks for PHI and quality"
)

ids = subparsers.add_parser(
"identifiers", help="Extract and replace identifiers from headers"
)

pixels = subparsers.add_parser(
"clean-pixels", help="Clean dicom: scrub burn in pixels"
)

# '--deid' is added to each command rather than once globally to keep arg order.
# Currently --deid is expected after args.command, like:
# deid identifiers --deid deid.cfg
# It would be a breaking change to put '--deid' in the main 'parser' var, like:
# deid --deid deid.cfg identifiers
# Existing scripts using the current order would then fail. See tests/test_cli.py
# > deid: error: unrecognized arguments: --deid deid.cfg
for command in [ids, inspect, pixels]:
command.add_argument(
"--deid",
dest="deid",
help="deid file with preferences, if not specified, default used.",
type=str,
default=None,
)

# '--input' is shared, but not for 'inspect'
# instead all additional arguments to 'inspect' are inputs
for command in [ids, pixels]:
command.add_argument(
"--input",
dest="input",
help="Input folder or single image to perform action on.",
type=str,
default=None,
)

## Args for command='inspect' only
inspect.add_argument(
nargs="+",
dest="folder",
Expand All @@ -94,14 +130,6 @@ def get_parser():
default=None,
)

inspect.add_argument(
"--deid",
dest="deid",
help="deid file with preferences, if not specified, default used.",
type=str,
default=None,
)

inspect.add_argument(
"--pattern",
dest="pattern",
Expand All @@ -119,18 +147,7 @@ def get_parser():
action="store_true",
)

ids = subparsers.add_parser(
"identifiers", help="extract and replace identifiers from headers"
)

ids.add_argument(
"--deid",
dest="deid",
help="deid file with preferences, if not specified, default used.",
type=str,
default=None,
)

## Args for command='identifiers' only
# A path to an ids file, required if user wants to put (without get)
ids.add_argument(
"--ids",
Expand All @@ -140,14 +157,6 @@ def get_parser():
default=None,
)

ids.add_argument(
"--input",
dest="input",
help="Input folder or single image to perform action on.",
type=str,
default=None,
)

# Action
ids.add_argument(
"--action",
Expand All @@ -159,6 +168,15 @@ def get_parser():
required=True,
)

## Args for command='clean-pixels' only
pixels.add_argument(
"--type",
dest="type",
help="Input type. Currently only dicom supported.",
choices=["dicom"],
default="dicom",
)

return parser


Expand All @@ -181,10 +199,14 @@ def main():
# Initialize the message bot, with level above
from deid.logger import bot # pylint: disable=unused-import

# The 'main' function to run is imported from a submodule chosen by 'command'
if args.command == "identifiers":
from .identifiers import main
elif args.command == "inspect":
from .inspect import main
elif args.command == "clean-pixels":
from .pixels import main

else:
parser.print_help()
sys.exit(1)
Expand Down
47 changes: 47 additions & 0 deletions deid/main/pixels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""
CLI 'main' entrypoint for pixel scrubbing
"""
import argparse # for typing only
import tempfile

from deid.dicom import get_files
from deid.dicom.pixels import DicomCleaner
from deid.logger import bot


def main(args: argparse.Namespace, parser: argparse.ArgumentParser = None):
    """
    CLI interface for pixel cleaning.
    Used as `main` by py:func:`deid.main.main`

    Also see `getting-started/dicom-pixels`

    Each input dicom is run through detect() and clean() and then saved
    into the output folder under its original basename.

    Parameters
    ==========
    args: Likely created by py:func:`deid.main.get_parser`.
          Reads args.input, args.outfolder and args.deid.

    parser: for compatibility with other *.main functions. Ignored.
    """
    # Validate the input before touching the filesystem, so that a bad
    # invocation does not leave an empty temporary directory behind.
    if args.input is None:
        bot.exit("No input specified. Use --input to give a folder or a single image.")

    output_folder = args.outfolder
    if output_folder is None:
        output_folder = tempfile.mkdtemp()
        # Without this message the user has no way to locate the results.
        bot.info("No output folder specified, saving to %s" % output_folder)

    dicom_files = list(get_files(args.input))
    bot.info("Looking at %i input dicoms" % len(dicom_files))

    # NOTE: self.results and self.file is updated each call to detect
    # may want new client for each file to be safe?
    client = DicomCleaner(output_folder=output_folder, deid=args.deid)
    for dcm in dicom_files:
        # detect() returns a result dictionary; it is not used here so it
        # is not saved.
        client.detect(dcm)
        client.clean()
        # prefix and extension empty to reuse same name as input
        # folder=None means use self.output_folder
        # NOTE(review): because the basename is reused, an output folder equal
        # to the input folder would presumably overwrite the originals --
        # confirm this is intended.
        client.save_dicom(output_folder=None, prefix="", extension="")
44 changes: 44 additions & 0 deletions deid/tests/test_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,50 @@ def test_pixel_cleaner_keepcoordinates_from(self):
compare = inputpixels[0:2000, 0:2000] == outputpixels[0:2000, 0:2000]
self.assertTrue(compare.all())

def test_get_clean_name(self):
    """Check the output path built by DicomCleaner._get_clean_name."""
    from deid.dicom import DicomCleaner

    # the "out" folder given here is only used when output_folder=None
    cleaner = DicomCleaner(output_folder="out")

    def clean_name(dicom_file, **kwargs):
        # point the cleaner at a (fake) input file, then ask for its output name
        cleaner.dicom_file = dicom_file
        return cleaner._get_clean_name(**kwargs)

    # normal usage: .dcm is swapped for the requested extension
    self.assertEqual(
        clean_name("XYZ.dcm", output_folder="abc", extension="png"),
        os.path.join("abc", "clean-XYZ.png"),
    )

    # .dicom is stripped the same way
    self.assertEqual(
        clean_name("XYZ.dicom", output_folder="abc", extension="png"),
        os.path.join("abc", "clean-XYZ.png"),
    )

    # careful: with the defaults (extension="dcm", prefix="clean-")
    # an input ending in .dicom comes out as .dcm
    self.assertEqual(
        clean_name("XYZ.dicom", output_folder="abc"),
        os.path.join("abc", "clean-XYZ.dcm"),
    )

    # IMA (Siemens dicom extension) is not implicitly removed
    self.assertEqual(
        clean_name("XYZ.IMA", output_folder="abc", extension="png"),
        os.path.join("abc", "clean-XYZ.IMA.png"),
    )

    # empty prefix and extension leave the basename untouched
    untouched = clean_name("image.IMA", output_folder=None, extension="", prefix="")
    self.assertEqual(untouched, os.path.join("out", "image.IMA"))

    # prefix but no extension
    # example: UPitt MRRC no dcm extension (via DCMTK's storescp?)
    bare = "MR.1.3.12.2.1107.5.2.0.18914.2025082910014953724207010"
    prefixed = clean_name(bare, output_folder=None, extension="", prefix="clean-")
    self.assertEqual(
        prefixed,
        os.path.join(
            "out", "clean-MR.1.3.12.2.1107.5.2.0.18914.2025082910014953724207010"
        ),
    )


if __name__ == "__main__":
unittest.main()
Loading