diff --git a/CHANGELOG.md b/CHANGELOG.md index 69e5910..fccda78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are: Referenced versions in headers are tagged on Github, in parentheses are for pypi. ## [vxx](https://github.com/pydicom/deid/tree/master) (master) +- Add 'clean-pixels' CLI command [#285](https://github.com/pydicom/deid/pull/285) (0.5.0) - Fix REMOVE action to respect REPLACE or JITTER priority [#283](https://github.com/pydicom/deid/pull/283) (0.4.6) - Add enhanced private tag syntax support [#282](https://github.com/pydicom/deid/pull/282) (0.4.5) - Fix tag specification for KEEP action [#281](https://github.com/pydicom/deid/pull/281) (0.4.4) diff --git a/deid/dicom/pixels/clean.py b/deid/dicom/pixels/clean.py index 9622a59..4606e97 100644 --- a/deid/dicom/pixels/clean.py +++ b/deid/dicom/pixels/clean.py @@ -132,7 +132,7 @@ def get_figure(self, show=False, image_type="cleaned", title=None): plt.show() return plt - def _get_clean_name(self, output_folder, extension="dcm"): + def _get_clean_name(self, output_folder, extension="dcm", prefix="clean-"): """ Get path to a cleaned output file. @@ -145,6 +145,10 @@ def _get_clean_name(self, output_folder, extension="dcm"): exist. extension: the extension of the file to create a name for, should not start with "." + prefix: string prepended to file's basename + extension: extension to append to basename after removing .dcm or .dicom. + Use empty string to disable. .dcm|.dicom will not be stripped. basename is not changed. + """ if output_folder is None: output_folder = self.output_folder @@ -153,8 +157,16 @@ def _get_clean_name(self, output_folder, extension="dcm"): bot.debug("Creating output folder %s" % output_folder) os.makedirs(output_folder) - basename = re.sub("[.]dicom|[.]dcm", "", os.path.basename(self.dicom_file)) - return "%s/cleaned-%s.%s" % (output_folder, basename, extension) + # do we want to change extension? 
this might convert .dicom to .dcm + # or would add .dcm to eg. 'MR.*' (MR.12*34.dcm) or '*IMA' (1234.IMA.dcm) + if extension: + basename = re.sub("[.]dicom|[.]dcm", "", os.path.basename(self.dicom_file)) + extension = "." + extension + else: + basename = os.path.basename(self.dicom_file) + + new_basename = prefix + basename + extension + return os.path.join(output_folder, new_basename) def save_png(self, output_folder=None, image_type="cleaned", title=None): """ @@ -233,26 +245,47 @@ def animate(i): else: bot.warning("use detect() --> clean() before saving is possible.") - def save_dicom(self, output_folder=None, image_type="cleaned"): + def save_dicom( + self, output_folder=None, prefix="clean-", extension="dcm" + ) -> str | None: """ Save a cleaned dicom to disk. + DicomCleaner object must have already been run through detect() and clean() + + + Parameters + ========== + output_folder: where to save clean dicoms. Will use self.output_folder if None + prefix: passed onto py:meth:`DicomCleaner._get_clean_name`. + Default adds 'clean-' to basename + extension: passed onto py:meth:`DicomCleaner._get_clean_name`. + Default appends .dcm after removing .dcm or .dicom. + use empty string to disable, reuses whole basename + + + Returns + ======= + dicom_name: the file that was written - We expose an option to save an original (change image_type to "original" - to be consistent, although this is not incredibly useful given it would - duplicate the original data. 
""" - # Having clean also means has dicom image - if hasattr(self, image_type): - dicom_name = self._get_clean_name(output_folder) - dicom = utils.dcmread(self.dicom_file, force=True) - # If going from compressed, change TransferSyntax - if dicom.file_meta.TransferSyntaxUID.is_compressed is True: - dicom.decompress() - dicom.PixelData = self.cleaned.tobytes() - dicom.save_as(dicom_name) - return dicom_name - else: - bot.warning("use detect() --> clean() before saving is possible.") + if not hasattr(self, "cleaned"): + bot.warning( + "No cleaned data for '%s'. use detect() --> clean() before saving is possible." + % (self.dicom_file) + ) + return + + dicom_name = self._get_clean_name( + output_folder, prefix=prefix, extension=extension + ) + dicom = utils.dcmread(self.dicom_file, force=True) + + # If going from compressed, change TransferSyntax + if dicom.file_meta.TransferSyntaxUID.is_compressed is True: + dicom.decompress() + dicom.PixelData = self.cleaned.tobytes() + dicom.save_as(dicom_name) + return dicom_name def clean_pixel_data( diff --git a/deid/main/__init__.py b/deid/main/__init__.py index bf6cc1a..5604899 100644 --- a/deid/main/__init__.py +++ b/deid/main/__init__.py @@ -18,7 +18,7 @@ def get_parser(): description="Deid (de-identification, anonymization) command line tool." 
) - # Global Variables + # Global Variables: generic options available for all 'actions' command parser.add_argument( "--quiet", "-q", @@ -70,22 +70,58 @@ def get_parser(): action="store_true", ) + # Additional args specific to distinct commands (see args.command conditions) subparsers = parser.add_subparsers( - help="action for deid to perform", - title="actions", - description="actions for deid to perform", + title="commands", dest="command", + help="command for deid to perform", + description="command for deid to perform", ) subparsers.add_parser( "version", help="print version and exit" # pylint: disable=unused-variable ) - # Checks (checks / tests for various services) inspect = subparsers.add_parser( - "inspect", help="various checks for PHI and quality" + "inspect", help="Various checks for PHI and quality" ) + ids = subparsers.add_parser( + "identifiers", help="Extract and replace identifiers from headers" + ) + + pixels = subparsers.add_parser( + "clean-pixels", help="Clean dicom: scrub burn in pixels" + ) + + # '--deid' for each command rather than once in global to keep arg order + # currently --deid is expected after args.command, like: + # deid identifiers --deid deid.cfg + # would be breaking change to put '--deid' in main 'parser' var, like: + # deid --deid deid.cfg identifiers + # not doing so (any existing scripts) would give error. 
see tests/test_cli.py + # > deid: error: unrecognized arguments: --deid deid.cfg + for command in [ids, inspect, pixels]: + command.add_argument( + "--deid", + dest="deid", + help="deid file with preferences, if not specified, default used.", + type=str, + default=None, + ) + + # '--input' is shared, but not for 'inspect' + # instead all additional arguments to 'inspect' are inputs + for command in [ids, pixels]: + command.add_argument( + "--input", + dest="input", + help="Input folder or single image to perform action on.", + type=str, + default=None, + ) + + ## Args for command='inspect' only inspect.add_argument( nargs="+", dest="folder", @@ -94,14 +130,6 @@ def get_parser(): default=None, ) - inspect.add_argument( - "--deid", - dest="deid", - help="deid file with preferences, if not specified, default used.", - type=str, - default=None, - ) - inspect.add_argument( "--pattern", dest="pattern", @@ -119,18 +147,7 @@ def get_parser(): action="store_true", ) - ids = subparsers.add_parser( - "identifiers", help="extract and replace identifiers from headers" - ) - - ids.add_argument( - "--deid", - dest="deid", - help="deid file with preferences, if not specified, default used.", - type=str, - default=None, - ) - + ## Args for command='identifiers' only # A path to an ids file, required if user wants to put (without get) ids.add_argument( "--ids", @@ -140,14 +157,6 @@ def get_parser(): default=None, ) - ids.add_argument( - "--input", - dest="input", - help="Input folder or single image to perform action on.", - type=str, - default=None, - ) - # Action ids.add_argument( "--action", @@ -159,6 +168,15 @@ def get_parser(): required=True, ) + ## Args for command='clean-pixels' only + pixels.add_argument( + "--type", + dest="type", + help="Input type. 
Currently only dicom supported.", + choices=["dicom"], + default="dicom", + ) + return parser @@ -181,10 +199,14 @@ def main(): # Initialize the message bot, with level above from deid.logger import bot # pylint: disable=unused-import + # 'main' function use with __main__ imported from submodules based on 'command' if args.command == "identifiers": from .identifiers import main elif args.command == "inspect": from .inspect import main + elif args.command == "clean-pixels": + from .pixels import main + else: parser.print_help() sys.exit(1) diff --git a/deid/main/pixels.py b/deid/main/pixels.py new file mode 100644 index 0000000..b65669d --- /dev/null +++ b/deid/main/pixels.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +""" +CLI 'main' entrypoint for pixel scrubbing +""" +import argparse # for typing only +import tempfile + +from deid.dicom import get_files +from deid.dicom.pixels import DicomCleaner +from deid.logger import bot + + +def main(args: argparse.Namespace, parser: argparse.ArgumentParser = None): + """ + CLI interface for pixel cleaning. + Used as `main` by py:func:`deid.main.main` + + Also see `getting-started/dicom-pixels` + + Parameters + ========== + args: Likely created by py:func:`deid.main.get_parser`. + Uses outfolder, input, deid + + parser: for compatibility with other *.main functions. Ignored. + """ + + output_folder = args.outfolder + if output_folder is None: + output_folder = tempfile.mkdtemp() + + if args.input is None: + bot.exit("No input folder specified. Specify inputs as additional arguments.") + dicom_files = list(get_files(args.input)) + + bot.info("Looking at %i input dicoms" % len(dicom_files)) + + # NOTE: self.results and self.file is updated each call to detect + # may want new client for each file to be safe? 
+ client = DicomCleaner(output_folder=output_folder, deid=args.deid) + for dcm in dicom_files: + #: py:func:`deid.dicom.pixels.detect._has_burned_pixels_single` dictionary, not used so not saved + client.detect(dcm) + client.clean() + # prefix and extension empty to reuse same name as input + # folder=None means use self.output_folder + client.save_dicom(output_folder=None, prefix="", extension="") diff --git a/deid/tests/test_clean.py b/deid/tests/test_clean.py index 7f4dd22..42ce8e2 100644 --- a/deid/tests/test_clean.py +++ b/deid/tests/test_clean.py @@ -229,6 +229,50 @@ def test_pixel_cleaner_keepcoordinates_from(self): compare = inputpixels[0:2000, 0:2000] == outputpixels[0:2000, 0:2000] self.assertTrue(compare.all()) + def test_get_clean_name(self): + from deid.dicom import DicomCleaner + + # 'out/' given to cleaner not necessarily what _get_clean_name will use + client = DicomCleaner(output_folder="out") + + # exercise normal usage + client.dicom_file = "XYZ.dcm" + new_name = client._get_clean_name(output_folder="abc", extension="png") + self.assertEqual(new_name, os.path.join("abc", "clean-XYZ.png")) + + client.dicom_file = "XYZ.dicom" + new_name = client._get_clean_name(output_folder="abc", extension="png") + self.assertEqual(new_name, os.path.join("abc", "clean-XYZ.png")) + + # !! careful with extension! 
was .dicom will now be .dcm + client.dicom_file = "XYZ.dicom" + new_name = client._get_clean_name( + output_folder="abc" + ) # defaults: extension="dcm", prefix="clean-" + self.assertEqual(new_name, os.path.join("abc", "clean-XYZ.dcm")) + + # note IMA not removed -- Siemens dicom extension not implicitly handled + client.dicom_file = "XYZ.IMA" + new_name = client._get_clean_name(output_folder="abc", extension="png") + self.assertEqual(new_name, os.path.join("abc", "clean-XYZ.IMA.png")) + + # fully specified options to avoid any change to basename + client.dicom_file = "image.IMA" + new_name = client._get_clean_name(output_folder=None, extension="", prefix="") + expected_name = os.path.join("out", "image.IMA") + self.assertEqual(new_name, expected_name) + + # prefix but no extension + # example: UPitt MRRC no dcm extension (via DCMTK's storescp?) + client.dicom_file = "MR.1.3.12.2.1107.5.2.0.18914.2025082910014953724207010" + new_name = client._get_clean_name( + output_folder=None, extension="", prefix="clean-" + ) + expected_name = os.path.join( + "out", "clean-MR.1.3.12.2.1107.5.2.0.18914.2025082910014953724207010" + ) + self.assertEqual(new_name, expected_name) + if __name__ == "__main__": unittest.main() diff --git a/deid/tests/test_cli.py b/deid/tests/test_cli.py index 1cf1986..a9ff964 100644 --- a/deid/tests/test_cli.py +++ b/deid/tests/test_cli.py @@ -6,6 +6,8 @@ import unittest from unittest.mock import patch +import numpy as np + import deid.main from deid.data import get_dataset from deid.dicom import get_files, utils @@ -49,6 +51,47 @@ def test_deidmain_write_identifiers(self): # Confirm new file was srubbed self.assertEqual(None, outfile.get("StudyTime")) + @patch( + "sys.argv", + "deid --outfolder out/ clean-pixels --deid deid.cfg --input ./".split(" "), + ) + def test_deidmain_clean_pixels(self): + """ + Run example command line call to clean pixels + """ + os.chdir(self.tmpdir) + shutil.copyfile(self.example, self.tmpdir + "/example.dicom") + # 
Confirm input data has value that will be scrubbed. + indcm = utils.dcmread(self.tmpdir + "/example.dicom") + self.assertEqual(indcm.pixel_array.shape, (456, 510, 3)) + # index is y,x,z for censor box coordinates from deid.cfg below + censor_area = indcm.pixel_array[0:250, 0:100, :] + # all voxels in region to be scrubbed are valued. lucky us + self.assertEqual(np.count_nonzero(censor_area != 0), 75000) + + with open(self.tmpdir + "/deid.cfg", "w") as f: + f.write( + """FORMAT dicom + +%filter greylist + +LABEL Censor Top Left +contains SOPInstanceUID . + coordinates 0,0,100,250 +""" + ) + + os.makedirs("out/") + deid.main.main() + + outfile = utils.dcmread("out/example.dicom") + + # Confirm we changed pixel data + self.assertTrue(np.any(indcm.pixel_array != outfile.pixel_array)) + # Confirm censor area is all zeros + zero_cnt = np.count_nonzero(outfile.pixel_array[0:250, 0:100, :] == 0) + self.assertEqual(zero_cnt, 100 * 250 * 3) # 75000 + if __name__ == "__main__": unittest.main() diff --git a/deid/version.py b/deid/version.py index 7825e56..51746ed 100644 --- a/deid/version.py +++ b/deid/version.py @@ -2,7 +2,7 @@ __copyright__ = "Copyright 2016-2025, Vanessa Sochat" __license__ = "MIT" -__version__ = "0.4.6" +__version__ = "0.5.0" AUTHOR = "Vanessa Sochat" AUTHOR_EMAIL = "vsoch@users.noreply.github.com" NAME = "deid"