From aa5ddd199fedc2dde1fc4dc3f2d9e7d73c6c91e7 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Mon, 18 Nov 2024 09:31:46 -0500 Subject: [PATCH 01/10] replace legacy with schema validator also change cubids print-metadata-fields to account for json file errors due to not been validated yet --- cubids/cubids.py | 16 ++++++-- cubids/validator.py | 91 +++++++++++++++------------------------------ 2 files changed, 43 insertions(+), 64 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 44c57fdc..27f632e3 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1336,9 +1336,19 @@ def get_all_metadata_fields(self): found_fields = set() for json_file in Path(self.path).rglob("*.json"): if ".git" not in str(json_file): - with open(json_file, "r") as jsonr: - metadata = json.load(jsonr) - found_fields.update(metadata.keys()) + # add this in case `print-metadata-fields` is run before validate + try: + with open(json_file, "r", encoding="utf-8") as jsonr: + content = jsonr.read().strip() + if not content: + print(f"Empty file: {json_file}") + continue + metadata = json.loads(content) + found_fields.update(metadata.keys()) + except json.JSONDecodeError as e: + print(f"Error decoding JSON in {json_file}: {e}") + except Exception as e: + print(f"Unexpected error with file {json_file}: {e}") return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): diff --git a/cubids/validator.py b/cubids/validator.py index d7e52fe4..7fba8138 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -14,9 +14,9 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" - # build docker call - # CuBIDS automatically ignores subject consistency. - command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"] + # New schema BIDS validator doesn't have option to ignore subject consistency. + # Build the deno command to run the BIDS validator. + command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -87,32 +87,6 @@ def parse_validator_output(output): Dataframe of validator output. """ - def get_nested(dct, *keys): - """Get a nested value from a dictionary. - - Parameters - ---------- - dct : :obj:`dict` - Dictionary to get value from. - keys : :obj:`list` - List of keys to get value from. - - Returns - ------- - :obj:`dict` - The nested value. - """ - for key in keys: - try: - dct = dct[key] - except (KeyError, TypeError): - return None - return dct - - data = json.loads(output) - - issues = data["issues"] - def parse_issue(issue_dict): """Parse a single issue from the validator output. @@ -126,30 +100,27 @@ def parse_issue(issue_dict): return_dict : :obj:`dict` Dictionary of parsed issue. 
""" - return_dict = {} - return_dict["files"] = [ - get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") - ] - return_dict["type"] = issue_dict.get("key", "") - return_dict["severity"] = issue_dict.get("severity", "") - return_dict["description"] = issue_dict.get("reason", "") - return_dict["code"] = issue_dict.get("code", "") - return_dict["url"] = issue_dict.get("helpUrl", "") - - return return_dict - - df = pd.DataFrame() - - for warn in issues["warnings"]: - parsed = parse_issue(warn) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) - - for err in issues["errors"]: - parsed = parse_issue(err) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) + return { + "location": issue_dict.get("location", ""), + "code": issue_dict.get("code", ""), + "subCode": issue_dict.get("subCode", ""), + "severity": issue_dict.get("severity", ""), + "rule": issue_dict.get("rule", ""), + } + + # Load JSON data + data = json.loads(output) + + # Extract issues + issues = data.get("issues", {}).get("issues", []) + if not issues: + return pd.DataFrame(columns=["location", "code", "subCode", "severity", "rule"]) + + # Parse all issues + parsed_issues = [parse_issue(issue) for issue in issues] + # Convert to DataFrame + df = pd.DataFrame(parsed_issues) return df @@ -161,12 +132,10 @@ def get_val_dictionary(): val_dict : dict Dictionary of values. """ - val_dict = {} - val_dict["files"] = {"Description": "File with warning orerror"} - val_dict["type"] = {"Description": "BIDS validation warning or error"} - val_dict["severity"] = {"Description": "gravity of problem (warning/error"} - val_dict["description"] = {"Description": "Description of warning/error"} - val_dict["code"] = {"Description": "BIDS validator issue code number"} - val_dict["url"] = {"Description": "Link to the issue's neurostars thread"} - - return val_dict + return { + "location": {"Description": "File with the validation issue."}, + "code": {"Description": "Code of the validation issue."}, + "subCode": {"Description": "Subcode providing additional issue details."}, + "severity": {"Description": "Severity of the issue (e.g., warning, error)."}, + "rule": {"Description": "Validation rule that triggered the issue."}, + } From e98ca1786cde49ff1a7b043babde75ddbe7231ed Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Mon, 18 Nov 2024 11:49:15 -0500 Subject: [PATCH 02/10] circleci install deno --- .circleci/config.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3ed25686..c73d8af8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,10 +31,8 @@ jobs: source activate cubids conda install -c conda-forge -y datalad - # Add nodejs and the validator - conda install nodejs - npm install -g yarn && \ - npm install -g bids-validator@1.14.14-dev.0 + # Add deno to run the schema validator + conda install deno # Install CuBIDS pip install -e .[tests] From 5ba0c717da66cdb2258a119d33be3260624c4d9d Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Mon, 18 Nov 2024 12:00:22 -0500 Subject: [PATCH 03/10] add deno installation instruction --- docs/installation.rst | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index d55b84a4..b02e734e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -16,7 +16,7 @@ initialize a new conda environment (for example, named ``cubids``) as follows: .. 
code-block:: console - $ conda create -n cubids python=3.8 pip + $ conda create -n cubids python=3.12 pip $ conda activate cubids You are now ready to install CuBIDS. @@ -44,23 +44,16 @@ Once you have a copy of the source, you can install it with: $ pip install -e . We will now need to install some dependencies of ``CuBIDS``. -To do this, we first must install nodejs. +To do this, we first must install deno to run `bids-validator`. We can accomplish this using the following command: .. code-block:: console - $ conda install nodejs + $ conda install deno -Now that we have npm installed, we can install ``bids-validator`` using the following command: +The new schema ``bids-validator`` doesn't need to be installed +and will be implemented automatically when `cubids validate` is called -.. code-block:: console - - $ npm install -g bids-validator@1.7.2 - -In our example walkthrough, -we use ``bids-validator`` v1.7.2. using a different version of the -validator may result in slightly different validation tsv printouts, -but ``CuBIDS`` is compatible with all versions of the validator at or above v1.6.2. We also recommend using ``CuBIDS`` with the optional ``DataLad`` version control capabilities. We use ``DataLad`` throughout our walkthrough of the CuBIDS Workflow on From 38cf11bd8415c955ce9c5a2a4f51548dc9504100 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 10:41:55 -0500 Subject: [PATCH 04/10] Add a function to save bids validator and schema version --- cubids/cli.py | 36 ++++++++++++ cubids/validator.py | 134 +++++++++++++++++++++++++++++++++++++++++++- cubids/workflows.py | 69 +++++++++++++++++++++++ 3 files changed, 238 insertions(+), 1 deletion(-) diff --git a/cubids/cli.py b/cubids/cli.py index f87ffc6c..d2f8a920 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -107,6 +107,41 @@ def _enter_validate(argv=None): workflows.validate(**args) +def _parse_bids_version(): + parser = argparse.ArgumentParser( + description="cubids bids-version: Get BIDS Validator and Schema version", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + PathExists = partial(_path_exists, parser=parser) + + parser.add_argument( + "bids_dir", + type=PathExists, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "--write", + action="store_true", + default=False, + help=( + "Save the validator and schema version to 'dataset_description.json' " + "when using `cubids bids-version /bids/path --write`. " + "By default, `cubids bids-version /bids/path` prints to the terminal." 
+ ), + ) + return parser + + +def _enter_bids_version(argv=None): + options = _parse_bids_version().parse_args(argv) + args = vars(options).copy() + workflows.bids_version(**args) + + def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), @@ -655,6 +690,7 @@ def _enter_print_metadata_fields(argv=None): COMMANDS = [ ("validate", _parse_validate, workflows.validate), + ("bids-version", _parse_bids_version, workflows.bids_version), ("sidecar-merge", _parse_bids_sidecar_merge, workflows.bids_sidecar_merge), ("group", _parse_group, workflows.group), ("apply", _parse_apply, workflows.apply), diff --git a/cubids/validator.py b/cubids/validator.py index 7fba8138..fe0e08ef 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -6,6 +6,7 @@ import os import pathlib import subprocess +import re import pandas as pd @@ -24,6 +25,22 @@ def build_validator_call(path, ignore_headers=False): return command +def get_bids_validator_version(): + """Get the version of the BIDS validator. + + Returns + ------- + version : :obj:`str` + Version of the BIDS validator. + """ + command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output = result.stdout.decode("utf-8").strip() + version = output.split()[-1] + clean_ver = re.sub(r'\x1b\[[0-9;]*m', '', version) # Remove ANSI color codes + return {"ValidatorVersion": clean_ver} + + def build_subject_paths(bids_dir): """Build a list of BIDS dirs with 1 subject each.""" bids_dir = str(bids_dir) @@ -52,6 +69,26 @@ def build_subject_paths(bids_dir): return subjects_dict +def build_first_subject_path(bids_dir, subject): + """Build a list of BIDS dirs with 1 subject each.""" + bids_dir = str(bids_dir) + if not bids_dir.endswith("/"): + bids_dir += "/" + + root_files = [x for x in glob.glob(bids_dir + "*") if os.path.isfile(x)] + + subject_dict = {} + + purepath = pathlib.PurePath(subject) + sub_label = purepath.name + + files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] + files.extend(root_files) + subject_dict[sub_label] = files + + return subject_dict + + def run_validator(call): """Run the validator with subprocess. 
@@ -103,6 +140,7 @@ def parse_issue(issue_dict): return { "location": issue_dict.get("location", ""), "code": issue_dict.get("code", ""), + "issueMessage": issue_dict.get("issueMessage", ""), "subCode": issue_dict.get("subCode", ""), "severity": issue_dict.get("severity", ""), "rule": issue_dict.get("rule", ""), @@ -114,7 +152,9 @@ def parse_issue(issue_dict): # Extract issues issues = data.get("issues", {}).get("issues", []) if not issues: - return pd.DataFrame(columns=["location", "code", "subCode", "severity", "rule"]) + return pd.DataFrame( + columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] + ) # Parse all issues parsed_issues = [parse_issue(issue) for issue in issues] @@ -135,7 +175,99 @@ def get_val_dictionary(): return { "location": {"Description": "File with the validation issue."}, "code": {"Description": "Code of the validation issue."}, + "issueMessage": {"Description": "Validation issue message."}, "subCode": {"Description": "Subcode providing additional issue details."}, "severity": {"Description": "Severity of the issue (e.g., warning, error)."}, "rule": {"Description": "Validation rule that triggered the issue."}, } + + +def extract_summary_info(output): + """Extract summary information from the JSON output. + + Parameters + ---------- + output : str + JSON string of BIDS validator output. + + Returns + ------- + dict + Dictionary containing SchemaVersion and other summary info. + """ + try: + data = json.loads(output) + except json.JSONDecodeError as e: + raise ValueError("Invalid JSON provided to get SchemaVersion.") from e + + summary = data.get("summary", {}) + + return {"SchemaVersion": summary.get("schemaVersion", "")} + + +def update_dataset_description(path, new_info): + """Update or append information to dataset_description.json. + + Parameters + ---------- + path : :obj:`str` + Path to the dataset. + new_info : :obj:`dict` + Information to add or update. + """ + description_path = os.path.join(path, "dataset_description.json") + + # Load existing data if the file exists + if os.path.exists(description_path): + with open(description_path, "r") as f: + existing_data = json.load(f) + else: + existing_data = {} + + # Update the existing data with the new info + existing_data.update(new_info) + + # Write the updated data back to the file + with open(description_path, "w") as f: + json.dump(existing_data, f, indent=4) + print(f"Updated dataset_description.json at: {description_path}") + + # Check if .datalad directory exists before running the DataLad save command + datalad_dir = os.path.join(path, ".datalad") + if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir): + try: + subprocess.run( + ["datalad", "save", "-m", + "Save BIDS validator and schema version to dataset_description", + description_path], + check=True + ) + print("Changes saved with DataLad.") + except subprocess.CalledProcessError as e: + print(f"Error running DataLad save: {e}") + + +def bids_validator_version(output, path, write=False): + """Save BIDS validator and schema version. + + Parameters + ---------- + output : :obj:`str` + Path to JSON file of BIDS validator output. + path : :obj:`str` + Path to the dataset. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. 
+ """ + # Get the BIDS validator version + validator_version = get_bids_validator_version() + # Extract schemaVersion + summary_info = extract_summary_info(output) + + combined_info = {**validator_version, **summary_info} + + if write: + # Update the dataset_description.json file + update_dataset_description(path, combined_info) + elif not write: + print(combined_info) \ No newline at end of file diff --git a/cubids/workflows.py b/cubids/workflows.py index 6cbc1e42..69bed501 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -22,6 +22,8 @@ get_val_dictionary, parse_validator_output, run_validator, + build_first_subject_path, + bids_validator_version, ) warnings.simplefilter(action="ignore", category=FutureWarning) @@ -258,6 +260,73 @@ def validate( sys.exit(proc.returncode) +def bids_version( + bids_dir, + write=False +): + """Get BIDS validator and schema version. + + Parameters + ---------- + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. + """ + # Need to run validator to get output with schema version + # Copy code from `validate --sequential` + + try: # return first subject + # Get all folders that start with "sub-" + sub_folders = [ + name + for name in os.listdir(bids_dir) + if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") + ] + if not sub_folders: + raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.") + subject = sub_folders[0] + except FileNotFoundError: + raise FileNotFoundError(f"The directory {bids_dir} does not exist.") + except ValueError as ve: + raise ve + + # build a dictionary with {SubjectLabel: [List of files]} + # run first subject only + subject_dict = build_first_subject_path(bids_dir, subject) + + # iterate over the dictionary + for subject, files_list in subject_dict.items(): + # logger.info(" ".join(["Processing subject:", subject])) + # create a temporary directory and symlink the data + with tempfile.TemporaryDirectory() as tmpdirname: + for fi in files_list: + # cut the path down to the subject label + bids_start = fi.find(subject) + + # maybe it's a single file + if bids_start < 1: + bids_folder = tmpdirname + fi_tmpdir = tmpdirname + + else: + bids_folder = Path(fi[bids_start:]).parent + fi_tmpdir = tmpdirname + "/" + str(bids_folder) + + if not os.path.exists(fi_tmpdir): + os.makedirs(fi_tmpdir) + output = fi_tmpdir + "/" + str(Path(fi).name) + shutil.copy2(fi, output) + + # run the validator + call = build_validator_call(tmpdirname) + ret = run_validator(call) + + # Get BIDS validator and schema version + decoded = ret.stdout.decode("UTF-8") + bids_validator_version(decoded, bids_dir, write=write) + + def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) From 6f85a1f1c5f8adc1e8d78b6641197e5e7b852ce7 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 10:54:32 -0500 Subject: [PATCH 05/10] Fix linting issue --- cubids/cli.py | 27 ++++++---- cubids/cubids.py | 105 ++++++++++++++++++++++++------------ cubids/metadata_merge.py | 15 ++++-- cubids/tests/test_bond.py | 75 +++++++++++++++++--------- cubids/tests/test_cli.py | 6 ++- cubids/tests/test_cubids.py | 15 ++++-- cubids/tests/utils.py | 6 ++- cubids/validator.py | 45 ++++++++++------ cubids/workflows.py | 44 ++++++++------- 9 files changed, 220 insertions(+), 118 deletions(-) diff --git 
a/cubids/cli.py b/cubids/cli.py index d2f8a920..ea78680c 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -27,7 +27,8 @@ def _is_file(path, parser): """Ensure a given path exists and it is a file.""" path = _path_exists(path, parser) if not path.is_file(): - raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.") + raise parser.error( + f"Path should point to a file (or symlink of file): <{path}>.") return path @@ -144,7 +145,8 @@ def _enter_bids_version(argv=None): def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( - description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), + description=( + "bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) IsFile = partial(_is_file, parser=parser) @@ -216,7 +218,8 @@ def _parse_group(): default="subject", choices=["subject", "session"], action="store", - help=("Level at which acquisition groups are created options: 'subject' or 'session'"), + help=( + "Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -244,7 +247,8 @@ def _enter_group(argv=None): def _parse_apply(): parser = argparse.ArgumentParser( - description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), + description=( + "cubids-apply: apply the changes specified in a tsv to a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -312,7 +316,8 @@ def _parse_apply(): default="subject", choices=["subject", "session"], action="store", - help=("Level at which acquisition groups are created options: 'subject' or 'session'"), + help=( + "Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -341,7 +346,8 @@ def _enter_apply(argv=None): def _parse_datalad_save(): parser = argparse.ArgumentParser( - description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), + description=( + "cubids-datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -699,8 +705,10 @@ def _enter_print_metadata_fields(argv=None): ("copy-exemplars", _parse_copy_exemplars, workflows.copy_exemplars), ("undo", _parse_undo, workflows.undo), ("datalad-save", _parse_datalad_save, workflows.datalad_save), - ("print-metadata-fields", _parse_print_metadata_fields, workflows.print_metadata_fields), - ("remove-metadata-fields", _parse_remove_metadata_fields, workflows.remove_metadata_fields), + ("print-metadata-fields", _parse_print_metadata_fields, + workflows.print_metadata_fields), + ("remove-metadata-fields", _parse_remove_metadata_fields, + workflows.remove_metadata_fields), ] @@ -709,7 +717,8 @@ def _get_parser(): from cubids import __version__ parser = argparse.ArgumentParser(prog="cubids") - parser.add_argument("-v", "--version", action="version", version=__version__) + parser.add_argument("-v", "--version", + action="version", version=__version__) subparsers = parser.add_subparsers(help="CuBIDS commands") for command, parser_func, run_func in COMMANDS: diff --git a/cubids/cubids.py b/cubids/cubids.py index 27f632e3..4de83826 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -149,9 +149,11 @@ def reset_bids_layout(self, validate=False): re.compile(r"/\."), ] - indexer = bids.BIDSLayoutIndexer(validate=validate, 
ignore=ignores, index_metadata=False) + indexer = bids.BIDSLayoutIndexer( + validate=validate, ignore=ignores, index_metadata=False) - self._layout = bids.BIDSLayout(self.path, validate=validate, indexer=indexer) + self._layout = bids.BIDSLayout( + self.path, validate=validate, indexer=indexer) def create_cubids_code_dir(self): """Create CuBIDS code directory. @@ -201,7 +203,8 @@ def datalad_save(self, message=None): Commit message to use with datalad save. """ if not self.datalad_ready: - raise Exception("DataLad has not been initialized. use datalad_init()") + raise Exception( + "DataLad has not been initialized. use datalad_init()") statuses = self.datalad_handle.save(message=message or "CuBIDS Save") saved_status = set([status["status"] for status in statuses]) @@ -223,7 +226,8 @@ def is_datalad_clean(self): """ if not self.datalad_ready: raise Exception("Datalad not initialized, can't determine status") - statuses = set([status["state"] for status in self.datalad_handle.status()]) + statuses = set([status["state"] + for status in self.datalad_handle.status()]) return statuses == set(["clean"]) def datalad_undo_last_commit(self): @@ -237,8 +241,10 @@ def datalad_undo_last_commit(self): If there are untracked changes in the datalad dataset. """ if not self.is_datalad_clean(): - raise Exception("Untracked changes present. Run clear_untracked_changes first") - reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path) + raise Exception( + "Untracked changes present. Run clear_untracked_changes first") + reset_proc = subprocess.run( + ["git", "reset", "--hard", "HEAD~1"], cwd=self.path) reset_proc.check_returncode() def add_nifti_info(self): @@ -342,11 +348,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T files_df = pd.read_table(files_tsv) # Check that the MergeInto column only contains valid merges - ok_merges, deletions = check_merging_operations(summary_tsv, raise_on_error=raise_on_error) + ok_merges, deletions = check_merging_operations( + summary_tsv, raise_on_error=raise_on_error) merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] + dest_files = files_df.loc[( + files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] source_files = files_df.loc[ (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1) ] @@ -357,13 +365,15 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T for dest_nii in dest_files.FilePath: dest_json = img_to_new_ext(self.path + dest_nii, ".json") if Path(dest_json).exists() and Path(source_json).exists(): - merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") + merge_commands.append( + f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] + files_to_rm = files_df.loc[( + files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -436,7 +446,8 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T rename_commit = s1 + s2 - self.datalad_handle.run(cmd=["bash", renames], message=rename_commit) + self.datalad_handle.run( + cmd=["bash", renames], message=rename_commit) else: subprocess.run( ["bash", renames], @@ -476,7 +487,8 @@ def change_filename(self, filepath, entities): entity_file_keys = [] # 
Entities that may be in the filename? - file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] + file_keys = ["task", "acquisition", + "direction", "reconstruction", "run"] for key in file_keys: if key in list(entities.keys()): @@ -490,7 +502,8 @@ def change_filename(self, filepath, entities): # XXX: This adds an extra leading zero to run. entities["run"] = "0" + str(entities["run"]) - filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) + filename = "_".join( + [f"{key}-{entities[key]}" for key in entity_file_keys]) filename = ( filename.replace("acquisition", "acq") .replace("direction", "dir") @@ -499,7 +512,8 @@ def change_filename(self, filepath, entities): if len(filename) > 0: filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - raise ValueError(f"Could not construct new filename for {filepath}") + raise ValueError( + f"Could not construct new filename for {filepath}") # CHECK TO SEE IF DATATYPE CHANGED # datatype may be overridden/changed if the original file is located in the wrong folder. @@ -517,7 +531,8 @@ def change_filename(self, filepath, entities): dtype_new = dtype_orig # Construct the new filename - new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename + new_path = str(self.path) + "/" + sub + "/" + \ + ses + "/" + dtype_new + "/" + filename # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -536,7 +551,8 @@ def change_filename(self, filepath, entities): # ensure assoc not an IntendedFor reference if ".nii" not in str(assoc_path): self.old_filenames.append(assoc_path) - new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes)) + new_ext_path = img_to_new_ext( + new_path, "".join(Path(assoc_path).suffixes)) self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! @@ -609,7 +625,8 @@ def change_filename(self, filepath, entities): if Path(old_labeling).exists(): self.old_filenames.append(old_labeling) new_scan_end = "_" + suffix + old_ext - new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") + new_labeling = new_path.replace( + new_scan_end, "_asllabeling.jpg") self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! @@ -635,7 +652,8 @@ def change_filename(self, filepath, entities): # remove old filename data["IntendedFor"].remove(item) # add new filename - data["IntendedFor"].append(_get_intended_for_reference(new_path)) + data["IntendedFor"].append( + _get_intended_for_reference(new_path)) # update the json with the new data dictionary _update_json(filename_with_if, data) @@ -808,7 +826,8 @@ def _purge_associations(self, scans): if "/func/" in str(path): # add tsvs - tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") + tsv = img_to_new_ext(str(path), ".tsv").replace( + "_bold", "_events") if Path(tsv).exists(): to_remove.append(tsv) # add tsv json (if exists) @@ -922,7 +941,8 @@ def get_param_groups_from_entity_set(self, entity_set): 2. 
A data frame with param group summaries """ if not self.fieldmaps_cached: - raise Exception("Fieldmaps must be cached to find parameter groups.") + raise Exception( + "Fieldmaps must be cached to find parameter groups.") key_entities = _entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" @@ -975,7 +995,8 @@ def create_data_dictionary(self): mod_dict = sidecar_params[mod] for s_param in mod_dict.keys(): if s_param not in self.data_dict.keys(): - self.data_dict[s_param] = {"Description": "Scanning Parameter"} + self.data_dict[s_param] = { + "Description": "Scanning Parameter"} relational_params = self.grouping_config.get("relational_params") for r_param in relational_params.keys(): @@ -987,7 +1008,8 @@ def create_data_dictionary(self): mod_dict = derived_params[mod] for d_param in mod_dict.keys(): if d_param not in self.data_dict.keys(): - self.data_dict[d_param] = {"Description": "NIfTI Header Parameter"} + self.data_dict[d_param] = { + "Description": "NIfTI Header Parameter"} # Manually add non-sidecar columns/descriptions to data_dict desc1 = "Column where users mark groups to manually check" @@ -1094,17 +1116,20 @@ def get_param_groups_dataframes(self): long_name = big_df.loc[row, "FilePath"] big_df.loc[row, "FilePath"] = long_name.replace(self.path, "") - summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) + summary = _order_columns( + pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) + summary["KeyParamGroup"] = summary["EntitySet"] + \ + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["EntitySet"] + \ + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") @@ -1253,8 +1278,10 @@ def get_tsvs(self, path_prefix): big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) - big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) + summary = summary.sort_values( + by=["Modality", "EntitySetCount"], ascending=[True, False]) + big_df = big_df.sort_values( + by=["Modality", "EntitySetCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1273,7 +1300,8 @@ def get_tsvs(self, path_prefix): summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) + group_by_acquisition_sets( + f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) print(f"CuBIDS detected {len(summary)} Parameter Groups.") @@ -1492,7 +1520,8 @@ def _get_param_groups( # Get the fieldmaps out and add their types if "FieldmapKey" in relational_params: fieldmap_types = sorted( - [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] + [_file_to_entity_set(fmap.path) + for fmap in fieldmap_lookup[path]] ) # check if config says columns or bool @@ -1514,7 +1543,8 @@ def _get_param_groups( # If it's a fieldmap, see what entity set it's intended to 
correct if "IntendedForKey" in relational_params: intended_entity_sets = sorted( - [_file_to_entity_set(intention) for intention in intentions] + [_file_to_entity_set(intention) + for intention in intentions] ) # check if config says columns or bool @@ -1568,11 +1598,14 @@ def _get_param_groups( {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} ) - param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) + param_groups_with_counts = pd.merge( + deduped, param_group_counts, on=["ParamGroup"]) # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 + param_groups_with_counts.sort_values( + by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange( + param_groups_with_counts.shape[0]) + 1 # Send the new, ordered param group ids to the files list ordered_labeled_files = pd.merge( @@ -1580,13 +1613,15 @@ def _get_param_groups( ) # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) + ordered_labeled_files.sort_values( + by=["Counts"], inplace=True, ascending=False) # now get rid of cluster cols from deduped and df for col in list(ordered_labeled_files.columns): if col.startswith("Cluster_"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) - param_groups_with_counts = param_groups_with_counts.drop(col, axis=1) + param_groups_with_counts = param_groups_with_counts.drop( + col, axis=1) if col.endswith("_x"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 6562f35b..ddaa585f 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -57,7 +57,8 @@ def _check_sdc_cols(meta1, meta2): source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] + source_row = actions.loc[( + actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -298,7 +299,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), file_entities.get("session")) + acq_id = (file_entities.get("subject"), + file_entities.get("session")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) @@ -325,7 +327,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_group_info = [] for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] - acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) + acq_group_info.append( + (groupnum, content_id_counts[content_id_row]) + content_id) for subject, session in contents_to_subjects[content_id]: grouped_sub_sess.append( {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} @@ -333,7 +336,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the mapping of subject/session to acq_group_df = 
pd.DataFrame(grouped_sub_sess) - acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) + acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", + sep="\t", index=False) # Create data dictionary for acq group tsv acq_dict = get_acq_dictionary() @@ -342,7 +346,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the summary of acq groups to a text file with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt: - infotxt.write("\n".join([" ".join(map(str, line)) for line in acq_group_info])) + infotxt.write("\n".join([" ".join(map(str, line)) + for line in acq_group_info])) # Create and save AcqGroupInfo data dictionary header_dict = {} diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 28211cc2..4c2266e1 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -88,7 +88,8 @@ def test_ok_json_merge_cli(tmp_path): assert os.path.isfile(source_json) assert os.path.isfile(dest_json) - merge_proc = subprocess.run(["bids-sidecar-merge", str(source_json), str(dest_json)]) + merge_proc = subprocess.run( + ["bids-sidecar-merge", str(source_json), str(dest_json)]) assert merge_proc.returncode == 0 assert not _get_json_string(dest_json) == orig_dest_json_content @@ -143,7 +144,8 @@ def test_purge_no_datalad(tmp_path): / "sub-03_ses-phdiff_task-rest_bold.json" ) scans.append(scan_name) - scans.append("sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") + scans.append( + "sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") # create and save .txt with list of scans purge_path = str(tmp_path / "purge_scans.txt") @@ -276,7 +278,8 @@ def test_bad_json_merge_cli(tmp_path): / "sub-01_ses-phdiff_acq-HASC55AP_dwi.json" ) - merge_proc = subprocess.run(["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) + merge_proc = subprocess.run( + ["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) assert merge_proc.returncode > 0 assert _get_json_string(dest_json) == orig_dest_json_content @@ -356,10 +359,12 @@ def test_tsv_merge_no_datalad(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing) - bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, + original_files_tsv, str(tmp_path / "unmodified")) # these will not actually be equivalent because of the auto renames - assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash( + tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -369,28 +374,33 @@ def test_tsv_merge_no_datalad(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, + "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to apply merges! - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, + str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash( + tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -401,7 +411,8 @@ def test_tsv_merge_no_datalad(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str( + tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) @@ -419,7 +430,8 @@ def test_tsv_merge_changes(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing except rename) - bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, + original_files_tsv, str(tmp_path / "unmodified")) orig = pd.read_table(original_summary_tsv) # TEST RenameEntitySet column got populated CORRECTLY for row in range(len(orig)): @@ -446,7 +458,8 @@ def test_tsv_merge_changes(tmp_path): applied_f.loc[row, "KeyParamGroup"] ) else: - occurrences[applied_f.loc[row, "FilePath"]] = [applied_f.loc[row, "KeyParamGroup"]] + occurrences[applied_f.loc[row, "FilePath"]] = [ + applied_f.loc[row, "KeyParamGroup"]] assert len(orig) == len(applied) @@ -464,7 +477,8 @@ def test_tsv_merge_changes(tmp_path): assert renamed # will no longer be equal because of auto rename! - assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash( + tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -474,27 +488,32 @@ def test_tsv_merge_changes(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, + "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to merge - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, + str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash( + tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -505,7 +524,8 @@ def test_tsv_merge_changes(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str( + tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) # Make sure MergeInto == 0 deletes the param group and all associations @@ -689,7 +709,8 @@ def test_tsv_creation(tmp_path): # if entity sets in rows i and i+1 are the same if isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 - assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1 + assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + + 1]["ParamGroup"] - 1 # and count i < count i + 1 assert isummary_df.iloc[i]["Counts"] >= isummary_df.iloc[i + 1]["Counts"] @@ -801,11 +822,13 @@ def test_apply_tsv_changes(tmp_path): for f in deleted_f: assert Path(str(data_root / "complete") + f).exists() - assert Path(str(data_root / "complete") + f.replace("nii.gz", "json")).exists() + assert Path(str(data_root / "complete") + + f.replace("nii.gz", "json")).exists() # apply deletion complete_cubids.apply_tsv_changes( - mod2_path, str(tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") + mod2_path, str( + tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") ) # make sure deleted_keyparam gone from files_tsv @@ -838,7 +861,8 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS(data_root / "inconsistent", + acq_group_level="session", use_datalad=True) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1039,7 +1063,8 @@ def test_docker(): """ try: return_status = 1 - ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = subprocess.run(["docker", "version"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError as e: from errno import ENOENT diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index a0e9066a..78e15501 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -14,9 +14,10 @@ """ import argparse + import pytest -from cubids.cli import _path_exists, _is_file, _get_parser, _main +from cubids.cli import _get_parser, _is_file, _main, _path_exists def _test_path_exists(): @@ -27,7 +28,8 @@ def _test_path_exists(): It asserts that the function returns the expected path when the path exists, and raises an `argparse.ArgumentTypeError` when the path does not exist. 
""" - assert _path_exists("/path/to/existing/file", None) == "/path/to/existing/file" + assert _path_exists("/path/to/existing/file", + None) == "/path/to/existing/file" with pytest.raises(argparse.ArgumentTypeError): _path_exists("/path/to/nonexistent/file", None) diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index 6ab847fd..ca70d21a 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -74,7 +74,8 @@ def _test_copy_exemplars(cubids_instance): exemplars_dir = "/path/to/exemplars" exemplars_tsv = "/path/to/exemplars.tsv" min_group_size = 2 - cubids_instance.copy_exemplars(exemplars_dir, exemplars_tsv, min_group_size) + cubids_instance.copy_exemplars( + exemplars_dir, exemplars_tsv, min_group_size) # Add assertions here @@ -204,8 +205,10 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): - files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] - fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} + files = ["sub-01_ses-01_task-rest_bold.nii.gz", + "sub-02_ses-01_task-rest_bold.nii.gz"] + fieldmap_lookup = { + "sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" @@ -220,7 +223,8 @@ def _test_round_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"round": 3}} modality = "bold" - rounded_params = cubids_instance.round_params(param_group_df, config, modality) + rounded_params = cubids_instance.round_params( + param_group_df, config, modality) # Add assertions here @@ -234,7 +238,8 @@ def _test_format_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"format": "{:.2f}"}} modality = "bold" - formatted_params = cubids_instance.format_params(param_group_df, config, modality) + formatted_params = cubids_instance.format_params( + param_group_df, config, modality) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index c64da372..9bf6bda1 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -27,7 +27,8 @@ def _remove_a_json(json_file): def _edit_a_nifti(nifti_file): img = nb.load(nifti_file) - new_img = nb.Nifti1Image(np.random.rand(*img.shape), affine=img.affine, header=img.header) + new_img = nb.Nifti1Image(np.random.rand( + *img.shape), affine=img.affine, header=img.header) new_img.to_filename(nifti_file) @@ -76,7 +77,8 @@ def _add_ext_files(img_path): if "/dwi/" in img_path: # add bval and bvec for ext in dwi_exts: - dwi_ext_file = img_path.replace(".nii.gz", "").replace(".nii", "") + ext + dwi_ext_file = img_path.replace( + ".nii.gz", "").replace(".nii", "") + ext Path(dwi_ext_file).touch() if "bold" in img_path: no_suffix = img_path.rpartition("_")[0] diff --git a/cubids/validator.py b/cubids/validator.py index fe0e08ef..bb721212 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -5,8 +5,8 @@ import logging import os import pathlib -import subprocess import re +import subprocess import pandas as pd @@ -17,7 +17,8 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # New schema BIDS validator doesn't have option to ignore subject consistency. # Build the deno command to run the BIDS validator. 
- command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] + command = ["deno", "run", "-A", "jsr:@bids/validator", + path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -34,10 +35,12 @@ def get_bids_validator_version(): Version of the BIDS validator. """ command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() version = output.split()[-1] - clean_ver = re.sub(r'\x1b\[[0-9;]*m', '', version) # Remove ANSI color codes + # Remove ANSI color codes + clean_ver = re.sub(r"\x1b\[[0-9;]*m", "", version) return {"ValidatorVersion": clean_ver} @@ -54,7 +57,8 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - raise ValueError("Couldn't find any subjects in the specified directory:\n" + bids_dir) + raise ValueError( + "Couldn't find any subjects in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -62,7 +66,8 @@ def build_subject_paths(bids_dir): purepath = pathlib.PurePath(sub) sub_label = purepath.name - files = [x for x in glob.glob(sub + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob( + sub + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subjects_dict[sub_label] = files @@ -82,7 +87,8 @@ def build_first_subject_path(bids_dir, subject): purepath = pathlib.PurePath(subject) sub_label = purepath.name - files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob( + subject + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subject_dict[sub_label] = files @@ -153,8 +159,9 @@ def parse_issue(issue_dict): issues = data.get("issues", {}).get("issues", []) if not issues: return pd.DataFrame( - columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] - ) + columns=["location", "code", "issueMessage", + "subCode", "severity", "rule"] + ) # Parse all issues parsed_issues = [parse_issue(issue) for issue in issues] @@ -229,7 +236,7 @@ def update_dataset_description(path, new_info): # Write the updated data back to the file with open(description_path, "w") as f: - json.dump(existing_data, f, indent=4) + json.dump(existing_data, f, indent=4) print(f"Updated dataset_description.json at: {description_path}") # Check if .datalad directory exists before running the DataLad save command @@ -237,10 +244,14 @@ def update_dataset_description(path, new_info): if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir): try: subprocess.run( - ["datalad", "save", "-m", - "Save BIDS validator and schema version to dataset_description", - description_path], - check=True + [ + "datalad", + "save", + "-m", + "Save BIDS validator and schema version to dataset_description", + description_path, + ], + check=True, ) print("Changes saved with DataLad.") except subprocess.CalledProcessError as e: @@ -263,11 +274,11 @@ def bids_validator_version(output, path, write=False): validator_version = get_bids_validator_version() # Extract schemaVersion summary_info = extract_summary_info(output) - + combined_info = {**validator_version, **summary_info} if write: - # Update the dataset_description.json file + # Update the dataset_description.json file update_dataset_description(path, combined_info) elif not write: - print(combined_info) \ No newline at end 
of file + print(combined_info) diff --git a/cubids/workflows.py b/cubids/workflows.py index 69bed501..a28b61a3 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -17,13 +17,13 @@ from cubids.metadata_merge import merge_json_into_json from cubids.utils import _get_container_type from cubids.validator import ( + bids_validator_version, + build_first_subject_path, build_subject_paths, build_validator_call, get_val_dictionary, parse_validator_output, run_validator, - build_first_subject_path, - bids_validator_version, ) warnings.simplefilter(action="ignore", category=FutureWarning) @@ -82,7 +82,8 @@ def validate( # parse the string output parsed = parse_validator_output(ret.stdout.decode("UTF-8")) if parsed.shape[1] < 1: - logger.info("No issues/warnings parsed, your dataset is BIDS valid.") + logger.info( + "No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: logger.info("BIDS issues/warnings found in the dataset") @@ -129,7 +130,8 @@ def validate( subjects_dict = { k: v for k, v in subjects_dict.items() if k in sequential_subjects } - assert len(list(subjects_dict.keys())) > 1, "No subjects found in filter" + assert len(list(subjects_dict.keys()) + ) > 1, "No subjects found in filter" for subject, files_list in tqdm.tqdm(subjects_dict.items()): # logger.info(" ".join(["Processing subject:", subject])) # create a temporary directory and symlink the data @@ -158,7 +160,8 @@ def validate( ret = run_validator(call) # parse output if ret.returncode != 0: - logger.error("Errors returned from validator run, parsing now") + logger.error( + "Errors returned from validator run, parsing now") # parse the output and add to list if it returns a df decoded = ret.stdout.decode("UTF-8") @@ -169,7 +172,8 @@ def validate( # concatenate the parsed data and exit if len(parsed) < 1: - logger.info("No issues/warnings parsed, your dataset is BIDS valid.") + logger.info( + "No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: @@ -260,10 +264,7 @@ def validate( sys.exit(proc.returncode) -def bids_version( - bids_dir, - write=False -): +def bids_version(bids_dir, write=False): """Get BIDS validator and schema version. Parameters @@ -284,13 +285,14 @@ def bids_version( if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") ] if not sub_folders: - raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.") + raise ValueError( + "No folders starting with 'sub-' found. 
Please provide a valid BIDS.") subject = sub_folders[0] except FileNotFoundError: raise FileNotFoundError(f"The directory {bids_dir} does not exist.") except ValueError as ve: raise ve - + # build a dictionary with {SubjectLabel: [List of files]} # run first subject only subject_dict = build_first_subject_path(bids_dir, subject) @@ -329,7 +331,8 @@ def bids_version( def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" - merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) + merge_status = merge_json_into_json( + from_json, to_json, raise_on_error=False) sys.exit(merge_status) @@ -368,7 +371,8 @@ def group(bids_dir, container, acq_group_level, config, output_prefix): apply_config = config is not None if apply_config: - input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str( + config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + output_prefix.name @@ -475,14 +479,18 @@ def apply( # Run it through a container container_type = _get_container_type(container) bids_dir_link = str(bids_dir.absolute()) + ":/bids" - input_summary_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" - input_files_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" - output_tsv_dir_link = str(new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" + input_summary_tsv_dir_link = str( + edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" + input_files_tsv_dir_link = str( + edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" + output_tsv_dir_link = str( + new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" # FROM BOND-GROUP apply_config = config is not None if apply_config: - input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str( + config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + new_tsv_prefix.name From 75bf86eac9ed257f8f1390e2736eb82e751a180c Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 10:59:22 -0500 Subject: [PATCH 06/10] Still fixing lint issues --- cubids/cli.py | 27 ++++------ cubids/cubids.py | 105 ++++++++++++------------------------ cubids/metadata_merge.py | 15 ++---- cubids/tests/test_bond.py | 75 +++++++++----------------- cubids/tests/test_cli.py | 3 +- cubids/tests/test_cubids.py | 15 ++---- cubids/tests/utils.py | 6 +-- cubids/validator.py | 18 +++---- cubids/workflows.py | 33 ++++-------- 9 files changed, 99 insertions(+), 198 deletions(-) diff --git a/cubids/cli.py b/cubids/cli.py index ea78680c..d2f8a920 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -27,8 +27,7 @@ def _is_file(path, parser): """Ensure a given path exists and it is a file.""" path = _path_exists(path, parser) if not path.is_file(): - raise parser.error( - f"Path should point to a file (or symlink of file): <{path}>.") + raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.") return path @@ -145,8 +144,7 @@ def _enter_bids_version(argv=None): def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( - description=( - "bids-sidecar-merge: merge critical keys from one sidecar to another"), + description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) IsFile = partial(_is_file, parser=parser) @@ -218,8 +216,7 @@ def 
_parse_group(): default="subject", choices=["subject", "session"], action="store", - help=( - "Level at which acquisition groups are created options: 'subject' or 'session'"), + help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -247,8 +244,7 @@ def _enter_group(argv=None): def _parse_apply(): parser = argparse.ArgumentParser( - description=( - "cubids-apply: apply the changes specified in a tsv to a BIDS directory"), + description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -316,8 +312,7 @@ def _parse_apply(): default="subject", choices=["subject", "session"], action="store", - help=( - "Level at which acquisition groups are created options: 'subject' or 'session'"), + help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -346,8 +341,7 @@ def _enter_apply(argv=None): def _parse_datalad_save(): parser = argparse.ArgumentParser( - description=( - "cubids-datalad-save: perform a DataLad save on a BIDS directory"), + description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -705,10 +699,8 @@ def _enter_print_metadata_fields(argv=None): ("copy-exemplars", _parse_copy_exemplars, workflows.copy_exemplars), ("undo", _parse_undo, workflows.undo), ("datalad-save", _parse_datalad_save, workflows.datalad_save), - ("print-metadata-fields", _parse_print_metadata_fields, - workflows.print_metadata_fields), - ("remove-metadata-fields", _parse_remove_metadata_fields, - workflows.remove_metadata_fields), + ("print-metadata-fields", _parse_print_metadata_fields, workflows.print_metadata_fields), + ("remove-metadata-fields", _parse_remove_metadata_fields, workflows.remove_metadata_fields), ] @@ -717,8 +709,7 @@ def _get_parser(): from cubids import __version__ parser = argparse.ArgumentParser(prog="cubids") - parser.add_argument("-v", "--version", - action="version", version=__version__) + parser.add_argument("-v", "--version", action="version", version=__version__) subparsers = parser.add_subparsers(help="CuBIDS commands") for command, parser_func, run_func in COMMANDS: diff --git a/cubids/cubids.py b/cubids/cubids.py index 4de83826..27f632e3 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -149,11 +149,9 @@ def reset_bids_layout(self, validate=False): re.compile(r"/\."), ] - indexer = bids.BIDSLayoutIndexer( - validate=validate, ignore=ignores, index_metadata=False) + indexer = bids.BIDSLayoutIndexer(validate=validate, ignore=ignores, index_metadata=False) - self._layout = bids.BIDSLayout( - self.path, validate=validate, indexer=indexer) + self._layout = bids.BIDSLayout(self.path, validate=validate, indexer=indexer) def create_cubids_code_dir(self): """Create CuBIDS code directory. @@ -203,8 +201,7 @@ def datalad_save(self, message=None): Commit message to use with datalad save. """ if not self.datalad_ready: - raise Exception( - "DataLad has not been initialized. use datalad_init()") + raise Exception("DataLad has not been initialized. 
use datalad_init()") statuses = self.datalad_handle.save(message=message or "CuBIDS Save") saved_status = set([status["status"] for status in statuses]) @@ -226,8 +223,7 @@ def is_datalad_clean(self): """ if not self.datalad_ready: raise Exception("Datalad not initialized, can't determine status") - statuses = set([status["state"] - for status in self.datalad_handle.status()]) + statuses = set([status["state"] for status in self.datalad_handle.status()]) return statuses == set(["clean"]) def datalad_undo_last_commit(self): @@ -241,10 +237,8 @@ def datalad_undo_last_commit(self): If there are untracked changes in the datalad dataset. """ if not self.is_datalad_clean(): - raise Exception( - "Untracked changes present. Run clear_untracked_changes first") - reset_proc = subprocess.run( - ["git", "reset", "--hard", "HEAD~1"], cwd=self.path) + raise Exception("Untracked changes present. Run clear_untracked_changes first") + reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path) reset_proc.check_returncode() def add_nifti_info(self): @@ -348,13 +342,11 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T files_df = pd.read_table(files_tsv) # Check that the MergeInto column only contains valid merges - ok_merges, deletions = check_merging_operations( - summary_tsv, raise_on_error=raise_on_error) + ok_merges, deletions = check_merging_operations(summary_tsv, raise_on_error=raise_on_error) merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[( - files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] + dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] source_files = files_df.loc[ (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1) ] @@ -365,15 +357,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T for dest_nii in dest_files.FilePath: dest_json = img_to_new_ext(self.path + dest_nii, ".json") if Path(dest_json).exists() and Path(source_json).exists(): - merge_commands.append( - f"bids-sidecar-merge {source_json} {dest_json}") + merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[( - files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] + files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -446,8 +436,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T rename_commit = s1 + s2 - self.datalad_handle.run( - cmd=["bash", renames], message=rename_commit) + self.datalad_handle.run(cmd=["bash", renames], message=rename_commit) else: subprocess.run( ["bash", renames], @@ -487,8 +476,7 @@ def change_filename(self, filepath, entities): entity_file_keys = [] # Entities that may be in the filename? - file_keys = ["task", "acquisition", - "direction", "reconstruction", "run"] + file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] for key in file_keys: if key in list(entities.keys()): @@ -502,8 +490,7 @@ def change_filename(self, filepath, entities): # XXX: This adds an extra leading zero to run. 
entities["run"] = "0" + str(entities["run"]) - filename = "_".join( - [f"{key}-{entities[key]}" for key in entity_file_keys]) + filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) filename = ( filename.replace("acquisition", "acq") .replace("direction", "dir") @@ -512,8 +499,7 @@ def change_filename(self, filepath, entities): if len(filename) > 0: filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - raise ValueError( - f"Could not construct new filename for {filepath}") + raise ValueError(f"Could not construct new filename for {filepath}") # CHECK TO SEE IF DATATYPE CHANGED # datatype may be overridden/changed if the original file is located in the wrong folder. @@ -531,8 +517,7 @@ def change_filename(self, filepath, entities): dtype_new = dtype_orig # Construct the new filename - new_path = str(self.path) + "/" + sub + "/" + \ - ses + "/" + dtype_new + "/" + filename + new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -551,8 +536,7 @@ def change_filename(self, filepath, entities): # ensure assoc not an IntendedFor reference if ".nii" not in str(assoc_path): self.old_filenames.append(assoc_path) - new_ext_path = img_to_new_ext( - new_path, "".join(Path(assoc_path).suffixes)) + new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes)) self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! @@ -625,8 +609,7 @@ def change_filename(self, filepath, entities): if Path(old_labeling).exists(): self.old_filenames.append(old_labeling) new_scan_end = "_" + suffix + old_ext - new_labeling = new_path.replace( - new_scan_end, "_asllabeling.jpg") + new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! @@ -652,8 +635,7 @@ def change_filename(self, filepath, entities): # remove old filename data["IntendedFor"].remove(item) # add new filename - data["IntendedFor"].append( - _get_intended_for_reference(new_path)) + data["IntendedFor"].append(_get_intended_for_reference(new_path)) # update the json with the new data dictionary _update_json(filename_with_if, data) @@ -826,8 +808,7 @@ def _purge_associations(self, scans): if "/func/" in str(path): # add tsvs - tsv = img_to_new_ext(str(path), ".tsv").replace( - "_bold", "_events") + tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") if Path(tsv).exists(): to_remove.append(tsv) # add tsv json (if exists) @@ -941,8 +922,7 @@ def get_param_groups_from_entity_set(self, entity_set): 2. 
A data frame with param group summaries """ if not self.fieldmaps_cached: - raise Exception( - "Fieldmaps must be cached to find parameter groups.") + raise Exception("Fieldmaps must be cached to find parameter groups.") key_entities = _entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" @@ -995,8 +975,7 @@ def create_data_dictionary(self): mod_dict = sidecar_params[mod] for s_param in mod_dict.keys(): if s_param not in self.data_dict.keys(): - self.data_dict[s_param] = { - "Description": "Scanning Parameter"} + self.data_dict[s_param] = {"Description": "Scanning Parameter"} relational_params = self.grouping_config.get("relational_params") for r_param in relational_params.keys(): @@ -1008,8 +987,7 @@ def create_data_dictionary(self): mod_dict = derived_params[mod] for d_param in mod_dict.keys(): if d_param not in self.data_dict.keys(): - self.data_dict[d_param] = { - "Description": "NIfTI Header Parameter"} + self.data_dict[d_param] = {"Description": "NIfTI Header Parameter"} # Manually add non-sidecar columns/descriptions to data_dict desc1 = "Column where users mark groups to manually check" @@ -1116,20 +1094,17 @@ def get_param_groups_dataframes(self): long_name = big_df.loc[row, "FilePath"] big_df.loc[row, "FilePath"] = long_name.replace(self.path, "") - summary = _order_columns( - pd.concat(param_group_summaries, ignore_index=True)) + summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["EntitySet"] + \ - "__" + summary["ParamGroup"].map(str) + summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["EntitySet"] + \ - "__" + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") @@ -1278,10 +1253,8 @@ def get_tsvs(self, path_prefix): big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values( - by=["Modality", "EntitySetCount"], ascending=[True, False]) - big_df = big_df.sort_values( - by=["Modality", "EntitySetCount"], ascending=[True, False]) + summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) + big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1300,8 +1273,7 @@ def get_tsvs(self, path_prefix): summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets( - f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) + group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) print(f"CuBIDS detected {len(summary)} Parameter Groups.") @@ -1520,8 +1492,7 @@ def _get_param_groups( # Get the fieldmaps out and add their types if "FieldmapKey" in relational_params: fieldmap_types = sorted( - [_file_to_entity_set(fmap.path) - for fmap in fieldmap_lookup[path]] + [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] ) # check if config says columns or bool @@ -1543,8 +1514,7 @@ def _get_param_groups( # If it's a fieldmap, see what entity set it's intended to 
correct if "IntendedForKey" in relational_params: intended_entity_sets = sorted( - [_file_to_entity_set(intention) - for intention in intentions] + [_file_to_entity_set(intention) for intention in intentions] ) # check if config says columns or bool @@ -1598,14 +1568,11 @@ def _get_param_groups( {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} ) - param_groups_with_counts = pd.merge( - deduped, param_group_counts, on=["ParamGroup"]) + param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values( - by=["Counts"], inplace=True, ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange( - param_groups_with_counts.shape[0]) + 1 + param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 # Send the new, ordered param group ids to the files list ordered_labeled_files = pd.merge( @@ -1613,15 +1580,13 @@ def _get_param_groups( ) # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values( - by=["Counts"], inplace=True, ascending=False) + ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) # now get rid of cluster cols from deduped and df for col in list(ordered_labeled_files.columns): if col.startswith("Cluster_"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) - param_groups_with_counts = param_groups_with_counts.drop( - col, axis=1) + param_groups_with_counts = param_groups_with_counts.drop(col, axis=1) if col.endswith("_x"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index ddaa585f..6562f35b 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -57,8 +57,7 @@ def _check_sdc_cols(meta1, meta2): source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[( - actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] + source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -299,8 +298,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), - file_entities.get("session")) + acq_id = (file_entities.get("subject"), file_entities.get("session")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) @@ -327,8 +325,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_group_info = [] for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] - acq_group_info.append( - (groupnum, content_id_counts[content_id_row]) + content_id) + acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) for subject, session in contents_to_subjects[content_id]: grouped_sub_sess.append( {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} @@ -336,8 +333,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the mapping of subject/session to acq_group_df = 
pd.DataFrame(grouped_sub_sess) - acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", - sep="\t", index=False) + acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) # Create data dictionary for acq group tsv acq_dict = get_acq_dictionary() @@ -346,8 +342,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the summary of acq groups to a text file with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt: - infotxt.write("\n".join([" ".join(map(str, line)) - for line in acq_group_info])) + infotxt.write("\n".join([" ".join(map(str, line)) for line in acq_group_info])) # Create and save AcqGroupInfo data dictionary header_dict = {} diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 4c2266e1..28211cc2 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -88,8 +88,7 @@ def test_ok_json_merge_cli(tmp_path): assert os.path.isfile(source_json) assert os.path.isfile(dest_json) - merge_proc = subprocess.run( - ["bids-sidecar-merge", str(source_json), str(dest_json)]) + merge_proc = subprocess.run(["bids-sidecar-merge", str(source_json), str(dest_json)]) assert merge_proc.returncode == 0 assert not _get_json_string(dest_json) == orig_dest_json_content @@ -144,8 +143,7 @@ def test_purge_no_datalad(tmp_path): / "sub-03_ses-phdiff_task-rest_bold.json" ) scans.append(scan_name) - scans.append( - "sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") + scans.append("sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") # create and save .txt with list of scans purge_path = str(tmp_path / "purge_scans.txt") @@ -278,8 +276,7 @@ def test_bad_json_merge_cli(tmp_path): / "sub-01_ses-phdiff_acq-HASC55AP_dwi.json" ) - merge_proc = subprocess.run( - ["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) + merge_proc = subprocess.run(["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) assert merge_proc.returncode > 0 assert _get_json_string(dest_json) == orig_dest_json_content @@ -359,12 +356,10 @@ def test_tsv_merge_no_datalad(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing) - bod.apply_tsv_changes(original_summary_tsv, - original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) # these will not actually be equivalent because of the auto renames - assert file_hash(original_summary_tsv) != file_hash( - tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -374,33 +369,28 @@ def test_tsv_merge_no_datalad(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, - "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to apply merges! - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, - str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash( - tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -411,8 +401,7 @@ def test_tsv_merge_no_datalad(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str( - tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) @@ -430,8 +419,7 @@ def test_tsv_merge_changes(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing except rename) - bod.apply_tsv_changes(original_summary_tsv, - original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) orig = pd.read_table(original_summary_tsv) # TEST RenameEntitySet column got populated CORRECTLY for row in range(len(orig)): @@ -458,8 +446,7 @@ def test_tsv_merge_changes(tmp_path): applied_f.loc[row, "KeyParamGroup"] ) else: - occurrences[applied_f.loc[row, "FilePath"]] = [ - applied_f.loc[row, "KeyParamGroup"]] + occurrences[applied_f.loc[row, "FilePath"]] = [applied_f.loc[row, "KeyParamGroup"]] assert len(orig) == len(applied) @@ -477,8 +464,7 @@ def test_tsv_merge_changes(tmp_path): assert renamed # will no longer be equal because of auto rename! - assert file_hash(original_summary_tsv) != file_hash( - tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -488,32 +474,27 @@ def test_tsv_merge_changes(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, - "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to merge - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, - str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash( - tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -524,8 +505,7 @@ def test_tsv_merge_changes(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str( - tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) # Make sure MergeInto == 0 deletes the param group and all associations @@ -709,8 +689,7 @@ def test_tsv_creation(tmp_path): # if entity sets in rows i and i+1 are the same if isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 - assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + - 1]["ParamGroup"] - 1 + assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1 # and count i < count i + 1 assert isummary_df.iloc[i]["Counts"] >= isummary_df.iloc[i + 1]["Counts"] @@ -822,13 +801,11 @@ def test_apply_tsv_changes(tmp_path): for f in deleted_f: assert Path(str(data_root / "complete") + f).exists() - assert Path(str(data_root / "complete") + - f.replace("nii.gz", "json")).exists() + assert Path(str(data_root / "complete") + f.replace("nii.gz", "json")).exists() # apply deletion complete_cubids.apply_tsv_changes( - mod2_path, str( - tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") + mod2_path, str(tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") ) # make sure deleted_keyparam gone from files_tsv @@ -861,8 +838,7 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", - acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1063,8 +1039,7 @@ def test_docker(): """ try: return_status = 1 - ret = subprocess.run(["docker", "version"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError as e: from errno import ENOENT diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index 78e15501..06d3af2a 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -28,8 +28,7 @@ def _test_path_exists(): It asserts that the function returns the expected path when the path exists, and raises an `argparse.ArgumentTypeError` when the path does not exist. 
""" - assert _path_exists("/path/to/existing/file", - None) == "/path/to/existing/file" + assert _path_exists("/path/to/existing/file", None) == "/path/to/existing/file" with pytest.raises(argparse.ArgumentTypeError): _path_exists("/path/to/nonexistent/file", None) diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index ca70d21a..6ab847fd 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -74,8 +74,7 @@ def _test_copy_exemplars(cubids_instance): exemplars_dir = "/path/to/exemplars" exemplars_tsv = "/path/to/exemplars.tsv" min_group_size = 2 - cubids_instance.copy_exemplars( - exemplars_dir, exemplars_tsv, min_group_size) + cubids_instance.copy_exemplars(exemplars_dir, exemplars_tsv, min_group_size) # Add assertions here @@ -205,10 +204,8 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): - files = ["sub-01_ses-01_task-rest_bold.nii.gz", - "sub-02_ses-01_task-rest_bold.nii.gz"] - fieldmap_lookup = { - "sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} + files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] + fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" @@ -223,8 +220,7 @@ def _test_round_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"round": 3}} modality = "bold" - rounded_params = cubids_instance.round_params( - param_group_df, config, modality) + rounded_params = cubids_instance.round_params(param_group_df, config, modality) # Add assertions here @@ -238,8 +234,7 @@ def _test_format_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"format": "{:.2f}"}} modality = "bold" - formatted_params = cubids_instance.format_params( - param_group_df, config, modality) + formatted_params = cubids_instance.format_params(param_group_df, config, modality) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 9bf6bda1..c64da372 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -27,8 +27,7 @@ def _remove_a_json(json_file): def _edit_a_nifti(nifti_file): img = nb.load(nifti_file) - new_img = nb.Nifti1Image(np.random.rand( - *img.shape), affine=img.affine, header=img.header) + new_img = nb.Nifti1Image(np.random.rand(*img.shape), affine=img.affine, header=img.header) new_img.to_filename(nifti_file) @@ -77,8 +76,7 @@ def _add_ext_files(img_path): if "/dwi/" in img_path: # add bval and bvec for ext in dwi_exts: - dwi_ext_file = img_path.replace( - ".nii.gz", "").replace(".nii", "") + ext + dwi_ext_file = img_path.replace(".nii.gz", "").replace(".nii", "") + ext Path(dwi_ext_file).touch() if "bold" in img_path: no_suffix = img_path.rpartition("_")[0] diff --git a/cubids/validator.py b/cubids/validator.py index bb721212..a7225ba0 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -17,8 +17,7 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # New schema BIDS validator doesn't have option to ignore subject consistency. # Build the deno command to run the BIDS validator. 
- command = ["deno", "run", "-A", "jsr:@bids/validator", - path, "--verbose", "--json"] + command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -35,8 +34,7 @@ def get_bids_validator_version(): Version of the BIDS validator. """ command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] - result = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() version = output.split()[-1] # Remove ANSI color codes @@ -57,8 +55,7 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - raise ValueError( - "Couldn't find any subjects in the specified directory:\n" + bids_dir) + raise ValueError("Couldn't find any subjects in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -66,8 +63,7 @@ def build_subject_paths(bids_dir): purepath = pathlib.PurePath(sub) sub_label = purepath.name - files = [x for x in glob.glob( - sub + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob(sub + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subjects_dict[sub_label] = files @@ -87,8 +83,7 @@ def build_first_subject_path(bids_dir, subject): purepath = pathlib.PurePath(subject) sub_label = purepath.name - files = [x for x in glob.glob( - subject + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subject_dict[sub_label] = files @@ -159,8 +154,7 @@ def parse_issue(issue_dict): issues = data.get("issues", {}).get("issues", []) if not issues: return pd.DataFrame( - columns=["location", "code", "issueMessage", - "subCode", "severity", "rule"] + columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] ) # Parse all issues diff --git a/cubids/workflows.py b/cubids/workflows.py index a28b61a3..11931c5b 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -82,8 +82,7 @@ def validate( # parse the string output parsed = parse_validator_output(ret.stdout.decode("UTF-8")) if parsed.shape[1] < 1: - logger.info( - "No issues/warnings parsed, your dataset is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: logger.info("BIDS issues/warnings found in the dataset") @@ -130,8 +129,7 @@ def validate( subjects_dict = { k: v for k, v in subjects_dict.items() if k in sequential_subjects } - assert len(list(subjects_dict.keys()) - ) > 1, "No subjects found in filter" + assert len(list(subjects_dict.keys())) > 1, "No subjects found in filter" for subject, files_list in tqdm.tqdm(subjects_dict.items()): # logger.info(" ".join(["Processing subject:", subject])) # create a temporary directory and symlink the data @@ -160,8 +158,7 @@ def validate( ret = run_validator(call) # parse output if ret.returncode != 0: - logger.error( - "Errors returned from validator run, parsing now") + logger.error("Errors returned from validator run, parsing now") # parse the output and add to list if it returns a df decoded = ret.stdout.decode("UTF-8") @@ -172,8 +169,7 @@ def validate( # concatenate the parsed data and exit if len(parsed) < 1: - logger.info( - "No issues/warnings parsed, your dataset is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: @@ -285,8 +281,7 @@ def 
bids_version(bids_dir, write=False): if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") ] if not sub_folders: - raise ValueError( - "No folders starting with 'sub-' found. Please provide a valid BIDS.") + raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.") subject = sub_folders[0] except FileNotFoundError: raise FileNotFoundError(f"The directory {bids_dir} does not exist.") @@ -331,8 +326,7 @@ def bids_version(bids_dir, write=False): def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" - merge_status = merge_json_into_json( - from_json, to_json, raise_on_error=False) + merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) sys.exit(merge_status) @@ -371,8 +365,7 @@ def group(bids_dir, container, acq_group_level, config, output_prefix): apply_config = config is not None if apply_config: - input_config_dir_link = str( - config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + output_prefix.name @@ -479,18 +472,14 @@ def apply( # Run it through a container container_type = _get_container_type(container) bids_dir_link = str(bids_dir.absolute()) + ":/bids" - input_summary_tsv_dir_link = str( - edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" - input_files_tsv_dir_link = str( - edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" - output_tsv_dir_link = str( - new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" + input_summary_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" + input_files_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" + output_tsv_dir_link = str(new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" # FROM BOND-GROUP apply_config = config is not None if apply_config: - input_config_dir_link = str( - config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + new_tsv_prefix.name From 1736b514ac118044093d22a57bc201948ac26a22 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 16:31:23 -0500 Subject: [PATCH 07/10] add unit test for cubids bids-version --- cubids/cli.py | 16 ++++++------ cubids/cubids.py | 5 ++-- cubids/tests/test_bond.py | 52 ++++++++++++++++++++++++++++++++++++++- cubids/validator.py | 3 ++- docs/installation.rst | 2 +- pyproject.toml | 1 + 6 files changed, 66 insertions(+), 13 deletions(-) diff --git a/cubids/cli.py b/cubids/cli.py index d2f8a920..cf48cf9a 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -43,7 +43,7 @@ def _parse_validate(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -119,7 +119,7 @@ def _parse_bids_version(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -188,7 +188,7 @@ def _parse_group(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. 
It should contain " "sub-X directories and dataset_description.json" ), ) @@ -255,7 +255,7 @@ def _parse_apply(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -351,7 +351,7 @@ def _parse_datalad_save(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -393,7 +393,7 @@ def _parse_undo(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -617,7 +617,7 @@ def _parse_remove_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -663,7 +663,7 @@ def _parse_print_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) diff --git a/cubids/cubids.py b/cubids/cubids.py index 27f632e3..81781596 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1346,9 +1346,10 @@ def get_all_metadata_fields(self): metadata = json.loads(content) found_fields.update(metadata.keys()) except json.JSONDecodeError as e: - print(f"Error decoding JSON in {json_file}: {e}") + warnings.warn(f"Error decoding JSON in {json_file}: {e}") except Exception as e: - print(f"Unexpected error with file {json_file}: {e}") + warnings.warn(f"Unexpected error with file {json_file}: {e}") + return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 28211cc2..068ee4fd 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd import pytest +from packaging.version import Version from cubids.cubids import CuBIDS from cubids.metadata_merge import merge_json_into_json, merge_without_overwrite @@ -22,7 +23,15 @@ file_hash, get_data, ) -from cubids.validator import build_validator_call, parse_validator_output, run_validator +from cubids.validator import ( + build_validator_call, + parse_validator_output, + run_validator, + get_bids_validator_version, + extract_summary_info, + update_dataset_description, + bids_validator_version, +) COMPLETE_KEY_GROUPS = [ "acquisition-HASC55AP_datatype-dwi_suffix-dwi", @@ -1028,6 +1037,47 @@ def test_validator(tmp_path): assert isinstance(parsed, pd.DataFrame) +def bids_validator_version(tmp_path): + """Test validator.bids_validator_version.""" + # Get the BIDS validator version + validator_version = get_bids_validator_version() + # Extract schemaVersion + summary_info = extract_summary_info(decoded) + + +def test_bids_version(tmp_path): + """Test workflows.bids_version.""" + data_root = get_data(tmp_path) + bids_dir = Path(data_root) / "complete" + + # Ensure the test directory exists + assert bids_dir.exists() + + # test the validator in valid dataset + call = build_validator_call(bids_dir) + ret = run_validator(call) + + assert ret.returncode == 0 + + decoded = ret.stdout.decode("UTF-8") + + # Get the BIDS validator version + validator_version = 
Version(get_bids_validator_version()["ValidatorVersion"]) + # Extract schemaVersion + schema_version = Version(extract_summary_info(decoded)["SchemaVersion"]) + + # Set baseline versions to compare against + min_validator_version = Version("2.0.0") + min_schema_version = Version("0.11.3") + + assert ( + validator_version >= min_validator_version + ), f"Validator version {validator_version} is less than minimum {min_validator_version}" + assert ( + schema_version >= min_schema_version + ), f"Schema version {schema_version} is less than minimum {min_schema_version}" + + def test_docker(): """Verify that docker is installed and the user has permission to run docker images. diff --git a/cubids/validator.py b/cubids/validator.py index a7225ba0..a4feeba5 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -7,6 +7,7 @@ import pathlib import re import subprocess +import warnings import pandas as pd @@ -249,7 +250,7 @@ def update_dataset_description(path, new_info): ) print("Changes saved with DataLad.") except subprocess.CalledProcessError as e: - print(f"Error running DataLad save: {e}") + warnings.warn(f"Error running DataLad save: {e}") def bids_validator_version(output, path, write=False): diff --git a/docs/installation.rst b/docs/installation.rst index b02e734e..b6ebda3b 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -51,7 +51,7 @@ We can accomplish this using the following command: $ conda install deno -The new schema ``bids-validator`` doesn't need to be installed +The new schema-based ``bids-validator`` doesn't need to be installed and will be implemented automatically when `cubids validate` is called diff --git a/pyproject.toml b/pyproject.toml index 6e3c151c..79d8e1f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ cubids-copy-exemplars = "cubids.cli:_enter_copy_exemplars" cubids-undo = "cubids.cli:_enter_undo" bids-sidecar-merge = "cubids.cli:_enter_bids_sidecar_merge" cubids-validate = "cubids.cli:_enter_validate" +cubids-bids-version = "cubids.cli:_enter_bids_version" cubids-datalad-save = "cubids.cli:_enter_datalad_save" cubids-print-metadata-fields = "cubids.cli:_enter_print_metadata_fields" cubids-remove-metadata-fields = "cubids.cli:_enter_remove_metadata_fields" From d972d7b1611be06e80bddfe65c348659f55b1129 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 16:43:33 -0500 Subject: [PATCH 08/10] Remove junk test code --- cubids/tests/test_bond.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 068ee4fd..d33107be 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -1037,14 +1037,6 @@ def test_validator(tmp_path): assert isinstance(parsed, pd.DataFrame) -def bids_validator_version(tmp_path): - """Test validator.bids_validator_version.""" - # Get the BIDS validator version - validator_version = get_bids_validator_version() - # Extract schemaVersion - summary_info = extract_summary_info(decoded) - - def test_bids_version(tmp_path): """Test workflows.bids_version.""" data_root = get_data(tmp_path) From 64e3392deebd814babb2a60fb9c8bfa960436ee5 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 13 Dec 2024 11:04:53 -0500 Subject: [PATCH 09/10] Try to fix codecov token issue --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c73d8af8..be6ce31d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,6 +1,6 @@ version: 2.1 orbs: - 
codecov: codecov/codecov@1.0.5 + codecov: codecov/codecov@3.2.4 jobs: run_pytests: @@ -62,7 +62,7 @@ jobs: # We need curl for the codecov upload apt-get update - apt-get install -yqq curl + apt-get install -y -qq curl cd /home/circleci/src/coverage/ echo "Merge coverage files" From 445533a879eb4547d82000e8ae04f41221d7670f Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 13 Dec 2024 11:13:44 -0500 Subject: [PATCH 10/10] Still trying to fix codecov token issue --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index be6ce31d..1fe2e779 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -63,6 +63,7 @@ jobs: # We need curl for the codecov upload apt-get update apt-get install -y -qq curl + apt-get install -y gnupg cd /home/circleci/src/coverage/ echo "Merge coverage files"