diff --git a/cubids/cli.py b/cubids/cli.py index ea78680c..d2f8a920 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -27,8 +27,7 @@ def _is_file(path, parser): """Ensure a given path exists and it is a file.""" path = _path_exists(path, parser) if not path.is_file(): - raise parser.error( - f"Path should point to a file (or symlink of file): <{path}>.") + raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.") return path @@ -145,8 +144,7 @@ def _enter_bids_version(argv=None): def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( - description=( - "bids-sidecar-merge: merge critical keys from one sidecar to another"), + description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) IsFile = partial(_is_file, parser=parser) @@ -218,8 +216,7 @@ def _parse_group(): default="subject", choices=["subject", "session"], action="store", - help=( - "Level at which acquisition groups are created options: 'subject' or 'session'"), + help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -247,8 +244,7 @@ def _enter_group(argv=None): def _parse_apply(): parser = argparse.ArgumentParser( - description=( - "cubids-apply: apply the changes specified in a tsv to a BIDS directory"), + description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -316,8 +312,7 @@ def _parse_apply(): default="subject", choices=["subject", "session"], action="store", - help=( - "Level at which acquisition groups are created options: 'subject' or 'session'"), + help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -346,8 +341,7 @@ def _enter_apply(argv=None): def _parse_datalad_save(): parser = argparse.ArgumentParser( - description=( - "cubids-datalad-save: perform a DataLad save on a BIDS directory"), + description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -705,10 +699,8 @@ def _enter_print_metadata_fields(argv=None): ("copy-exemplars", _parse_copy_exemplars, workflows.copy_exemplars), ("undo", _parse_undo, workflows.undo), ("datalad-save", _parse_datalad_save, workflows.datalad_save), - ("print-metadata-fields", _parse_print_metadata_fields, - workflows.print_metadata_fields), - ("remove-metadata-fields", _parse_remove_metadata_fields, - workflows.remove_metadata_fields), + ("print-metadata-fields", _parse_print_metadata_fields, workflows.print_metadata_fields), + ("remove-metadata-fields", _parse_remove_metadata_fields, workflows.remove_metadata_fields), ] @@ -717,8 +709,7 @@ def _get_parser(): from cubids import __version__ parser = argparse.ArgumentParser(prog="cubids") - parser.add_argument("-v", "--version", - action="version", version=__version__) + parser.add_argument("-v", "--version", action="version", version=__version__) subparsers = parser.add_subparsers(help="CuBIDS commands") for command, parser_func, run_func in COMMANDS: diff --git a/cubids/cubids.py b/cubids/cubids.py index 4de83826..27f632e3 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -149,11 +149,9 @@ def reset_bids_layout(self, validate=False): re.compile(r"/\."), ] - indexer = bids.BIDSLayoutIndexer( - validate=validate, ignore=ignores, index_metadata=False) + indexer = bids.BIDSLayoutIndexer(validate=validate, ignore=ignores, index_metadata=False) - self._layout = bids.BIDSLayout( - self.path, validate=validate, indexer=indexer) + self._layout = bids.BIDSLayout(self.path, validate=validate, indexer=indexer) def create_cubids_code_dir(self): """Create CuBIDS code directory. @@ -203,8 +201,7 @@ def datalad_save(self, message=None): Commit message to use with datalad save. """ if not self.datalad_ready: - raise Exception( - "DataLad has not been initialized. use datalad_init()") + raise Exception("DataLad has not been initialized. use datalad_init()") statuses = self.datalad_handle.save(message=message or "CuBIDS Save") saved_status = set([status["status"] for status in statuses]) @@ -226,8 +223,7 @@ def is_datalad_clean(self): """ if not self.datalad_ready: raise Exception("Datalad not initialized, can't determine status") - statuses = set([status["state"] - for status in self.datalad_handle.status()]) + statuses = set([status["state"] for status in self.datalad_handle.status()]) return statuses == set(["clean"]) def datalad_undo_last_commit(self): @@ -241,10 +237,8 @@ def datalad_undo_last_commit(self): If there are untracked changes in the datalad dataset. """ if not self.is_datalad_clean(): - raise Exception( - "Untracked changes present. Run clear_untracked_changes first") - reset_proc = subprocess.run( - ["git", "reset", "--hard", "HEAD~1"], cwd=self.path) + raise Exception("Untracked changes present. Run clear_untracked_changes first") + reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path) reset_proc.check_returncode() def add_nifti_info(self): @@ -348,13 +342,11 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T files_df = pd.read_table(files_tsv) # Check that the MergeInto column only contains valid merges - ok_merges, deletions = check_merging_operations( - summary_tsv, raise_on_error=raise_on_error) + ok_merges, deletions = check_merging_operations(summary_tsv, raise_on_error=raise_on_error) merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[( - files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] + dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] source_files = files_df.loc[ (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1) ] @@ -365,15 +357,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T for dest_nii in dest_files.FilePath: dest_json = img_to_new_ext(self.path + dest_nii, ".json") if Path(dest_json).exists() and Path(source_json).exists(): - merge_commands.append( - f"bids-sidecar-merge {source_json} {dest_json}") + merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[( - files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] + files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -446,8 +436,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T rename_commit = s1 + s2 - self.datalad_handle.run( - cmd=["bash", renames], message=rename_commit) + self.datalad_handle.run(cmd=["bash", renames], message=rename_commit) else: subprocess.run( ["bash", renames], @@ -487,8 +476,7 @@ def change_filename(self, filepath, entities): entity_file_keys = [] # Entities that may be in the filename? - file_keys = ["task", "acquisition", - "direction", "reconstruction", "run"] + file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] for key in file_keys: if key in list(entities.keys()): @@ -502,8 +490,7 @@ def change_filename(self, filepath, entities): # XXX: This adds an extra leading zero to run. entities["run"] = "0" + str(entities["run"]) - filename = "_".join( - [f"{key}-{entities[key]}" for key in entity_file_keys]) + filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) filename = ( filename.replace("acquisition", "acq") .replace("direction", "dir") @@ -512,8 +499,7 @@ def change_filename(self, filepath, entities): if len(filename) > 0: filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - raise ValueError( - f"Could not construct new filename for {filepath}") + raise ValueError(f"Could not construct new filename for {filepath}") # CHECK TO SEE IF DATATYPE CHANGED # datatype may be overridden/changed if the original file is located in the wrong folder. @@ -531,8 +517,7 @@ def change_filename(self, filepath, entities): dtype_new = dtype_orig # Construct the new filename - new_path = str(self.path) + "/" + sub + "/" + \ - ses + "/" + dtype_new + "/" + filename + new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -551,8 +536,7 @@ def change_filename(self, filepath, entities): # ensure assoc not an IntendedFor reference if ".nii" not in str(assoc_path): self.old_filenames.append(assoc_path) - new_ext_path = img_to_new_ext( - new_path, "".join(Path(assoc_path).suffixes)) + new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes)) self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! @@ -625,8 +609,7 @@ def change_filename(self, filepath, entities): if Path(old_labeling).exists(): self.old_filenames.append(old_labeling) new_scan_end = "_" + suffix + old_ext - new_labeling = new_path.replace( - new_scan_end, "_asllabeling.jpg") + new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! @@ -652,8 +635,7 @@ def change_filename(self, filepath, entities): # remove old filename data["IntendedFor"].remove(item) # add new filename - data["IntendedFor"].append( - _get_intended_for_reference(new_path)) + data["IntendedFor"].append(_get_intended_for_reference(new_path)) # update the json with the new data dictionary _update_json(filename_with_if, data) @@ -826,8 +808,7 @@ def _purge_associations(self, scans): if "/func/" in str(path): # add tsvs - tsv = img_to_new_ext(str(path), ".tsv").replace( - "_bold", "_events") + tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") if Path(tsv).exists(): to_remove.append(tsv) # add tsv json (if exists) @@ -941,8 +922,7 @@ def get_param_groups_from_entity_set(self, entity_set): 2. A data frame with param group summaries """ if not self.fieldmaps_cached: - raise Exception( - "Fieldmaps must be cached to find parameter groups.") + raise Exception("Fieldmaps must be cached to find parameter groups.") key_entities = _entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" @@ -995,8 +975,7 @@ def create_data_dictionary(self): mod_dict = sidecar_params[mod] for s_param in mod_dict.keys(): if s_param not in self.data_dict.keys(): - self.data_dict[s_param] = { - "Description": "Scanning Parameter"} + self.data_dict[s_param] = {"Description": "Scanning Parameter"} relational_params = self.grouping_config.get("relational_params") for r_param in relational_params.keys(): @@ -1008,8 +987,7 @@ def create_data_dictionary(self): mod_dict = derived_params[mod] for d_param in mod_dict.keys(): if d_param not in self.data_dict.keys(): - self.data_dict[d_param] = { - "Description": "NIfTI Header Parameter"} + self.data_dict[d_param] = {"Description": "NIfTI Header Parameter"} # Manually add non-sidecar columns/descriptions to data_dict desc1 = "Column where users mark groups to manually check" @@ -1116,20 +1094,17 @@ def get_param_groups_dataframes(self): long_name = big_df.loc[row, "FilePath"] big_df.loc[row, "FilePath"] = long_name.replace(self.path, "") - summary = _order_columns( - pd.concat(param_group_summaries, ignore_index=True)) + summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["EntitySet"] + \ - "__" + summary["ParamGroup"].map(str) + summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["EntitySet"] + \ - "__" + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") @@ -1278,10 +1253,8 @@ def get_tsvs(self, path_prefix): big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values( - by=["Modality", "EntitySetCount"], ascending=[True, False]) - big_df = big_df.sort_values( - by=["Modality", "EntitySetCount"], ascending=[True, False]) + summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) + big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1300,8 +1273,7 @@ def get_tsvs(self, path_prefix): summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets( - f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) + group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) print(f"CuBIDS detected {len(summary)} Parameter Groups.") @@ -1520,8 +1492,7 @@ def _get_param_groups( # Get the fieldmaps out and add their types if "FieldmapKey" in relational_params: fieldmap_types = sorted( - [_file_to_entity_set(fmap.path) - for fmap in fieldmap_lookup[path]] + [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] ) # check if config says columns or bool @@ -1543,8 +1514,7 @@ def _get_param_groups( # If it's a fieldmap, see what entity set it's intended to correct if "IntendedForKey" in relational_params: intended_entity_sets = sorted( - [_file_to_entity_set(intention) - for intention in intentions] + [_file_to_entity_set(intention) for intention in intentions] ) # check if config says columns or bool @@ -1598,14 +1568,11 @@ def _get_param_groups( {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} ) - param_groups_with_counts = pd.merge( - deduped, param_group_counts, on=["ParamGroup"]) + param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values( - by=["Counts"], inplace=True, ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange( - param_groups_with_counts.shape[0]) + 1 + param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 # Send the new, ordered param group ids to the files list ordered_labeled_files = pd.merge( @@ -1613,15 +1580,13 @@ def _get_param_groups( ) # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values( - by=["Counts"], inplace=True, ascending=False) + ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) # now get rid of cluster cols from deduped and df for col in list(ordered_labeled_files.columns): if col.startswith("Cluster_"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) - param_groups_with_counts = param_groups_with_counts.drop( - col, axis=1) + param_groups_with_counts = param_groups_with_counts.drop(col, axis=1) if col.endswith("_x"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index ddaa585f..6562f35b 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -57,8 +57,7 @@ def _check_sdc_cols(meta1, meta2): source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[( - actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] + source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -299,8 +298,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), - file_entities.get("session")) + acq_id = (file_entities.get("subject"), file_entities.get("session")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) @@ -327,8 +325,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_group_info = [] for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] - acq_group_info.append( - (groupnum, content_id_counts[content_id_row]) + content_id) + acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) for subject, session in contents_to_subjects[content_id]: grouped_sub_sess.append( {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} @@ -336,8 +333,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) - acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", - sep="\t", index=False) + acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) # Create data dictionary for acq group tsv acq_dict = get_acq_dictionary() @@ -346,8 +342,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the summary of acq groups to a text file with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt: - infotxt.write("\n".join([" ".join(map(str, line)) - for line in acq_group_info])) + infotxt.write("\n".join([" ".join(map(str, line)) for line in acq_group_info])) # Create and save AcqGroupInfo data dictionary header_dict = {} diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 4c2266e1..28211cc2 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -88,8 +88,7 @@ def test_ok_json_merge_cli(tmp_path): assert os.path.isfile(source_json) assert os.path.isfile(dest_json) - merge_proc = subprocess.run( - ["bids-sidecar-merge", str(source_json), str(dest_json)]) + merge_proc = subprocess.run(["bids-sidecar-merge", str(source_json), str(dest_json)]) assert merge_proc.returncode == 0 assert not _get_json_string(dest_json) == orig_dest_json_content @@ -144,8 +143,7 @@ def test_purge_no_datalad(tmp_path): / "sub-03_ses-phdiff_task-rest_bold.json" ) scans.append(scan_name) - scans.append( - "sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") + scans.append("sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") # create and save .txt with list of scans purge_path = str(tmp_path / "purge_scans.txt") @@ -278,8 +276,7 @@ def test_bad_json_merge_cli(tmp_path): / "sub-01_ses-phdiff_acq-HASC55AP_dwi.json" ) - merge_proc = subprocess.run( - ["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) + merge_proc = subprocess.run(["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) assert merge_proc.returncode > 0 assert _get_json_string(dest_json) == orig_dest_json_content @@ -359,12 +356,10 @@ def test_tsv_merge_no_datalad(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing) - bod.apply_tsv_changes(original_summary_tsv, - original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) # these will not actually be equivalent because of the auto renames - assert file_hash(original_summary_tsv) != file_hash( - tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -374,33 +369,28 @@ def test_tsv_merge_no_datalad(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, - "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to apply merges! - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, - str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash( - tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -411,8 +401,7 @@ def test_tsv_merge_no_datalad(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str( - tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) @@ -430,8 +419,7 @@ def test_tsv_merge_changes(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing except rename) - bod.apply_tsv_changes(original_summary_tsv, - original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) orig = pd.read_table(original_summary_tsv) # TEST RenameEntitySet column got populated CORRECTLY for row in range(len(orig)): @@ -458,8 +446,7 @@ def test_tsv_merge_changes(tmp_path): applied_f.loc[row, "KeyParamGroup"] ) else: - occurrences[applied_f.loc[row, "FilePath"]] = [ - applied_f.loc[row, "KeyParamGroup"]] + occurrences[applied_f.loc[row, "FilePath"]] = [applied_f.loc[row, "KeyParamGroup"]] assert len(orig) == len(applied) @@ -477,8 +464,7 @@ def test_tsv_merge_changes(tmp_path): assert renamed # will no longer be equal because of auto rename! - assert file_hash(original_summary_tsv) != file_hash( - tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -488,32 +474,27 @@ def test_tsv_merge_changes(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, - "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to merge - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, - str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash( - tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -524,8 +505,7 @@ def test_tsv_merge_changes(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str( - tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) # Make sure MergeInto == 0 deletes the param group and all associations @@ -709,8 +689,7 @@ def test_tsv_creation(tmp_path): # if entity sets in rows i and i+1 are the same if isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 - assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + - 1]["ParamGroup"] - 1 + assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1 # and count i < count i + 1 assert isummary_df.iloc[i]["Counts"] >= isummary_df.iloc[i + 1]["Counts"] @@ -822,13 +801,11 @@ def test_apply_tsv_changes(tmp_path): for f in deleted_f: assert Path(str(data_root / "complete") + f).exists() - assert Path(str(data_root / "complete") + - f.replace("nii.gz", "json")).exists() + assert Path(str(data_root / "complete") + f.replace("nii.gz", "json")).exists() # apply deletion complete_cubids.apply_tsv_changes( - mod2_path, str( - tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") + mod2_path, str(tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") ) # make sure deleted_keyparam gone from files_tsv @@ -861,8 +838,7 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", - acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1063,8 +1039,7 @@ def test_docker(): """ try: return_status = 1 - ret = subprocess.run(["docker", "version"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError as e: from errno import ENOENT diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index 78e15501..06d3af2a 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -28,8 +28,7 @@ def _test_path_exists(): It asserts that the function returns the expected path when the path exists, and raises an `argparse.ArgumentTypeError` when the path does not exist. """ - assert _path_exists("/path/to/existing/file", - None) == "/path/to/existing/file" + assert _path_exists("/path/to/existing/file", None) == "/path/to/existing/file" with pytest.raises(argparse.ArgumentTypeError): _path_exists("/path/to/nonexistent/file", None) diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index ca70d21a..6ab847fd 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -74,8 +74,7 @@ def _test_copy_exemplars(cubids_instance): exemplars_dir = "/path/to/exemplars" exemplars_tsv = "/path/to/exemplars.tsv" min_group_size = 2 - cubids_instance.copy_exemplars( - exemplars_dir, exemplars_tsv, min_group_size) + cubids_instance.copy_exemplars(exemplars_dir, exemplars_tsv, min_group_size) # Add assertions here @@ -205,10 +204,8 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): - files = ["sub-01_ses-01_task-rest_bold.nii.gz", - "sub-02_ses-01_task-rest_bold.nii.gz"] - fieldmap_lookup = { - "sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} + files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] + fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" @@ -223,8 +220,7 @@ def _test_round_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"round": 3}} modality = "bold" - rounded_params = cubids_instance.round_params( - param_group_df, config, modality) + rounded_params = cubids_instance.round_params(param_group_df, config, modality) # Add assertions here @@ -238,8 +234,7 @@ def _test_format_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"format": "{:.2f}"}} modality = "bold" - formatted_params = cubids_instance.format_params( - param_group_df, config, modality) + formatted_params = cubids_instance.format_params(param_group_df, config, modality) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 9bf6bda1..c64da372 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -27,8 +27,7 @@ def _remove_a_json(json_file): def _edit_a_nifti(nifti_file): img = nb.load(nifti_file) - new_img = nb.Nifti1Image(np.random.rand( - *img.shape), affine=img.affine, header=img.header) + new_img = nb.Nifti1Image(np.random.rand(*img.shape), affine=img.affine, header=img.header) new_img.to_filename(nifti_file) @@ -77,8 +76,7 @@ def _add_ext_files(img_path): if "/dwi/" in img_path: # add bval and bvec for ext in dwi_exts: - dwi_ext_file = img_path.replace( - ".nii.gz", "").replace(".nii", "") + ext + dwi_ext_file = img_path.replace(".nii.gz", "").replace(".nii", "") + ext Path(dwi_ext_file).touch() if "bold" in img_path: no_suffix = img_path.rpartition("_")[0] diff --git a/cubids/validator.py b/cubids/validator.py index bb721212..a7225ba0 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -17,8 +17,7 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # New schema BIDS validator doesn't have option to ignore subject consistency. # Build the deno command to run the BIDS validator. - command = ["deno", "run", "-A", "jsr:@bids/validator", - path, "--verbose", "--json"] + command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -35,8 +34,7 @@ def get_bids_validator_version(): Version of the BIDS validator. """ command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] - result = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() version = output.split()[-1] # Remove ANSI color codes @@ -57,8 +55,7 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - raise ValueError( - "Couldn't find any subjects in the specified directory:\n" + bids_dir) + raise ValueError("Couldn't find any subjects in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -66,8 +63,7 @@ def build_subject_paths(bids_dir): purepath = pathlib.PurePath(sub) sub_label = purepath.name - files = [x for x in glob.glob( - sub + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob(sub + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subjects_dict[sub_label] = files @@ -87,8 +83,7 @@ def build_first_subject_path(bids_dir, subject): purepath = pathlib.PurePath(subject) sub_label = purepath.name - files = [x for x in glob.glob( - subject + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subject_dict[sub_label] = files @@ -159,8 +154,7 @@ def parse_issue(issue_dict): issues = data.get("issues", {}).get("issues", []) if not issues: return pd.DataFrame( - columns=["location", "code", "issueMessage", - "subCode", "severity", "rule"] + columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] ) # Parse all issues diff --git a/cubids/workflows.py b/cubids/workflows.py index a28b61a3..11931c5b 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -82,8 +82,7 @@ def validate( # parse the string output parsed = parse_validator_output(ret.stdout.decode("UTF-8")) if parsed.shape[1] < 1: - logger.info( - "No issues/warnings parsed, your dataset is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: logger.info("BIDS issues/warnings found in the dataset") @@ -130,8 +129,7 @@ def validate( subjects_dict = { k: v for k, v in subjects_dict.items() if k in sequential_subjects } - assert len(list(subjects_dict.keys()) - ) > 1, "No subjects found in filter" + assert len(list(subjects_dict.keys())) > 1, "No subjects found in filter" for subject, files_list in tqdm.tqdm(subjects_dict.items()): # logger.info(" ".join(["Processing subject:", subject])) # create a temporary directory and symlink the data @@ -160,8 +158,7 @@ def validate( ret = run_validator(call) # parse output if ret.returncode != 0: - logger.error( - "Errors returned from validator run, parsing now") + logger.error("Errors returned from validator run, parsing now") # parse the output and add to list if it returns a df decoded = ret.stdout.decode("UTF-8") @@ -172,8 +169,7 @@ def validate( # concatenate the parsed data and exit if len(parsed) < 1: - logger.info( - "No issues/warnings parsed, your dataset is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: @@ -285,8 +281,7 @@ def bids_version(bids_dir, write=False): if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") ] if not sub_folders: - raise ValueError( - "No folders starting with 'sub-' found. Please provide a valid BIDS.") + raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.") subject = sub_folders[0] except FileNotFoundError: raise FileNotFoundError(f"The directory {bids_dir} does not exist.") @@ -331,8 +326,7 @@ def bids_version(bids_dir, write=False): def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" - merge_status = merge_json_into_json( - from_json, to_json, raise_on_error=False) + merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) sys.exit(merge_status) @@ -371,8 +365,7 @@ def group(bids_dir, container, acq_group_level, config, output_prefix): apply_config = config is not None if apply_config: - input_config_dir_link = str( - config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + output_prefix.name @@ -479,18 +472,14 @@ def apply( # Run it through a container container_type = _get_container_type(container) bids_dir_link = str(bids_dir.absolute()) + ":/bids" - input_summary_tsv_dir_link = str( - edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" - input_files_tsv_dir_link = str( - edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" - output_tsv_dir_link = str( - new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" + input_summary_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" + input_files_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" + output_tsv_dir_link = str(new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" # FROM BOND-GROUP apply_config = config is not None if apply_config: - input_config_dir_link = str( - config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + new_tsv_prefix.name