diff --git a/cubids/cli.py b/cubids/cli.py
index d2f8a920..ea78680c 100644
--- a/cubids/cli.py
+++ b/cubids/cli.py
@@ -27,7 +27,8 @@ def _is_file(path, parser):
     """Ensure a given path exists and it is a file."""
     path = _path_exists(path, parser)
     if not path.is_file():
-        raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.")
+        raise parser.error(
+            f"Path should point to a file (or symlink of file): <{path}>.")
 
     return path
 
@@ -144,7 +145,8 @@ def _enter_bids_version(argv=None):
 
 def _parse_bids_sidecar_merge():
     parser = argparse.ArgumentParser(
-        description=("bids-sidecar-merge: merge critical keys from one sidecar to another"),
+        description=(
+            "bids-sidecar-merge: merge critical keys from one sidecar to another"),
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
     IsFile = partial(_is_file, parser=parser)
@@ -216,7 +218,8 @@ def _parse_group():
         default="subject",
         choices=["subject", "session"],
         action="store",
-        help=("Level at which acquisition groups are created options: 'subject' or 'session'"),
+        help=(
+            "Level at which acquisition groups are created options: 'subject' or 'session'"),
     )
     parser.add_argument(
         "--config",
@@ -244,7 +247,8 @@ def _enter_group(argv=None):
 
 def _parse_apply():
     parser = argparse.ArgumentParser(
-        description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"),
+        description=(
+            "cubids-apply: apply the changes specified in a tsv to a BIDS directory"),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
     PathExists = partial(_path_exists, parser=parser)
@@ -312,7 +316,8 @@ def _parse_apply():
         default="subject",
         choices=["subject", "session"],
         action="store",
-        help=("Level at which acquisition groups are created options: 'subject' or 'session'"),
+        help=(
+            "Level at which acquisition groups are created options: 'subject' or 'session'"),
     )
     parser.add_argument(
         "--config",
@@ -341,7 +346,8 @@ def _enter_apply(argv=None):
 
 def _parse_datalad_save():
     parser = argparse.ArgumentParser(
-        description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"),
+        description=(
+            "cubids-datalad-save: perform a DataLad save on a BIDS directory"),
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
     PathExists = partial(_path_exists, parser=parser)
@@ -699,8 +705,10 @@ def _enter_print_metadata_fields(argv=None):
     ("copy-exemplars", _parse_copy_exemplars, workflows.copy_exemplars),
     ("undo", _parse_undo, workflows.undo),
     ("datalad-save", _parse_datalad_save, workflows.datalad_save),
-    ("print-metadata-fields", _parse_print_metadata_fields, workflows.print_metadata_fields),
-    ("remove-metadata-fields", _parse_remove_metadata_fields, workflows.remove_metadata_fields),
+    ("print-metadata-fields", _parse_print_metadata_fields,
+     workflows.print_metadata_fields),
+    ("remove-metadata-fields", _parse_remove_metadata_fields,
+     workflows.remove_metadata_fields),
 ]
 
 
@@ -709,7 +717,8 @@ def _get_parser():
     from cubids import __version__
 
     parser = argparse.ArgumentParser(prog="cubids")
-    parser.add_argument("-v", "--version", action="version", version=__version__)
+    parser.add_argument("-v", "--version",
+                        action="version", version=__version__)
     subparsers = parser.add_subparsers(help="CuBIDS commands")
 
     for command, parser_func, run_func in COMMANDS:
diff --git a/cubids/cubids.py b/cubids/cubids.py
index 27f632e3..4de83826 100644
--- a/cubids/cubids.py
+++ b/cubids/cubids.py
@@ -149,9 +149,11 @@ def reset_bids_layout(self, validate=False):
             re.compile(r"/\."),
         ]
 
-        indexer = bids.BIDSLayoutIndexer(validate=validate, ignore=ignores, index_metadata=False)
+        indexer = bids.BIDSLayoutIndexer(
+            validate=validate, ignore=ignores, index_metadata=False)
 
-        self._layout = bids.BIDSLayout(self.path, validate=validate, indexer=indexer)
+        self._layout = bids.BIDSLayout(
+            self.path, validate=validate, indexer=indexer)
 
     def create_cubids_code_dir(self):
         """Create CuBIDS code directory.
@@ -201,7 +203,8 @@ def datalad_save(self, message=None):
             Commit message to use with datalad save.
         """
         if not self.datalad_ready:
-            raise Exception("DataLad has not been initialized. use datalad_init()")
+            raise Exception(
+                "DataLad has not been initialized. use datalad_init()")
 
         statuses = self.datalad_handle.save(message=message or "CuBIDS Save")
         saved_status = set([status["status"] for status in statuses])
@@ -223,7 +226,8 @@ def is_datalad_clean(self):
         """
         if not self.datalad_ready:
             raise Exception("Datalad not initialized, can't determine status")
-        statuses = set([status["state"] for status in self.datalad_handle.status()])
+        statuses = set([status["state"]
+                       for status in self.datalad_handle.status()])
         return statuses == set(["clean"])
 
     def datalad_undo_last_commit(self):
@@ -237,8 +241,10 @@ def datalad_undo_last_commit(self):
             If there are untracked changes in the datalad dataset.
         """
         if not self.is_datalad_clean():
-            raise Exception("Untracked changes present. Run clear_untracked_changes first")
-        reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path)
+            raise Exception(
+                "Untracked changes present. Run clear_untracked_changes first")
+        reset_proc = subprocess.run(
+            ["git", "reset", "--hard", "HEAD~1"], cwd=self.path)
         reset_proc.check_returncode()
 
     def add_nifti_info(self):
@@ -342,11 +348,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T
         files_df = pd.read_table(files_tsv)
 
         # Check that the MergeInto column only contains valid merges
-        ok_merges, deletions = check_merging_operations(summary_tsv, raise_on_error=raise_on_error)
+        ok_merges, deletions = check_merging_operations(
+            summary_tsv, raise_on_error=raise_on_error)
 
         merge_commands = []
         for source_id, dest_id in ok_merges:
-            dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)]
+            dest_files = files_df.loc[(
+                files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)]
             source_files = files_df.loc[
                 (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1)
             ]
@@ -357,13 +365,15 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T
             for dest_nii in dest_files.FilePath:
                 dest_json = img_to_new_ext(self.path + dest_nii, ".json")
                 if Path(dest_json).exists() and Path(source_json).exists():
-                    merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}")
+                    merge_commands.append(
+                        f"bids-sidecar-merge {source_json} {dest_json}")
 
         # Get the delete commands
         # delete_commands = []
         to_remove = []
         for rm_id in deletions:
-            files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)]
+            files_to_rm = files_df.loc[(
+                files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)]
 
             for rm_me in files_to_rm.FilePath:
                 if Path(self.path + rm_me).exists():
@@ -436,7 +446,8 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T
 
             rename_commit = s1 + s2
 
-            self.datalad_handle.run(cmd=["bash", renames], message=rename_commit)
+            self.datalad_handle.run(
+                cmd=["bash", renames], message=rename_commit)
         else:
             subprocess.run(
                 ["bash", renames],
@@ -476,7 +487,8 @@ def change_filename(self, filepath, entities):
         entity_file_keys = []
 
         # Entities that may be in the filename?
-        file_keys = ["task", "acquisition", "direction", "reconstruction", "run"]
+        file_keys = ["task", "acquisition",
+                     "direction", "reconstruction", "run"]
 
         for key in file_keys:
             if key in list(entities.keys()):
@@ -490,7 +502,8 @@ def change_filename(self, filepath, entities):
                 # XXX: This adds an extra leading zero to run.
                 entities["run"] = "0" + str(entities["run"])
 
-        filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys])
+        filename = "_".join(
+            [f"{key}-{entities[key]}" for key in entity_file_keys])
         filename = (
             filename.replace("acquisition", "acq")
             .replace("direction", "dir")
@@ -499,7 +512,8 @@ def change_filename(self, filepath, entities):
         if len(filename) > 0:
             filename = sub_ses + "_" + filename + "_" + suffix + old_ext
         else:
-            raise ValueError(f"Could not construct new filename for {filepath}")
+            raise ValueError(
+                f"Could not construct new filename for {filepath}")
 
         # CHECK TO SEE IF DATATYPE CHANGED
         # datatype may be overridden/changed if the original file is located in the wrong folder.
@@ -517,7 +531,8 @@ def change_filename(self, filepath, entities):
             dtype_new = dtype_orig
 
         # Construct the new filename
-        new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename
+        new_path = str(self.path) + "/" + sub + "/" + \
+            ses + "/" + dtype_new + "/" + filename
 
         # Add the scan path + new path to the lists of old, new filenames
         self.old_filenames.append(filepath)
@@ -536,7 +551,8 @@ def change_filename(self, filepath, entities):
             # ensure assoc not an IntendedFor reference
             if ".nii" not in str(assoc_path):
                 self.old_filenames.append(assoc_path)
-                new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes))
+                new_ext_path = img_to_new_ext(
+                    new_path, "".join(Path(assoc_path).suffixes))
                 self.new_filenames.append(new_ext_path)
 
         # MAKE SURE THESE AREN'T COVERED BY get_associations!!!
@@ -609,7 +625,8 @@ def change_filename(self, filepath, entities):
             if Path(old_labeling).exists():
                 self.old_filenames.append(old_labeling)
                 new_scan_end = "_" + suffix + old_ext
-                new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg")
+                new_labeling = new_path.replace(
+                    new_scan_end, "_asllabeling.jpg")
                 self.new_filenames.append(new_labeling)
 
         # RENAME INTENDED FORS!
@@ -635,7 +652,8 @@ def change_filename(self, filepath, entities):
                     # remove old filename
                     data["IntendedFor"].remove(item)
                     # add new filename
-                    data["IntendedFor"].append(_get_intended_for_reference(new_path))
+                    data["IntendedFor"].append(
+                        _get_intended_for_reference(new_path))
 
                 # update the json with the new data dictionary
                 _update_json(filename_with_if, data)
@@ -808,7 +826,8 @@ def _purge_associations(self, scans):
 
             if "/func/" in str(path):
                 # add tsvs
-                tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events")
+                tsv = img_to_new_ext(str(path), ".tsv").replace(
+                    "_bold", "_events")
                 if Path(tsv).exists():
                     to_remove.append(tsv)
                 # add tsv json (if exists)
@@ -922,7 +941,8 @@ def get_param_groups_from_entity_set(self, entity_set):
             2. A data frame with param group summaries
         """
         if not self.fieldmaps_cached:
-            raise Exception("Fieldmaps must be cached to find parameter groups.")
+            raise Exception(
+                "Fieldmaps must be cached to find parameter groups.")
 
         key_entities = _entity_set_to_entities(entity_set)
         key_entities["extension"] = ".nii[.gz]*"
@@ -975,7 +995,8 @@ def create_data_dictionary(self):
             mod_dict = sidecar_params[mod]
             for s_param in mod_dict.keys():
                 if s_param not in self.data_dict.keys():
-                    self.data_dict[s_param] = {"Description": "Scanning Parameter"}
+                    self.data_dict[s_param] = {
+                        "Description": "Scanning Parameter"}
 
         relational_params = self.grouping_config.get("relational_params")
         for r_param in relational_params.keys():
@@ -987,7 +1008,8 @@ def create_data_dictionary(self):
             mod_dict = derived_params[mod]
             for d_param in mod_dict.keys():
                 if d_param not in self.data_dict.keys():
-                    self.data_dict[d_param] = {"Description": "NIfTI Header Parameter"}
+                    self.data_dict[d_param] = {
+                        "Description": "NIfTI Header Parameter"}
 
         # Manually add non-sidecar columns/descriptions to data_dict
         desc1 = "Column where users mark groups to manually check"
@@ -1094,17 +1116,20 @@ def get_param_groups_dataframes(self):
                 long_name = big_df.loc[row, "FilePath"]
                 big_df.loc[row, "FilePath"] = long_name.replace(self.path, "")
 
-        summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True))
+        summary = _order_columns(
+            pd.concat(param_group_summaries, ignore_index=True))
 
         # create new col that strings key and param group together
-        summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str)
+        summary["KeyParamGroup"] = summary["EntitySet"] + \
+            "__" + summary["ParamGroup"].map(str)
 
         # move this column to the front of the dataframe
         key_param_col = summary.pop("KeyParamGroup")
         summary.insert(0, "KeyParamGroup", key_param_col)
 
         # do the same for the files df
-        big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str)
+        big_df["KeyParamGroup"] = big_df["EntitySet"] + \
+            "__" + big_df["ParamGroup"].map(str)
 
         # move this column to the front of the dataframe
         key_param_col = big_df.pop("KeyParamGroup")
@@ -1253,8 +1278,10 @@ def get_tsvs(self, path_prefix):
 
         big_df, summary = self.get_param_groups_dataframes()
 
-        summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False])
-        big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False])
+        summary = summary.sort_values(
+            by=["Modality", "EntitySetCount"], ascending=[True, False])
+        big_df = big_df.sort_values(
+            by=["Modality", "EntitySetCount"], ascending=[True, False])
 
         # Create json dictionaries for summary and files tsvs
         self.create_data_dictionary()
@@ -1273,7 +1300,8 @@ def get_tsvs(self, path_prefix):
         summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False)
 
         # Calculate the acq groups
-        group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level)
+        group_by_acquisition_sets(
+            f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level)
 
         print(f"CuBIDS detected {len(summary)} Parameter Groups.")
@@ -1492,7 +1520,8 @@ def _get_param_groups(
         # Get the fieldmaps out and add their types
         if "FieldmapKey" in relational_params:
             fieldmap_types = sorted(
-                [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]]
+                [_file_to_entity_set(fmap.path)
+                 for fmap in fieldmap_lookup[path]]
             )
 
             # check if config says columns or bool
@@ -1514,7 +1543,8 @@ def _get_param_groups(
         # If it's a fieldmap, see what entity set it's intended to correct
correct if "IntendedForKey" in relational_params: intended_entity_sets = sorted( - [_file_to_entity_set(intention) for intention in intentions] + [_file_to_entity_set(intention) + for intention in intentions] ) # check if config says columns or bool @@ -1568,11 +1598,14 @@ def _get_param_groups( {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} ) - param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) + param_groups_with_counts = pd.merge( + deduped, param_group_counts, on=["ParamGroup"]) # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 + param_groups_with_counts.sort_values( + by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange( + param_groups_with_counts.shape[0]) + 1 # Send the new, ordered param group ids to the files list ordered_labeled_files = pd.merge( @@ -1580,13 +1613,15 @@ def _get_param_groups( ) # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) + ordered_labeled_files.sort_values( + by=["Counts"], inplace=True, ascending=False) # now get rid of cluster cols from deduped and df for col in list(ordered_labeled_files.columns): if col.startswith("Cluster_"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) - param_groups_with_counts = param_groups_with_counts.drop(col, axis=1) + param_groups_with_counts = param_groups_with_counts.drop( + col, axis=1) if col.endswith("_x"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 6562f35b..ddaa585f 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -57,7 +57,8 @@ def _check_sdc_cols(meta1, meta2): source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] + source_row = actions.loc[( + actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -298,7 +299,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), file_entities.get("session")) + acq_id = (file_entities.get("subject"), + file_entities.get("session")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) @@ -325,7 +327,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_group_info = [] for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] - acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) + acq_group_info.append( + (groupnum, content_id_counts[content_id_row]) + content_id) for subject, session in contents_to_subjects[content_id]: grouped_sub_sess.append( {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} @@ -333,7 +336,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the mapping of subject/session to acq_group_df = 
-    acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False)
+    acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv",
+                        sep="\t", index=False)
 
     # Create data dictionary for acq group tsv
     acq_dict = get_acq_dictionary()
@@ -342,7 +346,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
 
     # Write the summary of acq groups to a text file
     with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt:
-        infotxt.write("\n".join([" ".join(map(str, line)) for line in acq_group_info]))
+        infotxt.write("\n".join([" ".join(map(str, line))
+                      for line in acq_group_info]))
 
     # Create and save AcqGroupInfo data dictionary
     header_dict = {}
diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py
index 28211cc2..4c2266e1 100644
--- a/cubids/tests/test_bond.py
+++ b/cubids/tests/test_bond.py
@@ -88,7 +88,8 @@ def test_ok_json_merge_cli(tmp_path):
     assert os.path.isfile(source_json)
     assert os.path.isfile(dest_json)
 
-    merge_proc = subprocess.run(["bids-sidecar-merge", str(source_json), str(dest_json)])
+    merge_proc = subprocess.run(
+        ["bids-sidecar-merge", str(source_json), str(dest_json)])
 
     assert merge_proc.returncode == 0
     assert not _get_json_string(dest_json) == orig_dest_json_content
@@ -143,7 +144,8 @@ def test_purge_no_datalad(tmp_path):
         / "sub-03_ses-phdiff_task-rest_bold.json"
     )
     scans.append(scan_name)
-    scans.append("sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz")
+    scans.append(
+        "sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz")
 
     # create and save .txt with list of scans
     purge_path = str(tmp_path / "purge_scans.txt")
@@ -276,7 +278,8 @@ def test_bad_json_merge_cli(tmp_path):
         / "sub-01_ses-phdiff_acq-HASC55AP_dwi.json"
     )
 
-    merge_proc = subprocess.run(["bids-sidecar-merge", str(invalid_source_json), str(dest_json)])
+    merge_proc = subprocess.run(
+        ["bids-sidecar-merge", str(invalid_source_json), str(dest_json)])
 
     assert merge_proc.returncode > 0
     assert _get_json_string(dest_json) == orig_dest_json_content
@@ -356,10 +359,12 @@ def test_tsv_merge_no_datalad(tmp_path):
     original_files_tsv = tsv_prefix + "_files.tsv"
 
     # give tsv with no changes (make sure it does nothing)
-    bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified"))
+    bod.apply_tsv_changes(original_summary_tsv,
+                          original_files_tsv, str(tmp_path / "unmodified"))
 
     # these will not actually be equivalent because of the auto renames
-    assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv")
+    assert file_hash(original_summary_tsv) != file_hash(
+        tmp_path / "unmodified_summary.tsv")
 
     # Find the dwi with no FlipAngle
     summary_df = pd.read_table(original_summary_tsv)
@@ -369,28 +374,33 @@ def test_tsv_merge_no_datalad(tmp_path):
     )
 
     # Find the dwi with and EchoTime ==
     (complete_dwi_row,) = np.flatnonzero(
-        summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi")
+        summary_df.EntitySet.str.fullmatch(
+            "acquisition-HASC55AP_datatype-dwi_suffix-dwi")
         & (summary_df.FlipAngle == 90.0)
         & (summary_df.EchoTime > 0.05)
     )
     (cant_merge_echotime_dwi_row,) = np.flatnonzero(
-        summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi")
+        summary_df.EntitySet.str.fullmatch(
+            "acquisition-HASC55AP_datatype-dwi_suffix-dwi")
         & (summary_df.FlipAngle == 90.0)
         & (summary_df.EchoTime < 0.05)
     )
 
     # Set a legal MergeInto value. This effectively fills in data
     # where there was previously as missing FlipAngle
-    summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row]
+    summary_df.loc[fa_nan_dwi_row,
+                   "MergeInto"] = summary_df.ParamGroup[complete_dwi_row]
 
     valid_tsv_file = tsv_prefix + "_valid_summary.tsv"
     summary_df.to_csv(valid_tsv_file, sep="\t", index=False)
 
     # about to apply merges!
-    bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified"))
+    bod.apply_tsv_changes(valid_tsv_file, original_files_tsv,
+                          str(tmp_path / "ok_modified"))
 
-    assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv")
+    assert not file_hash(original_summary_tsv) == file_hash(
+        tmp_path / "ok_modified_summary.tsv")
 
     # Add an illegal merge to MergeInto
     summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[
@@ -401,7 +411,8 @@ def test_tsv_merge_no_datalad(tmp_path):
 
     with pytest.raises(Exception):
         bod.apply_tsv_changes(
-            invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified")
+            invalid_tsv_file, str(
+                tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified")
         )
 
 
@@ -419,7 +430,8 @@ def test_tsv_merge_changes(tmp_path):
     original_files_tsv = tsv_prefix + "_files.tsv"
 
     # give tsv with no changes (make sure it does nothing except rename)
-    bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified"))
+    bod.apply_tsv_changes(original_summary_tsv,
+                          original_files_tsv, str(tmp_path / "unmodified"))
     orig = pd.read_table(original_summary_tsv)
     # TEST RenameEntitySet column got populated CORRECTLY
     for row in range(len(orig)):
@@ -446,7 +458,8 @@ def test_tsv_merge_changes(tmp_path):
                     applied_f.loc[row, "KeyParamGroup"]
                 )
             else:
-                occurrences[applied_f.loc[row, "FilePath"]] = [applied_f.loc[row, "KeyParamGroup"]]
+                occurrences[applied_f.loc[row, "FilePath"]] = [
+                    applied_f.loc[row, "KeyParamGroup"]]
 
     assert len(orig) == len(applied)
 
@@ -464,7 +477,8 @@ def test_tsv_merge_changes(tmp_path):
     assert renamed
 
     # will no longer be equal because of auto rename!
-    assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv")
+    assert file_hash(original_summary_tsv) != file_hash(
+        tmp_path / "unmodified_summary.tsv")
 
     # Find the dwi with no FlipAngle
     summary_df = pd.read_table(original_summary_tsv)
@@ -474,27 +488,32 @@ def test_tsv_merge_changes(tmp_path):
     )
 
     # Find the dwi with and EchoTime ==
     (complete_dwi_row,) = np.flatnonzero(
-        summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi")
+        summary_df.EntitySet.str.fullmatch(
+            "acquisition-HASC55AP_datatype-dwi_suffix-dwi")
         & (summary_df.FlipAngle == 90.0)
         & (summary_df.EchoTime > 0.05)
     )
     (cant_merge_echotime_dwi_row,) = np.flatnonzero(
-        summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi")
+        summary_df.EntitySet.str.fullmatch(
+            "acquisition-HASC55AP_datatype-dwi_suffix-dwi")
         & (summary_df.FlipAngle == 90.0)
         & (summary_df.EchoTime < 0.05)
     )
 
     # Set a legal MergeInto value. This effectively fills in data
     # where there was previously as missing FlipAngle
-    summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row]
+    summary_df.loc[fa_nan_dwi_row,
+                   "MergeInto"] = summary_df.ParamGroup[complete_dwi_row]
 
     valid_tsv_file = tsv_prefix + "_valid_summary.tsv"
     summary_df.to_csv(valid_tsv_file, sep="\t", index=False)
 
     # about to merge
-    bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified"))
+    bod.apply_tsv_changes(valid_tsv_file, original_files_tsv,
+                          str(tmp_path / "ok_modified"))
 
-    assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv")
+    assert not file_hash(original_summary_tsv) == file_hash(
+        tmp_path / "ok_modified_summary.tsv")
 
     # Add an illegal merge to MergeInto
     summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[
@@ -505,7 +524,8 @@ def test_tsv_merge_changes(tmp_path):
 
     with pytest.raises(Exception):
         bod.apply_tsv_changes(
-            invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified")
+            invalid_tsv_file, str(
+                tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified")
         )
 
     # Make sure MergeInto == 0 deletes the param group and all associations
@@ -689,7 +709,8 @@ def test_tsv_creation(tmp_path):
             # if entity sets in rows i and i+1 are the same
             if isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]:
                 # param group i = param group i+1
-                assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1
+                assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i +
+                                                                             1]["ParamGroup"] - 1
                 # and count i < count i + 1
                 assert isummary_df.iloc[i]["Counts"] >= isummary_df.iloc[i + 1]["Counts"]
@@ -801,11 +822,13 @@ def test_apply_tsv_changes(tmp_path):
 
     for f in deleted_f:
         assert Path(str(data_root / "complete") + f).exists()
-        assert Path(str(data_root / "complete") + f.replace("nii.gz", "json")).exists()
+        assert Path(str(data_root / "complete") +
+                    f.replace("nii.gz", "json")).exists()
 
     # apply deletion
     complete_cubids.apply_tsv_changes(
-        mod2_path, str(tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted")
+        mod2_path, str(
+            tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted")
     )
 
     # make sure deleted_keyparam gone from files_tsv
@@ -838,7 +861,8 @@ def test_session_apply(tmp_path):
 
     data_root = get_data(tmp_path)
 
-    ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True)
+    ses_cubids = CuBIDS(data_root / "inconsistent",
+                        acq_group_level="session", use_datalad=True)
 
     ses_cubids.get_tsvs(str(tmp_path / "originals"))
 
@@ -1039,7 +1063,8 @@ def test_docker():
     """
     try:
         return_status = 1
-        ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        ret = subprocess.run(["docker", "version"],
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     except OSError as e:
         from errno import ENOENT
diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py
index a0e9066a..78e15501 100644
--- a/cubids/tests/test_cli.py
+++ b/cubids/tests/test_cli.py
@@ -14,9 +14,10 @@
 """
 
 import argparse
+
 import pytest
 
-from cubids.cli import _path_exists, _is_file, _get_parser, _main
+from cubids.cli import _get_parser, _is_file, _main, _path_exists
 
 
 def _test_path_exists():
@@ -27,7 +28,8 @@ def _test_path_exists():
     It asserts that the function returns the expected path when the path exists,
     and raises an `argparse.ArgumentTypeError` when the path does not exist.
""" - assert _path_exists("/path/to/existing/file", None) == "/path/to/existing/file" + assert _path_exists("/path/to/existing/file", + None) == "/path/to/existing/file" with pytest.raises(argparse.ArgumentTypeError): _path_exists("/path/to/nonexistent/file", None) diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index 6ab847fd..ca70d21a 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -74,7 +74,8 @@ def _test_copy_exemplars(cubids_instance): exemplars_dir = "/path/to/exemplars" exemplars_tsv = "/path/to/exemplars.tsv" min_group_size = 2 - cubids_instance.copy_exemplars(exemplars_dir, exemplars_tsv, min_group_size) + cubids_instance.copy_exemplars( + exemplars_dir, exemplars_tsv, min_group_size) # Add assertions here @@ -204,8 +205,10 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): - files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] - fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} + files = ["sub-01_ses-01_task-rest_bold.nii.gz", + "sub-02_ses-01_task-rest_bold.nii.gz"] + fieldmap_lookup = { + "sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" @@ -220,7 +223,8 @@ def _test_round_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"round": 3}} modality = "bold" - rounded_params = cubids_instance.round_params(param_group_df, config, modality) + rounded_params = cubids_instance.round_params( + param_group_df, config, modality) # Add assertions here @@ -234,7 +238,8 @@ def _test_format_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"format": "{:.2f}"}} modality = "bold" - formatted_params = cubids_instance.format_params(param_group_df, config, modality) + formatted_params = cubids_instance.format_params( + param_group_df, config, modality) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index c64da372..9bf6bda1 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -27,7 +27,8 @@ def _remove_a_json(json_file): def _edit_a_nifti(nifti_file): img = nb.load(nifti_file) - new_img = nb.Nifti1Image(np.random.rand(*img.shape), affine=img.affine, header=img.header) + new_img = nb.Nifti1Image(np.random.rand( + *img.shape), affine=img.affine, header=img.header) new_img.to_filename(nifti_file) @@ -76,7 +77,8 @@ def _add_ext_files(img_path): if "/dwi/" in img_path: # add bval and bvec for ext in dwi_exts: - dwi_ext_file = img_path.replace(".nii.gz", "").replace(".nii", "") + ext + dwi_ext_file = img_path.replace( + ".nii.gz", "").replace(".nii", "") + ext Path(dwi_ext_file).touch() if "bold" in img_path: no_suffix = img_path.rpartition("_")[0] diff --git a/cubids/validator.py b/cubids/validator.py index fe0e08ef..bb721212 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -5,8 +5,8 @@ import logging import os import pathlib -import subprocess import re +import subprocess import pandas as pd @@ -17,7 +17,8 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # New schema BIDS validator doesn't have option to ignore subject consistency. # Build the deno command to run the BIDS validator. 
- command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] + command = ["deno", "run", "-A", "jsr:@bids/validator", + path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -34,10 +35,12 @@ def get_bids_validator_version(): Version of the BIDS validator. """ command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() version = output.split()[-1] - clean_ver = re.sub(r'\x1b\[[0-9;]*m', '', version) # Remove ANSI color codes + # Remove ANSI color codes + clean_ver = re.sub(r"\x1b\[[0-9;]*m", "", version) return {"ValidatorVersion": clean_ver} @@ -54,7 +57,8 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - raise ValueError("Couldn't find any subjects in the specified directory:\n" + bids_dir) + raise ValueError( + "Couldn't find any subjects in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -62,7 +66,8 @@ def build_subject_paths(bids_dir): purepath = pathlib.PurePath(sub) sub_label = purepath.name - files = [x for x in glob.glob(sub + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob( + sub + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subjects_dict[sub_label] = files @@ -82,7 +87,8 @@ def build_first_subject_path(bids_dir, subject): purepath = pathlib.PurePath(subject) sub_label = purepath.name - files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob( + subject + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subject_dict[sub_label] = files @@ -153,8 +159,9 @@ def parse_issue(issue_dict): issues = data.get("issues", {}).get("issues", []) if not issues: return pd.DataFrame( - columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] - ) + columns=["location", "code", "issueMessage", + "subCode", "severity", "rule"] + ) # Parse all issues parsed_issues = [parse_issue(issue) for issue in issues] @@ -229,7 +236,7 @@ def update_dataset_description(path, new_info): # Write the updated data back to the file with open(description_path, "w") as f: - json.dump(existing_data, f, indent=4) + json.dump(existing_data, f, indent=4) print(f"Updated dataset_description.json at: {description_path}") # Check if .datalad directory exists before running the DataLad save command @@ -237,10 +244,14 @@ def update_dataset_description(path, new_info): if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir): try: subprocess.run( - ["datalad", "save", "-m", - "Save BIDS validator and schema version to dataset_description", - description_path], - check=True + [ + "datalad", + "save", + "-m", + "Save BIDS validator and schema version to dataset_description", + description_path, + ], + check=True, ) print("Changes saved with DataLad.") except subprocess.CalledProcessError as e: @@ -263,11 +274,11 @@ def bids_validator_version(output, path, write=False): validator_version = get_bids_validator_version() # Extract schemaVersion summary_info = extract_summary_info(output) - + combined_info = {**validator_version, **summary_info} if write: - # Update the dataset_description.json file + # Update the dataset_description.json file update_dataset_description(path, combined_info) elif not write: - print(combined_info) \ No newline at end 
+        print(combined_info)
diff --git a/cubids/workflows.py b/cubids/workflows.py
index 69bed501..a28b61a3 100644
--- a/cubids/workflows.py
+++ b/cubids/workflows.py
@@ -17,13 +17,13 @@
 from cubids.metadata_merge import merge_json_into_json
 from cubids.utils import _get_container_type
 from cubids.validator import (
+    bids_validator_version,
+    build_first_subject_path,
     build_subject_paths,
     build_validator_call,
     get_val_dictionary,
     parse_validator_output,
     run_validator,
-    build_first_subject_path,
-    bids_validator_version,
 )
 
 warnings.simplefilter(action="ignore", category=FutureWarning)
@@ -82,7 +82,8 @@ def validate(
         # parse the string output
         parsed = parse_validator_output(ret.stdout.decode("UTF-8"))
         if parsed.shape[1] < 1:
-            logger.info("No issues/warnings parsed, your dataset is BIDS valid.")
+            logger.info(
+                "No issues/warnings parsed, your dataset is BIDS valid.")
             sys.exit(0)
         else:
             logger.info("BIDS issues/warnings found in the dataset")
@@ -129,7 +130,8 @@ def validate(
             subjects_dict = {
                 k: v for k, v in subjects_dict.items() if k in sequential_subjects
             }
-            assert len(list(subjects_dict.keys())) > 1, "No subjects found in filter"
+            assert len(list(subjects_dict.keys())
+                       ) > 1, "No subjects found in filter"
         for subject, files_list in tqdm.tqdm(subjects_dict.items()):
             # logger.info(" ".join(["Processing subject:", subject]))
             # create a temporary directory and symlink the data
@@ -158,7 +160,8 @@ def validate(
             ret = run_validator(call)
             # parse output
             if ret.returncode != 0:
-                logger.error("Errors returned from validator run, parsing now")
+                logger.error(
+                    "Errors returned from validator run, parsing now")
 
             # parse the output and add to list if it returns a df
             decoded = ret.stdout.decode("UTF-8")
@@ -169,7 +172,8 @@ def validate(
 
         # concatenate the parsed data and exit
         if len(parsed) < 1:
-            logger.info("No issues/warnings parsed, your dataset is BIDS valid.")
+            logger.info(
+                "No issues/warnings parsed, your dataset is BIDS valid.")
 
             sys.exit(0)
         else:
@@ -260,10 +264,7 @@ def validate(
     sys.exit(proc.returncode)
 
 
-def bids_version(
-    bids_dir,
-    write=False
-):
+def bids_version(bids_dir, write=False):
     """Get BIDS validator and schema version.
 
     Parameters
@@ -284,13 +285,14 @@ def bids_version(
             if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-")
         ]
         if not sub_folders:
-            raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.")
+            raise ValueError(
+                "No folders starting with 'sub-' found. Please provide a valid BIDS.")
         subject = sub_folders[0]
     except FileNotFoundError:
         raise FileNotFoundError(f"The directory {bids_dir} does not exist.")
     except ValueError as ve:
         raise ve
-    
+
     # build a dictionary with {SubjectLabel: [List of files]}
     # run first subject only
     subject_dict = build_first_subject_path(bids_dir, subject)
@@ -329,7 +331,8 @@ def bids_version(
 
 def bids_sidecar_merge(from_json, to_json):
     """Merge critical keys from one sidecar to another."""
-    merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False)
+    merge_status = merge_json_into_json(
+        from_json, to_json, raise_on_error=False)
     sys.exit(merge_status)
 
 
@@ -368,7 +371,8 @@ def group(bids_dir, container, acq_group_level, config, output_prefix):
     apply_config = config is not None
     if apply_config:
-        input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro"
+        input_config_dir_link = str(
+            config.parent.absolute()) + ":/in_config:ro"
         linked_input_config = "/in_config/" + config.name
         linked_output_prefix = "/tsv/" + output_prefix.name
 
@@ -475,14 +479,18 @@ def apply(
     # Run it through a container
     container_type = _get_container_type(container)
     bids_dir_link = str(bids_dir.absolute()) + ":/bids"
-    input_summary_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro"
-    input_files_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro"
-    output_tsv_dir_link = str(new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw"
+    input_summary_tsv_dir_link = str(
+        edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro"
+    input_files_tsv_dir_link = str(
+        edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro"
+    output_tsv_dir_link = str(
+        new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw"
 
     # FROM BOND-GROUP
     apply_config = config is not None
     if apply_config:
-        input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro"
+        input_config_dir_link = str(
+            config.parent.absolute()) + ":/in_config:ro"
         linked_input_config = "/in_config/" + config.name
         linked_output_prefix = "/tsv/" + new_tsv_prefix.name