diff --git a/.circleci/config.yml b/.circleci/config.yml index 3dc05bf82..1fe2e7791 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,6 +1,6 @@ version: 2.1 orbs: - codecov: codecov/codecov@1.0.5 + codecov: codecov/codecov@3.2.4 jobs: run_pytests: @@ -9,7 +9,7 @@ jobs: type: string default: "3.8" machine: - image: ubuntu-2004:202201-02 + image: ubuntu-2404:2024.08.1 working_directory: /home/circleci/src/CuBIDS steps: - checkout: @@ -31,10 +31,8 @@ jobs: source activate cubids conda install -c conda-forge -y datalad - # Add nodejs and the validator - conda install nodejs - npm install -g yarn && \ - npm install -g bids-validator + # Add deno to run the schema validator + conda install deno # Install CuBIDS pip install -e .[tests] @@ -64,7 +62,8 @@ jobs: # We need curl for the codecov upload apt-get update - apt-get install -yqq curl + apt-get install -y -qq curl + apt-get install -y gnupg cd /home/circleci/src/coverage/ echo "Merge coverage files" @@ -83,7 +82,7 @@ jobs: deploy_pypi: machine: - image: ubuntu-2004:202201-02 + image: ubuntu-2404:2024.08.1 working_directory: /home/circleci/src/CuBIDS steps: - checkout: diff --git a/cubids/cli.py b/cubids/cli.py index 6fde0885e..cf48cf9ab 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -43,7 +43,7 @@ def _parse_validate(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -107,6 +107,41 @@ def _enter_validate(argv=None): workflows.validate(**args) +def _parse_bids_version(): + parser = argparse.ArgumentParser( + description="cubids bids-version: Get BIDS Validator and Schema version", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + PathExists = partial(_path_exists, parser=parser) + + parser.add_argument( + "bids_dir", + type=PathExists, + action="store", + help=( + "The root of a BIDS dataset. 
It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "--write", + action="store_true", + default=False, + help=( + "Save the validator and schema version to 'dataset_description.json' " + "when using `cubids bids-version /bids/path --write`. " + "By default, `cubids bids-version /bids/path` prints to the terminal." + ), + ) + return parser + + +def _enter_bids_version(argv=None): + options = _parse_bids_version().parse_args(argv) + args = vars(options).copy() + workflows.bids_version(**args) + + def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), @@ -153,7 +188,7 @@ def _parse_group(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -220,7 +255,7 @@ def _parse_apply(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -230,7 +265,7 @@ def _parse_apply(): action="store", help=( "path to the _summary.tsv that has been edited " - "in the MergeInto and RenameKeyGroup columns. If the " + "in the MergeInto and RenameEntitySet columns. If the " " summary table is located in the code/CuBIDS " "directory, then users can just pass the summary tsv " "filename instead of the full path to the tsv" @@ -242,7 +277,7 @@ def _parse_apply(): action="store", help=( "path to the _files.tsv that has been edited " - "in the MergeInto and RenameKeyGroup columns. If the " + "in the MergeInto and RenameEntitySet columns. 
If the " "files table is located in the code/CuBIDS " "directory, then users can just pass the files tsv " "filename instead of the full path to the tsv" @@ -316,7 +351,7 @@ def _parse_datalad_save(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -358,7 +393,7 @@ def _parse_undo(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -582,7 +617,7 @@ def _parse_remove_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -628,7 +663,7 @@ def _parse_print_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -655,6 +690,7 @@ def _enter_print_metadata_fields(argv=None): COMMANDS = [ ("validate", _parse_validate, workflows.validate), + ("bids-version", _parse_bids_version, workflows.bids_version), ("sidecar-merge", _parse_bids_sidecar_merge, workflows.bids_sidecar_merge), ("group", _parse_group, workflows.group), ("apply", _parse_apply, workflows.apply), diff --git a/cubids/constants.py b/cubids/constants.py index ec24b6691..dfbc2072b 100644 --- a/cubids/constants.py +++ b/cubids/constants.py @@ -1,9 +1,9 @@ """Constants for CuBIDS.""" # Names of identifier variables. -# Used to place KeyGroup and ParamGroup at the beginning of a dataframe, +# Used to place EntitySet and ParamGroup at the beginning of a dataframe, # but both are hardcoded in the relevant function. 
-ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"]) +ID_VARS = set(["EntitySet", "ParamGroup", "FilePath"]) # Entities that should not be used to group parameter sets NON_KEY_ENTITIES = set(["subject", "session", "extension"]) # Multi-dimensional keys SliceTiming XXX: what is this line about? diff --git a/cubids/cubids.py b/cubids/cubids.py index 9195163aa..817815965 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -55,7 +55,7 @@ class CuBIDS(object): _layout : :obj:`bids.layout.BIDSLayout` The BIDSLayout object. keys_files : :obj:`dict` - A dictionary of key groups and the files that belong to them. + A dictionary of entity sets and the files that belong to them. fieldmaps_cached : :obj:`bool` If True, the fieldmaps have been cached. datalad_ready : :obj:`bool` @@ -100,7 +100,7 @@ def __init__( self.fieldmaps_cached = False self.datalad_ready = False self.datalad_handle = None - self.old_filenames = [] # files whose key groups changed + self.old_filenames = [] # files whose entity sets changed self.new_filenames = [] # new filenames for files to change self.IF_rename_paths = [] # fmap jsons with rename intended fors self.grouping_config = load_config(grouping_config) @@ -309,7 +309,7 @@ def add_nifti_info(self): def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=True): """Apply changes documented in the edited summary tsv and generate the new tsv files. - This function looks at the RenameKeyGroup and MergeInto + This function looks at the RenameEntitySet and MergeInto columns and modifies the bids dataset according to the specified changs. 
@@ -346,8 +346,10 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == dest_id).all(1)] - source_files = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == source_id).all(1)] + dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] + source_files = files_df.loc[ + (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1) + ] # Get a source json file img_full_path = self.path + source_files.iloc[0].FilePath @@ -361,7 +363,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == rm_id).all(1)] + files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -372,21 +374,21 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T self._purge_associations(to_remove) # Now do the file renaming - change_keys_df = summary_df[summary_df.RenameKeyGroup.notnull()] + change_keys_df = summary_df[summary_df.RenameEntitySet.notnull()] move_ops = [] # return if nothing to change if len(change_keys_df) > 0: - key_groups = {} + entity_sets = {} for i in range(len(change_keys_df)): - new_key = change_keys_df.iloc[i]["RenameKeyGroup"] + new_key = change_keys_df.iloc[i]["RenameEntitySet"] old_key_param = change_keys_df.iloc[i]["KeyParamGroup"] # add to dictionary - key_groups[old_key_param] = new_key + entity_sets[old_key_param] = new_key - # orig key/param tuples that will have new key group - to_change = list(key_groups.keys()) + # orig key/param tuples that will have new entity set + to_change = list(entity_sets.keys()) for row in range(len(files_df)): file_path = self.path + files_df.loc[row, "FilePath"] @@ 
-396,11 +398,11 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T if key_param_group in to_change: orig_key_param = files_df.loc[row, "KeyParamGroup"] - new_key = key_groups[orig_key_param] + new_key = entity_sets[orig_key_param] - new_entities = _key_group_to_entities(new_key) + new_entities = _entity_set_to_entities(new_key) - # generate new filenames according to new key group + # generate new filenames according to new entity set self.change_filename(file_path, new_entities) # create string of mv command ; mv command for dlapi.run @@ -451,17 +453,17 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T subprocess.run(["rm", "-rf", "renames"]) def change_filename(self, filepath, entities): - """Apply changes to a filename based on the renamed key groups. + """Apply changes to a filename based on the renamed entity sets. - This function takes into account the new key group names - and renames all files whose key group names changed. + This function takes into account the new entity set names + and renames all files whose entity set names changed. Parameters ---------- filepath : :obj:`str` - Path prefix to a file in the affected key group change. + Path prefix to a file in the affected entity set change. entities : :obj:`dict` - A pybids dictionary of entities parsed from the new key group name. + A pybids dictionary of entities parsed from the new entity set name. Notes ----- @@ -904,13 +906,13 @@ def _cache_fieldmaps(self): # no intended for found return misfits - def get_param_groups_from_key_group(self, key_group): - """Split key groups into param groups based on json metadata. + def get_param_groups_from_entity_set(self, entity_set): + """Split entity sets into param groups based on json metadata. Parameters ---------- - key_group : str - Key group name. + entity_set : str + Entity set name. 
Returns ------- @@ -921,7 +923,7 @@ def get_param_groups_from_key_group(self, key_group): """ if not self.fieldmaps_cached: raise Exception("Fieldmaps must be cached to find parameter groups.") - key_entities = _key_group_to_entities(key_group) + key_entities = _entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" matching_files = self.layout.get( @@ -932,12 +934,12 @@ def get_param_groups_from_key_group(self, key_group): # entities do not also get added to matching_files to_include = [] for filepath in matching_files: - f_key_group = _file_to_key_group(filepath) + f_entity_set = _file_to_entity_set(filepath) - if f_key_group == key_group: + if f_entity_set == entity_set: to_include.append(filepath) - # get the modality associated with the key group + # get the modality associated with the entity set modalities = ["/dwi/", "/anat/", "/func/", "/perf/", "/fmap/"] modality = "" for mod in modalities: @@ -951,7 +953,7 @@ def get_param_groups_from_key_group(self, key_group): ret = _get_param_groups( to_include, self.fieldmap_lookup, - key_group, + entity_set, self.grouping_config, modality, self.keys_files, @@ -996,8 +998,8 @@ def create_data_dictionary(self): self.data_dict["Notes"]["Description"] = desc2 desc31 = "Auto-generated suggested rename of Non-Domiannt Groups" desc32 = " based on variant scanning parameters" - self.data_dict["RenameKeyGroup"] = {} - self.data_dict["RenameKeyGroup"]["Description"] = desc31 + desc32 + self.data_dict["RenameEntitySet"] = {} + self.data_dict["RenameEntitySet"]["Description"] = desc31 + desc32 desc4 = "Number of Files in the Parameter Group" self.data_dict["Counts"] = {} self.data_dict["Counts"]["Description"] = desc4 @@ -1008,19 +1010,19 @@ def create_data_dictionary(self): self.data_dict["MergeInto"]["Description"] = desc5 self.data_dict["FilePath"] = {} self.data_dict["FilePath"]["Description"] = "Location of file" - desc6 = "Number of participants in a Key Group" - self.data_dict["KeyGroupCount"] = {} - 
self.data_dict["KeyGroupCount"]["Description"] = desc6 + desc6 = "Number of participants in a Entity Set" + self.data_dict["EntitySetCount"] = {} + self.data_dict["EntitySetCount"]["Description"] = desc6 desc71 = "A set of scans whose filenames share all BIDS filename" desc72 = " key-value pairs, excluding subject and session" - self.data_dict["KeyGroup"] = {} - self.data_dict["KeyGroup"]["Description"] = desc71 + desc72 + self.data_dict["EntitySet"] = {} + self.data_dict["EntitySet"]["Description"] = desc71 + desc72 desc81 = "The set of scans with identical metadata parameters in their" - desc82 = " sidecars (defined within a Key Group and denoted" + desc82 = " sidecars (defined within a Entity Set and denoted" desc83 = " numerically)" self.data_dict["ParamGroup"] = {} self.data_dict["ParamGroup"]["Description"] = desc81 + desc82 + desc83 - desc91 = "Key Group name and Param Group number separated by a double" + desc91 = "Entity Set name and Param Group number separated by a double" desc92 = " underscore" self.data_dict["KeyParamGroup"] = {} self.data_dict["KeyParamGroup"]["Description"] = desc91 + desc92 @@ -1068,16 +1070,16 @@ def get_data_dictionary(self, df): def get_param_groups_dataframes(self): """Create DataFrames of files x param groups and a summary.""" - key_groups = self.get_key_groups() + entity_sets = self.get_entity_sets() labeled_files = [] param_group_summaries = [] - for key_group in key_groups: + for entity_set in entity_sets: try: ( labeled_file_params, param_summary, modality, - ) = self.get_param_groups_from_key_group(key_group) + ) = self.get_param_groups_from_entity_set(entity_set) except Exception: continue if labeled_file_params is None: @@ -1095,20 +1097,20 @@ def get_param_groups_dataframes(self): summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["KeyGroup"] + "__" + summary["ParamGroup"].map(str) + 
summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["KeyGroup"] + "__" + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") big_df.insert(0, "KeyParamGroup", key_param_col) - summary.insert(0, "RenameKeyGroup", np.nan) + summary.insert(0, "RenameEntitySet", np.nan) summary.insert(0, "MergeInto", np.nan) summary.insert(0, "ManualCheck", np.nan) summary.insert(0, "Notes", np.nan) @@ -1122,7 +1124,7 @@ def get_param_groups_dataframes(self): relational = self.grouping_config.get("relational_params") # list of columns names that we account for in suggested renaming - summary["RenameKeyGroup"] = summary["RenameKeyGroup"].apply(str) + summary["RenameEntitySet"] = summary["RenameEntitySet"].apply(str) rename_cols = [] tolerance_cols = [] @@ -1160,7 +1162,7 @@ def get_param_groups_dataframes(self): if str(summary.loc[row, "ParamGroup"]) == "1": val = {} # grab col, all vals send to dict - key = summary.loc[row, "KeyGroup"] + key = summary.loc[row, "EntitySet"] for col in rename_cols: summary[col] = summary[col].apply(str) val[col] = summary.loc[row, col] @@ -1170,8 +1172,8 @@ def get_param_groups_dataframes(self): for row in range(len(summary)): # check to see if renaming has already happened renamed = False - entities = _key_group_to_entities(summary.loc[row, "KeyGroup"]) - if "VARIANT" in summary.loc[row, "KeyGroup"]: + entities = _entity_set_to_entities(summary.loc[row, "EntitySet"]) + if "VARIANT" in summary.loc[row, "EntitySet"]: renamed = True # if NumVolumes is nan, set to 1.0 @@ -1183,7 +1185,7 @@ def get_param_groups_dataframes(self): acq_str = "VARIANT" # now we know 
we have a deviant param group # check if TR is same as param group 1 - key = summary.loc[row, "KeyGroup"] + key = summary.loc[row, "EntitySet"] for col in rename_cols: summary[col] = summary[col].apply(str) if summary.loc[row, col] != dom_dict[key][col]: @@ -1206,20 +1208,20 @@ def get_param_groups_dataframes(self): if "acquisition" in entities.keys(): acq = f"acquisition-{entities['acquisition'] + acq_str}" - new_name = summary.loc[row, "KeyGroup"].replace( + new_name = summary.loc[row, "EntitySet"].replace( f"acquisition-{entities['acquisition']}", acq, ) else: acq = f"acquisition-{acq_str}" - new_name = acq + "_" + summary.loc[row, "KeyGroup"] + new_name = acq + "_" + summary.loc[row, "EntitySet"] - summary.at[row, "RenameKeyGroup"] = new_name + summary.at[row, "RenameEntitySet"] = new_name # convert all "nan" to empty str # so they don't show up in the summary tsv - if summary.loc[row, "RenameKeyGroup"] == "nan": - summary.at[row, "RenameKeyGroup"] = "" + if summary.loc[row, "RenameEntitySet"] == "nan": + summary.at[row, "RenameEntitySet"] = "" for col in rename_cols: if summary.loc[row, col] == "nan": @@ -1251,8 +1253,8 @@ def get_tsvs(self, path_prefix): big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values(by=["Modality", "KeyGroupCount"], ascending=[True, False]) - big_df = big_df.sort_values(by=["Modality", "KeyGroupCount"], ascending=[True, False]) + summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) + big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1275,12 +1277,12 @@ def get_tsvs(self, path_prefix): print(f"CuBIDS detected {len(summary)} Parameter Groups.") - def get_key_groups(self): - """Identify the key groups for the bids dataset.""" + def get_entity_sets(self): + """Identify the entity sets for the bids dataset.""" # reset self.keys_files self.keys_files 
= {} - key_groups = set() + entity_sets = set() for path in Path(self.path).rglob("sub-*/**/*.*"): # ignore all dot directories @@ -1288,17 +1290,17 @@ def get_key_groups(self): continue if str(path).endswith(".nii") or str(path).endswith(".nii.gz"): - key_groups.update((_file_to_key_group(path),)) + entity_sets.update((_file_to_entity_set(path),)) - # Fill the dictionary of key group, list of filenames pairrs - ret = _file_to_key_group(path) + # Fill the dictionary of entity set, list of filenames pairrs + ret = _file_to_entity_set(path) if ret not in self.keys_files.keys(): self.keys_files[ret] = [] self.keys_files[ret].append(path) - return sorted(key_groups) + return sorted(entity_sets) def change_metadata(self, filters, metadata): """Change metadata. @@ -1334,9 +1336,20 @@ def get_all_metadata_fields(self): found_fields = set() for json_file in Path(self.path).rglob("*.json"): if ".git" not in str(json_file): - with open(json_file, "r") as jsonr: - metadata = json.load(jsonr) - found_fields.update(metadata.keys()) + # add this in case `print-metadata-fields` is run before validate + try: + with open(json_file, "r", encoding="utf-8") as jsonr: + content = jsonr.read().strip() + if not content: + print(f"Empty file: {json_file}") + continue + metadata = json.loads(content) + found_fields.update(metadata.keys()) + except json.JSONDecodeError as e: + warnings.warn(f"Error decoding JSON in {json_file}: {e}") + except Exception as e: + warnings.warn(f"Unexpected error with file {json_file}: {e}") + return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): @@ -1394,21 +1407,21 @@ def _update_json(json_file, metadata): print("INVALID JSON DATA") -def _key_group_to_entities(key_group): - """Split a key_group name into a pybids dictionary of entities.""" - return dict([group.split("-") for group in key_group.split("_")]) +def _entity_set_to_entities(entity_set): + """Split a entity_set name into a pybids dictionary of entities.""" + return 
dict([group.split("-") for group in entity_set.split("_")]) -def _entities_to_key_group(entities): - """Convert a pybids entities dictionary into a key group name.""" +def _entities_to_entity_set(entities): + """Convert a pybids entities dictionary into a entity set name.""" group_keys = sorted(entities.keys() - NON_KEY_ENTITIES) return "_".join([f"{key}-{entities[key]}" for key in group_keys]) -def _file_to_key_group(filename): - """Identify and return the key group of a bids valid filename.""" +def _file_to_entity_set(filename): + """Identify and return the entity set of a bids valid filename.""" entities = parse_file_entities(str(filename)) - return _entities_to_key_group(entities) + return _entities_to_entity_set(entities) def _get_intended_for_reference(scan): @@ -1418,7 +1431,7 @@ def _get_intended_for_reference(scan): def _get_param_groups( files, fieldmap_lookup, - key_group_name, + entity_set_name, grouping_config, modality, keys_files, @@ -1447,7 +1460,7 @@ def _get_param_groups( A data frame with param group summaries. 
""" if not files: - print("WARNING: no files for", key_group_name) + print("WARNING: no files for", entity_set_name) return None, None # Split the config into separate parts @@ -1475,12 +1488,12 @@ def _get_param_groups( wanted_keys = metadata.keys() & imaging_params example_data = {key: metadata[key] for key in wanted_keys} - example_data["KeyGroup"] = key_group_name + example_data["EntitySet"] = entity_set_name # Get the fieldmaps out and add their types if "FieldmapKey" in relational_params: fieldmap_types = sorted( - [_file_to_key_group(fmap.path) for fmap in fieldmap_lookup[path]] + [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] ) # check if config says columns or bool @@ -1499,21 +1512,21 @@ def _get_param_groups( example_data["FilePath"] = path - # If it's a fieldmap, see what key group it's intended to correct + # If it's a fieldmap, see what entity set it's intended to correct if "IntendedForKey" in relational_params: - intended_key_groups = sorted( - [_file_to_key_group(intention) for intention in intentions] + intended_entity_sets = sorted( + [_file_to_entity_set(intention) for intention in intentions] ) # check if config says columns or bool if relational_params["IntendedForKey"]["display_mode"] == "bool": - if len(intended_key_groups) > 0: + if len(intended_entity_sets) > 0: example_data["UsedAsFieldmap"] = True else: example_data["UsedAsFieldmap"] = False else: - for intention_num, intention_key_group in enumerate(intended_key_groups): - example_data[f"IntendedForKey{intention_num:02d}"] = intention_key_group + for intention_num, intention_entity_set in enumerate(intended_entity_sets): + example_data[f"IntendedForKey{intention_num:02d}"] = intention_entity_set dfs.append(example_data) @@ -1544,8 +1557,8 @@ def _get_param_groups( # add the modality as a column deduped["Modality"] = modality - # add key group count column (will delete later) - deduped["KeyGroupCount"] = len(keys_files[key_group_name]) + # add entity set count column 
(will delete later) + deduped["EntitySetCount"] = len(keys_files[entity_set_name]) # Add the ParamGroup to the whole list of files labeled_files = pd.merge(df, deduped, on=check_cols) @@ -1684,7 +1697,7 @@ def format_params(param_group_df, config, modality): def _order_columns(df): """Organize columns of the summary and files DataFrames. - This ensures that KeyGroup and ParamGroup are the first two columns, + This ensures that EntitySet and ParamGroup are the first two columns, FilePath is the last, and the others are sorted alphabetically. Notes @@ -1695,7 +1708,7 @@ def _order_columns(df): """ cols = set(df.columns.to_list()) non_id_cols = cols - ID_VARS - new_columns = ["KeyGroup", "ParamGroup"] + sorted(non_id_cols) + new_columns = ["EntitySet", "ParamGroup"] + sorted(non_id_cols) if "FilePath" in cols: new_columns.append("FilePath") diff --git a/cubids/data/config.yml b/cubids/data/config.yml index eb442399f..9ebffc8e9 100644 --- a/cubids/data/config.yml +++ b/cubids/data/config.yml @@ -177,13 +177,13 @@ relational_params: FieldmapKey: # can be # "bool": a single column with true if an IntendedFor is present - # "columns": List all intended key groups in separate columns (IntendedForXX) + # "columns": List all intended entity sets in separate columns (IntendedForXX) display_mode: bool suggest_variant_rename: yes IntendedForKey: # can be # "bool": a single column with true if an IntendedFor is present - # "columns": List all intended key groups in separate columns (IntendedForXX) + # "columns": List all intended entity sets in separate columns (IntendedForXX) display_mode: bool suggest_variant_rename: yes # BIDS fields to directly include in the Parameter Groupings diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 5bd3c9579..6562f35b7 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -54,10 +54,10 @@ def _check_sdc_cols(meta1, meta2): needs_merge = actions[np.isfinite(actions["MergeInto"])] for _, row_needs_merge in 
needs_merge.iterrows(): - source_param_key = tuple(row_needs_merge[["MergeInto", "KeyGroup"]]) - dest_param_key = tuple(row_needs_merge[["ParamGroup", "KeyGroup"]]) + source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) + dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[(actions[["ParamGroup", "KeyGroup"]] == source_param_key).all(1)] + source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -299,10 +299,12 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): if acq_group_level == "subject": acq_id = (file_entities.get("subject"), file_entities.get("session")) - acq_groups[acq_id].append((row.KeyGroup, row.ParamGroup)) + acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) - acq_groups[acq_id].append((row.KeyGroup, row.ParamGroup, file_entities.get("session"))) + acq_groups[acq_id].append( + (row.EntitySet, row.ParamGroup, file_entities.get("session")) + ) # Map the contents to a list of subjects/sessions contents_to_subjects = defaultdict(list) diff --git a/cubids/tests/data/BIDS_Dataset/README b/cubids/tests/data/BIDS_Dataset/README index f96d811a8..42be407d1 100644 --- a/cubids/tests/data/BIDS_Dataset/README +++ b/cubids/tests/data/BIDS_Dataset/README @@ -1,7 +1,7 @@ Heavily downsampled CS-DSI testing datasets This data has had some parameters changed so that there are different -parameter groups in the same key groups. +parameter groups in the same entity sets. 
## Changes diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv index 21717ed22..68ea3dc61 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv @@ -1,21 +1,21 @@ -KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath -datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz -datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz -datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz -acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz -acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/dwi/sub-02_ses-phdiff_acq-HASC55AP_dwi.nii.gz 
-acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/dwi/sub-03_ses-phdiff_acq-HASC55AP_dwi.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude1.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude1.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude1.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude2.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude2.nii.gz 
-acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude2.nii.gz -acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_phasediff.nii.gz -acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_phasediff.nii.gz -acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_phasediff.nii.gz -datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_dir-PA_epi.nii.gz -datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_dir-PA_epi.nii.gz 
-datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_dir-PA_epi.nii.gz -datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/func/sub-03_ses-phdiff_task-rest_bold.nii.gz +KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath +datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz +datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz +datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz +acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz 
+acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/dwi/sub-02_ses-phdiff_acq-HASC55AP_dwi.nii.gz +acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/dwi/sub-03_ses-phdiff_acq-HASC55AP_dwi.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude1.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude1.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude1.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude2.nii.gz 
+acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude2.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude2.nii.gz +acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_phasediff.nii.gz +acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_phasediff.nii.gz +acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_phasediff.nii.gz +datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_dir-PA_epi.nii.gz 
+datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_dir-PA_epi.nii.gz +datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_dir-PA_epi.nii.gz +datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/func/sub-03_ses-phdiff_task-rest_bold.nii.gz datatype-func_suffix-bold_task-rest__2,datatype-func_suffix-bold_task-rest,2,1,36,43,36,0.089,0.00069,90.0,TRUE,2,func,36,10.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/func/sub-01_ses-phdiff_task-rest_bold.nii.gz \ No newline at end of file diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv index d308a33ba..7f77a4c5e 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv @@ -1,12 +1,12 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 
-,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 -,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-VARIANTIsUsed_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 -,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 
+Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 +,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 +,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 
+,,,acquisition-VARIANTIsUsed_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 +,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 ,,,acquisition-VARIANTNumVolumesHasFmap_datatype-func_suffix-bold_task-rest,datatype-func_suffix-bold_task-rest__2,datatype-func_suffix-bold_task-rest,2,1,36,43,36,0.089,0.00069,90.0,TRUE,2,func,36,10.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 \ No newline at end of file diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv index 06c1da069..047809e3a 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv @@ -1,4 +1,4 @@ -KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath +KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz 
datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv index a551d9017..000b64fa2 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv @@ -1,4 +1,4 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 ,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0 ,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,True,3,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 
,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,True,3,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv index ec5445c3c..dfde4aebe 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv @@ -1,4 +1,4 @@ -KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath +KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv index 
5af0eb838..a2a1c74c4 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv @@ -1,4 +1,4 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 ,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0 ,,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi__1,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,1,1,36,43,36,0.089,0.00069,,True,1,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 ,,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi__1,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,1,1,36,43,36,0.089,0.00069,90.0,True,1,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 diff --git a/cubids/tests/data/inconsistent/README b/cubids/tests/data/inconsistent/README index f96d811a8..42be407d1 100644 --- a/cubids/tests/data/inconsistent/README +++ b/cubids/tests/data/inconsistent/README @@ -1,7 +1,7 @@ Heavily downsampled CS-DSI testing datasets This data has had some parameters changed so that there are different -parameter groups in the same key groups. +parameter groups in the same entity sets. 
## Changes diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 867f22cd2..d33107bec 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd import pytest +from packaging.version import Version from cubids.cubids import CuBIDS from cubids.metadata_merge import merge_json_into_json, merge_without_overwrite @@ -22,7 +23,15 @@ file_hash, get_data, ) -from cubids.validator import build_validator_call, parse_validator_output, run_validator +from cubids.validator import ( + build_validator_call, + parse_validator_output, + run_validator, + get_bids_validator_version, + extract_summary_info, + update_dataset_description, + bids_validator_version, +) COMPLETE_KEY_GROUPS = [ "acquisition-HASC55AP_datatype-dwi_suffix-dwi", @@ -97,12 +106,12 @@ def test_get_param_groups(tmp_path): """Test get_param_groups.""" data_root = get_data(tmp_path) bod = CuBIDS(data_root / "inconsistent", use_datalad=True) - key_groups = bod.get_key_groups() + entity_sets = bod.get_entity_sets() bod._cache_fieldmaps() - for key_group in key_groups: - ret = bod.get_param_groups_from_key_group(key_group) - assert sum(ret[1].Counts) == ret[1].loc[0, "KeyGroupCount"] + for entity_set in entity_sets: + ret = bod.get_param_groups_from_entity_set(entity_set) + assert sum(ret[1].Counts) == ret[1].loc[0, "EntitySetCount"] def test_copy_exemplars(tmp_path): @@ -365,16 +374,16 @@ def test_tsv_merge_no_datalad(tmp_path): summary_df = pd.read_table(original_summary_tsv) (fa_nan_dwi_row,) = np.flatnonzero( np.isnan(summary_df.FlipAngle) - & summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + & summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + 
summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) @@ -421,10 +430,10 @@ def test_tsv_merge_changes(tmp_path): # give tsv with no changes (make sure it does nothing except rename) bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) orig = pd.read_table(original_summary_tsv) - # TEST RenameKeyGroup column got populated CORRECTLY + # TEST RenameEntitySet column got populated CORRECTLY for row in range(len(orig)): if orig.loc[row, "ParamGroup"] != 1: - assert str(orig.loc[row, "RenameKeyGroup"]) != "nan" + assert str(orig.loc[row, "RenameEntitySet"]) != "nan" # TESTING RENAMES GOT APPLIED applied = pd.read_table(str(tmp_path / "unmodified_summary.tsv")) @@ -451,14 +460,14 @@ def test_tsv_merge_changes(tmp_path): assert len(orig) == len(applied) renamed = True - new_keys = applied["KeyGroup"].tolist() + new_keys = applied["EntitySet"].tolist() for row in range(len(orig)): if orig.loc[row, "Modality"] != "fmap": if ( - str(orig.loc[row, "RenameKeyGroup"]) != "nan" - and str(orig.loc[row, "RenameKeyGroup"]) not in new_keys + str(orig.loc[row, "RenameEntitySet"]) != "nan" + and str(orig.loc[row, "RenameEntitySet"]) not in new_keys ): - print(orig.loc[row, "RenameKeyGroup"]) + print(orig.loc[row, "RenameEntitySet"]) renamed = False assert renamed @@ -470,16 +479,16 @@ def test_tsv_merge_changes(tmp_path): summary_df = pd.read_table(original_summary_tsv) (fa_nan_dwi_row,) = np.flatnonzero( np.isnan(summary_df.FlipAngle) - & summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + & 
summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) @@ -539,9 +548,9 @@ def test_merge_without_overwrite(): """Test merge_without_overwrite.""" meta1 = { "ManualCheck": 1.0, - "RenameKeyGroup": np.nan, + "RenameEntitySet": np.nan, "MergeInto": 2.0, - "KeyGroup": "datatype-func_suffix-bold_task-rest", + "EntitySet": "datatype-func_suffix-bold_task-rest", "ParamGroup": 12, "Counts": 2, "DwellTime": 2.6e-06, @@ -602,8 +611,8 @@ def test_merge_without_overwrite(): assert not bad_slice_merge -def test_keygroups(tmp_path): - """Test keygroups.""" +def test_entitysets(tmp_path): + """Test entitysets.""" data_root = get_data(tmp_path) # Test the complete data @@ -612,22 +621,22 @@ def test_keygroups(tmp_path): # There should be no unpaired fieldmaps assert len(complete_misfit_fmaps) == 0 - # Test that the correct key groups are found - key_groups = complete_bod.get_key_groups() - assert key_groups == COMPLETE_KEY_GROUPS + # Test that the correct entity sets are found + entity_sets = complete_bod.get_entity_sets() + assert entity_sets == COMPLETE_KEY_GROUPS # Test the incomplete ibod = CuBIDS(data_root / "inconsistent") inc_misfit_fmaps = ibod._cache_fieldmaps() assert len(inc_misfit_fmaps) == 1 - # There will still be the same number of key groups - ikey_groups = ibod.get_key_groups() - assert ikey_groups == COMPLETE_KEY_GROUPS + # There will still be the 
same number of entity sets + ientity_sets = ibod.get_entity_sets() + assert ientity_sets == COMPLETE_KEY_GROUPS def test_tsv_creation(tmp_path): - """Test the Key Group and Parameter Group creation on sample data.""" + """Test the Entity Set and Parameter Group creation on sample data.""" data_root = get_data(tmp_path) # Test the complete data @@ -636,9 +645,9 @@ def test_tsv_creation(tmp_path): # There should be no unpaired fieldmaps assert len(complete_misfit_fmaps) == 0 - # Test that the correct key groups are found - key_groups = complete_bod.get_key_groups() - assert key_groups == COMPLETE_KEY_GROUPS + # Test that the correct entity sets are found + entity_sets = complete_bod.get_entity_sets() + assert entity_sets == COMPLETE_KEY_GROUPS # Get the tsvs from the complete data cfiles_df, csummary_df = complete_bod.get_param_groups_dataframes() @@ -647,7 +656,7 @@ def test_tsv_creation(tmp_path): assert cfiles_df.shape[0] == 21 # This data should have the same number of param - # groups as key groups + # groups as entity sets assert csummary_df.shape[0] == len(COMPLETE_KEY_GROUPS) # check IntendedForXX and FieldmapKeyXX are boolean now @@ -668,9 +677,9 @@ def test_tsv_creation(tmp_path): inc_misfit_fmaps = ibod._cache_fieldmaps() assert len(inc_misfit_fmaps) == 1 - # There will still be the same number of key groups - ikey_groups = ibod.get_key_groups() - assert ikey_groups == COMPLETE_KEY_GROUPS + # There will still be the same number of entity sets + ientity_sets = ibod.get_entity_sets() + assert ientity_sets == COMPLETE_KEY_GROUPS # Get the tsvs from the inconsistent data ifiles_df, isummary_df = ibod.get_param_groups_dataframes() @@ -686,8 +695,8 @@ def test_tsv_creation(tmp_path): for i, (_, row) in enumerate(isummary_df.iterrows()): if i == len(isummary_df) - 1: break - # if key groups in rows i and i+1 are the same - if isummary_df.iloc[i]["KeyGroup"] == isummary_df.iloc[i + 1]["KeyGroup"]: + # if entity sets in rows i and i+1 are the same + if 
isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1 # and count i < count i + 1 @@ -697,8 +706,8 @@ def test_tsv_creation(tmp_path): for i, (_, row) in enumerate(ifiles_df.iterrows()): if i == len(ifiles_df) - 1: break - # if key groups in rows i and i+1 are the same - if ifiles_df.iloc[i]["KeyGroup"] == ifiles_df.iloc[i + 1]["KeyGroup"]: + # if entity sets in rows i and i+1 are the same + if ifiles_df.iloc[i]["EntitySet"] == ifiles_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 assert ifiles_df.iloc[i]["ParamGroup"] <= ifiles_df.iloc[i + 1]["ParamGroup"] @@ -707,9 +716,9 @@ def test_apply_tsv_changes(tmp_path): """Test apply_tsv_changes.""" # set up like narrative of user using this # similar to test tsv creation - # open the tsv, rename a key group + # open the tsv, rename a entity set # save tsv - # call change key groups + # call change entity sets # give tsv with no changes (make sure it does nothing) # make sure files you wanted to rename exist in the bids dir @@ -746,7 +755,7 @@ def test_apply_tsv_changes(tmp_path): assert og_content == mod1_content - # edit the tsv, add a RenameKeyGroup + # edit the tsv, add a RenameEntitySet # _edit_tsv(str(tmp_path / "originals_summary.tsv")) complete_cubids.apply_tsv_changes( @@ -830,9 +839,9 @@ def test_session_apply(tmp_path): """Test session_apply.""" # set up like narrative of user using this # similar to test tsv creation - # open the tsv, rename a key group + # open the tsv, rename a entity set # save tsv - # call change key groups + # call change entity sets # give tsv with no changes (make sure it does nothing) # make sure files you wanted to rename exist in the bids dir @@ -1028,6 +1037,39 @@ def test_validator(tmp_path): assert isinstance(parsed, pd.DataFrame) +def test_bids_version(tmp_path): + """Test workflows.bids_version.""" + data_root = 
get_data(tmp_path) + bids_dir = Path(data_root) / "complete" + + # Ensure the test directory exists + assert bids_dir.exists() + + # test the validator in valid dataset + call = build_validator_call(bids_dir) + ret = run_validator(call) + + assert ret.returncode == 0 + + decoded = ret.stdout.decode("UTF-8") + + # Get the BIDS validator version + validator_version = Version(get_bids_validator_version()["ValidatorVersion"]) + # Extract schemaVersion + schema_version = Version(extract_summary_info(decoded)["SchemaVersion"]) + + # Set baseline versions to compare against + min_validator_version = Version("2.0.0") + min_schema_version = Version("0.11.3") + + assert ( + validator_version >= min_validator_version + ), f"Validator version {validator_version} is less than minimum {min_validator_version}" + assert ( + schema_version >= min_schema_version + ), f"Schema version {schema_version} is less than minimum {min_schema_version}" + + def test_docker(): """Verify that docker is installed and the user has permission to run docker images. 
diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index a0e9066ac..06d3af2a2 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -14,9 +14,10 @@ """ import argparse + import pytest -from cubids.cli import _path_exists, _is_file, _get_parser, _main +from cubids.cli import _get_parser, _is_file, _main, _path_exists def _test_path_exists(): diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index 7e985de4d..6ab847fd5 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -101,9 +101,9 @@ def _test__cache_fieldmaps(cubids_instance): # Add assertions here -def _test_get_param_groups_from_key_group(cubids_instance): - key_group = "group-01" - param_groups = cubids_instance.get_param_groups_from_key_group(key_group) +def _test_get_param_groups_from_entity_set(cubids_instance): + entity_set = "group-01" + param_groups = cubids_instance.get_param_groups_from_entity_set(entity_set) # Add assertions here @@ -129,8 +129,8 @@ def _test_get_tsvs(cubids_instance): # Add assertions here -def _test_get_key_groups(cubids_instance): - key_groups = cubids_instance.get_key_groups() +def _test_get_entity_sets(cubids_instance): + entity_sets = cubids_instance.get_entity_sets() # Add assertions here @@ -179,21 +179,21 @@ def _test__update_json(cubids_instance): # Add assertions here -def _test__key_group_to_entities(cubids_instance): - key_group = "group-01" - entities = cubids_instance._key_group_to_entities(key_group) +def _test__entity_set_to_entities(cubids_instance): + entity_set = "group-01" + entities = cubids_instance._entity_set_to_entities(entity_set) # Add assertions here -def _test__entities_to_key_group(cubids_instance): +def _test__entities_to_entity_set(cubids_instance): entities = {"subject": "sub-01", "session": "ses-01"} - key_group = cubids_instance._entities_to_key_group(entities) + entity_set = cubids_instance._entities_to_entity_set(entities) # Add assertions here -def 
_test__file_to_key_group(cubids_instance): +def _test__file_to_entity_set(cubids_instance): filename = "sub-01_ses-01_task-rest_bold.nii.gz" - key_group = cubids_instance._file_to_key_group(filename) + entity_set = cubids_instance._file_to_entity_set(filename) # Add assertions here @@ -206,12 +206,12 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} - key_group_name = "group-01" + entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" keys_files = {"group-01": ["sub-01_ses-01_task-rest_bold.nii.gz"]} param_groups = cubids_instance._get_param_groups( - files, fieldmap_lookup, key_group_name, grouping_config, modality, keys_files + files, fieldmap_lookup, entity_set_name, grouping_config, modality, keys_files ) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 22263f9ba..c64da3727 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -53,12 +53,12 @@ def _add_deletion(summary_tsv): # def _edit_tsv(summary_tsv): # df = pd.read_table(summary_tsv) -# df['RenameKeyGroup'] = df['RenameKeyGroup'].apply(str) -# df['KeyGroup'] = df['KeyGroup'].apply(str) +# df['RenameEntitySet'] = df['RenameEntitySet'].apply(str) +# df['EntitySet'] = df['EntitySet'].apply(str) # for row in range(len(df)): -# if df.loc[row, 'KeyGroup'] == \ +# if df.loc[row, 'EntitySet'] == \ # "acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1": -# df.at[row, 'RenameKeyGroup'] = \ +# df.at[row, 'RenameEntitySet'] = \ # "acquisition-v5_datatype-fmap_fmap-magnitude1_suffix-magnitude1" # df.to_csv(summary_tsv) diff --git a/cubids/validator.py b/cubids/validator.py index 01dad11c8..a4feeba5b 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -5,7 +5,9 @@ import logging import os 
import pathlib +import re import subprocess +import warnings import pandas as pd @@ -14,18 +16,33 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" - # build docker call - # CuBIDS automatically ignores subject consistency. - command = ["bids-validator", "--verbose", "--json", "--ignoreSubjectConsistency"] + # New schema BIDS validator doesn't have option to ignore subject consistency. + # Build the deno command to run the BIDS validator. + command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") - command.append(path) - return command +def get_bids_validator_version(): + """Get the version of the BIDS validator. + + Returns + ------- + version : :obj:`str` + Version of the BIDS validator. + """ + command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output = result.stdout.decode("utf-8").strip() + version = output.split()[-1] + # Remove ANSI color codes + clean_ver = re.sub(r"\x1b\[[0-9;]*m", "", version) + return {"ValidatorVersion": clean_ver} + + def build_subject_paths(bids_dir): """Build a list of BIDS dirs with 1 subject each.""" bids_dir = str(bids_dir) @@ -54,6 +71,26 @@ def build_subject_paths(bids_dir): return subjects_dict +def build_first_subject_path(bids_dir, subject): + """Build a list of BIDS dirs with 1 subject each.""" + bids_dir = str(bids_dir) + if not bids_dir.endswith("/"): + bids_dir += "/" + + root_files = [x for x in glob.glob(bids_dir + "*") if os.path.isfile(x)] + + subject_dict = {} + + purepath = pathlib.PurePath(subject) + sub_label = purepath.name + + files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] + files.extend(root_files) + subject_dict[sub_label] = files + + return subject_dict + + def run_validator(call): """Run the validator with subprocess. 
@@ -89,32 +126,6 @@ def parse_validator_output(output): Dataframe of validator output. """ - def get_nested(dct, *keys): - """Get a nested value from a dictionary. - - Parameters - ---------- - dct : :obj:`dict` - Dictionary to get value from. - keys : :obj:`list` - List of keys to get value from. - - Returns - ------- - :obj:`dict` - The nested value. - """ - for key in keys: - try: - dct = dct[key] - except (KeyError, TypeError): - return None - return dct - - data = json.loads(output) - - issues = data["issues"] - def parse_issue(issue_dict): """Parse a single issue from the validator output. @@ -128,30 +139,30 @@ def parse_issue(issue_dict): return_dict : :obj:`dict` Dictionary of parsed issue. """ - return_dict = {} - return_dict["files"] = [ - get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") - ] - return_dict["type"] = issue_dict.get("key", "") - return_dict["severity"] = issue_dict.get("severity", "") - return_dict["description"] = issue_dict.get("reason", "") - return_dict["code"] = issue_dict.get("code", "") - return_dict["url"] = issue_dict.get("helpUrl", "") - - return return_dict - - df = pd.DataFrame() - - for warn in issues["warnings"]: - parsed = parse_issue(warn) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) - - for err in issues["errors"]: - parsed = parse_issue(err) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) + return { + "location": issue_dict.get("location", ""), + "code": issue_dict.get("code", ""), + "issueMessage": issue_dict.get("issueMessage", ""), + "subCode": issue_dict.get("subCode", ""), + "severity": issue_dict.get("severity", ""), + "rule": issue_dict.get("rule", ""), + } + + # Load JSON data + data = json.loads(output) + + # Extract issues + issues = data.get("issues", {}).get("issues", []) + if not issues: + return pd.DataFrame( + columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] + ) + # Parse all issues + 
parsed_issues = [parse_issue(issue) for issue in issues] + + # Convert to DataFrame + df = pd.DataFrame(parsed_issues) return df @@ -163,12 +174,106 @@ def get_val_dictionary(): val_dict : dict Dictionary of values. """ - val_dict = {} - val_dict["files"] = {"Description": "File with warning orerror"} - val_dict["type"] = {"Description": "BIDS validation warning or error"} - val_dict["severity"] = {"Description": "gravity of problem (warning/error"} - val_dict["description"] = {"Description": "Description of warning/error"} - val_dict["code"] = {"Description": "BIDS validator issue code number"} - val_dict["url"] = {"Description": "Link to the issue's neurostars thread"} - - return val_dict + return { + "location": {"Description": "File with the validation issue."}, + "code": {"Description": "Code of the validation issue."}, + "issueMessage": {"Description": "Validation issue message."}, + "subCode": {"Description": "Subcode providing additional issue details."}, + "severity": {"Description": "Severity of the issue (e.g., warning, error)."}, + "rule": {"Description": "Validation rule that triggered the issue."}, + } + + +def extract_summary_info(output): + """Extract summary information from the JSON output. + + Parameters + ---------- + output : str + JSON string of BIDS validator output. + + Returns + ------- + dict + Dictionary containing SchemaVersion and other summary info. + """ + try: + data = json.loads(output) + except json.JSONDecodeError as e: + raise ValueError("Invalid JSON provided to get SchemaVersion.") from e + + summary = data.get("summary", {}) + + return {"SchemaVersion": summary.get("schemaVersion", "")} + + +def update_dataset_description(path, new_info): + """Update or append information to dataset_description.json. + + Parameters + ---------- + path : :obj:`str` + Path to the dataset. + new_info : :obj:`dict` + Information to add or update. 
+ """ + description_path = os.path.join(path, "dataset_description.json") + + # Load existing data if the file exists + if os.path.exists(description_path): + with open(description_path, "r") as f: + existing_data = json.load(f) + else: + existing_data = {} + + # Update the existing data with the new info + existing_data.update(new_info) + + # Write the updated data back to the file + with open(description_path, "w") as f: + json.dump(existing_data, f, indent=4) + print(f"Updated dataset_description.json at: {description_path}") + + # Check if .datalad directory exists before running the DataLad save command + datalad_dir = os.path.join(path, ".datalad") + if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir): + try: + subprocess.run( + [ + "datalad", + "save", + "-m", + "Save BIDS validator and schema version to dataset_description", + description_path, + ], + check=True, + ) + print("Changes saved with DataLad.") + except subprocess.CalledProcessError as e: + warnings.warn(f"Error running DataLad save: {e}") + + +def bids_validator_version(output, path, write=False): + """Save BIDS validator and schema version. + + Parameters + ---------- + output : :obj:`str` + Path to JSON file of BIDS validator output. + path : :obj:`str` + Path to the dataset. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. 
+ """ + # Get the BIDS validator version + validator_version = get_bids_validator_version() + # Extract schemaVersion + summary_info = extract_summary_info(output) + + combined_info = {**validator_version, **summary_info} + + if write: + # Update the dataset_description.json file + update_dataset_description(path, combined_info) + elif not write: + print(combined_info) diff --git a/cubids/workflows.py b/cubids/workflows.py index e01ccc78f..11931c5b0 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -17,6 +17,8 @@ from cubids.metadata_merge import merge_json_into_json from cubids.utils import _get_container_type from cubids.validator import ( + bids_validator_version, + build_first_subject_path, build_subject_paths, build_validator_call, get_val_dictionary, @@ -258,6 +260,70 @@ def validate( sys.exit(proc.returncode) +def bids_version(bids_dir, write=False): + """Get BIDS validator and schema version. + + Parameters + ---------- + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. + """ + # Need to run validator to get output with schema version + # Copy code from `validate --sequential` + + try: # return first subject + # Get all folders that start with "sub-" + sub_folders = [ + name + for name in os.listdir(bids_dir) + if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") + ] + if not sub_folders: + raise ValueError("No folders starting with 'sub-' found. 
Please provide a valid BIDS.") + subject = sub_folders[0] + except FileNotFoundError: + raise FileNotFoundError(f"The directory {bids_dir} does not exist.") + except ValueError as ve: + raise ve + + # build a dictionary with {SubjectLabel: [List of files]} + # run first subject only + subject_dict = build_first_subject_path(bids_dir, subject) + + # iterate over the dictionary + for subject, files_list in subject_dict.items(): + # logger.info(" ".join(["Processing subject:", subject])) + # create a temporary directory and symlink the data + with tempfile.TemporaryDirectory() as tmpdirname: + for fi in files_list: + # cut the path down to the subject label + bids_start = fi.find(subject) + + # maybe it's a single file + if bids_start < 1: + bids_folder = tmpdirname + fi_tmpdir = tmpdirname + + else: + bids_folder = Path(fi[bids_start:]).parent + fi_tmpdir = tmpdirname + "/" + str(bids_folder) + + if not os.path.exists(fi_tmpdir): + os.makedirs(fi_tmpdir) + output = fi_tmpdir + "/" + str(Path(fi).name) + shutil.copy2(fi, output) + + # run the validator + call = build_validator_call(tmpdirname) + ret = run_validator(call) + + # Get BIDS validator and schema version + decoded = ret.stdout.decode("UTF-8") + bids_validator_version(decoded, bids_dir, write=write) + + def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) @@ -895,7 +961,8 @@ def print_metadata_fields(bids_dir, container): if container is None: bod = CuBIDS(data_root=str(bids_dir), use_datalad=False) fields = bod.get_all_metadata_fields() - logger.info("\n".join(fields)) + print("\n".join(fields)) # logger not printing + # logger.info("\n".join(fields)) sys.exit(0) # Run it through a container diff --git a/docs/_static/PNC_example_edited.csv b/docs/_static/PNC_example_edited.csv index a0e9b545e..0de481748 100644 --- a/docs/_static/PNC_example_edited.csv +++ 
b/docs/_static/PNC_example_edited.csv @@ -1,9 +1,9 @@ -RenameKeyGroup,MergeInto,KeyGroup,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime -,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 -acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 -,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 -,3,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -acquisition-NoSDC_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 -,0,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 +RenameEntitySet,MergeInto,EntitySet,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime +,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 +acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 +,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 +acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 +,3,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 +acquisition-NoSDC_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 +,0,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,8,1,datatype-fmap_fmap-phase1_suffix-phase1,70,12.3 \ No newline at end of file diff --git a/docs/_static/PNC_example_unedited.csv b/docs/_static/PNC_example_unedited.csv index 1ca5aa678..4fdfb436b 100644 --- 
a/docs/_static/PNC_example_unedited.csv +++ b/docs/_static/PNC_example_unedited.csv @@ -1,9 +1,9 @@ -RenameKeyGroup,MergeInto,KeyGroup,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime -,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 -,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 -,,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 +RenameEntitySet,MergeInto,EntitySet,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime +,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 +,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 +,,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 ,,datatype-dwi_run-1_suffix-dwi,8,1,datatype-fmap_fmap-phase1_suffix-phase1,70,12.3 \ No newline at end of file diff --git a/docs/_static/PNC_post_apply_summary.csv b/docs/_static/PNC_post_apply_summary.csv index c91f417c9..f4a30b562 100644 --- a/docs/_static/PNC_post_apply_summary.csv +++ b/docs/_static/PNC_post_apply_summary.csv @@ -1,8 +1,8 @@ 
-Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,,datatype-dwi_run-1_suffix-dwi__1,datatype-dwi_run-1_suffix-dwi,1,1388,128,128,70,0.082,0.000267,90,TRUE,1388,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 -,,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,1,25,128,128,70,0.082,0.000267,90,FALSE,25,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 -,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,1,6,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 -,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__2,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,2,3,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 -,,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,1,2,128,128,46,0.082,0.000267,90,TRUE,2,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 -,,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,1,1,128,128,70,0.102,0.0008,90,TRUE,1,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 
+Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,,datatype-dwi_run-1_suffix-dwi__1,datatype-dwi_run-1_suffix-dwi,1,1388,128,128,70,0.082,0.000267,90,TRUE,1388,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,1,25,128,128,70,0.082,0.000267,90,FALSE,25,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,1,6,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__2,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,2,3,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,1,2,128,128,46,0.082,0.000267,90,TRUE,2,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 +,,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,1,1,128,128,70,0.102,0.0008,90,TRUE,1,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 
,,,,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi,1,1,128,128,70,0.082,0.000267,90,TRUE,1,dwi,70,35.0,TRUE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 \ No newline at end of file diff --git a/docs/_static/PNC_pre_apply_summary_dwi_run1.csv b/docs/_static/PNC_pre_apply_summary_dwi_run1.csv index ad0cae9b7..cb02df295 100644 --- a/docs/_static/PNC_pre_apply_summary_dwi_run1.csv +++ b/docs/_static/PNC_pre_apply_summary_dwi_run1.csv @@ -1,7 +1,7 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 
-,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 +,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 
,,,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__7,datatype-dwi_run-1_suffix-dwi,7,1,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,TRUE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 \ No newline at end of file diff --git a/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv b/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv index 15608fc77..b304633a5 100644 --- a/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv +++ b/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv @@ -1,7 +1,7 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 
-,,0,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 +,,0,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 
,,,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__7,datatype-dwi_run-1_suffix-dwi,7,1,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,TRUE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 \ No newline at end of file diff --git a/docs/_static/v0_edited_summary.csv b/docs/_static/v0_edited_summary.csv index 82bbd39ae..bad6107ea 100644 --- a/docs/_static/v0_edited_summary.csv +++ b/docs/_static/v0_edited_summary.csv @@ -1,12 +1,12 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,4,180,216,180,0.0029,,8,FALSE,4,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 -,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,2,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,0,acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,10.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.07,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,4,36,43,36,0.004,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 
-,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,4,36,43,36,0.006,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,4,36,43,36,,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 -,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,3,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 -,,,acquisition-VARIANTUnused_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,2,36,43,36,0.089,0.00069,70,TRUE,3,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,4,180,216,180,0.0029,,8,FALSE,4,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 +,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,2,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,0,acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,10.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 
+,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.07,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,4,36,43,36,0.004,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,4,36,43,36,0.006,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,4,36,43,36,,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 +,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,3,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 +,,,acquisition-VARIANTUnused_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,2,36,43,36,0.089,0.00069,70,TRUE,3,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 ,,,acquisition-VARIANTFlipAngle_datatype-func_suffix-bold_task-rest,datatype-func_suffix-bold_task-rest__2,datatype-func_suffix-bold_task-rest,2,1,36,43,36,0.089,0.00069,90,TRUE,3,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 \ No newline at end of file diff --git a/docs/_static/v0_summary.csv b/docs/_static/v0_summary.csv index 92fb53676..d19cc94ed 100644 --- a/docs/_static/v0_summary.csv +++ b/docs/_static/v0_summary.csv @@ -1,4 +1,4 @@ 
-Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 ,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,4,180,216,180,0.0029,,8,False,4,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0 ,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,2,36,43,36,0.089,0.00069,90,True,4,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 ,,,acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,90,True,4,dwi,36,10.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 diff --git a/docs/about.rst b/docs/about.rst index 80e55f950..4dd44b46f 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -23,7 +23,7 @@ and **budget their computational time and resources** effectively. ``CuBIDS`` is designed to facilitate the curation of large, neuroimaging datasets so that users can infer useful information from descriptive and accurate BIDS labels before running pipelines *en masse*. -``CuBIDS`` accomplishes this by summarizing BIDS data using :ref:`keygroup`, +``CuBIDS`` accomplishes this by summarizing BIDS data using :ref:`entityset`, :ref:`paramgroup`, and :ref:`acquisitiongroup` categorizations in your data (we'll explain what these are in more detail in the next section). 
@@ -56,15 +56,15 @@ Examples Dominant Group resting state BOLD: * Example Filename: ``sub-01_ses-A_task-rest_acq-singleband_bold.nii.gz`` - * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` + * Entity Set: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` * Param Group: ``1`` (Dominant Group) Variant Group resting state BOLD (all scans in this Param Group are missing a fieldmap) * Example Filename: ``sub-02_ses-A_task-rest_acq-singleband_bold.nii.gz`` - * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` + * Entity Set: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` * Param Group: ``2`` (Variant Group) - * Rename Key Group: ``acquisition-singlebandVARIANTNoFmap_datatype-func_suffix-bold_task-rest`` + * Rename Entity Set: ``acquisition-singlebandVARIANTNoFmap_datatype-func_suffix-bold_task-rest`` These definitions are described in more detail in :doc:`glossary` and :doc:`usage`. diff --git a/docs/example.rst b/docs/example.rst index c7826f46c..0f50ca803 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -152,7 +152,7 @@ Note that it is best practice to provide a detailed commit message with each cha Adding NIfTI Information to JSON Sidecars ----------------------------------------- -Next, we seek to add more image parameters to our sidecars so that we can better define our Key Groups. +Next, we seek to add more image parameters to our sidecars so that we can better define our Entity Sets. Historically, only a subset of parameters in the NIfTI image header have been included in a BIDS sidecar... Parameters such as image dimensions, number of volumes, image obliquity, and voxel sizes — all important data that can change how our pipelines will eventually run! @@ -328,7 +328,7 @@ contains only one scan (see "Counts" column) with only 10 volumes (see "NumVolumes" column). 
Since the majority of DWI scans in this dataset have 61 volumes, ``CuBIDS`` assigns this single scan to a "Variant" (i.e. non-dominant) Parameter Group, -and automatically populates that Parameter Group's "RenameKeyGroup" column in ``v0_summary.tsv`` +and automatically populates that Parameter Group's "RenameEntitySet" column in ``v0_summary.tsv`` with a suggested name: ``acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi``. This time, though, we elect to remove this scan because it does not have enough volumes to be usable for most analyses. @@ -351,7 +351,7 @@ Applying changes Now that all metadata issues have been addressed — both validation and ``CuBIDS`` summary — -we are ready to rename our files based on their RenameKeyGroup values and +we are ready to rename our files based on their RenameEntitySet values and apply the requested deletion in ``v0_edited_summary.tsv``. The ``cubids apply`` function renames scans in each Variant Parameter Group according to the metadata parameters with a flag “VARIANT”, diff --git a/docs/glossary.rst b/docs/glossary.rst index 8a2b3a4f3..7c024461b 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -5,27 +5,27 @@ Glossary .. glossary:: - Key Group + Entity Set A set of scans whose filenames share all `BIDS filename key-value pairs`_, excluding subject and session. - The key group is derived from the common BIDS filename elements. + The entity set is derived from the common BIDS filename elements. For example, ``acquisition-*_datatype-*_run-*_task-*_suffix``. Parameter Group A set of scans with identical metadata parameters in their sidecars. - Defined within a Key Group. - Numerically identified, meaning that each Key Group will have *n* Param Groups, - where *n* is the number of unique sets of scanning parameters present in that Key Group + Defined within an Entity Set. 
+ Numerically identified, meaning that each Entity Set will have *n* Param Groups, + where *n* is the number of unique sets of scanning parameters present in that Entity Set (e.g., 1, 2, etc.). Dominant Group - The Param Group that contains the most scans in its Key Group. + The Param Group that contains the most scans in its Entity Set. Variant Group Any Param Group that is non-dominant. - Rename Key Group - Auto-generated, recommended new Key Group name for Variant Groups. + Rename Entity Set + Auto-generated, recommended new Entity Set name for Variant Groups. Based on the metadata parameters that cause scans in Variant Groups to vary from those in their respective Dominant Groups. diff --git a/docs/installation.rst b/docs/installation.rst index d55b84a40..b6ebda3bb 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -16,7 +16,7 @@ initialize a new conda environment (for example, named ``cubids``) as follows: .. code-block:: console - $ conda create -n cubids python=3.8 pip + $ conda create -n cubids python=3.12 pip $ conda activate cubids You are now ready to install CuBIDS. @@ -44,23 +44,16 @@ Once you have a copy of the source, you can install it with: $ pip install -e . We will now need to install some dependencies of ``CuBIDS``. -To do this, we first must install nodejs. +To do this, we first must install deno to run ``bids-validator``. We can accomplish this using the following command: .. code-block:: console - $ conda install nodejs + $ conda install deno -Now that we have npm installed, we can install ``bids-validator`` using the following command: +The new schema-based ``bids-validator`` doesn't need to be installed +and will be run automatically when ``cubids validate`` is called. -.. code-block:: console - - $ npm install -g bids-validator@1.7.2 - -In our example walkthrough, -we use ``bids-validator`` v1.7.2. 
using a different version of the -validator may result in slightly different validation tsv printouts, -but ``CuBIDS`` is compatible with all versions of the validator at or above v1.6.2. We also recommend using ``CuBIDS`` with the optional ``DataLad`` version control capabilities. We use ``DataLad`` throughout our walkthrough of the CuBIDS Workflow on diff --git a/docs/notebooks/HTML_param_groups.ipynb b/docs/notebooks/HTML_param_groups.ipynb index e9854a736..7889c13c3 100644 --- a/docs/notebooks/HTML_param_groups.ipynb +++ b/docs/notebooks/HTML_param_groups.ipynb @@ -244,17 +244,17 @@ "\n", "# ISSUE! Grouping by char!\n", "\n", - "#param_group = cubids_obj.get_param_groups(key_group)\n", - "# print(key_group)\n", + "#param_group = cubids_obj.get_param_groups(entity_set)\n", + "# print(entity_set)\n", "# #print(param_group)\n", "\n", "NON_KEY_ENTITIES = set([\"subject\", \"session\", \"extension\"])\n", "\n", - "def key_group_to_entities(key_group):\n", - " return dict([group.split(\"-\") for group in key_group.split(\"_\")])\n", + "def entity_set_to_entities(entity_set):\n", + " return dict([group.split(\"-\") for group in entity_set.split(\"_\")])\n", "\n", "\n", - "def entities_to_key_group(entities):\n", + "def entities_to_entity_set(entities):\n", " group_keys = sorted(entities.keys())\n", " return \"_\".join(\n", " [\"{}-{}\".format(key, entities[key]) for key in group_keys])\n", @@ -263,13 +263,13 @@ " entities = parse_file_entities(str(filename))\n", " return entities\n", "\n", - "def file_to_key_group(filename):\n", + "def file_to_entity_set(filename):\n", " entities = parse_file_entities(str(filename))\n", - " keys = entities_to_key_group(entities)\n", + " keys = entities_to_entity_set(entities)\n", " return keys\n", "\n", - "def key_group_to_entities(key_group):\n", - " return dict([group.split(\"-\") for group in key_group.split(\"_\")])\n", + "def entity_set_to_entities(entity_set):\n", + " return dict([group.split(\"-\") for group in 
entity_set.split(\"_\")])\n", "\n", "def get_file_params(files):\n", " \"\"\"Finds a list of *parameter groups* from a list of files.\n", @@ -314,8 +314,8 @@ " return files_params\n", "\n", "\n", - "#def get_param_groups(key_group, path):\n", - "# key_entities = key_group_to_entities(key_group)\n", + "#def get_param_groups(entity_set, path):\n", + "# key_entities = entity_set_to_entities(entity_set)\n", "# path = Path(path)\n", "# matching_files = path.layout.get(return_type=\"file\", scope=\"self\",\n", "# **key_entities)\n", @@ -327,14 +327,14 @@ "path = \"/Users/Covitz/CuBIDS/cubids/testdata/complete/\"\n", "ret_entities = file_to_entities(filename)\n", "print(ret_entities)\n", - "key_group = entities_to_key_group(ret_entities)\n", - "print(key_group)\n", + "entity_set = entities_to_entity_set(ret_entities)\n", + "print(entity_set)\n", "\n", - "entities = key_group_to_entities(key_group)\n", + "entities = entity_set_to_entities(entity_set)\n", "print(\"Entities: \", entities)\n", "\n", - "key_groups = file_to_key_group(filename)\n", - "print(\"F_2_K: \", key_groups)\n", + "entity_sets = file_to_entity_set(filename)\n", + "print(\"F_2_K: \", entity_sets)\n", "\n", "\n", "\n", @@ -374,8 +374,8 @@ "\n", "\n", "cubids_obj = CuBIDS(\"/Users/Covitz/CuBIDS/cubids/testdata/complete/\")\n", - "key_group = cubids_obj.get_key_groups()\n", - "key_group" + "entity_set = cubids_obj.get_entity_sets()\n", + "entity_set" ] }, { @@ -428,20 +428,20 @@ "\n", "\n", "\n", - "def _entities_to_key_group(entities):\n", + "def _entities_to_entity_set(entities):\n", " group_keys = sorted(entities.keys() - NON_KEY_ENTITIES)\n", " return \"_\".join(\n", " [\"{}-{}\".format(key, entities[key]) for key in group_keys])\n", "\n", "\n", - "def _file_to_key_group(filename):\n", + "def _file_to_entity_set(filename):\n", " entities = parse_file_entities(str(filename))\n", - " return _entities_to_key_group(entities)\n", + " return _entities_to_entity_set(entities)\n", "\n", "\n", "def 
html_groups(bids_dir):\n", - " # get key groups using cubids.get_key_groups\n", - " # use key_group_to_entities to get entities\n", + " # get entity sets using cubids.get_entity_sets\n", + " # use entity_set_to_entities to get entities\n", " # get param groups for each entity\n", "\n", "\n", diff --git a/docs/notebooks/Key_and_Param_Groups.ipynb b/docs/notebooks/Key_and_Param_Groups.ipynb index 3970ce6df..9985a4856 100644 --- a/docs/notebooks/Key_and_Param_Groups.ipynb +++ b/docs/notebooks/Key_and_Param_Groups.ipynb @@ -58,13 +58,13 @@ ], "source": [ "###############################\n", - "# TESTING change_key_groups!!!!\n", + "# TESTING change_entity_sets!!!!\n", "###############################\n", "\n", "\n", "bod = CuBIDS(data_root2)\n", "\n", - "out = bod.change_key_groups('/Users/scovitz/CuBIDS/notebooks/NewTests', '/Users/scovitz/CuBIDS/notebooks/newtsvs')\n", + "out = bod.change_entity_sets('/Users/scovitz/CuBIDS/notebooks/NewTests', '/Users/scovitz/CuBIDS/notebooks/newtsvs')\n", "\n", "\n", "print(out)\n" @@ -148,8 +148,8 @@ "outputs": [], "source": [ "bod = CuBIDS(data_root2)\n", - "key_groups = bod.get_key_groups()\n", - "print(key_groups)" + "entity_sets = bod.get_entity_sets()\n", + "print(entity_sets)" ] }, { @@ -171,22 +171,22 @@ "outputs": [], "source": [ "\n", - "for key_group in key_groups:\n", - " print(key_group)\n", - " output = bod.get_file_params(key_group)\n", + "for entity_set in entity_sets:\n", + " print(entity_set)\n", + " output = bod.get_file_params(entity_set)\n", " print(len(output))\n", " #print(output)\n", "\n", "\n", " #print(output)\n", - " #output2 = bod.get_param_groups(key_group)\n", + " #output2 = bod.get_param_groups(entity_set)\n", " #print(output2[1])\n", " #print(output[1])\n", " #print(output[0])\n", " #print(len(output[0]))\n", - " #output.to_csv('Tests/' + key_group + '.tsv')\n", + " #output.to_csv('Tests/' + entity_set + '.tsv')\n", " # export the line above to a tsv\n", - " # data = 
data.append(pd.DataFrame(DwellTime: key_group.\n", + " # data = data.append(pd.DataFrame(DwellTime: entity_set.\n", "#print(output['/Users/Covitz/Downloads/RBC_growupCCNP_BIDS/sub-colornest195/ses-1/func/sub-colornest195_ses-1_task-rest_run-02_bold.nii.gz'])\n", "#print(output)\n" ] @@ -197,8 +197,8 @@ "metadata": {}, "outputs": [], "source": [ - "key_group = 'datatype-func_run-2_suffix-bold_task-rest'\n", - "file_params_dict = bod.get_file_params(key_group)\n", + "entity_set = 'datatype-func_run-2_suffix-bold_task-rest'\n", + "file_params_dict = bod.get_file_params(entity_set)\n", "print(len(file_params_dict))\n", "print(file_params_dict)\n", "#print(output[1])\n", @@ -226,7 +226,7 @@ "# print(filenames_size)\n", "# print(filenames[0])\n", "\n", - "# param_groups = bod.get_param_groups(key_group)\n", + "# param_groups = bod.get_param_groups(entity_set)\n", "# print(len(param_groups))\n" ] }, @@ -237,11 +237,11 @@ "outputs": [], "source": [ "# TESTING THE change_filename method!\n", - "key_group = 'datatype-func_run-2_suffix-bold_task-rest'\n", + "entity_set = 'datatype-func_run-2_suffix-bold_task-rest'\n", "split_params = {'FlipAngle': 80, 'EffectiveEchoSpacing': 0.000510002, 'PartialFourier': 1, 'TotalReadoutTime': 0.0362102, 'RepetitionTime': 2.5, 'EchoTime': 0.03, 'PhaseEncodingDirection': 'j-', 'DwellTime': 3.1e-06, 'SliceTime000': 1.2, 'SliceTime001': 0, 'SliceTime002': 1.3, 'SliceTime003': 0.1, 'SliceTime004': 1.4, 'SliceTime005': 0.1, 'SliceTime006': 1.4, 'SliceTime007': 0.2, 'SliceTime008': 1.5, 'SliceTime009': 0.3, 'SliceTime010': 1.6, 'SliceTime011': 0.3, 'SliceTime012': 1.6, 'SliceTime013': 0.4, 'SliceTime014': 1.7, 'SliceTime015': 0.5, 'SliceTime016': 1.8, 'SliceTime017': 0.5, 'SliceTime018': 1.8, 'SliceTime019': 0.6, 'SliceTime020': 1.9, 'SliceTime021': 0.7, 'SliceTime022': 2.0, 'SliceTime023': 0.7, 'SliceTime024': 2.0, 'SliceTime025': 0.8, 'SliceTime026': 2.1, 'SliceTime027': 0.9, 'SliceTime028': 2.2, 'SliceTime029': 0.9, 'SliceTime030': 2.2, 
'SliceTime031': 1.0, 'SliceTime032': 2.3, 'SliceTime033': 1.0, 'SliceTime034': 2.4, 'SliceTime035': 1.1, 'SliceTime036': 2.4, 'SliceTime037': 1.2}\n", "pattern = \"task\"\n", "replacement = \"TASK\"\n", - "ret = bod.change_filenames(key_group, split_params, pattern, replacement)\n", + "ret = bod.change_filenames(entity_set, split_params, pattern, replacement)\n", "\n", "changed_paths = ret\n", "\n", diff --git a/docs/notebooks/keyparamgrouptest.ipynb b/docs/notebooks/keyparamgrouptest.ipynb index 150eb3df6..09f0f02b2 100644 --- a/docs/notebooks/keyparamgrouptest.ipynb +++ b/docs/notebooks/keyparamgrouptest.ipynb @@ -51,8 +51,8 @@ } ], "source": [ - "key_groups = bod.get_key_groups()\n", - "print(key_groups)" + "entity_sets = bod.get_entity_sets()\n", + "print(entity_sets)" ] }, { @@ -90,7 +90,7 @@ " PartialFourier\n", " PhaseEncodingDirection\n", " TotalReadoutTime\n", - " key_group\n", + " entity_set\n", " FieldmapKey00\n", " FieldmapKey01\n", " NSliceTimes\n", @@ -168,7 +168,7 @@ "1 2 1 j- \n", "2 2 1 j- \n", "\n", - " TotalReadoutTime key_group \\\n", + " TotalReadoutTime entity_set \\\n", "0 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi \n", "1 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi \n", "2 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi \n", @@ -195,7 +195,7 @@ } ], "source": [ - "bod.get_param_groups_from_key_group('acquisition-64dir_datatype-dwi_suffix-dwi')" + "bod.get_param_groups_from_entity_set('acquisition-64dir_datatype-dwi_suffix-dwi')" ] }, { @@ -244,7 +244,7 @@ " PartialFourier\n", " PhaseEncodingDirection\n", " TotalReadoutTime\n", - " key_group\n", + " entity_set\n", " ...\n", " FilePath\n", " ParamGroup\n", @@ -1023,7 +1023,7 @@ "27 NaN 1 j- \n", "28 NaN 1 j- \n", "\n", - " TotalReadoutTime key_group ... \\\n", + " TotalReadoutTime entity_set ... \\\n", "0 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi ... \n", "1 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi ... 
\n", "2 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi ... \n", @@ -1342,7 +1342,7 @@ " \n", " \n", " \n", - " key_group\n", + " entity_set\n", " ParamGroup\n", " Count\n", " \n", @@ -1449,7 +1449,7 @@ "" ], "text/plain": [ - " key_group ParamGroup Count\n", + " entity_set ParamGroup Count\n", "0 acquisition-64dir_datatype-dwi_suffix-dwi 1 3\n", "1 acquisition-HCP_datatype-anat_suffix-T1w 1 3\n", "2 acquisition-HCP_datatype-anat_suffix-T2w 1 1\n", @@ -1474,7 +1474,7 @@ } ], "source": [ - "summary_df[[\"key_group\", \"ParamGroup\", \"Count\"]]" + "summary_df[[\"entity_set\", \"ParamGroup\", \"Count\"]]" ] }, { @@ -1487,7 +1487,7 @@ "param_group_cols = list(set(df.columns.to_list()) - set([\"FilePath\"]))\n", "uniques = df.drop_duplicates(param_group_cols, ignore_index=True)\n", "print(uniques.shape)\n", - "counts = df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')\n", + "counts = df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')\n", "print(counts.shape)\n", "\n", "params_and_counts = pd.merge(uniques, counts)\n", @@ -1507,7 +1507,7 @@ "metadata": {}, "outputs": [], "source": [ - "no_paths[[\"key_group\", \"ParamGroup\"]].groupby([\"key_group\", \"ParamGroup\"]).count()" + "no_paths[[\"entity_set\", \"ParamGroup\"]].groupby([\"entity_set\", \"ParamGroup\"]).count()" ] }, { @@ -1530,7 +1530,7 @@ "metadata": {}, "outputs": [], "source": [ - "keyparam_df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')" + "keyparam_df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')" ] }, { @@ -1548,7 +1548,7 @@ "metadata": {}, "outputs": [], "source": [ - "bod.get_key_groups()" + "bod.get_entity_sets()" ] }, { @@ -1650,13 +1650,13 @@ "\n", "dfs = []\n", "fieldmap_lookup = bod.fieldmap_lookup\n", - "key_group_name = \"test\"\n", + "entity_set_name = \"test\"\n", "# path needs to be relative to the root with no leading prefix\n", "for path in files:\n", " metadata = 
bod.layout.get_metadata(path)\n", " wanted_keys = metadata.keys() & IMAGING_PARAMS\n", " example_data = {key: metadata[key] for key in wanted_keys}\n", - " example_data[\"key_group\"] = key_group_name\n", + " example_data[\"entity_set\"] = entity_set_name\n", "\n", " # Get the fieldmaps out and add their types\n", " print(fieldmap_lookup[path])\n", diff --git a/docs/notebooks/workwithtestdata.ipynb b/docs/notebooks/workwithtestdata.ipynb index 1270bcc97..7f4bd3a82 100644 --- a/docs/notebooks/workwithtestdata.ipynb +++ b/docs/notebooks/workwithtestdata.ipynb @@ -108,8 +108,8 @@ } ], "source": [ - "key_groups = bod.get_key_groups()\n", - "print(key_groups)" + "entity_sets = bod.get_entity_sets()\n", + "print(entity_sets)" ] }, { @@ -147,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "ikey_groups = ibod.get_key_groups()" + "ientity_sets = ibod.get_entity_sets()" ] }, { @@ -167,7 +167,7 @@ } ], "source": [ - "ikey_groups == key_groups" + "ientity_sets == entity_sets" ] }, { @@ -270,7 +270,7 @@ " \n", " \n", " \n", - " key_group\n", + " entity_set\n", " ParamGroup\n", " Count\n", " \n", @@ -377,7 +377,7 @@ "" ], "text/plain": [ - " key_group ParamGroup Count\n", + " entity_set ParamGroup Count\n", "0 acquisition-64dir_datatype-dwi_suffix-dwi 1 3\n", "1 acquisition-HCP_datatype-anat_suffix-T1w 1 3\n", "2 acquisition-HCP_datatype-anat_suffix-T2w 1 1\n", @@ -402,7 +402,7 @@ } ], "source": [ - "summary_df[[\"key_group\", \"ParamGroup\", \"Count\"]]" + "summary_df[[\"entity_set\", \"ParamGroup\", \"Count\"]]" ] }, { @@ -415,7 +415,7 @@ "param_group_cols = list(set(df.columns.to_list()) - set([\"FilePath\"]))\n", "uniques = df.drop_duplicates(param_group_cols, ignore_index=True)\n", "print(uniques.shape)\n", - "counts = df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')\n", + "counts = df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')\n", "print(counts.shape)\n", "\n", "params_and_counts = pd.merge(uniques, 
counts)\n", @@ -435,7 +435,7 @@ "metadata": {}, "outputs": [], "source": [ - "no_paths[[\"key_group\", \"ParamGroup\"]].groupby([\"key_group\", \"ParamGroup\"]).count()" + "no_paths[[\"entity_set\", \"ParamGroup\"]].groupby([\"entity_set\", \"ParamGroup\"]).count()" ] }, { @@ -458,7 +458,7 @@ "metadata": {}, "outputs": [], "source": [ - "keyparam_df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')" + "keyparam_df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')" ] }, { @@ -476,7 +476,7 @@ "metadata": {}, "outputs": [], "source": [ - "bod.get_key_groups()" + "bod.get_entity_sets()" ] }, { @@ -578,13 +578,13 @@ "\n", "dfs = []\n", "fieldmap_lookup = bod.fieldmap_lookup\n", - "key_group_name = \"test\"\n", + "entity_set_name = \"test\"\n", "# path needs to be relative to the root with no leading prefix\n", "for path in files:\n", " metadata = bod.layout.get_metadata(path)\n", " wanted_keys = metadata.keys() & IMAGING_PARAMS\n", " example_data = {key: metadata[key] for key in wanted_keys}\n", - " example_data[\"key_group\"] = key_group_name\n", + " example_data[\"entity_set\"] = entity_set_name\n", "\n", " # Get the fieldmaps out and add their types\n", " print(fieldmap_lookup[path])\n", diff --git a/docs/usage.rst b/docs/usage.rst index f86b4e386..1696b66b9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -10,12 +10,12 @@ More definitions ---------------- -.. _keygroup: +.. _entityset: -Key Group -~~~~~~~~~ +Entity Set +~~~~~~~~~~~ -A :term:`Key Group` is a unique set of BIDS key-value pairs, +An :term:`Entity Set` is a unique set of BIDS key-value pairs, excluding identifiers such as subject and session. For example, the files:: @@ -23,18 +23,18 @@ For example, the files:: bids-root/sub-1/ses-2/func/sub-1_ses-2_acq-mb_dir_PA_task-rest_bold.nii.gz bids-root/sub-2/ses-1/func/sub-2_ses-1_acq-mb_dir-PA_task-rest_bold.nii.gz -Would all share the same Key Group. +Would all share the same Entity Set. 
If these scans were all acquired as a part of the same study on the same scanner with exactly the same acquisition parameters, this naming convention would suffice. However, in large multi-scanner, multi-site, or longitudinal studies where acquisition parameters change over time, -it's possible that the same Key Group could contain scans that differ in important ways. +it's possible that the same Entity Set could contain scans that differ in important ways. -``CuBIDS`` examines all acquisitions within a Key Group to see if there are any images +``CuBIDS`` examines all acquisitions within an Entity Set to see if there are any images that differ in a set of important acquisition parameters. -The subsets of consistent acquisition parameter sets within a Key Group are called a :ref:`paramgroup`. +The subsets of consistent acquisition parameter sets within an Entity Set are called a :ref:`paramgroup`. .. _paramgroup: @@ -42,17 +42,17 @@ The subsets of consistent acquisition parameter sets within a Key Group are call Parameter Group ~~~~~~~~~~~~~~~ -A :term:`Parameter Group` is a subset of a Key Group that contains images with the same +A :term:`Parameter Group` is a subset of an Entity Set that contains images with the same acquisition parameters. -Even though two images may belong to the same Key Group and are valid BIDS, +Even though two images may belong to the same Entity Set and are valid BIDS, they may have images with different acquisition parameters. There is nothing fundamentally wrong with this — the ``bids-validator`` will often simply flag these differences with a ``Warning``, but not necessarily suggest changes. That being said, there can be detrimental consequences downstream if the different parameters cause the -same preprocessing pipelines to configure differently to images of the same Key Group. +same preprocessing pipelines to configure differently to images of the same Entity Set. .. 
_acquisitiongroup: @@ -62,7 +62,7 @@ Acquisition Group We define an :term:`Acquisition Group` as a collection of sessions across participants that contain the exact same set of Key and Parameter Groups. -Since Key Groups are based on the BIDS filenames— +Since Entity Sets are based on the BIDS filenames— and therefore both MRI image type and acquisition specific— each BIDS session directory contains images that belong to a set of Parameter Groups. CuBIDS assigns each session, or set of Parameter Groups, @@ -75,7 +75,7 @@ if a BIDS App runs successfully on a single subject from each Acquisition Group, one can be confident that it will handle all combinations of scanning parameters in the entire dataset. The Acquisition Groups that subjects belong to are listed in ``_AcqGrouping.csv``, -while the Key Groups and Parameter Groups that define each Acquisition Group are noted in +while the Entity Sets and Parameter Groups that define each Acquisition Group are noted in ``_AcqGroupingInfo.txt``. @@ -84,10 +84,10 @@ while the Key Groups and Parameter Groups that define each Acquisition Group are The ``_summary.tsv`` File ~~~~~~~~~~~~~~~~~~~~~~~~~ -This file contains all the detected Key Groups and Parameter Groups. +This file contains all the detected Entity Sets and Parameter Groups. It provides an opportunity to evaluate your data and decide how to handle heterogeneity. -Below is an example ``_summary.tsv`` of the run-1 DWI Key Group in the PNC [#f1]_. +Below is an example ``_summary.tsv`` of the run-1 DWI Entity Set in the PNC [#f1]_. This reflects the original data that has been converted to BIDS using a heuristic. It is similar to what you will see when you first use this functionality: @@ -112,9 +112,9 @@ but it keeps track of every file's assignment to Key and Parameter Groups. 
Modifying Key and Parameter Group Assignments --------------------------------------------- -Sometimes we see that there are important differences in acquisition parameters within a Key Group. +Sometimes we see that there are important differences in acquisition parameters within an Entity Set. If these differences impact how a pipeline will process the data, -it makes sense to assign the scans in that Parameter Group to a different Key Group +it makes sense to assign the scans in that Parameter Group to a different Entity Set (i.e., assign them a different BIDS name). This can be accomplished by editing the empty columns in the `_summary.csv` file produced by ``cubids group``. @@ -127,7 +127,7 @@ Once the columns have been edited you can apply the changes to BIDS data using The changes in ``keyparam_edited_summary.csv`` will be applied to the BIDS data in ``/bids/dir`` and the new Key and Parameter groups will be saved to csv files starting with ``new_keyparam_prefix``. -Note: fieldmaps keygroups with variant parameters will be identified but not renamed. +Note: fieldmap Entity Sets with variant parameters will be identified but not renamed. The ``_AcqGrouping.tsv`` file @@ -142,14 +142,14 @@ Acquisition Group number. The ``_AcqGroupInfo.txt`` file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``_AcqGroupInfo.txt`` file lists all Key Groups that belong to a given Acquisition Group +The ``_AcqGroupInfo.txt`` file lists all Entity Sets that belong to a given Acquisition Group along with the number of sessions each group possesses. Visualizing and summarizing metadata heterogeneity -------------------------------------------------- -Use ``cubids group`` to generate your dataset's Entity Sets and Parameter Groups: .. 
code-block:: console @@ -174,10 +174,10 @@ Detecting Variant Groups ~~~~~~~~~~~~~~~~~~~~~~~~ Additionally, ``cubids apply`` can automatically rename files in :term:`Variant Groups ` -based on their scanning parameters that vary from those in their Key Groups' +based on their scanning parameters that vary from those in their Entity Sets' :term:`Dominant Parameter Groups `. Renaming is automatically suggested when the summary.tsv is generated from a ``cubids group`` run, -with the suggested new name listed in the tsv's :term:`Rename Key Group` column. +with the suggested new name listed in the tsv's :term:`Rename Entity Set` column. CuBIDS populates this column for all Variant Groups (e.g., every Parameter Group except the Dominant one). Specifically, CuBIDS will suggest renaming all non-dominant Parameter Group to include ``VARIANT*`` @@ -188,7 +188,7 @@ the one present in the Dominant Group, it will automatically suggest renaming all scans in that Variant Group to include ``acquisition-VARIANTRepetitionTime`` in their filenames. When the user runs ``cubids apply``, -filenames will get renamed according to the auto-generated names in the “Rename Key Group” column +filenames will get renamed according to the auto-generated names in the “Rename Entity Set” column in the summary.tsv @@ -233,7 +233,7 @@ Customizable configuration This file can be passed as an argument to ``cubids group`` and ``cubids apply`` using the ``--config`` flag and allows users to customize grouping settings based on MRI image type and parameter. -Each ``Key Group`` is associated with one (and only one) MRI image type, +Each ``Entity Set`` is associated with one (and only one) MRI image type, as BIDS filenames include MRI image type-specific values as their suffixes. This easy-to-modify configuration file provides several benefits to curation. 
diff --git a/pyproject.toml b/pyproject.toml index adb35afdb..13b55cb6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,10 +23,10 @@ requires-python = ">=3.8" dependencies = [ "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1", "numpy<=2.1.0", - "pandas<=2.2.2", + "pandas<=2.2.3", "pybids<=0.17.2", "pyyaml", - "scikit-learn<=1.5.1", + "scikit-learn<=1.6.0", "tqdm", ] dynamic = ["version"] @@ -86,6 +86,7 @@ cubids-copy-exemplars = "cubids.cli:_enter_copy_exemplars" cubids-undo = "cubids.cli:_enter_undo" bids-sidecar-merge = "cubids.cli:_enter_bids_sidecar_merge" cubids-validate = "cubids.cli:_enter_validate" +cubids-bids-version = "cubids.cli:_enter_bids_version" cubids-datalad-save = "cubids.cli:_enter_datalad_save" cubids-print-metadata-fields = "cubids.cli:_enter_print_metadata_fields" cubids-remove-metadata-fields = "cubids.cli:_enter_remove_metadata_fields"