diff --git a/.circleci/config.yml b/.circleci/config.yml index 3ed25686..1fe2e779 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,6 +1,6 @@ version: 2.1 orbs: - codecov: codecov/codecov@1.0.5 + codecov: codecov/codecov@3.2.4 jobs: run_pytests: @@ -31,10 +31,8 @@ jobs: source activate cubids conda install -c conda-forge -y datalad - # Add nodejs and the validator - conda install nodejs - npm install -g yarn && \ - npm install -g bids-validator@1.14.14-dev.0 + # Add deno to run the schema validator + conda install deno # Install CuBIDS pip install -e .[tests] @@ -64,7 +62,8 @@ jobs: # We need curl for the codecov upload apt-get update - apt-get install -yqq curl + apt-get install -y -qq curl + apt-get install -y gnupg cd /home/circleci/src/coverage/ echo "Merge coverage files" diff --git a/cubids/cli.py b/cubids/cli.py index f87ffc6c..cf48cf9a 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -43,7 +43,7 @@ def _parse_validate(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -107,6 +107,41 @@ def _enter_validate(argv=None): workflows.validate(**args) +def _parse_bids_version(): + parser = argparse.ArgumentParser( + description="cubids bids-version: Get BIDS Validator and Schema version", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + PathExists = partial(_path_exists, parser=parser) + + parser.add_argument( + "bids_dir", + type=PathExists, + action="store", + help=( + "The root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "--write", + action="store_true", + default=False, + help=( + "Save the validator and schema version to 'dataset_description.json' " + "when using `cubids bids-version /bids/path --write`. " + "By default, `cubids bids-version /bids/path` prints to the terminal." + ), + ) + return parser + + +def _enter_bids_version(argv=None): + options = _parse_bids_version().parse_args(argv) + args = vars(options).copy() + workflows.bids_version(**args) + + def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), @@ -153,7 +188,7 @@ def _parse_group(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -220,7 +255,7 @@ def _parse_apply(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -316,7 +351,7 @@ def _parse_datalad_save(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -358,7 +393,7 @@ def _parse_undo(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -582,7 +617,7 @@ def _parse_remove_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -628,7 +663,7 @@ def _parse_print_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -655,6 +690,7 @@ def _enter_print_metadata_fields(argv=None): COMMANDS = [ ("validate", _parse_validate, workflows.validate), + ("bids-version", _parse_bids_version, workflows.bids_version), ("sidecar-merge", _parse_bids_sidecar_merge, workflows.bids_sidecar_merge), ("group", _parse_group, workflows.group), ("apply", _parse_apply, workflows.apply), diff --git a/cubids/cubids.py b/cubids/cubids.py index 44c57fdc..81781596 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1336,9 +1336,20 @@ def get_all_metadata_fields(self): found_fields = set() for json_file in Path(self.path).rglob("*.json"): if ".git" not in str(json_file): - with open(json_file, "r") as jsonr: - metadata = json.load(jsonr) - found_fields.update(metadata.keys()) + # add this in case `print-metadata-fields` is run before validate + try: + with open(json_file, "r", encoding="utf-8") as jsonr: + content = jsonr.read().strip() + if not content: + print(f"Empty file: {json_file}") + continue + metadata = json.loads(content) + found_fields.update(metadata.keys()) + except json.JSONDecodeError as e: + warnings.warn(f"Error decoding JSON in {json_file}: {e}") + except Exception as e: + warnings.warn(f"Unexpected error with file {json_file}: {e}") + return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 28211cc2..d33107be 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd import pytest +from packaging.version import Version from cubids.cubids import CuBIDS from cubids.metadata_merge import merge_json_into_json, merge_without_overwrite @@ -22,7 +23,15 @@ file_hash, get_data, ) -from cubids.validator import build_validator_call, parse_validator_output, run_validator +from cubids.validator import ( + build_validator_call, + parse_validator_output, + run_validator, + get_bids_validator_version, + extract_summary_info, + update_dataset_description, + bids_validator_version, +) COMPLETE_KEY_GROUPS = [ "acquisition-HASC55AP_datatype-dwi_suffix-dwi", @@ -1028,6 +1037,39 @@ def test_validator(tmp_path): assert isinstance(parsed, pd.DataFrame) +def test_bids_version(tmp_path): + """Test workflows.bids_version.""" + data_root = get_data(tmp_path) + bids_dir = Path(data_root) / "complete" + + # Ensure the test directory exists + assert bids_dir.exists() + + # test the validator in valid dataset + call = build_validator_call(bids_dir) + ret = run_validator(call) + + assert ret.returncode == 0 + + decoded = ret.stdout.decode("UTF-8") + + # Get the BIDS validator version + validator_version = Version(get_bids_validator_version()["ValidatorVersion"]) + # Extract schemaVersion + schema_version = Version(extract_summary_info(decoded)["SchemaVersion"]) + + # Set baseline versions to compare against + min_validator_version = Version("2.0.0") + min_schema_version = Version("0.11.3") + + assert ( + validator_version >= min_validator_version + ), f"Validator version {validator_version} is less than minimum {min_validator_version}" + assert ( + schema_version >= min_schema_version + ), f"Schema version {schema_version} is less than minimum {min_schema_version}" + + def test_docker(): """Verify that docker is installed and the user has permission to run docker images. diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index a0e9066a..06d3af2a 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -14,9 +14,10 @@ """ import argparse + import pytest -from cubids.cli import _path_exists, _is_file, _get_parser, _main +from cubids.cli import _get_parser, _is_file, _main, _path_exists def _test_path_exists(): diff --git a/cubids/validator.py b/cubids/validator.py index d7e52fe4..a4feeba5 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -5,7 +5,9 @@ import logging import os import pathlib +import re import subprocess +import warnings import pandas as pd @@ -14,9 +16,9 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" - # build docker call - # CuBIDS automatically ignores subject consistency. - command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"] + # New schema BIDS validator doesn't have option to ignore subject consistency. + # Build the deno command to run the BIDS validator. + command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -24,6 +26,23 @@ def build_validator_call(path, ignore_headers=False): return command +def get_bids_validator_version(): + """Get the version of the BIDS validator. + + Returns + ------- + version : :obj:`str` + Version of the BIDS validator. + """ + command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output = result.stdout.decode("utf-8").strip() + version = output.split()[-1] + # Remove ANSI color codes + clean_ver = re.sub(r"\x1b\[[0-9;]*m", "", version) + return {"ValidatorVersion": clean_ver} + + def build_subject_paths(bids_dir): """Build a list of BIDS dirs with 1 subject each.""" bids_dir = str(bids_dir) @@ -52,6 +71,26 @@ def build_subject_paths(bids_dir): return subjects_dict +def build_first_subject_path(bids_dir, subject): + """Build a list of BIDS dirs with 1 subject each.""" + bids_dir = str(bids_dir) + if not bids_dir.endswith("/"): + bids_dir += "/" + + root_files = [x for x in glob.glob(bids_dir + "*") if os.path.isfile(x)] + + subject_dict = {} + + purepath = pathlib.PurePath(subject) + sub_label = purepath.name + + files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] + files.extend(root_files) + subject_dict[sub_label] = files + + return subject_dict + + def run_validator(call): """Run the validator with subprocess. @@ -87,32 +126,6 @@ def parse_validator_output(output): Dataframe of validator output. """ - def get_nested(dct, *keys): - """Get a nested value from a dictionary. - - Parameters - ---------- - dct : :obj:`dict` - Dictionary to get value from. - keys : :obj:`list` - List of keys to get value from. - - Returns - ------- - :obj:`dict` - The nested value. - """ - for key in keys: - try: - dct = dct[key] - except (KeyError, TypeError): - return None - return dct - - data = json.loads(output) - - issues = data["issues"] - def parse_issue(issue_dict): """Parse a single issue from the validator output. @@ -126,30 +139,30 @@ def parse_issue(issue_dict): return_dict : :obj:`dict` Dictionary of parsed issue. """ - return_dict = {} - return_dict["files"] = [ - get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") - ] - return_dict["type"] = issue_dict.get("key", "") - return_dict["severity"] = issue_dict.get("severity", "") - return_dict["description"] = issue_dict.get("reason", "") - return_dict["code"] = issue_dict.get("code", "") - return_dict["url"] = issue_dict.get("helpUrl", "") - - return return_dict - - df = pd.DataFrame() - - for warn in issues["warnings"]: - parsed = parse_issue(warn) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) - - for err in issues["errors"]: - parsed = parse_issue(err) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) + return { + "location": issue_dict.get("location", ""), + "code": issue_dict.get("code", ""), + "issueMessage": issue_dict.get("issueMessage", ""), + "subCode": issue_dict.get("subCode", ""), + "severity": issue_dict.get("severity", ""), + "rule": issue_dict.get("rule", ""), + } + + # Load JSON data + data = json.loads(output) + + # Extract issues + issues = data.get("issues", {}).get("issues", []) + if not issues: + return pd.DataFrame( + columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] + ) + # Parse all issues + parsed_issues = [parse_issue(issue) for issue in issues] + + # Convert to DataFrame + df = pd.DataFrame(parsed_issues) return df @@ -161,12 +174,106 @@ def get_val_dictionary(): val_dict : dict Dictionary of values. """ - val_dict = {} - val_dict["files"] = {"Description": "File with warning orerror"} - val_dict["type"] = {"Description": "BIDS validation warning or error"} - val_dict["severity"] = {"Description": "gravity of problem (warning/error"} - val_dict["description"] = {"Description": "Description of warning/error"} - val_dict["code"] = {"Description": "BIDS validator issue code number"} - val_dict["url"] = {"Description": "Link to the issue's neurostars thread"} - - return val_dict + return { + "location": {"Description": "File with the validation issue."}, + "code": {"Description": "Code of the validation issue."}, + "issueMessage": {"Description": "Validation issue message."}, + "subCode": {"Description": "Subcode providing additional issue details."}, + "severity": {"Description": "Severity of the issue (e.g., warning, error)."}, + "rule": {"Description": "Validation rule that triggered the issue."}, + } + + +def extract_summary_info(output): + """Extract summary information from the JSON output. + + Parameters + ---------- + output : str + JSON string of BIDS validator output. + + Returns + ------- + dict + Dictionary containing SchemaVersion and other summary info. + """ + try: + data = json.loads(output) + except json.JSONDecodeError as e: + raise ValueError("Invalid JSON provided to get SchemaVersion.") from e + + summary = data.get("summary", {}) + + return {"SchemaVersion": summary.get("schemaVersion", "")} + + +def update_dataset_description(path, new_info): + """Update or append information to dataset_description.json. + + Parameters + ---------- + path : :obj:`str` + Path to the dataset. + new_info : :obj:`dict` + Information to add or update. + """ + description_path = os.path.join(path, "dataset_description.json") + + # Load existing data if the file exists + if os.path.exists(description_path): + with open(description_path, "r") as f: + existing_data = json.load(f) + else: + existing_data = {} + + # Update the existing data with the new info + existing_data.update(new_info) + + # Write the updated data back to the file + with open(description_path, "w") as f: + json.dump(existing_data, f, indent=4) + print(f"Updated dataset_description.json at: {description_path}") + + # Check if .datalad directory exists before running the DataLad save command + datalad_dir = os.path.join(path, ".datalad") + if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir): + try: + subprocess.run( + [ + "datalad", + "save", + "-m", + "Save BIDS validator and schema version to dataset_description", + description_path, + ], + check=True, + ) + print("Changes saved with DataLad.") + except subprocess.CalledProcessError as e: + warnings.warn(f"Error running DataLad save: {e}") + + +def bids_validator_version(output, path, write=False): + """Save BIDS validator and schema version. + + Parameters + ---------- + output : :obj:`str` + Path to JSON file of BIDS validator output. + path : :obj:`str` + Path to the dataset. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. + """ + # Get the BIDS validator version + validator_version = get_bids_validator_version() + # Extract schemaVersion + summary_info = extract_summary_info(output) + + combined_info = {**validator_version, **summary_info} + + if write: + # Update the dataset_description.json file + update_dataset_description(path, combined_info) + elif not write: + print(combined_info) diff --git a/cubids/workflows.py b/cubids/workflows.py index 6cbc1e42..11931c5b 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -17,6 +17,8 @@ from cubids.metadata_merge import merge_json_into_json from cubids.utils import _get_container_type from cubids.validator import ( + bids_validator_version, + build_first_subject_path, build_subject_paths, build_validator_call, get_val_dictionary, @@ -258,6 +260,70 @@ def validate( sys.exit(proc.returncode) +def bids_version(bids_dir, write=False): + """Get BIDS validator and schema version. + + Parameters + ---------- + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. + """ + # Need to run validator to get output with schema version + # Copy code from `validate --sequential` + + try: # return first subject + # Get all folders that start with "sub-" + sub_folders = [ + name + for name in os.listdir(bids_dir) + if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") + ] + if not sub_folders: + raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.") + subject = sub_folders[0] + except FileNotFoundError: + raise FileNotFoundError(f"The directory {bids_dir} does not exist.") + except ValueError as ve: + raise ve + + # build a dictionary with {SubjectLabel: [List of files]} + # run first subject only + subject_dict = build_first_subject_path(bids_dir, subject) + + # iterate over the dictionary + for subject, files_list in subject_dict.items(): + # logger.info(" ".join(["Processing subject:", subject])) + # create a temporary directory and symlink the data + with tempfile.TemporaryDirectory() as tmpdirname: + for fi in files_list: + # cut the path down to the subject label + bids_start = fi.find(subject) + + # maybe it's a single file + if bids_start < 1: + bids_folder = tmpdirname + fi_tmpdir = tmpdirname + + else: + bids_folder = Path(fi[bids_start:]).parent + fi_tmpdir = tmpdirname + "/" + str(bids_folder) + + if not os.path.exists(fi_tmpdir): + os.makedirs(fi_tmpdir) + output = fi_tmpdir + "/" + str(Path(fi).name) + shutil.copy2(fi, output) + + # run the validator + call = build_validator_call(tmpdirname) + ret = run_validator(call) + + # Get BIDS validator and schema version + decoded = ret.stdout.decode("UTF-8") + bids_validator_version(decoded, bids_dir, write=write) + + def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) diff --git a/docs/installation.rst b/docs/installation.rst index d55b84a4..b6ebda3b 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -16,7 +16,7 @@ initialize a new conda environment (for example, named ``cubids``) as follows: .. code-block:: console - $ conda create -n cubids python=3.8 pip + $ conda create -n cubids python=3.12 pip $ conda activate cubids You are now ready to install CuBIDS. @@ -44,23 +44,16 @@ Once you have a copy of the source, you can install it with: $ pip install -e . We will now need to install some dependencies of ``CuBIDS``. -To do this, we first must install nodejs. +To do this, we first must install deno to run `bids-validator`. We can accomplish this using the following command: .. code-block:: console - $ conda install nodejs + $ conda install deno -Now that we have npm installed, we can install ``bids-validator`` using the following command: +The new schema-based ``bids-validator`` doesn't need to be installed +and will be implemented automatically when `cubids validate` is called -.. code-block:: console - - $ npm install -g bids-validator@1.7.2 - -In our example walkthrough, -we use ``bids-validator`` v1.7.2. using a different version of the -validator may result in slightly different validation tsv printouts, -but ``CuBIDS`` is compatible with all versions of the validator at or above v1.6.2. We also recommend using ``CuBIDS`` with the optional ``DataLad`` version control capabilities. We use ``DataLad`` throughout our walkthrough of the CuBIDS Workflow on diff --git a/pyproject.toml b/pyproject.toml index b77e9e9a..26918e60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,10 +23,10 @@ requires-python = ">=3.8" dependencies = [ "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1", "numpy<=2.1.3", - "pandas<=2.2.2", - "pybids<=0.17.0", + "pandas<=2.2.3", + "pybids<=0.17.2", "pyyaml", - "scikit-learn<=1.5.1", + "scikit-learn<=1.6.0", "tqdm", ] dynamic = ["version"] @@ -86,6 +86,7 @@ cubids-copy-exemplars = "cubids.cli:_enter_copy_exemplars" cubids-undo = "cubids.cli:_enter_undo" bids-sidecar-merge = "cubids.cli:_enter_bids_sidecar_merge" cubids-validate = "cubids.cli:_enter_validate" +cubids-bids-version = "cubids.cli:_enter_bids_version" cubids-datalad-save = "cubids.cli:_enter_datalad_save" cubids-print-metadata-fields = "cubids.cli:_enter_print_metadata_fields" cubids-remove-metadata-fields = "cubids.cli:_enter_remove_metadata_fields"