Skip to content

Commit f7e8234

Browse files
vyasr and csadorf authored
feat: Validate dependencies.yaml using jsonschema (#29)
This PR enables validating the contents of a dependencies.yaml file directly without doing any processing. The schema is encoded using [JSON Schema](https://json-schema.org/) and validated using [the Python implementation](https://python-jsonschema.readthedocs.io/). The new Python code is fairly minimal, and it would be even shorter except that I leveraged the object-oriented API to show all errors in a file instead of simply showing the first error using `jsonschema.validate`. The majority of the new lines are from the schema definition. The validation is injected into the normal CLI usage so that schemas are always validated before dependency files are generated, ensuring that developers see useful errors about why their dependencies.yaml file is invalid rather than opaque runtime errors when dfg fails to use the file. --------- Co-authored-by: Simon Adorf <[email protected]>
1 parent e710ac0 commit f7e8234

File tree

10 files changed

+310
-17
lines changed

10 files changed

+310
-17
lines changed

.pre-commit-config.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ repos:
1717
args:
1818
- --py38-plus
1919
- repo: https://github.com/PyCQA/isort
20-
rev: '5.10.1'
20+
rev: '5.12.0'
2121
hooks:
2222
- id: isort
2323
- repo: https://github.com/psf/black
@@ -30,3 +30,12 @@ repos:
3030
- id: flake8
3131
args:
3232
- --show-source
33+
- repo: https://github.com/python-jsonschema/check-jsonschema
34+
rev: 0.21.0
35+
hooks:
36+
- id: check-metaschema
37+
files: ^src/rapids_dependency_file_generator/schema.json$
38+
- id: check-jsonschema
39+
files: ^tests/examples/([^/]*)/dependencies.yaml$
40+
args: ["--schemafile", "src/rapids_dependency_file_generator/schema.json"]
41+
- id: check-github-workflows

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include src/rapids_dependency_file_generator/schema.json

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ classifiers = [
1818
"License :: OSI Approved :: Apache Software License",
1919
"Programming Language :: Python :: 3",
2020
]
21+
requires-python = ">=3.8"
2122
dependencies = [
2223
"PyYAML",
24+
"jsonschema",
2325
]
2426

2527
[project.scripts]

src/rapids_dependency_file_generator/cli.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ._version import __version__ as version
66
from .constants import OutputTypes, default_dependency_file_path
77
from .rapids_dependency_file_generator import make_dependency_files
8+
from .rapids_dependency_file_validator import validate_dependencies
89

910

1011
def validate_args(argv):
@@ -63,6 +64,8 @@ def main(argv=None):
6364
with open(args.config) as f:
6465
parsed_config = yaml.load(f, Loader=yaml.FullLoader)
6566

67+
validate_dependencies(parsed_config)
68+
6669
matrix = generate_matrix(args.matrix)
6770
to_stdout = all([args.file_key, args.output, args.matrix is not None])
6871

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Logic for validating dependency files."""
2+
3+
import json
4+
import textwrap
5+
6+
import jsonschema
7+
import pkg_resources
8+
from jsonschema.exceptions import best_match
9+
10+
SCHEMA = json.loads(pkg_resources.resource_string(__name__, "schema.json"))
11+
12+
13+
def validate_dependencies(dependencies):
    """Validate a dictionary against the dependencies.yaml spec.

    All schema violations are collected, but only the single most relevant
    one (as selected by ``jsonschema.exceptions.best_match``) is reported.

    Parameters
    ----------
    dependencies : dict
        The parsed dependencies.yaml file.

    Raises
    ------
    RuntimeError
        If the dependencies do not conform to the schema. The details of the
        best-matching ``jsonschema`` validation error are printed to stderr
        before the exception is raised.
    """
    import sys

    validator = jsonschema.Draft7Validator(SCHEMA)
    errors = list(validator.iter_errors(dependencies))
    if not errors:
        return

    # Diagnostics go to stderr so they stay separate from anything the tool
    # writes to stdout.
    print("The provided dependency file contains schema errors.", file=sys.stderr)
    best_matching_error = best_match(errors)
    print(
        "\n",
        textwrap.indent(str(best_matching_error), "\t"),
        "\n",
        file=sys.stderr,
    )
    raise RuntimeError("The provided dependencies data is invalid.")
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"$id": "https://raw.githubusercontent.com/rapidsai/dependency-file-generator/v1.0.0/src/rapids_dependency_file_generator/schema.json",
4+
"type": "object",
5+
"title": "RAPIDS Package Dependency Specification Format",
6+
"description": "Consolidated specification of RAPIDS project dependencies",
7+
"properties": {
8+
"files": {
9+
"type": "object",
10+
"patternProperties": {
11+
".*": {
12+
"type": "object",
13+
"properties": {
14+
"output": {"$ref": "#/$defs/outputs"},
15+
"includes": {"type": "array", "items": {"type": "string"}},
16+
"matrix": {"$ref": "#/$defs/matrix"},
17+
"requirements_dir": {"type": "string"},
18+
"conda_dir": {"type": "string"}
19+
},
20+
"additionalProperties": false,
21+
"required": ["output", "includes"]
22+
}
23+
},
24+
"minProperties": 1
25+
},
26+
"dependencies": {
27+
"type": "object",
28+
"patternProperties": {
29+
".*": {
30+
"type": "object",
31+
"properties": {
32+
"common": {
33+
"type": "array",
34+
"items": {
35+
"type": "object",
36+
"properties": {
37+
"output_types": {"$ref": "#/$defs/outputs"},
38+
"packages": {"$ref": "#/$defs/packages"}
39+
},
40+
"required": ["output_types", "packages"],
41+
"additionalProperties": false
42+
}
43+
},
44+
"specific": {
45+
"type": "array",
46+
"items": {
47+
"type": "object",
48+
"properties": {
49+
"output_types": {"$ref": "#/$defs/outputs"},
50+
"matrices": {"$ref": "#/$defs/matrices"}
51+
},
52+
"required": ["output_types", "matrices"],
53+
"additionalProperties": false
54+
}
55+
}
56+
},
57+
"minProperties": 1,
58+
"additionalProperties": false
59+
}
60+
}
61+
},
62+
"channels": {"$ref": "#/$defs/channels"}
63+
},
64+
"required": ["files", "dependencies"],
65+
"additionalProperties": false,
66+
"$defs": {
67+
"channel": {
68+
"type": "string",
69+
"format": "iri-reference"
70+
},
71+
"channel-list": {
72+
"type": "array",
73+
"items": {
74+
"$ref": "#/$defs/channel"
75+
}
76+
},
77+
"channels": {
78+
"oneOf": [
79+
{"$ref": "#/$defs/channel"},
80+
{"$ref": "#/$defs/channel-list"}
81+
]
82+
},
83+
"matrix": {
84+
"type": "object",
85+
"patternProperties": {
86+
".*": {
87+
"type": "array",
88+
"items": {"type": "string"}
89+
}
90+
}
91+
},
92+
"matrix-matcher": {
93+
"type": "object",
94+
"properties": {
95+
"matrix": {
96+
"oneOf": [
97+
{
98+
"type": "object",
99+
"patternProperties": {
100+
".*": {"type": "string"}
101+
}
102+
},
103+
{"type": "null"}
104+
]
105+
},
106+
"packages": {"oneOf": [
107+
{"$ref": "#/$defs/requirements"},
108+
{"type": "null"}
109+
]}
110+
},
111+
"required": ["matrix", "packages"],
112+
"additionalProperties": false
113+
},
114+
"matrices": {
115+
"type": "array",
116+
"items": {"$ref": "#/$defs/matrix-matcher"}
117+
},
118+
"output-types": {
119+
"enum": ["conda", "requirements"]
120+
},
121+
"output-types-array": {
122+
"type": "array",
123+
"items": {"$ref": "#/$defs/output-types"}
124+
},
125+
"outputs": {
126+
"oneOf": [
127+
{"$ref": "#/$defs/output-types"},
128+
{"$ref": "#/$defs/output-types-array"},
129+
{"const": "none"}
130+
]
131+
},
132+
"packages": {
133+
"type": "array",
134+
"items": {
135+
"oneOf": [
136+
{"$ref": "#/$defs/requirement"},
137+
{"$ref": "#/$defs/pip-requirements"}
138+
]
139+
}
140+
},
141+
"requirement": {
142+
"type": "string"
143+
},
144+
"requirements": {
145+
"type": "array",
146+
"items": {
147+
"$ref": "#/$defs/requirement"
148+
},
149+
"minItems": 1
150+
151+
},
152+
"pip-requirements": {
153+
"type": "object",
154+
"properties": {
155+
"pip": {"$ref": "#/$defs/requirements"}
156+
},
157+
"additionalProperties": false,
158+
"required": ["pip"]
159+
}
160+
}
161+
}

tests/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import pytest
2+
3+
from rapids_dependency_file_generator.rapids_dependency_file_validator import SCHEMA
4+
5+
6+
@pytest.fixture(scope="session")
def schema():
    """Expose the validator module's loaded ``SCHEMA`` once per test session."""
    loaded_schema = SCHEMA
    return loaded_schema
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
files:
2+
build:
3+
output: conda
4+
conda_dir: output/actual
5+
matrix:
6+
cuda: ["11.5", "11.6"]
7+
arch: [x86_64]
8+
includes:
9+
- build
10+
channels:
11+
- rapidsai
12+
- conda-forge
13+
dependencies:
14+
build:
15+
common:
16+
- output_types: [conda, requirements]
17+
packages:
18+
- clang=11.1.0
19+
- spdlog>=1.8.5,<1.9
20+
- output_types: conda
21+
packages:
22+
- pip
23+
- pip:
24+
- git+https://github.com/python-streamz/streamz.git@master
25+
specific:
26+
- output_types: [conda, requirements]
27+
matrices:
28+
- matrix:
29+
cuda: "11.5"
30+
packages:
31+
- 1234
32+
- cuda-python>=11.5,<11.7.1
33+
- matrix:
34+
cuda: "11.6"
35+
packages:
36+
- cuda-python>=11.6,<11.7.1
37+
- output_types: conda
38+
matrices:
39+
- matrix:
40+
cuda: "11.5"
41+
packages:
42+
- cudatoolkit=11.5
43+
- matrix:
44+
cuda: "11.6"
45+
packages:
46+
- cudatoolkit=11.6

tests/test_examples.py

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,22 @@
33
import pathlib
44
import shutil
55

6+
import jsonschema
67
import pytest
8+
import yaml
9+
from jsonschema.exceptions import ValidationError
710

811
from rapids_dependency_file_generator.cli import main
912

1013
CURRENT_DIR = pathlib.Path(__file__).parent
1114

15+
# dependencies.yaml files one level below examples/, leaving out any whose
# absolute path mentions "no-specific-match".
EXAMPLE_FILES = list(
    filter(
        lambda candidate: "no-specific-match" not in str(candidate.absolute()),
        CURRENT_DIR.glob("examples/*/dependencies.yaml"),
    )
)
# dependencies.yaml files that live under examples/invalid/<name>/.
INVALID_EXAMPLE_FILES = [*CURRENT_DIR.glob("examples/invalid/*/dependencies.yaml")]
21+
1222

1323
@pytest.fixture(scope="session", autouse=True)
1424
def clean_actual_files():
@@ -25,23 +35,26 @@ def make_file_set(file_dir):
2535
}
2636

2737

28-
@pytest.mark.parametrize(
29-
"test_name",
30-
[
31-
"conda-minimal",
32-
"integration",
33-
"matrix",
34-
"no-matrix",
35-
"requirements-minimal",
36-
"specific-fallback-first",
37-
"specific-fallback",
38-
],
38+
@pytest.fixture(
39+
params=[example_file.parent for example_file in EXAMPLE_FILES],
40+
ids=[example_file.parent.stem for example_file in EXAMPLE_FILES],
3941
)
40-
def test_examples(test_name):
41-
test_dir = CURRENT_DIR.joinpath("examples", test_name)
42-
expected_dir = test_dir.joinpath("output", "expected")
43-
actual_dir = test_dir.joinpath("output", "actual")
44-
dep_file_path = test_dir.joinpath("dependencies.yaml")
42+
def example_dir(request):
43+
return request.param
44+
45+
46+
@pytest.fixture(
    params=[spec_file.parent for spec_file in INVALID_EXAMPLE_FILES],
    ids=[spec_file.parent.stem for spec_file in INVALID_EXAMPLE_FILES],
)
def invalid_example_dir(request):
    """Return the directory of one invalid example per parametrized run."""
    invalid_dir = request.param
    return invalid_dir
52+
53+
54+
def test_examples(example_dir):
55+
expected_dir = example_dir.joinpath("output", "expected")
56+
actual_dir = example_dir.joinpath("output", "actual")
57+
dep_file_path = example_dir.joinpath("dependencies.yaml")
4558

4659
main(["--config", str(dep_file_path)])
4760

@@ -63,3 +76,16 @@ def test_error_examples(test_name):
6376

6477
with pytest.raises(ValueError):
6578
main(["--config", str(dep_file_path)])
79+
80+
81+
def test_examples_are_valid(schema, example_dir):
    """Check that the example's dependencies.yaml passes schema validation."""
    spec_text = example_dir.joinpath("dependencies.yaml").read_text()
    parsed_spec = yaml.safe_load(spec_text)
    jsonschema.validate(parsed_spec, schema=schema)
85+
86+
87+
def test_invalid_examples_are_invalid(schema, invalid_example_dir):
    """Check that the invalid example's dependencies.yaml fails validation."""
    spec_text = invalid_example_dir.joinpath("dependencies.yaml").read_text()
    parsed_spec = yaml.safe_load(spec_text)
    with pytest.raises(ValidationError):
        jsonschema.validate(parsed_spec, schema=schema)

tests/test_schema.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import jsonschema
2+
3+
4+
def test_schema_is_valid(schema):
    """Check the schema itself against the Draft 7 metaschema."""
    validator_cls = jsonschema.Draft7Validator
    validator_cls.check_schema(schema)

0 commit comments

Comments
 (0)