-
Notifications
You must be signed in to change notification settings - Fork 0
/
tasks.py
113 lines (80 loc) · 3.05 KB
/
tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from pathlib import Path
from invoke import task
# Directory, located next to this file, into which `regenerate_models` writes
# the generated pydantic model modules.
MODEL_DIRECTORY = Path(__file__).parent / "yard" / "models"
@task
def regenerate_models(_):
    """Regenerate the pydantic models from the LinkML schemas.

    Every ``*.yaml`` file matching ``./schemas/schemas/*.yaml`` (resolved
    against the current working directory) is passed to LinkML's
    ``PydanticGenerator`` and the serialized result is written to
    ``MODEL_DIRECTORY / "<schema stem>.py"``.

    Args:
        _: Task context supplied by invoke (unused).

    Raises:
        RuntimeError: If no schema files are found.
    """
    import glob

    import linkml.generators.pydanticgen as pd

    schemas = glob.glob("./schemas/schemas/*.yaml")
    print("Regenerating pydantic models")
    if not schemas:
        raise RuntimeError(
            "No schemas found; check that the `schemas` git submodule is available"
        )

    for schema in schemas:
        print(schema)
        schema_path = Path(schema)
        gen = pd.PydanticGenerator(schema, verbose=True)
        output = gen.serialize()
        # Use ``stem`` to drop the extension. ``str.strip('.yaml')`` removes any
        # of the characters ".", "y", "a", "m", "l" from BOTH ends of the name,
        # so e.g. "family.yaml" would become "fami" instead of "family".
        with open(MODEL_DIRECTORY / f"{schema_path.stem}.py", "w") as f:
            # The serialized module is a single string; write it in one call
            # rather than character by character via ``writelines``.
            f.write(output)

    print("Done!")
@task(pre=[regenerate_models])
def validate_entries(_):
    """Load all registry entries with validation and cross-check extractors.

    Runs after ``regenerate_models``. Both ``FileType`` and ``Extractor``
    collections are loaded with ``validate=True``; for extractors, every
    supported filetype ID must match the stem of a ``.yml`` file under
    ``yard/data/filetypes``.

    Args:
        _: Task context supplied by invoke (unused).

    Raises:
        RuntimeError: If any extractor references an unknown filetype ID; the
            message lists one problem per line.
    """
    print("Validating entries")

    from yard.models import Extractor, FileType
    from yard.utils import load_registry_collection

    counts = {}
    errors = []

    for type_ in (FileType, Extractor):
        entries = load_registry_collection(type_, database=None, validate=True)
        counts[type_] = len(entries)
        print(f"Loaded {counts[type_]} {type_.__name__} entries")

        # Only extractors carry filetype references that need cross-checking.
        if type_ is not Extractor:
            continue

        project_root = Path(__file__).parent
        filetype_ids = {
            d.stem for d in project_root.glob("./yard/data/filetypes/*.yml")
        }
        # NOTE: the names `extractor`, `filetype` and `filetype_ids` appear
        # verbatim in the message because of the `{expr=}` f-string form.
        errors.extend(
            f"Extractor {extractor.name=} has invalid filetype {filetype.id=}. Should be one of {filetype_ids=}"
            for extractor in entries
            for filetype in extractor.supported_filetypes
            if filetype.id not in filetype_ids
        )

    if errors:
        raise RuntimeError("\n".join(errors))

    print("Done!")
@task
def check_for_yaml(_):
    """Fail if any extractor or filetype data file uses a ``.yaml`` extension.

    Files under ``yard/data/extractors`` and ``yard/data/filetypes`` are
    expected to end in ``.yml``; each offending file is printed before the
    task aborts.

    Args:
        _: Task context supplied by invoke (unused).

    Raises:
        RuntimeError: If at least one ``.yaml`` file is found in either
            directory.
    """
    from pathlib import Path

    print("Checking for erroneous .yaml files.")

    project_root = Path(__file__).parent
    extractors = [*project_root.glob("./yard/data/extractors/*.yaml")]
    filetypes = [*project_root.glob("./yard/data/filetypes/*.yaml")]

    # Nothing misnamed: report success and stop early.
    if not (extractors or filetypes):
        print("Done!")
        return

    # Report extractors first, then filetypes.
    for offender in (*extractors, *filetypes):
        print(f"Found {offender} with bad file extension (should be .yml here)")

    raise RuntimeError(f"Found files with bad extensions: {filetypes} {extractors}")
@task
def validate_lfs_examples(_):
    """Check that each LFS example directory has a corresponding filetype.

    The names of the directories under ``yard/data/lfs`` are compared against
    the stems of the filetype entry files under ``yard/data/filetypes``.

    Args:
        _: Task context supplied by invoke (unused).

    Raises:
        RuntimeError: If any LFS directory name does not match a filetype ID;
            the message lists one problem per line, sorted by directory name.
    """
    project_root = Path(__file__).parent

    # Filetype entries use the ``.yml`` extension (``check_for_yaml`` rejects
    # ``.yaml`` files in this directory), so globbing ``*.yaml`` here would
    # always yield an empty set and flag every LFS directory as an error.
    filetype_ids = {
        d.stem for d in project_root.glob("./yard/data/filetypes/*.yml")
    }

    # Only directories represent filetypes; skip stray files in the LFS root.
    lfs_filetype_dirs = {
        d.name for d in project_root.glob("./yard/data/lfs/*") if d.is_dir()
    }

    # Sort so the error report is deterministic across runs.
    errors = [
        f"Found LFS directory {lfs_dir} without corresponding filetype"
        for lfs_dir in sorted(lfs_filetype_dirs - filetype_ids)
    ]

    if errors:
        raise RuntimeError("\n".join(errors))