-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathvalidation.py
74 lines (65 loc) · 2.29 KB
/
validation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Development script to automate running the validation tests.
# These are large-scale tests that are not possible to run
# under unit-test conditions.
import pathlib
import shutil
import click
from bio2zarr import vcf2zarr
# TODO add support here for split VCFs. Perhaps the simplest approach is
# to treat a directory provided as input as indicating this, and then
# keep the original unsplit and split files in there, following some
# naming convention.
@click.command
@click.argument("vcfs", nargs=-1)
@click.option("-p", "--worker-processes", type=int, default=1)
@click.option("-f", "--force", is_flag=True, default=False)
# TODO add options for verbose and to force the use of a given
# index file
def cli(vcfs, worker_processes, force):
    # Push each input through vcf2zarr explode -> mkschema -> encode and
    # then verify the encoded result against the source file.
    #
    # vcfs: paths given on the command line; when none are given, every
    #     *.vcf.gz, *.bcf and *.split entry in "validation-data" is used.
    # worker_processes: forwarded to vcf2zarr.explode and vcf2zarr.encode.
    # force: discard and rebuild intermediates that already exist.
    #
    # NOTE: documented with comments rather than a docstring on purpose;
    # click would surface a docstring as the command's --help text.
    if vcfs:
        inputs = [pathlib.Path(name) for name in vcfs]
    else:
        data_dir = pathlib.Path("validation-data")
        inputs = []
        for pattern in ("*.vcf.gz", "*.bcf", "*.split"):
            inputs.extend(data_dir.glob(pattern))

    # Intermediates live here and are reused between runs unless forced.
    work_dir = pathlib.Path("validation-tmp")
    work_dir.mkdir(exist_ok=True)

    for entry in inputs:
        print("Validate", entry)

        if entry.is_dir():
            # A directory input: its *.vcf.gz / *.bcf members are exploded
            # together, and the file to verify against is the directory
            # path with its last two suffixes removed.
            # NOTE(review): presumably a "<source>.<n>.split" naming
            # convention -- confirm against the validation data layout.
            parts = [*entry.glob("*.vcf.gz"), *entry.glob("*.bcf")]
            reference = entry.with_suffix("").with_suffix("")
        else:
            parts = [entry]
            reference = entry

        # Step 1: explode into <name>.icf (skipped if already present).
        icf_dir = work_dir / (entry.name + ".icf")
        if force and icf_dir.exists():
            shutil.rmtree(icf_dir)
        if not icf_dir.exists():
            vcf2zarr.explode(
                icf_dir,
                parts,
                worker_processes=worker_processes,
                show_progress=True,
            )

        # Step 2: write <name>.schema (always rewritten when forced).
        schema_file = work_dir / (entry.name + ".schema")
        if force or not schema_file.exists():
            with schema_file.open("w") as schema_out:
                vcf2zarr.mkschema(icf_dir, schema_out)

        # Step 3: encode into <name>.vcz (skipped if already present).
        vcz_dir = work_dir / (entry.name + ".vcz")
        if force and vcz_dir.exists():
            shutil.rmtree(vcz_dir)
        if not vcz_dir.exists():
            vcf2zarr.encode(
                icf_dir,
                vcz_dir,
                schema_file,
                worker_processes=worker_processes,
                show_progress=True,
            )

        # Step 4: verify the encoded output against the source file.
        vcf2zarr.verify(reference, vcz_dir, show_progress=True)
# Script entry point: run the click command only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    cli()