Skip to content

Commit

Permalink
Remove sample subsetting temporarily.
Browse files Browse the repository at this point in the history
It has a number of problems and better not to try and fix them in a
hurry pre shipping. See sgkit-dev#121 and sgkit-dev#122
  • Loading branch information
jeromekelleher committed Jan 14, 2025
1 parent 229cc29 commit cb846c0
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 59 deletions.
28 changes: 15 additions & 13 deletions tests/test_bcftools_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,21 @@ def run_vcztools(args: str) -> str:
"view --no-version -G",
"sample.vcf.gz"
),
(
"view --no-update --no-version --samples-file "
"tests/data/txt/samples.txt",
"sample.vcf.gz"),
("view -I --no-version -S tests/data/txt/samples.txt", "sample.vcf.gz"),
("view --no-version -s NA00001", "sample.vcf.gz"),
("view --no-version -s NA00001,NA00003", "sample.vcf.gz"),
("view --no-version -s HG00096", "1kg_2020_chrM.vcf.gz"),
("view --no-version -s '' --force-samples", "sample.vcf.gz"),
("view --no-version -s ^NA00001", "sample.vcf.gz"),
("view --no-version -s ^NA00003,NA00002", "sample.vcf.gz"),
("view --no-version -s ^NA00003,NA00002,NA00003", "sample.vcf.gz"),
("view --no-version -S ^tests/data/txt/samples.txt", "sample.vcf.gz"),
# Temporarily removing until sample handling fixed:
# https://github.com/sgkit-dev/vcztools/issues/121
# (
# "view --no-update --no-version --samples-file "
# "tests/data/txt/samples.txt",
# "sample.vcf.gz"),
# ("view -I --no-version -S tests/data/txt/samples.txt", "sample.vcf.gz"),
# ("view --no-version -s NA00001", "sample.vcf.gz"),
# ("view --no-version -s NA00001,NA00003", "sample.vcf.gz"),
# ("view --no-version -s HG00096", "1kg_2020_chrM.vcf.gz"),
# ("view --no-version -s '' --force-samples", "sample.vcf.gz"),
# ("view --no-version -s ^NA00001", "sample.vcf.gz"),
# ("view --no-version -s ^NA00003,NA00002", "sample.vcf.gz"),
# ("view --no-version -s ^NA00003,NA00002,NA00003", "sample.vcf.gz"),
# ("view --no-version -S ^tests/data/txt/samples.txt", "sample.vcf.gz"),
]
)
# fmt: on
Expand Down
18 changes: 10 additions & 8 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,16 @@ def test_view_write_pipe(self, tmp_path, vcz_path):
assert "Is a directory" in result.stderr


def test_excluding_and_including_samples(vcz_path):
samples_file_path = pathlib.Path("tests/data/txt/samples.txt")
error_message = re.escape("vcztools does not support combining -s and -S")

with pytest.raises(AssertionError, match=error_message):
run_vcztools(f"view {vcz_path} -s NA00001 -S ^{samples_file_path}")
with pytest.raises(AssertionError, match=error_message):
run_vcztools(f"view {vcz_path} -s ^NA00001 -S {samples_file_path}")
# Removing until we reimplement sample handling:
# https://github.com/sgkit-dev/vcztools/issues/121
# def test_excluding_and_including_samples(vcz_path):
# samples_file_path = pathlib.Path("tests/data/txt/samples.txt")
# error_message = re.escape("vcztools does not support combining -s and -S")

# with pytest.raises(AssertionError, match=error_message):
# run_vcztools(f"view {vcz_path} -s NA00001 -S ^{samples_file_path}")
# with pytest.raises(AssertionError, match=error_message):
# run_vcztools(f"view {vcz_path} -s ^NA00001 -S {samples_file_path}")


@mock.patch("sys.exit")
Expand Down
78 changes: 40 additions & 38 deletions vcztools/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,29 +138,29 @@ def query(path, output, list_samples, format, include, exclude):
default=None,
help="Regions to include.",
)
@click.option(
"--force-samples", is_flag=True, help="Only warn about unknown sample subsets."
)
@click.option(
"-I",
"--no-update",
is_flag=True,
help="Do not recalculate INFO fields for the sample subset.",
)
@click.option(
"-s",
"--samples",
type=str,
default=None,
help="Samples to include.",
)
@click.option(
"-S",
"--samples-file",
type=str,
default=None,
help="File of sample names to include.",
)
# @click.option(
# "--force-samples", is_flag=True, help="Only warn about unknown sample subsets."
# )
# @click.option(
# "-I",
# "--no-update",
# is_flag=True,
# help="Do not recalculate INFO fields for the sample subset.",
# )
# @click.option(
# "-s",
# "--samples",
# type=str,
# default=None,
# help="Samples to include.",
# )
# @click.option(
# "-S",
# "--samples-file",
# type=str,
# default=None,
# help="File of sample names to include.",
# )
@click.option(
"-G",
"--drop-genotypes",
Expand All @@ -185,10 +185,10 @@ def view(
no_version,
regions,
targets,
force_samples,
no_update,
samples,
samples_file,
# force_samples,
# no_update,
# samples,
# samples_file,
drop_genotypes,
include,
exclude,
Expand All @@ -201,17 +201,19 @@ def view(
f"Only uncompressed VCF output supported, suffix .{suffix} not allowed"
)

if samples_file:
assert not samples, "vcztools does not support combining -s and -S"
# Dropping implementation here until it's reimplemented after initial release:
# https://github.com/sgkit-dev/vcztools/issues/121
# if samples_file:
# assert not samples, "vcztools does not support combining -s and -S"

samples = ""
exclude_samples_file = samples_file.startswith("^")
samples_file = samples_file.lstrip("^")
# samples = ""
# exclude_samples_file = samples_file.startswith("^")
# samples_file = samples_file.lstrip("^")

with open(samples_file) as file:
if exclude_samples_file:
samples = "^" + samples
samples += ",".join(line.strip() for line in file.readlines())
# with open(samples_file) as file:
# if exclude_samples_file:
# samples = "^" + samples
# samples += ",".join(line.strip() for line in file.readlines())

with handle_broken_pipe(output):
vcf_writer.write_vcf(
Expand All @@ -222,8 +224,8 @@ def view(
no_version=no_version,
variant_regions=regions,
variant_targets=targets,
no_update=no_update,
samples=samples,
# no_update=no_update,
# samples=samples,
drop_genotypes=drop_genotypes,
include=include,
exclude=exclude,
Expand Down

0 comments on commit cb846c0

Please sign in to comment.