From cb846c0d226e32c4ec00706bb5d1fe9900599df5 Mon Sep 17 00:00:00 2001 From: Jerome Kelleher Date: Tue, 14 Jan 2025 14:30:30 +0000 Subject: [PATCH] Remove sample subsetting temporarily. It has a number of problems, and it is better not to try to fix them in a hurry before shipping. See #121 and #122 --- tests/test_bcftools_validation.py | 28 +++++------ tests/test_cli.py | 18 +++---- vcztools/cli.py | 78 ++++++++++++++++--------------- 3 files changed, 65 insertions(+), 59 deletions(-) diff --git a/tests/test_bcftools_validation.py b/tests/test_bcftools_validation.py index d8813d6..27e6246 100644 --- a/tests/test_bcftools_validation.py +++ b/tests/test_bcftools_validation.py @@ -54,19 +54,21 @@ def run_vcztools(args: str) -> str: "view --no-version -G", "sample.vcf.gz" ), - ( - "view --no-update --no-version --samples-file " - "tests/data/txt/samples.txt", - "sample.vcf.gz"), - ("view -I --no-version -S tests/data/txt/samples.txt", "sample.vcf.gz"), - ("view --no-version -s NA00001", "sample.vcf.gz"), - ("view --no-version -s NA00001,NA00003", "sample.vcf.gz"), - ("view --no-version -s HG00096", "1kg_2020_chrM.vcf.gz"), - ("view --no-version -s '' --force-samples", "sample.vcf.gz"), - ("view --no-version -s ^NA00001", "sample.vcf.gz"), - ("view --no-version -s ^NA00003,NA00002", "sample.vcf.gz"), - ("view --no-version -s ^NA00003,NA00002,NA00003", "sample.vcf.gz"), - ("view --no-version -S ^tests/data/txt/samples.txt", "sample.vcf.gz"), + # Temporarily removing until sample handling fixed: + # https://github.com/sgkit-dev/vcztools/issues/121 + # ( + # "view --no-update --no-version --samples-file " + # "tests/data/txt/samples.txt", + # "sample.vcf.gz"), + # ("view -I --no-version -S tests/data/txt/samples.txt", "sample.vcf.gz"), + # ("view --no-version -s NA00001", "sample.vcf.gz"), + # ("view --no-version -s NA00001,NA00003", "sample.vcf.gz"), + # ("view --no-version -s HG00096", "1kg_2020_chrM.vcf.gz"), + # ("view --no-version -s '' --force-samples", "sample.vcf.gz"), + # 
("view --no-version -s ^NA00001", "sample.vcf.gz"), + # ("view --no-version -s ^NA00003,NA00002", "sample.vcf.gz"), + # ("view --no-version -s ^NA00003,NA00002,NA00003", "sample.vcf.gz"), + # ("view --no-version -S ^tests/data/txt/samples.txt", "sample.vcf.gz"), ] ) # fmt: on diff --git a/tests/test_cli.py b/tests/test_cli.py index 9bb3901..74abcb9 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -76,14 +76,16 @@ def test_view_write_pipe(self, tmp_path, vcz_path): assert "Is a directory" in result.stderr -def test_excluding_and_including_samples(vcz_path): - samples_file_path = pathlib.Path("tests/data/txt/samples.txt") - error_message = re.escape("vcztools does not support combining -s and -S") - - with pytest.raises(AssertionError, match=error_message): - run_vcztools(f"view {vcz_path} -s NA00001 -S ^{samples_file_path}") - with pytest.raises(AssertionError, match=error_message): - run_vcztools(f"view {vcz_path} -s ^NA00001 -S {samples_file_path}") +# Removing until we reimplement sample handling: +# https://github.com/sgkit-dev/vcztools/issues/121 +# def test_excluding_and_including_samples(vcz_path): +# samples_file_path = pathlib.Path("tests/data/txt/samples.txt") +# error_message = re.escape("vcztools does not support combining -s and -S") + +# with pytest.raises(AssertionError, match=error_message): +# run_vcztools(f"view {vcz_path} -s NA00001 -S ^{samples_file_path}") +# with pytest.raises(AssertionError, match=error_message): +# run_vcztools(f"view {vcz_path} -s ^NA00001 -S {samples_file_path}") @mock.patch("sys.exit") diff --git a/vcztools/cli.py b/vcztools/cli.py index e8ed2b4..ed73cf5 100644 --- a/vcztools/cli.py +++ b/vcztools/cli.py @@ -138,29 +138,29 @@ def query(path, output, list_samples, format, include, exclude): default=None, help="Regions to include.", ) -@click.option( - "--force-samples", is_flag=True, help="Only warn about unknown sample subsets." 
-) -@click.option( - "-I", - "--no-update", - is_flag=True, - help="Do not recalculate INFO fields for the sample subset.", -) -@click.option( - "-s", - "--samples", - type=str, - default=None, - help="Samples to include.", -) -@click.option( - "-S", - "--samples-file", - type=str, - default=None, - help="File of sample names to include.", -) +# @click.option( +# "--force-samples", is_flag=True, help="Only warn about unknown sample subsets." +# ) +# @click.option( +# "-I", +# "--no-update", +# is_flag=True, +# help="Do not recalculate INFO fields for the sample subset.", +# ) +# @click.option( +# "-s", +# "--samples", +# type=str, +# default=None, +# help="Samples to include.", +# ) +# @click.option( +# "-S", +# "--samples-file", +# type=str, +# default=None, +# help="File of sample names to include.", +# ) @click.option( "-G", "--drop-genotypes", @@ -185,10 +185,10 @@ def view( no_version, regions, targets, - force_samples, - no_update, - samples, - samples_file, + # force_samples, + # no_update, + # samples, + # samples_file, drop_genotypes, include, exclude, @@ -201,17 +201,19 @@ def view( f"Only uncompressed VCF output supported, suffix .{suffix} not allowed" ) - if samples_file: - assert not samples, "vcztools does not support combining -s and -S" + # Dropping implementation here until it's reimplemented after initial release: + # https://github.com/sgkit-dev/vcztools/issues/121 + # if samples_file: + # assert not samples, "vcztools does not support combining -s and -S" - samples = "" - exclude_samples_file = samples_file.startswith("^") - samples_file = samples_file.lstrip("^") + # samples = "" + # exclude_samples_file = samples_file.startswith("^") + # samples_file = samples_file.lstrip("^") - with open(samples_file) as file: - if exclude_samples_file: - samples = "^" + samples - samples += ",".join(line.strip() for line in file.readlines()) + # with open(samples_file) as file: + # if exclude_samples_file: + # samples = "^" + samples + # samples += 
",".join(line.strip() for line in file.readlines()) with handle_broken_pipe(output): vcf_writer.write_vcf( @@ -222,8 +224,8 @@ def view( no_version=no_version, variant_regions=regions, variant_targets=targets, - no_update=no_update, - samples=samples, + # no_update=no_update, + # samples=samples, drop_genotypes=drop_genotypes, include=include, exclude=exclude,