Skip to content

Commit

Permalink
Handle broken pipes and improve output file handling
Browse files Browse the repository at this point in the history
Closes #114
  • Loading branch information
jeromekelleher committed Jan 14, 2025
1 parent 27dbfac commit df07971
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 26 deletions.
70 changes: 64 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import pathlib
import re
from unittest import mock

import click.testing as ct
import pytest

import vcztools.cli as cli
from tests.test_bcftools_validation import run_vcztools
from tests.utils import vcz_path_cache

Expand All @@ -19,13 +22,58 @@ def test_version_header(vcz_path):
assert output.find("Date=") >= 0


def test_view_bad_output(tmp_path, vcz_path):
bad_output = tmp_path / "output.vcf.gz"
class TestOutput:
def test_view_unsupported_output(self, tmp_path, vcz_path):
bad_output = tmp_path / "output.vcf.gz"

with pytest.raises(
ValueError, match=re.escape("Output file extension must be .vcf, got: .gz")
):
run_vcztools(f"view --no-version {vcz_path} -o {bad_output}")
with pytest.raises(
ValueError,
match=re.escape(
"Only uncompressed VCF output supported, suffix .gz not allowed"
),
):
run_vcztools(f"view --no-version {vcz_path} -o {bad_output}")

@pytest.mark.parametrize("suffix", ["gz", "bgz", "bcf"])
def test_view_unsupported_output_suffix(self, tmp_path, vcz_path, suffix):
bad_output = tmp_path / f"output.vcf.{suffix}"

with pytest.raises(ValueError, match=f".{suffix} not allowed"):
run_vcztools(f"view --no-version {vcz_path} -o {bad_output}")

def test_view_good_path(self, tmp_path, vcz_path):
output_path = tmp_path / "tmp.vcf"
runner = ct.CliRunner(mix_stderr=False)
result = runner.invoke(
cli.vcztools_main,
f"view --no-version {vcz_path} -o {output_path}",
catch_exceptions=False,
)
assert result.exit_code == 0
assert len(result.stdout) == 0
assert output_path.exists()

def test_view_write_directory(self, tmp_path, vcz_path):
runner = ct.CliRunner(mix_stderr=False)
result = runner.invoke(
cli.vcztools_main,
f"view --no-version {vcz_path} -o {tmp_path}",
catch_exceptions=False,
)
assert result.exit_code == 1
assert len(result.stdout) == 0
assert "Is a directory" in result.stderr

def test_view_write_pipe(self, tmp_path, vcz_path):
runner = ct.CliRunner(mix_stderr=False)
result = runner.invoke(
cli.vcztools_main,
f"view --no-version {vcz_path} -o {tmp_path}",
catch_exceptions=False,
)
assert result.exit_code == 1
assert len(result.stdout) == 0
assert "Is a directory" in result.stderr


def test_excluding_and_including_samples(vcz_path):
Expand All @@ -36,3 +84,13 @@ def test_excluding_and_including_samples(vcz_path):
run_vcztools(f"view {vcz_path} -s NA00001 -S ^{samples_file_path}")
with pytest.raises(AssertionError, match=error_message):
run_vcztools(f"view {vcz_path} -s ^NA00001 -S {samples_file_path}")


@mock.patch("sys.exit")
@mock.patch("os.dup2")
def test_broken_pipe(mocked_dup2, mocked_exit, tmp_path):
with open(tmp_path / "tmp.txt", "w") as output:
with cli.handle_broken_pipe(output):
raise BrokenPipeError()
mocked_dup2.assert_called_once()
mocked_exit.assert_called_once_with(1)
69 changes: 49 additions & 20 deletions vcztools/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import contextlib
import os
import sys

import click
Expand All @@ -6,6 +8,26 @@
from . import stats as stats_module
from . import vcf_writer


@contextlib.contextmanager
def handle_broken_pipe(output):
"""
Handle sigpipe following official advice:
https://docs.python.org/3/library/signal.html#note-on-sigpipe
"""
try:
yield
# flush output here to force SIGPIPE to be triggered
# while inside this try block.
output.flush()
except BrokenPipeError:
# Python flushes standard streams on exit; redirect remaining output
# to devnull to avoid another BrokenPipeError at shutdown
devnull = os.open(os.devnull, os.O_WRONLY)
os.dup2(devnull, sys.stdout.fileno())
sys.exit(1) # Python exits with error code 1 on EPIPE


include = click.option(
"-i", "--include", type=str, help="Filter expression to include variant sites."
)
Expand Down Expand Up @@ -68,7 +90,11 @@ def query(path, list_samples, format, include, exclude):
@click.command
@click.argument("path", type=click.Path())
@click.option(
"-o", "--output", type=str, default=None, help="File path to write output to."
"-o",
"--output",
type=click.File("w"),
default="-",
help="File path to write output to (defaults to stdout '-').",
)
@click.option(
"-h",
Expand Down Expand Up @@ -149,9 +175,13 @@ def view(
include,
exclude,
):
if output and not output.endswith(".vcf") and output != "/dev/null":
split = output.split(".")
raise ValueError(f"Output file extension must be .vcf, got: .{split[-1]}")
suffix = output.name.split(".")[-1]
# Exclude suffixes which require bgzipped or BCF output:
# https://github.com/samtools/htslib/blob/329e7943b7ba3f0af15b0eaa00a367a1ac15bd83/vcf.c#L3815
if suffix in ["gz", "bcf", "bgz"]:
raise ValueError(
f"Only uncompressed VCF output supported, suffix .{suffix} not allowed"
)

if samples_file:
assert not samples, "vcztools does not support combining -s and -S"
Expand All @@ -165,22 +195,21 @@ def view(
samples = "^" + samples
samples += ",".join(line.strip() for line in file.readlines())

# TODO: use no_update when fixing https://github.com/sgkit-dev/vcztools/issues/75

vcf_writer.write_vcf(
path,
output,
header_only=header_only,
no_header=no_header,
no_version=no_version,
variant_regions=regions,
variant_targets=targets,
no_update=no_update,
samples=samples,
drop_genotypes=drop_genotypes,
include=include,
exclude=exclude,
)
with handle_broken_pipe(output):
vcf_writer.write_vcf(
path,
output,
header_only=header_only,
no_header=no_header,
no_version=no_version,
variant_regions=regions,
variant_targets=targets,
no_update=no_update,
samples=samples,
drop_genotypes=drop_genotypes,
include=include,
exclude=exclude,
)


@click.group(cls=NaturalOrderGroup, name="vcztools")
Expand Down

0 comments on commit df07971

Please sign in to comment.