Skip to content

Commit

Permalink
Mask VCF missing rather than fill values
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromekelleher committed Feb 16, 2024
1 parent c05ddfe commit b547141
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
8 changes: 4 additions & 4 deletions sgkit/io/vcf/vcf_writer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,16 +494,16 @@ def vcf_genotypes_to_byte_buf_size(call_genotype):


def create_mask(arr):
    """Return a mask array of shape ``arr.shape[0]`` for masking out missing values.

    For integer, float32 and byte-string arrays, an entry of the result is
    true when every element along all axes after the first equals the
    missing-value sentinel for that dtype. For boolean arrays the result is
    simply the elementwise negation of ``arr`` (same shape as ``arr``).

    Raises
    ------
    ValueError
        If ``arr.dtype`` is not bool, int8/int16/int32, float32 or a
        byte-string ("S") dtype.
    """
    # Reduce over every axis except the leading one.
    axis = tuple(range(1, len(arr.shape)))
    if arr.dtype == bool:
        return ~arr
    elif arr.dtype in (np.int8, np.int16, np.int32):
        return np.all(arr == INT_MISSING, axis=axis)
    elif arr.dtype == np.float32:
        # Compare the raw 32-bit patterns rather than the float values.
        # NOTE(review): presumably the missing sentinel is a NaN payload,
        # which would never compare equal as a float -- confirm.
        return np.all(arr.view("i4") == FLOAT32_MISSING_AS_INT32, axis=axis)
    elif arr.dtype.kind == "S":
        return np.all(arr == STR_MISSING_BYTE, axis=axis)
    else:
        raise ValueError(f"Unsupported dtype: {arr.dtype}")

Expand Down
1 change: 0 additions & 1 deletion sgkit/tests/io/vcf/test_vcf_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def test_zarr_to_vcf(shared_datadir, tmp_path, output_is_path):
)


@pytest.mark.skip("Temporarily disabling pending bugfix; #1196")
@pytest.mark.parametrize("in_memory_ds", [True, False])
@pytest.mark.filterwarnings(
"ignore::sgkit.io.vcfzarr_reader.DimensionNameForFixedFormatFieldWarning",
Expand Down

0 comments on commit b547141

Please sign in to comment.