Skip to content

Commit

Permalink
chore: consolidate variant annotation paths into list
Browse files Browse the repository at this point in the history
  • Loading branch information
vivienho committed Feb 12, 2025
1 parent 89e1785 commit 35e6ba0
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 28 deletions.
3 changes: 1 addition & 2 deletions src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,7 @@ class _ConsequenceToPathogenicityScoreMap(TypedDict):
)
vep_output_json_path: str = MISSING
variant_index_path: str = MISSING
gnomad_variant_annotations_path: str | None = None
lof_curation_variant_annotations_path: str | None = None
variant_annotations_path: list[str] | None = None
hash_threshold: int = 300
consequence_to_pathogenicity_score: ClassVar[
list[_ConsequenceToPathogenicityScoreMap]
Expand Down
40 changes: 14 additions & 26 deletions src/gentropy/variant_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ def __init__(
vep_output_json_path: str,
variant_index_path: str,
hash_threshold: int,
gnomad_variant_annotations_path: str | None = None,
lof_curation_variant_annotations_path: str | None = None,
variant_annotations_path: list[str] | None = None,
amino_acid_change_annotations: list[str] | None = None,
) -> None:
"""Run VariantIndex step.
Expand All @@ -38,8 +37,7 @@ def __init__(
vep_output_json_path (str): Variant effect predictor output path (in json format).
variant_index_path (str): Variant index dataset path to save resulting data.
hash_threshold (int): Hash threshold for variant identifier length.
gnomad_variant_annotations_path (str | None): Path to extra variant annotation dataset.
lof_curation_variant_annotations_path (str | None): Path to loss-of-function variant annotation dataset.
variant_annotations_path (list[str] | None): List of paths to extra variant annotation datasets.
amino_acid_change_annotations (list[str] | None): List of paths to amino-acid based variant annotations.
"""
# Extract variant annotations from VEP output:
Expand All @@ -48,30 +46,20 @@ def __init__(
)

# Process variant annotations if provided:
if gnomad_variant_annotations_path:
# Read variant annotations from parquet:
annotations = VariantIndex.from_parquet(
session=session,
path=gnomad_variant_annotations_path,
recursiveFileLookup=True,
id_threshold=hash_threshold,
)

# Update index with extra annotations:
variant_index = variant_index.add_annotation(annotations)

# Process LOF annotations if provided:
if lof_curation_variant_annotations_path:
# Read LOF annotations from parquet:
lof_annotations = VariantIndex.from_parquet(
session=session,
path=lof_curation_variant_annotations_path,
)
if variant_annotations_path:
for annotation_path in variant_annotations_path:
# Read variant annotations from parquet:
annotations = VariantIndex.from_parquet(
session=session,
path=annotation_path,
recursiveFileLookup=True,
id_threshold=hash_threshold,
)

# Update variant index with LOF annotations:
variant_index = variant_index.add_annotation(lof_annotations)
# Update index with extra annotations:
variant_index = variant_index.add_annotation(annotations)

# If provided read amion-acid based annotation and enrich variant index:
# If provided read amino-acid based annotation and enrich variant index:
if amino_acid_change_annotations:
for annotation_path in amino_acid_change_annotations:
annotation_data = AminoAcidVariants.from_parquet(
Expand Down

0 comments on commit 35e6ba0

Please sign in to comment.