Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse conditionSet.MultipleConditionExplanation value #96

Merged
merged 23 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- Updated submission schema to the latest API schema (01-03-2024)
- Switched from Draft3Validator to Draft7Validator in validate.py
- Updated fastapi lib to fix security vulnerability
- Parse `conditionSet.MultipleConditionExplanation` value from `Explanation for multiple conditions` column of variants file

## [2.4]
### Changed
Expand Down
4 changes: 2 additions & 2 deletions preClinVar/demo/Variant_snv_hgvs.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"##Local ID","Linking ID","Gene symbol","Reference sequence","HGVS","Variation identifiers","Condition ID type","Condition ID value","Clinical significance","Date last evaluated","Mode of inheritance"
"4c7d5c70d955875504db72ef8e1abe77","4c7d5c70d955875504db72ef8e1abe77","POT1","NM_015450.3","c.903G>T","rs116916706","OMIM","604187;606798","Likely pathogenic","2022-12-07","Autosomal recessive inheritance"
"##Local ID","Linking ID","Gene symbol","Reference sequence","HGVS","Variation identifiers","Condition ID type","Condition ID value","Explanation for multiple conditions","Clinical significance","Date last evaluated","Mode of inheritance"
"4c7d5c70d955875504db72ef8e1abe77","4c7d5c70d955875504db72ef8e1abe77","POT1","NM_015450.3","c.903G>T","rs116916706","OMIM","604187;606798","Novel disease","Likely pathogenic","2022-12-07","Autosomal recessive inheritance"
21 changes: 12 additions & 9 deletions preClinVar/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,27 +72,30 @@ def set_item_clin_sig(item, variant_dict):
# customAssertionScore


def set_item_condition_set(item: dict, variant_dict: dict) -> None:
    """Set the conditionSet key/values for an API submission item.

    Reads the "Condition ID type", "Condition ID value" and
    "Explanation for multiple conditions" columns of a variant line.
    Several condition IDs may be given in "Condition ID value", separated by semicolons.
    Nothing is added to the item when the condition type is not found in CONDITIONS_MAP
    or when no usable condition ID is present.

    Args:
        item(dict). An item in the clinvarSubmission.items list. Modified in place.
        variant_dict(dict). Example: {'##Local ID': '1d9ce6ebf2f82d913cfbe20c5085947b', 'Linking ID': '1d9ce6ebf2f82d913cfbe20c5085947b', 'Gene symbol': 'XDH', 'Reference sequence': 'NM_000379.4', 'HGVS': 'c.2751del', ..}
    """
    # Check if condition ID is specified in Variant file
    cond_db = CONDITIONS_MAP.get(variant_dict.get("Condition ID type"))
    cond_values = variant_dict.get("Condition ID value")
    if not (cond_db and cond_values):
        return

    # Tolerate spaces around the separator and stray/trailing semicolons,
    # so that no condition with a blank or padded id ends up in the submission
    cond_ids = [cond_id.strip() for cond_id in cond_values.split(";")]
    conditions = [{"db": cond_db, "id": cond_id} for cond_id in cond_ids if cond_id]
    if not conditions:
        return

    item["conditionSet"] = {"condition": conditions}

    # The explanation is only attached to an existing conditionSet: writing it
    # without one would raise a KeyError on item["conditionSet"]
    explanation = (variant_dict.get("Explanation for multiple conditions") or "").strip()
    if explanation:
        item["conditionSet"]["MultipleConditionExplanation"] = explanation.capitalize()


def set_item_local_id(item, variant_dict):
Expand Down
4 changes: 2 additions & 2 deletions preClinVar/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import logging
import re
from typing import List, Union
from typing import List

import requests
import uvicorn
Expand Down Expand Up @@ -98,7 +98,7 @@ async def tsv_2_json(
files: List[UploadFile] = File(...),
):
"""Create a json submission object using 2 TSV files (Variant.tsv and CaseData.tsv).
Validate the submission objects agains the official schema:
Validate the submission objects against the official schema:
https://www.ncbi.nlm.nih.gov/clinvar/docs/api_http/
"""
# Extract lines from Variants.tsv and Casedata.tsv files present in POST request
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ pytest-cov = "^3.0.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
build-backend = "poetry.core.masonry.api"
30 changes: 29 additions & 1 deletion tests/test_file_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from preClinVar.constants import CLNSIG_TERMS
from preClinVar.file_parser import set_item_clin_sig, set_item_variant_set
from preClinVar.file_parser import (
csv_lines,
set_item_clin_sig,
set_item_condition_set,
set_item_variant_set,
)


def test_set_item_clin_sig_fix_case():
Expand All @@ -26,3 +31,26 @@ def test_set_item_variant_set_hgvs():
set_item_variant_set(item, variant_dict)
# THEN hgvs field should contain both Reference sequence and HGVS
assert item["variantSet"]["variant"][0]["hgvs"] == ":".join([REFSEQ, HGVS])


def test_set_item_condition_set():
    """Test the function that sets the conditionSet values of a submission item."""

    # GIVEN a variant line with two semicolon-separated condition IDs
    # (same separator as in the demo Variant file) and an explanation for them
    item = {}
    CONDITION_DB = "OMIM"
    OMIM_NUMBERS = ["604187", "606798"]
    MULTIPLE_COND_EXPLANATION = "Novel disease"
    variant_dict = {
        "Condition ID type": CONDITION_DB,
        "Condition ID value": ";".join(OMIM_NUMBERS),
        "Explanation for multiple conditions": MULTIPLE_COND_EXPLANATION,
    }

    # WHEN condition set is created from variant_dict containing condition info
    set_item_condition_set(item=item, variant_dict=variant_dict)

    # THEN it should contain the expected explanation
    assert item["conditionSet"]["MultipleConditionExplanation"] == MULTIPLE_COND_EXPLANATION

    # AND one condition per ID, in the same order as in the variant line
    conditions = item["conditionSet"]["condition"]
    assert [condition["id"] for condition in conditions] == OMIM_NUMBERS
    assert all(condition["db"] == CONDITION_DB for condition in conditions)
Loading