Skip to content

Commit

Permalink
Merge pull request #96 from Clinical-Genomics/multiple_conditions_exp…
Browse files Browse the repository at this point in the history
…lanation

Parse conditionSet.MultipleConditionExplanation value
  • Loading branch information
northwestwitch authored Mar 15, 2024
2 parents 7da1aa4 + 80b9b0e commit c4abec1
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- Updated submission schema to the latest API schema (01-03-2024)
- Switched from Draft3Validator to Draft7Validator in validate.py
- Updated fastapi lib to fix security vulnerability
- Parse `conditionSet.MultipleConditionExplanation` value from `Explanation for multiple conditions` column of variants file

## [2.4]
### Changed
Expand Down
21 changes: 12 additions & 9 deletions preClinVar/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,27 +72,30 @@ def set_item_clin_sig(item, variant_dict):
# customAssertionScore


def set_item_condition_set(item: dict, variant_dict: dict):
    """Set the conditionSet key/values for an API submission item.

    The item is modified in place; nothing is returned.

    Args:
        item(dict). An item in the clinvarSubmission.items list
        variant_dict(dict). Example: {'##Local ID': '1d9ce6ebf2f82d913cfbe20c5085947b', 'Linking ID': '1d9ce6ebf2f82d913cfbe20c5085947b', 'Gene symbol': 'XDH', 'Reference sequence': 'NM_000379.4', 'HGVS': 'c.2751del', ..}

    Keys read from variant_dict:
        "Condition ID type": looked up in CONDITIONS_MAP to obtain the condition database
        "Condition ID value": one or more condition IDs separated by ";"
        "Explanation for multiple conditions": optional explanation, stored as
            conditionSet.MultipleConditionExplanation
    """
    # Any of these may be None: the column can be absent from the Variant file
    # or the condition ID type may have no entry in CONDITIONS_MAP
    cond_db = CONDITIONS_MAP.get(variant_dict.get("Condition ID type"))
    cond_values = variant_dict.get("Condition ID value")
    multi_condition_explanation = variant_dict.get("Explanation for multiple conditions")

    conditions: list = []
    if cond_db and cond_values:
        # Tolerate spaces around the ";" separator and a trailing ";", which would
        # otherwise end up as padded or empty IDs in the submission
        cond_ids = (cond_id.strip() for cond_id in cond_values.split(";"))
        conditions = [{"db": cond_db, "id": cond_id} for cond_id in cond_ids if cond_id]
    if conditions:
        item["conditionSet"] = {"condition": conditions}

    # Only attach the explanation to an existing conditionSet: a line carrying an
    # explanation but no usable condition must not raise a KeyError
    if multi_condition_explanation and "conditionSet" in item:
        # str.capitalize() upper-cases the first character and lower-cases the rest
        item["conditionSet"]["MultipleConditionExplanation"] = (
            multi_condition_explanation.capitalize()
        )


def set_item_local_id(item, variant_dict):
Expand Down
4 changes: 2 additions & 2 deletions preClinVar/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import logging
import re
from typing import List, Union
from typing import List

import requests
import uvicorn
Expand Down Expand Up @@ -98,7 +98,7 @@ async def tsv_2_json(
files: List[UploadFile] = File(...),
):
"""Create a json submission object using 2 TSV files (Variant.tsv and CaseData.tsv).
Validate the submission objects agains the official schema:
Validate the submission objects against the official schema:
https://www.ncbi.nlm.nih.gov/clinvar/docs/api_http/
"""
# Extract lines from Variants.tsv and Casedata.tsv files present in POST request
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ pytest-cov = "^3.0.0"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
build-backend = "poetry.core.masonry.api"
30 changes: 29 additions & 1 deletion tests/test_file_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
from preClinVar.constants import CLNSIG_TERMS
from preClinVar.file_parser import set_item_clin_sig, set_item_variant_set
from preClinVar.file_parser import (
csv_lines,
set_item_clin_sig,
set_item_condition_set,
set_item_variant_set,
)


def test_set_item_clin_sig_fix_case():
Expand All @@ -26,3 +31,26 @@ def test_set_item_variant_set_hgvs():
set_item_variant_set(item, variant_dict)
# THEN hgvs field should contain both Reference sequence and HGVS
assert item["variantSet"]["variant"][0]["hgvs"] == ":".join([REFSEQ, HGVS])


def test_set_item_condition_set():
    """Test the function that sets conditionSet values."""

    # GIVEN a variant line with two distinct condition IDs, separated by ";"
    # (the separator the parser splits on), and a multiple conditions explanation
    item = {}
    CONDITION_DB = "OMIM"
    OMIM_NUMBERS = ["604187", "604188"]
    MULTIPLE_COND_EXPLANATION = "Novel disease"
    variant_dict = {
        "Condition ID type": CONDITION_DB,
        "Condition ID value": ";".join(OMIM_NUMBERS),
        "Explanation for multiple conditions": MULTIPLE_COND_EXPLANATION,
    }

    # WHEN the condition set is created from variant_dict containing condition info
    set_item_condition_set(item=item, variant_dict=variant_dict)

    # THEN it should contain the expected explanation
    assert item["conditionSet"]["MultipleConditionExplanation"] == MULTIPLE_COND_EXPLANATION

    # AND exactly one condition per ID, in the order given in the file
    conditions = item["conditionSet"]["condition"]
    assert [condition["id"] for condition in conditions] == OMIM_NUMBERS
    for condition in conditions:
        assert condition["db"] == CONDITION_DB

0 comments on commit c4abec1

Please sign in to comment.