def set_item_condition_set(item: dict, variant_dict: dict) -> None:
    """Set the conditionSet key/values for an API submission item.

    Nothing is added to the item unless both a recognised "Condition ID type"
    and at least one non-empty "Condition ID value" are present. The optional
    "Explanation for multiple conditions" is only attached when a conditionSet
    has been created.

    Args:
        item(dict). An item in the clinvarSubmission.items list. Modified in place.
        variant_dict(dict). Example: {'##Local ID': '1d9ce6ebf2f82d913cfbe20c5085947b', 'Linking ID': '1d9ce6ebf2f82d913cfbe20c5085947b', 'Gene symbol': 'XDH', 'Reference sequence': 'NM_000379.4', 'HGVS': 'c.2751del', ..}
    """
    # Check if condition ID is specified in Variant file
    cond_db = CONDITIONS_MAP.get(variant_dict.get("Condition ID type"))
    cond_values = variant_dict.get("Condition ID value")
    multi_condition_explanation = variant_dict.get("Explanation for multiple conditions")

    if not (cond_db and cond_values):
        return

    # Multiple IDs are separated by ";". Strip surrounding whitespace and skip
    # empty fragments (e.g. from a trailing ";") so that no condition with a
    # blank or padded id ends up in the submission.
    conditions = [
        {"db": cond_db, "id": cond_id.strip()}
        for cond_id in cond_values.split(";")
        if cond_id.strip()
    ]
    if not conditions:
        return

    item["conditionSet"] = {"condition": conditions}

    # str.capitalize() upper-cases the first character and lower-cases the rest,
    # e.g. "novel disease" -> "Novel disease". Strip first, otherwise a leading
    # space would be the "first character" and the text would stay lower-case.
    explanation = (multi_condition_explanation or "").strip()
    if explanation:
        item["conditionSet"]["MultipleConditionExplanation"] = explanation.capitalize()
def test_set_item_condition_set():
    """Test the function that sets conditionSet values."""

    # GIVEN a variant dict with two distinct condition IDs, separated by ";"
    # (the separator the parser splits on), and a multiple-conditions explanation
    item = {}
    CONDITION_DB = "OMIM"
    OMIM_NUMBERS = ["604187", "606798"]
    MULTIPLE_COND_EXPLANATION = "Novel disease"
    variant_dict = {
        "Condition ID type": CONDITION_DB,
        "Condition ID value": ";".join(OMIM_NUMBERS),
        "Explanation for multiple conditions": MULTIPLE_COND_EXPLANATION,
    }

    # WHEN condition set is created from variant_dict containing condition info
    set_item_condition_set(item=item, variant_dict=variant_dict)

    # THEN the explanation should be present in the conditionSet
    assert item["conditionSet"]["MultipleConditionExplanation"] == MULTIPLE_COND_EXPLANATION

    # AND there should be exactly one condition per ID, in the original order.
    # Comparing whole lists avoids the substring pitfall of `id in "a,b"`.
    assert item["conditionSet"]["condition"] == [
        {"db": CONDITION_DB, "id": omim_number} for omim_number in OMIM_NUMBERS
    ]