diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ffab88..3ec7f47 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## [unreleased]
+### Changed
+- Fix uppercase/lowercase letters when parsing clinsig terms from files
+
 ## [2.2]
 ### Changed
 - Provide the URL to the prod service running on the Clinical Genomics server instead of the stage one on README page
diff --git a/preClinVar/constants.py b/preClinVar/constants.py
index 1d5ea25..7670b3d 100644
--- a/preClinVar/constants.py
+++ b/preClinVar/constants.py
@@ -1,6 +1,26 @@
 DRY_RUN_SUBMISSION_URL = "https://submit.ncbi.nlm.nih.gov/api/v1/submissions/?dry-run=true"
 VALIDATE_SUBMISSION_URL = "https://submit.ncbi.nlm.nih.gov/apitest/v1/submissions"
 
+CLNSIG_TERMS = [
+    "Pathogenic",
+    "Likely pathogenic",
+    "Uncertain significance",
+    "Likely benign",
+    "Benign",
+    "Pathogenic, low penetrance",
+    "Uncertain risk allele",
+    "Likely pathogenic, low penetrance",
+    "Established risk allele",
+    "Likely risk allele",
+    "affects",
+    "association",
+    "drug response",
+    "confers sensitivity",
+    "protective",
+    "other",
+    "not provided",
+]
+
 CONDITIONS_MAP = {
     "HPO": "HP",
     "MedGen": "MedGen",
diff --git a/preClinVar/file_parser.py b/preClinVar/file_parser.py
index 3ebd8c1..9974221 100644
--- a/preClinVar/file_parser.py
+++ b/preClinVar/file_parser.py
@@ -3,7 +3,7 @@
 from csv import DictReader
 from tempfile import NamedTemporaryFile
 
-from preClinVar.constants import CONDITIONS_MAP, SNV_COORDS, SV_COORDS
+from preClinVar.constants import CLNSIG_TERMS, CONDITIONS_MAP, SNV_COORDS, SV_COORDS
 
 LOG = logging.getLogger("uvicorn.access")
 
@@ -45,6 +45,14 @@ def set_item_clin_sig(item, variant_dict):
     """
     # set first required params
     clinsig = variant_dict.get("Clinical significance")
+    # Make sure clinsig term is compliant with API standards. The lookup is
+    # skipped when the value is missing or empty: .get() returns None for an
+    # absent column and None has no .lower().
+    if clinsig:
+        for term in CLNSIG_TERMS:
+            if clinsig.lower() == term.lower():
+                clinsig = term
+                break
+
     clinsig_comment = variant_dict.get("Comment on clinical significance")
     last_eval = variant_dict.get("Date last evaluated")
     inherit_mode = variant_dict.get("Mode of inheritance")
diff --git a/tests/test_file_parser.py b/tests/test_file_parser.py
new file mode 100644
index 0000000..671e9de
--- /dev/null
+++ b/tests/test_file_parser.py
@@ -0,0 +1,17 @@
+from preClinVar.constants import CLNSIG_TERMS
+from preClinVar.file_parser import set_item_clin_sig
+
+
+def test_set_item_clin_sig_fix_case():
+    """Test the function that collects clinsig from Variant file when clinsig term has wrong uppercase/lowercase"""
+    # GIVEN a variant dictionary with a non-compliant clinsig value
+    item = {}
+    variant_dict = {"Clinical significance": "Likely Pathogenic"}
+
+    # WHEN clinsig is collected from variant_dict
+    set_item_clin_sig(item, variant_dict)
+
+    # THEN it's converted into the compliant, correctly-cased term
+    clinsig = item["clinicalSignificance"]["clinicalSignificanceDescription"]
+    assert clinsig == "Likely pathogenic"
+    assert clinsig in CLNSIG_TERMS