Skip to content

Commit

Permalink
Improve warning messages for positions outside of the sequence range (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
mihailefter committed Feb 10, 2020
1 parent 80723b2 commit 1449bb3
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 5 deletions.
16 changes: 16 additions & 0 deletions mutalyzer/nc_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,22 @@
from mutalyzer.config import settings


def get_chromosome_ids(transcript_id):
    """
    Get the references on which a given transcript is annotated.

    Every `Transcript` row matching the accession and version of
    `transcript_id` is resolved to its `Reference` row through
    `reference_id`.

    @arg transcript_id: Transcript identifier of the form
        'accession.version'. Only the first two dot-separated fields are
        used.

    @return: List of 'accession.version' strings, one per matching
        transcript whose reference row exists. Empty when `transcript_id`
        is empty, has no version part, or nothing matches.
    """
    if not transcript_id:
        return []

    parts = transcript_id.split('.')
    if len(parts) < 2:
        # No version to match on; report "nothing found" instead of raising
        # IndexError so callers can fall back gracefully.
        return []
    accession, version = parts[0], parts[1]

    transcripts = Transcript.query.filter_by(
        transcript_accession=accession,
        transcript_version=version).all()

    ids = []
    for transcript in transcripts:
        reference = Reference.query.filter_by(
            id=transcript.reference_id).first()
        if reference is None:
            # `.first()` gives None when no row matches; skip such entries
            # rather than failing on the attribute access below.
            continue
        ids.append('{}.{}'.format(reference.accession, reference.version))
    return ids


def get_entire_nc_record(record_id, geneName=None):

# Get the accession
Expand Down
41 changes: 36 additions & 5 deletions mutalyzer/variantchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from mutalyzer.mapping import Converter
from mutalyzer import Retriever
from mutalyzer import GenRecord
from mutalyzer.nc_db import get_nc_record
from mutalyzer.nc_db import get_nc_record, get_chromosome_ids
from datetime import datetime

# Exceptions used (privately) in this module.
Expand Down Expand Up @@ -957,6 +957,7 @@ def process_raw_variant(mutator, variant, record, transcript, output):
@raise _RawVariantError: Cannot process this raw variant.
@raise _VariantError: Cannot further process the entire variant.
"""
original_reftype = variant.RefType
variant, original_description = variant.RawVar, variant[-1]

# `argument` may be a number, or a subsequence of the reference.
Expand Down Expand Up @@ -1092,14 +1093,44 @@ def process_raw_variant(mutator, variant, record, transcript, output):
'the begin position.')
raise _RawVariantError()

def _get_original_position(position, transcript, reftype):
    """
    Render a genomic position for use in a user-facing message.

    With a transcript and a reference type of 'c' or 'n', the position is
    shown in that coordinate system, followed by the genomic position in
    parentheses. In every other case only 'g.<position>' is returned.

    Caveat: the value is converted back from the genomic position, so this
    still does not provide the original location. For
    'NG_012337.1(SDHD_v001):c.53-22274del' it provides 'c.-21325'.
    """
    if transcript and reftype == 'c':
        coding = transcript.CM.g2c(position)
        return 'c.{} (g.{})'.format(coding, position)

    if transcript and reftype == 'n':
        crossmap = transcript.CM
        noncoding = crossmap.tuple2string(crossmap.g2x(position))
        return 'n.{} (g.{})'.format(noncoding, position)

    # No transcript, or a reference type without a transcript notation.
    return 'g.{}'.format(position)

def _get_nm_in_nc_tip(mol_type, transcript_id):
    """
    Build a tip that suggests using a genomic reference sequence.

    @arg mol_type: Molecule type of the reference record. A tip is only
        produced for 'n'.
    @arg transcript_id: Transcript identifier handed to
        `get_chromosome_ids` and echoed in the examples. May be empty or
        None, in which case no lookup is attempted.

    @return: The tip text, starting with a space so it can be appended
        directly to a message, or '' when `mol_type` is not 'n'.
    """
    if mol_type != 'n':
        return ''

    # This runs while an error message is being composed, so a missing
    # transcript id must not trigger a lookup that can only fail.
    chromosome_ids = get_chromosome_ids(transcript_id) if transcript_id else []

    examples = ', '.join(
        '{}({})'.format(c_id, transcript_id) for c_id in chromosome_ids)
    if examples:
        return ' Tip: make use of a genomic reference sequence, ' \
               'e.g., {}.'.format(examples)

    # Nothing concrete to suggest; fall back to the generic pattern.
    return ' Tip: make use of a genomic reference sequence ' \
           'like NC_*(NM_*).'

# Reject positions outside of the reference sequence. The lower bound is
# tested first, so `first` is the position reported when both ends are out
# of range; otherwise the offending position is `last`.
sequence_length = len(mutator.orig)
if first < 1 or last > sequence_length:
    out_of_range = first if first < 1 else last
    # _get_original_position treats `transcript` as optional, so it must
    # not be dereferenced unconditionally when building the tip either.
    transcript_id = transcript.transcriptID if transcript else None
    message = 'Position {} is outside of the sequence range {}.{}'.format(
        _get_original_position(out_of_range, transcript, original_reftype),
        '[1, {}]'.format(sequence_length),
        _get_nm_in_nc_tip(record.record.molType, transcript_id))
    output.addMessage(__file__, 4, 'ERANGE', message)
    raise _RawVariantError()

splice_abort = False
Expand Down

0 comments on commit 1449bb3

Please sign in to comment.