Merge branch 'release/1.2.1' into master

rhshah · Oct 1, 2020 · a2f8654 · a2f8654
2 parents e951828 + bde401c
commit a2f8654
Show file tree

Hide file tree

Showing 8 changed files with 56 additions and 30 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -14,7 +14,7 @@ v1.0.5
 Major Bug Fix:
 Did not assign intron number properly in some cases and that might give wrong annotation
 ## v1.0.6
-RS:Added funtionality to get counts of events from Cosmic Fusion Export.
+RS:Added functionality to get counts of events from Cosmic Fusion Export.
 ## v1.0.7
 Updated to have no padding and proper transcript selection
 ## v1.0.8
@@ -23,3 +23,5 @@ Removing TIMM23B for proper annotation of RET-NCOA4
 RS:Added functionality to kinase domain involvement in the annotation
 ## v1.2.0
 RS:Bug fix for running auto selection mode and upgrading requirements
+## v1.2.1
+GJ:Bug fix for transcript selection, and for the error raised when a breakpoint is on the MT chromosome.
diff --git a/README.rst b/README.rst
@@ -1,7 +1,8 @@
 iAnnotateSV: Annotation of structural variants detected from NGS
 ================================================================
 
-:Author: Ronak H Shah
+:Author: `Ronak H Shah <http://github.com/rhshah>`_
+:Contributors: `Gowtham Jayakumaran <https://github.com/andurill>`_ and `Ian Johnson <https://github.com/ionox0>`_
 :Contact: [email protected]
 :Source code: http://github.com/rhshah/iAnnotateSV
 :License: `Apache License 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_

diff --git a/docs/index.rst b/docs/index.rst
@@ -6,7 +6,8 @@
 iAnnotateSV: Annotation of structural variants detected from NGS
 ================================================================
 
-:Author: Ronak H Shah
+:Author: `Ronak H Shah <http://github.com/rhshah>`_
+:Contributors: `Gowtham Jayakumaran <https://github.com/andurill>`_ and `Ian Johnson <https://github.com/ionox0>`_
 :Contact: [email protected]
 :Source code: http://github.com/rhshah/iAnnotateSV
 :License: `Apache License 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_

diff --git a/iAnnotateSV/AnnotateEachBreakpoint.py b/iAnnotateSV/AnnotateEachBreakpoint.py
@@ -6,14 +6,18 @@
 import pandas as pd
 import numpy as np
 import helper as hp
+import re
 import FindTranscript as ft
+from models import *
 
 def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
     #print "Annotating a coordinate:",position
     if(chromosome.startswith('chr')):
         chromosome = chromosome
     else:
         chromosome = "chr" + chromosome
+    if not re.match(r"(chr[1-9]$|chr1[0-9]$|chr2[0-2]$|chr[XY]$)", chromosome):
+        raise ChrError(":".join([str(chromosome), str(position)]))
     #Find all the chromosomes
     idxList = df[df['chrom'] == chromosome].index.tolist()
     transcriptIndex = []
@@ -24,10 +28,7 @@ def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
         if((geneStart <= position) and (geneEnd >= position)):
             #print position,geneStart,geneEnd
             transcriptIndex.append(index)
-    desc = None
-    intronnum = None
-    intronframe = None
-    #print transcriptIndex
+    geneName, transcript, desc, zone, strandDirection, intronnum, intronframe = (None,)*7
     if(transcriptIndex):
         coordData = pd.DataFrame(index=np.asarray(transcriptIndex),columns=['c', 'd', 'e', 'd1', 'd2', 'e1', 'e2','f'])
 
@@ -39,7 +40,7 @@ def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
             c = None # zone: 1=exon, 2=intron, 3=3'-UTR, 4=5'-UTR, 5=promoter
             d,e = (None for i in range(2)) # for exons: which one, and how far
             d1,d2,e1,e2 = (None for i in range(4)) # for introns: between which exons and how far?
-            f = None; #for introns: how many bases in the partially completed codon?
+            f = None #for introns: how many bases in the partially completed codon?
 
             #print df.iloc[tindex]['#name']
             #in promoter region ?
@@ -175,4 +176,6 @@ def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
                 desc = 'IGR: ' + hp.bp2str(distBefore[afterIdx],2) + ' after ' + geneName + '(' + strandDirection + ')' 
                 #print a
                 break
-    return(geneName,transcript,desc,zone,strandDirection,intronnum,intronframe)   
+        if not all([geneName, transcript, desc]):
+            raise IntergenicError(":".join([str(chromosome), str(position)]))
+    return(geneName,transcript,desc,zone,strandDirection,intronnum,intronframe)   
diff --git a/iAnnotateSV/__init__.py b/iAnnotateSV/__init__.py
@@ -1,6 +1,6 @@
 
 __title__ = 'iAnnotateSV'
-__version__ = '1.2.0'
+__version__ = '1.2.1'
 __author__ = 'Ronak H Shah'
 __license__ = 'Apache Software License'
-__copyright__ = 'Copyright 2019 Ronak Hasmukh Shah'
+__copyright__ = 'Copyright 2020 Ronak Hasmukh Shah'
diff --git a/iAnnotateSV/iAnnotateSV.py b/iAnnotateSV/iAnnotateSV.py
@@ -15,6 +15,7 @@
 import AddExternalAnnotations as aea
 import AnnotationForKinaseDomain as kda
 import VisualizeSV as vsv
+from models import *
 import os
 import sys
 import logging
@@ -275,6 +276,7 @@ def processSV(svDF, refDF, args):
         pos2 = int(row.loc['pos2'])
         str1 = int(row.loc['str1'])
         str2 = int(row.loc['str2'])
+        b1, b2 = (None,)*2
         if(args.autoSelect):
             (gene1, transcript1, site1, zone1, strand1, intronnum1,
              intronframe1) = aeb.AnnotateEachBreakpoint(chr1, pos1, str1, refDF, args.autoSelect)
@@ -293,24 +295,24 @@ def processSV(svDF, refDF, args):
                 chr1, pos1, str1, chr2, pos2, str2, gene1, transcript1, site1, gene2, transcript2,
                 site2, fusionFunction]
         else:
-            (gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List,
-             intronframe1List) = aeb.AnnotateEachBreakpoint(chr1, pos1, str1, refDF, args.autoSelect)
-            # print "1:\n", gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List
-            (gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List,
-             intronframe2List) = aeb.AnnotateEachBreakpoint(chr2, pos2, str2, refDF, args.autoSelect)
-            # print "\n2:\n", gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List
-            (gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1) = fct.FindCT(
-                gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List, ctDict)
-            # print "\n3:\n", gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1
-            (gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2) = fct.FindCT(
-                gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List, ctDict)
-            # print "\n4:\n", gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2
-            ann1S = pd.Series([gene1, transcript1, site1, zone1, strand1, str1, intronnum1, intronframe1], index=[
-                              'gene1', 'transcript1', 'site1', 'zone1', 'txstrand1', 'readstrand1', 'intronnum1', 'intronframe1'])
-            ann2S = pd.Series([gene2, transcript2, site2, zone2, strand2, str2, intronnum2, intronframe2], index=[
-                              'gene2', 'transcript2', 'site2', 'zone2', 'txstrand2', 'readstrand2', 'intronnum2', 'intronframe2'])
-            fusionFunction = pf.PredictFunctionForSV(ann1S, ann2S)
-            # print "\n5:\n", fusionFunction
+            try:
+                (gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List) = aeb.AnnotateEachBreakpoint(chr1, pos1, str1, refDF, args.autoSelect)
+                (gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1) = fct.FindCT(gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List, ctDict)
+            except (IntergenicError, ChrError) as b1:
+                logging.info("iAnnotateSV: " + str(b1))
+                (gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1) = ("-",)*7
+            try:
+                (gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List) = aeb.AnnotateEachBreakpoint(chr2, pos2, str2, refDF, args.autoSelect)
+                (gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2) = fct.FindCT(gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List, ctDict)
+            except (IntergenicError, ChrError) as b2:
+                logging.info("iAnnotateSV: " + str(b2))
+                (gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2) = ("-",)*7
+            ann1S = pd.Series([gene1, transcript1, site1, zone1, strand1, str1, intronnum1, intronframe1], index=['gene1', 'transcript1', 'site1', 'zone1', 'txstrand1', 'readstrand1', 'intronnum1', 'intronframe1'])
+            ann2S = pd.Series([gene2, transcript2, site2, zone2, strand2, str2, intronnum2, intronframe2], index=['gene2', 'transcript2', 'site2', 'zone2', 'txstrand2', 'readstrand2', 'intronnum2', 'intronframe2'])
+            if not any([b1, b2]):
+                fusionFunction = pf.PredictFunctionForSV(ann1S, ann2S)
+            else:
+                fusionFunction = "-"
             annDF.loc[
                 count,
                 ['chr1', 'pos1', 'str1', 'chr2', 'pos2', 'str2', 'gene1', 'transcript1', 'site1',

diff --git a/iAnnotateSV/models.py b/iAnnotateSV/models.py
@@ -0,0 +1,17 @@
+class Error(Exception):
+    '''Base class for other exceptions'''
+    pass
+
+class ChrError(Error):
+    '''Raise when the breakpoint is not in autosome or allosome, where annotation is not possible'''
+    def __init__(self, bkp):
+        Exception.__init__(
+                self, "Breakpoint " + str(bkp) + " is in neither autosome nor allosome, and cannot be annotated."
+        )
+
+class IntergenicError(Error):
+    '''Raise when a breakpoint in intergenic region cannot be resolved'''
+    def __init__(self, bkp):
+        Exception.__init__(
+            self, "Intergenic breakpoint " + str(bkp) + " cannot be resolved."
+        )
diff --git a/setup.cfg b/setup.cfg
@@ -15,7 +15,7 @@ classifiers =
     Development Status :: 5 - Production/Stable
 
 [bumpversion]
-current_version = 1.2.0
+current_version = 1.2.1
 commit = True
 tag = True