Skip to content

Commit

Permalink
Merge branch 'release/1.2.1' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshah committed Oct 1, 2020
2 parents e951828 + bde401c commit a2f8654
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 30 deletions.
4 changes: 3 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ v1.0.5
Major Bug Fix:
Intron numbers were not assigned properly in some cases, which could produce a wrong annotation
## v1.0.6
RS:Added funtionality to get counts of events from Cosmic Fusion Export.
RS:Added functionality to get counts of events from Cosmic Fusion Export.
## v1.0.7
Updated to have no padding and proper transcript selection
## v1.0.8
Expand All @@ -23,3 +23,5 @@ Removing TIMM23B for proper annotation of RET-NCOA4
RS:Added functionality for kinase domain involvement in the annotation
## v1.2.0
RS:Bug fix for running auto selection mode and upgrading requirements
## v1.2.1
GJ:Bug fix for transcript selection and for the error raised when a breakpoint is on the MT chromosome.
3 changes: 2 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
iAnnotateSV: Annotation of structural variants detected from NGS
================================================================

:Author: Ronak H Shah
:Author: `Ronak H Shah <http://github.com/rhshah>`_
:Contributors: `Gowtham Jayakumaran <https://github.com/andurill>`_ and `Ian Johnson <https://github.com/ionox0>`_
:Contact: [email protected]
:Source code: http://github.com/rhshah/iAnnotateSV
:License: `Apache License 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_
Expand Down
3 changes: 2 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
iAnnotateSV: Annotation of structural variants detected from NGS
================================================================

:Author: Ronak H Shah
:Author: `Ronak H Shah <http://github.com/rhshah>`_
:Contributors: `Gowtham Jayakumaran <https://github.com/andurill>`_ and `Ian Johnson <https://github.com/ionox0>`_
:Contact: [email protected]
:Source code: http://github.com/rhshah/iAnnotateSV
:License: `Apache License 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_
Expand Down
15 changes: 9 additions & 6 deletions iAnnotateSV/AnnotateEachBreakpoint.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,18 @@
import pandas as pd
import numpy as np
import helper as hp
import re
import FindTranscript as ft
from models import *

def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
#print "Annotating a coordinate:",position
if(chromosome.startswith('chr')):
chromosome = chromosome
else:
chromosome = "chr" + chromosome
if not re.match(r"(chr[1-9]$|chr1[0-9]$|chr2[0-2]$|chr[X|Y]$)", chromosome):
raise ChrError(":".join([str(chromosome), str(position)]))
#Find all the chromosomes
idxList = df[df['chrom'] == chromosome].index.tolist()
transcriptIndex = []
Expand All @@ -24,10 +28,7 @@ def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
if((geneStart <= position) and (geneEnd >= position)):
#print position,geneStart,geneEnd
transcriptIndex.append(index)
desc = None
intronnum = None
intronframe = None
#print transcriptIndex
geneName, transcript, desc, zone, strandDirection, intronnum, intronframe = (None,)*7
if(transcriptIndex):
coordData = pd.DataFrame(index=np.asarray(transcriptIndex),columns=['c', 'd', 'e', 'd1', 'd2', 'e1', 'e2','f'])

Expand All @@ -39,7 +40,7 @@ def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
c = None # zone: 1=exon, 2=intron, 3=3'-UTR, 4=5'-UTR, 5=promoter
d,e = (None for i in range(2)) # for exons: which one, and how far
d1,d2,e1,e2 = (None for i in range(4)) # for introns: between which exons and how far?
f = None; #for introns: how many bases in the partially completed codon?
f = None #for introns: how many bases in the partially completed codon?

#print df.iloc[tindex]['#name']
#in promoter region ?
Expand Down Expand Up @@ -175,4 +176,6 @@ def AnnotateEachBreakpoint(chromosome,position,strand,df,autoSelect):
desc = 'IGR: ' + hp.bp2str(distBefore[afterIdx],2) + ' after ' + geneName + '(' + strandDirection + ')'
#print a
break
return(geneName,transcript,desc,zone,strandDirection,intronnum,intronframe)
if not all([geneName, transcript, desc]):
raise IntergenicError(":".join([str(chromosome), str(position)]))
return(geneName,transcript,desc,zone,strandDirection,intronnum,intronframe)
4 changes: 2 additions & 2 deletions iAnnotateSV/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

__title__ = 'iAnnotateSV'
__version__ = '1.2.0'
__version__ = '1.2.1'
__author__ = 'Ronak H Shah'
__license__ = 'Apache Software License'
__copyright__ = 'Copyright 2019 Ronak Hasmukh Shah'
__copyright__ = 'Copyright 2020 Ronak Hasmukh Shah'
38 changes: 20 additions & 18 deletions iAnnotateSV/iAnnotateSV.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import AddExternalAnnotations as aea
import AnnotationForKinaseDomain as kda
import VisualizeSV as vsv
from models import *
import os
import sys
import logging
Expand Down Expand Up @@ -275,6 +276,7 @@ def processSV(svDF, refDF, args):
pos2 = int(row.loc['pos2'])
str1 = int(row.loc['str1'])
str2 = int(row.loc['str2'])
b1, b2 = (None,)*2
if(args.autoSelect):
(gene1, transcript1, site1, zone1, strand1, intronnum1,
intronframe1) = aeb.AnnotateEachBreakpoint(chr1, pos1, str1, refDF, args.autoSelect)
Expand All @@ -293,24 +295,24 @@ def processSV(svDF, refDF, args):
chr1, pos1, str1, chr2, pos2, str2, gene1, transcript1, site1, gene2, transcript2,
site2, fusionFunction]
else:
(gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List,
intronframe1List) = aeb.AnnotateEachBreakpoint(chr1, pos1, str1, refDF, args.autoSelect)
# print "1:\n", gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List
(gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List,
intronframe2List) = aeb.AnnotateEachBreakpoint(chr2, pos2, str2, refDF, args.autoSelect)
# print "\n2:\n", gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List
(gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1) = fct.FindCT(
gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List, ctDict)
# print "\n3:\n", gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1
(gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2) = fct.FindCT(
gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List, ctDict)
# print "\n4:\n", gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2
ann1S = pd.Series([gene1, transcript1, site1, zone1, strand1, str1, intronnum1, intronframe1], index=[
'gene1', 'transcript1', 'site1', 'zone1', 'txstrand1', 'readstrand1', 'intronnum1', 'intronframe1'])
ann2S = pd.Series([gene2, transcript2, site2, zone2, strand2, str2, intronnum2, intronframe2], index=[
'gene2', 'transcript2', 'site2', 'zone2', 'txstrand2', 'readstrand2', 'intronnum2', 'intronframe2'])
fusionFunction = pf.PredictFunctionForSV(ann1S, ann2S)
# print "\n5:\n", fusionFunction
try:
(gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List) = aeb.AnnotateEachBreakpoint(chr1, pos1, str1, refDF, args.autoSelect)
(gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1) = fct.FindCT(gene1List, transcript1List, site1List, zone1List, strand1List, intronnum1List, intronframe1List, ctDict)
except (IntergenicError, ChrError) as b1:
logging.info("iAnnotateSV: " + str(b1))
(gene1, transcript1, site1, zone1, strand1, intronnum1, intronframe1) = ("-",)*7
try:
(gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List) = aeb.AnnotateEachBreakpoint(chr2, pos2, str2, refDF, args.autoSelect)
(gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2) = fct.FindCT(gene2List, transcript2List, site2List, zone2List, strand2List, intronnum2List, intronframe2List, ctDict)
except (IntergenicError, ChrError) as b2:
logging.info("iAnnotateSV: " + str(b2))
(gene2, transcript2, site2, zone2, strand2, intronnum2, intronframe2) = ("-",)*7
ann1S = pd.Series([gene1, transcript1, site1, zone1, strand1, str1, intronnum1, intronframe1], index=['gene1', 'transcript1', 'site1', 'zone1', 'txstrand1', 'readstrand1', 'intronnum1', 'intronframe1'])
ann2S = pd.Series([gene2, transcript2, site2, zone2, strand2, str2, intronnum2, intronframe2], index=['gene2', 'transcript2', 'site2', 'zone2', 'txstrand2', 'readstrand2', 'intronnum2', 'intronframe2'])
if not any([b1, b2]):
fusionFunction = pf.PredictFunctionForSV(ann1S, ann2S)
else:
fusionFunction = "-"
annDF.loc[
count,
['chr1', 'pos1', 'str1', 'chr2', 'pos2', 'str2', 'gene1', 'transcript1', 'site1',
Expand Down
17 changes: 17 additions & 0 deletions iAnnotateSV/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
class Error(Exception):
    '''Base class for other exceptions'''
    pass


class ChrError(Error):
    '''Raise when the breakpoint is not in autosome or allosome, where annotation is not possible.

    The constructor takes a single value, ``bkp``, which is converted with
    ``str()`` and embedded in the message. The original value is kept on the
    instance as ``bkp`` so handlers can read it without parsing the message.
    '''
    def __init__(self, bkp):
        # Initialise through super() so the Error base class is not bypassed.
        super(ChrError, self).__init__(
            "Breakpoint " + str(bkp) + " is in neither autosome nor allosome, and cannot be annotated."
        )
        self.bkp = bkp

    def __reduce__(self):
        # ``args`` holds the formatted message, so the default reconstruction
        # (copy/pickle) would pass that message back into __init__ and wrap
        # it a second time. Rebuild from the original breakpoint instead.
        return (type(self), (self.bkp,))


class IntergenicError(Error):
    '''Raise when a breakpoint in intergenic region cannot be resolved.

    The constructor takes a single value, ``bkp``, which is converted with
    ``str()`` and embedded in the message. The original value is kept on the
    instance as ``bkp`` so handlers can read it without parsing the message.
    '''
    def __init__(self, bkp):
        # Initialise through super() so the Error base class is not bypassed.
        super(IntergenicError, self).__init__(
            "Intergenic breakpoint " + str(bkp) + " cannot be resolved."
        )
        self.bkp = bkp

    def __reduce__(self):
        # Same reasoning as ChrError.__reduce__: avoid re-wrapping the
        # already formatted message when the exception is copied or pickled.
        return (type(self), (self.bkp,))
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ classifiers =
Development Status :: 5 - Production/Stable

[bumpversion]
current_version = 1.2.0
current_version = 1.2.1
commit = True
tag = True

Expand Down

0 comments on commit a2f8654

Please sign in to comment.