-
Notifications
You must be signed in to change notification settings - Fork 1
/
gb2multifastaprot.py
60 lines (47 loc) · 1.66 KB
/
gb2multifastaprot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
from Bio import SeqIO
import sys
import os
import subprocess
#convert genbank to multi fasta nucleotide format
inputfile = sys.argv[1]
outfile = sys.argv[2]
f = open(inputfile,'r')
g= open(outfile,'w')
for seq_record in SeqIO.parse(f, "genbank") :
print("Dealing with GenBank record %s" % seq_record.id)
c=0
c2=0
for seq_feature in seq_record.features :
#print seq_feature.qualifiers.keys()
if seq_feature.type=="CDS" :
#get location
if 'translation' in seq_feature.qualifiers.keys():# and 'gene' in seq_feature.qualifiers.keys():
try:
startpos=int(str(seq_feature.location).split("(")[0].lstrip("[").rstrip("]").split(":")[0])
endpos=int(str(seq_feature.location).split("(")[0].lstrip("[").rstrip("]").split(":")[1])
print("CDS",startpos,endpos)
except:
c2=c2+1
print("translation failed")
c=c+1
g.write(">"+seq_record.id+"-"+str(c))
if 'locus_tag' in seq_feature.qualifiers.keys():
g.write(" "+seq_feature.qualifiers['locus_tag'][0])
if 'gene' in seq_feature.qualifiers.keys():
g.write(" "+seq_feature.qualifiers['gene'][0])
if 'product' in seq_feature.qualifiers.keys():
g.write(" "+seq_feature.qualifiers['product'][0])
g.write("\n"+seq_feature.qualifiers['translation'][0]+"\n")
'''
try:
g.write(" "+seq_feature.qualifiers[locus_tag][0],
seq_feature.qualifiers['gene'][0],
seq_feature.qualifiers['product'][0],
seq_feature.qualifiers['translation'][0])
#seq_record.seq[startpos:endpos]))
except:
print "failed parse"
p2=p2=1
'''
print(str(seq_record.id), "genes", c,"translation failed", c2)