-
Notifications
You must be signed in to change notification settings - Fork 0
/
orfr.py
79 lines (58 loc) · 2.37 KB
/
orfr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from Bio import SeqIO
# Print the sequence of every record parsed from the FASTA file "orf-out.txt".
# This runs at module top level (it is not under the `__main__` guard), so it
# executes on import as well as when the file is run as a script.
# NOTE(review): the indentation of this file was lost in the copy reviewed; the
# two bare print() calls are assumed to follow the loop (emitting two blank
# separator lines once) rather than sit inside it - confirm against the
# original file.
records = SeqIO.parse("orf-out.txt", "fasta")
for record in records:
    print(record.seq)
print()
print()
import re
from Bio.Seq import reverse_complement
# Codon -> amino-acid lookup for DNA (one-letter codes, 'Stop' for the three
# terminators).  The residue string lists the products in the order the codons
# are generated: second base outermost, then third base, then first base, each
# running through T, C, A, G.  '*' marks a stop codon.
DNA_CODON = {
    first + second + third: 'Stop' if residue == '*' else residue
    for (second, third, first), residue in zip(
        (
            (second, third, first)
            for second in 'TCAG'
            for third in 'TCAG'
            for first in 'TCAG'
        ),
        'FLIVFLIVLLIVLLMV'  # second base T
        'SPTASPTASPTASPTA'  # second base C
        'YHNDYHND*QKE*QKE'  # second base A
        'CRSGCRSG*RRGWRRG',  # second base G
    )
}
def translate_codon(codon):
    """Return the DNA_CODON entry for a three-letter codon.

    The result is a one-letter amino-acid code or the string 'Stop'.
    None is returned when the codon is not exactly three characters long
    or is not a key of DNA_CODON.
    """
    if len(codon) != 3:
        return None
    return DNA_CODON.get(codon)
# Split the sequence into codons once per start codon found in it.
def refine_start_codon(DNA):
    """Return one codon list for every 'ATG' occurrence in DNA.

    Each occurrence (overlapping ones included, because the search uses a
    zero-width lookahead) yields the list of consecutive three-character
    chunks read from that position to the end of the string.  A trailing
    remainder of one or two characters is dropped.  The lists appear in the
    order the start codons occur in DNA.
    """
    return [
        re.findall('...', DNA[hit.start():])
        for hit in re.finditer('(?=ATG)', DNA)
    ]
# Translate every open reading frame of a DNA string into a protein string.
def translate_dna_protein(seq):
    """Return the proteins encoded by the reading frames found in seq.

    Every codon list produced by refine_start_codon(seq) is translated codon
    by codon until a 'Stop' is reached; the residues collected up to that
    point form one protein.  A frame that runs out of codons without meeting
    a stop codon contributes nothing.  Results follow the order of the start
    codons in seq and may contain duplicates.

    A codon that translate_codon cannot translate (it returns None) makes
    the string concatenation raise TypeError.
    """
    proteins = []
    for codons in refine_start_codon(seq):
        residues = ""
        for codon in codons:
            amino = translate_codon(codon)
            if amino == "Stop":
                # Only frames closed by a stop codon are kept.
                proteins.append(residues)
                break
            residues += amino
    return proteins
if __name__ == '__main__':
    # Script entry point: print the longest protein encoded by any open
    # reading frame on either strand of the DNA string stored in the first
    # line of 'rosalind_orfr.txt'.

    # Use a context manager so the file handle is closed deterministically.
    with open('rosalind_orfr.txt') as handle:
        dna = handle.readline().strip()

    reversed_dna = reverse_complement(dna)
    possibility_dna = translate_dna_protein(dna)
    possibility_rev = translate_dna_protein(reversed_dna)

    candidates = possibility_dna + possibility_rev
    if not candidates:
        # max() on an empty collection would raise a bare ValueError; exit
        # with a message that says what actually went wrong instead.
        raise SystemExit('No open reading frame found in rosalind_orfr.txt')

    # Take the maximum over the list itself rather than a set: duplicates do
    # not affect the result, and ties between equally long proteins are then
    # always resolved in favour of the first one found instead of depending
    # on set iteration order.
    print(max(candidates, key=len))