-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFastaSequence.py
116 lines (103 loc) · 3.66 KB
/
FastaSequence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# -*- coding: utf-8 -*-
# Copyright 2006 INESC/ID
# Written by Luís Pedro Coelho
#
# Licence: MIT Licence:
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
# of the Software, and to permit persons to whom the Software is furnished to do
# so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
class fasta_sequence:
    """
    A FASTA record: a header line together with its sequence.

    Two members: header and seq. Both default to None when the record is
    created without arguments. The Get*/Set* accessors are kept for
    backward compatibility; the attributes may also be read and written
    directly.
    """
    __slots__ = ['seq', 'header']

    def __init__(self, h=None, s=None):
        """
        @param h the header text (stored as `header`)
        @param s the sequence (stored as `seq`)
        """
        # Go through the setters so that a subclass overriding them is honoured.
        self.SetHeader(h)
        self.SetSeq(s)

    def SetHeader(self, Header):
        """Store Header as this record's header."""
        self.header = Header

    def SetSeq(self, Seq):
        """Store Seq as this record's sequence."""
        self.seq = Seq

    def GetSeq(self):
        """Return this record's sequence."""
        return self.seq

    def GetHeader(self):
        """Return this record's header."""
        return self.header

    def __repr__(self):
        """Return a constructor-like representation, for debugging."""
        return "%s(%r, %r)" % (type(self).__name__, self.header, self.seq)
def fasta_read(input):
    """
    fasta_read(input):

    @param input can be either a file (any iterable of text lines) or the
                 name of a file. A name ending in '.gz' is opened as a
                 gzip-compressed text file.

    Returns a list of fasta_sequence objects with all the sequences in the file.

    Comments (lines starting with ';') are ignored, as are empty lines.
    Sequence data found before the first '>' line is returned as a record
    whose header is the empty string. A header that is not followed by any
    sequence lines yields a record with an empty sequence, wherever in the
    file it occurs.

    A file opened here from a name is closed before returning; a file
    object passed in by the caller is left open.
    """
    # NOTE: the parameter name shadows the builtin `input`; it is kept
    # because it is part of the public signature.
    opened_here = isinstance(input, str)
    if opened_here:
        if input.endswith('.gz'):
            import gzip
            # Text mode, so lines are str and compare equal to ';' and '>'.
            input = gzip.open(input, 'rt')
        else:
            input = open(input)
    try:
        results = []
        header = ''
        seq_items = []
        seen_header = False
        for raw_line in input:
            # Strip only the line terminator: a final line without a
            # trailing newline must not lose its last character, and CRLF
            # files must not leave '\r' inside headers or sequences.
            line = raw_line.rstrip('\r\n')
            if not line or line.startswith(';'):
                continue  # blank line or comment
            if line.startswith('>'):
                # Flush the record collected so far, if there is one.
                if seen_header or seq_items:
                    results.append(fasta_sequence(header, "".join(seq_items)))
                    seq_items = []
                header = line[1:]  # eat '>'
                seen_header = True
            else:
                seq_items.append(line)
        # Flush the last record, including a header with no sequence lines.
        if seen_header or seq_items:
            results.append(fasta_sequence(header, "".join(seq_items)))
        return results
    finally:
        if opened_here:
            input.close()
def fasta_write(output, s):
    """
    fasta_write(output, sequence[s])

    @param output either a file (opened for writing text) or a filename.
                  A filename ending in '.gz' is written gzip-compressed.
    @param s it can be either a single record (any object with `header`
             and `seq` attributes) or a list of such records; lists may
             be nested.

    Writes the sequence(s) into the file in FASTA format: a '> header'
    line followed by the sequence wrapped at 70 characters per line.

    A file opened here from a filename is closed before returning, so the
    data (and the gzip trailer) is flushed to disk; a file object passed in
    by the caller is left open.
    """
    line_width = 70
    opened_here = isinstance(output, str)
    if opened_here:
        if output.endswith('.gz'):
            import gzip
            # Text mode, so str can be written directly.
            output = gzip.open(output, 'wt')
        else:
            output = open(output, 'w')
    try:
        # Only real lists are treated as collections: a tuple-like record
        # type (e.g. a namedtuple with header/seq fields) must still be
        # written as a single record.
        if isinstance(s, list):
            for record in s:
                # `output` is a file object by now, so the recursive call
                # neither reopens nor closes it.
                fasta_write(output, record)
        else:
            output.write("> %s\n" % s.header)
            sequence = s.seq
            for start in range(0, len(sequence), line_width):
                output.write("%s\n" % sequence[start:start + line_width])
    finally:
        if opened_here:
            output.close()
def rfasta_write(output, seqs):
    """
    Restricted FASTA

    This format (used by BioProspector) is just FASTA with the whole
    sequence on one line.

    @param output either a file (opened for writing text) or a filename
    @param seqs an iterable of records (objects with `header` and `seq`
                attributes)

    A file opened here from a filename is closed before returning; a file
    object passed in by the caller is left open.
    """
    opened_here = isinstance(output, str)
    if opened_here:
        output = open(output, 'w')
    try:
        for record in seqs:
            output.write("> %s\n" % record.header)
            output.write("%s\n" % record.seq)
    finally:
        if opened_here:
            output.close()