-
Notifications
You must be signed in to change notification settings - Fork 1
/
analyze.py
executable file
·44 lines (41 loc) · 1.98 KB
/
analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/local/bin/python
import sys
from zipfile import ZipFile
# Motif tallies printed after the two GATTACA position lines, in output
# order.  Each entry is (format string, motif).  Counts come from str.count,
# which counts non-overlapping occurrences.
_MOTIF_REPORT = (
    (" Total occurences of 'GATTACA' = %d", 'GATTACA'),
    (" Kitty score (# of 'CAT's) = %d", 'CAT'),
    (" Who's it? (# of 'TAG's) = %d", 'TAG'),
    (" GATT compliance = %d", 'GATT'),
    (" ATT customer = %d", 'ATT'),
    (" Sassy one (# of 'TAT's) = %d", 'TAT'),
    (" Lil Monsters (# of 'GAGA's) = %d", 'GAGA'),
    (" Perverts (# of 'TATA's) = %d", 'TATA'),
    (" Gross ones (# of 'GAG's) = %d", 'GAG'),
    (" Original Gangsta (# of 'GAT's) = %d", 'GAT'),
    (" Appropriateness (# of 'TACT's) = %d", 'TACT'),
)

# Single characters that get their own "Found ..." line; every other
# character is lumped into the "something elses" tally.
_BASE_SYMBOLS = ('G', 'A', 'T', 'C', '-')


def _read_genotypes(line_source):
    """Return the joined 4th tab-separated column of all data lines.

    Lines starting with '#' are skipped.  Lines with fewer than four
    tab-separated columns (e.g. a blank trailing line) are skipped too,
    instead of aborting the whole run with an IndexError.

    line_source -- iterable of text or bytes lines (zip members yield bytes
                   on Python 3; those are decoded before parsing).
    """
    pieces = []
    for line in line_source:
        if not isinstance(line, str):
            line = line.decode('utf-8', 'replace')
        if line.startswith('#'):
            continue
        columns = line.split('\t')
        if len(columns) < 4:
            continue
        pieces.append(columns[3].strip())
    return ''.join(pieces)


def _load_genotypes(input_path):
    """Read the genotype string from input_path, closing every handle.

    A path ending in ".zip" is opened as an archive and its first member is
    read; any other path is opened as a plain text file.
    """
    if input_path.endswith(".zip"):
        with ZipFile(input_path, 'r') as archive:
            with archive.open(archive.infolist()[0]) as member:
                return _read_genotypes(member)
    with open(input_path) as input_fh:
        return _read_genotypes(input_fh)


def _print_report(as_str):
    """Print the motif positions/counts and per-character tallies."""
    print("Read in %d genotype components" % len(as_str))
    # find/rfind return -1 when the motif is absent; that value is printed.
    print(" First occurence of 'GATTACA' @ %d" % as_str.find('GATTACA'))
    print(" Last occurence of 'GATTACA' @ %d" % as_str.rfind('GATTACA'))
    for template, motif in _MOTIF_REPORT:
        print(template % as_str.count(motif))
    print("")

    tallies = [as_str.count(symbol) for symbol in _BASE_SYMBOLS]
    for symbol, tally in zip(_BASE_SYMBOLS, tallies):
        print("Found %d %s's" % (tally, symbol))
    # Whatever is not one of the tracked symbols is "something else".
    print("Found %d something elses" % (len(as_str) - sum(tallies)))


def main(argv=None):
    """Analyze each input path and print its report to stdout.

    argv -- list of file paths; defaults to sys.argv[1:].
    """
    paths = sys.argv[1:] if argv is None else argv
    for input_path in paths:
        # The banner is printed before the file is opened, so it appears
        # even if reading the file subsequently fails.
        print("\n Analyzing %s\n" % input_path)
        print("-" * (14 + len(input_path) + 4))
        _print_report(_load_genotypes(input_path))


if __name__ == "__main__":
    main()