-
Notifications
You must be signed in to change notification settings - Fork 0
/
habitat.py
119 lines (64 loc) · 1.58 KB
/
habitat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/python
import sys
import os
import re
import glob
digits = re.compile(r'(\d+)')


def tokenize(filename):
    """Return a natural-sort key for *filename*.

    The name is split on runs of digits; the digit runs become ints and
    the text in between stays as strings, so that e.g. ``"f2"`` sorts
    before ``"f10"``.

    Args:
        filename: the string to build a key for.

    Returns:
        A tuple alternating text and int fragments, always starting and
        ending with a (possibly empty) text fragment.
    """
    fragments = digits.split(filename)
    # Splitting on a single capturing group yields text, digits, text, ...
    # so the digit runs are exactly the odd-indexed fragments; there is no
    # need to run the pattern again on each piece.
    return tuple(
        int(fragment) if index % 2 else fragment
        for index, fragment in enumerate(fragments)
    )
def parse_habitat_report(lines):
    """Parse one per-OTU habitat report into ``(otu, weights)``.

    Args:
        lines: iterable of text lines (an open file works). The layout this
            script relies on: line 1 is ``<label>: <otu>``, line 2 is
            skipped, line 3 is tab separated with an overall hit percentage
            (``NN%``) first and ``<habitat>: NN%`` fields after it.

    Returns:
        A tuple of the OTU name and a dict mapping each habitat name to
        ``hit fraction * habitat fraction``.

    Raises:
        IndexError: line 1 has no ``": "`` separator (or the input is empty).
        ValueError: a percentage field is not numeric.
    """
    reader = iter(lines)
    # next(..., "") mirrors file.readline(), which returns "" at end of file.
    otu = next(reader, "").split(": ")[1].rstrip("\r\n")
    next(reader, "")  # the second line carries nothing that is used here
    fields = next(reader, "").split("\t")
    hit_fraction = float(fields[0].split("%")[0]) / 100

    weights = {}
    for field in fields[1:]:
        if not field.strip():
            continue  # blank field, e.g. produced by a trailing tab
        # The habitat name is everything before the first colon. Reading the
        # percentage from the text after that colon (instead of "the token
        # after the first space") keeps names containing spaces working.
        habitat, _, share = field.partition(":")
        habitat_fraction = float(share.split("%")[0]) / 100
        weights[habitat] = hit_fraction * habitat_fraction
    return otu, weights


def parse_otu_table(lines):
    """Parse a tab-separated, normalised OTU table (float abundances).

    Lines starting with ``#`` are header lines: every cell after the first
    is registered as a sample name. Every other line is an OTU row: the
    first cell is the OTU name, the remaining cells are the abundances for
    the samples in header order.

    Args:
        lines: iterable of text lines (an open file works).

    Returns:
        ``(samples, sampledata)`` where ``samples`` is the list of sample
        names in column order and ``sampledata[sample][otu]`` is a float.

    Raises:
        IndexError: an OTU row has more value cells than known samples.
        ValueError: a value cell is not numeric.
    """
    samples = []
    sampledata = {}
    for line in lines:
        cells = line.rstrip("\r\n").split("\t")
        if line.startswith("#"):  # header: collect sample names
            for name in cells[1:]:
                samples.append(name)
                sampledata[name] = {}
        else:
            otu = cells[0]
            for column, value in enumerate(cells[1:]):
                sampledata[samples[column]][otu] = float(value)
    return samples, sampledata


def habitat_table_lines(habitats, samples, data, sampledata):
    """Yield the lines of the habitat-by-sample output table.

    The first line is the header ``#habitat<TAB>sample...``. Each following
    line holds one habitat and, per sample, the sum over OTUs of
    ``data[otu][habitat] * sampledata[sample][otu]``.

    Args:
        habitats: habitat names, in output row order.
        samples: sample names, in output column order.
        data: ``data[otu][habitat]`` weight, as built from the reports.
        sampledata: ``sampledata[sample][otu]`` abundance.

    Raises:
        KeyError: an OTU that contributes to a habitat has no abundance
            recorded for one of the samples.
    """
    yield "#habitat\t" + "\t".join(str(name) for name in samples) + "\n"
    for habitat in habitats:
        # The contributing OTUs do not depend on the sample, so work them
        # out once per habitat rather than once per (habitat, sample).
        contributors = [(otu, weights[habitat])
                        for otu, weights in data.items()
                        if habitat in weights]
        cells = [habitat]
        for sample in samples:
            abundances = sampledata[sample]
            total = 0
            # Plain left-to-right accumulation keeps the floating-point
            # result independent of how sum() is implemented.
            for otu, weight in contributors:
                total = total + weight * abundances[otu]
            cells.append(str(total))
        yield "\t".join(cells) + "\n"


def main(argv=None):
    """Collate per-OTU habitat reports against an OTU table.

    Command line: ``habitat.py <otu_table> <habitat_folder> <output>``.
    Prints the naturally sorted list of report files, writes the
    habitat-by-sample table to ``<output>`` and finally prints every OTU
    with the habitats found for it.

    Args:
        argv: argument vector to use instead of ``sys.argv`` (optional).
    """
    args = sys.argv if argv is None else argv
    if len(args) < 4:
        sys.exit("usage: habitat.py <otu_table> <habitat_folder> <output>")

    # Both files are opened up front, as before, but are now closed reliably.
    with open(args[1], 'r') as table, open(args[3], 'w') as out:
        filelist = glob.glob(args[2] + "/*")
        filelist.sort(key=tokenize)
        print(filelist)

        # Make the collated habitat dict.
        data = {}
        habitats = set()
        for path in filelist:
            with open(path, 'r') as report:
                otu, weights = parse_habitat_report(report)
            data[otu] = weights
            habitats.update(weights)

        samples, sampledata = parse_otu_table(table)
        out.writelines(
            habitat_table_lines(sorted(habitats), samples, data, sampledata))

    for otu in data:
        print("%s %s" % (otu, list(data[otu])))


if __name__ == "__main__":
    main()