-
Notifications
You must be signed in to change notification settings - Fork 0
/
comparison_stats.py
92 lines (62 loc) · 2.44 KB
/
comparison_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
from dipy.io.pickles import load_pickle,save_pickle
from scipy.stats import describe
import csv
"""
C_sizes = ['10000','25000','50000','100000']
for i,s in enumerate(C_sizes):
C=load_pickle('C'+s+'.pkl')
stats=[(len(C[k]),sum([C[k][j]['N'] for j in C[k].keys()])) for k in C.keys()]
print np.mean(np.array(stats),)
stop
"""
import sys  # needed only to pick the right file mode for the csv module


def _open_csv(path):
    """Open *path* for csv writing in the mode the running Python expects.

    The csv module wants a binary file on Python 2, but a text file opened
    with newline='' on Python 3 (a 'wb' file raises TypeError on the first
    writerow there).
    """
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='')
    return open(path, 'wb')


# Suffixes of the pickled result sets to summarise: 'results<suffix>.pkl' is
# loaded for each entry.  Alternative multi-size run:
#sizes = ['10k','25k','50k','100k']
sizes = ['_full']
# Keys read from every result record; also used as the CSV column headers.
metrics = ['Purity', 'RandomAccuracy', 'PairsConcordancy', 'Completeness', 'Correctness', 'MatchedAgreement', 'MatchedKappa']
# Row labels for the per-metric summary statistics.
stat_labels = ['Min', 'Max', 'Mean', 's.d.']

# One (n_results, n_metrics) array per entry of `sizes`; the plotting code
# further down the script reads this list.
alldata = []
with _open_csv('tmp.csv') as f:
    writer = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
    # Header: two leading blank cells (size label, statistic name), then metrics.
    writer.writerow([None, None] + metrics)
    for size in sizes:
        results = load_pickle('results' + size + '.pkl')
        # Rows follow the iteration order of `results`, columns follow `metrics`.
        table = np.array(
            [[results[k][m] for m in metrics] for k in results],
            dtype=float,
        ).reshape(len(results), len(metrics))
        alldata.append(table)

        # describe() returns (nobs, (min, max), mean, variance, ...) per column.
        d = describe(table, axis=0)
        col_min, col_max = d[1]
        # Min/Max rows are built for inspection but, as before, not written out.
        mins = (None, stat_labels[0]) + tuple(col_min)
        maxs = (None, stat_labels[1]) + tuple(col_max)
        means = (size, stat_labels[2]) + tuple(d[2])
        sds = (None, stat_labels[3]) + tuple(np.sqrt(d[3]))

        # Write the tuples directly: stacking them with numpy would turn the
        # numeric cells of the 'Mean' row into strings, which QUOTE_NONNUMERIC
        # would then emit quoted.
        writer.writerows([means, sds])
        # Blank separator row between result sets.
        writer.writerow((None,))
import matplotlib.pyplot as plt

# Grid of normalised histograms: one column per metric, one row per result
# set in `alldata`.  Each column shares its axes with that column's first row.
n_cols = len(metrics)
# Keep the original 4-row layout, but grow the grid instead of failing when
# more than four result sets are present.
n_rows = max(4, len(alldata))

fig = plt.figure()
for j in range(n_cols):
    first_ax = None
    for i, data in enumerate(alldata):
        position = j + n_cols * i + 1  # subplot indices are 1-based, row-major
        if first_ax is None:
            axis = first_ax = fig.add_subplot(n_rows, n_cols, position)
        else:
            axis = fig.add_subplot(n_rows, n_cols, position,
                                   sharex=first_ax, sharey=first_ax)
        # `density` replaces the `normed` keyword, which newer matplotlib
        # releases no longer accept.
        axis.hist(data[:, j], density=True, facecolor='green', alpha=0.75)
        # Tick labels are suppressed to keep the dense grid readable.
        axis.set_xticklabels([])
        axis.set_yticklabels([])
plt.show()