-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathDPopt.py
executable file
·66 lines (60 loc) · 1.71 KB
/
DPopt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
import cluster
import sys
import csv
import math
import random
import scipy.optimize
# function to minimize
def g(l, returnObject=False):
    """Objective for the lambda search: best error over several DP-means runs.

    Builds ``cluster.dpmeans(res, l, xVal)`` ``iters`` times, calls ``run()``
    on each, and keeps the smallest second return value (``xerr``).
    NOTE(review): ``xerr`` is presumably the cross-validation error, since
    ``xVal`` is handed to the constructor -- confirm against cluster.py.

    Reads the module-level names ``iters``, ``res`` and ``xVal``.

    Args:
        l: the lambda value forwarded to ``cluster.dpmeans``.
        returnObject: when true, also return the ``dpmeans`` object that
            achieved the minimum.

    Returns:
        The minimum ``xerr`` found, or ``(min_xerr, best_object)`` when
        ``returnObject`` is true. ``best_object`` is ``None`` if no run
        reported an error below the initial sentinel (e.g. ``iters`` is 0).
    """
    # float("inf") compares greater than any finite error on both Python 2
    # and Python 3, unlike the Python-2-only sys.maxint.
    minError = float("inf")
    kmin = None
    for _ in range(iters):
        k1 = cluster.dpmeans(res, l, xVal)
        err, xerr = k1.run()
        if xerr < minError:
            minError = xerr
            kmin = k1
    if returnObject:
        # Return the run that actually produced minError, not merely the
        # last run executed.
        return minError, kmin
    return minError
# Parameters
# Number of independent dpmeans runs g() performs per lambda value; the
# smallest error among them is the objective value.
iters = 8
# Divides the data spread to obtain dataGrain, whose reciprocal is one of the
# two bracket points handed to the lambda search.
maxClusters = 12
# Fraction of the input rows counted into xVal for cross-validation (20%).
xValFrac = 0.2
# Read data from standard input: one sample per CSV row, every field parsed
# as a float. csv.reader yields [] for blank lines; those are skipped so a
# stray empty line cannot break the per-feature indexing below.
res = [[float(x) for x in row] for row in csv.reader(sys.stdin) if row]
if not res:
    sys.exit("DPopt: no data rows read from standard input")

# The first row fixes the number of features; extra columns in later rows
# are ignored and a shorter row raises IndexError.
nFeatures = len(res[0])

# Per-feature minimum and maximum over all samples.
minx = [min(r[j] for r in res) for j in range(nFeatures)]
maxx = [max(r[j] for r in res) for j in range(nFeatures)]

# Largest extent of the data along any single feature axis.
dataSpread = max(abs(hi - lo) for hi, lo in zip(maxx, minx))
if dataSpread == 0:
    # The lambda search brackets on 1/dataSpread, which is undefined when
    # every sample is identical.
    sys.exit("DPopt: all samples are identical; cannot derive a lambda range")

# Quick and dirty minimum scale: the average spacing if maxClusters points
# were laid out evenly along dataSpread.
dataGrain = dataSpread / float(maxClusters)
# Randomize the row order in place before a portion is set aside for
# cross-validation.
random.shuffle(res)

# Number of rows reserved for cross-validation (fraction truncated to int).
xVal = int(len(res) * xValFrac)

# Minimize g over lambda with Brent's method, starting from the bracket
# spanned by the inverse data spread and the inverse data grain.
optLambda = scipy.optimize.brent(
    g,
    brack=(1.0 / dataSpread, 1.0 / dataGrain),
    tol=1e-4,
    maxiter=100,
    full_output=0,
)

# Evaluate once more at the optimum to get the error together with the
# clustering object.
e, k = g(optLambda, returnObject=True)
# Write the clustering output and the error trace as CSV, then report the
# optimum. The files are opened in "wb" because the Python 2 csv module
# expects binary mode; the with-blocks guarantee both files are flushed and
# closed even if a row fails to serialize.
# NOTE: the ./output directory must already exist; open() does not create it.
with open("./output/opt_result.csv", "wb") as resultFile:
    wrtr = csv.writer(resultFile)
    for x in k.getOutput():
        wrtr.writerow(x)

with open("./output/opt_error.csv", "wb") as errorFile:
    eWrtr = csv.writer(errorFile)
    for x in k.getErrors():
        eWrtr.writerow(x)

# Parenthesized single-argument form prints identically under the Python 2
# print statement and the Python 3 print function.
print("lambda: %2.5f\n with error: %2.5f\n" % (optLambda, e))