-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrandom_projection.py
97 lines (65 loc) · 2 KB
/
random_projection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
import load_data as data
import iterations
import dimensions
import sys
from scipy.spatial.distance import euclidean
'''
compare different implementations of random projection
dataset:
- https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/
not sparse shape: 569,30
sparse shape: 569,1369
usage:
compare by iterations
$ python random_projection.py iterations 10
compare by iterations with binary encoding
$ python random_projection.py iterations 10 --encode
compare by dimensions, 5 to 20 dimensions with stepsize 1
$ python random_projection.py dimensions 5-20-1
compare by dimensions with binary encoding, 50 to 500 dimensions with stepsize 50
$ python random_projection.py dimensions 50-500-50 --encode
'''
# ---------------------------------------------------------------------------
# Script body: parse the command line, load the dataset, precompute the
# pairwise euclidean distances of the original (unprojected) rows, then hand
# everything to the comparison module selected by the first argument.
# ---------------------------------------------------------------------------


def _fail_usage():
    """Print the usage hint and terminate with a non-zero exit status."""
    print("wrong parameter, see usage")
    # sys.exit instead of the site-provided exit(): always available, and a
    # non-zero status lets calling shells/scripts detect the failure.
    sys.exit(1)


# Expected: <script> <mode> <config> [--encode]
len_args = len(sys.argv)
if len_args < 3 or len_args > 4:
    _fail_usage()

params = sys.argv
mode = params[1]
config = params[2]
binary_encode = "--encode" in params
print(binary_encode)

# Load first, THEN densify. The check must run on the loaded matrix, not on
# the load_data module: a result exposing toarray() (e.g. a scipy sparse
# matrix -- presumably what load() returns with --encode; confirm against
# load_data) has to be converted before rows are handed to euclidean().
# NOTE: from here on `data` is the loaded matrix, no longer the module.
data = data.load(binary_encode)
if hasattr(data, "toarray"):
    data = data.toarray()

orig_shape = np.shape(data)
print(orig_shape)
# amount of rows
orig_rows = orig_shape[0]
orig_dimension = orig_shape[1]

# Pairwise distance matrix of the original rows. The diagonal stays 0 and
# the distance is symmetric, so only the upper triangle is computed and then
# mirrored into the lower one.
origDistances = np.zeros((orig_rows, orig_rows))
for i in range(orig_rows):
    for j in range(i + 1, orig_rows):
        dist = euclidean(data[i], data[j])
        origDistances[i][j] = dist
        origDistances[j][i] = dist

if mode == "iterations":
    # config is the number of iterations, e.g. "10"
    try:
        param = int(config)
    except ValueError:
        _fail_usage()
    iterations.compare(origDistances, data, param, binary_encode)
elif mode == "dimensions":
    # config is "<start>-<stop>-<stepsize>", e.g. "5-20-1"
    config = config.split("-")
    if len(config) != 3:
        _fail_usage()
    try:
        start, stop, stepsize = [int(part) for part in config]
    except ValueError:
        _fail_usage()
    # "-" is the field separator, so negative values cannot be passed; a
    # zero step is the only remaining degenerate case for np.arange.
    if stepsize < 1:
        _fail_usage()
    # stop is inclusive. Using stop + 1 (rather than stop + stepsize) keeps
    # the range from overshooting when (stop - start) is not a multiple of
    # stepsize, e.g. "5-20-4" must not yield 21.
    d = np.arange(start, stop + 1, stepsize)
    dimensions.compare(origDistances, data, d, binary_encode)
else:
    _fail_usage()