-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSVM.py
114 lines (85 loc) · 4 KB
/
SVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import itertools
import numpy as np
import cvxopt
from sklearn import datasets
from sklearn.utils import shuffle
import data_processing
class RetardedSVM:
def __init__(self):
self.w = None
self.b = None
def fit(self, data, labels):
classes = list(set(labels.T[0]))
if len(classes) != 2:
raise ValueError
y = np.array(list(map(lambda x: 1 if x == classes[0] else -1, labels)))
no_vectors = data.shape[0]
P = np.zeros((no_vectors, no_vectors))
for i, j in itertools.product(range(no_vectors), range(no_vectors)):
P[i, j] = -np.dot(data[i, :], data[j, :])*y[i]*y[j]
print(np.linalg.matrix_rank(P), P.shape)
print(P)
q = cvxopt.matrix(np.ones((data.shape[0],)))
P = cvxopt.matrix(P.astype(np.double))
G = cvxopt.matrix(-np.identity(no_vectors).astype(np.double))
h = cvxopt.matrix(np.zeros((no_vectors,)).astype(np.double))
A = cvxopt.matrix(np.diag(y).astype(np.double))
# A = np.asarray(y)
b = cvxopt.matrix(h)
# print(P.shape, q.shape, G.shape, h.shape, A.shape, b.shape)
alpha = cvxopt.solvers.qp(P, q, G, h, A, b)['x']
i = max([i if not np.isclose(n, 0) else -1 for i, n in enumerate(alpha)])
self.w = sum([xi*yi*alphai for xi, yi, alphai in zip(data, y, alpha)], start=np.zeros(no_vectors))
self.b = 1 - np.dot(sum([a*yi*xi for a, yi, xi in zip(alpha, y, data)], start=np.zeros(data[0].shape)), data[i])
def predict(self, v):
return np.dot(self.w, v) + self.b > 0
class SVM:
def __init__(self, softing_parameter=100, kernel=lambda x, y: np.dot(x, y)):
self.beta = None
self.C = softing_parameter
self.kernel = kernel
def gradient(self, x, y):
if 1-y*(self.kernel(self.beta, x)) <= 0:
return self.beta
return self.beta - self.C*y*x
def fit(self, data, labels, epochs=10000, learning_rate=lambda t: 1/(1000 + t)):
# print(labels)
classes = sorted(list(set(labels)))
if len(classes) != 2:
raise ValueError
pretty_labels = np.array(list(map(lambda x: -1 if x == classes[0] else 1, labels)))
self.beta = np.random.normal(0, 1, data.shape[1] + 1)
for i in range(epochs):
x, y = list(zip(data, pretty_labels))[np.random.randint(0, data.shape[0])]
x = np.append(x, 1)
self.beta -= learning_rate(i)*self.gradient(x, y)
def predict(self, x):
x = np.append(x, 1)
return 1 if self.kernel(x, self.beta) >= 0 else -1
if __name__ == '__main__':
# np.random.seed(1)
data = data_processing.data_generator(no_samples_per_class=10, no_classes=2, no_dimensions=2, centers=((0, 0), (15, 5)))
# data_processing.plot_data(data)
# svm = SVM(softing_parameter=10, kernel=lambda x, y: (np.dot(x, y) + 1000)**10)
svm = SVM(softing_parameter=10000, kernel=lambda x, y: np.dot(x, y))
# print(svm.predict(np.array([15, 5])))
data, target = datasets.load_iris(as_frame=True, return_X_y=True)
data['target'] = target
data = data[data['target'] < 2]
data = shuffle(data)
print(data.shape)
training_data = data.iloc[:30, :]
test_data = data.iloc[30:, :]
print(len(test_data))
training_labels = training_data['target'].to_numpy(copy=True)
test_labels = test_data['target'].to_numpy(copy=True)
training_data = training_data.drop('target', axis=1)
test_data = test_data.drop('target', axis=1)
training_data = training_data.to_numpy(copy=True)
test_data = test_data.to_numpy(copy=True)
svm.fit(training_data, training_labels, epochs=20000)
test_labels = np.where(test_labels == 0, -1, test_labels)
good = [svm.predict(x) == y for x, y in zip(test_data, test_labels)]
predictions = [svm.predict(x) for x in test_data]
print(len(predictions))
print(f'accuracy: {good.count(True)/len(good)}')