SVM.py
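# A hard-margin linear SVM built from scratch on a small 2-D toy dataset.
# Training is a brute-force search for the smallest-norm weight vector w
# (and bias b) that still separates the two classes with margin >= 1.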
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
style.use('ggplot')
# Toy training data: two linearly separable classes, keyed by their label.
data_dict = {-1: np.array([[1, 7], [2, 8], [3, 8]]),
              1: np.array([[5, 1], [6, -1], [7, 3]])}
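# The search below approximates the hard-margin SVM problem:
#     minimise ||w||  subject to  y_i * (w . x_i + b) >= 1  for all i.
# Quick sanity check of the constraint (illustrative numbers only): for
# w = [0.5, -0.5], b = 1 and the point x = [5, 1] with label y = +1,
# y * (w . x + b) = 1 * (2.5 - 0.5 + 1) = 3 >= 1, so that point is feasible.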

class Support_Vector_Machine:
    def __init__(self, visualization=True):
        self.visualization = visualization
        self.colors = {1: 'r', -1: 'b'}
        if self.visualization:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1, 1, 1)

    def fit(self, data):
        # Brute-force training: find the w, b with the smallest ||w|| that
        # still satisfies y_i * (w . x_i + b) >= 1 for every training point.
        self.data = data
        opt_dict = {}  # maps ||w|| -> [w, b] for every feasible candidate
        transforms = [[1, 1], [-1, 1], [-1, -1], [1, -1]]  # try w in all four sign quadrants

        all_data = []
        for yi in self.data:
            for featureset in self.data[yi]:
                for feature in featureset:
                    all_data.append(feature)
        self.max_feature_value = max(all_data)
        self.min_feature_value = min(all_data)
        all_data = None

        # Progressively finer steps for the descent over candidate w values.
        step_sizes = [self.max_feature_value * 0.1,
                      self.max_feature_value * 0.01,
                      self.max_feature_value * 0.001]
        b_range_multiple = 5  # range of b values to try, as a multiple of the max feature value
        b_multiple = 5        # take larger steps with b than with w
        latest_optimum = self.max_feature_value * 10

        for step in step_sizes:
            w = np.array([latest_optimum, latest_optimum])
            optimized = False
            while not optimized:
                for b in np.arange(-1 * (self.max_feature_value * b_range_multiple),
                                   self.max_feature_value * b_range_multiple,
                                   step * b_multiple):
                    for transformation in transforms:
                        w_t = w * transformation
                        found_option = True
                        # Check the margin constraint against every training point.
                        for i in self.data:
                            for xi in self.data[i]:
                                yi = i
                                if not yi * (np.dot(w_t, xi) + b) >= 1:
                                    found_option = False
                        if found_option:
                            opt_dict[np.linalg.norm(w_t)] = [w_t, b]
                if w[0] < 0:
                    optimized = True
                    print('Optimized a step')
                else:
                    w = w - step

            # Keep the smallest-norm feasible candidate and restart the next,
            # finer pass just above it.
            norms = sorted([n for n in opt_dict])
            opt_choice = opt_dict[norms[0]]
            self.w = opt_choice[0]
            self.b = opt_choice[1]
            latest_optimum = opt_choice[0][0] + step * 2

    def predict(self, features):
        # sign(w . x + b) gives the predicted class.
        classification = np.sign(np.dot(np.array(features), self.w) + self.b)
        if classification != 0 and self.visualization:
            self.ax.scatter(features[0], features[1], s=200, marker='*',
                            c=self.colors[classification])
        elif classification == 0:
            print('Featureset', features, 'is on the decision boundary')
        return classification

    def visualize(self):
        # Scatter the training data, then draw the two support hyperplanes
        # and the decision boundary.
        for i in data_dict:
            for x in data_dict[i]:
                self.ax.scatter(x[0], x[1], s=100, color=self.colors[i])

        def hyperplane(x, w, b, v):
            # Solve w[0]*x + w[1]*y + b = v for y, so the lines can be plotted.
            return (-w[0] * x - b + v) / w[1]

        datarange = (self.min_feature_value * 0.9, self.max_feature_value * 1.1)
        hyp_x_min = datarange[0]
        hyp_x_max = datarange[1]

        # Positive support hyperplane: w . x + b = 1
        psv1 = hyperplane(hyp_x_min, self.w, self.b, 1)
        psv2 = hyperplane(hyp_x_max, self.w, self.b, 1)
        self.ax.plot([hyp_x_min, hyp_x_max], [psv1, psv2], 'k')

        # Negative support hyperplane: w . x + b = -1
        nsv1 = hyperplane(hyp_x_min, self.w, self.b, -1)
        nsv2 = hyperplane(hyp_x_max, self.w, self.b, -1)
        self.ax.plot([hyp_x_min, hyp_x_max], [nsv1, nsv2], 'k')

        # Decision boundary: w . x + b = 0
        db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
        db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
        self.ax.plot([hyp_x_min, hyp_x_max], [db1, db2], 'g--')

        plt.show()

def main():
    svm = Support_Vector_Machine()
    svm.fit(data=data_dict)

    predict_us = [[0, 10], [1, 3], [3, 4], [3, 5],
                  [5, 5], [5, 6], [6, -5], [5, 8]]
    for p in predict_us:
        svm.predict(p)

    svm.visualize()


if __name__ == '__main__':
    main()
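
# Usage sketch (illustrative, not part of main() above): classify one extra
# point with a trained model and no plotting; the coordinates [8, -2] are an
# assumed example value.
#
#     svm = Support_Vector_Machine(visualization=False)
#     svm.fit(data=data_dict)
#     print(svm.predict([8, -2]))   # prints 1.0 or -1.0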