-
Notifications
You must be signed in to change notification settings - Fork 0
/
meanShift.py
125 lines (90 loc) · 3.7 KB
/
meanShift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import numpy as np
from sklearn.datasets.samples_generator import make_blobs
# Alternative, randomly generated data set (disabled):
# X, y = make_blobs(n_samples=15, centers=3, n_features=2)

# Nine hand-picked 2-D samples used as the clustering input.
X = np.array([
    [1, 2],
    [1.5, 1.8],
    [5, 8],
    [8, 8],
    [1, 0.6],
    [9, 11],
    [8, 2],
    [10, 2],
    [9, 3],
])
print(X)

# Quick look at the raw samples (disabled):
# plt.scatter(X[:, 0], X[:, 1], s=150)
# plt.show()

# Five plot colours repeated ten times, indexed by cluster number.
colors = ['g', 'r', 'b', 'c', 'k'] * 10
class Mean_Shift:
    """Mean-shift clustering with a distance-binned, squared-weight kernel.

    Every sample starts out as a centroid. On each pass a centroid is moved
    to the weighted mean of all samples, where a sample's weight depends on
    how many ``radius``-wide steps it lies from the centroid: step ``k`` gets
    weight ``(radius_norm_step - 1 - k) ** 2`` and anything at or beyond the
    last step gets weight 0. Centroids that end up closer than ``radius`` to
    one another are merged into one, and passes repeat until the set of
    centroids no longer changes.

    Attributes set by ``fit``:
        centroids: dict mapping cluster index -> centroid (NumPy array).
        classifications: dict mapping cluster index -> list of the samples
            assigned to that cluster.
    """

    def __init__(self, radius=None, radius_norm_step=100):
        """Store the kernel settings.

        Args:
            radius: Width of one distance step. When ``None`` it is derived
                from the data on the first call to ``fit`` and then kept on
                the instance for later calls.
            radius_norm_step: Number of distance steps (and weight levels).
        """
        self.radius = radius
        self.radius_norm_step = radius_norm_step

    def fit(self, data):
        """Cluster ``data`` and populate ``centroids`` / ``classifications``.

        Args:
            data: Sequence of equally sized numeric samples (one row per
                sample), e.g. a 2-D NumPy array or a list of lists.

        Raises:
            ValueError: If the (given or derived) radius is not positive.
        """
        points = np.asarray(data, dtype=float)
        if len(points) == 0:
            # Nothing to cluster; leave an empty but consistent result.
            self.centroids = {}
            self.classifications = {}
            return

        if self.radius is None:
            # Norm of the mean sample, split into radius_norm_step steps.
            mean_norm = np.linalg.norm(np.average(points, axis=0))
            self.radius = mean_norm / self.radius_norm_step
        if not self.radius > 0:
            # Also catches NaN. A zero radius would otherwise surface as an
            # obscure overflow when distances are divided by it.
            raise ValueError("radius must be positive, got %r" % (self.radius,))

        last_step = self.radius_norm_step - 1
        # step_weights[k] is the weight level of distance step k: the nearest
        # step is the heaviest and the last step weighs nothing.
        step_weights = np.arange(self.radius_norm_step)[::-1]

        centroids = dict(enumerate(points))
        while True:
            # The set drops centroids that landed on exactly the same spot.
            shifted = {
                self._shift(points, centroid, step_weights, last_step)
                for centroid in centroids.values()
            }
            previous = centroids
            centroids = dict(enumerate(self._merge_close(sorted(shifted))))

            # Converged only when neither the number of centroids nor any
            # centroid position changed during this pass.
            if len(centroids) == len(previous) and all(
                np.array_equal(centroids[key], previous[key])
                for key in centroids
            ):
                break

        self.centroids = centroids
        self.classifications = {label: [] for label in centroids}
        for featureset in data:
            self.classifications[self.predict(featureset)].append(featureset)

    def predict(self, data):
        """Return the index of the centroid nearest to the sample ``data``.

        Ties go to the lowest index. ``fit`` must have been called first so
        that ``self.centroids`` exists.
        """
        sample = np.asarray(data)
        distances = [
            np.linalg.norm(sample - self.centroids[label])
            for label in self.centroids
        ]
        return int(np.argmin(distances))

    def _shift(self, points, centroid, step_weights, last_step):
        """Return ``centroid`` moved to the kernel-weighted mean of ``points``."""
        distances = np.linalg.norm(points - centroid, axis=1)
        # Clamp before the integer cast so huge ratios cannot overflow.
        steps = np.minimum(distances / self.radius, last_step).astype(int)
        weights = step_weights[steps] ** 2
        if not weights.any():
            # No sample within reach: there is nothing to shift towards.
            return tuple(centroid)
        return tuple(np.average(points, axis=0, weights=weights))

    def _merge_close(self, candidates):
        """Keep the first of every group of centroids closer than ``radius``.

        Each candidate is compared only against centroids already kept, so a
        close pair loses exactly one member instead of both.
        """
        kept = []
        for candidate in candidates:
            point = np.array(candidate)
            if all(np.linalg.norm(point - other) >= self.radius for other in kept):
                kept.append(point)
        return kept
# Disabled usage demo, kept as a bare string literal so it never executes.
# It fits Mean_Shift on X, scatter-plots every sample with an 'x' marker in
# its cluster's colour, marks each centroid with a black star, and shows the
# figure.
# NOTE(review): the loop bodies inside the string carry no indentation, so
# the snippet needs re-indenting before it can be enabled.
"""
clf = Mean_Shift()
clf.fit(X)
centroids = clf.centroids
for classification in clf.classifications:
color = colors[classification]
for featureset in clf.classifications[classification]:
plt.scatter(featureset[0], featureset[1], marker='x', color=color, s=150, linewidths=5)
for c in centroids:
plt.scatter(centroids[c][0], centroids[c][1], color='k', marker='*', s=150, linewidths=5)
plt.show()
"""