forked from trekhleb/javascript-algorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
/
kMeans.js
85 lines (74 loc) · 3.14 KB
/
kMeans.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import * as mtrx from '../../math/matrix/Matrix';
import euclideanDistance from '../../math/euclidean-distance/euclideanDistance';
/**
* Classifies the point in space based on k-Means algorithm.
*
* @param {number[][]} data - array of dataSet points, i.e. [[0, 1], [3, 4], [5, 7]]
* @param {number} k - number of clusters
* @return {number[]} - the class of the point
*/
export default function KMeans(
data,
k = 1,
) {
if (!data) {
throw new Error('The data is empty');
}
// Assign k clusters locations equal to the location of initial k points.
const dataDim = data[0].length;
const clusterCenters = data.slice(0, k);
// Continue optimization till convergence.
// Centroids should not be moving once optimized.
// Calculate distance of each candidate vector from each cluster center.
// Assign cluster number to each data vector according to minimum distance.
// Matrix of distance from each data point to each cluster centroid.
const distances = mtrx.zeros([data.length, k]);
// Vector data points' classes. The value of -1 means that no class has bee assigned yet.
const classes = Array(data.length).fill(-1);
let iterate = true;
while (iterate) {
iterate = false;
// Calculate and store the distance of each data point from each cluster.
for (let dataIndex = 0; dataIndex < data.length; dataIndex += 1) {
for (let clusterIndex = 0; clusterIndex < k; clusterIndex += 1) {
distances[dataIndex][clusterIndex] = euclideanDistance(
[clusterCenters[clusterIndex]],
[data[dataIndex]],
);
}
// Assign the closest cluster number to each dataSet point.
const closestClusterIdx = distances[dataIndex].indexOf(
Math.min(...distances[dataIndex]),
);
// Check if data point class has been changed and we still need to re-iterate.
if (classes[dataIndex] !== closestClusterIdx) {
iterate = true;
}
classes[dataIndex] = closestClusterIdx;
}
// Recalculate cluster centroid values via all dimensions of the points under it.
for (let clusterIndex = 0; clusterIndex < k; clusterIndex += 1) {
// Reset cluster center coordinates since we need to recalculate them.
clusterCenters[clusterIndex] = Array(dataDim).fill(0);
let clusterSize = 0;
for (let dataIndex = 0; dataIndex < data.length; dataIndex += 1) {
if (classes[dataIndex] === clusterIndex) {
// Register one more data point of current cluster.
clusterSize += 1;
for (let dimensionIndex = 0; dimensionIndex < dataDim; dimensionIndex += 1) {
// Add data point coordinates to the cluster center coordinates.
clusterCenters[clusterIndex][dimensionIndex] += data[dataIndex][dimensionIndex];
}
}
}
// Calculate the average for each cluster center coordinate.
for (let dimensionIndex = 0; dimensionIndex < dataDim; dimensionIndex += 1) {
clusterCenters[clusterIndex][dimensionIndex] = parseFloat(Number(
clusterCenters[clusterIndex][dimensionIndex] / clusterSize,
).toFixed(2));
}
}
}
// Return the clusters assigned.
return classes;
}