Skip to content

Commit db6c435

Browse files
zqqcee and xiaoiver authored
feat: k-means algorithm (#75)
* feat: v5 algorithm k-means * fix: remove the properties param, because of the 'data' property in node * test: unit test for k-means * fix: fix lint * fix: replace vector with num array * fix: replace originIndex in data field with a map * fix: move clusterId in data field to a new map named nodeToCluster, and return it from k-means func * fix: fix lint * fix: the return value of louvain algorithm * fix: use ID from graphlib instead of NodeID --------- Co-authored-by: yuqi.pyq <[email protected]>
1 parent 305a20a commit db6c435

File tree

8 files changed

+693
-5
lines changed

8 files changed

+693
-5
lines changed

__tests__/unit/k-means.spec.ts

Lines changed: 391 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,391 @@
1+
import { kMeans } from '../../packages/graph/src'
2+
import propertiesGraphData from '../data/cluster-origin-properties-data.json';
3+
import { Graph } from "@antv/graphlib";
4+
import { dataPropertiesTransformer, dataLabelDataTransformer } from '../utils/data';
5+
6+
7+
describe('kMeans abnormal demo', () => {
8+
it('no properties demo: ', () => { // Four nodes with empty `data` and no `edges` key: kMeans(graph, 2) is expected to yield one cluster and no cluster edges.
9+
const noPropertiesData = {
10+
nodes: [
11+
{
12+
id: 'node-0',
13+
data: {}, // every node in this fixture has an empty data object
14+
},
15+
{
16+
id: 'node-1',
17+
data: {},
18+
},
19+
{
20+
id: 'node-2',
21+
data: {},
22+
},
23+
{
24+
id: 'node-3',
25+
data: {},
26+
}
27+
],
28+
}
29+
const graph = new Graph(noPropertiesData);
30+
const { clusters, clusterEdges } = kMeans(graph, 2); // k = 2 is requested, yet the assertions below expect a single cluster
31+
expect(clusters.length).toBe(1);
32+
expect(clusterEdges.length).toBe(0);
33+
});
34+
});
35+
36+
37+
describe('kMeans normal demo', () => {
38+
it('simple data demo: ', () => { // Five nodes with `amount`/`city` properties, clustered with k = 3 and no key filters.
39+
const simpleGraphData = {
40+
nodes: [
41+
{
42+
id: 'node-0',
43+
properties: {
44+
amount: 10,
45+
city: '10001',
46+
}
47+
},
48+
{
49+
id: 'node-1',
50+
properties: {
51+
amount: 10000,
52+
city: '10002',
53+
}
54+
},
55+
{
56+
id: 'node-2',
57+
properties: {
58+
amount: 3000,
59+
city: '10003',
60+
}
61+
},
62+
{
63+
id: 'node-3',
64+
properties: {
65+
amount: 3200,
66+
city: '10003',
67+
}
68+
},
69+
{
70+
id: 'node-4',
71+
properties: {
72+
amount: 2000,
73+
city: '10003',
74+
}
75+
}
76+
],
77+
edges: [
78+
{
79+
id: 'edge-0',
80+
source: 'node-0',
81+
target: 'node-1',
82+
},
83+
{
84+
id: 'edge-1',
85+
source: 'node-0',
86+
target: 'node-2',
87+
},
88+
{
89+
id: 'edge-4',
90+
source: 'node-3',
91+
target: 'node-2',
92+
},
93+
{
94+
id: 'edge-5',
95+
source: 'node-2',
96+
target: 'node-1',
97+
},
98+
{
99+
id: 'edge-6',
100+
source: 'node-4',
101+
target: 'node-1',
102+
},
103+
]
104+
}
105+
const data = dataPropertiesTransformer(simpleGraphData); // NOTE(review): presumably reshapes each node's `properties` into the form Graph expects — confirm in ../utils/data
106+
const graph = new Graph(data);
107+
const { clusters, nodeToCluster } = kMeans(graph, 3);
108+
expect(clusters.length).toBe(3);
109+
const nodes = graph.getAllNodes(); // NOTE(review): the index-based checks below assume getAllNodes() keeps fixture order — confirm
110+
111+
112+
expect(nodeToCluster.get(nodes[2].id)).toEqual(nodeToCluster.get(nodes[3].id)); // nodes at index 2, 3 and 4 must map to the same nodeToCluster value
113+
expect(nodeToCluster.get(nodes[2].id)).toEqual(nodeToCluster.get(nodes[4].id));
114+
});
115+
116+
117+
it('complex data demo: ', () => { // Clusters the JSON fixture with k = 3 and checks three index ranges (0-4, 5-10, 11-16) each share one nodeToCluster value.
118+
const data = dataLabelDataTransformer(propertiesGraphData); // NOTE(review): transformer semantics are not visible here — confirm in ../utils/data
119+
const graph = new Graph(data);
120+
const { clusters,nodeToCluster } = kMeans(graph, 3);
121+
expect(clusters.length).toBe(3);
122+
const nodes = graph.getAllNodes(); // NOTE(review): assumes getAllNodes() returns nodes in fixture order — confirm
123+
expect(nodeToCluster.get(nodes[0].id)).toEqual(nodeToCluster.get(nodes[1].id)); // group 1: indices 0-4 compared against index 0
124+
expect(nodeToCluster.get(nodes[0].id)).toEqual(nodeToCluster.get(nodes[2].id));
125+
expect(nodeToCluster.get(nodes[0].id)).toEqual(nodeToCluster.get(nodes[3].id));
126+
expect(nodeToCluster.get(nodes[0].id)).toEqual(nodeToCluster.get(nodes[4].id));
127+
expect(nodeToCluster.get(nodes[5].id)).toEqual(nodeToCluster.get(nodes[6].id)); // group 2: indices 5-10 compared against index 5
128+
expect(nodeToCluster.get(nodes[5].id)).toEqual(nodeToCluster.get(nodes[7].id));
129+
expect(nodeToCluster.get(nodes[5].id)).toEqual(nodeToCluster.get(nodes[8].id));
130+
expect(nodeToCluster.get(nodes[5].id)).toEqual(nodeToCluster.get(nodes[9].id));
131+
expect(nodeToCluster.get(nodes[5].id)).toEqual(nodeToCluster.get(nodes[10].id));
132+
expect(nodeToCluster.get(nodes[11].id)).toEqual(nodeToCluster.get(nodes[12].id)); // group 3: indices 11-16 compared against index 11
133+
expect(nodeToCluster.get(nodes[11].id)).toEqual(nodeToCluster.get(nodes[13].id));
134+
expect(nodeToCluster.get(nodes[11].id)).toEqual(nodeToCluster.get(nodes[14].id));
135+
expect(nodeToCluster.get(nodes[11].id)).toEqual(nodeToCluster.get(nodes[15].id));
136+
expect(nodeToCluster.get(nodes[11].id)).toEqual(nodeToCluster.get(nodes[16].id));
137+
});
138+
139+
it('demo use involvedKeys: ', () => { // Same five-node fixture as the simple demo, but kMeans receives ['amount'] as its third argument.
140+
const simpleGraphData = {
141+
nodes: [
142+
{
143+
id: 'node-0',
144+
properties: {
145+
amount: 10,
146+
city: '10001',
147+
}
148+
},
149+
{
150+
id: 'node-1',
151+
properties: {
152+
amount: 10000,
153+
city: '10002',
154+
}
155+
},
156+
{
157+
id: 'node-2',
158+
properties: {
159+
amount: 3000,
160+
city: '10003',
161+
}
162+
},
163+
{
164+
id: 'node-3',
165+
properties: {
166+
amount: 3200,
167+
city: '10003',
168+
}
169+
},
170+
{
171+
id: 'node-4',
172+
properties: {
173+
amount: 2000,
174+
city: '10003',
175+
}
176+
}
177+
],
178+
edges: [
179+
{
180+
id: 'edge-0',
181+
source: 'node-0',
182+
target: 'node-1',
183+
},
184+
{
185+
id: 'edge-1',
186+
source: 'node-0',
187+
target: 'node-2',
188+
},
189+
{
190+
id: 'edge-4',
191+
source: 'node-3',
192+
target: 'node-2',
193+
},
194+
{
195+
id: 'edge-5',
196+
source: 'node-2',
197+
target: 'node-1',
198+
},
199+
{
200+
id: 'edge-6',
201+
source: 'node-4',
202+
target: 'node-1',
203+
},
204+
]
205+
}
206+
const data = dataPropertiesTransformer(simpleGraphData); // NOTE(review): transformer semantics are not visible here — confirm in ../utils/data
207+
const involvedKeys = ['amount']; // NOTE(review): presumably limits clustering to these property keys — confirm against the kMeans signature
208+
const graph = new Graph(data);
209+
const { clusters ,nodeToCluster} = kMeans(graph, 3, involvedKeys);
210+
expect(clusters.length).toBe(3);
211+
const nodes = graph.getAllNodes(); // NOTE(review): assumes getAllNodes() returns nodes in fixture order — confirm
212+
expect(nodeToCluster.get(nodes[2].id)).toEqual(nodeToCluster.get(nodes[3].id)); // nodes at index 2, 3 and 4 must map to the same nodeToCluster value
213+
expect(nodeToCluster.get(nodes[2].id)).toEqual(nodeToCluster.get(nodes[4].id));
214+
});
215+
216+
it('demo use uninvolvedKeys: ', () => { // Same five-node fixture as the simple demo, but kMeans receives [] as third and ['id', 'city'] as fourth argument.
217+
const simpleGraphData = {
218+
nodes: [
219+
{
220+
id: 'node-0',
221+
properties: {
222+
amount: 10,
223+
city: '10001',
224+
}
225+
},
226+
{
227+
id: 'node-1',
228+
properties: {
229+
amount: 10000,
230+
city: '10002',
231+
}
232+
},
233+
{
234+
id: 'node-2',
235+
properties: {
236+
amount: 3000,
237+
city: '10003',
238+
}
239+
},
240+
{
241+
id: 'node-3',
242+
properties: {
243+
amount: 3200,
244+
city: '10003',
245+
}
246+
},
247+
{
248+
id: 'node-4',
249+
properties: {
250+
amount: 2000,
251+
city: '10003',
252+
}
253+
}
254+
],
255+
edges: [
256+
{
257+
id: 'edge-0',
258+
source: 'node-0',
259+
target: 'node-1',
260+
},
261+
{
262+
id: 'edge-1',
263+
source: 'node-0',
264+
target: 'node-2',
265+
},
266+
{
267+
id: 'edge-4',
268+
source: 'node-3',
269+
target: 'node-2',
270+
},
271+
{
272+
id: 'edge-5',
273+
source: 'node-2',
274+
target: 'node-1',
275+
},
276+
{
277+
id: 'edge-6',
278+
source: 'node-4',
279+
target: 'node-1',
280+
},
281+
]
282+
}
283+
const data = dataPropertiesTransformer(simpleGraphData); // NOTE(review): transformer semantics are not visible here — confirm in ../utils/data
284+
const graph = new Graph(data);
285+
const uninvolvedKeys = ['id', 'city']; // NOTE(review): presumably excludes these property keys from clustering — confirm against the kMeans signature
286+
const { clusters,nodeToCluster } = kMeans(graph, 3, [], uninvolvedKeys);
287+
expect(clusters.length).toBe(3);
288+
const nodes = graph.getAllNodes(); // NOTE(review): assumes getAllNodes() returns nodes in fixture order — confirm
289+
expect(nodeToCluster.get(nodes[2].id)).toEqual(nodeToCluster.get(nodes[3].id)); // nodes at index 2, 3 and 4 must map to the same nodeToCluster value
290+
expect(nodeToCluster.get(nodes[2].id)).toEqual(nodeToCluster.get(nodes[4].id));
291+
});
292+
293+
});
294+
295+
describe('kMeans All properties values are numeric demo', () => {
296+
it('all properties values are numeric demo: ', () => { // Four edge-less nodes with numeric max/mean/min: two with values around 1e6 and two with values in the thousands, clustered with k = 2.
297+
const allPropertiesValuesNumericData = {
298+
nodes: [
299+
{
300+
id: 'node-0',
301+
properties: {
302+
max: 1000000,
303+
mean: 900000,
304+
min: 800000,
305+
}
306+
},
307+
{
308+
id: 'node-1',
309+
properties: {
310+
max: 1600000,
311+
mean: 1100000,
312+
min: 600000,
313+
}
314+
},
315+
{
316+
id: 'node-2',
317+
properties: {
318+
max: 5000,
319+
mean: 3500,
320+
min: 2000,
321+
}
322+
},
323+
{
324+
id: 'node-3',
325+
properties: {
326+
max: 9000,
327+
mean: 7500,
328+
min: 6000,
329+
}
330+
}
331+
],
332+
edges: [],
333+
}
334+
const data = dataPropertiesTransformer(allPropertiesValuesNumericData); // NOTE(review): transformer semantics are not visible here — confirm in ../utils/data
335+
const graph = new Graph(data);
336+
const { clusters, clusterEdges,nodeToCluster } = kMeans(graph, 2);
337+
expect(clusters.length).toBe(2);
338+
expect(clusterEdges.length).toBe(0); // the fixture declares an empty edges array
339+
const nodes = graph.getAllNodes(); // NOTE(review): assumes getAllNodes() returns nodes in fixture order — confirm
340+
expect(nodeToCluster.get(nodes[0].id)).toEqual(nodeToCluster.get(nodes[1].id)); // indices 0 and 1 must share a nodeToCluster value
341+
expect(nodeToCluster.get(nodes[2].id)).toEqual(nodeToCluster.get(nodes[3].id)); // indices 2 and 3 must share a nodeToCluster value
342+
});
343+
it('only one property and the value are numeric demo: ', () => { // Five edge-less nodes with a single numeric `num` property (10, 12, 56, 300, 350), clustered with k = 2.
344+
const allPropertiesValuesNumericData = {
345+
nodes: [
346+
{
347+
id: 'node-0',
348+
properties: {
349+
num: 10,
350+
}
351+
},
352+
{
353+
id: 'node-1',
354+
properties: {
355+
num: 12,
356+
}
357+
},
358+
{
359+
id: 'node-2',
360+
properties: {
361+
num: 56,
362+
}
363+
},
364+
{
365+
id: 'node-3',
366+
properties: {
367+
num: 300,
368+
}
369+
},
370+
{
371+
id: 'node-4',
372+
properties: {
373+
num: 350,
374+
}
375+
}
376+
],
377+
edges: [],
378+
}
379+
const data = dataPropertiesTransformer(allPropertiesValuesNumericData); // NOTE(review): transformer semantics are not visible here — confirm in ../utils/data
380+
const graph = new Graph(data);
381+
const { clusters, clusterEdges,nodeToCluster } = kMeans(graph, 2);
382+
expect(clusters.length).toBe(2);
383+
expect(clusterEdges.length).toBe(0); // the fixture declares an empty edges array
384+
const nodes = graph.getAllNodes(); // NOTE(review): assumes getAllNodes() returns nodes in fixture order — confirm
385+
expect(nodeToCluster.get(nodes[0].id)).toEqual(nodeToCluster.get(nodes[1].id)); // indices 0, 1 and 2 must share a nodeToCluster value
386+
expect(nodeToCluster.get(nodes[0].id)).toEqual(nodeToCluster.get(nodes[2].id));
387+
expect(nodeToCluster.get(nodes[3].id)).toEqual(nodeToCluster.get(nodes[4].id)); // indices 3 and 4 must share a nodeToCluster value
388+
});
389+
390+
});
391+

0 commit comments

Comments
 (0)