|
| 1 | +import { uniqueId } from "@antv/util"; |
| 2 | +import { ClusterData, INode, IEdge, Graph, Matrix } from "./types"; |
| 3 | +import { ID } from "@antv/graphlib"; |
| 4 | + |
/**
 * Builds an adjacency matrix for the graph.
 *
 * Row/column positions follow the order of `graph.getAllNodes()`. A cell is
 * set to 1 when an edge connects the two nodes; cells without an edge are
 * never assigned, so rows are sparse arrays. Edges whose source or target is
 * not one of the graph's nodes are skipped.
 *
 * @param graph The graph providing `getAllNodes()` and `getAllEdges()`.
 * @param directed When false, every edge is mirrored (target -> source too).
 * @returns One row per node, in node order.
 * @throws Error if `getAllNodes()` returns a falsy value.
 */
function getAdjMatrix(graph: Graph, directed: boolean) {
  const allNodes = graph.getAllNodes();
  if (!allNodes) {
    throw new Error("invalid nodes data!");
  }

  // Position of every node id inside the node list (and thus the matrix).
  const indexById = new Map<string | number, number>();
  const matrix: Matrix[] = [];
  for (let position = 0; position < allNodes.length; position++) {
    indexById.set(allNodes[position].id, position);
    const emptyRow: number[] = [];
    matrix.push(emptyRow);
  }

  const allEdges = graph.getAllEdges();
  if (!allEdges) {
    return matrix;
  }

  for (const { source, target } of allEdges) {
    const from = indexById.get(source);
    const to = indexById.get(target);
    // Ignore edges that reference a node the graph does not contain.
    if (from === undefined || to === undefined) continue;

    matrix[from][to] = 1;
    if (!directed) {
      matrix[to][from] = 1;
    }
  }
  return matrix;
}
| 38 | + |
/**
 * Performs label propagation clustering on the given graph.
 *
 * Every node starts in its own cluster. On each pass, every node looks at the
 * clusters of its neighbors, sums the adjacency weight per cluster and moves
 * to a cluster with the highest total. When several clusters tie, one of them
 * (excluding the node's current cluster) is picked at random, so results can
 * differ between runs. Passes repeat until no node moves or `maxIteration`
 * passes have run.
 *
 * @param graph The graph object representing the nodes and edges.
 * @param directed A boolean indicating whether the graph is directed or not. Default is false.
 * @param weightPropertyName The name of the edge data property summed into the
 * weight of the aggregated cluster edges (a falsy value counts as 1). The
 * propagation itself uses the adjacency matrix entries. Default is 'weight'.
 * @param maxIteration The maximum number of iterations for label propagation. Default is 1000.
 * @returns The clustering result including clusters, cluster edges, and node-to-cluster mapping.
 */
export const labelPropagation = (
  graph: Graph,
  directed: boolean = false,
  weightPropertyName: string = "weight",
  maxIteration: number = 1000
): ClusterData => {
  // the origin data
  const nodes = graph.getAllNodes();
  const edges = graph.getAllEdges();

  const clusters: { [key: string]: { id: string; nodes: INode[] } } = {};
  const nodeToCluster = new Map<ID, string>();
  // init: one singleton cluster per node
  nodes.forEach((node) => {
    const cid: string = uniqueId();
    nodeToCluster.set(node.id, cid);
    clusters[cid] = {
      id: cid,
      nodes: [node],
    };
  });

  // the adjacent matrix, rows/columns ordered like `nodes`
  const adjMatrix = getAdjMatrix(graph, directed);
  /**
   * neighbor nodes (id for key and weight for value) for each node
   * neighbors = {
   *   id(node_id): { id(neighbor_1_id): weight, id(neighbor_2_id): weight, ... },
   *   ...
   * }
   */
  const neighbors = new Map<ID, Map<ID, number>>();
  adjMatrix.forEach((row, i) => {
    const nodeNeighbors = new Map<ID, number>();
    row.forEach((entry, j) => {
      if (!entry) return;
      nodeNeighbors.set(nodes[j].id, entry);
    });
    neighbors.set(nodes[i].id, nodeNeighbors);
  });

  let iter = 0;

  while (iter < maxIteration) {
    let changed = false;
    nodes.forEach((node) => {
      const currentClusterId = nodeToCluster.get(node.id);
      const nodeNeighbors = neighbors.get(node.id);
      if (currentClusterId === undefined || !nodeNeighbors) return;

      // total neighbor weight per neighboring cluster
      const neighborClusters: { [key: string]: number } = {};
      // Map#forEach passes (value, key): the weight comes first, the id second.
      nodeNeighbors.forEach((neighborWeight, neighborId) => {
        const neighborClusterId = nodeToCluster.get(neighborId);
        if (neighborClusterId === undefined) return;
        neighborClusters[neighborClusterId] =
          (neighborClusters[neighborClusterId] || 0) + neighborWeight;
      });

      // find the cluster(s) with max weight
      let maxWeight = -Infinity;
      let bestClusterIds: string[] = [];
      Object.keys(neighborClusters).forEach((clusterId) => {
        const clusterWeight = neighborClusters[clusterId];
        if (maxWeight < clusterWeight) {
          maxWeight = clusterWeight;
          bestClusterIds = [clusterId];
        } else if (maxWeight === clusterWeight) {
          bestClusterIds.push(clusterId);
        }
      });

      // already in the single best cluster: nothing to do
      if (bestClusterIds.length === 1 && bestClusterIds[0] === currentClusterId) {
        return;
      }
      // on a tie the current cluster is not a candidate
      const selfClusterIdx = bestClusterIds.indexOf(currentClusterId);
      if (selfClusterIdx >= 0) bestClusterIds.splice(selfClusterIdx, 1);
      if (!bestClusterIds.length) return;

      changed = true;

      // remove from origin cluster
      const selfCluster = clusters[currentClusterId];
      const nodeInSelfClusterIdx = selfCluster.nodes.indexOf(node);
      if (nodeInSelfClusterIdx >= 0) {
        selfCluster.nodes.splice(nodeInSelfClusterIdx, 1);
      }

      // move the node to one of the best clusters, chosen at random
      const randomIdx = Math.floor(Math.random() * bestClusterIds.length);
      const bestCluster = clusters[bestClusterIds[randomIdx]];
      bestCluster.nodes.push(node);
      nodeToCluster.set(node.id, bestCluster.id);
    });
    if (!changed) break;
    iter++;
  }

  // delete the empty clusters
  Object.keys(clusters).forEach((clusterId) => {
    const cluster = clusters[clusterId];
    if (!cluster.nodes || !cluster.nodes.length) {
      delete clusters[clusterId];
    }
  });

  // get the cluster edges: one aggregated edge per (source cluster, target cluster) pair
  const clusterEdges: IEdge[] = [];
  const clusterEdgeMap: { [key: string]: IEdge } = {};
  // Lives outside the loop so that every aggregated edge gets its own id.
  let nextClusterEdgeId = 0;
  edges.forEach((edge) => {
    const { source, target } = edge;
    const weight = (edge.data[weightPropertyName] || 1) as number;
    const sourceClusterId = nodeToCluster.get(source);
    const targetClusterId = nodeToCluster.get(target);
    // Skip edges whose endpoints are not nodes of the graph, as getAdjMatrix does.
    if (sourceClusterId === undefined || targetClusterId === undefined) return;

    const newEdgeId = `${sourceClusterId}---${targetClusterId}`;
    if (clusterEdgeMap[newEdgeId]) {
      clusterEdgeMap[newEdgeId].data.weight += weight;
      (clusterEdgeMap[newEdgeId].data.count as number)++;
    } else {
      const newEdge = {
        id: nextClusterEdgeId++,
        source: sourceClusterId,
        target: targetClusterId,
        data: {
          weight,
          count: 1,
        },
      };
      clusterEdgeMap[newEdgeId] = newEdge;
      clusterEdges.push(newEdge);
    }
  });

  const clustersArray: { id: string; nodes: INode[] }[] = Object.keys(
    clusters
  ).map((clusterId) => clusters[clusterId]);

  return {
    clusters: clustersArray,
    clusterEdges,
    nodeToCluster,
  };
};
0 commit comments