diff --git a/src/Graphs.jl b/src/Graphs.jl index e573f5d69..1248ea861 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -23,7 +23,8 @@ using DataStructures: union!, find_root!, BinaryMaxHeap, - BinaryMinHeap + BinaryMinHeap, + DefaultDict using LinearAlgebra: I, Symmetric, diagm, eigen, eigvals, norm, rmul!, tril, triu import LinearAlgebra: Diagonal, issymmetric, mul! using Random: @@ -307,6 +308,8 @@ export # community modularity, + community_detection_greedy_modularity, + community_detection_greedy_modularity_fast, core_periphery_deg, local_clustering, local_clustering_coefficient, @@ -518,6 +521,8 @@ include("centrality/eigenvector.jl") include("centrality/radiality.jl") include("community/modularity.jl") include("community/label_propagation.jl") +include("community/greedy_modularity.jl") +include("community/greedy_modularity_fast.jl") include("community/core-periphery.jl") include("community/clustering.jl") include("community/cliques.jl") diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl new file mode 100644 index 000000000..a4dcae108 --- /dev/null +++ b/src/community/greedy_modularity.jl @@ -0,0 +1,106 @@ +function community_detection_greedy_modularity(g::AbstractGraph; weights::AbstractMatrix=weights(g)) + if is_directed(g) + throw(ArgumentError("The graph must not be directed")) + end + n = nv(g) + c = Vector{Int}(1:n) + cs = Vector{Vector{Int}}(undef, n) + T = float(eltype(weights)) + qs = Vector{T}(undef, n) + Q, e, a = compute_modularity(g, c, weights) + m = sum(a) + cs[1] = copy(c) + qs[1] = Q + for i in 2:n + Q = modularity_greedy_step!(g, Q, e, a, c, m) + cs[i] = copy(c) + qs[i] = Q + end + imax = argmax(qs) + return rewrite_class_ids(cs[imax]), qs +end + +function modularity_greedy_step!( + g::AbstractGraph, + Q::T, + e::AbstractMatrix{T}, + a::AbstractVector{T}, + c::AbstractVector{<:Integer}, + m::T +) where {T} + n = nv(g) + dq_max::typeof(Q) = typemin(Q) + to_merge::Tuple{Int,Int} = (0, 0) + for edge in edges(g) + u, v = src(edge), dst(edge) + if c[u] != c[v] + dq = (e[c[u], c[v]] / m - a[c[u]] * a[c[v]] / m^2) + if dq > dq_max + dq_max = dq + to_merge = (c[u], c[v]) + end + end + end + if to_merge == (0,0) + for i in vertices(g) + if c[i] != c[1] + to_merge = (c[1], c[i]) + break + end + end + end + c1, c2 = to_merge + for i in 1:n + e[c1, i] += e[c2, i] + end + for i in 1:n + if i == c2 + continue + end + e[i, c1] += e[i, c2] + end + a[c1] = a[c1] + a[c2] + for i in 1:n + if c[i] == c2 + c[i] = c1 + end + end + return Q + 2 * dq_max +end + +function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) + modularity_type = float(eltype(w)) + Q = zero(modularity_type) + m = sum(w[src(e), dst(e)] for e in edges(g); init=Q) * 2 + n_groups = maximum(c) + a = zeros(modularity_type, n_groups) + e = zeros(modularity_type, n_groups, n_groups) + m == 0 && return 0.0, e, a + for u in vertices(g) + for v in neighbors(g, u) + if c[u] == c[v] + Q += w[u, v] + end + e[c[u], c[v]] += w[u, v] + a[c[u]] += w[u, v] + end + end + Q *= m + for i in 1:n_groups + Q -= a[i]^2 + end + Q /= m^2 + return Q, e, a +end + +function rewrite_class_ids(v::AbstractVector{<:Integer}) + d = Dict{Int, Int}() + vn = zeros(Int64, length(v)) + for i in eachindex(v) + if !(v[i] in keys(d)) + d[v[i]] = length(d) + 1 + end + vn[i] = d[v[i]] + end + return vn +end diff --git a/src/community/greedy_modularity_fast.jl b/src/community/greedy_modularity_fast.jl new file mode 100644 index 000000000..26ea9da28 --- /dev/null +++ b/src/community/greedy_modularity_fast.jl @@ -0,0 +1,127 @@ +function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::AbstractMatrix=weights(g)) + if is_directed(g) + throw(ArgumentError("The graph must not be directed")) + end + n = nv(g) + c = Vector{Int}(1:n) + dq_dict, dq_heap, dq_global_heap, a, m = compute_dq(g, c, weights) + modularity_type = float(eltype(weights)) + empty_row_heap = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse) # placeholder, lasts empty forever + while length(dq_global_heap) > 1 + (u,v), (dq, _) = dequeue_pair!(dq_global_heap) + if dq <= zero(modularity_type) + return rewrite_class_ids(c) + end + dequeue!(dq_heap[u]) + if !isempty(dq_heap[u]) + enqueue!(dq_global_heap, peek(dq_heap[u])) + end + if peek(dq_heap[v])[1] == (v,u) + dequeue!(dq_heap[v]) + delete!(dq_global_heap, (v,u)) + if !isempty(dq_heap[v]) + enqueue!(dq_global_heap, peek(dq_heap[v])) + end + else + delete!(dq_heap[v], (v,u)) + end + + c[c .== u] .= v + + neighbors_u = setdiff(keys(dq_dict[u]), v) + neighbors_v = setdiff(keys(dq_dict[v]), u) + neighbors_all = union(neighbors_u, neighbors_v) + neighbors_common = intersect(neighbors_u, neighbors_v) + + for w in neighbors_all + if w in neighbors_common + dq_w = dq_dict[v][w] + dq_dict[u][w] + elseif w in neighbors_v + dq_w = dq_dict[v][w] - a[u] * a[w] / m^2 + else + dq_w = dq_dict[u][w] - a[v] * a[w] / m^2 + end + for (row, column) in ((v, w), (w, v)) + dq_heap_row = dq_heap[row] + dq_dict[row][column] = dq_w + if !isempty(dq_heap_row) + oldmax = peek(dq_heap_row) + else + oldmax = nothing + end + dq_heap_row[(row,column)] = (dq_w, (-row, -column)) # update or insert + if isnothing(oldmax) + dq_global_heap[(row, column)] = (dq_w, (-row, -column)) + else + newmax = peek(dq_heap_row) + if newmax != oldmax + delete!(dq_global_heap, oldmax[1]) ## is it still there? + enqueue!(dq_global_heap, newmax) + end + end + end + end + + for (w, _) in dq_dict[u] + delete!(dq_dict[w], u) + if w != v + for (row, column) in ((w,u), (u,w)) + dq_heap_row = dq_heap[row] + if peek(dq_heap_row)[1] == (row, column) + dequeue!(dq_heap_row) + delete!(dq_global_heap, (row, column)) + if !isempty(dq_heap_row) + enqueue!(dq_global_heap, peek(dq_heap_row)) + end + else + delete!(dq_heap_row, (row, column)) + end + end + end + end + delete!(dq_dict, u) + dq_heap[u] = empty_row_heap + a[v] += a[u] + a[u] = 0 + end + return rewrite_class_ids(c) +end + +function compute_dq( + g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray +) + modularity_type = float(eltype(w)) + Q_zero = zero(modularity_type) + m = sum(w[src(e), dst(e)] for e in edges(g); init=Q_zero) * 2 + n_groups = maximum(c) + a = zeros(modularity_type, n_groups) + + typical_dict = DefaultDict{Int, modularity_type}(Q_zero) + dq_dict = Dict{Int,typeof(typical_dict)}() + for v in vertices(g) + dq_dict[v] = DefaultDict{Int, modularity_type}(Q_zero) + end + + for u in vertices(g) + for v in neighbors(g, u) + dq_dict[u][v] += w[u,v] + a[c[u]] += w[u, v] + end + end + + for (u, dct) in dq_dict + for (v, w) in dct + dq_dict[u][v] = w / m - a[c[u]] * a[c[v]] / m^2 + end + end + + typical_queue = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse) + dq_heap = Dict{Int,typeof(typical_queue)}() + for u in vertices(g) + dq_heap[u] = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, (u, v) => (dq, (-u, -v)) for (v, dq) in dq_dict[u]) + end + + v_connected = filter(v -> !isempty(dq_heap[v]), vertices(g)) + global_heap = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, peek(dq_heap[v]) for v in v_connected) + return dq_dict, dq_heap, global_heap, a, m +end diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl new file mode 100644 index 000000000..907ee8d89 --- /dev/null +++ b/test/community/greedy_modularity.jl @@ -0,0 +1,114 @@ +@testset "Greedy modularity: karate club" begin + g = smallgraph(:karate) + + expected_c_w = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] + expected_q_w = 0.3806706114398422 + + c_w, _ = community_detection_greedy_modularity(g) + + @test c_w == expected_c_w + + @test modularity(g, c_w) ≈ expected_q_w +end + +@testset "Greedy modularity: weighted karate club" begin + g = smallgraph(:karate) + w = [ + 0 4 5 3 3 3 3 2 2 0 2 3 1 3 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 2 0 0; + 4 0 6 3 0 0 0 4 0 0 0 0 0 5 0 0 0 1 0 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0; + 5 6 0 3 0 0 0 4 5 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 0; + 3 3 3 0 0 0 0 3 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 5 0 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 2 5 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 4 4 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 4; + 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2; + 2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 5 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4; + 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2; + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1; + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 4 0 3 0 0 5 4; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 3 0 0 0 2 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 2 0 0 0 0 0 0 7 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2; + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 3 0 0 0 0 0 0 0 0 4; + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 4 0 0 0 0 0 4 2; + 0 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3; + 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 7 0 0 2 0 0 0 4 4; + 0 0 2 0 0 0 0 0 3 0 0 0 0 0 3 3 0 0 1 0 3 0 2 5 0 0 0 0 0 4 3 4 0 5; + 0 0 0 0 0 0 0 0 4 2 0 0 0 3 2 4 0 0 2 1 1 0 3 4 0 0 2 4 2 2 3 4 5 0 + ] + expected_c_w = [1, 1, 1, 1, 2, 2, 2, 1, 3, 3, 2, 1, 1, 1, 3, 3, 2, 1, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] + expected_q_w = 0.4345214669889994 + + c_w, _ = community_detection_greedy_modularity(g, weights=w) + @test c_w == expected_c_w + @test modularity(g,c_w, distmx=w) ≈ expected_q_w +end + + +@testset "Greedy modularity: disconnected graph" begin + g = SimpleGraph(10) + for i=1:5 + add_edge!(g, 2*i - 1, 2*i) + end + c, _ = community_detection_greedy_modularity(g) + q = modularity(g, c) + + expected_c = [1,1,2,2,3,3,4,4,5,5] + expected_q = 0.8 + + @test c == expected_c + @test q ≈ expected_q +end + + +@testset "Greedy modularity: complete graph" begin + g = complete_graph(10) + c, _ = community_detection_greedy_modularity(g) + q = modularity(g, c) + + expected_c = ones(Int, 10) + expected_q = 0.0 + + @test c == expected_c + @test q ≈ expected_q +end + + +@testset "Greedy modularity: empty graph" begin + g = SimpleGraph(10) + c, _ = community_detection_greedy_modularity(g) + q = modularity(g, c) + + expected_c = Vector(1:10) + expected_q = 0.0 + + @test c == expected_c + @test q ≈ expected_q +end + + +@testset "Greedy modularity: random stochastic block model graph" begin + g_sbm = stochastic_block_model(99,1,[500,1000]) + expected_c = [i > 500 ? 2 : 1 for i=1:1500] + + c, _ = community_detection_greedy_modularity(g_sbm) + + matches1 = sum(c .== expected_c) + matches2 = sum((3 .- c) .== expected_c) # complementary cluster numbers assignment + + @test matches1 ≥ 0.95 * nv(g_sbm) || matches2 ≥ 0.95 * nv(g_sbm) +end +