From c752c02970e16ad948d53b706cc76ebd8317f80f Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Wed, 15 Nov 2023 16:20:39 +0100 Subject: [PATCH 01/20] add greedy modularity --- src/Graphs.jl | 2 + src/community/greedy_modularity.jl | 90 ++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 src/community/greedy_modularity.jl diff --git a/src/Graphs.jl b/src/Graphs.jl index e573f5d69..e77447734 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -307,6 +307,7 @@ export # community modularity, + greedy_modularity, core_periphery_deg, local_clustering, local_clustering_coefficient, @@ -518,6 +519,7 @@ include("centrality/eigenvector.jl") include("centrality/radiality.jl") include("community/modularity.jl") include("community/label_propagation.jl") +include("community/greedy_modularity.jl") include("community/core-periphery.jl") include("community/clustering.jl") include("community/cliques.jl") diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl new file mode 100644 index 000000000..98b3c1f22 --- /dev/null +++ b/src/community/greedy_modularity.jl @@ -0,0 +1,90 @@ +function greedy_modularity(g::AbstractGraph) + n = length(vertices(g)) + c = Vector(1:n) + cs = Vector() + qs = fill(-1., n) + Q, e, a = compute_modularity(g, c) + push!(cs, c) + qs[1] = Q + for i=1:n-1 + Q = modularity_greedy_step!(g, Q, e, a, c) + push!(cs, c) + qs[i+1] = Q + end + imax = argmax(qs) + return rewrite_class_ids(cs[imax]) +end + +function modularity_greedy_step!(g::AbstractGraph, Q::Float64, e::Matrix{<:AbstractFloat}, a::AbstractVector{<:AbstractFloat}, c::AbstractVector{<:Integer}) + m = 2 * length(edges(g)) + n = length(vertices(g)) + dq_max = -1 + tried = Set{Tuple{Int64, Int64}}() + to_merge::Tuple{Integer, Integer} = (0,0) + tried = Set() + for edge in edges(g) + u = min(src(edge), dst(edge)) + v = max(src(edge), dst(edge)) + if !((u, v) in tried) + push!(tried, (u,v)) + dq = 2* (e[u,v] / m - a[u]*a[v] / m^2) + if dq > dq_max + dq_max = dq + to_merge = (c[u], c[v]) + end + end + end + c1, c2 = to_merge + for i=1:n + e[c1, i] += e[c2, i] + end + for i=1:n + if i == c2 + continue + end + e[i, c1] += e[i, c2] + end + a[c1] = a[c1] + a[c2] + for i=1:n + if c[i] == c2 + c[i] = c1 + end + end + return Q +end + + +function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}) + Q = 0 + m = length(edges(g)) * 2 + n_groups = maximum(c) + a = zeros(n_groups) + e = zeros(n_groups, n_groups) + for u in vertices(g) + for v in neighbors(g, u) + if c[u] == c[v] + Q += 1 + e[c[i], c[j]] += 1 + end + a[c[u]] += 1 + end + end + Q *= m + for i=1:n_groups + Q -= a[i]^2 + end + Q /= m^2 + return Q, e, a +end + +function rewrite_class_ids(v::AbstractVector{<:Integer}) + d = Dict() + vn = zeros(Int64, length(v)) + for i=eachindex(v) + if !(v[i] in keys(d)) + d[v[i]] = length(d) + 1 + end + vn[i] = d[v[i]] + end + return vn +end \ No newline at end of file From 322cf701b56fe5aa720b9c9f22de5d59851b49b0 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Tue, 21 Nov 2023 13:47:02 +0100 Subject: [PATCH 02/20] fix algo --- src/community/greedy_modularity.jl | 48 +++++++++++++++++------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 98b3c1f22..d48f9a4d4 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -1,33 +1,41 @@ -function greedy_modularity(g::AbstractGraph) +function community_detection_greedy_modularity(g::AbstractGraph) n = length(vertices(g)) c = Vector(1:n) cs = Vector() - qs = fill(-1., n) + qs = fill(-1.0, n) Q, e, a = compute_modularity(g, c) + println(Q) push!(cs, c) qs[1] = Q - for i=1:n-1 + for i in 1:(n - 1) Q = modularity_greedy_step!(g, Q, e, a, c) - push!(cs, c) - qs[i+1] = Q + println(Q) + push!(cs, copy(c)) + qs[i + 1] = Q end imax = argmax(qs) return rewrite_class_ids(cs[imax]) end -function modularity_greedy_step!(g::AbstractGraph, Q::Float64, e::Matrix{<:AbstractFloat}, a::AbstractVector{<:AbstractFloat}, c::AbstractVector{<:Integer}) +function modularity_greedy_step!( + g::AbstractGraph, + Q::Float64, + e::Matrix{<:AbstractFloat}, + a::AbstractVector{<:AbstractFloat}, + c::AbstractVector{<:Integer}, +) m = 2 * length(edges(g)) n = length(vertices(g)) dq_max = -1 - tried = Set{Tuple{Int64, Int64}}() - to_merge::Tuple{Integer, Integer} = (0,0) + tried = Set{Tuple{Int64,Int64}}() + to_merge::Tuple{Integer,Integer} = (0, 0) tried = Set() for edge in edges(g) u = min(src(edge), dst(edge)) v = max(src(edge), dst(edge)) - if !((u, v) in tried) - push!(tried, (u,v)) - dq = 2* (e[u,v] / m - a[u]*a[v] / m^2) + if c[u] != c[v] && !((c[u], c[v]) in tried) + push!(tried, (c[u], c[v])) + dq = (e[c[u], c[v]] / m - a[c[u]] * a[c[v]] / m^2) if dq > dq_max dq_max = dq to_merge = (c[u], c[v]) @@ -35,25 +43,25 @@ function modularity_greedy_step!(g::AbstractGraph, Q::Float64, e::Matrix{<:Abstr end end c1, c2 = to_merge - for i=1:n + println(dq_max, " ", c1, " ", c2) + for i in 1:n e[c1, i] += e[c2, i] end - for i=1:n + for i in 1:n if i == c2 continue end e[i, c1] += e[i, c2] end a[c1] = a[c1] + a[c2] - for i=1:n + for i in 1:n if c[i] == c2 c[i] = c1 end end - return Q + return Q + 2 * dq_max end - function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}) Q = 0 m = length(edges(g)) * 2 @@ -64,13 +72,13 @@ function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}) for v in neighbors(g, u) if c[u] == c[v] Q += 1 - e[c[i], c[j]] += 1 end + e[c[u], c[v]] += 1 a[c[u]] += 1 end end Q *= m - for i=1:n_groups + for i in 1:n_groups Q -= a[i]^2 end Q /= m^2 @@ -80,11 +88,11 @@ end function rewrite_class_ids(v::AbstractVector{<:Integer}) d = Dict() vn = zeros(Int64, length(v)) - for i=eachindex(v) + for i in eachindex(v) if !(v[i] in keys(d)) d[v[i]] = length(d) + 1 end vn[i] = d[v[i]] end return vn -end \ No newline at end of file +end From fee65afcd6a7145e5e3c2ce096bdd4df1f208673 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Tue, 21 Nov 2023 13:50:18 +0100 Subject: [PATCH 03/20] remove debug output --- src/community/greedy_modularity.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index d48f9a4d4..26b10b555 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -4,12 +4,10 @@ function community_detection_greedy_modularity(g::AbstractGraph) cs = Vector() qs = fill(-1.0, n) Q, e, a = compute_modularity(g, c) - println(Q) push!(cs, c) qs[1] = Q for i in 1:(n - 1) Q = modularity_greedy_step!(g, Q, e, a, c) - println(Q) push!(cs, copy(c)) qs[i + 1] = Q end @@ -43,7 +41,6 @@ function modularity_greedy_step!( end end c1, c2 = to_merge - println(dq_max, " ", c1, " ", c2) for i in 1:n e[c1, i] += e[c2, i] end From f7c16527b9bca981a4a330d22c15df187b721873 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Tue, 21 Nov 2023 14:34:38 +0100 Subject: [PATCH 04/20] add karate club test --- test/community/greedy_modularity.jl | 50 +++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 test/community/greedy_modularity.jl diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl new file mode 100644 index 000000000..8bf919e1d --- /dev/null +++ b/test/community/greedy_modularity.jl @@ -0,0 +1,50 @@ +@testset "Greedy modularity: karate club" begin + g = SimpleGraph( + [ + 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 + 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 + 1 1 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 + 1 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 + 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 + 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 + 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 + 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 + 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 + 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 1 + 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 1 1 0 1 + 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0 + ], + ) + + expected_c = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] + expected_q = 0.3806706114398422 + + c = community_detection_greedy_modularity(g) + + @test c == expected_c + + @test modularity(g, c) ≈ expected_q + +end \ No newline at end of file From d2d92e897874f48c329307778c940bacd97f68c5 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Tue, 28 Nov 2023 15:56:02 +0100 Subject: [PATCH 05/20] add weighted and type-generic modularity optimization --- src/community/greedy_modularity.jl | 60 ++++++++++++++++------------- test/community/greedy_modularity.jl | 39 +------------------ 2 files changed, 34 insertions(+), 65 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 26b10b555..3bbfb11ce 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -1,15 +1,20 @@ function community_detection_greedy_modularity(g::AbstractGraph) - n = length(vertices(g)) - c = Vector(1:n) - cs = Vector() - qs = fill(-1.0, n) - Q, e, a = compute_modularity(g, c) - push!(cs, c) + if is_directed(g) + throw(ArgumentError("The graph must not be directed")) + end + n = nv(g) + c = Vector{Int}(1:n) + cs = Vector{Vector{Int}}(undef, n) + w = weights(g) + T = float(eltype(w)) + qs = Vector{T}(undef, n) + Q, e, a = compute_modularity(g, c, w) + cs[1] = copy(c) qs[1] = Q - for i in 1:(n - 1) + for i in 2:n Q = modularity_greedy_step!(g, Q, e, a, c) - push!(cs, copy(c)) - qs[i + 1] = Q + cs[i] = copy(c) + qs[i] = Q end imax = argmax(qs) return rewrite_class_ids(cs[imax]) @@ -17,17 +22,17 @@ end function modularity_greedy_step!( g::AbstractGraph, - Q::Float64, - e::Matrix{<:AbstractFloat}, - a::AbstractVector{<:AbstractFloat}, + Q::T, + e::AbstractMatrix{T}, + a::AbstractVector{T}, c::AbstractVector{<:Integer}, -) - m = 2 * length(edges(g)) - n = length(vertices(g)) - dq_max = -1 - tried = Set{Tuple{Int64,Int64}}() +) where {T} + m = 2 * ne(g) + n = nv(g) + dq_max::typeof(Q) = typemin(Q) + tried = Set{Tuple{Integer,Integer}}() to_merge::Tuple{Integer,Integer} = (0, 0) - tried = Set() + tried = Set{Tuple{Integer,Integer}}() for edge in edges(g) u = min(src(edge), dst(edge)) v = max(src(edge), dst(edge)) @@ -59,19 +64,20 @@ function modularity_greedy_step!( return Q + 2 * dq_max end -function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}) - Q = 0 - m = length(edges(g)) * 2 +function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) + modularity_type = float(eltype(w)) + Q = zero(modularity_type) + m = sum([w[src(e), dst(e)] for e in edges(g)]) * 2 n_groups = maximum(c) - a = zeros(n_groups) - e = zeros(n_groups, n_groups) + a = zeros(modularity_type, n_groups) + e = zeros(modularity_type, n_groups, n_groups) for u in vertices(g) for v in neighbors(g, u) if c[u] == c[v] - Q += 1 + Q += w[u, v] end - e[c[u], c[v]] += 1 - a[c[u]] += 1 + e[c[u], c[v]] += w[u, v] + a[c[u]] += w[u, v] end end Q *= m @@ -83,7 +89,7 @@ function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}) end function rewrite_class_ids(v::AbstractVector{<:Integer}) - d = Dict() + d = Dict{Integer, Integer}() vn = zeros(Int64, length(v)) for i in eachindex(v) if !(v[i] in keys(d)) diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl index 8bf919e1d..815dac8c0 100644 --- a/test/community/greedy_modularity.jl +++ b/test/community/greedy_modularity.jl @@ -1,42 +1,5 @@ @testset "Greedy modularity: karate club" begin - g = SimpleGraph( - [ - 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 0 - 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 - 1 1 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 - 1 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 - 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 - 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 - 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 - 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 - 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 1 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 - 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 - 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 - 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 - 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 1 1 - 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 1 0 1 0 1 1 0 0 0 0 0 1 1 1 0 1 - 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 0 - ], - ) + g = smallgraph(:karate) expected_c = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] expected_q = 0.3806706114398422 From 58fcc8cd99a225de7f7596167df59c4d1967d3ca Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Wed, 6 Dec 2023 13:44:48 +0100 Subject: [PATCH 06/20] slow algo: bugfixes, performance optimization, tests with SBM --- src/community/greedy_modularity.jl | 58 +++++++++--------- test/community/greedy_modularity.jl | 93 +++++++++++++++++++++++++++-- 2 files changed, 118 insertions(+), 33 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 3bbfb11ce..d2b1d4a0a 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -1,18 +1,18 @@ -function community_detection_greedy_modularity(g::AbstractGraph) +function community_detection_greedy_modularity(g::AbstractGraph; weights::AbstractMatrix=weights(g)) if is_directed(g) throw(ArgumentError("The graph must not be directed")) end n = nv(g) c = Vector{Int}(1:n) cs = Vector{Vector{Int}}(undef, n) - w = weights(g) - T = float(eltype(w)) + T = float(eltype(weights)) qs = Vector{T}(undef, n) - Q, e, a = compute_modularity(g, c, w) + Q, e, a = compute_modularity(g, c, weights) + m = sum(a) cs[1] = copy(c) qs[1] = Q for i in 2:n - Q = modularity_greedy_step!(g, Q, e, a, c) + Q = modularity_greedy_step!(g, Q, e, a, c, m) cs[i] = copy(c) qs[i] = Q end @@ -26,18 +26,14 @@ function modularity_greedy_step!( e::AbstractMatrix{T}, a::AbstractVector{T}, c::AbstractVector{<:Integer}, + m::T ) where {T} - m = 2 * ne(g) n = nv(g) dq_max::typeof(Q) = typemin(Q) - tried = Set{Tuple{Integer,Integer}}() - to_merge::Tuple{Integer,Integer} = (0, 0) - tried = Set{Tuple{Integer,Integer}}() + to_merge::Tuple{Int,Int} = (0, 0) for edge in edges(g) - u = min(src(edge), dst(edge)) - v = max(src(edge), dst(edge)) - if c[u] != c[v] && !((c[u], c[v]) in tried) - push!(tried, (c[u], c[v])) + u, v = src(edge), dst(edge) + if c[u] != c[v] dq = (e[c[u], c[v]] / m - a[c[u]] * a[c[v]] / m^2) if dq > dq_max dq_max = dq @@ -45,29 +41,33 @@ function modularity_greedy_step!( end end end - c1, c2 = to_merge - for i in 1:n - e[c1, i] += e[c2, i] - end - for i in 1:n - if i == c2 - continue + if dq_max > zero(typeof(Q)) + c1, c2 = to_merge + for i in 1:n + e[c1, i] += e[c2, i] end - e[i, c1] += e[i, c2] - end - a[c1] = a[c1] + a[c2] - for i in 1:n - if c[i] == c2 - c[i] = c1 + for i in 1:n + if i == c2 + continue + end + e[i, c1] += e[i, c2] + end + a[c1] = a[c1] + a[c2] + for i in 1:n + if c[i] == c2 + c[i] = c1 + end end + return Q + 2 * dq_max + else + return Q end - return Q + 2 * dq_max end function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) modularity_type = float(eltype(w)) Q = zero(modularity_type) - m = sum([w[src(e), dst(e)] for e in edges(g)]) * 2 + m = sum(w[src(e), dst(e)] for e in edges(g)) * 2 n_groups = maximum(c) a = zeros(modularity_type, n_groups) e = zeros(modularity_type, n_groups, n_groups) @@ -89,7 +89,7 @@ function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::A end function rewrite_class_ids(v::AbstractVector{<:Integer}) - d = Dict{Integer, Integer}() + d = Dict{Int, Int}() vn = zeros(Int64, length(v)) for i in eachindex(v) if !(v[i] in keys(d)) diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl index 815dac8c0..12accc82e 100644 --- a/test/community/greedy_modularity.jl +++ b/test/community/greedy_modularity.jl @@ -1,13 +1,98 @@ @testset "Greedy modularity: karate club" begin g = smallgraph(:karate) - expected_c = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] - expected_q = 0.3806706114398422 + expected_c_w = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] + expected_q_w = 0.3806706114398422 + c_w = community_detection_greedy_modularity(g) + + @test c_w == expected_c_w + + @test modularity(g, c_w) ≈ expected_q_w +end + +@testset "Greedy modularity: weighted karate club" begin + g = smallgraph(:karate) + w = [ + 0 4 5 3 3 3 3 2 2 0 2 3 1 3 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 2 0 0; + 4 0 6 3 0 0 0 4 0 0 0 0 0 5 0 0 0 1 0 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0; + 5 6 0 3 0 0 0 4 5 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 0; + 3 3 3 0 0 0 0 3 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 5 0 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 2 5 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 4 4 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 4; + 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2; + 2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 5 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4; + 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2; + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1; + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 4 0 3 0 0 5 4; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 3 0 0 0 2 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 2 0 0 0 0 0 0 7 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2; + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 3 0 0 0 0 0 0 0 0 4; + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 4 0 0 0 0 0 4 2; + 0 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3; + 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 7 0 0 2 0 0 0 4 4; + 0 0 2 0 0 0 0 0 3 0 0 0 0 0 3 3 0 0 1 0 3 0 2 5 0 0 0 0 0 4 3 4 0 5; + 0 0 0 0 0 0 0 0 4 2 0 0 0 3 2 4 0 0 2 1 1 0 3 4 0 0 2 4 2 2 3 4 5 0 + ] + expected_c_w = [1, 1, 1, 1, 2, 2, 2, 1, 3, 3, 2, 1, 1, 1, 3, 3, 2, 1, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] + expected_q_w = 0.4345214669889994 + + c_w = community_detection_greedy_modularity(g, weights=w) + @test c_w == expected_c_w + @test modularity(g,c_w, distmx=w) ≈ expected_q_w +end + + +@testset "Greedy modularity: disconnected graph" begin + g = SimpleGraph(10) + for i=1:5 + add_edge!(g, 2*i - 1, 2*i) + end c = community_detection_greedy_modularity(g) + q = modularity(g, c) + + expected_c = [1,1,2,2,3,3,4,4,5,5] + expected_q = 0.8 @test c == expected_c + @test q ≈ expected_q +end + + +@testset "Greedy modularity: complete graph" begin + g = complete_graph(10) + c = community_detection_greedy_modularity(g) + q = modularity(g, c) + + expected_c = ones(Int, 10) + expected_q = 0 + + @test c == expected_c + @test q ≈ expected_q +end + + +@testset "Greedy modularity: random stochastic block model graph" begin + g_sbm = stochastic_block_model(99,1,[500,1000]) + expected_c = [i > 500 ? 2 : 1 for i=1:1500] + + c = community_detection_greedy_modularity(g_sbm) - @test modularity(g, c) ≈ expected_q + @test c == expected_c # can fail with low probability? +end -end \ No newline at end of file From 40a381baf425e88559ab99a586286cef2cb6529c Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Wed, 6 Dec 2023 19:30:28 +0100 Subject: [PATCH 07/20] fix and add empty graph case --- src/community/greedy_modularity.jl | 3 ++- test/community/greedy_modularity.jl | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index d2b1d4a0a..b84c556ea 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -67,10 +67,11 @@ end function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) modularity_type = float(eltype(w)) Q = zero(modularity_type) - m = sum(w[src(e), dst(e)] for e in edges(g)) * 2 + m = sum(w[src(e), dst(e)] for e in edges(g); init=Q) * 2 n_groups = maximum(c) a = zeros(modularity_type, n_groups) e = zeros(modularity_type, n_groups, n_groups) + m == 0 && return 0.0, e, a for u in vertices(g) for v in neighbors(g, u) if c[u] == c[v] diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl index 12accc82e..ab0aef108 100644 --- a/test/community/greedy_modularity.jl +++ b/test/community/greedy_modularity.jl @@ -80,7 +80,20 @@ end q = modularity(g, c) expected_c = ones(Int, 10) - expected_q = 0 + expected_q = 0.0 + + @test c == expected_c + @test q ≈ expected_q +end + + +@testset "Greedy modularity: empty graph" begin + g = SimpleGraph(10) + c = community_detection_greedy_modularity(g) + q = modularity(g, c) + + expected_c = Vector(1:10) + expected_q = 0.0 @test c == expected_c @test q ≈ expected_q From 5412d417aad1e5aa2cf0cc6e12c89c8618348dcb Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Wed, 13 Dec 2023 15:59:09 +0100 Subject: [PATCH 08/20] remove history of modularity opt steps + add sparse matrix --- src/community/greedy_modularity.jl | 67 +++++++++++++++++++----------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index b84c556ea..1a69aebb6 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -1,23 +1,27 @@ -function community_detection_greedy_modularity(g::AbstractGraph; weights::AbstractMatrix=weights(g)) +using SparseArrays: spzeros, findnz, sparse, rowvals, nonzeros, nzrange, dropzeros! + +function community_detection_greedy_modularity( + g::AbstractGraph; weights::AbstractMatrix=weights(g) +) if is_directed(g) throw(ArgumentError("The graph must not be directed")) end n = nv(g) c = Vector{Int}(1:n) - cs = Vector{Vector{Int}}(undef, n) - T = float(eltype(weights)) - qs = Vector{T}(undef, n) Q, e, a = compute_modularity(g, c, weights) m = sum(a) - cs[1] = copy(c) - qs[1] = Q + Q_max = Q + c_best = copy(c) for i in 2:n Q = modularity_greedy_step!(g, Q, e, a, c, m) - cs[i] = copy(c) - qs[i] = Q + if Q_max < Q + Q_max = Q + c_best = copy(c) + else + break + end end - imax = argmax(qs) - return rewrite_class_ids(cs[imax]) + return rewrite_class_ids(c_best) end function modularity_greedy_step!( @@ -26,18 +30,24 @@ function modularity_greedy_step!( e::AbstractMatrix{T}, a::AbstractVector{T}, c::AbstractVector{<:Integer}, - m::T + m::T, ) where {T} n = nv(g) dq_max::typeof(Q) = typemin(Q) - to_merge::Tuple{Int,Int} = (0, 0) - for edge in edges(g) - u, v = src(edge), dst(edge) - if c[u] != c[v] - dq = (e[c[u], c[v]] / m - a[c[u]] * a[c[v]] / m^2) - if dq > dq_max - dq_max = dq - to_merge = (c[u], c[v]) + to_merge = (0, 0) + _, y = size(e) + rows = rowvals(e) + vals = nonzeros(e) + for col in 1:y + for i in nzrange(e, col) + row = rows[i] + value = vals[i] + if row != col + dq = (value / m - a[row] * a[col] / m^2) + if dq > dq_max + dq_max = dq + to_merge = (row, col) + end end end end @@ -45,6 +55,7 @@ function modularity_greedy_step!( c1, c2 = to_merge for i in 1:n e[c1, i] += e[c2, i] + e[c2, i] = 0 end for i in 1:n if i == c2 @@ -52,6 +63,8 @@ function modularity_greedy_step!( end e[i, c1] += e[i, c2] end + e[:, c2] .= 0 + dropzeros!(e) a[c1] = a[c1] + a[c2] for i in 1:n if c[i] == c2 @@ -64,20 +77,26 @@ function modularity_greedy_step!( end end -function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) +function compute_modularity( + g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray +) modularity_type = float(eltype(w)) Q = zero(modularity_type) m = sum(w[src(e), dst(e)] for e in edges(g); init=Q) * 2 n_groups = maximum(c) a = zeros(modularity_type, n_groups) - e = zeros(modularity_type, n_groups, n_groups) + ei, ej = Vector{Int}(), Vector{Int}() + ev = Vector{modularity_type}() + e = spzeros(modularity_type, n_groups, n_groups) m == 0 && return 0.0, e, a for u in vertices(g) for v in neighbors(g, u) if c[u] == c[v] Q += w[u, v] end - e[c[u], c[v]] += w[u, v] + push!(ei, c[u]) + push!(ej, c[v]) + push!(ev, w[u, v]) a[c[u]] += w[u, v] end end @@ -86,11 +105,11 @@ function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::A Q -= a[i]^2 end Q /= m^2 - return Q, e, a + return Q, sparse(ei, ej, ev), a end function rewrite_class_ids(v::AbstractVector{<:Integer}) - d = Dict{Int, Int}() + d = Dict{Int,Int}() vn = zeros(Int64, length(v)) for i in eachindex(v) if !(v[i] in keys(d)) From ee65d63a1f8bfdcb2eed0285b25c3e57883f8e9e Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Wed, 13 Dec 2023 16:28:01 +0100 Subject: [PATCH 09/20] Rename and export --- src/Graphs.jl | 1 + src/community/greedy_modularity.jl | 6 +- test/community/greedy_modularity.jl | 171 +++++++++++++++++++--------- 3 files changed, 120 insertions(+), 58 deletions(-) diff --git a/src/Graphs.jl b/src/Graphs.jl index e77447734..d1d6e5278 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -37,6 +37,7 @@ using Random: shuffle! using SparseArrays: SparseMatrixCSC, nonzeros, nzrange, rowvals import SparseArrays: blockdiag, sparse +using SparseArrays: spzeros, findnz, sparse, rowvals, nonzeros, nzrange, dropzeros! import Base: adjoint, write, diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 1a69aebb6..72d0cbdb1 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -1,8 +1,4 @@ -using SparseArrays: spzeros, findnz, sparse, rowvals, nonzeros, nzrange, dropzeros! - -function community_detection_greedy_modularity( - g::AbstractGraph; weights::AbstractMatrix=weights(g) -) +function greedy_modularity(g::AbstractGraph; weights::AbstractMatrix=weights(g)) if is_directed(g) throw(ArgumentError("The graph must not be directed")) end diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl index ab0aef108..2dc56f47a 100644 --- a/test/community/greedy_modularity.jl +++ b/test/community/greedy_modularity.jl @@ -1,10 +1,45 @@ @testset "Greedy modularity: karate club" begin g = smallgraph(:karate) - expected_c_w = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] + expected_c_w = [ + 1, + 2, + 2, + 2, + 1, + 1, + 1, + 2, + 3, + 2, + 1, + 1, + 2, + 2, + 3, + 3, + 1, + 2, + 3, + 1, + 3, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + ] expected_q_w = 0.3806706114398422 - c_w = community_detection_greedy_modularity(g) + c_w = greedy_modularity(g) @test c_w == expected_c_w @@ -14,69 +49,102 @@ end @testset "Greedy modularity: weighted karate club" begin g = smallgraph(:karate) w = [ - 0 4 5 3 3 3 3 2 2 0 2 3 1 3 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 2 0 0; - 4 0 6 3 0 0 0 4 0 0 0 0 0 5 0 0 0 1 0 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0; - 5 6 0 3 0 0 0 4 5 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 0; - 3 3 3 0 0 0 0 3 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 3 0 0 0 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 3 0 0 0 0 0 5 0 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 3 0 0 0 2 5 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 2 4 4 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 2 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 4; - 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2; - 2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 3 5 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4; - 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2; - 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1; - 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 4 0 3 0 0 5 4; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 3 0 0 0 2 0 0; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 2 0 0 0 0 0 0 7 0 0; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2; - 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 3 0 0 0 0 0 0 0 0 4; - 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2; - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 4 0 0 0 0 0 4 2; - 0 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3; - 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 7 0 0 2 0 0 0 4 4; - 0 0 2 0 0 0 0 0 3 0 0 0 0 0 3 3 0 0 1 0 3 0 2 5 0 0 0 0 0 4 3 4 0 5; - 0 0 0 0 0 0 0 0 4 2 0 0 0 3 2 4 0 0 2 1 1 0 3 4 0 0 2 4 2 2 3 4 5 0 + 0 4 5 3 3 3 3 2 2 0 2 3 1 3 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 2 0 0 + 4 0 6 3 0 0 0 4 0 0 0 0 0 5 0 0 0 1 0 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0 + 5 6 0 3 0 0 0 4 5 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 0 + 3 3 3 0 0 0 0 3 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 3 0 0 0 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 3 0 0 0 0 0 5 0 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 3 0 0 0 2 5 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 2 4 4 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 2 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 4 + 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 + 2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 3 5 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 + 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1 + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 4 0 3 0 0 5 4 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 3 0 0 0 2 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 2 0 0 0 0 0 0 7 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2 + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 3 0 0 0 0 0 0 0 0 4 + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 4 0 0 0 0 0 4 2 + 0 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 + 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 7 0 0 2 0 0 0 4 4 + 0 0 2 0 0 0 0 0 3 0 0 0 0 0 3 3 0 0 1 0 3 0 2 5 0 0 0 0 0 4 3 4 0 5 + 0 0 0 0 0 0 0 0 4 2 0 0 0 3 2 4 0 0 2 1 1 0 3 4 0 0 2 4 2 2 3 4 5 0 + ] + expected_c_w = [ + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 1, + 3, + 3, + 2, + 1, + 1, + 1, + 3, + 3, + 2, + 1, + 3, + 1, + 3, + 1, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, ] - expected_c_w = [1, 1, 1, 1, 2, 2, 2, 1, 3, 3, 2, 1, 1, 1, 3, 3, 2, 1, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] expected_q_w = 0.4345214669889994 - c_w = community_detection_greedy_modularity(g, weights=w) + c_w = greedy_modularity(g; weights=w) @test c_w == expected_c_w - @test modularity(g,c_w, distmx=w) ≈ expected_q_w + @test modularity(g, c_w; distmx=w) ≈ expected_q_w end - @testset "Greedy modularity: disconnected graph" begin g = SimpleGraph(10) - for i=1:5 - add_edge!(g, 2*i - 1, 2*i) + for i in 1:5 + add_edge!(g, 2 * i - 1, 2 * i) end - c = community_detection_greedy_modularity(g) + c = greedy_modularity(g) q = modularity(g, c) - expected_c = [1,1,2,2,3,3,4,4,5,5] + expected_c = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5] expected_q = 0.8 @test c == expected_c @test q ≈ expected_q end - @testset "Greedy modularity: complete graph" begin g = complete_graph(10) - c = community_detection_greedy_modularity(g) + c = greedy_modularity(g) q = modularity(g, c) expected_c = ones(Int, 10) @@ -86,10 +154,9 @@ end @test q ≈ expected_q end - @testset "Greedy modularity: empty graph" begin g = SimpleGraph(10) - c = community_detection_greedy_modularity(g) + c = greedy_modularity(g) q = modularity(g, c) expected_c = Vector(1:10) @@ -99,13 +166,11 @@ end @test q ≈ expected_q end - @testset "Greedy modularity: random stochastic block model graph" begin - g_sbm = stochastic_block_model(99,1,[500,1000]) - expected_c = [i > 500 ? 2 : 1 for i=1:1500] + g_sbm = stochastic_block_model(99, 1, [500, 1000]) + expected_c = [i > 500 ? 2 : 1 for i in 1:1500] - c = community_detection_greedy_modularity(g_sbm) + c = greedy_modularity(g_sbm) @test c == expected_c # can fail with low probability? end - From 6200ecf3816dd8b6a3679e3de720998ce4a03f4b Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Tue, 26 Dec 2023 15:12:50 +0100 Subject: [PATCH 10/20] Revert "Rename and export" This reverts commit ee65d63a1f8bfdcb2eed0285b25c3e57883f8e9e. --- src/Graphs.jl | 1 - src/community/greedy_modularity.jl | 6 +- test/community/greedy_modularity.jl | 171 +++++++++------------------- 3 files changed, 58 insertions(+), 120 deletions(-) diff --git a/src/Graphs.jl b/src/Graphs.jl index d1d6e5278..e77447734 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -37,7 +37,6 @@ using Random: shuffle! using SparseArrays: SparseMatrixCSC, nonzeros, nzrange, rowvals import SparseArrays: blockdiag, sparse -using SparseArrays: spzeros, findnz, sparse, rowvals, nonzeros, nzrange, dropzeros! import Base: adjoint, write, diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 72d0cbdb1..1a69aebb6 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -1,4 +1,8 @@ -function greedy_modularity(g::AbstractGraph; weights::AbstractMatrix=weights(g)) +using SparseArrays: spzeros, findnz, sparse, rowvals, nonzeros, nzrange, dropzeros! + +function community_detection_greedy_modularity( + g::AbstractGraph; weights::AbstractMatrix=weights(g) +) if is_directed(g) throw(ArgumentError("The graph must not be directed")) end diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl index 2dc56f47a..ab0aef108 100644 --- a/test/community/greedy_modularity.jl +++ b/test/community/greedy_modularity.jl @@ -1,45 +1,10 @@ @testset "Greedy modularity: karate club" begin g = smallgraph(:karate) - expected_c_w = [ - 1, - 2, - 2, - 2, - 1, - 1, - 1, - 2, - 3, - 2, - 1, - 1, - 2, - 2, - 3, - 3, - 1, - 2, - 3, - 1, - 3, - 2, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - ] + expected_c_w = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] expected_q_w = 0.3806706114398422 - c_w = greedy_modularity(g) + c_w = community_detection_greedy_modularity(g) @test c_w == expected_c_w @@ -49,102 +14,69 @@ end @testset "Greedy modularity: weighted karate club" begin g = smallgraph(:karate) w = [ - 0 4 5 3 3 3 3 2 2 0 2 3 1 3 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 2 0 0 - 4 0 6 3 0 0 0 4 0 0 0 0 0 5 0 0 0 1 0 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0 - 5 6 0 3 0 0 0 4 5 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 0 - 3 3 3 0 0 0 0 3 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 3 0 0 0 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 3 0 0 0 0 0 5 0 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 3 0 0 0 2 5 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 2 4 4 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 2 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 4 - 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 - 2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 3 5 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4 - 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 - 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1 - 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 4 0 3 0 0 5 4 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 3 0 0 0 2 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 2 0 0 0 0 0 0 7 0 0 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2 - 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 3 0 0 0 0 0 0 0 0 4 - 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 4 0 0 0 0 0 4 2 - 0 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 - 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 7 0 0 2 0 0 0 4 4 - 0 0 2 0 0 0 0 0 3 0 0 0 0 0 3 3 0 0 1 0 3 0 2 5 0 0 0 0 0 4 3 4 0 5 - 0 0 0 0 0 0 0 0 4 2 0 0 0 3 2 4 0 0 2 1 1 0 3 4 0 0 2 4 2 2 3 4 5 0 - ] - expected_c_w = [ - 1, - 1, - 1, - 1, - 2, - 2, - 2, - 1, - 3, - 3, - 2, - 1, - 1, - 1, - 3, - 3, - 2, - 1, - 3, - 1, - 3, - 1, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, - 3, + 0 4 5 3 3 3 3 2 2 0 2 3 1 3 0 0 0 2 0 2 0 2 0 0 0 0 0 0 0 0 0 2 0 0; + 4 0 6 3 0 0 0 4 0 0 0 0 0 5 0 0 0 1 0 2 0 2 0 0 0 0 0 0 0 0 2 0 0 0; + 5 6 0 3 0 0 0 4 5 1 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 2 0; + 3 3 3 0 0 0 0 3 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 2 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 5 0 0 0 3 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 2 5 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 4 4 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 0 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 3 4; + 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2; + 2 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 1 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 3 5 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 2; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 4; + 0 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2; + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 1; + 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 3; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 0 4 0 3 0 0 5 4; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 3 0 0 0 2 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 2 0 0 0 0 0 0 7 0 0; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 2; + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 3 0 0 0 0 0 0 0 0 4; + 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2; + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 4 0 0 0 0 0 4 2; + 0 2 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3; + 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 7 0 0 2 0 0 0 4 4; + 0 0 2 0 0 0 0 0 3 0 0 0 0 0 3 3 0 0 1 0 3 0 2 5 0 0 0 0 0 4 3 4 0 5; + 0 0 0 0 0 0 0 0 4 2 0 0 0 3 2 4 0 0 2 1 1 0 3 4 0 0 2 4 2 2 3 4 5 0 ] + expected_c_w = [1, 1, 1, 1, 2, 2, 2, 1, 3, 3, 2, 1, 1, 1, 3, 3, 2, 1, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] expected_q_w = 0.4345214669889994 - c_w = greedy_modularity(g; weights=w) + c_w = community_detection_greedy_modularity(g, weights=w) @test c_w == expected_c_w - @test modularity(g, c_w; distmx=w) ≈ expected_q_w + @test modularity(g,c_w, distmx=w) ≈ expected_q_w end + @testset "Greedy modularity: disconnected graph" begin g = SimpleGraph(10) - for i in 1:5 - add_edge!(g, 2 * i - 1, 2 * i) + for i=1:5 + add_edge!(g, 2*i - 1, 2*i) end - c = greedy_modularity(g) + c = community_detection_greedy_modularity(g) q = modularity(g, c) - expected_c = [1, 1, 2, 2, 3, 3, 4, 4, 5, 5] + expected_c = [1,1,2,2,3,3,4,4,5,5] expected_q = 0.8 @test c == expected_c @test q ≈ expected_q end + @testset "Greedy modularity: complete graph" begin g = complete_graph(10) - c = greedy_modularity(g) + c = community_detection_greedy_modularity(g) q = modularity(g, c) expected_c = ones(Int, 10) @@ -154,9 +86,10 @@ end @test q ≈ expected_q end + @testset "Greedy modularity: empty graph" begin g = SimpleGraph(10) - c = greedy_modularity(g) + c = community_detection_greedy_modularity(g) q = modularity(g, c) expected_c = Vector(1:10) @@ -166,11 +99,13 @@ end @test q ≈ expected_q end + @testset "Greedy modularity: random stochastic block model graph" begin - g_sbm = stochastic_block_model(99, 1, [500, 1000]) - expected_c = [i > 500 ? 2 : 1 for i in 1:1500] + g_sbm = stochastic_block_model(99,1,[500,1000]) + expected_c = [i > 500 ? 2 : 1 for i=1:1500] - c = greedy_modularity(g_sbm) + c = community_detection_greedy_modularity(g_sbm) @test c == expected_c # can fail with low probability? end + From 4fe505dafca5bab0a139d2375701a1584e687daf Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Tue, 26 Dec 2023 15:17:18 +0100 Subject: [PATCH 11/20] Revert "remove history of modularity opt steps + add sparse matrix" This reverts commit 5412d417aad1e5aa2cf0cc6e12c89c8618348dcb. --- src/community/greedy_modularity.jl | 67 +++++++++++------------------- 1 file changed, 24 insertions(+), 43 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 1a69aebb6..b84c556ea 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -1,27 +1,23 @@ -using SparseArrays: spzeros, findnz, sparse, rowvals, nonzeros, nzrange, dropzeros! - -function community_detection_greedy_modularity( - g::AbstractGraph; weights::AbstractMatrix=weights(g) -) +function community_detection_greedy_modularity(g::AbstractGraph; weights::AbstractMatrix=weights(g)) if is_directed(g) throw(ArgumentError("The graph must not be directed")) end n = nv(g) c = Vector{Int}(1:n) + cs = Vector{Vector{Int}}(undef, n) + T = float(eltype(weights)) + qs = Vector{T}(undef, n) Q, e, a = compute_modularity(g, c, weights) m = sum(a) - Q_max = Q - c_best = copy(c) + cs[1] = copy(c) + qs[1] = Q for i in 2:n Q = modularity_greedy_step!(g, Q, e, a, c, m) - if Q_max < Q - Q_max = Q - c_best = copy(c) - else - break - end + cs[i] = copy(c) + qs[i] = Q end - return rewrite_class_ids(c_best) + imax = argmax(qs) + return rewrite_class_ids(cs[imax]) end function modularity_greedy_step!( @@ -30,24 +26,18 @@ function modularity_greedy_step!( e::AbstractMatrix{T}, a::AbstractVector{T}, c::AbstractVector{<:Integer}, - m::T, + m::T ) where {T} n = nv(g) dq_max::typeof(Q) = typemin(Q) - to_merge = (0, 0) - _, y = size(e) - rows = rowvals(e) - vals = nonzeros(e) - for col in 1:y - for i in nzrange(e, col) - row = rows[i] - value = vals[i] - if row != col - dq = (value / m - a[row] * a[col] / m^2) - if dq > dq_max - dq_max = dq - to_merge = (row, col) - end + to_merge::Tuple{Int,Int} = (0, 0) + for edge in edges(g) + u, v = src(edge), dst(edge) + if c[u] != c[v] + dq = (e[c[u], c[v]] / m - a[c[u]] * a[c[v]] / m^2) + if dq > dq_max + dq_max = dq + to_merge = (c[u], c[v]) end end end @@ -55,7 +45,6 @@ function modularity_greedy_step!( c1, c2 = to_merge for i in 1:n e[c1, i] += e[c2, i] - e[c2, i] = 0 end for i in 1:n if i == c2 @@ -63,8 +52,6 @@ function modularity_greedy_step!( end e[i, c1] += e[i, c2] end - e[:, c2] .= 0 - dropzeros!(e) a[c1] = a[c1] + a[c2] for i in 1:n if c[i] == c2 @@ -77,26 +64,20 @@ function modularity_greedy_step!( end end -function compute_modularity( - g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray -) +function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) modularity_type = float(eltype(w)) Q = zero(modularity_type) m = sum(w[src(e), dst(e)] for e in edges(g); init=Q) * 2 n_groups = maximum(c) a = zeros(modularity_type, n_groups) - ei, ej = Vector{Int}(), Vector{Int}() - ev = Vector{modularity_type}() - e = spzeros(modularity_type, n_groups, n_groups) + e = zeros(modularity_type, n_groups, n_groups) m == 0 && return 0.0, e, a for u in vertices(g) for v in neighbors(g, u) if c[u] == c[v] Q += w[u, v] end - push!(ei, c[u]) - push!(ej, c[v]) - push!(ev, w[u, v]) + e[c[u], c[v]] += w[u, v] a[c[u]] += w[u, v] end end @@ -105,11 +86,11 @@ function compute_modularity( Q -= a[i]^2 end Q /= m^2 - return Q, sparse(ei, ej, ev), a + return Q, e, a end function rewrite_class_ids(v::AbstractVector{<:Integer}) - d = Dict{Int,Int}() + d = Dict{Int, Int}() vn = zeros(Int64, length(v)) for i in eachindex(v) if !(v[i] in keys(d)) From bb85ea6899d38e1b5121a6aabb45dc83fd1443f7 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Thu, 28 Dec 2023 16:11:32 +0100 Subject: [PATCH 12/20] fix import and random test for old algo --- src/Graphs.jl | 2 +- test/community/greedy_modularity.jl | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Graphs.jl b/src/Graphs.jl index e77447734..a2bd2cf93 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -307,7 +307,7 @@ export # community modularity, - greedy_modularity, + community_detection_greedy_modularity, core_periphery_deg, local_clustering, local_clustering_coefficient, diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl index ab0aef108..53b73c86c 100644 --- a/test/community/greedy_modularity.jl +++ b/test/community/greedy_modularity.jl @@ -106,6 +106,9 @@ end c = community_detection_greedy_modularity(g_sbm) - @test c == expected_c # can fail with low probability? + matches1 = sum(c .== expected_c) + matches2 = sum((3 .- c) .== expected_c) # complementary cluster numbers assignment + + @test matches1 ≥ 0.95 * nv(g_sbm) || matches2 ≥ nv(g_sbm) end From 353c6a406bac1b9fb1f9e96171aa7afe17ffccca Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Wed, 3 Jan 2024 17:17:22 +0100 Subject: [PATCH 13/20] buggy fast modularity implementation with PriorityQueue --- src/Graphs.jl | 4 +- src/community/greedy_modularity_fast.jl | 128 ++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 src/community/greedy_modularity_fast.jl diff --git a/src/Graphs.jl b/src/Graphs.jl index a2bd2cf93..bead4b7a2 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -23,7 +23,8 @@ using DataStructures: union!, find_root!, BinaryMaxHeap, - BinaryMinHeap + BinaryMinHeap, + DefaultDict using LinearAlgebra: I, Symmetric, diagm, eigen, eigvals, norm, rmul!, tril, triu import LinearAlgebra: Diagonal, issymmetric, mul! using Random: @@ -520,6 +521,7 @@ include("centrality/radiality.jl") include("community/modularity.jl") include("community/label_propagation.jl") include("community/greedy_modularity.jl") +include("community/greedy_modularity_fast.jl") include("community/core-periphery.jl") include("community/clustering.jl") include("community/cliques.jl") diff --git a/src/community/greedy_modularity_fast.jl b/src/community/greedy_modularity_fast.jl new file mode 100644 index 000000000..c98003f71 --- /dev/null +++ b/src/community/greedy_modularity_fast.jl @@ -0,0 +1,128 @@ +function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::AbstractMatrix=weights(g)) + if is_directed(g) + throw(ArgumentError("The graph must not be directed")) + end + n = nv(g) + c = Vector{Int}(1:n) + dq_dict, dq_heap, dq_global_heap, a = compute_dq(g, c, weights) + modularity_type = float(eltype(weights)) + for _ in 2:n + try + (u,v), dq = dequeue_pair!(dq_global_heap) + catch err + if isa(err, BoundsError) + break + end + throw(error("unknown error at call to priority queue")) + end + dequeue!(dq_heap[u]) + if !isempty(dq_heap[u]) + enqueue!(dq_global_heap, first(dq_heap[u])) + end + if first(dq_heap[v])[1] == (v,u) + dequeue!(dq_heap[v]) + delete!(dq_global_heap, (v,u)) + if !isempty(dq_heap[v]) + enqueue!(dq_global_heap, first(dq_heap[v])) + end + else + delete!(dq_heap[v], (v,u)) + end + + c[c .== u] .= v + + neighbors_u = keys(dq_dict[u]) + neighbors_v = keys(dq_dict[v]) + neighbors_all = union(neighbors_u, neighbors_v) + neighbors_common = intersect(neighbors_u, neighbors_v) + + for w in neighbors_all + if w in neighbors_common + dq_w = dq_dict[v][w] + dq_dict[u][w] + elseif w in neighbors_v + dq_w = dq_dict[v][w] - 2 * a[u] * a[w] + else + dq_w = dq_dict[v][w] - 2 * a[v] * a[w] + end + for (row, column) in ((v, w), (w, v)) + dq_heap_row = dq_heap[row] + dq_dict[row][column] = dq_w + if !isempty(dq_heap_row) + oldmax = first(dq_heap_row) + else + oldmax = nothing + end + dq_heap_row[(row,column)] = dq_w # update or insert + if isnothing(oldmax) + dq_global_heap[(row, column)] = dq_w + else + newmax = first(dq_heap_row) + if newmax != oldmax + delete!(dq_global_heap, oldmax[1]) ## is it still there? + enqueue!(dq_global_heap, newmax) + end + end + end + end + + for (w, _) in dq_dict[u] + delete!(dq_dict[w], u) + if w != v + for (row, column) in ((w,u), (u,w)) + dq_heap_row = dq_heap[row] + if first(dq_heap_row)[1] == (row, column) + dequeue!(dq_heap_row) + delete!(dq_global_heap, (row, column)) + if !isempty(dq_heap_row) + enqueue!(dq_global_heap, first(dq_heap_row)) + end + else + delete!(dq_heap_row, (row, column)) + end + end + end + end + delete!(dq_dict, u) + dq_heap[u] = PriorityQueue{Tuple{Int, Int}, modularity_type}(Base.Order.Reverse) # placeholder, lasts empty forever + a[v] += a[u] + a[u] = 0 + end + return rewrite_class_ids(c) +end + +function compute_dq( + g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray +) + modularity_type = float(eltype(w)) + Q_zero = zero(modularity_type) + m = sum(w[src(e), dst(e)] for e in edges(g); init=Q_zero) * 2 + n_groups = maximum(c) + a = zeros(modularity_type, n_groups) + # m == 0 && return 0.0, spzeros(modularity_type, n_groups, n_groups), a + dq_dict = DefaultDict{Int, DefaultDict}(() -> DefaultDict{Int,modularity_type}(Q_zero)) + + for u in vertices(g) + for v in neighbors(g, u) + dq_dict[u][v] += w[u,v] + a[c[u]] += w[u, v] + end + end + + for (u, dct) in dq_dict + for (v, w) in dct + dq_dict[u][v] = w / m - a[c[u]] * a[c[v]] / m^2 + end + end + + dq_heap = Dict(u=>PriorityQueue{Tuple{Int, Int}, modularity_type}(Base.Order.Reverse, (u,v)=> dq for (v, dq) in dq_dict[u]) for u in vertices(g)) + v_connected = filter(v -> !isempty(dq_heap[v]), vertices(g)) + global_heap = PriorityQueue{Tuple{Int, Int}, modularity_type}(Base.Order.Reverse, first(dq_heap[v]) for v in v_connected) + return dq_dict, dq_heap, global_heap, a +end + +# g = SimpleGraph(4) +# add_edge!(g,1,2) +# add_edge!(g,3,4) +# w = weights(g) +# c = 1:4 +# compute_dq(g,c,w) \ No newline at end of file From 4615d671bfa9a1a6032e088796a06db512c32808 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Thu, 4 Jan 2024 20:07:40 +0100 Subject: [PATCH 14/20] return history of modularity along with best community partitioning --- src/community/greedy_modularity.jl | 34 +++++++++++++----------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index b84c556ea..1e6c5c7d5 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -17,7 +17,7 @@ function community_detection_greedy_modularity(g::AbstractGraph; weights::Abstra qs[i] = Q end imax = argmax(qs) - return rewrite_class_ids(cs[imax]) + return rewrite_class_ids(cs[imax]), qs end function modularity_greedy_step!( @@ -41,27 +41,23 @@ function modularity_greedy_step!( end end end - if dq_max > zero(typeof(Q)) - c1, c2 = to_merge - for i in 1:n - e[c1, i] += e[c2, i] - end - for i in 1:n - if i == c2 - continue - end - e[i, c1] += e[i, c2] + c1, c2 = to_merge + for i in 1:n + e[c1, i] += e[c2, i] + end + for i in 1:n + if i == c2 + continue end - a[c1] = a[c1] + a[c2] - for i in 1:n - if c[i] == c2 - c[i] = c1 - end + e[i, c1] += e[i, c2] + end + a[c1] = a[c1] + a[c2] + for i in 1:n + if c[i] == c2 + c[i] = c1 end - return Q + 2 * dq_max - else - return Q end + return Q + 2 * dq_max end function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) From cf8025902bce54245fc0d947c8958516d4e399b0 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Thu, 4 Jan 2024 20:10:22 +0100 Subject: [PATCH 15/20] Revert "return history of modularity along with best community partitioning" This reverts commit 4615d671bfa9a1a6032e088796a06db512c32808. --- src/community/greedy_modularity.jl | 34 +++++++++++++++++------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 1e6c5c7d5..b84c556ea 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -17,7 +17,7 @@ function community_detection_greedy_modularity(g::AbstractGraph; weights::Abstra qs[i] = Q end imax = argmax(qs) - return rewrite_class_ids(cs[imax]), qs + return rewrite_class_ids(cs[imax]) end function modularity_greedy_step!( @@ -41,23 +41,27 @@ function modularity_greedy_step!( end end end - c1, c2 = to_merge - for i in 1:n - e[c1, i] += e[c2, i] - end - for i in 1:n - if i == c2 - continue + if dq_max > zero(typeof(Q)) + c1, c2 = to_merge + for i in 1:n + e[c1, i] += e[c2, i] end - e[i, c1] += e[i, c2] - end - a[c1] = a[c1] + a[c2] - for i in 1:n - if c[i] == c2 - c[i] = c1 + for i in 1:n + if i == c2 + continue + end + e[i, c1] += e[i, c2] + end + a[c1] = a[c1] + a[c2] + for i in 1:n + if c[i] == c2 + c[i] = c1 + end end + return Q + 2 * dq_max + else + return Q end - return Q + 2 * dq_max end function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) From 888ae41fc61f3588da4cb461150091e597dcb448 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Thu, 4 Jan 2024 20:13:38 +0100 Subject: [PATCH 16/20] return history of modularity along with best partitioning --- src/community/greedy_modularity.jl | 34 +++++++++++++----------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index b84c556ea..1e6c5c7d5 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -17,7 +17,7 @@ function community_detection_greedy_modularity(g::AbstractGraph; weights::Abstra qs[i] = Q end imax = argmax(qs) - return rewrite_class_ids(cs[imax]) + return rewrite_class_ids(cs[imax]), qs end function modularity_greedy_step!( @@ -41,27 +41,23 @@ function modularity_greedy_step!( end end end - if dq_max > zero(typeof(Q)) - c1, c2 = to_merge - for i in 1:n - e[c1, i] += e[c2, i] - end - for i in 1:n - if i == c2 - continue - end - e[i, c1] += e[i, c2] + c1, c2 = to_merge + for i in 1:n + e[c1, i] += e[c2, i] + end + for i in 1:n + if i == c2 + continue end - a[c1] = a[c1] + a[c2] - for i in 1:n - if c[i] == c2 - c[i] = c1 - end + e[i, c1] += e[i, c2] + end + a[c1] = a[c1] + a[c2] + for i in 1:n + if c[i] == c2 + c[i] = c1 end - return Q + 2 * dq_max - else - return Q end + return Q + 2 * dq_max end function compute_modularity(g::AbstractGraph, c::AbstractVector{<:Integer}, w::AbstractArray) From 019612acbbb38fc6627b9ef811d09ef313c525c5 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Thu, 4 Jan 2024 23:33:05 +0100 Subject: [PATCH 17/20] fix edge case with no edges between communities left --- src/community/greedy_modularity.jl | 8 ++++++++ test/community/greedy_modularity.jl | 14 +++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/community/greedy_modularity.jl b/src/community/greedy_modularity.jl index 1e6c5c7d5..a4dcae108 100644 --- a/src/community/greedy_modularity.jl +++ b/src/community/greedy_modularity.jl @@ -41,6 +41,14 @@ function modularity_greedy_step!( end end end + if to_merge == (0,0) + for i in vertices(g) + if c[i] != c[1] + to_merge = (c[1], c[i]) + break + end + end + end c1, c2 = to_merge for i in 1:n e[c1, i] += e[c2, i] diff --git a/test/community/greedy_modularity.jl b/test/community/greedy_modularity.jl index 53b73c86c..907ee8d89 100644 --- a/test/community/greedy_modularity.jl +++ b/test/community/greedy_modularity.jl @@ -4,7 +4,7 @@ expected_c_w = [1, 2, 2, 2, 1, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 3, 1, 2, 3, 1, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] expected_q_w = 0.3806706114398422 - c_w = community_detection_greedy_modularity(g) + c_w, _ = community_detection_greedy_modularity(g) @test c_w == expected_c_w @@ -52,7 +52,7 @@ end expected_c_w = [1, 1, 1, 1, 2, 2, 2, 1, 3, 3, 2, 1, 1, 1, 3, 3, 2, 1, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] expected_q_w = 0.4345214669889994 - c_w = community_detection_greedy_modularity(g, weights=w) + c_w, _ = community_detection_greedy_modularity(g, weights=w) @test c_w == expected_c_w @test modularity(g,c_w, distmx=w) ≈ expected_q_w end @@ -63,7 +63,7 @@ end for i=1:5 add_edge!(g, 2*i - 1, 2*i) end - c = community_detection_greedy_modularity(g) + c, _ = community_detection_greedy_modularity(g) q = modularity(g, c) expected_c = [1,1,2,2,3,3,4,4,5,5] @@ -76,7 +76,7 @@ end @testset "Greedy modularity: complete graph" begin g = complete_graph(10) - c = community_detection_greedy_modularity(g) + c, _ = community_detection_greedy_modularity(g) q = modularity(g, c) expected_c = ones(Int, 10) @@ -89,7 +89,7 @@ end @testset "Greedy modularity: empty graph" begin g = SimpleGraph(10) - c = community_detection_greedy_modularity(g) + c, _ = community_detection_greedy_modularity(g) q = modularity(g, c) expected_c = Vector(1:10) @@ -104,11 +104,11 @@ end g_sbm = stochastic_block_model(99,1,[500,1000]) expected_c = [i > 500 ? 2 : 1 for i=1:1500] - c = community_detection_greedy_modularity(g_sbm) + c, _ = community_detection_greedy_modularity(g_sbm) matches1 = sum(c .== expected_c) matches2 = sum((3 .- c) .== expected_c) # complementary cluster numbers assignment - @test matches1 ≥ 0.95 * nv(g_sbm) || matches2 ≥ nv(g_sbm) + @test matches1 ≥ 0.95 * nv(g_sbm) || matches2 ≥ 0.95 * nv(g_sbm) end From 051520265ecaccc7ca916823ddfb807135a94975 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Mon, 22 Jan 2024 04:39:40 +0100 Subject: [PATCH 18/20] add fast algorithm working (mind the precision) --- src/community/greedy_modularity_fast.jl | 61 ++++++++++--------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/src/community/greedy_modularity_fast.jl b/src/community/greedy_modularity_fast.jl index c98003f71..f6df53584 100644 --- a/src/community/greedy_modularity_fast.jl +++ b/src/community/greedy_modularity_fast.jl @@ -4,26 +4,23 @@ function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::A end n = nv(g) c = Vector{Int}(1:n) - dq_dict, dq_heap, dq_global_heap, a = compute_dq(g, c, weights) + dq_dict, dq_heap, dq_global_heap, a, m = compute_dq(g, c, weights) modularity_type = float(eltype(weights)) - for _ in 2:n - try - (u,v), dq = dequeue_pair!(dq_global_heap) - catch err - if isa(err, BoundsError) - break - end - throw(error("unknown error at call to priority queue")) + empty_row_heap = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse) # placeholder, lasts empty forever + while length(dq_global_heap) > 1 + (u,v), (dq, _) = dequeue_pair!(dq_global_heap) + if dq <= zero(modularity_type) + return rewrite_class_ids(c) end dequeue!(dq_heap[u]) if !isempty(dq_heap[u]) - enqueue!(dq_global_heap, first(dq_heap[u])) + enqueue!(dq_global_heap, peek(dq_heap[u])) end - if first(dq_heap[v])[1] == (v,u) + if peek(dq_heap[v])[1] == (v,u) dequeue!(dq_heap[v]) delete!(dq_global_heap, (v,u)) if !isempty(dq_heap[v]) - enqueue!(dq_global_heap, first(dq_heap[v])) + enqueue!(dq_global_heap, peek(dq_heap[v])) end else delete!(dq_heap[v], (v,u)) @@ -31,8 +28,8 @@ function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::A c[c .== u] .= v - neighbors_u = keys(dq_dict[u]) - neighbors_v = keys(dq_dict[v]) + neighbors_u = setdiff(keys(dq_dict[u]), v) + neighbors_v = setdiff(keys(dq_dict[v]), u) neighbors_all = union(neighbors_u, neighbors_v) neighbors_common = intersect(neighbors_u, neighbors_v) @@ -40,23 +37,23 @@ function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::A if w in neighbors_common dq_w = dq_dict[v][w] + dq_dict[u][w] elseif w in neighbors_v - dq_w = dq_dict[v][w] - 2 * a[u] * a[w] + dq_w = dq_dict[v][w] - a[u] * a[w] / m^2 else - dq_w = dq_dict[v][w] - 2 * a[v] * a[w] + dq_w = dq_dict[u][w] - a[v] * a[w] / m^2 end for (row, column) in ((v, w), (w, v)) dq_heap_row = dq_heap[row] dq_dict[row][column] = dq_w if !isempty(dq_heap_row) - oldmax = first(dq_heap_row) + oldmax = peek(dq_heap_row) else oldmax = nothing end - dq_heap_row[(row,column)] = dq_w # update or insert + dq_heap_row[(row,column)] = (dq_w, (-row, -column)) # update or insert if isnothing(oldmax) - dq_global_heap[(row, column)] = dq_w + dq_global_heap[(row, column)] = (dq_w, (-row, -column)) else - newmax = first(dq_heap_row) + newmax = peek(dq_heap_row) if newmax != oldmax delete!(dq_global_heap, oldmax[1]) ## is it still there? enqueue!(dq_global_heap, newmax) @@ -70,11 +67,11 @@ function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::A if w != v for (row, column) in ((w,u), (u,w)) dq_heap_row = dq_heap[row] - if first(dq_heap_row)[1] == (row, column) + if peek(dq_heap_row)[1] == (row, column) dequeue!(dq_heap_row) delete!(dq_global_heap, (row, column)) if !isempty(dq_heap_row) - enqueue!(dq_global_heap, first(dq_heap_row)) + enqueue!(dq_global_heap, peek(dq_heap_row)) end else delete!(dq_heap_row, (row, column)) @@ -83,7 +80,7 @@ function community_detection_greedy_modularity_fast(g::AbstractGraph; weights::A end end delete!(dq_dict, u) - dq_heap[u] = PriorityQueue{Tuple{Int, Int}, modularity_type}(Base.Order.Reverse) # placeholder, lasts empty forever + dq_heap[u] = empty_row_heap a[v] += a[u] a[u] = 0 end @@ -98,8 +95,7 @@ function compute_dq( m = sum(w[src(e), dst(e)] for e in edges(g); init=Q_zero) * 2 n_groups = maximum(c) a = zeros(modularity_type, n_groups) - # m == 0 && return 0.0, spzeros(modularity_type, n_groups, n_groups), a - dq_dict = DefaultDict{Int, DefaultDict}(() -> DefaultDict{Int,modularity_type}(Q_zero)) + dq_dict = Dict(v => DefaultDict{Int, modularity_type}(Q_zero) for v in vertices(g)) for u in vertices(g) for v in neighbors(g, u) @@ -114,15 +110,8 @@ function compute_dq( end end - dq_heap = Dict(u=>PriorityQueue{Tuple{Int, Int}, modularity_type}(Base.Order.Reverse, (u,v)=> dq for (v, dq) in dq_dict[u]) for u in vertices(g)) + dq_heap = Dict(u=>PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, (u,v)=> (dq, (-u,-v)) for (v, dq) in dq_dict[u]) for u in vertices(g)) v_connected = filter(v -> !isempty(dq_heap[v]), vertices(g)) - global_heap = PriorityQueue{Tuple{Int, Int}, modularity_type}(Base.Order.Reverse, first(dq_heap[v]) for v in v_connected) - return dq_dict, dq_heap, global_heap, a -end - -# g = SimpleGraph(4) -# add_edge!(g,1,2) -# add_edge!(g,3,4) -# w = weights(g) -# c = 1:4 -# compute_dq(g,c,w) \ No newline at end of file + global_heap = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, peek(dq_heap[v]) for v in v_connected) + return dq_dict, dq_heap, global_heap, a, m +end \ No newline at end of file From e777f073590dbbaa97be55cf74f9f4e2a9f244ee Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Mon, 22 Jan 2024 11:02:39 +0100 Subject: [PATCH 19/20] Fix type instability --- src/community/greedy_modularity_fast.jl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/community/greedy_modularity_fast.jl b/src/community/greedy_modularity_fast.jl index f6df53584..26ea9da28 100644 --- a/src/community/greedy_modularity_fast.jl +++ b/src/community/greedy_modularity_fast.jl @@ -95,7 +95,12 @@ function compute_dq( m = sum(w[src(e), dst(e)] for e in edges(g); init=Q_zero) * 2 n_groups = maximum(c) a = zeros(modularity_type, n_groups) - dq_dict = Dict(v => DefaultDict{Int, modularity_type}(Q_zero) for v in vertices(g)) + + typical_dict = DefaultDict{Int, modularity_type}(Q_zero) + dq_dict = Dict{Int,typeof(typical_dict)}() + for v in vertices(g) + dq_dict[v] = DefaultDict{Int, modularity_type}(Q_zero) + end for u in vertices(g) for v in neighbors(g, u) @@ -109,9 +114,14 @@ function compute_dq( dq_dict[u][v] = w / m - a[c[u]] * a[c[v]] / m^2 end end + + typical_queue = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse) + dq_heap = Dict{Int,typeof(typical_queue)}() + for u in vertices(g) + dq_heap[u] = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, (u, v) => (dq, (-u, -v)) for (v, dq) in dq_dict[u]) + end - dq_heap = Dict(u=>PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, (u,v)=> (dq, (-u,-v)) for (v, dq) in dq_dict[u]) for u in vertices(g)) v_connected = filter(v -> !isempty(dq_heap[v]), vertices(g)) global_heap = PriorityQueue{Tuple{Int, Int}, Tuple{modularity_type, Tuple{Int, Int}}}(Base.Order.Reverse, peek(dq_heap[v]) for v in v_connected) return dq_dict, dq_heap, global_heap, a, m -end \ No newline at end of file +end From ac77ee5580a8b3ab9bbc50d2b167570d2ae8e279 Mon Sep 17 00:00:00 2001 From: Oleg Fafurin Date: Mon, 22 Jan 2024 15:25:20 +0100 Subject: [PATCH 20/20] export fast algo --- src/Graphs.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Graphs.jl b/src/Graphs.jl index bead4b7a2..1248ea861 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -309,6 +309,7 @@ export # community modularity, community_detection_greedy_modularity, + community_detection_greedy_modularity_fast, core_periphery_deg, local_clustering, local_clustering_coefficient,