-
Notifications
You must be signed in to change notification settings - Fork 1
/
Cluster.jl
147 lines (118 loc) · 3.97 KB
/
Cluster.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# NOTE: the concrete types below are declared with `mutable struct`. The older `type`
# keyword was deprecated in Julia 0.6 (the release that introduced the `abstract type`
# syntax used here) and removed in 1.0. They must stay mutable because the statistics
# helpers in this file update `n` in place.

"""
    AbstractCluster

Common supertype of all cluster records in this file. The generic helpers
(`add_stat!`, `sub_stat!`) read and write the fields `n` and `ss` of any
`AbstractCluster`, so every concrete subtype carries both.
"""
abstract type AbstractCluster end

"""
    Cluster(n, ss)

Plain cluster record: a sample count together with its sufficient statistics.
"""
mutable struct Cluster <: AbstractCluster
    n::Int                 # Number of samples
    ss::Vector{Float64}    # Sufficient statistics
end

"""
    DataCluster(ids, n, ss)

Cluster that additionally tracks which data items belong to it.
"""
mutable struct DataCluster <: AbstractCluster
    ids::Set{Int}          # IDs of data in the WORKER
    n::Int                 # Number of samples
    ss::Vector{Float64}    # Sufficient statistics
end

"""
    SubCluster(super, n, ss)

Cluster that records the supercluster it has been merged into.
"""
mutable struct SubCluster <: AbstractCluster
    super::Int             # ID of the supercluster this sub cluster is merged into
    n::Int                 # Number of samples
    ss::Vector{Float64}    # Sufficient statistics
end

"""
    SuperCluster(ids, n, ss)

Cluster that aggregates several sub clusters, identified by their IDs.
"""
mutable struct SuperCluster <: AbstractCluster
    ids::Set{Int}          # IDs of sub clusters
    n::Int                 # Number of samples
    ss::Vector{Float64}    # Sufficient statistics
end
# Int-keyed dictionaries, one alias per concrete cluster type.
# `DataClusterCollection` is indexed by cluster ID in `add_data!` / `rm_data!` below.
const ClusterCollection      = Dict{Int, Cluster}
const DataClusterCollection  = Dict{Int, DataCluster}
const SubClusterCollection   = Dict{Int, SubCluster}
const SuperClusterCollection = Dict{Int, SuperCluster}
## Constructor
"""
    Cluster(hp::HyperParameter)

Create an empty cluster: zero samples and an all-zero statistics vector of
length `hp.ds`.
"""
Cluster(hp::HyperParameter) = Cluster(0, zeros(Float64, hp.ds))

# `copy` is sufficient below: `ss` is a flat vector of floats, so a shallow copy is
# already fully independent and avoids the bookkeeping overhead of `deepcopy`.
"""
    Cluster(c::AbstractCluster)

Create a plain `Cluster` holding the sample count and statistics of `c`. The
statistics vector is copied, so the result does not alias `c.ss`.
"""
Cluster(c::AbstractCluster) = Cluster(c.n, copy(c.ss))
"""
    DataCluster(hp::HyperParameter)

Create an empty data cluster: no data IDs, zero samples and an all-zero
statistics vector of length `hp.ds`.
"""
DataCluster(hp::HyperParameter) = DataCluster(Set{Int}(), 0, zeros(Float64, hp.ds))

# `copy` is sufficient below: `ss` is a flat vector of floats, so a shallow copy is
# already fully independent and avoids the bookkeeping overhead of `deepcopy`.
"""
    DataCluster(c::AbstractCluster)

Create a data cluster holding the sample count and a copy of the statistics of
`c`. The ID set of the result starts out empty, even though `n` is taken from `c`.
"""
DataCluster(c::AbstractCluster) = DataCluster(Set{Int}(), c.n, copy(c.ss))
"""
    SuperCluster(hp::HyperParameter)

Create an empty supercluster: no sub cluster IDs, zero samples and an all-zero
statistics vector of length `hp.ds`.
"""
SuperCluster(hp::HyperParameter) = SuperCluster(Set{Int}(), 0, zeros(Float64, hp.ds))

# The one-element set is built from an explicit tuple instead of `Set{Int}(id)`, which
# only worked because a scalar `Int` happens to be iterable. `copy` replaces `deepcopy`
# because `ss` is a flat vector of floats.
"""
    SuperCluster(c::AbstractCluster, id::Int)

Create a supercluster that contains exactly one sub cluster: its ID set is
`{id}` and its sample count and statistics are copied from `c`.
"""
SuperCluster(c::AbstractCluster, id::Int) =
    SuperCluster(Set{Int}((id,)), c.n, copy(c.ss))
## Manipulate Statistics
"""
    add_stat!(c1::AbstractCluster, c2::AbstractCluster)

Accumulate the statistics of `c2` into `c1` in place: `c2.ss` is added
element-wise to `c1.ss` and `c2.n` is added to `c1.n`.

Returns the updated `c1.n`. Throws an `AssertionError` when the two statistics
vectors differ in length; in that case `c1` is left unchanged.
"""
function add_stat!(c1::AbstractCluster, c2::AbstractCluster)
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    @assert length(c1.ss) == length(c2.ss)
    # The length check above is what makes skipping bounds checks safe here.
    @simd for d in eachindex(c1.ss)
        @inbounds c1.ss[d] += c2.ss[d]
    end
    c1.n += c2.n
    return c1.n
end
"""
    sub_stat!(c1::AbstractCluster, c2::AbstractCluster)

Remove the statistics of `c2` from `c1` in place: `c2.ss` is subtracted
element-wise from `c1.ss` and `c2.n` is subtracted from `c1.n`.

Returns `nothing`. Throws an `AssertionError` when the statistics vectors differ
in length or when the subtraction would make `c1.n` negative.
"""
function sub_stat!(c1::AbstractCluster, c2::AbstractCluster)
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    @assert length(c1.ss) == length(c2.ss)
    # Check the count before touching anything so that a failed call leaves `c1`
    # intact (previously the check ran after `c1` had already been modified).
    @assert c1.n >= c2.n
    @simd for d in eachindex(c1.ss)
        @inbounds c1.ss[d] -= c2.ss[d]
    end
    c1.n -= c2.n
    return nothing
end
# function copy_stat!(c1::AbstractCluster, c2::AbstractCluster)
# assert(length(c1.ss)==length(c2.ss))
# c1.n = c2.n
# copy!(c1.ss,c2.ss)
# end
# function empty_stat!(ac::AbstractCluster)
# ac.n = 0
# fill!(ac.ss,0)
# end
## Data Cluster Basics
"""
    is_empty_cluster(dc::DataCluster)

Return `true` when `dc` holds no samples (`dc.n == 0`), `false` otherwise.

When the count is zero, two consistency checks are run and an `AssertionError`
is thrown if either fails: the ID set must be empty, and the sum of squares of
`dc.ss` must be below `eps(Float32)`.
"""
function is_empty_cluster(dc::DataCluster)
    dc.n == 0 || return false
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    # NOTE(review): the original author flagged that the ID check may not hold once
    # another cluster's statistics have been added into this one -- TODO confirm.
    @assert isempty(dc.ids)
    # Tolerance rather than exact zero: repeated add/subtract leaves rounding residue.
    @assert sum(abs2, dc.ss) < eps(Float32)
    return true
end
"""
    merge_dc!(c1::DataCluster, c2::DataCluster)

Merge `c2` into `c1` in place: statistics and sample count are accumulated via
`add_stat!` and the data IDs of `c2` are added to `c1.ids`.

Returns the updated `c1.ids`. Unless the global flag `NOASSERT` is `true`, an
`AssertionError` is thrown when the two ID sets overlap.
"""
function merge_dc!(c1::DataCluster, c2::DataCluster)
    # Run the disjointness check before mutating so a failed merge leaves `c1`
    # untouched. `@assert` replaces the function form `assert`, which was
    # deprecated in Julia 0.7 and no longer exists in 1.0.
    NOASSERT || @assert(isempty(intersect(c1.ids, c2.ids)))
    add_stat!(c1, c2)
    return union!(c1.ids, c2.ids)
end
## Add / Remove data::Vector in DataCluster
"""
    add_data!(c::DataCluster, x::AbstractVector, id::Int, hp::HyperParameter)

Add the data item `x` with identifier `id` to cluster `c`: the statistics are
updated through `add_ss!`, the sample count grows by one and `id` is recorded in
`c.ids`.

Returns the updated `c.ids`. Throws an `AssertionError` when `id` is already a
member of the cluster.
"""
function add_data!(c::DataCluster, x::AbstractVector, id::Int, hp::HyperParameter)
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    @assert !(id in c.ids)
    # `add_ss!` is defined elsewhere; per the original note it is expected to do the
    # type and length check of `x`.
    add_ss!(c.ss, x, hp)
    c.n += 1
    # `push!` states the intent (insert one element) instead of `union!` with a scalar.
    return push!(c.ids, id)
end
"""
    rm_data!(c::DataCluster, x::AbstractVector, id::Int, hp::HyperParameter)

Remove the data item `x` with identifier `id` from cluster `c`: the statistics
are updated through `sub_ss!`, the sample count shrinks by one and `id` is
dropped from `c.ids`.

Returns the updated `c.ids`. Throws an `AssertionError` when `id` is not a
member of the cluster.
"""
function rm_data!(c::DataCluster, x::AbstractVector, id::Int, hp::HyperParameter)
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    @assert id in c.ids
    # `sub_ss!` is defined elsewhere; per the original note it is expected to do the
    # type and length check of `x`.
    sub_ss!(c.ss, x, hp)
    c.n -= 1
    # `delete!` states the intent (remove one element) instead of `setdiff!` with a
    # scalar.
    return delete!(c.ids, id)
end
## Add / Remove data::Vector in DataClusterCollection
## TODO: it seems only CRP uses this function; consider merging it into CRP
"""
    add_data!(cc::DataClusterCollection, cid::Int, x::AbstractVector, did::Int,
              hp::HyperParameter)

Add the data item `x` with identifier `did` to the cluster stored under key `cid`
in `cc`. When `cc` has no entry for `cid` yet, a fresh `DataCluster(hp)` is
inserted first.

Returns whatever the single-cluster `add_data!` method returns.
"""
function add_data!(
    cc::DataClusterCollection,
    cid::Int,
    x::AbstractVector,
    did::Int,
    hp::HyperParameter,
)
    # Lazily create the destination cluster on first use of this key.
    if !haskey(cc, cid)
        cc[cid] = DataCluster(hp)
    end
    target = cc[cid]
    return add_data!(target, x, did, hp)
end
"""
    rm_data!(cc::DataClusterCollection, cid::Int, x::AbstractVector, did::Int,
             hp::HyperParameter)

Remove the data item `x` with identifier `did` from the cluster stored under key
`cid` in `cc`, and drop that cluster from `cc` once it has become empty.

Returns `cc` when the cluster was deleted and `false` otherwise. Throws an
`AssertionError` when `cc` has no entry for `cid`.
"""
function rm_data!(
    cc::DataClusterCollection,
    cid::Int,
    x::AbstractVector,
    did::Int,
    hp::HyperParameter,
)
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    @assert haskey(cc, cid)
    dc = cc[cid]    # look the cluster up once instead of on every use
    rm_data!(dc, x, did, hp)
    # Short-circuit: evaluates to `false` unless the cluster is now empty.
    return is_empty_cluster(dc) && delete!(cc, cid)
end
## Add / Remove subcluster in supercluster
## TODO: it seems only recv_cluster(::Center) uses this function
"""
    add_sub!(s::SuperCluster, c::AbstractCluster, cid::Int)

Attach the sub cluster `c`, identified by `cid`, to supercluster `s`: its
statistics are accumulated into `s` via `add_stat!` and `cid` is recorded in
`s.ids`.

Returns the updated `s.ids`. Throws an `AssertionError` when `cid` is already
part of `s`.
"""
function add_sub!(s::SuperCluster, c::AbstractCluster, cid::Int)
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    @assert !(cid in s.ids)
    add_stat!(s, c)
    # `push!` states the intent (insert one element) instead of `union!` with a scalar.
    return push!(s.ids, cid)
end
"""
    rm_sub!(s::SuperCluster, c::AbstractCluster, cid::Int)

Detach the sub cluster `c`, identified by `cid`, from supercluster `s`: its
statistics are removed from `s` via `sub_stat!` and `cid` is dropped from
`s.ids`.

Returns the updated `s.ids`. Throws an `AssertionError` when `cid` is not part
of `s`.
"""
function rm_sub!(s::SuperCluster, c::AbstractCluster, cid::Int)
    # `@assert` replaces the function form `assert`, which was deprecated in
    # Julia 0.7 and no longer exists in 1.0.
    @assert cid in s.ids
    sub_stat!(s, c)
    # `delete!` states the intent (remove one element) instead of `setdiff!` with a
    # scalar.
    return delete!(s.ids, cid)
end
## Merge two Super Cluster
"""
    merge_sc!(sc1::SuperCluster, sc2::SuperCluster)

Merge `sc2` into `sc1` in place: statistics and sample count are accumulated via
`add_stat!` and the sub cluster IDs of `sc2` are added to `sc1.ids`.

Returns the updated `sc1.ids`. Unless the global flag `NOASSERT` is `true`, an
`AssertionError` is thrown when the two ID sets overlap.
"""
function merge_sc!(sc1::SuperCluster, sc2::SuperCluster)
    # Run the disjointness check before mutating so a failed merge leaves `sc1`
    # untouched. `@assert` replaces the function form `assert`, which was
    # deprecated in Julia 0.7 and no longer exists in 1.0.
    NOASSERT || @assert(isempty(intersect(sc1.ids, sc2.ids)))
    add_stat!(sc1, sc2)
    return union!(sc1.ids, sc2.ids)
end
"""
    merge_sc(sc1::SuperCluster, sc2::SuperCluster)

Non-mutating counterpart of `merge_sc!`: return a new supercluster holding the
merge of `sc1` and `sc2`. The work is done on a deep copy of `sc1`, so `sc1`
itself is not modified.
"""
function merge_sc(sc1::SuperCluster, sc2::SuperCluster)
    merged = deepcopy(sc1)    # private copy that absorbs `sc2`
    merge_sc!(merged, sc2)
    return merged
end