-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSPM_fast.py
211 lines (155 loc) · 8.15 KB
/
SPM_fast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import numpy as np
import math
from tqdm import tqdm, trange
import scipy.sparse as sps
class SPM(object):
def __init__(self, A, target, p=None, delta_A=None, n_remove_link=None, n_eigen=300):
"""
:param A: original adjacency matrix that would be decomposed in A = A_r + delta_A. NEED TO BE DENSE
:param p: fraction of links to generate a perturbation set delta_E
:param delta_A: particular perturbation. If None, a random perturbation is computed
:param k: Number of eigevalues (and eigenvectors) to extract. Max=A.shape[0]-1 but it will slow down a lot
:return:
"""
assert (p is not None and delta_A is None and n_remove_link is None) or \
(p is None and delta_A is not None and n_remove_link is not None) , "Wrong input choice!"
assert (sps.isspmatrix_csr(A) or sps.isspmatrix_csc(A)), "A is not csr matrix"
self.A = A
self.p = p
self.N = self.A.shape[0]
self.delta_A = delta_A
self.n_removed_link = n_remove_link
self.n_eigen = n_eigen
self.target = np.sort(target)
def split_init(self, isSingle=True):
if self.delta_A is not None:
# self.n_removed_link = n_remove_link
# self.delta_A = delta_A
self.A_r = self.A - self.delta_A
self.links = np.where(np.triu(self.delta_A) == 1)
self.links = list(zip(self.links[0], self.links[1]))
else:
if isSingle:
np.random.seed(42)
self.n_removed_link = math.ceil(self.N * self.p)
# Extract a fraction p of links from A to generate delta_A
links = sps.find(sps.triu(self.A))[:2] # Tuple (x, y) with x and y are lists of indices, we consider only
# the upper triangle of the matrix and reflect the symmetry of the
# matrix in self.create_adj_matrix()
indices = np.random.randint(0, links[0].shape[0], self.n_removed_link)
r = links[0][indices]
c = links[1][indices]
self.links = list(zip(r,c))
# Create delta_A
rows = np.append(r,c)
cols = np.append(c,r)
self.delta_A = self.create_adj_matrix(self.N, rows, cols)
# Create A_r = A - delta_A
self.A_r = self.A - self.delta_A
def create_adj_matrix(self, N, rows, cols):
data = np.ones(rows.shape[0])
matrix = sps.csr_matrix((data, (rows, cols)), shape=(N, N))
return matrix
def compute_delta_lambda(self, v, delta_A):
"""
v : eigenvectors
delta_A : perturbation matrix
"""
res = np.empty(self.n_eigen)
for k in range(self.n_eigen):
# num = v[:, k].T.dot(delta_A).dot(v[:, k])
# Change delta_A with xT in order to use sparse dot: sparse matrix need to be before
num = (delta_A.T.dot(v[:, k])).T.dot(v[:, k])
den = v[:, k].T.dot(v[:, k])
res[k] = num / den
return res
def compute_perturbed_matrix(self, A_r, delta_A):
self.pbar.set_description("Computing Eigen")
lambda_k, x_k = sps.linalg.eigsh(A_r, k=self.n_eigen) # Eigenvalues/eigenvector extraction for symmetric matrix
# Normalize lambda_k by its norm
lambda_k /= np.linalg.norm(lambda_k)
if len(set(lambda_k)) != len(lambda_k):
print("Degenerate-Eigenvalues! --> Correction..")
delta_lambda_k = None
exit()
else:
#print("Non-degenerate Eigenvalues. Compute delta_lambda_k")
self.pbar.set_description("Computing delta lambdas")
delta_lambda_k = self.compute_delta_lambda(x_k, delta_A)
# Compute Perturbed Matrix
# perturbed_A = np.zeros((self.N, self.N), dtype=np.float16))
# Perturbed_A : Non-squared matrix. Si restringe il calcolo ai soli elementi necessari
perturbed_A = np.zeros((len(self.target), self.N), dtype=np.float16)
x_k = x_k.astype(np.float16)
self.pbar.set_description("Computing Perturbed matrix")
# perturbed_A = perturbed_A + ((lambda_k + delta_lambda_k)*(x_k.dot(x_k.T)))
tmp_outer = np.empty((len(self.target), self.N), dtype=np.float16)
for k in tqdm(range(self.n_eigen), desc="Compute Perturbed A" , leave=False):
# perturbed_A += (lambda_k[k] + delta_lambda_k[k]) * np.dot(x_k[:, k][:, None], x_k[:, k][None, :])
x_k_target = np.take(x_k[:, k], self.target) * (lambda_k[k] + delta_lambda_k[k])
np.outer(x_k_target, x_k[:, k], out=tmp_outer)
np.add(perturbed_A, tmp_outer, out=perturbed_A)
# perturbed_A Cleaning: check the existing link in A_r [i.e. in E_r] and set those positions = -np.inf in
# perturbed_A such that these links don't occur in the ranking to extract the most probable missing link.
# What we are doing is to consider the set U - E_r, where U is the universe of possible links given N nodes
#perturbed_A[np.where(A_r == 1)] = -np.inf
self.pbar.set_description("Remove seen ")
# perturbed_A[sps.find(A_r)[:2]] = -np.inf # Filter seen sul main
# Moreover we set to -np.inf also the element (i,i) in the diagonal since we do not have self-links
# np.fill_diagonal(perturbed_A, -np.inf)
# Per ogni riga i della matrice si deve mettere il valore di quell'utente a -np.inf perché non ci possono essere self-links
for i, e in enumerate(self.target):
perturbed_A[i, e] = -np.inf
return perturbed_A
def extract_ranking(self, perturbed_A, at):
# ranking = perturbed_A.ravel().argsort()[::-1]
# Since the matrix is symmetric, we can consider the upper triangle and consider the first half of results
# (the second half represents the element of the low triangle of the matrix, set to zero)
#print("Get ranking..")
up_matrix_ranking = np.triu(perturbed_A).ravel().argsort()[::-1]
ranking = up_matrix_ranking[:len(up_matrix_ranking) // 2]
ranking = [(int(x / self.N), x % self.N) for x in ranking][:at]
return ranking
def evaluate(self, predicted_links):
# We need to replicate each link such that there are both (x,y) and (y, x)
preds = set(predicted_links).union(set([(y, x) for (x, y) in predicted_links]))
links = set(self.links).union(set([(y, x) for (x, y) in self.links]))
n_missing_links = len(links)
correct_predicted = len(set(preds).intersection(links))
return correct_predicted / n_missing_links
def run(self, verbose=True):
self.split_init(isSingle=True)
self.perturbed_A = self.compute_perturbed_matrix(self.A_r, self.delta_A)
ranks = self.extract_ranking(self.perturbed_A, self.n_removed_link)
if verbose:
print(f'Missing Links: {self.links}')
print(f'Predicted Links {ranks}')
print(f"Structural Consistency: {self.evaluate(ranks)}")
return self.links, ranks
def k_runs(self, k, verbose=True, save=False):
assert (k != 1), "k must be != 1, call run otherwise"
# Progress bar
self.pbar = tqdm(range(k))
#res = np.zeros((self.N, self.N))
res = np.zeros((len(self.target), self.N))
#for i in trange(k, desc='Iteration'):
for i in self.pbar:
self.pbar.set_description("Split")
self.split_init(isSingle=False)
res += self.compute_perturbed_matrix(self.A_r, self.delta_A)
# Reset delta_A
self.delta_A = None
res /= k
if save:
np.save(f"SPM_similarity_{self.n_eigen}_{k}.npy", res)
"""
# TODO non ha senso visto che self.links cambia da iterazione ad iterazione
self.pbar.set_description("Extract rankings")
ranks = self.extract_ranking(res, self.n_removed_link)
if verbose:
print(f'Missing Links: {self.links}')
print(f'Predicted Links {ranks}')
print(f"Structural Consistency: {self.evaluate(ranks)}")
return self.links, ranks
"""
return res