Skip to content

Commit a959891

Browse files
authored
SiNE embeddings
1 parent adc6e69 commit a959891

File tree

6 files changed

+676
-0
lines changed

6 files changed

+676
-0
lines changed

SiNE/graph.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import networkx as nx
2+
import numpy as np
3+
import csv
4+
5+
# From https://github.com/CompNet/SignedCentrality
6+
7+
class Vocabulary(object):
    """Two-way mapping between graph nodes and integer ids.

    The id counter starts at 1 and is incremented *before* every
    assignment, so the first registered node receives id 2 and the ids
    0 and 1 are never handed out by this class.
    """

    def __init__(self, graph):
        """Create the vocabulary from every node yielded by ``graph.nodes()``."""
        self._id2node = {}
        self._node2id = {}
        self._curr_id = 1
        self._register(graph.nodes())

    def _register(self, nodes):
        """Give the next free id to each node that is not known yet."""
        for node in nodes:
            if node in self._node2id:
                continue
            self._curr_id += 1
            self._node2id[node] = self._curr_id
            self._id2node[self._curr_id] = node

    def id2node(self, id):
        """Return the node registered under ``id`` (KeyError if unknown)."""
        return self._id2node[id]

    def node2id(self, node):
        """Return the id registered for ``node`` (KeyError if unknown)."""
        return self._node2id[node]

    def augment(self, graph):
        """Register the nodes of ``graph`` that are not in the vocabulary yet."""
        self._register(graph.nodes())

    def __len__(self):
        """Return the highest id assigned so far (1 while no node is registered)."""
        return self._curr_id
33+
34+
35+
class Graph(object):
    """Signed graph kept as two graphs: one for positive and one for negative edges.

    A shared ``Vocabulary`` is built over the nodes of both graphs so that
    every node has a single integer id.
    """

    def __init__(self, positive_graph, negative_graph):
        """Store both edge graphs and index their nodes in one vocabulary."""
        self.positive_graph = positive_graph
        self.negative_graph = negative_graph
        self.vocab = Vocabulary(positive_graph)
        self.vocab.augment(negative_graph)

    def get_positive_edges(self):
        """Return the edges of the positive graph."""
        return self.positive_graph.edges()

    def get_negative_edges(self):
        """Return the edges of the negative graph."""
        return self.negative_graph.edges()

    def __len__(self):
        """Return ``len(self.vocab)``."""
        return len(self.vocab)

    def get_triplets(self, p0=True, ids=True):
        """Collect ``[xi, xj, xk]`` rows for training.

        For every positive edge ``xi -> xj``: if ``xj`` is a node of the
        negative graph, one row is emitted per negative neighbour ``xk`` of
        ``xj``; otherwise, when ``p0`` is true, a single row with ``0`` in
        the third position is emitted.

        Args:
            p0: Emit the ``[xi, xj, 0]`` placeholder rows described above.
            ids: Translate nodes to vocabulary ids instead of returning the
                raw node objects.

        Returns:
            A NumPy array of the collected rows. When nothing was collected
            the array has shape ``(0, 3)`` so callers can still index
            columns.
        """
        if ids:
            convert = self.vocab.node2id
        else:
            def convert(node):
                return node

        triplets = []
        for xi in self.positive_graph.nodes():
            for xj in self.positive_graph[xi]:
                if xj in self.negative_graph:
                    for xk in self.negative_graph[xj]:
                        triplets.append([convert(xi), convert(xj), convert(xk)])
                elif p0:
                    triplets.append([convert(xi), convert(xj), 0])

        if not triplets:
            # np.array([]) would be 1-D with shape (0,); keep the column axis.
            return np.empty((0, 3))
        return np.array(triplets)

    @staticmethod
    def read_from_file(filepath, delimiter=',', directed=False):
        """Load a signed graph from a delimited ``u, v, weight`` text file.

        The first line is treated as a header and skipped. Edges with a
        positive weight go to the positive graph, edges with a negative
        weight to the negative graph, and zero-weight edges are dropped.
        Blank lines are ignored.

        Args:
            filepath: Path of the file to read.
            delimiter: Field separator used on each line.
            directed: Build ``nx.DiGraph`` objects instead of ``nx.Graph``.

        Returns:
            A ``Graph`` wrapping the two edge graphs.

        Raises:
            ValueError: If a non-blank line does not split into exactly
                three fields or its weight is not a number.
        """
        graph_type = nx.DiGraph if directed else nx.Graph
        positive_graph = graph_type()
        negative_graph = graph_type()
        # The context manager closes the file even when a line fails to parse.
        with open(filepath) as file:
            # Skip the header line; the default keeps an empty file from raising.
            next(file, None)
            for line in file:
                line = line.strip()
                if not line:
                    continue
                u, v, w = line.split(delimiter)
                w = float(w)
                if w > 0:
                    positive_graph.add_edge(u, v, weight=w)
                elif w < 0:
                    negative_graph.add_edge(u, v, weight=w)
        return Graph(positive_graph, negative_graph)
93+
94+
95+
96+
97+
98+
99+

SiNE/learn_SiNE_emb.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from SiNEmaster.graph import *
2+
from SiNEmaster.stemmodels import SiNE, fit_sine_model as fit_model
3+
import pickle
4+
5+
# Pickled list of labels; it is indexed below with the graph number.
# NOTE(review): pickle.load can execute arbitrary code -- only point this at
# a trusted file.
labels_path = "labels.pickle"
graphs_path = "data/CCS"

embeddings = []
# NOTE(review): `labels` is filled in step with `embeddings` but is never
# written out by this script -- confirm whether it should be saved as well.
labels = []
with open(labels_path, "rb") as f:
    lb = pickle.load(f)


for i in range(2545):
    try:
        graph = Graph.read_from_file("%s/%s.csv" % (graphs_path, i), delimiter=',', directed=True)
        # Only graphs with more than one edge in total get an embedding.
        if len(graph.get_positive_edges()) + len(graph.get_negative_edges()) > 1:

            model = fit_model(
                num_nodes=len(graph),
                dims_arr=[32, 32],
                triples=graph.get_triplets(),
                triples0=None,
                delta=1.0,
                delta0=0.5,
                batch_size=300,
                batch_size0=300,
                epochs=30,
                lr=0.01,
                lam=0.0001,
                lr_decay=0.0,
                p=2,
                print_loss=False,
                p0=False,
            )

            embedding = model.get_x()
            # Keeps only the first row of what get_x() returns
            # (presumably one row per node -- TODO confirm against stemmodels).
            embedding = embedding.detach().numpy().tolist()[0]
            embeddings.append(embedding)
            labels.append(lb[i])
            print(i)
    except Exception as exc:
        # Best-effort loop: a failing graph is reported and skipped.
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit stop the run,
        # and the message says which graph failed and why.
        print("error", i, repr(exc))

with open("out/SiNE/sine_embeddings.pkl", "wb") as f:
    pickle.dump(embeddings, f)

SiNE/model.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import torch
2+
import torch.nn as nn
3+
from torch.nn.parameter import Parameter
4+
from torch.autograd import Variable
5+
import numpy as np
6+
import torch.optim as optim
7+
8+
# From https://github.com/CompNet/SignedCentrality
9+
10+
def hadamard(x, y):
    """Return the product ``x * y`` (element-wise for NumPy arrays)."""
    product = x * y
    return product
12+
13+
14+
def average(x, y):
    """Return the arithmetic mean of ``x`` and ``y``."""
    total = x + y
    return total / 2.0
16+
17+
18+
def l1(x, y):
    """Return the element-wise absolute difference ``|x - y|``."""
    difference = x - y
    return np.abs(difference)
20+
21+
22+
def l2(x, y):
    """Return the element-wise squared difference ``(x - y) ** 2``."""
    difference = x - y
    return np.power(difference, 2)
24+
25+
26+
def concat(x, y):
    """Concatenate two feature arrays.

    Arrays with two or more dimensions are joined along axis 1, as before.
    One-dimensional vectors are joined end to end instead, because asking
    ``np.concatenate`` for axis 1 on 1-D input raises an error.

    Args:
        x: First array.
        y: Second array; must be compatible with ``x`` for concatenation.

    Returns:
        The concatenated NumPy array.
    """
    axis = 1 if np.ndim(x) > 1 else 0
    return np.concatenate((x, y), axis=axis)
28+
29+
30+
# Dispatch table: operation name -> function combining two embeddings.
FEATURE_FUNCS = dict(
    l1=l1,
    l2=l2,
    concat=concat,
    average=average,
    hadamard=hadamard,
)
37+
38+
39+
class SiNE(nn.Module):
    """Embedding model trained on ``(xi, xj, xk)`` index triplets.

    An embedding table with ``num_nodes + 1`` rows of width ``dim1`` feeds
    two bias-free linear layers (``dim1 -> dim2``) that share one scalar
    bias, followed by a bias-free ``dim2 -> 1`` layer with its own scalar
    bias. ``tanh`` is applied after each stage.
    """

    def __init__(self, num_nodes, dim1, dim2):
        """Create the layers.

        Args:
            num_nodes: The embedding table gets ``num_nodes + 1`` rows.
            dim1: Width of each embedding vector.
            dim2: Width of the hidden layer.
        """
        super(SiNE, self).__init__()
        self.tanh = nn.Tanh()
        self.embeddings = nn.Embedding(num_nodes + 1, dim1)
        self.layer11 = nn.Linear(dim1, dim2, bias=False)
        self.layer12 = nn.Linear(dim1, dim2, bias=False)
        # Assigning a Parameter attribute already registers it on the module,
        # so no separate register_parameter call is needed.
        self.bias1 = Parameter(torch.zeros(1))
        self.layer2 = nn.Linear(dim2, 1, bias=False)
        self.bias2 = Parameter(torch.zeros(1))

    def forward(self, xi, xj, xk, delta):
        """Return the summed hinge loss for a batch of triplets.

        ``f_pos`` is the score of the pair ``(xi, xj)`` and ``f_neg`` the
        score of ``(xi, xk)``; each triplet contributes
        ``max(0, f_pos + delta - f_neg)``.

        Args:
            xi, xj, xk: Index tensors looked up in the embedding table.
            delta: Margin added inside the hinge.

        Returns:
            A scalar tensor holding the sum over the batch.
        """
        i_emb = self.embeddings(xi)
        j_emb = self.embeddings(xj)
        k_emb = self.embeddings(xk)

        # layer11 always receives xi; layer12 receives the partner node.
        shared = self.layer11(i_emb)
        z11 = self.tanh(shared + self.layer12(j_emb) + self.bias1)
        z12 = self.tanh(shared + self.layer12(k_emb) + self.bias1)

        f_pos = self.tanh(self.layer2(z11) + self.bias2)
        f_neg = self.tanh(self.layer2(z12) + self.bias2)

        # clamp(min=0) is max(0, .) without allocating a CPU-only zeros
        # tensor, so it works on whatever device the model lives on.
        hinge = torch.clamp(f_pos + delta - f_neg, min=0)
        return torch.sum(hinge)

    def _regularizer(self, x):
        """Return the squared L2 norm of ``x``.

        Computed as a plain sum of squares rather than ``norm(x) ** 2`` to
        avoid the needless square root.
        """
        return torch.sum(torch.pow(x, 2))

    def regularize_weights(self):
        """Return the sum of squared L2 norms over all parameters."""
        loss = 0
        for parameter in self.parameters():
            loss = loss + self._regularizer(parameter)
        return loss

    def get_embedding(self, x):
        """Return the embedding row for index ``x`` as a 1-D NumPy array."""
        index = torch.tensor(
            [x], dtype=torch.long, device=self.embeddings.weight.device
        )
        emb = self.embeddings(index)
        # detach + cpu so the conversion also works for non-CPU models.
        return emb.detach().cpu().numpy()[0]

    def get_edge_feature(self, x, y, operation='hadamard'):
        """Combine the embeddings of ``x`` and ``y`` with a named operation.

        Args:
            x, y: Indices passed to ``get_embedding``.
            operation: Key into ``FEATURE_FUNCS``.

        Raises:
            KeyError: If ``operation`` is not a key of ``FEATURE_FUNCS``.
        """
        func = FEATURE_FUNCS[operation]
        x = self.get_embedding(x)
        y = self.get_embedding(y)
        return func(x, y)
95+
96+
97+
98+
99+
def tensorfy_col(x, col_idx):
    """Return column ``col_idx`` of the 2-D array ``x`` as a long tensor."""
    return Variable(torch.LongTensor(x[:, col_idx]))
104+
105+
106+
def get_training_batch(triples, batch_size):
    """Draw a random batch of triplet rows without replacement.

    Args:
        triples: 2-D array whose columns 0, 1 and 2 hold ``xi``, ``xj`` and
            ``xk``.
        batch_size: Requested number of rows. When fewer rows are available
            the whole array is used (in random order) instead of raising.

    Returns:
        A tuple ``(xi, xj, xk)`` of long tensors, one per column.
    """
    nrows = triples.shape[0]
    # Sampling without replacement cannot return more rows than exist.
    sample_size = min(batch_size, nrows)
    rows = np.random.choice(nrows, sample_size, replace=False)
    chosen = triples[rows, :]
    xi = tensorfy_col(chosen, 0)
    xj = tensorfy_col(chosen, 1)
    xk = tensorfy_col(chosen, 2)
    return xi, xj, xk
114+
115+
116+
def fit_model(sine, triplets, delta, batch_size, epochs, alpha,
              lr=0.4, weight_decay=0.0, print_loss=True):
    """Train ``sine`` with Adagrad on randomly sampled triplet batches.

    Each epoch draws a single batch with ``get_training_batch`` and takes
    one optimizer step on the hinge loss plus ``alpha`` times the weight
    regulariser.

    Args:
        sine: Model exposing ``__call__(xi, xj, xk, delta)`` and
            ``regularize_weights()``.
        triplets: Array of triplets passed to ``get_training_batch``.
        delta: Margin forwarded to the model.
        batch_size: Rows drawn per epoch.
        epochs: Number of optimisation steps.
        alpha: Weight of the regularisation term.
        lr: Adagrad learning rate.
        weight_decay: Adagrad weight decay.
        print_loss: Print the total loss after every epoch.

    Returns:
        The trained model (the same object as ``sine``).
    """
    optimizer = optim.Adagrad(sine.parameters(), lr=lr, weight_decay=weight_decay)
    for epoch in range(epochs):
        sine.zero_grad()
        xi, xj, xk = get_training_batch(triplets, batch_size)
        loss = sine(xi, xj, xk, delta)
        loss = loss + alpha * sine.regularize_weights()
        loss.backward()
        optimizer.step()
        if print_loss:
            # The loss is a 0-dim tensor; .item() extracts the Python number
            # (indexing it with [0] raises on current PyTorch).
            print('Loss at epoch ', epoch + 1, ' is ', loss.item())
    return sine
132+
133+
134+

SiNE/stemgraph.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import networkx as nx
2+
import numpy as np
3+
4+
# From https://github.com/CompNet/SignedCentrality
5+
6+
def get_empty_graph(directed=True):
    """Return a new empty ``nx.DiGraph`` if ``directed`` is truthy, else ``nx.Graph``."""
    return nx.DiGraph() if directed else nx.Graph()
10+
11+
def from_edgelist_array_to_graph(X, y, directed=True):
    """Split labelled edges into a positive graph and a negative graph.

    Args:
        X: Iterable of ``(u, v)`` edges.
        y: Iterable of labels aligned with ``X``; an edge whose label equals
            0 goes to the negative graph, every other edge to the positive
            graph.
        directed: Passed to ``get_empty_graph`` for both graphs.

    Returns:
        The tuple ``(positive_graph, negative_graph)``.
    """
    positive_graph = get_empty_graph(directed)
    negative_graph = get_empty_graph(directed)

    for (u, v), label in zip(X, y):
        target = negative_graph if label == 0 else positive_graph
        target.add_edge(u, v)
    return positive_graph, negative_graph
22+
23+
24+
def get_triples(positive_graph, negative_graph, p0=True):
    """Build training triples from a positive and a negative graph.

    For every positive edge ``(u, v)``: if ``v`` is a node of the negative
    graph, one triple ``(u, v, w)`` is produced per neighbour ``w`` of ``v``
    in the negative graph; otherwise, when ``p0`` is true, the placeholder
    triple ``(u, v, 0)`` is collected separately.

    Args:
        positive_graph: Graph providing ``edges()``.
        negative_graph: Graph supporting ``in`` and ``[node]`` lookups.
        p0: Also collect and return the placeholder triples.

    Returns:
        ``(triples, triples0)`` as NumPy arrays when ``p0`` is true,
        otherwise only ``triples``.
    """
    with_negative = []
    placeholders = []
    for source, target in positive_graph.edges():
        if target not in negative_graph:
            if p0:
                placeholders.append((source, target, 0))
            continue
        with_negative.extend(
            (source, target, other) for other in negative_graph[target]
        )
    triples = np.array(with_negative)
    triples0 = np.array(placeholders)
    return (triples, triples0) if p0 else triples
41+
42+

0 commit comments

Comments
 (0)