Skip to content

Commit 89832b8

Browse files
committed
Fix bug in random initialization
1 parent 744b3c7 commit 89832b8

File tree

2 files changed

+22 -4 lines changed

2 files changed

+22 -4 lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
.idea/
22
data/
3+
runs/
34
*.tar.gz
45
*.pyc

src/cnn.py

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,18 @@ def J(X, y, params, keep_probs):
179179
A1, regular_cache1 = regular_forward_prop(A0, W1, b1, relu, keep_probs[1])
180180
A2, regular_cache2 = regular_forward_prop(A1, W2, b2, sigmoid, 1.0)
181181

182+
# print("a0 mean = " + str(np.mean(A0)))
183+
# print("a0 var = " + str(np.var(A0)))
184+
# print("a0 n var = " + str(1.0 / np.var(A0)))
185+
# print("a0 max = " + str(np.max(A0)))
186+
# print("a0 min = " + str(np.min(A0)))
187+
#
188+
# print("a1 mean = " + str(np.mean(A1)))
189+
# print("a1 var = " + str(np.var(A1)))
190+
# print("a1 n var = " + str(1.0 / np.var(A1)))
191+
# print("a1 max = " + str(np.max(A1)))
192+
# print("a1 min = " + str(np.min(A1)))
193+
182194
cost = np.sum((-y * np.log(A2) - (1 - y) * np.log(1 - A2)), axis=1) / batch_size
183195

184196
caches = conv_cache, regular_cache1, regular_cache2
@@ -249,7 +261,7 @@ def random_initialization(vocab_size, embedding_size, num_filters, filter_sizes,
249261
E = np.random.rand(vocab_size, embedding_size) * 2 - 1
250262
F = [np.random.randn(filter_size, embedding_size, num_filters) * np.sqrt(6.0 / filter_size / embedding_size) for filter_size in filter_sizes]
251263
b = [np.zeros((1, 1, num_filters)) for i in range(total_filters)]
252-
W1 = np.random.randn(hidden_units, num_filters * total_filters) * np.sqrt(2.0 / num_filters * total_filters)
264+
W1 = np.random.randn(hidden_units, num_filters * total_filters) * np.sqrt(2.0 / num_filters / total_filters)
253265
b1 = np.zeros((hidden_units, 1))
254266
W2 = np.random.randn(1, hidden_units) * np.sqrt(1.0 / hidden_units)
255267
b2 = np.zeros((1, 1))
@@ -295,8 +307,10 @@ def cnn(X_train, y_train, X_dev, y_dev, load_params_file, dump_dir, vocab_size,
295307
for mini_batch in mini_batches:
296308
iteration += 1
297309

298-
# if iteration % 5 == 0:
299-
# break
310+
# print("mean = " + str([np.mean(x) for x in params]))
311+
# print("var = " + str([np.var(x) for x in params]))
312+
# print("max = " + str([np.max(x) for x in params]))
313+
# print("min = " + str([np.min(x) for x in params]))
300314

301315
X, y = mini_batch
302316

@@ -321,7 +335,10 @@ def cnn(X_train, y_train, X_dev, y_dev, load_params_file, dump_dir, vocab_size,
321335

322336
grads = [dE] + dF + db + [dW1, db1, dW2, db2]
323337

324-
# gradient_checking(params, grads, X, y, total_filters)
338+
# print("mean g = " + str([np.mean(x) for x in grads]))
339+
# print("var g = " + str([np.var(x) for x in grads]))
340+
# print("max g = " + str([np.max(x) for x in grads]))
341+
# print("min g = " + str([np.min(x) for x in grads]))
325342

326343
v_grads = [v * beta1 + g * (1 - beta1) for v, g in zip(v_grads, grads)]
327344
s_grads = [s * beta2 + g * g * (1 - beta2) for s, g in zip(s_grads, grads)]

0 commit comments

Comments (0)