################################################################################
# CSE 253: Programming Assignment 2
# Code snippet by Manjot Bilkhu
# Winter 2020
################################################################################
# We've provided you with the dataset in PA2.zip
################################################################################
# To install PyYaml, refer to the instructions for your system:
# https://pyyaml.org/wiki/PyYAMLDocumentation
################################################################################
# If you don't have NumPy installed, please use the instructions here:
# https://scipy.org/install.html
################################################################################
import os
import gzip
import math
import random

import numpy as np
import yaml
from matplotlib import pyplot as plt

from utils import numerical_approximation, plot, plot_acc, plot_loss, graph_error

# Training-set statistics; set by load_data and used by normalize_data.
train_std = None
train_mean = None
def load_config(version):
    """
    Load the configuration from configs/config<version>.yaml.
    """
    with open('./configs/config' + version + '.yaml', 'r') as f:
        return yaml.load(f, Loader=yaml.SafeLoader)
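# For reference, a config file for this code plausibly looks like the sketch
# below. The keys are inferred from how `config` is read throughout this
# file; the example values are assumptions, not the assignment's settings.
#
#   layer_specs: [784, 50, 10]   # input units, hidden units..., output units
#   activation: "tanh"           # one of: sigmoid, tanh, ReLU
#   learning_rate: 0.01
#   batch_size: 128
#   epochs: 100
#   early_stop: True
#   early_stop_epoch: 5          # patience: epochs of rising validation loss
#   L2_penalty: 0.0001
#   momentum: True
#   momentum_gamma: 0.9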
def normalize_data(img):
    """
    Z-score the inputs with the training-set mean and std
    (set globally by load_data) and return them.
    """
    return (img - train_mean) / train_std
def one_hot_encoding(labels, num_classes=10):
"""
Encode labels using one hot encoding and return them.
@param labels: numerical labels
@param num_classes: number of classes to be included in the one hot encoded data
    @return: (len(labels), num_classes) array of one-hot rows
"""
one_hot_labels = []
for label in labels:
ohe = [0] * num_classes
ohe[int(label)] = 1
one_hot_labels.append(ohe)
return np.array(one_hot_labels)
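# A minimal sanity check for one_hot_encoding (an illustrative helper added
# here; it is defined but never called). np.eye gives an equivalent
# vectorized encoding for integer labels.
def _demo_one_hot():
    labels = [1, 0, 3]
    expected = np.eye(4, dtype=int)[labels]
    assert np.array_equal(one_hot_encoding(labels, num_classes=4), expected)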
def mini_batch(x, y, size):
"""
Shuffle and split the data into mini batches
@param x: images
@param y: labels
@param size: batch size
    @return: two parallel lists: image batches and label batches
    """
    if len(x) < size:
        # Fewer examples than one batch: return a single batch of everything.
        return [x], [y]
    # Examples beyond the last full batch are dropped.
    num_batches = math.floor(len(x) / size)
x_batches = []
y_batches = []
data = list(zip(x, y))
random.shuffle(data)
x, y = zip(*data)
x = np.array(x)
y = np.array(y)
for i in range(num_batches):
x_batches.append(x[i * size: (i + 1) * size])
y_batches.append(y[i * size: (i + 1) * size])
return x_batches, y_batches
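# A quick shape check for mini_batch (illustrative; defined but never called):
def _demo_mini_batch():
    x = np.arange(10).reshape(10, 1)
    y = one_hot_encoding(np.arange(10) % 2, num_classes=2)
    x_batches, y_batches = mini_batch(x, y, size=4)
    # 10 examples at batch size 4 -> 2 full batches; the 2 leftovers are dropped.
    assert len(x_batches) == 2
    assert x_batches[0].shape == (4, 1) and y_batches[0].shape == (4, 2)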
def load_data(path, mode='train'):
"""
Load Fashion MNIST data.
@param path: folder location of dataset
@param mode: Use mode='train' for train and mode='t10k' for test.
    @return: normalized data and one-hot encoded labels
"""
labels_path = os.path.join(path, f'{mode}-labels-idx1-ubyte.gz')
images_path = os.path.join(path, f'{mode}-images-idx3-ubyte.gz')
with gzip.open(labels_path, 'rb') as lbpath:
labels = np.frombuffer(lbpath.read(), dtype=np.uint8, offset=8)
with gzip.open(images_path, 'rb') as imgpath:
images = np.frombuffer(imgpath.read(), dtype=np.uint8, offset=16).reshape(len(labels), 784)
if mode == 'train':
global train_mean, train_std
train_mean = np.mean(images)
train_std = np.std(images)
normalized_images = normalize_data(images)
one_hot_labels = one_hot_encoding(labels, num_classes=10)
return normalized_images, one_hot_labels
def softmax(x):
"""
Implement the softmax function here.
Remember to take care of the overflow condition.
@param x: predictions
@return: softmax predictions
"""
    # Subtract the row-wise max before exponentiating to avoid overflow.
    e = np.exp(x - np.amax(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)
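# A small numerical-stability check for softmax (illustrative; never called):
def _demo_softmax():
    logits = np.array([[1000.0, 1000.0], [-1000.0, 0.0]])  # naive exp overflows
    p = softmax(logits)
    assert np.allclose(p.sum(axis=1), 1.0)   # rows are valid distributions
    assert np.allclose(p[0], [0.5, 0.5])     # equal logits -> equal probability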
class Activation:
"""
The class implements different types of activation functions for
your neural network layers.
Example (for sigmoid):
>>> sigmoid_layer = Activation("sigmoid")
>>> z = sigmoid_layer(a)
>>> gradient = sigmoid_layer.backward(delta=1.0)
"""
def __init__(self, activation_type="sigmoid"):
"""
Initialize activation type and placeholders here.
"""
if activation_type not in ["sigmoid", "tanh", "ReLU"]:
raise NotImplementedError(f"{activation_type} is not implemented.")
# Type of non-linear activation.
self.activation_type = activation_type
        # Placeholder for the activation *output* (forward saves it here);
        # the gradient formulas below are written in terms of this output.
        self.x = None
def __call__(self, a):
"""
This method allows your instances to be callable.
"""
return self.forward(a)
def forward(self, a):
"""
Compute the forward pass.
"""
if self.activation_type == "sigmoid":
return self.sigmoid(a)
elif self.activation_type == "tanh":
return self.tanh(a)
elif self.activation_type == "ReLU":
return self.ReLU(a)
def backward(self, delta):
"""
Compute the backward pass.
"""
if self.activation_type == "sigmoid":
grad = self.grad_sigmoid()
elif self.activation_type == "tanh":
grad = self.grad_tanh()
elif self.activation_type == "ReLU":
grad = self.grad_ReLU()
return grad * delta
def sigmoid(self, x):
"""
Implement the sigmoid activation here.
"""
self.x = 1 / (1 + np.exp(-x))
return self.x
def tanh(self, x):
"""
Implement tanh here.
"""
self.x = np.tanh(x)
return self.x
def ReLU(self, x):
"""
Implement ReLU here.
"""
self.x = np.maximum(0, x)
return self.x
def grad_sigmoid(self):
"""
Compute the gradient for sigmoid here.
"""
return self.x * (1 - self.x)
def grad_tanh(self):
"""
Compute the gradient for tanh here.
"""
return 1 - self.x ** 2
def grad_ReLU(self):
"""
Compute the gradient for ReLU here.
"""
        # 1 where the stored activation output is positive, 0 elsewhere.
        return (self.x > 0).astype(float)
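# A finite-difference check of the sigmoid gradient (illustrative; never
# called). Note: forward passes overwrite the stored output, so the analytic
# gradient is taken first.
def _demo_activation_grad():
    act = Activation("sigmoid")
    z = np.array([[0.5, -1.0, 2.0]])
    act(z)                                  # stores sigmoid(z) internally
    analytic = act.backward(delta=1.0)      # sigmoid(z) * (1 - sigmoid(z))
    eps = 1e-6
    numeric = (act.sigmoid(z + eps) - act.sigmoid(z - eps)) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-6)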
class Layer:
"""
This class implements Fully Connected layers for your neural network.
Example:
>>> fully_connected_layer = Layer(784, 100)
>>> output = fully_connected_layer(input)
>>> gradient = fully_connected_layer.backward(delta=1.0)
"""
def __init__(self, in_units, out_units):
"""
Define the architecture and create placeholder.
"""
        # Fixed seed for reproducibility. Caveat: reseeding in every Layer
        # gives identically-shaped layers identical initial weights.
        np.random.seed(42)
        self.w = np.random.randn(in_units, out_units)  # Weight matrix
        self.b = np.zeros((1, out_units))              # Bias row vector
self.x = None # Save the input to forward in this
self.a = None # Save the output of forward pass in this (without activation)
self.d_x = None # Save the gradient w.r.t x in this
self.d_w = None # Save the gradient w.r.t w in this
self.d_b = None # Save the gradient w.r.t b in this
self.v = None # Momentum
def __call__(self, x):
"""
Make layer callable.
"""
return self.forward(x)
def forward(self, x):
"""
Compute the forward pass through the layer here.
Do not apply activation here.
Return self.a
"""
self.x = x
self.a = self.x.dot(self.w) + self.b
return self.a
    def backward(self, delta):
        """
        Backward pass: takes the gradient from the next layer, computes the
        gradients for this layer's weights and bias, and returns the delta
        to pass to the previous layer.
        Return self.d_x
        """
        self.d_x = delta.dot(self.w.T)
        # Average over the batch so d_w matches the batch-averaged loss
        # (d_b below is averaged the same way).
        self.d_w = self.x.T.dot(delta) / self.x.shape[0]
        self.d_b = np.mean(delta, axis=0).reshape((1, -1))
        return self.d_x
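# A shape sanity check for Layer (illustrative; defined but never called):
def _demo_layer_shapes():
    layer = Layer(4, 3)
    x = np.random.randn(8, 4)
    a = layer(x)                 # (8, 4) @ (4, 3) + (1, 3) -> (8, 3)
    d_x = layer.backward(np.ones_like(a))
    assert a.shape == (8, 3) and d_x.shape == x.shape
    assert layer.d_w.shape == layer.w.shape and layer.d_b.shape == layer.b.shape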
class Neuralnetwork:
    """
    Create a Neural Network specified by the input configuration.
    Example:
        >>> net = Neuralnetwork(config)
        >>> output = net(input)
        >>> net.backward()
    """
def __init__(self, config):
"""
Create the Neural Network using config.
"""
self.layers = [] # Store all layers in this list.
self.x = None # Save the input to forward in this
self.y = None # Save the output vector of model in this
self.targets = None # Save the targets in forward in this variable
self.config = config
self.training_loss = []
self.validation_loss = []
self.training_acc = []
self.validation_acc = []
self.validation_increments = 0
# Add layers specified by layer_specs.
for i in range(len(config['layer_specs']) - 1):
self.layers.append(Layer(config['layer_specs'][i], config['layer_specs'][i + 1]))
if i < len(config['layer_specs']) - 2:
self.layers.append(Activation(config['activation']))
def __call__(self, x, targets=None):
"""
Make NeuralNetwork callable.
"""
return self.forward(x, targets)
def forward(self, x, targets=None):
"""
Compute forward pass through all the layers in the network and return it.
If targets are provided, return loss as well.
"""
self.x = x
for layer in self.layers:
self.x = layer.forward(self.x)
self.y = softmax(self.x)
self.targets = targets
if targets is not None:
return self.loss(self.y, targets), self.y
return None, self.y
def loss(self, logits, targets):
        """
        Compute the batch-averaged categorical cross-entropy loss (plus an
        optional L2 penalty) and return it. Note: despite the name, `logits`
        receives the softmax probabilities produced by forward.
        """
eps = 1e-15
logits = np.clip(logits, eps, 1 - eps)
loss = -np.sum(targets * np.log(logits))
# L2 loss:
l2_penalty = self.config['L2_penalty']
if l2_penalty > 0:
for layer in self.layers:
if isinstance(layer, Layer):
loss += l2_penalty / 2 * np.sum(np.square(layer.w))
return loss / logits.shape[0]
    def backward(self):
        """
        Implement backpropagation here.
        Call the backward methods of the individual layers.
        """
        # For softmax + cross-entropy, d(loss)/d(logits) = y - targets, so
        # (targets - y) is the negative gradient; update_weights then *adds*
        # learning_rate * d_w, which amounts to gradient descent.
        delta = self.targets - self.y
        for layer in reversed(self.layers):
            delta = layer.backward(delta)
def log_metrics(self, training_loss, validation_loss, training_acc, validation_acc):
"""
        Save epoch metrics to the model and track how many consecutive
        epochs the validation loss has risen (used for early stopping).
        @param training_loss: training loss for the epoch
        @param validation_loss: validation loss for the epoch
        @param training_acc: training accuracy for the epoch
        @param validation_acc: validation accuracy for the epoch
"""
if len(self.validation_loss) > 0 and validation_loss > self.validation_loss[-1]:
self.validation_increments += 1
else:
self.validation_increments = 0
self.training_loss.append(training_loss)
self.validation_loss.append(validation_loss)
self.training_acc.append(training_acc)
self.validation_acc.append(validation_acc)
    def update_weights(self):
        """
        Update the weights after back-propagation has happened.
        """
        learning_rate = self.config['learning_rate']
        l2_penalty = self.config['L2_penalty']
        for layer in self.layers:
            if isinstance(layer, Layer):
                # d_w holds the *negative* gradient (delta = targets - y), so
                # gradient descent adds learning_rate * d_w. The L2 decay is
                # scaled by the learning rate as part of the same step.
                if self.config['momentum']:
                    if layer.v is None:
                        layer.v = layer.d_w
                    momentum_gamma = self.config['momentum_gamma']
                    # Exponential moving average of the (negative) gradient.
                    layer.v = momentum_gamma * layer.v + (1 - momentum_gamma) * layer.d_w
                    layer.w += learning_rate * (layer.v - l2_penalty * layer.w)
                else:
                    layer.w += learning_rate * (layer.d_w - l2_penalty * layer.w)
                layer.b += learning_rate * layer.d_b
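# A minimal end-to-end smoke test (illustrative; defined but never called).
# The config keys mirror those this file reads; the values are assumptions,
# not the assignment's settings.
def _demo_sgd_step():
    cfg = {'layer_specs': [4, 8, 3], 'activation': 'sigmoid',
           'learning_rate': 0.1, 'momentum': False, 'momentum_gamma': 0.9,
           'L2_penalty': 0.0}
    net = Neuralnetwork(cfg)
    x = np.random.randn(16, 4)
    t = one_hot_encoding(np.random.randint(3, size=16), num_classes=3)
    loss_before, _ = net(x, t)
    net.backward()
    net.update_weights()
    loss_after, _ = net(x, t)
    print("loss before/after one SGD step:", loss_before, loss_after)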
def train(model, x_train, y_train, x_valid, y_valid, config):
"""
Train your model here.
Implement batch SGD to train the model.
Implement Early Stopping.
Use config to set parameters for training like learning rate, momentum, etc.
    @param model: model to train
@param x_train: training data, images
@param y_train: labels for training data
@param x_valid: validation data, images
@param y_valid: labels for validation data
@param config: current configuration
"""
# number of times the error can increase before ending training
threshold = config['early_stop_epoch']
# store current-best model
# best_model = model
for epoch in range(config['epochs']):
        # Shuffle and split the data into batches of config['batch_size']
        # for mini-batch gradient descent.
x_batches, y_batches = mini_batch(x_train, y_train, config['batch_size'])
for i in range(len(x_batches)):
model.forward(x_batches[i], y_batches[i])
model.backward()
model.update_weights() # implements momentum and regularization
# track metrics across each epoch
tl, ta = test(model, x_train, y_train)
vl, va = test(model, x_valid, y_valid)
model.log_metrics(tl, vl, ta, va)
# early stopping condition
        # config['early_stop'] is a YAML boolean; comparing it to the string
        # 'True' never matches, so test it directly.
        if config['early_stop'] and epoch >= 4 and model.validation_increments > threshold:
break
if epoch % 10 == 0:
print("Running epoch {}".format(epoch + 1))
def test(model, X_test, y_test):
"""
Calculate and return the accuracy on the test set.
"""
loss, predictions = model.forward(X_test, y_test)
targets = np.argmax(y_test, axis=-1)
predictions = np.argmax(predictions, axis=-1)
accuracy = np.sum(targets == predictions) / len(y_test)
return loss, accuracy
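# A finite-difference gradient check on an output-layer bias (illustrative;
# defined but never called; it complements the utils.numerical_approximation
# checks in task_b). The config values here are assumptions.
def _demo_grad_check():
    cfg = {'layer_specs': [4, 5, 3], 'activation': 'sigmoid',
           'learning_rate': 0.1, 'momentum': False, 'momentum_gamma': 0.9,
           'L2_penalty': 0.0}
    net = Neuralnetwork(cfg)
    x = np.random.randn(6, 4)
    t = one_hot_encoding(np.random.randint(3, size=6), num_classes=3)
    net(x, t)
    net.backward()
    out_layer = net.layers[-1]
    eps = 1e-5
    out_layer.b[0, 0] += eps
    loss_plus, _ = net(x, t)
    out_layer.b[0, 0] -= 2 * eps
    loss_minus, _ = net(x, t)
    out_layer.b[0, 0] += eps                 # restore the original bias
    numeric = (loss_plus - loss_minus) / (2 * eps)
    # d_b stores the negative gradient, hence the sign flip.
    assert np.isclose(-out_layer.d_b[0, 0], numeric, atol=1e-6)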
def task_b():
# Load the configuration.
config = load_config("b")
num_of_examples = 12
    # load the training data and keep a small slice for gradient checking
x_train, y_train = load_data(path="./", mode="train")
x_train = x_train[:num_of_examples]
y_train = y_train[:num_of_examples]
# Create the model
model = Neuralnetwork(config)
# bias output weight
numerical_approximation(x_train, y_train, model, 2, 0, 0, bias=True)
# hidden bias weight
numerical_approximation(x_train, y_train, model, 0, 0, 0, bias=True)
# two hidden to output weight
numerical_approximation(x_train, y_train, model, 2, 0, 1)
numerical_approximation(x_train, y_train, model, 2, 2, 2)
# two input to hidden weights
numerical_approximation(x_train, y_train, model, 0, 0, 1)
numerical_approximation(x_train, y_train, model, 0, 2, 2)
def task_c():
###############################
# Load the configuration.
config = load_config("c")
# Create the model
model = Neuralnetwork(config)
# Load the data
x_train, y_train = load_data(path="./", mode="train")
x_test, y_test = load_data(path="./", mode="t10k")
num_examples = len(x_train)
print("# examples:", num_examples)
# Hold the values from each model that we will graph
ten_training_losses = []
ten_training_accuracies = []
ten_validation_losses = []
ten_validation_accuracies = []
# store best model for test set
best_model = model
    # Seed with a large sentinel loss so the first trained model replaces it.
    best_model.validation_loss.append(float('inf'))
    # Repeat a random-subsample holdout validation K times. Note: the
    # subsets below are drawn with replacement, not true k-fold splits.
    K = 2
for k in range(K): # for each fold
print("K value: ", k)
model = Neuralnetwork(config)
X = []
y = []
k_size = len(x_train) - int(len(x_train) / K)
        for i in range(k_size):  # draw a subset of size (K-1)/K, with replacement
r = np.random.randint(num_examples)
X.append(x_train[r])
y.append(y_train[r])
X = np.array(X)
y = np.array(y)
        # Hold out the last 10% of the subsample for validation.
        size = len(X)
        train_fraction = 0.9
        Xv, yv = X[int(size * train_fraction):], y[int(size * train_fraction):]
        Xt, yt = X[:int(size * train_fraction)], y[:int(size * train_fraction)]
# train the model
train(model, Xt, yt, Xv, yv, config)
# append the values of each model to their lists
ten_training_accuracies.append(model.training_acc)
ten_training_losses.append(model.training_loss)
ten_validation_accuracies.append(model.validation_acc)
ten_validation_losses.append(model.validation_loss)
# Store best model
if model.validation_loss[-1] < best_model.validation_loss[-1]:
best_model = model
### Report training and validation accuracy and loss for each K
# find the minimum epoch number to convergence:
lengths = []
for item in ten_validation_losses:
lengths.append(len(item))
cutoff = min(lengths)
# align the losses and accuracy by cutoff value
aligned_training_losses = np.array([x[:cutoff] for x in ten_training_losses])
aligned_training_accuracies = np.array([x[:cutoff] for x in ten_training_accuracies])
aligned_validation_losses = np.array([x[:cutoff] for x in ten_validation_losses])
aligned_validation_accuracies = np.array([x[:cutoff] for x in ten_validation_accuracies])
# plot the data
aligned_data = [aligned_training_losses, aligned_validation_losses, aligned_training_accuracies,
aligned_validation_accuracies]
means = []
stds = []
for i in range(len(aligned_data)):
data = aligned_data[i]
        mean = np.mean(data, axis=0)  # per-epoch mean across folds
means.append(mean)
std = np.std(data, axis=0)
stds.append(std)
graph_error(means[0:2], stds[0:2], ["Epoch", "Loss"], ["Training", "Validation"], "Training vs Validation Loss",
show=True)
    graph_error(means[2:], stds[2:], ["Epoch", "Accuracy"], ["Training", "Validation"],
                "Training vs Validation Accuracy", show=True)
### Report Test Accuracy of best model
test_loss, test_acc = test(best_model, x_test, y_test)
print("Test accuracy of best model: ", test_acc)
def task_d():
d_configs = ["d1", "d2"]
titles = ["0.001 Regularization", "0.0001 Regularization"]
for i in range(2):
config = load_config(d_configs[i])
model = Neuralnetwork(config)
x_train, y_train = load_data(path="./", mode="train")
x_test, y_test = load_data(path="./", mode="t10k")
train_size = 0.9
size = len(x_train)
x_valid, y_valid = x_train[int(size * train_size):], y_train[int(size * train_size):]
x_train, y_train = x_train[:int(size * train_size)], y_train[:int(size * train_size)]
train(model, x_train, y_train, x_valid, y_valid, config)
plot(model, titles[i])
test_loss, test_acc = test(model, x_test, y_test)
print("Test accuracy: {}".format(test_acc))
def task_e():
models = []
for i in range(3):
config = load_config("e{}".format(i + 1))
model = Neuralnetwork(config)
x_train, y_train = load_data(path="./", mode="train")
x_test, y_test = load_data(path="./", mode="t10k")
train_size = 0.9
size = len(x_train)
x_valid, y_valid = x_train[int(size * train_size):], y_train[int(size * train_size):]
x_train, y_train = x_train[:int(size * train_size)], y_train[:int(size * train_size)]
train(model, x_train, y_train, x_valid, y_valid, config)
models.append(model)
        test_loss, test_acc = test(model, x_test, y_test)
        print("Test accuracy: {}".format(test_acc))
for model in models:
plot_loss(model, "Loss for different activation functions", False)
plt.legend(["Training loss - Tanh", "Validation loss - Tanh",
"Training loss - Sigmoid", "Validation loss - Sigmoid",
"Training loss - ReLU", "Validation loss - ReLU"])
plt.show()
for model in models:
plot_acc(model, "Accuracy for different activation functions", False)
plt.legend(["Training accuracy - Tanh", "Validation accuracy - Tanh",
"Training accuracy - Sigmoid", "Validation accuracy - Sigmoid",
"Training accuracy - ReLU", "Validation accuracy - ReLU"])
plt.show()
def task_f():
# task i
configs = ["fhalf", "fdouble", "fdeeper"]
titles = ["One Hidden Layer (25 Nodes)", "One Hidden Layer (100 Nodes)", "Two Hidden Layers (50 Nodes Each)"]
for i in range(3):
config = load_config(configs[i])
# Create the model
model = Neuralnetwork(config)
# Load the data
x_train, y_train = load_data(path="./", mode="train")
train_size = 0.9
size = len(x_train)
x_valid, y_valid = x_train[int(size * train_size):], y_train[int(size * train_size):]
x_train, y_train = x_train[:int(size * train_size)], y_train[:int(size * train_size)]
train(model, x_train, y_train, x_valid, y_valid, config)
plot(model, titles[i])
if __name__ == "__main__":
task = ''
while task != 'q':
task = input("Choose your task - lowercase letter: ")
if task == 'b':
task_b()
elif task == 'c':
task_c()
elif task == 'd':
task_d()
elif task == 'e':
task_e()
elif task == 'f':
task_f()
elif task == 'q':
print("Ending Program")
else:
print("invalid entry - select from {b,c,d,e,f}")