This repository was archived by the owner on Dec 18, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
139 lines (111 loc) · 4.99 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Based on: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
from __future__ import print_function
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
from dataset import load_dataset
# Define artifact directories where results from the session can be saved
checkpoint_path = os.environ.get('CHECKPOINT_PATH', 'checkpoints/')
# Dump the network weights to a file like this:
# np.savez(path.join(checkpoint_path, 'model.npz'), *lasagne.layers.get_all_param_values(network))
# Load them again later like this:
# with np.load('model.npz') as f:
# param_values = [f['arr_%d' % i] for i in range(len(f.files))]
# lasagne.layers.set_all_param_values(network, param_values)
def build_cnn(input_var=None):
network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
input_var=input_var)
network = lasagne.layers.Conv2DLayer(
network, num_filters=32, filter_size=(5, 5),
nonlinearity=lasagne.nonlinearities.rectify,
W=lasagne.init.GlorotUniform())
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
network = lasagne.layers.Conv2DLayer(
network, num_filters=32, filter_size=(5, 5),
nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=.5),
num_units=256,
nonlinearity=lasagne.nonlinearities.rectify)
network = lasagne.layers.DenseLayer(
lasagne.layers.dropout(network, p=.5),
num_units=10,
nonlinearity=lasagne.nonlinearities.softmax)
return network
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
assert len(inputs) == len(targets)
if shuffle:
indices = np.arange(len(inputs))
np.random.shuffle(indices)
for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
if shuffle:
excerpt = indices[start_idx:start_idx + batchsize]
else:
excerpt = slice(start_idx, start_idx + batchsize)
yield inputs[excerpt], targets[excerpt]
def main(num_epochs=100):
print("Loading data...")
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
print("Building model and compiling functions...")
network = build_cnn(input_var)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
loss = loss.mean()
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
loss, params, learning_rate=0.01, momentum=0.9)
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
target_var)
test_loss = test_loss.mean()
test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
dtype=theano.config.floatX)
train_fn = theano.function([input_var, target_var], loss, updates=updates)
val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
print("Starting training...")
for epoch in range(num_epochs):
train_err = 0
train_batches = 0
start_time = time.time()
for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
inputs, targets = batch
train_err += train_fn(inputs, targets)
train_batches += 1
val_err = 0
val_acc = 0
val_batches = 0
for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
inputs, targets = batch
err, acc = val_fn(inputs, targets)
val_err += err
val_acc += acc
val_batches += 1
print("Epoch {} of {} took {:.3f}s".format(
epoch + 1, num_epochs, time.time() - start_time))
print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
print(" validation accuracy:\t\t{:.2f} %".format(
val_acc / val_batches * 100))
test_err = 0
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
inputs, targets = batch
err, acc = val_fn(inputs, targets)
test_err += err
test_acc += acc
test_batches += 1
print("Final results:")
print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print(" test accuracy:\t\t{:.2f} %".format(
test_acc / test_batches * 100))
# Dump the network weights to a file like this:
np.savez(os.path.join(checkpoint_path, 'model.npz'), *lasagne.layers.get_all_param_values(network))
if __name__ == '__main__':
main()