Skip to content

Commit bced15a

Browse files
committed
More updates
1 parent ac60c85 commit bced15a

File tree

7 files changed

+92
-15
lines changed

7 files changed

+92
-15
lines changed

data/t10k-images-idx3-ubyte

7.48 MB
Binary file not shown.

data/t10k-labels-idx1-ubyte

9.77 KB
Binary file not shown.

parameters/last_epoch.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

parameters/min_cost.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

show_training_images.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from numpy import *
2-
m = 1
2+
# Change this m for the index of the picture you want to show
3+
m = 100
34
bytes_to_read = m * 784
45
with open("data/train-images-idx3-ubyte", "rb") as f:
56
meta = f.read(16)
@@ -12,7 +13,7 @@
1213
y = reshape(array([raw_labels[i] for i in range(m)]), (m,1))
1314

1415
import matplotlib.pyplot as plt
15-
pixels = reshape(x, (28,28))
16-
plt.title('Shown image is for ' + str(y))
16+
pixels = reshape(x[m-1], (28,28))
17+
plt.title('Shown image is for ' + str(y[m-1]))
1718
plt.imshow(pixels, cmap='gray')
1819
plt.show()

test.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from Layer import *
2+
import json
3+
4+
m = 1000
5+
bytes_to_read = m * 784
6+
theta_filename = 'parameters/min_cost.json'
7+
# theta_filename = 'parameters/last_epoch.json'
8+
testfile_image = 'data/t10k-images-idx3-ubyte'
9+
testfile_label = 'data/t10k-labels-idx1-ubyte'
10+
11+
# Read thetas
12+
with open(theta_filename, 'r') as f:
13+
js_obj = json.load(f)
14+
15+
# Read testing set
16+
with open(testfile_image, 'rb') as f:
17+
meta = f.read(16)
18+
raw = f.read(bytes_to_read)
19+
x = reshape(array([raw[i] for i in range(bytes_to_read)]), (m,784))
20+
x = insert(x, 0, 1, axis=1)
21+
22+
# Read testing set labels
23+
with open(testfile_label, 'rb') as f:
24+
meta = f.read(8)
25+
raw = f.read(m)
26+
y = array([raw[i] for i in range(m)])
27+
28+
network = []
29+
# Input layer
30+
network.append(Layer(a=x,theta=reshape(array(js_obj['theta1']),(25,785))))
31+
# Two hidden layers
32+
network.append(Layer(theta=reshape(array(js_obj['theta2']),(25,26))))
33+
network.append(Layer(theta=reshape(array(js_obj['theta3']),(10,26))))
34+
# Output layer
35+
network.append(Layer())
36+
37+
network[1].activate(network[0],next_to_input=True)
38+
for i in range(2,len(network)):
39+
network[i].activate(network[i-1])
40+
41+
network_predictions = array([])
42+
for a in network[3].a:
43+
temp, prediction = 0, 0
44+
for confidence in range(a.size):
45+
if confidence > temp:
46+
temp = a[confidence]
47+
prediction = confidence
48+
network_predictions = append(network_predictions, prediction)
49+
50+
correct_predictions = 0
51+
for i in range(m):
52+
if y[i] == network_predictions[i]:
53+
correct_predictions += 1
54+
55+
print('Correct network predictions:', str(100*correct_predictions/m)+'%')

train.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
from Layer import *
2-
# Note to self: it's a good idea to start with like 5 images to make sure everything is working before moving on to 60000
32

4-
m = 60000
3+
m = 1000
54
bytes_to_read = m * 784
65
lame = 0.05 # Inside joke: this is the regularization term lambda
7-
alpha = 0.55 # Learning rate term
6+
alpha = 0.3 # Learning rate term
87

98
# Parsing the training data and their labels
109
with open("data/train-images-idx3-ubyte", "rb") as f:
@@ -45,7 +44,18 @@ def J(nn, training_data):
4544
# Output layer
4645
network.append(Layer())
4746

47+
def save(nn, filename):
48+
import json
49+
js_obj = {}
50+
js_obj['cost'] = J(nn,y) # using "global" instance y
51+
for i in range(len(nn)-1):
52+
js_obj['theta'+str(i+1)] = list(reshape(nn[i].theta, (nn[i].theta.size,)))
53+
with open(filename,'w') as fi:
54+
json.dump(js_obj, fi)
55+
4856
def train(nn, epoch):
57+
temp_cost = 50
58+
4959
# using the instance variable nn
5060
def forward_propagation():
5161
nn[1].activate(nn[0],next_to_input=True)
@@ -56,20 +66,29 @@ def back_propagation():
5666
nn[2].delt(nn[3], m, next_to_output=True)
5767
for i in range(len(nn)-3,-1,-1):
5868
nn[i].delt(nn[i+1],m)
69+
70+
# TODO: Don't penalize theta 0's !!!
5971
for i in range(len(nn)-2,-1,-1):
60-
nn[i].theta -= nn[i].gradiant/m + nn[i].theta * lame / m
72+
nn[i].theta -= nn[i].gradiant / m
73+
nn[i].theta[:,1:] -= nn[i].theta[:,1:] * lame / m
6174

6275
for i in range(epoch):
6376
forward_propagation()
64-
print('Cost:', J(nn,y))
77+
cost = J(nn,y)
78+
# Save the thetas that produce the lowest cost
79+
if cost < temp_cost:
80+
save(nn,'parameters/min_cost.json')
81+
temp_cost = cost
82+
print('Cost:', cost)
6583
# print('Network Output:\n', nn[3].a)
6684
# print('Desired Output:\n', y)
6785
back_propagation()
6886

69-
train(network, 2)
70-
nn[1].activate(nn[0],next_to_input=True)
71-
for i in range(2,len(nn)):
72-
nn[i].activate(nn[i-1])
73-
print(network[3].a[100])
74-
print(y[100])
75-
# TODO: remember to save the thetas at the end of training
87+
train(network, 50)
88+
network[1].activate(network[0],next_to_input=True)
89+
for i in range(2,len(network)):
90+
network[i].activate(network[i-1])
91+
print(network[3].a[50])
92+
print(y[50])
93+
# Save the thetas from the last descent/epoch
94+
save(network,'parameters/last_epoch.json')

0 commit comments

Comments
 (0)