Skip to content

Commit bced15a

Browse files
committed
More updates
1 parent ac60c85 commit bced15a

File tree

7 files changed

+92
-15
lines changed

7 files changed

+92
-15
lines changed

data/t10k-images-idx3-ubyte

7.48 MB
Binary file not shown.

data/t10k-labels-idx1-ubyte

9.77 KB
Binary file not shown.

parameters/last_epoch.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

parameters/min_cost.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

show_training_images.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from numpy import *
2-
m = 1
2+
# Change this m for the index of the picture you want to show
3+
m = 100
34
bytes_to_read = m * 784
45
with open("data/train-images-idx3-ubyte", "rb") as f:
56
meta = f.read(16)
@@ -12,7 +13,7 @@
1213
y = reshape(array([raw_labels[i] for i in range(m)]), (m,1))
1314

1415
import matplotlib.pyplot as plt
15-
pixels = reshape(x, (28,28))
16-
plt.title('Shown image is for ' + str(y))
16+
pixels = reshape(x[m-1], (28,28))
17+
plt.title('Shown image is for ' + str(y[m-1]))
1718
plt.imshow(pixels, cmap='gray')
1819
plt.show()

test.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from Layer import *
2+
import json
3+
4+
m = 1000
5+
bytes_to_read = m * 784
6+
theta_filename = 'parameters/min_cost.json'
7+
# theta_filename = 'parameters/last_epoch.json'
8+
testfile_image = 'data/t10k-images-idx3-ubyte'
9+
testfile_label = 'data/t10k-labels-idx1-ubyte'
10+
11+
# Read thetas
12+
with open(theta_filename, 'r') as f:
13+
js_obj = json.load(f)
14+
15+
# Read testing set
16+
with open(testfile_image, 'rb') as f:
17+
meta = f.read(16)
18+
raw = f.read(bytes_to_read)
19+
x = reshape(array([raw[i] for i in range(bytes_to_read)]), (m,784))
20+
x = insert(x, 0, 1, axis=1)
21+
22+
# Read testing set labels
23+
with open(testfile_label, 'rb') as f:
24+
meta = f.read(8)
25+
raw = f.read(m)
26+
y = array([raw[i] for i in range(m)])
27+
28+
network = []
29+
# Input layer
30+
network.append(Layer(a=x,theta=reshape(array(js_obj['theta1']),(25,785))))
31+
# Two hidden layers
32+
network.append(Layer(theta=reshape(array(js_obj['theta2']),(25,26))))
33+
network.append(Layer(theta=reshape(array(js_obj['theta3']),(10,26))))
34+
# Output layer
35+
network.append(Layer())
36+
37+
network[1].activate(network[0],next_to_input=True)
38+
for i in range(2,len(network)):
39+
network[i].activate(network[i-1])
40+
41+
network_predictions = array([])
42+
for a in network[3].a:
43+
temp, prediction = 0, 0
44+
for confidence in range(a.size):
45+
if confidence > temp:
46+
temp = a[confidence]
47+
prediction = confidence
48+
network_predictions = append(network_predictions, prediction)
49+
50+
correct_predictions = 0
51+
for i in range(m):
52+
if y[i] == network_predictions[i]:
53+
correct_predictions += 1
54+
55+
print('Correct network predictions:', str(100*correct_predictions/m)+'%')

train.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
from Layer import *
2-
# Note to self: it's a good idea to start with like 5 images to make sure everything is working before moving on to 60000
32

4-
m = 60000
3+
m = 1000
54
bytes_to_read = m * 784
65
lame = 0.05 # Inside joke: this is the regularization term lambda
7-
alpha = 0.55 # Learning rate term
6+
alpha = 0.3 # Learning rate term
87

98
# Parsing the training data and their labels
109
with open("data/train-images-idx3-ubyte", "rb") as f:
@@ -45,7 +44,18 @@ def J(nn, training_data):
4544
# Output layer
4645
network.append(Layer())
4746

47+
def save(nn, filename):
48+
import json
49+
js_obj = {}
50+
js_obj['cost'] = J(nn,y) # using "global" instance y
51+
for i in range(len(nn)-1):
52+
js_obj['theta'+str(i+1)] = list(reshape(nn[i].theta, (nn[i].theta.size,)))
53+
with open(filename,'w') as fi:
54+
json.dump(js_obj, fi)
55+
4856
def train(nn, epoch):
57+
temp_cost = 50
58+
4959
# using the instance variable nn
5060
def forward_propagation():
5161
nn[1].activate(nn[0],next_to_input=True)
@@ -56,20 +66,29 @@ def back_propagation():
5666
nn[2].delt(nn[3], m, next_to_output=True)
5767
for i in range(len(nn)-3,-1,-1):
5868
nn[i].delt(nn[i+1],m)
69+
70+
# TODO: Don't penalize theta 0's !!!
5971
for i in range(len(nn)-2,-1,-1):
60-
nn[i].theta -= nn[i].gradiant/m + nn[i].theta * lame / m
72+
nn[i].theta -= nn[i].gradiant / m
73+
nn[i].theta[:,1:] -= nn[i].theta[:,1:] * lame / m
6174

6275
for i in range(epoch):
6376
forward_propagation()
64-
print('Cost:', J(nn,y))
77+
cost = J(nn,y)
78+
# Save the thetas that produce the lowest cost
79+
if cost < temp_cost:
80+
save(nn,'parameters/min_cost.json')
81+
temp_cost = cost
82+
print('Cost:', cost)
6583
# print('Network Output:\n', nn[3].a)
6684
# print('Desired Output:\n', y)
6785
back_propagation()
6886

69-
train(network, 2)
70-
nn[1].activate(nn[0],next_to_input=True)
71-
for i in range(2,len(nn)):
72-
nn[i].activate(nn[i-1])
73-
print(network[3].a[100])
74-
print(y[100])
75-
# TODO: remember to save the thetas at the end of training
87+
train(network, 50)
88+
network[1].activate(network[0],next_to_input=True)
89+
for i in range(2,len(network)):
90+
network[i].activate(network[i-1])
91+
print(network[3].a[50])
92+
print(y[50])
93+
# Save the thetas from the last descent/epoch
94+
save(network,'parameters/last_epoch.json')

0 commit comments

Comments
 (0)