@@ -6,7 +6,7 @@ class Network:
     def __init__(self, params):
         # params is a list containing the layer sizes
         self.layers = len(params)
-        self.biases = [np.random.randn(siz) for siz in params[1:]]  # first layer won't have bias
+        self.biases = [np.random.randn(siz, 1) for siz in params[1:]]  # first layer won't have bias
         # TODO: check if the param should have a 1 (bias should be a column vector)
         self.weights = [np.random.randn(siz, prev) for siz, prev in zip(params[1:], params[:-1])]

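The change to np.random.randn(siz, 1) keeps each bias as a column vector, which is the shape np.dot(weight, a) + bias needs once activations are (n, 1) arrays. A minimal shape check, as a sketch outside the commit (the [784, 30, 10] layer spec and the dummy input are illustrative only):

import numpy as np

params = [784, 30, 10]  # hypothetical layer sizes
biases = [np.random.randn(siz, 1) for siz in params[1:]]
weights = [np.random.randn(siz, prev) for siz, prev in zip(params[1:], params[:-1])]

a = np.random.randn(784, 1)  # dummy input column vector
for w, b in zip(weights, biases):
    a = 1.0 / (1.0 + np.exp(-(np.dot(w, a) + b)))  # sigmoid(w.a + b) broadcasts to (n, 1)
print([b.shape for b in biases])  # [(30, 1), (10, 1)]
print(a.shape)                    # (10, 1)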
@@ -17,18 +17,58 @@ def gradient_descent(self, training_data, cycles, eta, batch_size, num_batches):
         # to get better averaging we do this grouping cycles number of times
         n = len(training_data)
         for iter in range(cycles):
-            random.shuffle(training_data)
             mini_batches = [training_data[s:s + batch_size] for s in range(0, n, batch_size)]

+            count = 0
             for batch in mini_batches:
+                base_w = [np.zeros(w.shape) for w in self.weights]
+                # random.shuffle(training_data)
+                base_b = [np.zeros(b.shape) for b in self.biases]
                 for dataset in batch:
+
                     # do back propagation for this dataset
                     # average it out to obtain the gradient
                     change_w, change_b = self.back_prop(dataset)
+                    base_w = [w + ch for w, ch in zip(base_w, change_w)]
+                    base_b = [b + ch for b, ch in zip(base_b, change_b)]
+
+                # we have the summed gradient; apply the averaged step
+                self.weights = [w - (eta * ch / len(batch)) for w, ch in zip(self.weights, base_w)]
+                self.biases = [b - (eta * ch / len(batch)) for b, ch in zip(self.biases, base_b)]
+                count += 1
+                print("Finished batch {0}".format(count))
+
+    def test(self, training_data, l, r):
+        i = l
+        success = 0
+        total = 0
+        while i <= r:
+            result = self.forward(training_data[i][0])  # network output for example i
+            best_val = 0
+            best = -1
+            j = 0
+            actual = -1
+            while j <= 9:  # pick the digit with the highest activation
+                if result[j] > best_val:
+                    best_val = result[j]
+                    best = j
+                if training_data[i][1][j] > 0.5:  # one-hot label marks the true digit
+                    actual = j
+                j += 1
+            net_cost = 0  # mean squared error for this example (computed but not reported)
+            for term, target in zip(result, training_data[i][1]):
+                net_cost += (term - target) * (term - target)
+            net_cost /= len(result)
+
+            if actual == best:
+                success += 1
+            total += 1
+            i += 1
+        print("Success: {0}/{1}".format(success, total))

     def sigmoid(self, vector):
         # returns sigmoid of a vector
-        return 1.0 / 1.0 + np.exp(-vector)
+        return 1.0 / (1.0 + np.exp(-vector))

     def sigmoid_prime(self, vector):
         return self.sigmoid(vector) * (1 - self.sigmoid(vector))
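The per-batch update above is the averaged gradient step w_new = w - (eta / len(batch)) * (sum of per-sample gradients). A self-contained sketch of just that accumulation and averaging, with made-up gradients standing in for what back_prop returns (the shapes and learning rate are illustrative):

import numpy as np

eta = 3.0  # illustrative learning rate
weights = [np.random.randn(3, 2), np.random.randn(1, 3)]
per_sample_grads = [[np.ones((3, 2)), np.ones((1, 3))] for _ in range(4)]  # stand-ins for back_prop output

base_w = [np.zeros(w.shape) for w in weights]
for grads in per_sample_grads:  # accumulate gradients over the mini-batch
    base_w = [acc + g for acc, g in zip(base_w, grads)]

# averaged step, mirroring the self.weights update in gradient_descent
weights = [w - (eta * acc / len(per_sample_grads)) for w, acc in zip(weights, base_w)]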
@@ -44,14 +84,32 @@ def back_prop(self, dataset):
         zs = []
         a = dataset[0]
         for weight, bias in zip(self.weights, self.biases):
+            # print(bias.shape)
             zs.append(np.dot(weight, a) + bias)
             a = self.sigmoid(np.dot(weight, a) + bias)
             activations.append(a)

+        layers = len(self.weights) + 1
         delta = 2 * (activations[-1] - dataset[1]) * self.sigmoid_prime(zs[-1])
-
-
-
+        change_bias = [np.zeros(b.shape) for b in self.biases]  # separate gradient buffers,
+        change_weight = [np.zeros(w.shape) for w in self.weights]  # not aliases of the parameters

+        change_bias[layers - 2] = delta
+        # currently operating on the weights at index layers-2 (output layer)
+        for j in range(len(change_weight[layers - 2])):
+            for k in range(len(change_weight[layers - 2][j])):
+                change_weight[layers - 2][j][k] = activations[layers - 2][k] * delta[j]

+        # want to return gradients layer wise, walking back to the first weight matrix
+        for iter in range(layers - 2):
+            delta = np.dot(self.weights[layers - 2 - iter].transpose(), delta) * self.sigmoid_prime(zs[layers - 3 - iter])
+            change_bias[layers - 3 - iter] = delta
+            # currently operating on the weights at index layers-3-iter
+            for j in range(len(change_weight[layers - 3 - iter])):
+                for k in range(len(change_weight[layers - 3 - iter][j])):
+                    change_weight[layers - 3 - iter][j][k] = activations[layers - 3 - iter][k] * delta[j]
+            # back propagate delta

+
+        return change_weight, change_bias
+
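For reference, a hedged end-to-end usage sketch of the class this commit modifies. It assumes Network (with the forward method used by test) is defined in the module being edited, and that the training data is a list of (x, y) pairs where x has shape (784, 1) and y is a one-hot (10, 1) column vector; that layout is not spelled out in the commit itself, and the layer sizes and hyperparameters below are only examples:

import numpy as np

def fake_dataset(n):
    # build (input, one-hot label) pairs in the assumed layout
    data = []
    for _ in range(n):
        x = np.random.rand(784, 1)
        y = np.zeros((10, 1))
        y[np.random.randint(10)] = 1.0
        data.append((x, y))
    return data

net = Network([784, 30, 10])  # illustrative layer sizes
training_data = fake_dataset(200)
net.gradient_descent(training_data, cycles=5, eta=3.0, batch_size=10, num_batches=20)
net.test(training_data, 0, 49)  # prints "Success: .../50"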