Try.py
import math
import numpy as np
from scipy import signal

as_strided = np.lib.stride_tricks.as_strided
"""
+ To be added:
+ all layer must define a method or attribute to return output shape
+ Layers:
+ trainable_parameters, input_shape, output_shape attributes must be stored in the base class Layers
+
+ MaxPool2d:
+ Conv2d:
+ Stride & Dilation to the Conv2d
+ Non-symmetric kernel_size
+ def __call__ vs __call__ :
+ def __call__(self, input): return self.forward(input) # This doesn't requires forward before
+ __call__ = forward # this line must come after the definition of forward method
+ (N,C,H,W)
N: Input Samples (aka batch)
C: Channels
H: Height
W: Width
+ The input shape (not only the number of channels but width & height as well) is needed to be able to calculate the bias shape, which is the same as the output shape.
+ The name 'input' is used rather than 'x' in the forward pass, because during the backward pass we need to refer to it.
"""
class Layer:
    def __init__(self): pass
    def __call__(self, x): return self.forward(x)
    def forward(self, input): raise NotImplementedError
    def backward(self, output_gradient, learning_rate): raise NotImplementedError
class MaxPool2d(Layer):
    def __init__(self, input_shape, kernel_size):
        """
        I'm not planning on adding stride to the Max-Pooling layer.
        If you want to drop more information, use a larger kernel instead.
        """
        self.input_shape = input_shape
        self.channels, self.Xh, self.Xw = input_shape
        self.Kh, self.Kw = kernel_size
        self.output_shape = self.channels, self.Xh//self.Kh, self.Xw//self.Kw
    def forward(self, input):
        out = np.zeros(self.output_shape)
        dtypeSize = input.itemsize  # size of one element in bytes = stride along the last axis
        for indx, channel in enumerate(input):
            # View each channel as (out_h, out_w, Kh, Kw) non-overlapping blocks, then take the max per block.
            out[indx] = as_strided(channel, shape=(self.Xh//self.Kh, self.Xw//self.Kw, self.Kh, self.Kw),
                                   strides=(self.Xw*self.Kh*dtypeSize, self.Kw*dtypeSize,
                                            self.Xw*dtypeSize, dtypeSize)).max(axis=(-2, -1))
        return out
    def backward(self, output_gradient):
        # Placeholder: gradients are not yet routed back to the max positions.
        return np.zeros(self.input_shape)
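# A small sanity check (a hypothetical helper, not part of the original script): the strided
# max-pooling above should match a naive block-by-block loop on a tiny 1x4x4 input.
def _maxpool_sanity_check():
    pool = MaxPool2d((1, 4, 4), (2, 2))
    x = np.arange(16, dtype=float).reshape(1, 4, 4)
    pooled = pool(x)                      # shape (1, 2, 2)
    naive = np.zeros((1, 2, 2))
    for i in range(2):
        for j in range(2):
            naive[0, i, j] = x[0, 2*i:2*i+2, 2*j:2*j+2].max()
    assert np.allclose(pooled, naive)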
class Dense(Layer):
    def __init__(self, input_shape, output_shape):
        # 'shape' means size (an int) in the case of Dense
        self.input_shape = input_shape
        self.output_shape = output_shape, 1
        self.layers_name = self.__class__.__name__
        lim = 1 / math.sqrt(input_shape)
        # Each row holds the weights connecting one neuron to all neurons in the previous layer,
        # so the forward pass needs no transpose and the weight matrix comes first.
        self.weights = np.random.uniform(-lim, lim, (output_shape, input_shape))
        self.bias = np.random.randn(output_shape, 1)
    def forward(self, input):
        self.input = input  # The input must be a single column vector.
        return np.dot(self.weights, self.input) + self.bias
    def backward(self, output_gradient, learning_rate):
        """
        To make it easier to remember/understand:
        + Both output_gradient & input are column vectors.
        + output_gradient has the same size as the output.
        + weights_gradient has the same shape as the weights, thus
            + its number of rows equals the length of output_gradient,
            + its number of columns equals the length of the input.
        + input_gradient has the same size as the input.
        """
        weights_gradient = np.dot(output_gradient, self.input.T)  # Nx1 • 1xM = NxM
        input_gradient = np.dot(self.weights.T, output_gradient)  # MxN • Nx1 = Mx1
        # optimizer.optimize(weights_gradient, output_gradient)
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * output_gradient
        return input_gradient
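# A quick shape check (hypothetical helper): a Dense(4 -> 3) layer maps a 4x1 column vector to a
# 3x1 column vector, and backward() maps a 3x1 output gradient back to a 4x1 input gradient.
def _dense_shape_check():
    layer = Dense(4, 3)
    x = np.random.randn(4, 1)
    y = layer(x)
    grad_input = layer.backward(np.ones((3, 1)), learning_rate=0.01)
    assert y.shape == (3, 1) and grad_input.shape == (4, 1)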
class Conv2d(Layer):
    """
    BackProp in CNN, explanation & how-to: youtu.be/Lakz2MoHy6o
    *: convolution
    ⋆: correlation
    !Note: X*K = X⋆rot180(K)
    """
    def __init__(self, input_shape, depth, kernel_size):
        # stride & dilation to be added
        # Only a symmetric (square) kernel_size is allowed for now.
        """
        We need to know the input_shape (channels, height & width) as soon as the layer is created,
        because we need to create & initialize the layer's weights & biases:
        + the weights shape is calculated from the kernel size, the input channels and the output depth,
        + the biases shape is calculated from the input shape, the kernel size and the output depth.
        The number of input channels is also needed during the forward/backward pass.
        The output shape could alternatively be read off the output of the convolution.
        """
        self.input_shape = input_shape
        self.channels, self.input_height, self.input_width = input_shape
        self.depth = depth  # Number of filters.
        self.output_shape = (depth, self.input_height - kernel_size + 1, self.input_width - kernel_size + 1)
        self.kernels_shape = (depth, self.channels, kernel_size, kernel_size)
        self.kernels = np.random.randn(*self.kernels_shape)  # (depth, channels, height, width)
        self.biases = np.random.randn(*self.output_shape)
    def forward(self, x):
        self.input = x  # Storing the input for the backward pass.
        self.output = np.copy(self.biases)  # start from a copy of the biases instead of zeros, so the bias addition is folded into the accumulation
        for i in range(self.depth):  # loop over depth first; each output channel is independent
            for j in range(self.channels):  # loop over input channels second; all channels must be summed
                self.output[i] += signal.correlate2d(self.input[j], self.kernels[i, j], "valid")
        return self.output
    def backward(self, output_gradient, learning_rate):
        # Using the output_gradient, we calculate the kernels_gradient and the input_gradient.
        # kernels_gradient: ∂E/∂K_ij = X_j ⋆ ∂E/∂Y_i  (valid correlation)
        # input_gradient:   ∂E/∂X_j = Σ_i ∂E/∂Y_i * K_ij = Σ_i ∂E/∂Y_i ⋆ rot180(K_ij)  !Note: both * & ⋆ are the "full" version here
        # biases_gradient:  ∂E/∂B_i = ∂E/∂Y_i  (the output_gradient itself)
        kernels_gradient = np.zeros(self.kernels_shape)
        input_gradient = np.zeros(self.input_shape)
        for i in range(self.depth):
            for j in range(self.channels):
                kernels_gradient[i, j] = signal.correlate2d(self.input[j], output_gradient[i], "valid")
                input_gradient[j] += signal.convolve2d(output_gradient[i], self.kernels[i, j], "full")
        # TODO: define a function responsible for updating the params, to be able to freeze layers.
        self.kernels -= learning_rate * kernels_gradient
        self.biases -= learning_rate * output_gradient
        return input_gradient
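# A minimal check (hypothetical helper): with one input channel and one filter, forward() reduces
# to a single "valid" cross-correlation plus the bias map.
def _conv_sanity_check():
    conv = Conv2d((1, 5, 5), depth=1, kernel_size=3)
    x = np.random.randn(1, 5, 5)
    expected = signal.correlate2d(x[0], conv.kernels[0, 0], "valid") + conv.biases[0]
    assert np.allclose(conv(x)[0], expected)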
class Reshape(Layer):
    """Unnecessary between Dense layers, but it is the simplest way to flatten a conv output for a Dense layer."""
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape
    def forward(self, input): return np.reshape(input, (self.output_shape, 1))
    # def __call__(self, input): return self.forward(input)
    def backward(self, output_gradient, learning_rate):
        return np.reshape(output_gradient, self.input_shape)
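# Round-trip sketch (hypothetical helper): Reshape flattens a (2, 3, 4) tensor into a 24x1 column
# vector, and backward() restores the original shape for the gradient.
def _reshape_round_trip():
    r = Reshape((2, 3, 4), 24)
    flat = r(np.random.randn(2, 3, 4))
    assert flat.shape == (24, 1)
    assert r.backward(flat, learning_rate=None).shape == (2, 3, 4)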
class Dropout(Layer):
    def __init__(self, input_shape, p=0.1):
        self.p = p  # Probability to drop
        self.input_shape = input_shape
        self.output_shape = input_shape
        self.mask = None
    def forward(self, input):
        self.mask = np.random.rand(*self.input_shape) < self.p
        output = np.copy(input)
        output[self.mask] = 0
        return output
    def backward(self, output_gradient):
        # The dropped positions contributed nothing to the output, so their gradient is zeroed;
        # the rest of the gradient passes through unchanged.
        input_gradient = np.copy(output_gradient)
        input_gradient[self.mask] = 0
        return input_gradient
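# Behavioral sketch (hypothetical helper): roughly a fraction p of the entries are zeroed in
# forward(), and backward() zeroes the gradient at exactly the same positions.
def _dropout_check():
    drop = Dropout((1000,), p=0.1)
    y = drop(np.ones(1000))
    assert abs((y == 0).mean() - 0.1) < 0.05     # about 10% of entries dropped
    grad = drop.backward(np.ones(1000))
    assert np.array_equal(grad == 0, drop.mask)  # gradient zeroed at the dropped positions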
if __name__ == "__main__":
    lr = 0.001
    x = np.random.randn(3, 28, 28)
    C1 = Conv2d(x.shape, 10, 3)
    C2 = Conv2d(C1.output_shape, 8, 3)
    MP = MaxPool2d(C2.output_shape, (2, 2))
    C3 = Conv2d(MP.output_shape, 6, 3)
    R = Reshape(C3.output_shape, np.prod(C3.output_shape))
    D1 = Dense(R.output_shape, 100)
    D2 = Dense(100, 10)
    x1 = C1(x)
    x2 = C2(x1)
    x2mp = MP(x2)
    x3 = C3(x2mp)
    x4 = R(x3)
    x5 = D1(x4)
    x6 = D2(x5)
    print("Forward ===========")
    print("Input:", x.shape)
    print(x1.shape)
    print(x2.shape)
    print(x2mp.shape)
    print(x3.shape)
    print(x4.shape)
    print(x5.shape)
    print(x6.shape)
    x_back1 = D2.backward(x6, lr)
    x_back2 = D1.backward(x_back1, lr)
    x_back3 = R.backward(x_back2, lr)
    x_back4 = C3.backward(x_back3, lr)
    x_back4mp = MP.backward(x_back4)
    x_back5 = C2.backward(x_back4mp, lr)
    x_back6 = C1.backward(x_back5, lr)
    print("Backward ===========")
    # print("Dense ---")
    print(x_back1.shape)
    print(x_back2.shape)
    # print("Reshape ---")
    print(x_back3.shape)
    # print("Conv ---")
    print(x_back4.shape)
    # print("Unpooling", x_back4mp.shape)
    print(x_back5.shape)
    print(x_back6.shape)