# neural_net.rb

class NeuralNet
  attr_reader :shape, :outputs
  attr_accessor :weights, :weight_update_values

  DEFAULT_TRAINING_OPTIONS = {
    max_iterations: 1_000,
    error_threshold: 0.01
  }

  def initialize shape
    @shape = shape
  end

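  # Illustrative example (the shape values here are hypothetical): a shape of
  # [3, 4, 2] describes a network with 3 input neurons, one hidden layer of
  # 4 neurons, and 2 output neurons.
  #
  #   net = NeuralNet.new [3, 4, 2]
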
  def run input
    # Input to this method represents the output of the first layer (i.e., the input layer)
    @outputs = [input]
    set_initial_weight_values if @weights.nil?

    # Now calculate the output of neurons in each subsequent layer:
    1.upto(output_layer).each do |layer|
      source_layer = layer - 1 # i.e., the layer that is feeding into this one
      source_outputs = @outputs[source_layer]

      @outputs[layer] = @weights[layer].map do |neuron_weights|
        # inputs to this neuron are the neuron outputs from the source layer times weights
        inputs = neuron_weights.map.with_index do |weight, i|
          source_output = source_outputs[i] || 1 # if no output, this is the bias neuron
          weight * source_output
        end

        sum_of_inputs = inputs.reduce(:+)
        # the activated output of this neuron (using the sigmoid activation function)
        sigmoid sum_of_inputs
      end
    end

    # Outputs of neurons in the last layer are the final result
    @outputs[output_layer]
  end

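  # Forward-pass sketch (hypothetical input, reusing the [3, 4, 2] shape above):
  #
  #   net.run [0.1, 0.5, 0.9] # => Array of 2 sigmoid outputs, each in (0, 1)
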
  def train inputs, expected_outputs, opts = {}
    opts = DEFAULT_TRAINING_OPTIONS.merge(opts)
    error_threshold, log_every = opts[:error_threshold], opts[:log_every]
    iteration, error = 0, 0

    set_initial_weight_update_values if @weight_update_values.nil?
    set_weight_changes_to_zeros
    set_previous_gradients_to_zeroes

    while iteration < opts[:max_iterations]
      iteration += 1

      error = train_on_batch(inputs, expected_outputs)

      if log_every && (iteration % log_every == 0)
        puts "[#{iteration}] #{(error * 100).round(2)}% mse"
      end

      break if error_threshold && (error < error_threshold)
    end

    {
      error: error.round(5),
      iterations: iteration,
      below_error_threshold: !!(error_threshold && error < error_threshold)
    }
  end

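  # Training sketch (inputs/expected_outputs are hypothetical arrays of equal
  # length; log_every is optional and off by default):
  #
  #   result = net.train inputs, expected_outputs,
  #                      error_threshold: 0.005,
  #                      max_iterations: 2_000,
  #                      log_every: 100
  #   result # => {error: ..., iterations: ..., below_error_threshold: ...}
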
  private

  def train_on_batch inputs, expected_outputs
    total_mse = 0

    set_gradients_to_zeroes

    inputs.each.with_index do |input, i|
      run input
      training_error = calculate_training_error expected_outputs[i]
      update_gradients training_error
      total_mse += mean_squared_error training_error
    end

    update_weights

    total_mse / inputs.length.to_f # average mean squared error for the batch
  end

  def calculate_training_error ideal_output
    @outputs[output_layer].map.with_index do |output, i|
      output - ideal_output[i]
    end
  end

  def update_gradients training_error
    deltas = {}
    # Starting from the output layer and working backwards, backpropagating the training error
    output_layer.downto(1).each do |layer|
      deltas[layer] = []

      @shape[layer].times do |neuron|
        neuron_error = if layer == output_layer
          -training_error[neuron]
        else
          target_layer = layer + 1

          weighted_target_deltas = deltas[target_layer].map.with_index do |target_delta, target_neuron|
            target_weight = @weights[target_layer][target_neuron][neuron]
            target_delta * target_weight
          end

          weighted_target_deltas.reduce(:+)
        end

        output = @outputs[layer][neuron]
        activation_derivative = output * (1.0 - output)

        delta = deltas[layer][neuron] = neuron_error * activation_derivative

        # The gradient for each of this neuron's incoming weights is calculated:
        # the last output from the incoming source neuron (from the -1 layer)
        # times this neuron's delta (calculated from the error coming back from the +1 layer)
        source_neurons = @shape[layer - 1] + 1 # account for bias neuron
        source_outputs = @outputs[layer - 1]
        gradients = @gradients[layer][neuron]

        source_neurons.times do |source_neuron|
          source_output = source_outputs[source_neuron] || 1 # if no output, this is the bias neuron
          gradient = source_output * delta
          gradients[source_neuron] += gradient # accumulate gradients from the batch
        end
      end
    end
  end

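  # In equation form, with sigmoid activation f:
  #   output neuron:  delta_j = -(output_j - ideal_j) * f'(net_j)
  #   hidden neuron:  delta_j = (sum over k of delta_k * w_kj) * f'(net_j)
  # where f'(net_j) = output_j * (1 - output_j), and the term accumulated for
  # each incoming weight w_ji is output_i * delta_j.
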
  MIN_STEP, MAX_STEP = Math.exp(-6), 50

  # Now that we've calculated gradients for the batch, we can use these to update the weights
  # using the RPROP algorithm - somewhat more complicated than the classic backpropagation
  # algorithm, but much faster
  def update_weights
    1.upto(output_layer) do |layer|
      source_layer = layer - 1
      source_neurons = @shape[source_layer] + 1 # account for bias neuron

      @shape[layer].times do |neuron|
        source_neurons.times do |source_neuron|
          weight_change = @weight_changes[layer][neuron][source_neuron]
          weight_update_value = @weight_update_values[layer][neuron][source_neuron]
          # for RPROP, we use the negative of the calculated gradient
          gradient = -@gradients[layer][neuron][source_neuron]
          previous_gradient = @previous_gradients[layer][neuron][source_neuron]

          c = sign(gradient * previous_gradient)

          case c
          when 1 # no sign change; accelerate gradient descent
            weight_update_value = [weight_update_value * 1.2, MAX_STEP].min
            weight_change = -sign(gradient) * weight_update_value
          when -1 # sign change; we've jumped over a local minimum
            weight_update_value = [weight_update_value * 0.5, MIN_STEP].max
            weight_change = -weight_change # roll back the previous weight change
            gradient = 0 # so we won't trigger a sign change on the next update
          when 0
            weight_change = -sign(gradient) * weight_update_value
          end

          @weights[layer][neuron][source_neuron] += weight_change
          @weight_changes[layer][neuron][source_neuron] = weight_change
          @weight_update_values[layer][neuron][source_neuron] = weight_update_value
          @previous_gradients[layer][neuron][source_neuron] = gradient
        end
      end
    end
  end

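  # The 1.2 / 0.5 step-size factors above, and the 0.1 initial update value
  # below, are the standard eta+ / eta- / delta-zero choices from Riedmiller
  # & Braun's RPROP paper; this variant also rolls back the previous weight
  # change when the gradient changes sign.
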
  def set_weight_changes_to_zeros
    @weight_changes = build_connection_matrixes { 0.0 }
  end

  def set_gradients_to_zeroes
    @gradients = build_connection_matrixes { 0.0 }
  end

  def set_previous_gradients_to_zeroes
    @previous_gradients = build_connection_matrixes { 0.0 }
  end

  def set_initial_weight_update_values
    @weight_update_values = build_connection_matrixes { 0.1 }
  end

  def set_initial_weight_values
    # Initialize all weights to random float values
    @weights = build_connection_matrixes { rand(-0.5..0.5) }

    # Update the weights for the first hidden layer (Nguyen-Widrow method).
    # This is a bit obscure, and not entirely necessary, but it should help the network train faster
    beta = 0.7 * @shape[1]**(1.0 / @shape[0])

    @shape[1].times do |neuron|
      weights = @weights[1][neuron]
      norm = Math.sqrt weights.map {|w| w**2 }.reduce(:+)
      updated_weights = weights.map {|weight| (beta * weight) / norm }
      @weights[1][neuron] = updated_weights
    end
  end

  def build_connection_matrixes
    1.upto(output_layer).inject({}) do |hsh, layer|
      # Number of incoming connections to each neuron in this layer:
      source_neurons = @shape[layer - 1] + 1 # == number of neurons in the previous layer + a bias neuron

      # matrix[neuron] == Array of values for each incoming connection to that neuron
      matrix = Array.new(@shape[layer]) do |neuron|
        Array.new(source_neurons) { yield }
      end

      hsh[layer] = matrix
      hsh
    end
  end

  def output_layer
    @shape.length - 1
  end

  def sigmoid x
    1 / (1 + Math.exp(-x))
  end

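  # Note: the sigmoid derivative satisfies f'(x) = f(x) * (1 - f(x)), which
  # is why update_gradients can compute the activation derivative directly
  # from a neuron's output as output * (1.0 - output).
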
  def mean_squared_error errors
    errors.map {|e| e**2 }.reduce(:+) / errors.length.to_f
  end

  ZERO_TOLERANCE = Math.exp(-16)

  def sign x
    if x > ZERO_TOLERANCE
      1
    elsif x < -ZERO_TOLERANCE
      -1
    else
      0 # x is zero, or a float very close to zero
    end
  end

  def marshal_dump
    [@shape, @weights, @weight_update_values]
  end

  def marshal_load array
    @shape, @weights, @weight_update_values = array
  end

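  # Marshal round-trip sketch (the file name is hypothetical):
  #
  #   File.binwrite 'net.dump', Marshal.dump(net)
  #   net = Marshal.load File.binread('net.dump')
  #
  # Only the shape, weights, and RPROP update values survive the round trip;
  # transient batch state (gradients, weight changes) is rebuilt on the next train.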
end
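
# A minimal end-to-end sketch (XOR; the data and hyperparameters here are
# illustrative choices, not part of the library). Runs only when this file
# is executed directly, e.g. `ruby neural_net.rb`:
if __FILE__ == $0
  srand 1 # fixed seed so this illustrative run is repeatable

  xor_inputs  = [[0, 0], [0, 1], [1, 0], [1, 1]]
  xor_outputs = [[0], [1], [1], [0]]

  net = NeuralNet.new [2, 3, 1] # 2 inputs, 3 hidden neurons, 1 output

  result = net.train xor_inputs, xor_outputs,
                     error_threshold: 0.005,
                     max_iterations: 10_000

  puts "Stopped after #{result[:iterations]} iterations (MSE #{result[:error]})"

  xor_inputs.each do |input|
    puts "#{input.inspect} => #{net.run(input).first.round(3)}"
  end
end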