simple_esn.hpp
#pragma once
// Echo state network class and training functions. //
#include "arrayfire_utils.hpp"
#include "common.hpp"
#include "net.hpp"
#include <arrayfire.h>
#include <boost/program_options.hpp>
#include <cassert>
namespace esn {
namespace po = boost::program_options;
/// Echo state network.
template <af::dtype DType = af::dtype::f64>
class simple_esn : public net_base {
// the number of reservoir neurons
long n_;
// the number of input neurons
long n_ins_;
// the number of output neurons
long n_outs_;
// reservoir connections
// a matrix of size n x n
af::array reservoir_w_;
// input weights
// a matrix of size n x n_ins; each row holds the weights from all inputs
// into a single reservoir neuron
af::array input_w_;
// output weights
// a matrix of size n_outs x (n + 1); each column holds the output weights
// for a single neuron, and the last column is the intercept
af::array output_w_;
// feedback weights
af::array feedback_w_;
// biases
// a single vector of length `n`
af::array biases_;
// whether the random noise is enabled/disabled
bool noise_enabled_;
// standard deviation of the state noise
double noise_;
// leakage rate
double leakage_;
// the current state
af::array state_;
// the last output of the net
af::array last_output_;
// random engines
std::mt19937* prng_;
af::randomEngine af_prng_;
public:
simple_esn() = default;
/// Echo state network constructor.
simple_esn(
long n,
long n_ins,
long n_outs,
af::array reservoir_w,
af::array input_w,
af::array feedback_w,
af::array biases,
double noise,
double leakage,
std::mt19937& prng)
: n_{n}
, n_ins_{n_ins}
, n_outs_{n_outs}
, reservoir_w_{std::move(reservoir_w)}
, input_w_{std::move(input_w)}
, output_w_{af::constant(af::NaN, n_outs_, n_ + 1, DType)}
, feedback_w_{std::move(feedback_w)}
, biases_{std::move(biases)}
, noise_enabled_{true}
, noise_{noise}
, leakage_{leakage}
, state_{af::constant(0, n_, DType)}
, last_output_{af::constant(0, n_outs_, DType)}
, prng_{&prng}
, af_prng_{AF_RANDOM_ENGINE_DEFAULT, prng_->operator()()}
{
// check types
assert((reservoir_w_.type() == DType));
assert((input_w_.type() == DType));
assert((feedback_w_.type() == DType));
assert((biases_.type() == DType));
assert((state_.type() == DType));
// check dimensions
assert((reservoir_w_.dims() == af::dim4{n_, n_}));
assert((input_w_.dims() == af::dim4{n_, n_ins_}));
assert((feedback_w_.dims() == af::dim4{n_, n_outs_}));
assert((biases_.dims() == af::dim4{n_}));
assert((state_.dims() == af::dim4{n_}));
}
/// Echo state network constructor with no biases.
simple_esn(
long n,
long n_ins,
long n_outs,
af::array reservoir_w,
af::array input_w,
af::array feedback_w,
double noise,
double leakage,
std::mt19937& prng)
: simple_esn{
n,
n_ins,
n_outs,
std::move(reservoir_w),
std::move(input_w),
std::move(feedback_w),
af::constant(0, n, DType),
noise,
leakage,
prng}
{
}
/// Perform a single step with multiple inputs.
/// \param input The input values of size [n_ins].
/// \param feedback The teacher-forced feedback to be used instead
/// of the network's output of size [n_outs].
/// \param desired The desired output. This is only used for callbacks.
/// This is only allowed if no feedback is provided.
/// Has to be of size [n_outs].
void step(
const af::array& input,
const std::optional<af::array>& feedback = std::nullopt,
const std::optional<af::array>& desired = std::nullopt) override
{
assert(!desired || !feedback);
assert((input.dims() == af::dim4{n_ins_}));
assert((!feedback || feedback->dims() == af::dim4{n_outs_}));
assert((!desired || desired->dims() == af::dim4{n_outs_}));
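// The update below implements a leaky integrator with multiplicative
// state noise:
//   x(t+1) = (1 - leakage) * x(t)
//          + tanh((W*x(t) + W_in*u(t) + W_fb*y(t) + b) .* (1 + noise*xi)),
// where xi ~ N(0, 1) elementwise, u(t) is the input and y(t) is the
// previous output (or the teacher-forced feedback from the previous step).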
af::array weighted_input = af::matmul(input_w_, input);
af::array feedback_activation = af::matmul(feedback_w_, last_output_);
af::array internal_activation = af::matmul(reservoir_w_, state_);
af::array noise =
1. + af::randn({state_.dims()}, DType, af_prng_) * noise_ * noise_enabled_;
af::array state_before_activation = std::move(weighted_input)
+ std::move(internal_activation) + std::move(feedback_activation) + biases_;
state_ *= 1. - leakage_;
state_ += af::tanh(state_before_activation * noise);
af::eval(state_);
if (feedback)
last_output_ = *feedback;
else
last_output_ = af::matmul(output_w_, af_utils::add_ones(state_, 0));
assert(last_output_.dims() == (af::dim4{n_outs_}));
// Call the registered callback functions.
for (on_state_change_callback_t& fnc : on_state_change_callbacks_) {
on_state_change_data data = {
.state = state_, .input = input, .output = last_output_, .desired = desired};
fnc(*this, std::move(data));
}
}
/// Perform multiple steps over a sequence of inputs.
/// \param input Input sequence of dimensions [n_ins, time].
/// \param feedback The desired output sequences to be teacher-forced into the net.
/// Needs to have dimensions [n_outs, time]
/// \param desired The desired output. This is only used for callbacks.
/// This is only allowed if no feedback is provided.
/// Has to be of size [n_outs, time].
/// \return The array of intermediate states of dimensions [n, time] and the array
/// of intermediate outputs of dimensions [n_outs, time].
feed_result_t feed(
const af::array& input,
const std::optional<af::array>& feedback = std::nullopt,
const std::optional<af::array>& desired = std::nullopt) override
{
assert(!desired || !feedback);
assert(input.type() == DType);
assert(input.numdims() == 2);
assert(input.dims(0) == n_ins_);
assert(input.dims(1) > 0);
assert(!feedback || feedback->type() == DType);
assert(!feedback || feedback->numdims() <= 2);
assert(!feedback || feedback->dims(0) == n_outs_);
assert(!feedback || feedback->dims(1) == input.dims(1));
assert(!desired || desired->type() == DType);
assert(!desired || desired->numdims() <= 2);
assert(!desired || desired->dims(0) == n_outs_);
assert(!desired || desired->dims(1) == input.dims(1));
feed_result_t result;
result.states = af::array(n_, input.dims(1), DType);
result.outputs = af::array(n_outs_, input.dims(1), DType);
for (long i = 0; i < input.dims(1); ++i) {
std::optional<af::array> feedback_i;
if (feedback) feedback_i = feedback.value()(af::span, i);
std::optional<af::array> desired_i;
if (desired) desired_i = desired.value()(af::span, i);
step(input(af::span, i), feedback_i, desired_i);
result.states(af::span, i) = state_;
result.outputs(af::span, i) = last_output_;
}
return result;
}
/// Train the network on the given sequence.
/// \param inputs Input sequence of dimensions [n_ins, time].
/// \param desired The desired output sequences. Those are also teacher-forced into the net.
/// Needs to have dimensions [n_outs, time]
void train(const af::array& input, const af::array& desired) override
{
assert(input.type() == DType);
assert(input.numdims() == 2);
assert(input.dims(0) == n_ins_);
assert(input.dims(1) > 0);
assert(desired.type() == DType);
assert(desired.numdims() <= 2);
assert(desired.dims(0) == n_outs_);
assert(desired.dims(1) == input.dims(1));
af::array states = feed(input, desired).states;
assert((states.dims() == af::dim4{state_.elements(), input.dims(1)}));
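// Fit the linear readout by least squares on the collected states:
// output_w_ = argmin_W ||[X; 1]^T W^T - D^T||_F, with X the [n, time]
// state matrix and D the desired outputs. The intercept column is
// assumed to be appended inside af_utils::lstsq_train, which is why
// output_w_ ends up with n + 1 columns (see the assert below).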
output_w_ = af_utils::lstsq_train(states.T(), desired.T()).T();
assert((output_w_.dims() == af::dim4{n_outs_, state_.elements() + 1}));
}
/// Perform multiple steps using self's output as input.
/// \param n_steps The number of steps to take.
/// \param desired The desired output. This is used only for callbacks.
/// Needs to have dimensions [n_outs, time]
/// \return The same as \ref feed().
feed_result_t
loop(long n_steps, const std::optional<af::array>& desired = std::nullopt) override
{
assert(n_ins_ == n_outs_);
assert(n_steps > 0);
assert(!desired || desired->type() == DType);
assert(!desired || desired->numdims() <= 2);
assert(!desired || desired->dims(0) == n_outs_);
assert(!desired || desired->dims(1) == n_steps);
feed_result_t result;
result.states = af::array(n_, n_steps, DType);
result.outputs = af::array(n_outs_, n_steps, DType);
for (long i = 0; i < n_steps; ++i) {
std::optional<af::array> desired_i;
if (desired) desired_i = desired.value()(af::span, i);
// Create a shallow copy of `last_output_` so that when step() changes it,
// its `input` parameter does not change (it is taken by const reference).
step(af::array(last_output_), std::nullopt, desired_i);
result.states(af::span, i) = state_;
result.outputs(af::span, i) = last_output_;
}
return result;
}
/// Get the current state of the network.
const af::array& state() const override
{
return state_;
}
/// Set the current state of the network.
void state(af::array new_state) override
{
assert(new_state.type() == DType);
state_ = std::move(new_state);
}
/// Get the reservoir weights of the network.
const af::array& reservoir_weights() const
{
return reservoir_w_;
}
/// Get the input weights of the network.
const af::array& input_weights() const
{
return input_w_;
}
/// The number of neurons.
long n() const
{
return n_;
}
/// The number of inputs.
long n_ins() const override
{
return n_ins_;
}
/// The number of outputs.
long n_outs() const override
{
return n_outs_;
}
/// The average number of inputs to a neuron.
double neuron_ins() const override
{
// count the nonzero incoming weights of each neuron, then average
af::array in = af::sum(reservoir_w_ != 0, 1);
return af::mean<double>(in);
}
/// Set the learning rate.
void learning_rate(double) override
{
// Not implemented.
}
/// Enable or disable the random state noise, e.g., disable it for Lyapunov exponent testing.
void random_noise(bool enable) override
{
noise_enabled_ = enable;
}
std::unique_ptr<net_base> clone() const override
{
return std::make_unique<simple_esn>(*this);
}
};
/// Generate a random echo state network.
///
/// \tparam DType The arrayfire data type.
/// \param n_ins The number of inputs.
/// \param n_outs The number of outputs.
/// \param args The parameters from which the network is constructed.
/// \param prng The random generator used to generate the weights.
template <af::dtype DType = af::dtype::f64>
simple_esn<DType>
random_esn(long n_ins, long n_outs, const po::variables_map& args, std::mt19937& prng)
{
// The number of neurons.
long n = args.at("esn.neurons").as<long>();
// Standard deviation of the normal distribution generating the reservoir.
double sigma_res = args.at("esn.sigma-res").as<double>();
// The mean of the normal distribution generating the reservoir.
double mu_res = args.at("esn.mu-res").as<double>();
// The bound for the input weights.
// Those are sampled uniformly between 0 and in_upper.
double in_upper = args.at("esn.in-weight").as<double>();
// The bound for the feedback weights.
// Those are sampled uniformly between 0 and fb_upper.
double fb_upper = args.at("esn.fb-weight").as<double>();
// The sparsity of the reservoir weight matrix. For 0, the matrix is
// fully connected. For 1, the matrix is completely zero.
double sparsity = args.at("esn.sparsity").as<double>();
// Standard deviation of the noise added to the states.
double noise = args.at("esn.noise").as<double>();
// The leakage rate.
double leakage = args.at("esn.leakage").as<double>();
af::randomEngine af_prng{AF_RANDOM_ENGINE_DEFAULT, prng()};
af::array reservoir_w = sigma_res * af::randn({n, n}, DType, af_prng) + mu_res;
af::array input_w = af::randu({n, n_ins}, DType, af_prng) * in_upper;
af::array feedback_w = af::randu({n, n_outs}, DType, af_prng) * fb_upper;
// make the reservoir sparse: each weight is zeroed with probability `sparsity`
reservoir_w *= af::randu({reservoir_w.dims()}, DType, af_prng) >= sparsity;
return simple_esn<DType>{
n, n_ins, n_outs, std::move(reservoir_w), std::move(input_w), std::move(feedback_w),
noise, leakage, prng};
}
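// A minimal usage sketch (illustrative only; `argc`/`argv` come from main()
// and `xs`/`ys` stand for user-provided [n_ins, time] and [n_outs, time]
// arrays):
//
//   po::variables_map args;
//   po::store(po::parse_command_line(argc, argv, esn::esn_arg_description()), args);
//   po::notify(args);
//   std::mt19937 prng{42};
//   auto net = esn::random_esn(1, 1, args, prng);
//   net.train(xs, ys);                              // fit the linear readout
//   af::array prediction = net.loop(100).outputs;   // 100 free-running steps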
/// Echo state network options description for command line parsing.
inline po::options_description esn_arg_description()
{
po::options_description esn_arg_desc{"Echo state network options"};
esn_arg_desc.add_options() //
("esn.neurons", po::value<long>()->default_value(128), //
"The number of neurons.") //
("esn.sigma-res", po::value<double>()->default_value(0.19762725044833218), //
"See random_esn().") //
("esn.mu-res", po::value<double>()->default_value(-0.0068959284626413861), //
"See random_esn().") //
("esn.in-weight", po::value<double>()->default_value(-0.004004819844231784), //
"See random_esn().") //
("esn.fb-weight", po::value<double>()->default_value(3.8736953058739977e-05), //
"See random_esn().") //
("esn.sparsity", po::value<double>()->default_value(0.81627753813108161), //
"See random_esn().") //
("esn.noise", po::value<double>()->default_value(1e-8), //
"Standard deviation of the noise added to the states of the network.") //
("esn.leakage", po::value<double>()->default_value(0.96129431689957912), //
"See random_esn()."); //
return esn_arg_desc;
}
} // end namespace esn