#pragma once
// Locally connected Echo state network class and training functions. //
#include "common.hpp"
#include "net.hpp"
#include "simple_esn.hpp"
#include <arrayfire.h>
#include <boost/program_options.hpp>
#include <cassert>
#include <random>
namespace esn {
namespace po = boost::program_options;
/// Locally connected network configuration.
struct lcnn_config {
/// The initial state of the network of size [state_height, state_width].
af::array init_state;
/// The reservoir weights of size [state_height*state_width, state_height*state_width].
af::array reservoir_w_full;
/// The reservoir biases of size [state_height, state_width].
af::array reservoir_b;
/// The input weights of size [state_height, state_width, n_ins].
af::array input_w;
/// The feedback weights of size [state_height, state_width, n_outs].
af::array feedback_w;
// Learning is not available at the moment.
// /// If it is set to 0.0, the network does not learn.
// double learning_rate = 0;
// /// The multiplier of the difference between the neuron's state and the
// /// exponential moving average.
// double learning_rate_ema = 0;
// /// The multiplier for the difference between two potentials (post - pre).
// double learning_rate_diff = 0;
// /// The constant towards which are the neurons biased.
// double learning_bias = 0;
// /// The rate by which do the weights change to get neurons towards the bias.
// double learning_rate_bias = 0;
// /// The rate of exponential weight decay.
// double weight_decay = 0;
// /// The decay rate of the exponential moving average of the state used for weight learning.
// double state_ema_decay = 0;
/// The probability that a neuron will fire in every moment.
double random_spike_prob = 0;
/// The standard deviation of the random spike.
double random_spike_std = 0;
/// The standard deviation of the noise added to the potentials.
double noise = 0;
/// The leakage of the potential.
double leakage = 1.0;
lcnn_config() = default;
lcnn_config(const po::variables_map& args)
{
// Learning is not available at the moment.
// learning_rate = args.at("lcnn.learning-rate").as<double>();
// learning_rate_ema = args.at("lcnn.learning-rate-ema").as<double>();
// learning_rate_diff = args.at("lcnn.learning-rate-diff").as<double>();
// learning_bias = args.at("lcnn.learning-bias").as<double>();
// learning_rate_bias = args.at("lcnn.learning-rate-bias").as<double>();
// weight_decay = args.at("lcnn.weight-decay").as<double>();
// state_ema_decay = args.at("lcnn.state-ema-decay").as<double>();
random_spike_prob = args.at("lcnn.random-spike-prob").as<double>();
random_spike_std = args.at("lcnn.random-spike-std").as<double>();
noise = args.at("lcnn.noise").as<double>();
leakage = args.at("lcnn.leakage").as<double>();
}
};
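// A minimal sketch (illustrative values only, not part of the original
// interface) of filling an lcnn_config by hand instead of from program options:
//
//     lcnn_config cfg;
//     cfg.init_state = af::constant(0, 11, 11, af::dtype::f64);
//     cfg.reservoir_w_full = 0.1 * af::randn(121, 121, af::dtype::f64);
//     cfg.reservoir_b = af::constant(0, 11, 11, af::dtype::f64);
//     cfg.input_w = af::constant(0.1, 11, 11, 1, af::dtype::f64);   // n_ins == 1
//     cfg.feedback_w = af::constant(0., 11, 11, 1, af::dtype::f64); // n_outs == 1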
/// An advanced echo state network with a two-dimensional state and various reservoir topologies.
template <af::dtype DType = af::dtype::f64>
class lcnn : public net_base {
protected:
long n_ins_;
long n_outs_;
af::array state_delta_; // working variable used during the step function
af::array state_;
af::array last_output_; // the last output of the net
af::array reservoir_w_full_;
af::array reservoir_b_;
af::array input_w_;
af::array feedback_w_;
af::array output_w_;
// Random engines.
std::mt19937* prng_;
af::randomEngine af_prng_;
// Learning is not available at the moment.
// af::array state_ema_; // exponential moving average
// double learning_rate_;
// double learning_rate_ema_;
// double learning_rate_diff_;
// double learning_bias_;
// double learning_rate_bias_;
// double weight_decay_;
// double state_ema_decay_;
bool noise_enabled_;
double random_spike_prob_;
double random_spike_std_;
double noise_;
double leakage_;
af::array update_via_weights_matmul_impl(const af::array& state) const
{
af::array new_state = af::matmul(reservoir_w_full_, af::flat(state));
return af::moddims(std::move(new_state), state.dims());
}
/// Update the state matrix using masked matrix multiplication.
virtual void update_via_weights_matmul()
{
state_delta_ = update_via_weights_matmul_impl(state_);
}
/// Update the state matrix by adding the inputs.
virtual void update_via_input(const af::array& input)
{
af::array input_w = af::moddims(input_w_, state_.dims(0) * state_.dims(1), n_ins_);
af::array delta = af::matmul(std::move(input_w), input);
state_delta_ += af::moddims(std::move(delta), state_.dims());
}
/// Update the state matrix by adding the feedback.
virtual void update_via_feedback(const af::array& feedback)
{
af::array feedback_w = af::moddims(feedback_w_, state_.dims(0) * state_.dims(1), n_outs_);
af::array delta = af::matmul(std::move(feedback_w), feedback);
state_delta_ += af::moddims(std::move(delta), state_.dims());
}
/// Update the state matrix by adding the bias.
virtual void update_via_bias()
{
state_delta_ += reservoir_b_;
}
/// Update the state matrix by applying the activation function.
virtual void update_via_activation()
{
// Add spurious spikes. Each neuron spikes with probability
// `random_spike_prob_`; a spike adds `random_spike_std_` to its potential.
if (noise_enabled_ && random_spike_prob_ > 0.) {
af::array spikes =
af::randu({state_.dims()}, DType, af_prng_) < random_spike_prob_;
state_delta_ += spikes.as(DType) * random_spike_std_;
}
// Add noise to the states.
if (noise_enabled_ && noise_ != 0.)
state_delta_ *= 1. + af::randn({state_.dims()}, DType, af_prng_) * noise_;
// Leak some potential.
state_ *= 1. - leakage_;
// Apply the activation function.
state_ += af::tanh(std::move(state_delta_));
// Evaluate the new state eagerly.
af::eval(state_);
}
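// Ignoring noise and random spikes, one step of the update_via_* methods
// above computes
//
//     state <- (1 - leakage) * state
//              + tanh(W_res * flat(state) + W_in * input + W_fb * output + b)
//
// where W_res is reservoir_w_full_, W_in is input_w_ (flattened), W_fb is
// feedback_w_ (flattened), and b is reservoir_b_.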
// Learning is not available at the moment.
// /// Update the weights.
// ///
// /// WARNING: This algorithm does not successfully learn yet.
// virtual void update_weights(
// const af::array& /* unwrapped_weights */,
// const af::array& unwrapped_old_state)
// {
// int kernel_height = reservoir_w_.dims(2);
// int kernel_width = reservoir_w_.dims(3);
// // calculate presynaptic and postsynaptic states for each connection
// af::array presynaptic_state = unwrapped_old_state;
// af::array postsynaptic_state =
// af::tile(af::flat(state_), 1, kernel_height * kernel_width);
// // match state_ema_ to each of the connections
// af::array state_ema =
// af::tile(af::flat(state_ema_), 1, kernel_height * kernel_width);
// // Update the weights according to the difference from ema.
// af::array weight_delta =
// learning_rate_ema_ * af::pow(postsynaptic_state - state_ema, 3.);
// // Update the weights according to the difference between pre and
// // postsynaptic state.
// weight_delta +=
// learning_rate_diff_ * af::pow(postsynaptic_state - presynaptic_state, 3.);
// // Update the weights towards the bias.
// weight_delta +=
// learning_rate_bias_ * af::pow(postsynaptic_state - learning_bias_, 3.);
// // Reshape the weight deltas back to the reservoir_w_ shape and update.
// weight_delta = af::moddims(std::move(weight_delta), reservoir_w_.dims());
// reservoir_w_ += learning_rate_ * std::move(weight_delta);
// // decay the weights
// reservoir_w_ *= 1. - weight_decay_;
// af::eval(reservoir_w_);
// }
/// Update the last output of the network after having a new state.
virtual void update_last_output(const std::optional<af::array>& feedback = std::nullopt)
{
if (feedback) {
last_output_ = *feedback;
} else {
af::array flat_state_1 = af_utils::add_ones(af::flat(state_), 0);
last_output_ = af::clamp(af::matmul(output_w_, flat_state_1), -1., 1.);
}
assert(last_output_.dims() == (af::dim4{n_outs_}));
}
public:
lcnn() = default;
/// Locally connected echo state network constructor.
lcnn(lcnn_config cfg, std::mt19937& prng)
: n_ins_{cfg.input_w.dims(2)}
, n_outs_{cfg.feedback_w.dims(2)}
, last_output_{af::constant(0, n_outs_, DType)}
, output_w_{af::constant(af::NaN, n_outs_, cfg.init_state.elements() + 1, DType)}
, prng_{&prng}
, af_prng_{AF_RANDOM_ENGINE_DEFAULT, prng_->operator()()}
// Learning is not available at the moment.
// , learning_rate_ {cfg.learning_rate}
// , learning_rate_ema_ {cfg.learning_rate_ema}
// , learning_rate_diff_ {cfg.learning_rate_diff}
// , learning_bias_ {cfg.learning_bias}
// , learning_rate_bias_ {cfg.learning_rate_bias}
// , weight_decay_ {cfg.weight_decay}
// , state_ema_decay_ {cfg.state_ema_decay}
, noise_enabled_{true}
, random_spike_prob_{cfg.random_spike_prob}
, random_spike_std_{cfg.random_spike_std}
, noise_{cfg.noise}
, leakage_{cfg.leakage}
{
state(std::move(cfg.init_state));
reservoir_weights_full(std::move(cfg.reservoir_w_full));
reservoir_biases(std::move(cfg.reservoir_b));
input_weights(std::move(cfg.input_w));
feedback_weights(std::move(cfg.feedback_w));
}
/// Perform a single step with a single input.
/// \param input The input value.
/// \param feedback The teacher-forced feedback to be used instead
/// of the network's output.
/// \param desired The desired output. This is only used for callbacks.
/// This is only allowed if no feedback is provided.
/// Has to be of size [n_outs].
void step(
const af::array& input,
const std::optional<af::array>& feedback = std::nullopt,
const std::optional<af::array>& desired = std::nullopt) override
{
assert(!desired || !feedback);
assert(input.dims() == af::dim4{n_ins_});
assert((!feedback || feedback->dims() == af::dim4{n_outs_}));
assert((!feedback || af::allTrue<bool>(feedback.value() >= -1. && feedback.value() <= 1.)));
assert((!desired || desired->dims() == af::dim4{n_outs_}));
assert((!desired || af::allTrue<bool>(desired.value() >= -1. && desired.value() <= 1.)));
// Update the internal state.
update_via_weights_matmul();
// add input
update_via_input(input);
// add feedback
update_via_feedback(last_output_);
// add bias
update_via_bias();
// activation function
update_via_activation();
// Learning is not available at the moment.
// if (learning_rate_ != 0) {
// // update state exponential moving average
// update_state_ema();
// // update the weights
// update_weights(unwrapped_weights, unwrapped_state);
// }
// update last output after we have a new state
update_last_output(feedback);
// Call the registered callback functions.
for (on_state_change_callback_t& fnc : on_state_change_callbacks_) {
on_state_change_data data = {
.state = state_, .input = input, .output = last_output_, .desired = desired};
fnc(*this, std::move(data));
}
}
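// A minimal usage sketch (illustrative): driving a network `net` with
// n_ins == n_outs == 1 one step at a time.
//
//     af::array u = af::constant(0.5, 1, af::dtype::f64);
//     net.step(u);                                          // free-running
//     af::array teacher = af::constant(0.1, 1, af::dtype::f64);
//     net.step(u, teacher);                                 // teacher-forced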
/// Perform multiple steps over a whole input sequence.
/// \param input Input sequence of dimensions [n_ins, time].
/// \param feedback The desired output sequence to be teacher-forced into the net.
/// Needs to have dimensions [n_outs, time].
/// \param desired The desired output. This is only used for callbacks.
/// This is only allowed if no feedback is provided.
/// Has to be of size [n_outs, time].
/// \return The array of intermediate states of dimensions [state_height, state_width, time]
/// and the array of intermediate outputs of dimensions [n_outs, time].
feed_result_t feed(
const af::array& input,
const std::optional<af::array>& feedback = std::nullopt,
const std::optional<af::array>& desired = std::nullopt) override
{
assert(!desired || !feedback);
assert(input.type() == DType);
assert(input.numdims() == 2);
assert(input.dims(0) == n_ins_);
assert(input.dims(1) > 0);
assert(!feedback || feedback->type() == DType);
assert(!feedback || feedback->numdims() <= 2);
assert(!feedback || feedback->dims(0) == n_outs_);
assert(!feedback || feedback->dims(1) == input.dims(1));
assert(!feedback || af::allTrue<bool>(af::abs(feedback.value()) <= 1.));
assert(!desired || desired->type() == DType);
assert(!desired || desired->numdims() <= 2);
assert(!desired || desired->dims(0) == n_outs_);
assert(!desired || desired->dims(1) == input.dims(1));
assert(!desired || af::allTrue<bool>(af::abs(desired.value()) <= 1.));
feed_result_t result;
result.states = af::array(state_.dims(0), state_.dims(1), input.dims(1), DType);
result.outputs = af::array(n_outs_, input.dims(1), DType);
for (long i = 0; i < input.dims(1); ++i) {
std::optional<af::array> feedback_i;
if (feedback) feedback_i = feedback.value()(af::span, i);
std::optional<af::array> desired_i;
if (desired) desired_i = desired.value()(af::span, i);
step(input(af::span, i), feedback_i, desired_i);
result.states(af::span, af::span, i) = state_;
result.outputs(af::span, i) = last_output_;
}
return result;
}
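// A minimal usage sketch (illustrative; `net` and the sequence length are
// example values): feeding a 100-step sequence and inspecting the outputs.
//
//     af::array input = af::randu(net.n_ins(), 100, af::dtype::f64);
//     auto result = net.feed(input);
//     af_print(result.outputs);  // dimensions [n_outs, 100]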
/// Train the network on the given sequence.
/// \param input Input sequence of dimensions [n_ins, time].
/// \param desired The desired output sequence. It is also teacher-forced into the net.
/// Needs to have dimensions [n_outs, time].
void train(const af::array& input, const af::array& desired) override
{
assert(input.type() == DType);
assert(input.numdims() == 2);
assert(input.dims(0) == n_ins_);
assert(input.dims(1) > 0);
assert(desired.type() == DType);
assert(desired.numdims() <= 2);
assert(desired.dims(0) == n_outs_);
assert(desired.dims(1) == input.dims(1));
assert(af::allTrue<bool>(af::abs(desired) <= 1));
af::array states = feed(input, desired).states;
states = af::moddims(std::move(states), state_.elements(), input.dims(1));
output_w_ = af_utils::lstsq_train(states.T(), desired.T()).T();
assert(output_w_.dims() == (af::dim4{n_outs_, state_.elements() + 1}));
}
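// The usual workflow (illustrative sketch; the names and lengths are example
// values): train on a teacher-forced sequence, then generate a continuation
// with loop().
//
//     net.train(train_input, train_desired);  // [n_ins, T] and [n_outs, T]
//     auto prediction = net.loop(50);         // self-driven for 50 steps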
/// Perform multiple steps using self's output as input.
/// \param n_steps The number of steps to take.
/// \param desired The desired output. This is used only for callbacks.
/// Needs to have dimensions [n_outs, n_steps].
/// \return The same as \ref feed().
feed_result_t
loop(long n_steps, const std::optional<af::array>& desired = std::nullopt) override
{
assert(n_ins_ == n_outs_);
assert(n_steps > 0);
assert(!desired || desired->type() == DType);
assert(!desired || desired->numdims() <= 2);
assert(!desired || desired->dims(0) == n_outs_);
assert(!desired || desired->dims(1) == n_steps);
assert(!desired || af::allTrue<bool>(af::abs(desired.value()) <= 1.));
feed_result_t result;
result.states = af::array(state_.dims(0), state_.dims(1), n_steps, DType);
result.outputs = af::array(n_outs_, n_steps, DType);
for (long i = 0; i < n_steps; ++i) {
std::optional<af::array> desired_i;
if (desired) desired_i = desired.value()(af::span, i);
// Create a shallow copy of `last_output_` so that when step() changes it,
// its `input` parameter does not change (it is taken by const reference).
step(af::array(last_output_), std::nullopt, desired_i);
result.states(af::span, af::span, i) = state_;
result.outputs(af::span, i) = last_output_;
}
return result;
}
/// Get the current state of the network.
const af::array& state() const override
{
return state_;
}
/// Set the current state of the network.
void state(af::array new_state) override
{
assert(new_state.type() == DType);
assert(new_state.numdims() == 2);
state_ = std::move(new_state);
}
// Learning is not available at the moment.
// /// Update the state exponential moving average using the current state.
// void update_state_ema()
// {
// if (state_ema_.isempty()) {
// state_ema_ = state_;
// } else {
// state_ema_ = (1. - state_ema_decay_) * state_ema_ + state_ema_decay_ * state_;
// }
// }
/// The number of inputs.
long n_ins() const override
{
return n_ins_;
}
/// The number of outputs.
long n_outs() const override
{
return n_outs_;
}
/// The average number of inputs to a neuron.
double neuron_ins() const override
{
af::array in = af::sum(reservoir_w_full_ != 0, 1);
return af::mean<double>(in);
}
/// Set the input weights of the network.
///
/// The shape has to be [state.dims(0), state.dims(1), n_ins].
void input_weights(af::array new_weights)
{
assert(new_weights.type() == DType);
assert((new_weights.dims() == af::dim4{state_.dims(0), state_.dims(1), n_ins_}));
input_w_ = std::move(new_weights);
}
/// Get the input weights of the network.
const af::array& input_weights() const
{
return input_w_;
}
/// Set the feedback weights of the network.
///
/// The shape has to be [state.dims(0), state.dims(1), n_outs].
void feedback_weights(af::array new_weights)
{
assert(new_weights.type() == DType);
assert((new_weights.dims() == af::dim4{state_.dims(0), state_.dims(1), n_outs_}));
feedback_w_ = std::move(new_weights);
}
/// Get the feedback weights of the network.
const af::array& feedback_weights() const
{
return feedback_w_;
}
/// Set the fully connected reservoir weights of the network.
///
/// This forces step() to always use the fully connected matrix.
///
/// The shape has to be [state_height*state_width, state_height*state_width].
void reservoir_weights_full(af::array new_weights)
{
assert(new_weights.type() == DType);
assert(new_weights.numdims() == 2);
assert(new_weights.dims(0) == state_.elements());
assert(new_weights.dims(1) == state_.elements());
reservoir_w_full_ = std::move(new_weights);
}
/// Get the reservoir weights of the network in the form of fully connected matrix.
const af::array& reservoir_weights_full() const
{
return reservoir_w_full_;
}
/// Set the reservoir biases of the network.
///
/// The shape has to be the same as the state.
void reservoir_biases(af::array new_biases)
{
assert(new_biases.type() == DType);
assert(new_biases.dims() == state_.dims());
reservoir_b_ = std::move(new_biases);
}
/// Get the reservoir biases of the network.
const af::array& reservoir_biases() const
{
return reservoir_b_;
}
// Learning is not available at the moment.
// /// Get the learning rate.
// double learning_rate() const
// {
// return learning_rate_;
// }
/// Set the learning rate.
void learning_rate(double /* learning_rate */) override
{
// Learning is not available at the moment.
// learning_rate_ = learning_rate;
}
/// Enable or disable random noise, e.g., for Lyapunov exponent testing.
void random_noise(bool enable) override
{
noise_enabled_ = enable;
}
std::unique_ptr<net_base> clone() const override
{
return std::make_unique<lcnn>(*this);
}
};
/// Generate a random locally connected echo state network.
///
/// \param n_ins The number of inputs.
/// \param n_outs The number of outputs.
/// \param args The parameters from which the network is constructed.
/// \param prng The random generator used during construction.
template <af::dtype DType = af::dtype::f64>
lcnn<DType> random_lcnn(long n_ins, long n_outs, const po::variables_map& args, std::mt19937& prng)
{
// The number of rows of the state matrix.
long state_height = args.at("lcnn.state-height").as<long>();
// The number of columns of the state matrix.
long state_width = args.at("lcnn.state-width").as<long>();
// Standard deviation of the normal distribution generating the reservoir.
double sigma_res = args.at("lcnn.sigma-res").as<double>();
// The mean of the normal distribution generating the reservoir.
double mu_res = args.at("lcnn.mu-res").as<double>();
// The input weight.
double in_weight = args.at("lcnn.in-weight").as<double>();
// The feedback weights will be generated from [0, fb_weight].
double fb_weight = args.at("lcnn.fb-weight").as<double>();
// Standard deviation of the normal distribution generating the biases.
double sigma_b = args.at("lcnn.sigma-b").as<double>();
// The mean of the normal distribution generating the biases.
double mu_b = args.at("lcnn.mu-b").as<double>();
// The sparsity of the reservoir weight matrix. For 0, the matrix is
// fully connected. For 1, the matrix is completely zero.
double sparsity = args.at("lcnn.sparsity").as<double>();
// The reservoir topology.
std::string topology = args.at("lcnn.topology").as<std::string>();
// Put input to all neurons. In such a case, the input weights are
// distributed uniformly from [0, in_weight].
bool input_to_all = args.at("lcnn.input-to-all").as<bool>();
lcnn_config cfg{args};
af::randomEngine af_prng{AF_RANDOM_ENGINE_DEFAULT, prng()};
int neurons = state_height * state_width;
// generate the reservoir weights based on topology
if (topology == "sparse") {
cfg.reservoir_w_full = sigma_res * af::randn({neurons, neurons}, DType, af_prng) + mu_res;
// make the reservoir sparse by the given coefficient
cfg.reservoir_w_full *=
af::randu({cfg.reservoir_w_full.dims()}, DType, af_prng) >= sparsity;
} else if (topology == "permutation") {
// only allow one connection to each neuron
std::vector<int> perm(neurons, 0);
std::iota(perm.begin(), perm.end(), 0);
std::shuffle(perm.begin(), perm.end(), prng);
// build an eye matrix with permuted columns
cfg.reservoir_w_full = af::constant(0, neurons, neurons, DType);
for (int i = 0; i < neurons; ++i) cfg.reservoir_w_full(i, perm.at(i)) = 1;
// Assign random weights to the permuted eye matrix.
cfg.reservoir_w_full = cfg.reservoir_w_full * sigma_res
* af::randn({cfg.reservoir_w_full.dims()}, DType, af_prng)
+ cfg.reservoir_w_full * mu_res;
// make the reservoir sparse by the given coefficient
cfg.reservoir_w_full *=
af::randu({cfg.reservoir_w_full.dims()}, DType, af_prng) >= sparsity;
} else if (topology == "ring" || topology == "chain") {
cfg.reservoir_w_full = af::constant(0, neurons, neurons, DType);
for (int i = 0; i < neurons; ++i) cfg.reservoir_w_full(i, (i + 1) % neurons) = 1;
if (topology == "chain") cfg.reservoir_w_full(neurons - 1, 0) = 0;
// Assign random weights to the ring matrix.
cfg.reservoir_w_full = cfg.reservoir_w_full * sigma_res
* af::randn({cfg.reservoir_w_full.dims()}, DType, af_prng)
+ cfg.reservoir_w_full * mu_res;
// make the reservoir sparse by the given coefficient
cfg.reservoir_w_full *=
af::randu({cfg.reservoir_w_full.dims()}, DType, af_prng) >= sparsity;
} else {
throw std::runtime_error{"Unknown topology `" + topology + "`."};
}
// generate reservoir biases
cfg.reservoir_b = sigma_b * af::randn({state_height, state_width}, DType, af_prng) + mu_b;
// for improved visualizations, this is a list of nice positions
// in the state matrix.
const std::vector<std::pair<long, long>> nice_positions = {
{state_height / 2, state_width / 2}, // center
{state_height / 2, 0}, // left center
{state_height - 1, state_width / 2}, // bottom center
{state_height - 1, 0}, // bottom left corner
};
auto free_position = nice_positions.begin();
// if we run out of nice positions, have a generator prepared
std::uniform_int_distribution<long> vert_dist{0, state_height - 1};
std::uniform_int_distribution<long> horiz_dist{0, state_width - 1};
if (input_to_all) {
// put input and feedback into all the neurons
cfg.input_w = af::randu({state_height, state_width, n_ins}, DType, af_prng) * in_weight;
cfg.feedback_w = af::randu({state_height, state_width, n_outs}, DType, af_prng) * fb_weight;
} else {
// choose the locations for inputs and feedbacks
cfg.input_w = af::constant(0, state_height, state_width, n_ins, DType);
for (long i = 0; i < n_ins; ++i) {
if (free_position != nice_positions.end()) {
cfg.input_w(free_position->first, free_position->second, i) = in_weight;
++free_position;
} else {
cfg.input_w(vert_dist(prng), horiz_dist(prng), i) = in_weight;
}
}
cfg.feedback_w = af::constant(0, state_height, state_width, n_outs, DType);
for (long i = 0; i < n_outs; ++i) {
if (free_position != nice_positions.end()) {
cfg.feedback_w(free_position->first, free_position->second, i) = fb_weight;
++free_position;
} else {
cfg.feedback_w(vert_dist(prng), horiz_dist(prng), i) = fb_weight;
}
}
}
// the initial state is full of zeros
cfg.init_state = af::constant(0, state_height, state_width, DType);
// alternative (disabled): random initial state
// cfg.init_state = af::randu({state_height, state_width}, DType, af_prng) * 2. - 1.;
return lcnn<DType>{std::move(cfg), prng};
}
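// A minimal construction sketch (illustrative), assuming `args` holds the
// options registered by lcnn_arg_description() below:
//
//     std::mt19937 prng{42};
//     lcnn<af::dtype::f64> net = random_lcnn<af::dtype::f64>(1, 1, args, prng);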
/// Locally connected network options description for command line parsing.
inline po::options_description lcnn_arg_description()
{
po::options_description lcnn_arg_desc{"Locally connected network options"};
lcnn_arg_desc.add_options() //
("lcnn.state-height", po::value<long>()->default_value(11), //
"The fixed height of the kernel.") //
("lcnn.state-width", po::value<long>()->default_value(11), //
"The width of the state matrix.") //
("lcnn.sigma-res", po::value<double>()->default_value(0.18282597654191446), //
"See random_lcnn().") //
("lcnn.mu-res", po::value<double>()->default_value(-0.008182614281959771), //
"See random_lcnn().") //
("lcnn.in-weight", po::value<double>()->default_value(0.096420048082640264), //
"See random_lcnn().") //
("lcnn.fb-weight", po::value<double>()->default_value(0.087346208897765704), //
"See random_lcnn().") //
("lcnn.sigma-b", po::value<double>()->default_value(0), //
"See random_lcnn().") //
("lcnn.mu-b", po::value<double>()->default_value(0), //
"See random_lcnn().") //
("lcnn.sparsity", po::value<double>()->default_value(0.092256797467520246), //
"See random_lcnn().") //
// Learning is not available at the moment.
// ("lcnn.learning-rate", po::value<double>()->default_value(0),
// "See lcnn_config struct.")
// ("lcnn.learning-rate-ema", po::value<double>()->default_value(0),
// "See lcnn_config struct.")
// ("lcnn.learning-rate-diff", po::value<double>()->default_value(0),
// "See lcnn_config struct.")
// ("lcnn.learning-bias", po::value<double>()->default_value(0),
// "See lcnn_config struct.")
// ("lcnn.learning-rate-bias", po::value<double>()->default_value(0),
// "See lcnn_config struct.")
// ("lcnn.weight-decay", po::value<double>()->default_value(0),
// "See random_lcnn().")
// ("lcnn.state-ema-decay", po::value<double>()->default_value(0),
// "See random_lcnn().")
("lcnn.topology", po::value<std::string>()->default_value("sparse"), //
"See random_lcnn().") //
("lcnn.input-to-all", po::value<bool>()->default_value(false), //
"See random_lcnn().") //
("lcnn.random-spike-prob", po::value<double>()->default_value(0), //
"See lcnn_config class.") //
("lcnn.random-spike-std", po::value<double>()->default_value(0), //
"See lcnn_config class.") //
("lcnn.noise", po::value<double>()->default_value(1e-8), //
"See lcnn_config class.") //
("lcnn.leakage", po::value<double>()->default_value(0.98589345058490285), //
"See lcnn_config class.") //
;
return lcnn_arg_desc;
}
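// A minimal parsing sketch (illustrative): registering these options together
// with other option groups and parsing the command line.
//
//     po::options_description desc;
//     desc.add(lcnn_arg_description());
//     po::variables_map args;
//     po::store(po::parse_command_line(argc, argv, desc), args);
//     po::notify(args);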
std::unique_ptr<net_base>
make_net(long n_ins, long n_outs, const po::variables_map& args, std::mt19937& prng)
{
if (args.at("gen.net-type").as<std::string>() == "lcnn") {
return std::make_unique<lcnn<af::dtype::f64>>(random_lcnn(n_ins, n_outs, args, prng));
}
if (args.at("gen.net-type").as<std::string>() == "simple-esn") {
return std::make_unique<simple_esn<af::dtype::f64>>(random_esn(n_ins, n_outs, args, prng));
}
throw std::runtime_error{
"Unknown net type \"" + args.at("gen.net-type").as<std::string>() + "\"."};
}
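// A minimal usage sketch (illustrative): choosing the network type at
// runtime. It assumes the caller also registered a "gen.net-type" string
// option alongside the descriptions above.
//
//     std::mt19937 prng{42};
//     std::unique_ptr<net_base> net = make_net(1, 1, args, prng);
//     net->step(af::constant(0.5, 1, af::dtype::f64));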
} // namespace esn