From 1e93e0130ffe203270d5bd8f59b3bf9e9f86ff5f Mon Sep 17 00:00:00 2001
From: tgaaly
Date: Sat, 16 May 2015 20:51:14 -0400
Subject: [PATCH 1/4] added feature to allow different learning rates per
 layer in the NN - useful for transfer learning, pre-training parts of the NN
 and fine-tuning other parts

---
 NN/nnapplygrads.m | 7 ++++++-
 NN/nnsetup.m      | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/NN/nnapplygrads.m b/NN/nnapplygrads.m
index 781163b..649258d 100644
--- a/NN/nnapplygrads.m
+++ b/NN/nnapplygrads.m
@@ -10,7 +10,12 @@
             dW = nn.dW{i};
         end
 
-        dW = nn.learningRate * dW;
+        % to apply different learning rates to each layer
+        if isempty(nn.learningRatePerLayer)
+            dW = nn.learningRate * dW;
+        else
+            dW = nn.learningRatePerLayer(i) * dW;
+        end
 
         if(nn.momentum>0)
             nn.vW{i} = nn.momentum*nn.vW{i} + dW;
diff --git a/NN/nnsetup.m b/NN/nnsetup.m
index b8ec742..7a1162d 100644
--- a/NN/nnsetup.m
+++ b/NN/nnsetup.m
@@ -8,6 +8,7 @@
 
     nn.activation_function  = 'tanh_opt'; % Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh).
     nn.learningRate         = 2;          % learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs.
+    nn.learningRatePerLayer = [];         % learning rate per layer - for transfer learning, pre-training and fine-tuning different parts of the network (should be of length nn.n - 1)
     nn.momentum             = 0.5;        % Momentum
     nn.scaling_learningRate = 1;          % Scaling factor for the learning rate (each epoch)
     nn.weightPenaltyL2      = 0;          % L2 regularization

From 06c5bc82aede8c3d53d50f7a390bc555ec99265c Mon Sep 17 00:00:00 2001
From: tgaaly
Date: Sat, 16 May 2015 21:01:33 -0400
Subject: [PATCH 2/4] added feature to allow different learning rates per
 layer in the NN - useful for transfer learning, pre-training parts of the NN
 and fine-tuning other parts - updated nntrain.m - where learningRatePerLayer
 is also scaled with the nn.scaling_learningRate

---
 NN/nntrain.m | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NN/nntrain.m b/NN/nntrain.m
index af844a6..23e509f 100644
--- a/NN/nntrain.m
+++ b/NN/nntrain.m
@@ -72,6 +72,7 @@
 
         disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mini-batch mean squared error on training set is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]);
         nn.learningRate = nn.learningRate * nn.scaling_learningRate;
+        nn.learningRatePerLayer = nn.learningRatePerLayer * nn.scaling_learningRate;
     end
 end

From 41d3fee6c21c19d57e93a9a2c35661d24610d6e9 Mon Sep 17 00:00:00 2001
From: tgaaly
Date: Sat, 16 May 2015 21:04:51 -0400
Subject: [PATCH 3/4] added feature to allow different learning rates per
 layer in the NN - useful for transfer learning, pre-training parts of the NN
 and fine-tuning other parts - error fix

---
 NN/nntrain.m | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/NN/nntrain.m b/NN/nntrain.m
index 23e509f..ffaf245 100644
--- a/NN/nntrain.m
+++ b/NN/nntrain.m
@@ -72,7 +72,9 @@
 
         disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. Mini-batch mean squared error on training set is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]);
         nn.learningRate = nn.learningRate * nn.scaling_learningRate;
-        nn.learningRatePerLayer = nn.learningRatePerLayer * nn.scaling_learningRate;
+        if ~isempty(nn.learningRatePerLayer)
+            nn.learningRatePerLayer = nn.learningRatePerLayer * nn.scaling_learningRate;
+        end
     end
 end

From 1cf85792e534ec218be8c1e0f933dee3bd4ef2c3 Mon Sep 17 00:00:00 2001
From: tgaaly
Date: Sun, 17 May 2015 22:51:33 -0400
Subject: [PATCH 4/4] added option to break training if error is below
 tolerance - defined in opts.tol

---
 NN/nntrain.m | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/NN/nntrain.m b/NN/nntrain.m
index ffaf245..1e2d1e8 100644
--- a/NN/nntrain.m
+++ b/NN/nntrain.m
@@ -75,6 +75,14 @@
         if ~isempty(nn.learningRatePerLayer)
             nn.learningRatePerLayer = nn.learningRatePerLayer * nn.scaling_learningRate;
         end
+
+        if isfield(opts,'tol')
+            if opts.validation == 1 && loss.val.e(end)
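Usage sketch (not part of the patches above; the network size, data variables, and rate values are hypothetical). Per PATCH 1/4, nn.learningRatePerLayer needs one entry per weight matrix, i.e. length nn.n - 1; if it is left empty ([]), nnapplygrads falls back to the single nn.learningRate as before.

    % Hypothetical example: keep the two pre-trained weight layers frozen and
    % fine-tune only the final layer of a [784 100 100 10] network.
    nn = nnsetup([784 100 100 10]);     % nn.n = 4, so 3 weight layers
    nn.learningRatePerLayer = [0 0 1];  % zero rate => that layer is not updated

    opts.numepochs = 10;                % standard nntrain options
    opts.batchsize = 100;
    opts.tol       = 1e-3;              % optional early-stop tolerance (PATCH 4/4)

    % train_x/train_y and val_x/val_y are placeholder data; the visible check in
    % PATCH 4/4 compares opts.tol against the validation error, so a validation
    % set is assumed here.
    [nn, L] = nntrain(nn, train_x, train_y, opts, val_x, val_y);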