diff --git a/RReLU.lua b/RReLU.lua
index 843415f7e..95b7b90ba 100644
--- a/RReLU.lua
+++ b/RReLU.lua
@@ -1,7 +1,7 @@
 local ffi = require 'ffi'
 local RReLU, parent = torch.class('nn.RReLU', 'nn.Module')
 
-function RReLU:__init(l, u, ip)
+function RReLU:__init(l, u, ip, cw)
    parent.__init(self)
    self.lower = l or 1/8
    self.upper = u or 1/3
@@ -9,6 +9,7 @@ function RReLU:__init(l, u, ip)
    self.noise = torch.Tensor()
    self.train = true
    self.inplace = ip or false
+   self.channelwise = cw or false
 end
 
 function RReLU:updateOutput(input)
@@ -21,6 +22,7 @@ function RReLU:updateOutput(input)
       self.upper,
       self.train,
       self.inplace,
+      self.channelwise,
       gen
    )
    return self.output
@@ -35,13 +37,14 @@ function RReLU:updateGradInput(input, gradOutput)
       self.lower,
       self.upper,
       self.train,
-      self.inplace
+      self.inplace,
+      self.channelwise
    )
    return self.gradInput
 end
 
 function RReLU:__tostring__()
-   return string.format('%s (l:%f, u:%f)', torch.type(self), self.lower, self.upper)
+   return string.format('%s (l:%f, u:%f, channel-wise:%s)', torch.type(self), self.lower, self.upper, self.channelwise)
 end
 
 function RReLU:clearState()
diff --git a/doc/transfer.md b/doc/transfer.md
index c1dfc80c2..eb854d4cb 100644
--- a/doc/transfer.md
+++ b/doc/transfer.md
@@ -290,6 +290,7 @@ m=nn.RReLU(
    l,       -- minimum factor for negative inputs, default: 1/8;
    u,       -- maximum factor for negative inputs, default: 1/3;
    inplace  -- if true the result will be written to the input tensor, default: false;
+   cw       -- if true all elements of the same channel share the same `a`, default: false;
 )
 ```
 If `l == u` a RReLU effectively becomes a LeakyReLU. Regardless of operating in in-place mode a RReLU will internally allocate an input-sized `noise` tensor to store random factors for negative inputs. The backward() operation assumes that forward() has been called before.
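For reference, a minimal usage sketch of the new flag. This assumes the patch above is installed; the shapes and values are illustrative only:

```lua
require 'nn'

-- lower=1/8, upper=1/3, inplace=false, channelwise=true
local m = nn.RReLU(1/8, 1/3, false, true)
m:training()

-- 3D input: 3 channels of 4x4 feature maps
local x = torch.randn(3, 4, 4)
local y = m:forward(x)

-- in channel-wise training mode (and not in-place) the patched C code stores
-- one random factor per channel, so self.noise holds 3 entries here
print(m.noise)
```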
diff --git a/lib/THNN/generic/RReLU.c b/lib/THNN/generic/RReLU.c
index 8bf6764e5..a4a13efa1 100644
--- a/lib/THNN/generic/RReLU.c
+++ b/lib/THNN/generic/RReLU.c
@@ -11,68 +11,156 @@ void THNN_(RReLU_updateOutput)(
           real upper,
           bool train,
           bool inplace,
+          bool channelwise,
           THGenerator *generator)
 {
-  if (train)
+  if (channelwise && train)
   {
-    // get default random generator
-    THTensor_(resizeAs)(noise, input);
-    if (inplace)
+    long bs, ks;
+    long nOutputPlane;
     {
-      TH_TENSOR_APPLY2(real, input, real, noise,
-        if (*input_data <= 0)
-        {
-          const real r = (real)THRandom_uniform(generator, lower, upper);
-          *input_data = (*input_data) * r;
-          *noise_data = r;
-        }
-        else
-        {
-          *noise_data = 1;
-        }
-      );
-      THTensor_(set)(output, input);
+      long input_ndim = THTensor_(nDimension)(input);
+      switch (input_ndim)
+      {
+        case 1:
+          bs = 1;
+          ks = 1;
+          break;
+        case 2:
+          bs = input->size[0];
+          ks = 1;
+          break;
+        case 3:
+          bs = 1;
+          ks = input->size[1] * input->size[2];
+          break;
+        case 4:
+          bs = input->size[0];
+          ks = input->size[2] * input->size[3];
+          break;
+      }
+      nOutputPlane = input->size[(input_ndim + 1) % 2];
     }
+    // get default random generator
+    if (inplace)
+      THTensor_(resizeAs)(noise, input);
     else
+      THTensor_(resize1d)(noise, nOutputPlane);
+
+    real *output_data = NULL;
+    real *input_data = THTensor_(data)(input);
+    real *noise_data = THTensor_(data)(noise);
+    if (!inplace)
     {
       THTensor_(resizeAs)(output, input);
-      TH_TENSOR_APPLY3(real, input, real, output, real, noise,
-        if (*input_data <= 0)
-        {
-          const real r = (real)THRandom_uniform(generator, lower, upper);
-          *output_data = (*input_data) * r;
-          *noise_data = r;
-        }
+      output_data = THTensor_(data)(output);
+    }
+    THTensor *channel_noise = THTensor_(newWithSize1d)(nOutputPlane);
+    real *channel_noise_data = THTensor_(data)(channel_noise);
+
+    THIndex_t i, j, k;
+#pragma omp parallel for private(j)
+    for (j = 0; j < nOutputPlane; ++j)
+      channel_noise_data[j] = (real)THRandom_uniform(generator, lower, upper);
+#pragma omp parallel for private(j,k)
+    for (i = 0; i < bs; ++i)
+    {
+      real* n_input_data = input_data + i*nOutputPlane*ks;
+      real* n_output_data = NULL;
+      real* n_noise_data = NULL;
+      if (inplace)
+        n_noise_data = noise_data + i*nOutputPlane*ks;
+      else
+        n_output_data = output_data + i*nOutputPlane*ks;
+      for (j = 0; j < nOutputPlane; ++j)
+      {
+        const real r = channel_noise_data[j];
+        for (k = 0; k < ks; ++k)
+          if (inplace)
+            if (n_input_data[k] <= 0)
+            {
+              n_input_data[k] = r * n_input_data[k];
+              n_noise_data[k] = r;
+            }
+            else
+              n_noise_data[k] = 1;
+          else
+            n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : r * n_input_data[k];
+        n_input_data += ks;
+        if (inplace)
+          n_noise_data += ks;
         else
-        {
-          *output_data = *input_data;
-          *noise_data = 1;
-        }
-      );
+          n_output_data += ks;
+      }
     }
+    if (inplace)
+      THTensor_(set)(output, input);
+    else
+      THTensor_(set)(noise, channel_noise);
   }
   else
   {
-    const real negSlope = (lower + upper) / 2;
-    if (inplace)
+    if (train)
     {
-      TH_TENSOR_APPLY(real, input,
-        if (*input_data <= 0)
-        {
-          *input_data = *input_data * negSlope;
-        }
-      );
-      THTensor_(set)(output, input);
+      // get default random generator
+      THTensor_(resizeAs)(noise, input);
+      if (inplace)
+      {
+        TH_TENSOR_APPLY2(real, input, real, noise,
+          if (*input_data <= 0)
+          {
+            const real r = (real)THRandom_uniform(generator, lower, upper);
+            *input_data = (*input_data) * r;
+            *noise_data = r;
+          }
+          else
+          {
+            *noise_data = 1;
+          }
+        );
+        THTensor_(set)(output, input);
+      }
+      else
+      {
+        THTensor_(resizeAs)(output, input);
+        TH_TENSOR_APPLY3(real, input, real, output, real, noise,
+          if (*input_data <= 0)
+          {
+            const real r = (real)THRandom_uniform(generator, lower, upper);
+            *output_data = (*input_data) * r;
+            *noise_data = r;
+          }
+          else
+          {
+            *output_data = *input_data;
+            *noise_data = 1;
+          }
+        );
+      }
     }
     else
     {
-      THTensor_(resizeAs)(output, input);
-      TH_TENSOR_APPLY2(real, input, real, output,
-        const real r = (*input_data) <= 0 ? negSlope : 1;
-        *output_data = *input_data * r;
-      );
+      const real negSlope = (lower + upper) / 2;
+      if (inplace)
+      {
+        TH_TENSOR_APPLY(real, input,
+          if (*input_data <= 0)
+          {
+            *input_data = *input_data * negSlope;
+          }
+        );
+        THTensor_(set)(output, input);
+      }
+      else
+      {
+        THTensor_(resizeAs)(output, input);
+        TH_TENSOR_APPLY2(real, input, real, output,
+          const real r = (*input_data) <= 0 ? negSlope : 1;
+          *output_data = *input_data * r;
+        );
+      }
     }
-  }
+  }
 }
 
 void THNN_(RReLU_updateGradInput)(
@@ -84,24 +172,84 @@ void THNN_(RReLU_updateGradInput)(
           real lower,
           real upper,
           bool train,
-          bool inplace)
+          bool inplace,
+          bool channelwise)
 {
   if (train && upper - lower > 1E-6)    // e.g. if upper == lower, RReLU behaves like LeakyReLU
   {
-    // multiply the gradient by the noise tensor
-    if (inplace)
-    {
-      THTensor_(cmul)(gradOutput, gradOutput, noise);
-      THTensor_(set)(gradInput, gradOutput);
+    if (channelwise && !inplace)
+    {
+      long bs, ks;
+      long nOutputPlane;
+      {
+        long input_ndim = THTensor_(nDimension)(input);
+        switch (input_ndim)
+        {
+          case 1:
+            bs = 1;
+            ks = 1;
+            break;
+          case 2:
+            bs = input->size[0];
+            ks = 1;
+            break;
+          case 3:
+            bs = 1;
+            ks = input->size[1] * input->size[2];
+            break;
+          case 4:
+            bs = input->size[0];
+            ks = input->size[2] * input->size[3];
+            break;
+        }
+        nOutputPlane = input->size[(input_ndim + 1) % 2];
+      }
+
+      const real *input_data = THTensor_(data)(input);
+      const real *gradOutput_data = THTensor_(data)(gradOutput);
+      THTensor_(resizeAs)(gradInput, input);
+      real *gradInput_data = THTensor_(data)(gradInput);
+      const real *noise_data = THTensor_(data)(noise);
+
+      THIndex_t i, j, k;
+#pragma omp parallel for private(j,k)
+      for (i = 0; i < bs; ++i)
+      {
+        const real *n_input_data = input_data + i*nOutputPlane*ks;
+        const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
+        real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks;
+
+        for (j = 0; j < nOutputPlane; ++j)
+        {
+          const real r = noise_data[j];
+          for (k = 0; k < ks; ++k)
+            if (n_input_data[k] > 0)
+              n_gradInput_data[k] = n_gradOutput_data[k];
+            else
+              n_gradInput_data[k] = n_gradOutput_data[k] * r;
+          n_input_data += ks;
+          n_gradInput_data += ks;
+          n_gradOutput_data += ks;
+        }
+      }
     }
     else
     {
-      THTensor_(resizeAs)(gradInput, input);
-      THTensor_(cmul)(gradInput, gradOutput, noise);
-    }
+      // multiply the gradient by the noise tensor
+      if (inplace)
+      {
+        THTensor_(cmul)(gradOutput, gradOutput, noise);
+        THTensor_(set)(gradInput, gradOutput);
+      }
+      else
+      {
+        THTensor_(resizeAs)(gradInput, input);
+        THTensor_(cmul)(gradInput, gradOutput, noise);
+      }
+    }
   }
   else
-  {
+  {
     // use constant factor for negative input values
     const real negSlope = (lower + upper) / 2;
     if (inplace)
diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h
index 1600fb1d3..915511731 100644
--- a/lib/THNN/generic/THNN.h
+++ b/lib/THNN/generic/THNN.h
@@ -291,6 +291,7 @@ TH_API void THNN_(RReLU_updateOutput)(
           real upper,
           bool train,
           bool inplace,
+          bool channelwise,
           THGenerator *generator);
 TH_API void THNN_(RReLU_updateGradInput)(
           THNNState *state,
@@ -301,7 +302,8 @@ TH_API void THNN_(RReLU_updateGradInput)(
           real lower,
           real upper,
           bool train,
-          bool inplace);
+          bool inplace,
+          bool channelwise);
 
 TH_API void THNN_(Sigmoid_updateOutput)(
           THNNState *state,
diff --git a/test.lua b/test.lua
index 3847166ed..0beba2ab8 100644
--- a/test.lua
+++ b/test.lua
@@ -484,32 +484,35 @@ function nntest.RReLU()
    for _,train in ipairs({true,false}) do
       -- test with separate output buffer and inplace
       for _,inplace in ipairs({false,true}) do
-         module = nn.RReLU(l, u, inplace)
-         if train then
-            module:training()
-         else
-            module:evaluate()
-         end
-         input = torch.rand(nframe, size, kW, kH) - 0.5
-         input:storage()[1] = -1
-         local original_input = input:clone()
-         local output = module:forward(input)
-         mytester:assert(output:sign():eq(original_input:sign()):all(), 'sign flipped forward ')
-         local gradOutput = torch.ones(output:size())
-         local gradInput = module:backward(input, gradOutput)
-         mytester:assert(gradInput:gt(0):eq(input:ne(0)):all(), 'gradient ')
-         mytester:assert(gradInput:lt(1):eq(input:le(0)):all(), 'backward negative inputs ')
-         mytester:assert(gradInput:eq(1):eq(input:gt(0)):all(), 'backward positive inputs ')
-         if not train then
-            local err = gradInput[input:le(0)]:mean()-(module.lower+module.upper)/2
-            mytester:assertlt(err, precision, 'error on gradient ')
-         end
+         -- test with channel-wise
+         for _,cw in ipairs({true,false}) do
+            module = nn.RReLU(l, u, inplace, cw)
+            if train then
+               module:training()
+            else
+               module:evaluate()
+            end
+            input = torch.rand(nframe, size, kW, kH) - 0.5
+            input:storage()[1] = -1
+            local original_input = input:clone()
+            local output = module:forward(input)
+            mytester:assert(output:sign():eq(original_input:sign()):all(), 'sign flipped forward ')
+            local gradOutput = torch.ones(output:size())
+            local gradInput = module:backward(input, gradOutput)
+            mytester:assert(gradInput:gt(0):eq(input:ne(0)):all(), 'gradient ')
+            mytester:assert(gradInput:lt(1):eq(input:le(0)):all(), 'backward negative inputs ')
+            mytester:assert(gradInput:eq(1):eq(input:gt(0)):all(), 'backward positive inputs ')
+            if not train then
+               local err = gradInput[input:le(0)]:mean()-(module.lower+module.upper)/2
+               mytester:assertlt(err, precision, 'error on gradient ')
+            end
 
-         input = -torch.rand(1000)
-         module:forward(input) -- fill internal noise tensor
-         local g = module:backward(input, torch.ones(1000))
-         local err = math.abs(g[input:le(0)]:mean()-(module.lower+module.upper)/2)
-         mytester:assertlt(err, 0.05, 'mean deviation of gradient for negative inputs ')
+            input = -torch.rand(1000)
+            module:forward(input) -- fill internal noise tensor
+            local g = module:backward(input, torch.ones(1000))
+            local err = math.abs(g[input:le(0)]:mean()-(module.lower+module.upper)/2)
+            mytester:assertlt(err, 0.05, 'mean deviation of gradient for negative inputs ')
+         end
       end
    end
 end
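Beyond the updated test above, here is a hedged sketch of how the channel-wise behaviour could be checked by hand (not part of the test suite; names and sizes are illustrative):

```lua
require 'nn'

local m = nn.RReLU(1/8, 1/3, false, true)
m:training()

-- all-negative input so every element is scaled by its channel's random factor
local input = -(torch.rand(2, 3, 5, 5) + 0.1)
local output = m:forward(input)

-- output = a_c * input within each channel, so the elementwise ratio
-- should be constant over every (sample, channel) slice
local ratio = torch.cdiv(output, input)
for b = 1, input:size(1) do
   for c = 1, input:size(2) do
      local slice = ratio[b][c]
      assert(slice:max() - slice:min() < 1e-6, 'channel must share a single slope')
   end
end
print('channel-wise slopes are consistent within each channel')
```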