Channel-Wise RReLU
xternalz authored and jasonkuen committed May 31, 2016
1 parent 9c8f2bb commit 61334b6
Showing 5 changed files with 239 additions and 82 deletions.
9 changes: 6 additions & 3 deletions RReLU.lua
@@ -1,14 +1,15 @@
 local ffi = require 'ffi'
 local RReLU, parent = torch.class('nn.RReLU', 'nn.Module')
 
-function RReLU:__init(l, u, ip)
+function RReLU:__init(l, u, ip, cw)
    parent.__init(self)
    self.lower = l or 1/8
    self.upper = u or 1/3
    assert(self.lower <= self.upper and self.lower >= 0 and self.upper >= 0)
    self.noise = torch.Tensor()
    self.train = true
    self.inplace = ip or false
+   self.channelwise = cw or false
 end
 
 function RReLU:updateOutput(input)
@@ -21,6 +22,7 @@ function RReLU:updateOutput(input)
       self.upper,
       self.train,
       self.inplace,
+      self.channelwise,
       gen
    )
    return self.output
@@ -35,13 +37,14 @@ function RReLU:updateGradInput(input, gradOutput)
       self.lower,
       self.upper,
       self.train,
-      self.inplace
+      self.inplace,
+      self.channelwise
    )
    return self.gradInput
 end
 
 function RReLU:__tostring__()
-   return string.format('%s (l:%f, u:%f)', torch.type(self), self.lower, self.upper)
+   return string.format('%s (l:%f, u:%f, channel-wise:%s)', torch.type(self), self.lower, self.upper, self.channelwise)
 end
 
 function RReLU:clearState()
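A quick usage sketch of the new flag follows (the constructor signature and defaults are taken from the diff above; the `require` line and tensor sizes are illustrative assumptions):

```lua
require 'nn'

-- Arguments: lower, upper, inplace, channelwise -- the fourth is the new flag.
local act = nn.RReLU(1/8, 1/3, false, true)

-- A hypothetical batch: 16 samples, 3 channels, 32x32 feature maps.
local x = torch.randn(16, 3, 32, 32)
local y = act:forward(x)  -- in training mode, one random slope is drawn per channel

print(act)  -- now reports the flag, e.g. nn.RReLU (l:0.125000, u:0.333333, channel-wise:true)
```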
1 change: 1 addition & 0 deletions doc/transfer.md
@@ -290,6 +290,7 @@ m=nn.ReLU(
    l,       -- minimum factor for negative inputs, default: 1/8;
    u,       -- maximum factor for negative inputs, default: 1/3;
    inplace  -- if true the result will be written to the input tensor, default: false;
+   cw       -- if true, all elements of the same channel share the same `a`, default: false;
 )
 ```
 If `l == u` a RReLU effectively becomes a LeakyReLU. Regardless of operating in in-place mode, a RReLU will internally allocate an input-sized `noise` tensor to store the random factors for negative inputs. The backward() operation assumes that forward() has been called beforehand.
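As a quick illustration of the `l == u` remark, a sketch (the slope value is arbitrary):

```lua
-- With l == u the sampled factor is always the same constant, so this
-- behaves exactly like a LeakyReLU with negative slope 0.2.
local leaky = nn.RReLU(0.2, 0.2)
```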
254 changes: 201 additions & 53 deletions lib/THNN/generic/RReLU.c
@@ -11,68 +11,156 @@ void THNN_(RReLU_updateOutput)(
           real upper,
           bool train,
           bool inplace,
+          bool channelwise,
           THGenerator *generator)
 {
-  if (train)
+  if (channelwise && train)
   {
-    // get default random generator
-    THTensor_(resizeAs)(noise, input);
-    if (inplace)
+    long bs, ks;
+    long nOutputPlane;
     {
-      TH_TENSOR_APPLY2(real, input, real, noise,
-        if (*input_data <= 0)
-        {
-          const real r = (real)THRandom_uniform(generator, lower, upper);
-          *input_data = (*input_data) * r;
-          *noise_data = r;
-        }
-        else
-        {
-          *noise_data = 1;
-        }
-      );
-      THTensor_(set)(output, input);
+      long input_ndim = THTensor_(nDimension)(input);
+      switch (input_ndim)
+      {
+        case 1:
+          bs = 1;
+          ks = 1;
+          break;
+        case 2:
+          bs = input->size[0];
+          ks = 1;
+          break;
+        case 3:
+          bs = 1;
+          ks = input->size[1] * input->size[2];
+          break;
+        case 4:
+          bs = input->size[0];
+          ks = input->size[2] * input->size[3];
+          break;
+      }
+      nOutputPlane = input->size[(input_ndim + 1) % 2];
     }
+    // get default random generator
+    if (inplace)
+      THTensor_(resizeAs)(noise, input);
     else
+      THTensor_(resize1d)(noise, nOutputPlane);
+
+    real *output_data = NULL;
+    real *input_data = THTensor_(data)(input);
+    real *noise_data = THTensor_(data)(noise);
+    if (!inplace)
     {
       THTensor_(resizeAs)(output, input);
-      TH_TENSOR_APPLY3(real, input, real, output, real, noise,
-        if (*input_data <= 0)
-        {
-          const real r = (real)THRandom_uniform(generator, lower, upper);
-          *output_data = (*input_data) * r;
-          *noise_data = r;
-        }
+      output_data = THTensor_(data)(output);
+    }
+    THTensor *channel_noise = THTensor_(newWithSize1d)(nOutputPlane);
+    real *channel_noise_data = THTensor_(data)(channel_noise);
+
+    THIndex_t i, j, k;
+    #pragma omp parallel for private(j)
+    for (j = 0; j < nOutputPlane; ++j)
+      channel_noise_data[j] = (real)THRandom_uniform(generator, lower, upper);
+    #pragma omp parallel for private(j,k)
+    for (i = 0; i < bs; ++i)
+    {
+      real* n_input_data = input_data + i*nOutputPlane*ks;
+      real* n_output_data = NULL;
+      real* n_noise_data = NULL;
+      if (inplace)
+        n_noise_data = noise_data + i*nOutputPlane*ks;
+      else
+        n_output_data = output_data + i*nOutputPlane*ks;
+      for (j = 0; j < nOutputPlane; ++j)
+      {
+        const real r = channel_noise_data[j];
+        for (k = 0; k < ks; ++k)
+          if (inplace)
+            if (n_input_data[k] <= 0)
+            {
+              n_input_data[k] = r * n_input_data[k];
+              n_noise_data[k] = r;
+            }
+            else
+              n_noise_data[k] = 1;
+          else
+            n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : r * n_input_data[k];
+        n_input_data += ks;
+        if (inplace)
+          n_noise_data += ks;
         else
-        {
-          *output_data = *input_data;
-          *noise_data = 1;
-        }
-      );
+          n_output_data += ks;
       }
     }
+    if (inplace)
+      THTensor_(set)(output, input);
     else
+      THTensor_(set)(noise, channel_noise);
+  }
+  else
   {
-    const real negSlope = (lower + upper) / 2;
-    if (inplace)
+    if (train)
     {
-      TH_TENSOR_APPLY(real, input,
-        if (*input_data <= 0)
-        {
-          *input_data = *input_data * negSlope;
-        }
-      );
-      THTensor_(set)(output, input);
+      // get default random generator
+      THTensor_(resizeAs)(noise, input);
+      if (inplace)
+      {
+        TH_TENSOR_APPLY2(real, input, real, noise,
+          if (*input_data <= 0)
+          {
+            const real r = (real)THRandom_uniform(generator, lower, upper);
+            *input_data = (*input_data) * r;
+            *noise_data = r;
+          }
+          else
+          {
+            *noise_data = 1;
+          }
+        );
+        THTensor_(set)(output, input);
      }
      else
      {
+        THTensor_(resizeAs)(output, input);
+        TH_TENSOR_APPLY3(real, input, real, output, real, noise,
+          if (*input_data <= 0)
+          {
+            const real r = (real)THRandom_uniform(generator, lower, upper);
+            *output_data = (*input_data) * r;
+            *noise_data = r;
+          }
+          else
+          {
+            *output_data = *input_data;
+            *noise_data = 1;
+          }
+        );
+      }
+    }
+    else
+    {
-      THTensor_(resizeAs)(output, input);
-      TH_TENSOR_APPLY2(real, input, real, output,
-        const real r = (*input_data) <= 0 ? negSlope : 1;
-        *output_data = *input_data * r;
-      );
+      const real negSlope = (lower + upper) / 2;
+      if (inplace)
+      {
+        TH_TENSOR_APPLY(real, input,
+          if (*input_data <= 0)
+          {
+            *input_data = *input_data * negSlope;
+          }
+        );
+        THTensor_(set)(output, input);
+      }
+      else
+      {
+        THTensor_(resizeAs)(output, input);
+        TH_TENSOR_APPLY2(real, input, real, output,
+          const real r = (*input_data) <= 0 ? negSlope : 1;
+          *output_data = *input_data * r;
+        );
      }
    }
-}
+  }
+}
 
 void THNN_(RReLU_updateGradInput)(
@@ -84,24 +172,84 @@ void THNN_(RReLU_updateGradInput)(
           real lower,
           real upper,
           bool train,
-          bool inplace)
+          bool inplace,
+          bool channelwise)
 {
   if (train && upper - lower > 1E-6)    // e.g. if upper == lower, RReLU behaves like LeakyReLU
   {
-    // multiply the gradient by the noise tensor
-    if (inplace)
+    if (channelwise && !inplace)
     {
-      THTensor_(cmul)(gradOutput, gradOutput, noise);
-      THTensor_(set)(gradInput, gradOutput);
+      long bs, ks;
+      long nOutputPlane;
+      {
+        long input_ndim = THTensor_(nDimension)(input);
+        switch (input_ndim)
+        {
+          case 1:
+            bs = 1;
+            ks = 1;
+            break;
+          case 2:
+            bs = input->size[0];
+            ks = 1;
+            break;
+          case 3:
+            bs = 1;
+            ks = input->size[1] * input->size[2];
+            break;
+          case 4:
+            bs = input->size[0];
+            ks = input->size[2] * input->size[3];
+            break;
+        }
+        nOutputPlane = input->size[(input_ndim + 1) % 2];
+      }
+
+      const real *input_data = THTensor_(data)(input);
+      const real *gradOutput_data = THTensor_(data)(gradOutput);
+      THTensor_(resizeAs)(gradInput, input);
+      real *gradInput_data = THTensor_(data)(gradInput);
+      const real *noise_data = THTensor_(data)(noise);
+
+      THIndex_t i, j, k;
+      #pragma omp parallel for private(j,k)
+      for (i = 0; i < bs; ++i)
+      {
+        const real *n_input_data = input_data + i*nOutputPlane*ks;
+        const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
+        real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks;
+
+        for (j = 0; j < nOutputPlane; ++j)
+        {
+          const real r = noise_data[j];
+          for (k = 0; k < ks; ++k)
+            if (n_input_data[k] > 0)
+              n_gradInput_data[k] = n_gradOutput_data[k];
+            else
+              n_gradInput_data[k] = n_gradOutput_data[k] * r;
+          n_input_data += ks;
+          n_gradInput_data += ks;
+          n_gradOutput_data += ks;
+        }
+      }
     }
     else
     {
-      THTensor_(resizeAs)(gradInput, input);
-      THTensor_(cmul)(gradInput, gradOutput, noise);
-    }
+      // multiply the gradient by the noise tensor
+      if (inplace)
+      {
+        THTensor_(cmul)(gradOutput, gradOutput, noise);
+        THTensor_(set)(gradInput, gradOutput);
+      }
+      else
+      {
+        THTensor_(resizeAs)(gradInput, input);
+        THTensor_(cmul)(gradInput, gradOutput, noise);
+      }
+    }
   }
   else
-  {
+  {
     // use constant factor for negative input values
     const real negSlope = (lower + upper) / 2;
     if (inplace)
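To see the backward rule in action, a minimal sanity check (an illustrative sketch, not the repository's test suite; it assumes the default training mode and the non-in-place channel-wise path, where `noise` ends up holding one factor per channel):

```lua
local act = nn.RReLU(1/8, 1/3, false, true)
local x  = torch.randn(2, 3, 4, 4)
local y  = act:forward(x)             -- samples act.noise: one factor per channel
local go = torch.randn(2, 3, 4, 4)
local gi = act:backward(x, go)

-- The gradient passes through where x > 0 and is scaled by the channel's
-- factor where x <= 0, matching the per-channel loop above.
local pos = x:gt(0):typeAs(x)
local neg = x:le(0):typeAs(x)
local r   = act.noise:view(1, 3, 1, 1):expandAs(x)
local expected = torch.cmul(go, pos + torch.cmul(neg, r))
print((gi - expected):abs():max())    -- should print (approximately) 0
```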
4 changes: 3 additions & 1 deletion lib/THNN/generic/THNN.h
@@ -291,6 +291,7 @@ TH_API void THNN_(RReLU_updateOutput)(
           real upper,
           bool train,
           bool inplace,
+          bool channelwise,
           THGenerator *generator);
 TH_API void THNN_(RReLU_updateGradInput)(
           THNNState *state,
@@ -301,7 +302,8 @@ TH_API void THNN_(RReLU_updateGradInput)(
           real lower,
           real upper,
           bool train,
-          bool inplace);
+          bool inplace,
+          bool channelwise);
 
 TH_API void THNN_(Sigmoid_updateOutput)(
           THNNState *state,