Channel-wise Randomized ReLU #838

Open
wants to merge 1 commit into base: master
9 changes: 6 additions & 3 deletions RReLU.lua
@@ -1,14 +1,15 @@
local ffi = require 'ffi'
local RReLU, parent = torch.class('nn.RReLU', 'nn.Module')

function RReLU:__init(l, u, ip)
function RReLU:__init(l, u, ip, cw)
parent.__init(self)
self.lower = l or 1/8
self.upper = u or 1/3
assert(self.lower <= self.upper and self.lower >= 0 and self.upper >= 0)
self.noise = torch.Tensor()
self.train = true
self.inplace = ip or false
self.channelwise = cw or false
end

function RReLU:updateOutput(input)
@@ -21,6 +22,7 @@ function RReLU:updateOutput(input)
self.upper,
self.train,
self.inplace,
self.channelwise,
gen
)
return self.output
@@ -35,13 +37,14 @@ function RReLU:updateGradInput(input, gradOutput)
self.lower,
self.upper,
self.train,
self.inplace
self.inplace,
self.channelwise
)
return self.gradInput
end

function RReLU:__tostring__()
return string.format('%s (l:%f, u:%f)', torch.type(self), self.lower, self.upper)
return string.format('%s (l:%f, u:%f, channel-wise:%s)', torch.type(self), self.lower, self.upper, self.channelwise)
end

function RReLU:clearState()
1 change: 1 addition & 0 deletions doc/transfer.md
@@ -290,6 +290,7 @@ m=nn.RReLU(
l, -- minimum factor for negative inputs, default: 1/8;
u, -- maximum factor for negative inputs, default: 1/3;
inplace -- if true the result will be written to the input tensor, default: false;
cw -- if true, all elements of the same channel share the same random factor `a`, default: false;
)
```
If `l == u` an RReLU effectively becomes a LeakyReLU. Regardless of whether it operates in in-place mode, an RReLU will internally allocate an input-sized `noise` tensor to store the random factors used for negative inputs. The backward() operation assumes that forward() has been called beforehand.
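For reference, a minimal usage sketch of the channel-wise flag, following the constructor signature added in `RReLU.lua` (the tensor sizes below are only an example):

```lua
require 'nn'

-- Channel-wise RReLU: in training mode one random factor is drawn per channel
-- (feature map) and shared by all elements of that channel.
local act = nn.RReLU(1/8, 1/3, false, true)   -- l, u, inplace, cw

local input = torch.randn(16, 8, 32, 32)      -- batch x channels x height x width
act:training()
local out = act:forward(input)                -- negative inputs scaled per channel
act:evaluate()
local outTest = act:forward(input)            -- deterministic slope: (l + u) / 2
```

In evaluation mode the channel-wise flag has no effect: negative inputs are always scaled by the fixed slope `(l + u) / 2`.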
254 changes: 201 additions & 53 deletions lib/THNN/generic/RReLU.c
@@ -11,68 +11,156 @@ void THNN_(RReLU_updateOutput)(
real upper,
bool train,
bool inplace,
bool channelwise,
THGenerator *generator)
{
if (train)
if (channelwise && train)
{
// get default random generator
THTensor_(resizeAs)(noise, input);
if (inplace)
long bs, ks;
long nOutputPlane;
{
TH_TENSOR_APPLY2(real, input, real, noise,
if (*input_data <= 0)
{
const real r = (real)THRandom_uniform(generator, lower, upper);
*input_data = (*input_data) * r;
*noise_data = r;
}
else
{
*noise_data = 1;
}
);
THTensor_(set)(output, input);
long input_ndim = THTensor_(nDimension)(input);
switch (input_ndim)
{
case 1:
bs = 1;
ks = 1;
break;
case 2:
bs = input->size[0];
ks = 1;
break;
case 3:
bs = 1;
ks = input->size[1] * input->size[2];
break;
case 4:
bs = input->size[0];
ks = input->size[2] * input->size[3];
break;
}
nOutputPlane = input->size[(input_ndim + 1) % 2];
}
// get default random generator
if (inplace)
THTensor_(resizeAs)(noise, input);
else
THTensor_(resize1d)(noise, nOutputPlane);

real *output_data = NULL;
real *input_data = THTensor_(data)(input);
real *noise_data = THTensor_(data)(noise);
if (!inplace)
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY3(real, input, real, output, real, noise,
if (*input_data <= 0)
{
const real r = (real)THRandom_uniform(generator, lower, upper);
*output_data = (*input_data) * r;
*noise_data = r;
}
output_data = THTensor_(data)(output);
}
THTensor *channel_noise = THTensor_(newWithSize1d)(nOutputPlane);
real *channel_noise_data = THTensor_(data)(channel_noise);

THIndex_t i, j, k;
#pragma omp parallel for private(j)
for (j = 0; j < nOutputPlane; ++j)
channel_noise_data[j] = (real)THRandom_uniform(generator, lower, upper);
#pragma omp parallel for private(j,k)
for (i = 0; i < bs; ++i)
{
real* n_input_data = input_data + i*nOutputPlane*ks;
real* n_output_data = NULL;
real* n_noise_data = NULL;
if (inplace)
n_noise_data = noise_data + i*nOutputPlane*ks;
else
n_output_data = output_data + i*nOutputPlane*ks;
for (j = 0; j < nOutputPlane; ++j)
{
const real r = channel_noise_data[j];
for (k = 0; k < ks; ++k)
if (inplace)
if (n_input_data[k] <= 0)
{
n_input_data[k] = r * n_input_data[k];
n_noise_data[k] = r;
}
else
n_noise_data[k] = 1;
else
n_output_data[k] = (n_input_data[k] > 0) ? n_input_data[k] : r * n_input_data[k];
n_input_data += ks;
if (inplace)
n_noise_data += ks;
else
{
*output_data = *input_data;
*noise_data = 1;
}
);
n_output_data += ks;
}
}
if (inplace)
THTensor_(set)(output, input);
else
THTensor_(set)(noise, channel_noise);
}
else
{
const real negSlope = (lower + upper) / 2;
if (inplace)
if (train)
{
TH_TENSOR_APPLY(real, input,
if (*input_data <= 0)
{
*input_data = *input_data * negSlope;
}
);
THTensor_(set)(output, input);
// get default random generator
THTensor_(resizeAs)(noise, input);
if (inplace)
{
TH_TENSOR_APPLY2(real, input, real, noise,
if (*input_data <= 0)
{
const real r = (real)THRandom_uniform(generator, lower, upper);
*input_data = (*input_data) * r;
*noise_data = r;
}
else
{
*noise_data = 1;
}
);
THTensor_(set)(output, input);
}
else
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY3(real, input, real, output, real, noise,
if (*input_data <= 0)
{
const real r = (real)THRandom_uniform(generator, lower, upper);
*output_data = (*input_data) * r;
*noise_data = r;
}
else
{
*output_data = *input_data;
*noise_data = 1;
}
);
}
}
else
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, input, real, output,
const real r = (*input_data) <= 0 ? negSlope : 1;
*output_data = *input_data * r;
);
const real negSlope = (lower + upper) / 2;
if (inplace)
{
TH_TENSOR_APPLY(real, input,
if (*input_data <= 0)
{
*input_data = *input_data * negSlope;
}
);
THTensor_(set)(output, input);
}
else
{
THTensor_(resizeAs)(output, input);
TH_TENSOR_APPLY2(real, input, real, output,
const real r = (*input_data) <= 0 ? negSlope : 1;
*output_data = *input_data * r;
);
}
}
}
}
}
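For illustration, a rough Lua equivalent of the channel-wise training-mode forward pass added above. This is a sketch only, not part of the patch; it assumes a 4D batch x channels x height x width input and ignores the in-place path:

```lua
-- Sketch of channel-wise RReLU forward (training mode, not in-place).
local function channelwiseRReLUForward(input, lower, upper)
   local nChannels = input:size(2)
   -- one random factor per channel, shared across the batch and all spatial positions
   local noise = input.new(nChannels):uniform(lower, upper)
   local factors = noise:view(1, nChannels, 1, 1):expandAs(input)
   local scaled = torch.cmul(input, factors)
   -- positive inputs pass through unchanged, negative inputs are scaled per channel
   local output = torch.cmax(input, 0) + torch.cmin(scaled, 0)
   return output, noise
end
```

This mirrors the C loop: the factors are sampled once per output plane, then reused for every batch element and spatial location of that plane.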

void THNN_(RReLU_updateGradInput)(
@@ -84,24 +172,84 @@ void THNN_(RReLU_updateGradInput)(
real lower,
real upper,
bool train,
bool inplace)
bool inplace,
bool channelwise)
{
if (train && upper - lower > 1E-6) // e.g. if upper == lower, RReLU behaves like LeakyReLU
{
// multiply the gradient by the noise tensor
if (inplace)
if (channelwise && !inplace)
{
THTensor_(cmul)(gradOutput, gradOutput, noise);
THTensor_(set)(gradInput, gradOutput);
long bs, ks;
long nOutputPlane;
{
long input_ndim = THTensor_(nDimension)(input);
switch (input_ndim)
{
case 1:
bs = 1;
ks = 1;
break;
case 2:
bs = input->size[0];
ks = 1;
break;
case 3:
bs = 1;
ks = input->size[1] * input->size[2];
break;
case 4:
bs = input->size[0];
ks = input->size[2] * input->size[3];
break;
}
nOutputPlane = input->size[(input_ndim + 1) % 2];
}

const real *input_data = THTensor_(data)(input);
const real *gradOutput_data = THTensor_(data)(gradOutput);
THTensor_(resizeAs)(gradInput, input);
real *gradInput_data = THTensor_(data)(gradInput);
const real *noise_data = THTensor_(data)(noise);

THIndex_t i, j, k;
#pragma omp parallel for private(j,k)
for (i = 0; i < bs; ++i)
{
const real *n_input_data = input_data + i*nOutputPlane*ks;
const real *n_gradOutput_data = gradOutput_data + i*nOutputPlane*ks;
real *n_gradInput_data = gradInput_data + i*nOutputPlane*ks;

for (j = 0; j < nOutputPlane; ++j)
{
const real r = noise_data[j];
for (k = 0; k < ks; ++k)
if (n_input_data[k] > 0)
n_gradInput_data[k] = n_gradOutput_data[k];
else
n_gradInput_data[k] = n_gradOutput_data[k] * r;
n_input_data += ks;
n_gradInput_data += ks;
n_gradOutput_data += ks;
}
}
}
else
{
THTensor_(resizeAs)(gradInput, input);
THTensor_(cmul)(gradInput, gradOutput, noise);
}
// multiply the gradient by the noise tensor
if (inplace)
{
THTensor_(cmul)(gradOutput, gradOutput, noise);
THTensor_(set)(gradInput, gradOutput);
}
else
{
THTensor_(resizeAs)(gradInput, input);
THTensor_(cmul)(gradInput, gradOutput, noise);
}
}
}
else
{
{
// use constant factor for negative input values
const real negSlope = (lower + upper) / 2;
if (inplace)
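And a matching sketch of the channel-wise backward pass (again illustration only, same assumptions as the forward sketch): the gradient passes through unchanged for positive inputs and is scaled by the stored per-channel factor otherwise.

```lua
-- Sketch of channel-wise RReLU backward (training mode, not in-place).
-- `noise` is the per-channel factor tensor produced by the forward sketch above.
local function channelwiseRReLUBackward(input, gradOutput, noise)
   local nChannels = input:size(2)
   local factors = noise:view(1, nChannels, 1, 1):expandAs(input)
   -- slope is 1 where the input was positive, the stored per-channel factor otherwise
   local positive = torch.gt(input, 0):typeAs(input)
   local negative = torch.le(input, 0):typeAs(input)
   local slope = positive + torch.cmul(negative, factors)
   return torch.cmul(gradOutput, slope)
end
```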
4 changes: 3 additions & 1 deletion lib/THNN/generic/THNN.h
@@ -291,6 +291,7 @@ TH_API void THNN_(RReLU_updateOutput)(
real upper,
bool train,
bool inplace,
bool channelwise,
THGenerator *generator);
TH_API void THNN_(RReLU_updateGradInput)(
THNNState *state,
@@ -301,7 +302,8 @@ TH_API void THNN_(RReLU_updateGradInput)(
real lower,
real upper,
bool train,
bool inplace);
bool inplace,
bool channelwise);

TH_API void THNN_(Sigmoid_updateOutput)(
THNNState *state,