Skip to content

Commit

Permalink
Normalize supports arbitrary dimensions
Browse files Browse the repository at this point in the history
Assert that dim is integer in Normalize
  • Loading branch information
fmassa committed Apr 13, 2016
1 parent 9cffea5 commit 34a0080
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 39 deletions.
67 changes: 31 additions & 36 deletions Normalize.lua
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
local Normalize, parent = torch.class('nn.Normalize', 'nn.Module')

-- Constructor for nn.Normalize.
--   p   : order of the norm; must be provided and strictly positive
--   dim : dimension to normalize over; stored as -1 when not given
--   eps : smoothing term added to the norm; stored as 1e-10 when not given
-- NOTE(review): this is a rendered diff without +/- markers -- the first
-- signature line below appears to be the pre-change one and the second its
-- replacement; only one of them exists in the actual file.
function Normalize:__init(p,eps)
function Normalize:__init(p, dim, eps)
parent.__init(self)
assert(p,'p-norm not provided')
assert(p > 0, p..'-norm not supported')
self.p = p
self.dim = dim or -1
self.eps = eps or 1e-10
end

-- Forward pass: divides `input` element-wise by its p-norm taken along one
-- dimension and returns the result in self.output.
-- NOTE(review): this is a rendered diff without +/- markers, so removed and
-- added lines sit next to each other (e.g. the two `torch.max` calls and the
-- two `cdiv` calls), and one hunk is collapsed behind the "Expand All" row.
-- The span is not runnable as shown.
function Normalize:updateOutput(input)
assert(input:dim() <= 2, 'only 1d layer supported')
local input_size = input:size()
if input:dim() == 1 then
input = input:view(1,-1)
assert(math.abs(self.dim) <= input:dim(),
'input has less dimensions than the normalization dimension')
assert(self.dim % 1 == 0, 'dimension should be an integer')
-- NOTE(review): the `or -1` fallback below comes after both asserts have
-- already used self.dim, so if self.dim were nil (the case the fallback
-- seems meant for) the asserts would raise before it could apply.
local dim = self.dim or -1
-- a negative dim counts from the end: -1 maps to input:dim()
if dim < 0 then
dim = input:dim() + dim + 1
end

-- scratch tensors are created on first use and reused on later calls
self._output = self._output or input.new()
self.norm = self.norm or input.new()
self.buffer = self.buffer or input.new()

self._output:resizeAs(input)

if self.p == math.huge then
-- specialization for the infinity norm
self._indices = self._indices or
(torch.type(self.output) == 'torch.CudaTensor' and
torch.CudaTensor() or torch.LongTensor())

-- norm = max of |input| along the dimension, plus eps; self._indices is
-- filled by the same call and is read again in updateGradInput
self.buffer:abs(input)
torch.max(self.norm, self._indices, self.buffer, 2)
torch.max(self.norm, self._indices, self.buffer, dim)
self.norm:add(self.eps)
else
self.normp = self.normp or input.new()
Expand All @@ -37,41 +37,36 @@ function Normalize:updateOutput(input)
else
self.buffer:pow(input,self.p)
end
-- normp = sum of buffer along the dimension, plus eps; norm = normp^(1/p)
self.normp:sum(self.buffer,2):add(self.eps)
self.normp:sum(self.buffer, dim):add(self.eps)
self.norm:pow(self.normp,1/self.p)
end
-- divide input by the norm, expanded to the shape of input
self._output:cdiv(input, self.norm:view(-1,1):expandAs(input))
self.output:cdiv(input, self.norm:expandAs(input))

self.output:view(self._output, input_size)
return self.output
end

-- Backward pass: fills self.gradInput with the gradient with respect to
-- `input`, given `gradOutput`, and returns it.
-- Reads self.norm, self.buffer and (depending on p) self.normp or
-- self._indices without creating them here.
-- NOTE(review): presumably these hold the values left by the preceding
-- updateOutput call on the same input -- confirm.
-- NOTE(review): this is a rendered diff without +/- markers, so removed and
-- added lines sit next to each other (e.g. `resize(n,d)` / `resizeAs(input)`),
-- and one hunk is collapsed behind the "Expand All" row. The span is not
-- runnable as shown.
function Normalize:updateGradInput(input, gradOutput)
assert(input:dim() <= 2, 'only 1d layer supported')
assert(gradOutput:dim() <= 2, 'only 1d layer supported')

local input_size = input:size()
if input:dim() == 1 then
input = input:view(1,-1)
assert(math.abs(self.dim) <= input:dim(),
'input has less dimensions than the normalization dimension')
assert(self.dim % 1 == 0, 'dimension should be an integer')
-- NOTE(review): the `or -1` fallback below comes after both asserts have
-- already used self.dim, so if self.dim were nil (the case the fallback
-- seems meant for) the asserts would raise before it could apply.
local dim = self.dim or -1
-- a negative dim counts from the end: -1 maps to input:dim()
if dim < 0 then
dim = input:dim() + dim + 1
end

local n = input:size(1) -- batch size
local d = input:size(2) -- dimensionality of vectors

self._gradInput = self._gradInput or input.new()
self.cross = self.cross or input.new()
-- compute diagonal term with gradOutput
self._gradInput:resize(n,d)
self.gradInput:resizeAs(input)
if self.p == math.huge then
-- specialization for the inf case
self._gradInput:cmul(self.norm:view(n,1,1):expand(n,d,1),gradOutput)
self.gradInput:cmul(self.norm:expandAs(gradOutput),gradOutput)
self.buffer:resizeAs(input):zero()
-- cross = input at the recorded indices, divided by norm; scattered back
-- into the zeroed buffer at those same indices
self.cross:resize(n,1)
self.cross:gather(input,2,self._indices)
self.cross:resizeAs(self.norm)
self.cross:gather(input,dim,self._indices)
self.cross:cdiv(self.norm)
self.buffer:scatter(2,self._indices,self.cross)
self.buffer:scatter(dim,self._indices,self.cross)
else
self._gradInput:cmul(self.normp:view(n,1):expand(n,d), gradOutput)
self.gradInput:cmul(self.normp:expandAs(gradOutput), gradOutput)
-- small optimizations for different p
-- buffer = input*|input|^(p-2)
if self.p % 2 ~= 0 then
Expand All @@ -91,39 +86,39 @@ function Normalize:updateGradInput(input, gradOutput)
end
end
-- compute cross term in two steps
self.cross:resize(n,1)
self.cross:resizeAs(self.norm)

-- instead of having a huge temporary matrix (b1*b2),
-- do the computations as b1*(b2*gradOutput). This avoids redundant
-- computation and also a huge buffer of size n*d^2
self.buffer2 = self.buffer2 or input.new() -- nxd
self.buffer2:cmul(input, gradOutput)
self.cross:sum(self.buffer2, 2)
self.cross:sum(self.buffer2, dim)

-- subtract buffer * sum(input .* gradOutput) from the diagonal term
self.buffer:cmul(self.cross:expandAs(self.buffer))
self._gradInput:add(-1, self.buffer)
self.gradInput:add(-1, self.buffer)

-- reuse cross buffer for normalization
if self.p == math.huge then
self.cross:cmul(self.norm,self.norm)
else
self.cross:cmul(self.normp,self.norm)
end
self._gradInput:cdiv(self.cross:expand(n,d))
self.gradInput:cdiv(self.cross:expandAs(gradOutput))

self.gradInput:view(self._gradInput, input_size)
return self.gradInput
end

-- Builds the printable description of the module: its torch type followed by
-- p and the normalization dimension in parentheses.
-- NOTE(review): this is a rendered diff without +/- markers -- each format
-- string and the return statement appear twice (apparently the old line
-- first, then its replacement); only one of each exists in the actual file.
function Normalize:__tostring__()
local s
-- different prints if the norm is integer
if self.p % 1 == 0 then
s = '%s(%d)'
s = '%s(%d,%d)'
else
s = '%s(%f)'
s = '%s(%f,%d)'
end
return string.format(s,torch.type(self),self.p)
-- print -1 when self.dim is not set
local dim = self.dim or -1
return string.format(s,torch.type(self),self.p, dim)
end

function Normalize:type(type, tensorCache)
Expand Down
14 changes: 11 additions & 3 deletions doc/simple.md
Original file line number Diff line number Diff line change
Expand Up @@ -1149,11 +1149,11 @@ print(B) -- output
## Normalize ##

```lua
module = nn.Normalize(p, [eps])
module = nn.Normalize(p, [dim], [eps])
```
Normalizes the input Tensor to have unit `L_p` norm. The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).
Normalizes the input Tensor to have unit `L_p` norm over dimension `dim` (by default -1, i.e., the last dimension). The smoothing parameter `eps` prevents division by zero when the input contains all zero elements (default = `1e-10`).

Input can be 1D or 2D (in which case it's considered as in batch mode)
The `dim` parameter can take both positive and negative values (negative values are counted from the end). Negative dimensions are especially useful if one wants to be invariant to batch-mode.

```lua
A = torch.randn(3, 5)
Expand All @@ -1163,6 +1163,14 @@ B = m:forward(A) -- B is also 3 x 5
print(torch.norm(B, 2, 2)) -- norms is [1, 1, 1]
```

Here is an example of normalizing the feature maps of an image:
```lua
I = torch.randn(2, 3, 2, 2)
m = nn.Normalize(1, -3) -- the third dimension from the end
B = m:forward(I)
print(torch.norm(B, 1, 2))
```

`Normalize` has a specialized implementation for the `inf` norm, which corresponds to the maximum norm.
```lua
A = torch.randn(3,5)
Expand Down
15 changes: 15 additions & 0 deletions test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,21 @@ function nntest.Normalize()
mytester:assertlt(err, precision, 'error norm '..p..' on state ')
end

-- test on different dimensions
for _,p in pairs({1,2,3,4,torch.uniform()*math.random(1,10),math.huge}) do
local ini = math.random(3,5)
local inj = math.random(3,5)
local ink = math.random(3,5)
local inl = math.random(3,5)
local dim = math.random(1,4)
local input = torch.Tensor(inl, ink, inj, ini):zero()

local module = nn.Normalize(p, dim)

local err = jac.testJacobian(module, input, -2, 2)
mytester:assertlt(err, precision, 'error norm '..p..' on state ')
end

-- test IO correctness
local ini = math.random(3,5)
local inj = math.random(3,5)
Expand Down

0 comments on commit 34a0080

Please sign in to comment.