change the implementation of add-deltas to be a subclass of nn.Module
csukuangfj committed Feb 24, 2020
1 parent 5c7fdec commit 150b497
Showing 2 changed files with 75 additions and 32 deletions.
86 changes: 56 additions & 30 deletions egs/aishell/s10b/ctc/transform.py
@@ -4,19 +4,22 @@
 # Apache 2.0
 
 import torch
+import torch.nn as nn
 import torch.nn.functional as F
 
 
-def compute_delta_feat(x, weight):
+def compute_delta_feat(x, weight, enable_padding):

@danpovey (Contributor) commented on Feb 24, 2020:

I think it would be clearer to just not have the option to enable padding.
Also, as I said previously, there should be no need for the permute. Just give all quantities the same layout they would be expected to have for a typical PyTorch model. (May require the input of the network to be permuted though.)

@csukuangfj (Author, Contributor) replied on Feb 24, 2020:

  1. I will remove enable_padding.

  2. For the permute:

  • nn.Conv1d and nn.BatchNorm1d require [batch_size, feat_dim, seq_len]
  • nn.Linear requires [batch_size, seq_len, feat_dim]

I think the permute cannot be avoided.

I will change the layout of the input features to be [batch_size, feat_dim, seq_len],
which may reduce the number of permute calls.
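For illustration, a minimal sketch (not part of this commit; hypothetical layer sizes) of the layout mismatch described in the comment above — nn.Conv1d and nn.BatchNorm1d consume [batch_size, feat_dim, seq_len], while nn.Linear operates on the last dimension, hence the permute:

```python
import torch
import torch.nn as nn

# Hypothetical sizes, for illustration only.
batch_size, seq_len, feat_dim, hidden_dim = 2, 100, 40, 64

x = torch.randn(batch_size, feat_dim, seq_len)  # layout expected by Conv1d/BatchNorm1d

conv = nn.Conv1d(feat_dim, hidden_dim, kernel_size=3, padding=1)
bn = nn.BatchNorm1d(hidden_dim)
linear = nn.Linear(hidden_dim, hidden_dim)

y = bn(conv(x))         # [batch_size, hidden_dim, seq_len]
y = y.permute(0, 2, 1)  # [batch_size, seq_len, hidden_dim], as required by nn.Linear
y = linear(y)           # [batch_size, seq_len, hidden_dim]
```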

@danpovey (Contributor) replied on Feb 24, 2020:

Can you please edit this comment for accuracy? You mention nn.Linear on both sides; I am trying to figure this out.

@csukuangfj (Author, Contributor) replied on Feb 24, 2020:

Sorry, re-edited.

@danpovey (Contributor) replied on Feb 24, 2020:

Hm, OK. I think it would be better to have the delta layer use the same input/output conventions as nn.Conv1d, and for most composite layers (e.g. TDNN-F) to also use those conventions. Then any permutation can be done just at the end of the network, or perhaps avoided altogether by using a 1-d convolution with a kernel size of 1.
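For illustration, a minimal sketch (not part of this commit; hypothetical sizes) of the suggestion above — an nn.Conv1d with kernel_size=1 applies the same per-frame linear projection as nn.Linear, but directly on the [batch_size, feat_dim, seq_len] layout, so no permute is needed:

```python
import torch
import torch.nn as nn

batch_size, seq_len, feat_dim, num_classes = 2, 100, 40, 218  # hypothetical sizes

x = torch.randn(batch_size, feat_dim, seq_len)  # same layout that nn.Conv1d expects

# A kernel-size-1 convolution is a per-frame linear projection, so it can
# replace nn.Linear(feat_dim, num_classes) without permuting the input.
projection = nn.Conv1d(feat_dim, num_classes, kernel_size=1)

y = projection(x)  # [batch_size, num_classes, seq_len]
assert y.shape == (batch_size, num_classes, seq_len)
```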

@csukuangfj (Author, Contributor) replied on Feb 24, 2020:

I will replace nn.Linear with nn.Conv1d to eliminate the permute.

@csukuangfj (Author, Contributor) replied on Feb 24, 2020:

The permute and enable_padding have been removed.

I will remove the permute in TDNN-F after finishing CTC training.
@fanlu or @qindazhu may help to do that if they have time.
Otherwise I will do it.

@danpovey (Contributor) replied via email on Feb 24, 2020.
     '''
     Args:
       x: input feat of shape [batch_size, seq_len, feat_dim]
       weight: coefficients for computing delta features;
-              it has a shape of [feat_dim, 1, kernel_size].
+              it has shape [feat_dim, 1, kernel_size].
+      enable_padding: True to add padding.
     Returns:
-      a tensor fo shape [batch_size, seq_len, feat_dim]
+      a tensor of shape [batch_size, seq_len, feat_dim]
     '''

     assert x.ndim == 3
@@ -27,51 +30,61 @@ def compute_delta_feat(x, weight):

     feat_dim = x.size(2)

-    pad_size = weight.size(2) // 2
+    if enable_padding:
+        pad_size = weight.size(2) // 2

-    # F.pad requires a 4-D tensor in our case
-    x = x.unsqueeze(0)
+        # F.pad requires a 4-D tensor in our case
+        x = x.unsqueeze(0)

-    # (0, 0, pad_size, pad_size) == (left, right, top, bottom)
-    padded_x = F.pad(x, (0, 0, pad_size, pad_size), mode='replicate')
+        # (0, 0, pad_size, pad_size) == (left, right, top, bottom)
+        x = F.pad(x, (0, 0, pad_size, pad_size), mode='replicate')

-    # after padding, we have to convert it back to 3-D
-    # since conv1d requires 3-D input
-    padded_x = padded_x.squeeze(0)
+        # after padding, we have to convert it back to 3-D
+        # since conv1d requires 3-D input
+        x = x.squeeze(0)

     # conv1d requires a shape of [batch_size, feat_dim, seq_len]
-    padded_x = padded_x.permute(0, 2, 1)
+    x = x.permute(0, 2, 1)

     # NOTE(fangjun): we perform a depthwise convolution here by
     # setting groups == number of channels
-    y = F.conv1d(input=padded_x, weight=weight, groups=feat_dim)
+    y = F.conv1d(input=x, weight=weight, groups=feat_dim)

-    # now convert y back to be of shape [batch_size, seq_len, feat_dim]
+    # now convert y back to shape [batch_size, seq_len, feat_dim]
     y = y.permute(0, 2, 1)

     return y
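For illustration, a minimal sketch (not part of this commit; hypothetical sizes) of how a depthwise weight of shape [feat_dim, 1, kernel_size] can be built from a 1-D coefficient vector and passed to compute_delta_feat above:

```python
import torch

from transform import compute_delta_feat  # assumes egs/aishell/s10b/ctc is on sys.path

batch_size, seq_len, feat_dim = 2, 100, 40  # hypothetical sizes
x = torch.randn(batch_size, seq_len, feat_dim)

# Repeat the first-order coefficients once per feature dimension so that
# F.conv1d(..., groups=feat_dim) filters every channel independently.
coef = torch.tensor([-1.0, 0.0, 1.0])
weight = coef.repeat(feat_dim, 1).unsqueeze(1)  # [feat_dim, 1, kernel_size]

y = compute_delta_feat(x, weight, enable_padding=True)
assert y.shape == (batch_size, seq_len, feat_dim)
```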


-class AddDeltasTransform:
+class AddDeltasTransform(nn.Module):
     '''
     This class implements `add-deltas` in kaldi with
     order == 2 and window == 2.
-    It generates the identical output as kaldi's `add-deltas` with default
-    parameters given the same input.
+    It can generate the identical output as kaldi's `add-deltas`.
     See transform_test.py
     '''

-    def __init__(self):
-        # yapf: disable
-        self.first_order_coef = torch.tensor([-0.2, -0.1, 0, 0.1, 0.2])
-        self.second_order_coef = torch.tensor([0.04, 0.04, 0.01, -0.04, -0.1, -0.04, 0.01, 0.04, 0.04])
-        # yapf: enable
+    def __init__(self,
+                 first_order_coef=[-1, 0, 1],
+                 second_order_coef=[1, 0, -2, 0, 1],
+                 enable_padding=False):
+        '''
+        Note that this class has no trainable `nn.Parameters`.
+
+        Args:
+          first_order_coef: coefficient to compute the first order delta feature
+          second_order_coef: coefficient to compute the second order delta feature
+        '''
+        super().__init__()

-        # TODO(fangjun): change the coefficients to the following as suggested by Dan
-        # [-1, 0, 1]
-        # [1, 0, -2, 0, 1]
+        self.first_order_coef = torch.tensor(first_order_coef)
+        self.second_order_coef = torch.tensor(second_order_coef)
+        self.enable_padding = enable_padding

-    def __call__(self, x):
+    def forward(self, x):
         '''
         Args:
           x: a tensor of shape [batch_size, seq_len, feat_dim]
@@ -94,9 +107,22 @@ def __call__(self, x):
         self.first_order_coef = self.first_order_coef.to(device)
         self.second_order_coef = self.second_order_coef.to(device)

-        first_order = compute_delta_feat(x, self.first_order_coef)
-        second_order = compute_delta_feat(x, self.second_order_coef)
-
-        y = torch.cat([x, first_order, second_order], dim=2)
+        first_order = compute_delta_feat(x, self.first_order_coef,
+                                         self.enable_padding)
+        second_order = compute_delta_feat(x, self.second_order_coef,
+                                          self.enable_padding)
+
+        if self.enable_padding:
+            y = torch.cat([x, first_order, second_order], dim=2)
+        else:
+            zeroth = (x.size(1) - second_order.size(1)) // 2
+            first = (first_order.size(1) - second_order.size(1)) // 2
+
+            y = torch.cat([
+                x[:, zeroth:-zeroth, :],
+                first_order[:, first:-first, :],
+                second_order,
+            ],
+                          dim=2)

         return y
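For illustration, a minimal usage sketch (not part of this commit; hypothetical shapes) of the new nn.Module interface. The output stacks the input with its first- and second-order deltas along the feature dimension; note that the default second-order kernel [1, 0, -2, 0, 1] is the first-order kernel [-1, 0, 1] convolved with itself. Without padding, all three parts are trimmed to the length of the second-order deltas:

```python
import torch

from transform import AddDeltasTransform  # assumes egs/aishell/s10b/ctc is on sys.path

batch_size, seq_len, feat_dim = 2, 100, 40  # hypothetical sizes
x = torch.randn(batch_size, seq_len, feat_dim)

# With padding, the sequence length is preserved.
padded = AddDeltasTransform(enable_padding=True)(x)
assert padded.shape == (batch_size, seq_len, 3 * feat_dim)

# Without padding, the widest default kernel (length 5) drops 4 frames
# in total, i.e. 2 at each end.
unpadded = AddDeltasTransform(enable_padding=False)(x)
assert unpadded.shape == (batch_size, seq_len - 4, 3 * feat_dim)
```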
21 changes: 19 additions & 2 deletions egs/aishell/s10b/ctc/transform_test.py
@@ -30,11 +30,18 @@ def test_add_deltas_transform(self):
             [3, 2],
             [5, 1],
             [10, -2],
+            [10, 20],
+            [100, 200],
         ]).float()

         x = x.unsqueeze(0)

-        transform = AddDeltasTransform()
+        transform = AddDeltasTransform(
+            first_order_coef=[-0.2, -0.1, 0, 0.1, 0.2],
+            second_order_coef=[
+                0.04, 0.04, 0.01, -0.04, -0.1, -0.04, 0.01, 0.04, 0.04
+            ],
+            enable_padding=True)
         y = transform(x)

         # now use kaldi's add-deltas to compute the ground truth
@@ -60,7 +67,17 @@ def test_add_deltas_transform(self):

         y = y.squeeze(0)

-        np.testing.assert_array_almost_equal(y.numpy(), expected.numpy())
+        np.testing.assert_array_almost_equal(y.numpy(),
+                                             expected.numpy(),
+                                             decimal=5)
+
+        # now for padding == False
+        transform.enable_padding = False
+        y = transform(x).squeeze(0)
+
+        np.testing.assert_array_almost_equal(y.numpy(),
+                                             expected.numpy()[4:-4, :],
+                                             decimal=5)

reader.Close()
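For illustration, a minimal consistency check (not part of this commit; it assumes transform.py is importable and does not need kaldi): away from the edges, the unpadded output should match the centre of the padded output.

```python
import numpy as np
import torch

from transform import AddDeltasTransform  # assumes egs/aishell/s10b/ctc is on sys.path

x = torch.randn(1, 20, 6)  # [batch_size, seq_len, feat_dim], hypothetical sizes

padded = AddDeltasTransform(enable_padding=True)(x)
unpadded = AddDeltasTransform(enable_padding=False)(x)

# Frames lost at each end when padding is disabled.
trim = (padded.size(1) - unpadded.size(1)) // 2

np.testing.assert_array_almost_equal(unpadded.numpy(),
                                     padded[:, trim:-trim, :].numpy(),
                                     decimal=5)
```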

