kaldi-asr · csukuangfj · Feb 20, 2020 · Feb 20, 2020 · Feb 21, 2020 · Feb 21, 2020
diff --git a/egs/aishell/s10b/cmd.sh b/egs/aishell/s10b/cmd.sh
@@ -0,0 +1,16 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+# This recipe uses run.pl for every job type, i.e., everything runs locally.
+export train_cmd="run.pl"
+export decode_cmd="run.pl"
+export mkgraph_cmd="run.pl"
+export cuda_cmd="run.pl"
diff --git a/egs/aishell/s10b/conf/fbank.conf b/egs/aishell/s10b/conf/fbank.conf
@@ -0,0 +1 @@
+--num-mel-bins=40
diff --git a/egs/aishell/s10b/ctc/add_deltas_layer.py b/egs/aishell/s10b/ctc/add_deltas_layer.py
@@ -0,0 +1,96 @@
+# Copyright 2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)
+# Apache 2.0
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def compute_delta_feat(x, weight):
+    '''Apply one delta filter to every feature dimension independently.
+
+    Args:
+        x: input features of shape [batch_size, feat_dim, seq_len].
+
+        weight: filter coefficients of shape [feat_dim, 1, kernel_size];
+              kernel_size has to be odd.
+
+    Returns:
+        a tensor of shape [batch_size, feat_dim, seq_len - kernel_size + 1];
+        no padding is applied, so (kernel_size - 1) / 2 frames are lost at
+        each end of the sequence.
+    '''
+    assert x.ndim == 3
+    assert weight.ndim == 3
+
+    num_channels = x.size(1)
+    out_channels, in_channels_per_group, kernel_size = weight.shape
+    assert out_channels == num_channels
+    assert in_channels_per_group == 1
+    assert kernel_size % 2 == 1
+
+    # With groups == number of channels each feature dimension is filtered
+    # by its own kernel, i.e., this is a depthwise convolution.
+    return F.conv1d(input=x, weight=weight, groups=num_channels)
+
+
+class AddDeltasLayer(nn.Module):
+    '''Append first and second order delta features to the input.
+
+    This implements kaldi's `add-deltas` with order == 2 as two fixed
+    depthwise filters; it has no trainable `nn.Parameter`s.
+
+    NOTE: the default coefficients differ from the ones that
+    add_deltas_layer_test.py passes to match `add-deltas --delta-window=2`;
+    pass the coefficients explicitly when exact kaldi parity is needed.
+    '''
+
+    def __init__(self,
+                 first_order_coef=(-1, 0, 1),
+                 second_order_coef=(1, 0, -2, 0, 1)):
+        '''
+        Args:
+            first_order_coef: odd-length sequence of filter coefficients
+                used to compute the first order delta feature
+            second_order_coef: odd-length sequence of filter coefficients
+                used to compute the second order delta feature
+        '''
+        super().__init__()
+        # Kept as plain 1-D tensors (not buffers) so that the state dict
+        # of a model using this layer does not gain new entries.
+        self.first_order_coef = torch.tensor(first_order_coef).float()
+        self.second_order_coef = torch.tensor(second_order_coef).float()
+
+    @staticmethod
+    def _make_weight(coef, x):
+        '''Return `coef` as a depthwise conv weight matching `x`.'''
+        weight = coef.reshape(1, 1, -1).to(device=x.device, dtype=x.dtype)
+        return weight.repeat(x.size(1), 1, 1)
+
+    @staticmethod
+    def _center_crop(t, num_frames):
+        '''Keep the `num_frames` central frames of `t` along the time axis.'''
+        offset = (t.size(2) - num_frames) // 2
+        return t[:, :, offset:offset + num_frames]
+
+    def forward(self, x):
+        '''
+        Args:
+            x: a tensor of shape [batch_size, feat_dim, seq_len]
+
+        Returns:
+            a tensor of shape [batch_size, feat_dim * 3, num_frames], where
+            num_frames is seq_len minus (longest kernel size - 1) since no
+            padding is performed.
+        '''
+        # The weights are rebuilt on every call so that the layer follows
+        # the device, dtype and feature dimension of its current input.
+        first_order = compute_delta_feat(
+            x, self._make_weight(self.first_order_coef, x))
+        second_order = compute_delta_feat(
+            x, self._make_weight(self.second_order_coef, x))
+
+        # Crop by length: a `t[a:-a]` slice would be empty when a == 0.
+        num_frames = min(first_order.size(2), second_order.size(2))
+        return torch.cat([self._center_crop(t, num_frames)
+                          for t in (x, first_order, second_order)], dim=1)
diff --git a/egs/aishell/s10b/ctc/add_deltas_layer_test.py b/egs/aishell/s10b/ctc/add_deltas_layer_test.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+# Copyright 2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)
+# Apache 2.0
+
+import os
+import shutil
+import tempfile
+import unittest
+
+import numpy as np
+
+import torch
+import torch.nn.functional as F
+
+import kaldi
+
+from add_deltas_layer import AddDeltasLayer
+
+
+class AddDeltasLayerTest(unittest.TestCase):
+    '''Compare AddDeltasLayer against kaldi's `add-deltas` binary.'''
+
+    def test(self):
+        # 10 frames with 2 feature dimensions: [seq_len, feat_dim]
+        x = torch.tensor([
+            [1, 3],
+            [5, 10],
+            [0, 1],
+            [10, 20],
+            [3, 1],
+            [3, 2],
+            [5, 1],
+            [10, -2],
+            [10, 20],
+            [100, 200],
+        ]).float()
+
+        x = x.unsqueeze(0)
+
+        # filters equivalent to --delta-order=2 --delta-window=2
+        transform = AddDeltasLayer(first_order_coef=[-0.2, -0.1, 0, 0.1, 0.2],
+                                   second_order_coef=[
+                                       0.04, 0.04, 0.01, -0.04, -0.1, -0.04,
+                                       0.01, 0.04, 0.04
+                                   ])
+        # the layer expects [batch_size, feat_dim, seq_len]
+        y = transform(x.permute(0, 2, 1)).permute(0, 2, 1).squeeze(0)
+
+        # now use kaldi's add-deltas to compute the ground truth
+        d = tempfile.mkdtemp()
+        # remove the temporary directory even if the test fails
+        self.addCleanup(shutil.rmtree, d, ignore_errors=True)
+
+        wspecifier = 'ark:{}/feats.ark'.format(d)
+        writer = kaldi.MatrixWriter(wspecifier)
+        writer.Write('utt1', x.squeeze(0).numpy())
+        writer.Close()
+
+        delta_feats_specifier = 'ark:{dir}/delta.ark'.format(dir=d)
+        cmd = '''
+        add-deltas --print-args=false --delta-order=2 --delta-window=2 {} {}
+        '''.format(wspecifier, delta_feats_specifier)
+
+        # fail right here, instead of while reading, if add-deltas failed
+        self.assertEqual(os.system(cmd), 0, 'add-deltas failed')
+
+        reader = kaldi.RandomAccessMatrixReader(delta_feats_specifier)
+        self.addCleanup(reader.Close)
+        expected = reader['utt1']
+
+        # the layer does no padding, so it drops 4 frames at each end
+        np.testing.assert_array_almost_equal(y.numpy(),
+                                             expected.numpy()[4:-4, :],
+                                             decimal=5)
+
+
+if __name__ == '__main__':
+    unittest.main()  # run the test case above when executed as a script
diff --git a/egs/aishell/s10b/ctc/common.py b/egs/aishell/s10b/ctc/common.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+# Copyright 2020 Mobvoi AI Lab, Beijing, China (author: Fangjun Kuang)
+# Apache 2.0
+
+from datetime import datetime
+import logging
+
+import torch
+
+
+def setup_logger(log_filename, log_level='info'):
+    '''Log to the console and to the file `<log_filename>-<date-time>`.
+
+    Args:
+        log_filename: prefix of the log file; a timestamp is appended.
+        log_level: 'debug', 'info' or 'warning'; else ValueError is raised.
+    '''
+    level = {'debug': logging.DEBUG, 'info': logging.INFO,
+             'warning': logging.WARNING}.get(log_level)
+    if level is None:  # this used to surface as an UnboundLocalError below
+        raise ValueError('Unsupported log level: {}'.format(log_level))
+    date_time = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
+    formatter = '%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s'
+    logging.basicConfig(filename='{}-{}'.format(log_filename, date_time),
+                        format=formatter, level=level, filemode='w')
+    console = logging.StreamHandler()
+    console.setLevel(level)
+    console.setFormatter(logging.Formatter(formatter))
+    logging.getLogger('').addHandler(console)
+
+
+def load_checkpoint(filename, model):
+    '''Restore `model` from `filename`; return (epoch, learning_rate, loss).
+
+    A checkpoint saved from a DDP-wrapped model (keys prefixed with
+    `module.`) can be loaded into a model that is not wrapped.
+    '''
+    logging.info('Loading checkpoint from {}'.format(filename))
+
+    checkpoint = torch.load(filename, map_location='cpu')
+    for required in ('state_dict', 'epoch', 'learning_rate', 'loss'):
+        assert required in checkpoint
+
+    saved_state = checkpoint['state_dict']
+    first_model_key = list(model.state_dict().keys())[0]
+    if first_model_key.startswith('module.') \
+            or not list(saved_state)[0].startswith('module.'):
+        model.load_state_dict(saved_state)
+    else:
+        # the checkpoint was saved by DDP: strip the `module.` prefix
+        logging.info('load checkpoint from DDP')
+        renamed = model.state_dict()
+        for name in renamed.keys():
+            renamed[name] = saved_state.pop('module.{}'.format(name))
+        assert len(saved_state) == 0
+        model.load_state_dict(renamed)
+
+    return (checkpoint['epoch'], checkpoint['learning_rate'],
+            checkpoint['loss'])
+
+
+def save_checkpoint(filename, model, epoch, learning_rate, loss, local_rank=0):
+    '''Write the model state and training progress to `filename`.
+
+    Only the process whose local_rank is 0 saves; all others return
+    immediately.  The keys written are `state_dict`, `epoch`,
+    `learning_rate` and `loss`.
+    '''
+    if local_rank == 0:
+        progress = dict(epoch=epoch, learning_rate=learning_rate, loss=loss)
+        logging.info('Saving checkpoint to {filename}: epoch={epoch}, '
+                     'learning_rate={learning_rate}, loss={loss}'.format(
+                         filename=filename, **progress))
+        # `state_dict` goes first, followed by the training progress
+        checkpoint = {'state_dict': model.state_dict()}
+        checkpoint.update(progress)
+        torch.save(checkpoint, filename)
+
+
+def save_training_info(filename,
+                       model_path,
+                       current_epoch,
+                       learning_rate,
+                       loss,
+                       best_loss,
+                       best_epoch,
+                       local_rank=0):
+    '''Write a summary of the training state to `filename` (rank 0 only).'''
+    if local_rank != 0:
+        return
+    entries = [('model_path', model_path), ('epoch', current_epoch),
+               ('learning rate', learning_rate), ('loss', loss),
+               ('best loss', best_loss), ('best epoch', best_epoch)]
+    # one `<label>: <value>` line per entry, overwriting any existing file
+    with open(filename, 'w') as f:
+        for label, value in entries:
+            f.write('{}: {}\n'.format(label, value))