[src] Lots of changes: first stab at kaldi10 (non-compatible version of kaldi) #3083

Open

wants to merge 176 commits into master

176 commits
f5f02d1
[src] Lots of changes: first stab at kaldi10 (non-compatible version …
danpovey Mar 10, 2019
cc1d251
Merge master into kaldi10 (#3105)
desh2608 Mar 14, 2019
f28516a
[src] Add Vector strides, beginning draft of tensor stuff (#3120)
danpovey Mar 15, 2019
b9efc54
Merge with master branch
desh2608 Mar 15, 2019
f93749a
[src] More work on tensor library draft in kaldi10 (#3124)
danpovey Mar 17, 2019
a3eeb7c
merged 'master' into kaldi10 and resolved conflicts
desh2608 Mar 17, 2019
f59489f
Merge pull request #3129 from desh2608/kaldi10_new
danpovey Mar 17, 2019
21a3913
Kaldi10 (#3131)
danpovey Mar 17, 2019
eca0e80
[src] More drafting of tensor related stuff (#3132)
danpovey Mar 18, 2019
63e35b0
[src] completed stride support for kaldi-vector (#3146)
YiwenShaoStephen Mar 20, 2019
c4a326e
Some cleanups in matrix/; more work on tensor draft (#3150)
danpovey Mar 20, 2019
4cab3db
Merged with 'master' (#3156)
desh2608 Mar 21, 2019
910ec50
[src] More tensor draft stuff; add simple test of vector stride
danpovey Mar 22, 2019
9bba411
Merge pull request #3161 from danpovey/kaldi10
danpovey Mar 22, 2019
5fa86ad
Kaldi10 (#3167)
danpovey Mar 26, 2019
7acef2a
Kaldi10: Implement topology (#3169)
hhadian Mar 26, 2019
26edaf6
Kaldi10: Add missing file (from PR #3169) + minor fixes (#3170)
hhadian Mar 26, 2019
5117c63
merged with master
desh2608 Mar 26, 2019
fe57b4a
Merge pull request #3173 from desh2608/kaldi10_merge
danpovey Mar 26, 2019
d6634f7
Implement most of transitins.cc (#3184)
hhadian Mar 29, 2019
20c73b5
[src] Kaldi10 changes: remove vector strides, more tensor progress. (…
danpovey Mar 31, 2019
885249a
[src] Kaldi10, more tensor progress (#3189)
danpovey Mar 31, 2019
e36034e
Implement the rest of transitions.cc + tests (#3198)
hhadian Apr 2, 2019
f8adced
Remove lingering HTK support fully. (#3201)
galv Apr 3, 2019
6d5c87b
Fix compilation of posterior.cc (#3200)
galv Apr 3, 2019
8fa9d18
Kaldi10: more tensor drafting. (#3246)
danpovey Apr 18, 2019
4a6b739
[src] Tensor progress
danpovey Apr 20, 2019
99873c6
[src] Further progress
danpovey Apr 22, 2019
493efff
[src] Lots more progress, still in flux
danpovey Apr 24, 2019
2a85204
[src,egs] Update code related to pdf-class to be 1-based. (#3278)
hhadian Apr 28, 2019
af6a30b
[src] More drafting on tensor code
danpovey Apr 30, 2019
3647d2d
[kaldi10] WIP hmm-utils.cc
galv Apr 16, 2019
f0e32f9
Clean up based on feedback.
galv Apr 17, 2019
a8b5580
hmm-utils.cc: Everything compiles except for AddSelfLoops.
galv Apr 18, 2019
af91c73
hmm-utils.cc compiles, except for a bizarre problem with the copy-ass…
galv Apr 18, 2019
25ebce5
Successfully compile all binaries other than cuda-gpu-available.cc
galv May 2, 2019
c0b6042
[src] Further progress
danpovey May 3, 2019
82ec8b7
Merge pull request #3290 from danpovey/kaldi10
danpovey May 3, 2019
24a85c9
[src] Further progress
danpovey May 4, 2019
d588595
Merge pull request #3292 from danpovey/kaldi10
danpovey May 4, 2019
3b2fee1
[src] further tensor progress
danpovey May 4, 2019
a751cf4
[src] Tensor progress; rename some files
May 5, 2019
670b2d5
[src] Small changes
May 5, 2019
cf95fe4
[src] Add definition
danpovey May 5, 2019
26121fc
Merge pull request #3294 from danpovey/kaldi10
danpovey May 5, 2019
5d5d387
[src] Various progress
danpovey May 12, 2019
f6e9281
[src] Small tensor changes prior to rewrite
danpovey May 17, 2019
931496f
[src] Some major changes in rough draft
danpovey May 24, 2019
3c3d9a9
[src] Lots of tensor changes
danpovey May 31, 2019
3dd5e7e
[src] Refactoring of matrix directory to separate out the cblas wrapp…
danpovey Jun 17, 2019
b254c83
[src] Add more things to cblasext
danpovey Jun 17, 2019
f566e81
[scripts] Fix non-randomness in getting utt2uniq, introduced in #3142…
desh2608 Mar 27, 2019
560594e
[build] Don't build for Tegra sm_XX versions on x86/ppc and vice vers…
luitjens Mar 27, 2019
4264512
[egs] Fixes Re encoding to IAM, uw3 recipes (#3012)
aarora8 Mar 29, 2019
6787282
[src] Efficiency improvement and extra checking for cudamarix, RE def…
luitjens Mar 30, 2019
8a1acde
[egs] Fix small typo in tedlium download script (#3178)
Shujian2015 Mar 30, 2019
5f00d0d
[github] Add GitHub issue templates (#3187)
Mar 31, 2019
6e998a9
[build] Add missing dependency to Makefile (#3191)
danpovey Mar 31, 2019
bf0af1d
[src] Fix bug in pruned lattice rescoring when input lattice has epsi…
hainan-xv Apr 1, 2019
7371a95
[scripts] Fix bug in extend_lang.sh regarding extra_disambig.txt (#3195)
armusc Apr 2, 2019
32496b4
[egs] Update Tedlium s5_r3 example with more up-to-date chain TDNN co…
jyhnnhyj Apr 3, 2019
43ba4f2
[scripts] Fix bug in extend_lang.sh causing validation failure w/ ext…
jty016 Apr 3, 2019
c737d94
[scripts] Bug-fix in make_lexicon_fst.py, which failed when --sil-pro…
armusc Apr 4, 2019
57d63cc
[egs] Fix very small typo in run_tdnn_1b.sh (#3207)
Shujian2015 Apr 4, 2019
9393b66
[build] Tensorflow version update (#3204)
langep Apr 4, 2019
4efc486
[src] Optimizations to CUDA kernels (#3209)
kangshiyin Apr 6, 2019
59523dc
[src] Move curand handle out of CuRand class and into CuDevice. (#3196)
luitjens Apr 7, 2019
da729a5
[build] Make MKL the default BLAS library, add installation scripts (…
Apr 7, 2019
c8ada0c
[build] check for i686 as a valid prefix for Android triplets (#3213)
Dr-Desty-Nova Apr 7, 2019
e1ac00f
[build] Fix configure breakage from #3194 (MKL default)
Apr 9, 2019
c54b5e5
[build] Add missing line continuation '\' in tfrnnlmbin/Makefile (#3218)
teinhonglo Apr 10, 2019
519493f
[src] Fix nnet2 DctComponent test failure (#3225)
huangruizhe Apr 12, 2019
d7685cb
[src] Update CUDA code to avoid synchronization errors on compute ca…
kangshiyin Apr 12, 2019
cbdb930
[src] fix nnet2 DCTCompnent test failure -- removing anther dct_keep_…
huangruizhe Apr 12, 2019
d22530f
[build] Remove references to deprecated MKL libs in gst_plugin (#3229)
Apr 14, 2019
e0cce5b
[scripts] Fix default params in nnet3 segmentation script (#3230)
rezame Apr 14, 2019
cbd1aa3
[src] Correct sanity check in nnet-example-utils.cc (nnet3) (#3232)
KarelVesely84 Apr 16, 2019
a12ee73
Revert "[src] Update CUDA code to avoid synchronization errors on co…
danpovey Apr 16, 2019
b2f9c54
[build] .gitignore autogenerated /tools/python/ (#3241)
mcalahan Apr 17, 2019
3642739
[scripts] Enhance argument checks in nnet3/align_lats.sh (#3243)
Apr 18, 2019
507145f
[egs] invoke 'python2.7' not 'python' when using mmseg (#3244)
naxingyu Apr 18, 2019
db8ed5b
[scripts] Make getting nnet3 model context more robust (#3247)
KarelVesely84 Apr 18, 2019
f8de5a8
[egs] Fix hkust_data_prep.sh w.r.t. iconv mac compatibility issue (#3…
zh794390558 Apr 19, 2019
68ad4e9
[egs] Update RM chain recipe with more recent configuration (#3237)
indra622 Apr 19, 2019
4831a66
[egs] Make voxceleb recipe work with latest version of the dataset (…
sunshines14 Apr 19, 2019
0534e49
[egs] Improve chain example script for Resource Management (RM) (#3252)
indra622 Apr 21, 2019
db2ed32
[src] GPU-related changes for speed and correctness on newer arch's. …
luitjens Apr 22, 2019
479c732
[egs] Update voxceleb v1 preparation scripts (#3255)
jyhnnhyj Apr 23, 2019
8c197b4
[build] Note default=MKL; cosmetic fix (#3257)
nshmyrev Apr 23, 2019
56dc8d9
[egs] Fix to hkust_data_prep.sh w.r.t. how mmseg is checked for (#3240)
zh794390558 Apr 23, 2019
16c9270
[egs] In WSJ run_ivector_common.sh, expose i-vector #jobs config to r…
KarelVesely84 Apr 23, 2019
57205cf
[egs] Add Spanish dimex100 example (#3254)
alx741 Apr 23, 2019
a756df2
[build] Build and configure OpenBLAS; default to it on non-x64 machin…
Apr 25, 2019
121dbbe
[scripts] Fix of a bug in segmentation.pl (#3256)
songyf Apr 25, 2019
a0b6f3f
[src] Fixes to cuda unit tests. (#3268)
luitjens Apr 25, 2019
c415cba
[src] Adding GPU/CUDA lattice batched decoder + binary (#3114)
hugovbraun Apr 26, 2019
4231107
[src] Fix unit-test failure UnitTestCuMatrixSetRandn() (#3274)
DongjiGao Apr 27, 2019
25c7289
[src,build] Removed cusolver for now (not needed yet; caused build p…
huangruizhe Apr 27, 2019
e3abc65
[scripts] Make fix_data_dir.sh remove utterances which have bad durat…
hhadian Apr 30, 2019
7a93e7f
[scripts] Make generate_plots.py python3-compatible (#3280)
May 1, 2019
c9a1257
[scripts] Add --one-based option to split_scp.pl (#3279)
xsawyerx May 1, 2019
aae8be4
[scripts] Allow UTF utterance-ids by removing unnecessary assert (#3283)
rezame May 1, 2019
803e3ee
[src] Keep nnet output in the [-30,30] range required by chain denomi…
danpovey May 2, 2019
b44f708
[scripts] Clean up filehandle usage in split_scp.pl (#3285)
xsawyerx May 2, 2019
7055784
[src] Fix to bug in online-feature.cc that caused crash at end of utt…
danpovey May 2, 2019
1bcea23
[scripts] Use correct compile-time regex syntax in split_scp.pl (#3287)
xsawyerx May 2, 2019
bfbe861
[scripts] Fix a typo in steps/dict/learn_lexicon_bayesian.sh (#3288)
xiaohui-zhang May 2, 2019
61b2347
[egs,scripts] Scripts and an example of BPE-based sub-word decoding (…
DongjiGao May 5, 2019
49bccbb
[scripts] Add trainer option --trainer.optimization.num-jobs-step (#3…
May 7, 2019
8209d18
[egs] Add MGB-5 recipe; https://arabicspeech.org/mgb5 (#3299)
May 8, 2019
5fbc9eb
Revert "[scripts] Clean up filehandle usage in split_scp.pl (#3285)" …
danpovey May 9, 2019
b78d92e
[src] Fix bug in GeneralMatrix::Uncompress() (#3304)
bringtree May 9, 2019
fee2acd
[doc] add an omission in Doxyfile (#3309)
May 10, 2019
de81d0c
[scripts] Fix utils/split_scp.pl breakage (#3308)
May 10, 2019
3453b5a
[egs] Bug-fix to shebang in fisher_callhome_spanish (#3312)
saikiranvalluri May 11, 2019
5ca7f58
[scripts] Fix error messages in run.pl (#3314)
May 11, 2019
e2dc9c3
[egs] New chime-5 recipe (#2893)
vimalmanohar May 12, 2019
e330320
[scripts,egs] Made changes to the augmentation script to make it work…
phanisankar-nidadavolu May 13, 2019
2826b35
[egs] updated local/musan.sh to steps/data/make_musan.sh in speaker i…
phanisankar-nidadavolu May 13, 2019
c695bbc
[src] Fix sample rounding errors in extract-segments (#3321)
May 14, 2019
cfa48eb
[src,scripts]Store frame_shift, utt2{dur,num_frames}, .conf with feat…
May 14, 2019
a1343bd
[build] Initial version of Docker images for (CPU and GPU versions) (…
mdoulaty May 15, 2019
9569384
[scripts] fix typo/bug in make_musan.py (#3327)
wonkyuml May 15, 2019
94aef8d
[scripts] Trust frame_shift and utt2num_frames if found (#3313)
May 16, 2019
9ae4a5c
[scripts] typo fix in augmentation script (#3329)
wonkyuml May 16, 2019
74ebdee
[scripts] handle frame_shit and utt2num_frames in utils/ (#3323)
May 16, 2019
d1c49bf
[scripts] Extend combine_ali_dirs.sh to combine alignment lattices (#…
May 17, 2019
bcfcad7
[src] Fix rare case when segment end rounding overshoots file end in …
alumae May 17, 2019
264372c
[scripts] Change --modify-spk-id default to False; back-compatibility…
phanisankar-nidadavolu May 20, 2019
485c248
[build] Add easier configure option in failure message of configure (…
danpovey May 20, 2019
e3ece34
[scripts,minor] Fix typo in comment (#3338)
Shujian2015 May 22, 2019
d03c16e
[src,egs] Add option for applying SVD on trained models (#3272)
saikiranvalluri May 23, 2019
33a16d8
[src] Add interfaces to nnet-batch-compute that expects device input.…
luitjens May 23, 2019
1e8260b
[build] Update GCC support check for CUDA toolkit 10.1 (#3345)
entn-at May 27, 2019
10bb5de
[egs] Fix to aishell1 v1 download script (#3344)
naxingyu May 27, 2019
d8d3b86
[scripts] Support utf-8 files in some scripts (#3346)
vimalmanohar May 28, 2019
75a69d9
[scripts]: add warning to nnet3/chain/train.py about ineffective opti…
bringtree May 28, 2019
448c876
[src] Misc tensor progress
danpovey Jun 3, 2019
5937fae
[src] small change
danpovey Jun 5, 2019
602ae12
[src] tensor progress
danpovey Jun 10, 2019
32101ba
[src] Change name from kGpuDevice to kCudaDevice
danpovey Jun 10, 2019
33c36bb
[src] More tensor progress
danpovey Jun 14, 2019
b247f30
[src] Progress on standard cuda kernels for tensor directory
danpovey Jun 15, 2019
553f4a8
[src] TEnsor progress.
danpovey Jun 19, 2019
c188496
[src] Merge upstream (may be other merges going on here too.)
danpovey Jun 19, 2019
689a42c
Merge branch 'kaldi10-hmm-utils' of https://github.com/galv/kaldi int…
danpovey Jun 19, 2019
935b151
[src] Minor changes / fixes
danpovey Jun 19, 2019
aa499a3
Merge remote-tracking branch 'origin/kaldi10-temp' into kaldi10
danpovey Jun 19, 2019
1b4dec7
[build] Add missing Makefile
danpovey Jun 19, 2019
c349ef5
[src] Changes to make more things compile
danpovey Jun 20, 2019
ebc6f83
[src] Partial changes to cudafeat, giving up for now
danpovey Jun 20, 2019
5b0c098
[src] Various changes to get it to compile
danpovey Jun 22, 2019
038ea06
[src] Bug-fixes/rewrites to fix test failures in hmm-utils-test
danpovey Jun 22, 2019
42942f5
[src] Various changes to make test pass
danpovey Jun 23, 2019
9dd4f63
[src] One last fix to make tests pass
danpovey Jun 23, 2019
a9c96f6
[src] Changing numbering of pattern preconditions
danpovey Jul 3, 2019
57a8d0e
[scripts,egs] Removing no-longer-existing options like --transition-s…
danpovey Jul 10, 2019
f4b8f53
[src] Various fixes
danpovey Jul 10, 2019
d91a020
[src] Add back lattice-add-trans-probs
danpovey Jul 10, 2019
10400f4
[src,scripts] Various fixes related to kaldi10 topo changes
danpovey Jul 11, 2019
10663ad
[src,scripts] Fixes to kaldi10 branch to make things work
danpovey Jul 14, 2019
252fedf
[build] Add missing SUBDIR to Makefile (#3466)
naxingyu Jul 15, 2019
bf16577
[src] restore cuda-compiled to kaldi10 (#3471)
naxingyu Jul 17, 2019
5a30b71
Add feature transform; remove train transition (#3474)
naxingyu Jul 18, 2019
db6b23d
[src] Fixes RE unusual topologies
danpovey Jul 18, 2019
1cbb691
[src] Fixes RE unusual topologies (#3478)
danpovey Jul 18, 2019
6ba25b5
[src] Fixes RE unusual topologies (#3481)
danpovey Jul 19, 2019
7a53503
[src] Fixes RE unusual topologies (#3480)
danpovey Jul 22, 2019
3a1e523
Kaldi10 feature-changes + attention/transformer scripts (#3562)
danpovey Sep 3, 2019
5514685
Merged with master but have not cleaned up its effects yet.
danpovey Sep 30, 2019
d844498
Properly merge online2bin dir from master (previously accidentally lo…
danpovey Sep 30, 2019
85cbf75
Add link kaldi->src, will eventually move the dir to be named 'kaldi'
danpovey Oct 1, 2019
312e687
Various kaldi10 fixes after merge
danpovey Oct 2, 2019
234d00d
Merge some previous fixes in (CAUTION: there was something about kNoL…
danpovey Oct 2, 2019
4b8bab0
[src] Clarification in comment
danpovey Oct 2, 2019
b322275
Fix some comments
danpovey Dec 11, 2019
@@ -102,7 +102,7 @@ if [ $stage -le 0 ]; then
   fi
   utils/data/get_uniform_subsegments.py \
     --max-segment-duration=$window \
-    --overlap-duration=$(echo "$window-$period" | bc) \
+    --overlap-duration=$(perl -e "print ($window-$period);") \
     --max-remaining-duration=$min_segment \
     --constant-duration=True \
     $segments > $dir/subsegments
2 changes: 1 addition & 1 deletion egs/callhome_diarization/v1/run.sh
@@ -188,7 +188,7 @@ if [ $stage -le 6 ]; then
 
   der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \
     exp/tuning/${dataset}_t${threshold})
-  if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then
+  if [ $(perl -e "print ($der < $best_der ? 1 : 0);") -eq 1 ]; then
     best_der=$der
     best_threshold=$threshold
   fi
2 changes: 1 addition & 1 deletion egs/callhome_diarization/v2/run.sh
@@ -297,7 +297,7 @@ if [ $stage -le 10 ]; then
 
   der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \
     $nnet_dir/tuning/${dataset}_t${threshold})
-  if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then
+  if [ $(perl -e "print ($der < $best_der ? 1 : 0);") -eq 1 ]; then
     best_der=$der
     best_threshold=$threshold
   fi
2 changes: 1 addition & 1 deletion egs/dihard_2018/v1/run.sh
@@ -186,7 +186,7 @@ if [ $stage -le 7 ]; then
 
   der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \
     $ivec_dir/tuning/dihard_2018_dev_t${threshold})
-  if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then
+  if [ $(perl -e "print ($der < $best_der ? 1 : 0);") -eq 1 ]; then
     best_der=$der
     best_threshold=$threshold
   fi
2 changes: 1 addition & 1 deletion egs/dihard_2018/v2/run.sh
@@ -260,7 +260,7 @@ if [ $stage -le 12 ]; then
 
   der=$(grep -oP 'DIARIZATION\ ERROR\ =\ \K[0-9]+([.][0-9]+)?' \
     $nnet_dir/tuning/dihard_2018_dev_t${threshold})
-  if [ $(echo $der'<'$best_der | bc -l) -eq 1 ]; then
+  if [ $(perl -e "print ($der < $best_der ? 1 : 0);") -eq 1 ]; then
     best_der=$der
     best_threshold=$threshold
   fi
2 changes: 1 addition & 1 deletion egs/rm/README.txt
@@ -9,7 +9,7 @@ About the Resource Management corpus:
 
 Each subdirectory of this directory contains the
 scripts for a sequence of experiments.
-s5 is the currently recommmended setup.
+s5 is the currently recommended setup.
 
 s5: This is the "new-new-style" recipe. It is now finished.
 All further work will be on top of this style of recipe. Note:
4 changes: 2 additions & 2 deletions egs/sre08/v1/local/score_sre08.sh
@@ -35,11 +35,11 @@ tot_eer=0.0
 printf '% 12s' 'EER:'
 for condition in $(seq 8); do
   eer=$(awk '{print $3}' $scores | paste - $trials | awk -v c=$condition '{n=4+c; if ($n == "Y") print $1, $4}' | compute-eer - 2>/dev/null)
-  tot_eer=$(echo "$tot_eer+$eer" | bc)
+  tot_eer=$(perl -e "print ($tot_eer+$eer);")
   eers[$condition]=$eer
 done
 
-eers[0]=$(echo "$tot_eer/8" | bc -l)
+eers[0]=$(perl -e "print ($tot_eer/8.0);")
 
 for i in $(seq 0 8); do
   printf '% 7.2f' ${eers[$i]}
8 changes: 7 additions & 1 deletion egs/swbd/s5c/local/score_sclite_conf.sh
@@ -39,6 +39,12 @@ for f in $data/stm $data/glm $lang/words.txt $lang/phones/word_boundary.int \
   [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1;
 done
 
+if [ -f $dir/../frame_subsampling_factor ]; then
+  factor=$(cat $dir/../frame_subsampling_factor) || exit 1
+  frame_shift_opt="--frame-shift=0.0$factor"
+  echo "$0: $dir/../frame_subsampling_factor exists, using $frame_shift_opt"
+fi
+
 name=`basename $data`; # e.g. eval2000
 
 mkdir -p $dir/scoring/log
@@ -51,7 +57,7 @@ if [ $stage -le 0 ]; then
     ACWT=\`perl -e \"print 1.0/LMWT\;\"\` '&&' \
     lattice-add-penalty --word-ins-penalty=$wip "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
     lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
-    lattice-to-ctm-conf --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \
+    lattice-to-ctm-conf $frame_shift_opt --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT ark:- - \| \
     utils/int2sym.pl -f 5 $lang/words.txt \| \
     utils/convert_ctm.pl $data/segments $data/reco2file_and_channel \
     '>' $dir/score_LMWT_${wip}/$name.ctm || exit 1;
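The block added above derives the CTM frame shift from the frame subsampling factor by string concatenation rather than arithmetic: a chain model with factor 3 yields --frame-shift=0.03 instead of the default 0.01. A sketch of that step in isolation (factor value illustrative):

```shell
# "0.0$factor" turns factor 3 into 0.03 with no arithmetic at all.
factor=3
frame_shift_opt="--frame-shift=0.0$factor"
echo "$frame_shift_opt"   # -> --frame-shift=0.03
```

Note the concatenation trick only works for single-digit factors, which holds for the subsampling factors used in practice (typically 3).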
2 changes: 1 addition & 1 deletion egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh
@@ -160,7 +160,7 @@ if [ $stage -le 15 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
 
   num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+  learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python)
   tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true"
   tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
   linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
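The one-line fix above ("print x" to "print(x)") makes the inline snippet valid under both major Python versions: Python 2 parses it as printing a parenthesized expression, Python 3 as a function call. With the usual xent_regularize=0.1 (python3 used explicitly here for the sketch):

```shell
# Piping a print() expression into the interpreter works under either
# Python 2 or 3 once the parentheses are present.
xent_regularize=0.1
learning_rate_factor=$(echo "print(0.5/$xent_regularize)" | python3)
echo "$learning_rate_factor"   # -> 5.0
```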
20 changes: 19 additions & 1 deletion egs/wsj/s5/steps/libs/nnet3/train/dropout_schedule.py
@@ -186,9 +186,22 @@ def _get_component_dropout(dropout_schedule, data_fraction):
 
 def _get_dropout_proportions(dropout_schedule, data_fraction):
     """Returns dropout proportions based on the dropout_schedule for the
-    fraction of data seen at this stage of training.
+    fraction of data seen at this stage of training. Returns a list of
+    pairs (pattern, dropout_proportion); for instance, it might return
+    the list ['*', 0.625] meaning a dropout proportion of 0.625 is to
+    be applied to all dropout components.
+
+    Returns None if dropout_schedule is None.
+
+    dropout_schedule might be (in the sample case using the default pattern of
+    '*'): '0.1,0.5@0.5,0.1', meaning a piecewise linear function that starts at
+    0.1 when data_fraction=0.0, rises to 0.5 when data_fraction=0.5, and falls
+    again to 0.1 when data_fraction=1.0. It can also contain space-separated
+    items of the form 'pattern=schedule', for instance:
+    '*=0.0,0.5,0.0 lstm.*=0.0,0.3@0.75,0.0'
+    The more specific patterns should go later, otherwise they will be overridden
+    by the less specific patterns' commands.
 
     Calls _get_component_dropout() for the different component name patterns
     in dropout_schedule.
 
@@ -198,6 +211,7 @@ def _get_dropout_proportions(dropout_schedule, data_fraction):
         See _self_test() for examples.
         data_fraction: The fraction of data seen until this stage of
             training.
+
     """
     if dropout_schedule is None:
         return None
@@ -213,6 +227,10 @@ def _get_dropout_proportions(dropout_schedule, data_fraction):
 def get_dropout_edit_string(dropout_schedule, data_fraction, iter_):
     """Return an nnet3-copy --edits line to modify raw_model_string to
     set dropout proportions according to dropout_proportions.
+    E.g. if _dropout_proportions(dropout_schedule, data_fraction)
+    returns [('*', 0.625)], this will return the string:
+    "nnet3-copy --edits='set-dropout-proportion name=* proportion=0.625'"
+
 
     Arguments:
         dropout_schedule: Value for the --trainer.dropout-schedule option.
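The piecewise-linear schedule the docstring above describes can be sketched outside Python as well. A hedged awk implementation (assumptions: plain items pin the endpoints at data fractions 0 and 1, interior items use the proportion@fraction form; `schedule_at` is an illustrative name, not a function from the PR):

```shell
# schedule_at SCHEDULE DATA_FRACTION
# Linearly interpolates a dropout schedule like '0.1,0.5@0.5,0.1' at the
# given fraction of data seen, mirroring the docstring's rule.
schedule_at() {
  echo "$1" | awk -v x="$2" -F',' '{
    n = NF
    for (i = 1; i <= n; i++) {
      # "p@f" gives an explicit breakpoint; a bare "p" sits at 0 or 1.
      if (split($i, a, "@") == 2) { p[i] = a[1]; f[i] = a[2] }
      else { p[i] = $i; f[i] = (i == 1 ? 0.0 : 1.0) }
    }
    for (i = 1; i < n; i++)
      if (x >= f[i] && x <= f[i+1]) {
        printf("%g\n", p[i] + (p[i+1] - p[i]) * (x - f[i]) / (f[i+1] - f[i]))
        exit
      }
  }'
}
schedule_at '0.1,0.5@0.5,0.1' 0.25   # midway up the first segment -> 0.3
```

With the docstring's example schedule, the proportion ramps from 0.1 to its 0.5 peak at the halfway point of training, then back down, which is what the example call illustrates.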
1 change: 1 addition & 0 deletions egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py
@@ -27,6 +27,7 @@
         'relu-batchnorm-layer' : xlayers.XconfigBasicLayer,
         'relu-batchnorm-so-layer' : xlayers.XconfigBasicLayer,
         'batchnorm-so-relu-layer' : xlayers.XconfigBasicLayer,
+        'batchnorm-layer' : xlayers.XconfigBasicLayer,
         'sigmoid-layer' : xlayers.XconfigBasicLayer,
         'tanh-layer' : xlayers.XconfigBasicLayer,
         'fixed-affine-layer' : xlayers.XconfigFixedAffineLayer,
12 changes: 0 additions & 12 deletions egs/wsj/s5/steps/nnet/train.sh
@@ -433,18 +433,6 @@ else
       ${bn_dim:+ --bottleneck-dim=$bn_dim} \
       "$cnn_fea" $num_tgt $hid_layers $hid_dim >>$nnet_proto
     ;;
-  cnn2d)
-    delta_order=$([ -z $delta_opts ] && echo "0" || { echo $delta_opts | tr ' ' '\n' | grep "delta[-_]order" | sed 's:^.*=::'; })
-    echo "Debug : $delta_opts, delta_order $delta_order"
-    utils/nnet/make_cnn2d_proto.py $cnn_proto_opts \
-      --splice=$splice --delta-order=$delta_order --dir=$dir \
-      $num_fea >$nnet_proto
-    cnn_fea=$(cat $nnet_proto | grep -v '^$' | tail -n1 | awk '{ print $5; }')
-    utils/nnet/make_nnet_proto.py $proto_opts \
-      --no-smaller-input-weights \
-      ${bn_dim:+ --bottleneck-dim=$bn_dim} \
-      "$cnn_fea" $num_tgt $hid_layers $hid_dim >>$nnet_proto
-    ;;
   lstm)
     utils/nnet/make_lstm_proto.py $proto_opts \
       $num_fea $num_tgt >$nnet_proto
106 changes: 106 additions & 0 deletions egs/wsj/s5/steps/nnet3/xconfig_to_config.py
@@ -0,0 +1,106 @@ (new file)
#!/usr/bin/env python3

# Copyright 2016-2018 Johns Hopkins University (Dan Povey)
#           2016 Vijayaditya Peddinti
#           2017 Google Inc. ([email protected])
# Apache 2.0.

# This is like xconfig_to_configs.py but with a simpler interface; it writes
# to a single named file.


import argparse
import os
import sys
from collections import defaultdict

sys.path.insert(0, 'steps/')
# the following is in case we weren't running this from the normal directory.
sys.path.insert(0, os.path.realpath(os.path.dirname(sys.argv[0])) + '/')

import libs.nnet3.xconfig.parser as xparser
import libs.common as common_lib


def get_args():
    # we add compulsory arguments as named arguments for readability
    parser = argparse.ArgumentParser(
        description="Reads an xconfig file and creates config files "
        "for neural net creation and training",
        epilog='Search egs/*/*/local/{nnet3,chain}/*sh for examples')
    parser.add_argument('--xconfig-file', required=True,
                        help='Filename of input xconfig file')
    parser.add_argument('--existing-model',
                        help='Filename of previously trained neural net '
                        '(e.g. final.mdl) which is useful in case of '
                        'using nodes from list of component-nodes in '
                        'already trained model '
                        'to generate new config file for new model. '
                        'The context info is also generated using '
                        'a model generated by adding final.config '
                        'to the existing model. '
                        'e.g. In Transfer learning: generate new model using '
                        'component nodes in existing model.')
    parser.add_argument('--config-file-out', required=True,
                        help='Filename to write nnet config file.')
    parser.add_argument('--nnet-edits', type=str, default=None,
                        action=common_lib.NullstrToNoneAction,
                        help="""This option is useful in case the network you
                        are creating does not have an output node called
                        'output' (e.g. for multilingual setups). You can set
                        this to an edit-string like: 'rename-node old-name=xxx
                        new-name=output' if node xxx plays the role of the
                        output node in this network. This is only used for
                        computing the left/right context.""")

    print(' '.join(sys.argv), file=sys.stderr)

    args = parser.parse_args()

    return args


def write_config_file(config_file_out, all_layers):
    # config_basename_to_lines is map from the basename of the
    # config, as a string (i.e. 'ref', 'all', 'init') to a list of
    # strings representing lines to put in the config file.
    config_basename_to_lines = defaultdict(list)

    for layer in all_layers:
        try:
            pairs = layer.get_full_config()
            for config_basename, line in pairs:
                config_basename_to_lines[config_basename].append(line)
        except Exception as e:
            print("{0}: error producing config lines from xconfig "
                  "line '{1}': error was: {2}".format(sys.argv[0],
                                                      str(layer), repr(e)),
                  file=sys.stderr)
            # we use raise rather than raise(e) as using a blank raise
            # preserves the backtrace
            raise

    with open(config_file_out, 'w') as f:
        print('# This file was created by the command:\n'
              '# {0} '.format(sys.argv), file=f)
        lines = config_basename_to_lines['final']
        for line in lines:
            print(line, file=f)


def main():
    args = get_args()
    existing_layers = []
    if args.existing_model is not None:
        existing_layers = xparser.get_model_component_info(args.existing_model)
    all_layers = xparser.read_xconfig_file(args.xconfig_file, existing_layers)
    write_config_file(args.config_file_out, all_layers)


if __name__ == '__main__':
    main()


# test:
# (echo 'input dim=40 name=input'; echo 'output name=output input=Append(-1,0,1)') >xconfig; steps/nnet3/xconfig_to_config.py --xconfig-file=xconfig --config-file-out=foo
6 changes: 2 additions & 4 deletions egs/wsj/s5/steps/segmentation/internal/merge_targets.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

 # Copyright 2017 Vimal Manohar
 # Apache 2.0
@@ -16,8 +16,6 @@
 option.
 """

-from __future__ import print_function
-from __future__ import division
 import argparse
 import logging
 import numpy as np
@@ -111,7 +109,7 @@ def should_remove_frame(row, dim):
     # source[2] = [ 0 0 0 ]
     """
     assert len(row) % dim == 0
-    num_sources = len(row) / dim
+    num_sources = len(row) // dim

     max_idx = np.argmax(row)
     max_val = row[max_idx]
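The `/` to `//` change above is the one behavioral fix in this file: under Python 3, `/` on two ints yields a float, while the downstream code expects num_sources to be an int. A quick illustration (the row length and dim here are made up, not from the script):

```shell
# Python 3 "/" is true division even for ints; "//" is floor division.
python3 - <<'EOF'
row = [0.0] * 9   # e.g. 3 sources, dim 3
dim = 3
print(len(row) / dim)    # 3.0  (a float, breaks integer use downstream)
print(len(row) // dim)   # 3    (an int, as intended)
EOF
```

Dropping the `from __future__` imports is safe once the shebang pins python3, since those imports only existed to give Python 2 the Python 3 semantics.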
6 changes: 3 additions & 3 deletions egs/wsj/s5/utils/data/perturb_data_dir_volume.sh
@@ -52,15 +52,15 @@ for line in sys.stdin.readlines():
   parts = line.strip().split()
   if line.strip()[-1] == '|':
     if re.search('sox --vol', ' '.join(parts[-11:])):
-      print 'true'
+      print('true')
       sys.exit(0)
   elif re.search(':[0-9]+$', line.strip()) is not None:
     continue
   else:
     if ' '.join(parts[1:3]) == 'sox --vol':
-      print 'true'
+      print('true')
       sys.exit(0)
-print 'false'
+print('false')
 "` || exit 1
 
 if $volume_perturb_done; then
11 changes: 4 additions & 7 deletions egs/wsj/s5/utils/nnet/gen_dct_mat.py
@@ -16,8 +16,8 @@
 # limitations under the License.
 
 # ./gen_dct_mat.py
-# script generates matrix with DCT transform, which is sparse 
-# and takes into account that data-layout is along frequency axis, 
+# script generates matrix with DCT transform, which is sparse
+# and takes into account that data-layout is along frequency axis,
 # while DCT is done along temporal axis.
 
 from __future__ import division
@@ -29,10 +29,7 @@
 from optparse import OptionParser
 
 def print_on_same_line(text):
-  if (sys.version_info > (3,0)):
-    print(text, end=' ')
-  else:
-    print text,
+  print(text, end=' ')
 
 parser = OptionParser()
 parser.add_option('--fea-dim', dest='dim', help='feature dimension')
@@ -69,7 +66,7 @@ def print_on_same_line(text):
     if(n==timeContext-1):
       print_on_same_line((dim-m-1)*'0 ')
     print()
   print()
 
 print(']')
5 changes: 1 addition & 4 deletions egs/wsj/s5/utils/nnet/gen_hamm_mat.py
@@ -27,10 +27,7 @@
 from optparse import OptionParser
 
 def print_on_same_line(text):
-  if (sys.version_info > (3,0)):
-    print(text, end=' ')
-  else:
-    print text,
+  print(text, end=' ')
 
 parser = OptionParser()
 parser.add_option('--fea-dim', dest='dim', help='feature dimension')
5 changes: 1 addition & 4 deletions egs/wsj/s5/utils/nnet/gen_splice.py
@@ -26,10 +26,7 @@
 from optparse import OptionParser
 
 def print_on_same_line(text):
-  if (sys.version_info > (3,0)):
-    print(text, end=' ')
-  else:
-    print text,
+  print(text, end=' ')
 
 parser = OptionParser()
 parser.add_option('--fea-dim', dest='dim_in', help='feature dimension')