diff --git a/.gitignore b/.gitignore
index 5764bfe22c6..13d8aefe39d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,7 +73,8 @@ GSYMS
 /src/kaldi.mk.bak
 
 # /egs/
-/egs/*/*/mfcc
+/egs/*/*/mfcc*
+/egs/*/*/fbank*
 /egs/*/*/plp
 /egs/*/*/exp
 /egs/*/*/data
diff --git a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
index 0be0e2c79c6..2aeb836083c 100644
--- a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -177,7 +177,7 @@ if [ $stage -le 12 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph
+  utils/mkgraph.sh data/lang_test $dir $dir/graph
 fi
 
 graph_dir=$dir/graph
diff --git a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
index 78dd4000e58..e3e97e9ae2a 100644
--- a/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/aidatatang_200zh/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -197,7 +197,7 @@ if [ $stage -le 12 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph
+  utils/mkgraph.sh data/lang_test $dir $dir/graph
 fi
 
 graph_dir=$dir/graph
diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
index b38fa4d9c7a..e0acea5f168 100755
--- a/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -168,7 +168,7 @@ if [ $stage -le 12 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph
+  utils/mkgraph.sh data/lang_test $dir $dir/graph
 fi
 
 graph_dir=$dir/graph
diff --git a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
index 6b7223785d9..965932316b8 100755
--- a/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
+++ b/egs/aishell/s5/local/chain/tuning/run_tdnn_2a.sh
@@ -170,7 +170,7 @@ if [ $stage -le 12 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph
+  utils/mkgraph.sh data/lang_test $dir $dir/graph
 fi
 
 graph_dir=$dir/graph
diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
index 86c9becac5b..9148f54d29b 100755
--- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1a.sh
@@ -188,7 +188,7 @@ if [ $stage -le 12 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph
+  utils/mkgraph.sh data/lang_test $dir $dir/graph
 fi
 
 graph_dir=$dir/graph
diff --git a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
index d8560e63909..f0d87890c00 100755
--- a/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/aishell2/s5/local/chain/tuning/run_tdnn_1b.sh
@@ -238,7 +238,7 @@ if [ $stage -le 12 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph
+  utils/mkgraph.sh data/lang_test $dir $dir/graph
 fi
 
 graph_dir=$dir/graph
diff --git a/egs/ami/s5/local/chain/run_blstm_ami_5.sh b/egs/ami/s5/local/chain/run_blstm_ami_5.sh
index 53221a2bd53..90e096a9264 100755
--- a/egs/ami/s5/local/chain/run_blstm_ami_5.sh
+++ b/egs/ami/s5/local/chain/run_blstm_ami_5.sh
@@ -149,7 +149,7 @@ if [ $stage -le 18 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 19 ]; then
diff --git a/egs/ami/s5/local/chain/run_tdnn_ami_5.sh b/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
index df635316127..5b9ab9de043 100755
--- a/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
+++ b/egs/ami/s5/local/chain/run_tdnn_ami_5.sh
@@ -175,7 +175,7 @@ if [ $stage -le 18 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 19 ]; then
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
index 4d260e3c517..57628d86798 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_1a.sh
@@ -307,7 +307,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
index 3546b6a7ced..8aae7760a71 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1a.sh
@@ -301,7 +301,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
index 1a839b045bd..64d8e1822ca 100755
--- a/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/multi_condition/tuning/run_tdnn_lstm_1b.sh
@@ -330,7 +330,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
index d926c1dc6d7..23e5bda2038 100644
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh
@@ -284,7 +284,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
index d9cd1c356e8..d9dd08166c2 100644
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1b.sh
@@ -278,7 +278,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
index a0805b4f9f1..ac5c403c4bd 100755
--- a/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1c.sh
@@ -287,7 +287,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
index 03ebc5845e4..3b107519114 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1a.sh
@@ -217,7 +217,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
index 997357b80a9..2ea2266b1b5 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1b.sh
@@ -245,7 +245,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
index 4d062e65429..de2030c71cc 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1c.sh
@@ -232,7 +232,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
index 387570388d0..4375253d3a2 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh
@@ -244,7 +244,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
index 0436b08cdc0..b372db56e32 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1e.sh
@@ -242,7 +242,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
index 4ca526d63b8..ee887fd91c2 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1f.sh
@@ -247,7 +247,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
index baed760bb68..8c421c58351 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1g.sh
@@ -248,7 +248,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
index e721a858c0a..2c226c01105 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1h.sh
@@ -251,7 +251,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
index de40cb2d1a4..7486b3b6d6e 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1i.sh
@@ -253,7 +253,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
index 4f580b88f6b..84470f6530b 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -259,7 +259,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
index 904a079d7de..93ef04d79f5 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1b.sh
@@ -263,7 +263,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
index 511e520465a..60a6356077e 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1c.sh
@@ -262,7 +262,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
index bd81b7df4eb..a3ee0bcb631 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1d.sh
@@ -264,7 +264,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
index 50903e78b6d..aff42a3647f 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1e.sh
@@ -264,7 +264,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
index f6c53001498..a748e034cf8 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1f.sh
@@ -263,7 +263,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
index 79fd9ef3fb5..0cdf44279f2 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1g.sh
@@ -264,7 +264,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
index e58a7f89e03..428e4926693 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1h.sh
@@ -265,7 +265,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
index 13f894f5a48..3bd87ca26f0 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh
@@ -264,7 +264,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
index 48b31832e8c..b835da9cf38 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh
@@ -275,7 +275,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
index e675bc494bb..0caf4494b79 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh
@@ -271,7 +271,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
index 2d019398274..f8a6a0f1aa7 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1l.sh
@@ -315,7 +315,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
index 9e5b971bbe2..cb49eb94888 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1m.sh
@@ -321,7 +321,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
index 9575c3cf686..0df4d741fe4 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1n.sh
@@ -269,7 +269,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
index a7f2625c181..6bbc6fd52ad 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1o.sh
@@ -276,7 +276,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
index ca920869b30..dacf4639a1f 100755
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_bs_1a.sh
@@ -278,7 +278,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
index 53dbd5238db..1fd80acab90 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1a.sh
@@ -272,7 +272,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
index dafef668e60..d39a7cf6c9f 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1b.sh
@@ -273,7 +273,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
index 677946d0b9a..d0b3f4181bc 100644
--- a/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
+++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_opgru_1c.sh
@@ -272,7 +272,7 @@ if [ $stage -le 17 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir
+  utils/mkgraph.sh data/lang_${LM} $dir $graph_dir
 fi
 
 if [ $stage -le 18 ]; then
diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
index bd13010c791..2928bde6ab4 100755
--- a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
@@ -230,7 +230,7 @@ if [ $stage -le 13 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_pp_test $dir $dir/graph_pp
+  utils/mkgraph.sh data/lang_pp_test $dir $dir/graph_pp
 fi
 
 if [ $stage -le 14 ]; then
diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
index b5979a3ce6b..fed98e57b99 100755
--- a/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_blstm_asp_1.sh
@@ -199,7 +199,7 @@ if [ $stage -le 13 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_pp_test $dir $dir/graph_pp
+  utils/mkgraph.sh data/lang_pp_test $dir $dir/graph_pp
 fi
 
 
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
index cd548142598..ad85e63a975 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
@@ -217,7 +217,7 @@ if [ $stage -le 13 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_pp_test $dir $dir/graph_pp
+  utils/mkgraph.sh data/lang_pp_test $dir $dir/graph_pp
 fi
 
 if [ $stage -le 14 ]; then
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
index 5b35c902354..8eeb2fef21a 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_asp_1.sh
@@ -190,7 +190,7 @@ if [ $stage -le 13 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_pp_test $dir $dir/graph_pp
+  utils/mkgraph.sh data/lang_pp_test $dir $dir/graph_pp
 fi
 
 if [ $stage -le 14 ]; then
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
index f98dff5e6fa..ae298c3148e 100755
--- a/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
+++ b/egs/aspire/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
@@ -248,7 +248,7 @@ if [ $stage -le 14 ]; then
   # Note: it might appear that this $lang directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_pp_test $dir $graph_dir
+  utils/mkgraph.sh data/lang_pp_test $dir $graph_dir
 fi
 
 if [ $stage -le 15 ]; then
diff --git a/egs/babel/s5c/local/ali_to_rttm.sh b/egs/babel/s5c/local/ali_to_rttm.sh
index ef11f516ea3..4b1ef5948cd 100755
--- a/egs/babel/s5c/local/ali_to_rttm.sh
+++ b/egs/babel/s5c/local/ali_to_rttm.sh
@@ -23,7 +23,7 @@
 #local/ali_to_rttm.sh data/dev2h data/lang exp/sgmm5/align_dev2h/
 
 cmd=run.pl
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+scale_opts="--acoustic-scale=0.1"
 beam=10
 retry_beam=40
 boost_silence=1.0
diff --git a/egs/babel/s5d/local/ali_to_rttm.sh b/egs/babel/s5d/local/ali_to_rttm.sh
index cb4f0740130..6a720c91287 100755
--- a/egs/babel/s5d/local/ali_to_rttm.sh
+++ b/egs/babel/s5d/local/ali_to_rttm.sh
@@ -23,7 +23,7 @@
 #local/ali_to_rttm.sh data/dev2h data/lang exp/sgmm5/align_dev2h/
 
 cmd=run.pl
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+scale_opts="--acoustic-scale=0.1"
 beam=10
 retry_beam=40
 boost_silence=1.0
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
index 7b4535f8c5e..102225f9bc4 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn.sh
@@ -210,7 +210,7 @@ if [ $stage -le 19 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
-  utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph
+  utils/mkgraph.sh data/langp_test $dir $dir/graph
 fi
 
 exit 0
diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
index 5fc14dda826..93958c93717 100755
--- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
+++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm.sh
@@ -217,7 +217,7 @@ if [ $stage -le 19 ]; then
   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
   # the lang directory.
- utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh index 8c7de5d18d4..d4f2ed70cfb 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab1.sh @@ -215,7 +215,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh index 0b3e70b5a04..b9ff6c1a15d 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab2.sh @@ -215,7 +215,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh index 45f2907645e..1c3f26e7def 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab3.sh @@ -216,7 +216,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh index 0d92aff5c28..2342437c83c 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab4.sh @@ -216,7 +216,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh index 4129c00dcb4..38bdcfda2f5 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab5.sh @@ -216,7 +216,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh index 1cfa50c1aa1..d0c7ca09b1c 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab6.sh @@ -216,7 +216,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh index ba8ac1e0373..ceb94f5e16b 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab7.sh @@ -218,7 +218,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh index 5de285e080e..98c6b13aeef 100755 --- a/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh +++ b/egs/babel/s5d/local/chain/tuning/run_tdnn_lstm_bab8.sh @@ -220,7 +220,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/langp_test $dir $dir/graph + utils/mkgraph.sh data/langp_test $dir $dir/graph fi exit 0 diff --git a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index ec530ef1ce4..fac93f0a790 100755 --- a/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/bentham/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -110,7 +110,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -235,7 +235,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 716bdce3729..c5d4106e44a 100755 --- a/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/bentham/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -147,7 +147,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/bentham/v1/run_end2end.sh b/egs/bentham/v1/run_end2end.sh index 63c034e41f6..5d821aeb9ee 100755 --- a/egs/bentham/v1/run_end2end.sh +++ b/egs/bentham/v1/run_end2end.sh @@ -111,7 +111,7 @@ if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + --scale-opts ' --acoustic-scale=1.0' \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh index 3f8b7c60090..fb254339cb5 100755 --- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_1a.sh @@ -321,7 +321,7 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr_5k/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr_5k \ + data/lang_test_tgpr_5k \ $tree_dir $tree_dir/graph_tgpr_5k || exit 1; fi diff --git a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh index 8b4e93cd05b..19ea72a944a 100755 --- a/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/chime4/s5_1ch/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -275,7 +275,7 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr_5k/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr_5k \ + data/lang_test_tgpr_5k \ $tree_dir $tree_dir/graph_tgpr_5k || exit 1; fi diff --git a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh index f0f469e46c8..2fb91a07a33 100755 --- a/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -212,7 +212,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; fi diff --git a/egs/chime5/s5/local/run_recog.sh b/egs/chime5/s5/local/run_recog.sh index 5c74c9ff242..9da73a02821 100755 --- a/egs/chime5/s5/local/run_recog.sh +++ b/egs/chime5/s5/local/run_recog.sh @@ -130,7 +130,7 @@ if [ $stage -le 18 ]; then chunk_right_context=0 utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; frames_per_chunk=$(echo $chunk_width | cut -d, -f1) diff --git a/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh index 95e9d934bd3..7b14b7dff67 100755 --- a/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh +++ b/egs/chime5/s5b/local/chain/tuning/run_cnn_tdnn_lstm_1a.sh @@ -246,7 +246,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; fi diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh index daad37e2cd7..3b6c73e41d8 100755 --- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -216,7 +216,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; fi diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh index e033715d884..ccde8a0fcd3 100755 --- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_1b.sh @@ -224,7 +224,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; fi diff --git a/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh index e3d8e6ac4dc..e80797de57a 100755 --- a/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/chime5/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -239,7 +239,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; fi diff --git a/egs/chime5/s5b/local/run_recog.sh b/egs/chime5/s5b/local/run_recog.sh index 5c74c9ff242..9da73a02821 100755 --- a/egs/chime5/s5b/local/run_recog.sh +++ b/egs/chime5/s5b/local/run_recog.sh @@ -130,7 +130,7 @@ if [ $stage -le 18 ]; then chunk_right_context=0 utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; frames_per_chunk=$(echo $chunk_width | cut -d, -f1) diff --git a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh index d4acd0fed4b..74d37961396 100755 --- a/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/commonvoice/s5/local/chain/tuning/run_tdnn_1a.sh @@ -229,7 +229,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; fi diff --git a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh index 75ceb80e3e0..1cb21d96375 100755 --- a/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/csj/s5/local/chain/tuning/run_tdnn_1a.sh @@ -213,7 +213,7 @@ fi if [ $stage -le 14 ]; then utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_csj_tg $dir $dir/graph_csj_tg + data/lang_csj_tg $dir $dir/graph_csj_tg for decode_set in $test_sets; do steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj 10 \ diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh index 7f407552c2e..5f4690d05b4 100755 --- a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh +++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh @@ -249,7 +249,7 @@ if [ $stage -le 21 ]; then #LM was trained only on Fisher Spanish train subset. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph_fsp_train || exit 1; fi diff --git a/egs/fisher_english/s5/local/chain/run_tdnn.sh b/egs/fisher_english/s5/local/chain/run_tdnn.sh index 1fd0f1fdf3a..424a4610bab 100755 --- a/egs/fisher_english/s5/local/chain/run_tdnn.sh +++ b/egs/fisher_english/s5/local/chain/run_tdnn.sh @@ -193,7 +193,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $graph_dir + utils/mkgraph.sh data/lang_test $dir $graph_dir fi decode_suff= diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh index 07636a8b3c8..a4040e9494a 100644 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh @@ -126,7 +126,7 @@ for f in data/${supervised_set_perturbed}/feats.scp \ done if [ ! -f $graphdir/HCLG.fst ]; then - utils/mkgraph.sh --self-loop-scale 1.0 $unsup_decode_lang $sup_chain_dir $graphdir + utils/mkgraph.sh $unsup_decode_lang $sup_chain_dir $graphdir fi # Prepare the speed-perturbed unsupervised data directory @@ -402,7 +402,7 @@ if [ $stage -le 17 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 ${test_lang} $dir $test_graph_dir + utils/mkgraph.sh ${test_lang} $dir $test_graph_dir fi if [ $stage -le 18 ]; then diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh index b1c133942ef..aa2818c23ce 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_1a.sh @@ -224,7 +224,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_poco_unk $dir $graph_dir + utils/mkgraph.sh data/lang_test_poco_unk $dir $graph_dir fi decode_suff= diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh index 04244014502..ed487734eef 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh @@ -138,7 +138,7 @@ for f in data/${supervised_set_perturbed}/feats.scp \ done if [ ! -f $graphdir/HCLG.fst ]; then - utils/mkgraph.sh --self-loop-scale 1.0 $unsup_decode_lang $sup_chain_dir $graphdir + utils/mkgraph.sh $unsup_decode_lang $sup_chain_dir $graphdir fi if [ $stage -le 2 ]; then @@ -421,7 +421,7 @@ if [ $stage -le 17 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 ${test_lang} $dir $test_graph_dir + utils/mkgraph.sh ${test_lang} $dir $test_graph_dir fi if [ $stage -le 18 ]; then diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh index 66f87c8da8f..ce32a3ca9b7 100755 --- a/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh +++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh @@ -143,7 +143,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh index c12f604f26b..0bedf85c8cb 100755 --- a/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh +++ b/egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh @@ -216,7 +216,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh index 543f753bd4e..0179ebd26e3 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh @@ -135,7 +135,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh index efcd1eced4a..910bbe358bf 100644 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7c.sh @@ -212,7 +212,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh index e4a555abfdd..ac990889e2a 100644 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_7d.sh @@ -221,7 +221,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh index 5650cedca28..89ef17fa9bc 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh @@ -232,7 +232,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh index 5beb2e74a9a..1a711089912 100644 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a_svd.sh @@ -312,7 +312,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh index f3cc869e6de..aed698b343d 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh @@ -242,7 +242,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh index 059a81e15fc..cd5910cf9b4 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh @@ -239,7 +239,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh index d86b699d6f6..51546ddd622 100755 --- a/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh +++ b/egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh @@ -240,7 +240,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh index 66c5ad3335f..e4aa735a9d8 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1a.sh @@ -164,7 +164,7 @@ if [ $stage -le 12 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh data/lang_test $dir $dir/graph fi graph_dir=$dir/graph diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh index 1981bb0530d..ec2f9dc1b6c 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1b.sh @@ -172,7 +172,7 @@ if [ $stage -le 12 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh data/lang_test $dir $dir/graph fi graph_dir=$dir/graph diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh index 6fa10344cfc..c3ee11a0638 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1c.sh @@ -174,7 +174,7 @@ if [ $stage -le 12 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh data/lang_test $dir $dir/graph fi graph_dir=$dir/graph diff --git a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh index 1f4b7e12850..03d739579bd 100755 --- a/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/formosa/s5/local/chain/tuning/run_tdnn_1d.sh @@ -173,7 +173,7 @@ if [ $stage -le 12 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh data/lang_test $dir $dir/graph fi graph_dir=$dir/graph diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh index bf2e45c9914..d62d214d957 100755 --- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -200,7 +200,7 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; fi diff --git a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh index deebafc95e4..5278b97591a 100755 --- a/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/gale_arabic/s5b/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -206,7 +206,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh --left-biphone data/lang_test $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh index bf2e45c9914..d62d214d957 100755 --- a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_1a.sh @@ -200,7 +200,7 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; fi diff --git a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh index deebafc95e4..5278b97591a 100755 --- a/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/gale_arabic/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -206,7 +206,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh --left-biphone data/lang_test $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/gp/s1/steps/align_deltas.sh b/egs/gp/s1/steps/align_deltas.sh index 22da04432c7..37406b4d2a6 100755 --- a/egs/gp/s1/steps/align_deltas.sh +++ b/egs/gp/s1/steps/align_deltas.sh @@ -93,7 +93,7 @@ mkdir -p $dir # Create copy of the tree and model and occs... 
cp $srcdir/{tree,final.mdl,final.occs} $dir || exit 1; -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" if [ ! -d $data/split$nj -o $data/split$nj -ot $data/feats.scp ]; then split_data.sh $data $nj diff --git a/egs/gp/s1/steps/train_deltas.sh b/egs/gp/s1/steps/train_deltas.sh index 0efe7b60379..45a4a54f861 100755 --- a/egs/gp/s1/steps/train_deltas.sh +++ b/egs/gp/s1/steps/train_deltas.sh @@ -125,7 +125,7 @@ if [ ! -f $alidir/final.mdl ]; then exit 1; fi -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; oov_sym=`cat $lang/oov.txt` silphonelist=`cat $lang/silphones.csl` diff --git a/egs/gp/s1/steps/train_mono.sh b/egs/gp/s1/steps/train_mono.sh index e82c14fcaf2..c4e2ad42228 100755 --- a/egs/gp/s1/steps/train_mono.sh +++ b/egs/gp/s1/steps/train_mono.sh @@ -77,7 +77,7 @@ dir=$3 [ -f path.sh ] && . ./path.sh # Configuration: -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" numiters=40 # Number of iterations of training maxiterinc=30 # Last iter to increase #Gauss on. numgauss=300 # Initial num-Gauss (must be more than #states=3*phones). diff --git a/egs/gp/s1/utils/lmrescore.sh b/egs/gp/s1/utils/lmrescore.sh index c911d0ce8b0..bf70021f13e 100755 --- a/egs/gp/s1/utils/lmrescore.sh +++ b/egs/gp/s1/utils/lmrescore.sh @@ -157,7 +157,7 @@ case "$mode" in lattice-compose ark:- $outdir/Ldet.fst ark:- \| \ lattice-determinize ark:- ark:- \| \ lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \ - lattice-add-trans-probs --transition-scale=1.0 --self-loop-scale=0.1 \ + lattice-add-trans-probs \ $mdl ark:- ark:- \| \ gzip -c \>$newlat || error_exit "Error doing LM rescoring." ;; diff --git a/egs/gp/s1/utils/mkgraph.sh b/egs/gp/s1/utils/mkgraph.sh index 3aba742832d..14a4048ffba 100755 --- a/egs/gp/s1/utils/mkgraph.sh +++ b/egs/gp/s1/utils/mkgraph.sh @@ -19,7 +19,7 @@ # all the language-model, pronunciation dictionary (lexicon), context-dependency, # and HMM structure in our model. The output is a Finite State Transducer # that has word-ids on the output, and pdf-ids on the input (these are indexes -# that resolve to Gaussian Mixture Models). +# that resolve to Gaussian Mixture Models). # See # http://kaldi-asr.org/doc/graph_recipe_test.html # (this is compiled from this repository using Doxygen, @@ -30,7 +30,7 @@ N=3 P=1 clean=false -for x in 1 2 3; do +for x in 1 2 3; do if [ $1 == "--mono" ]; then N=1; P=0; @@ -60,9 +60,6 @@ if $clean; then rm -r $lang/tmp; fi mkdir -p $dir -tscale=1.0 -loopscale=0.1 - # If $lang/tmp/LG.fst does not exist or is older than its sources, make it... # (note: the [[ ]] brackets make the || type operators work (inside [ ], we # would have to use -o instead), -f means file exists, and -ot means older than). @@ -101,7 +98,7 @@ fi if [[ ! -f $dir/Ha.fst || $dir/Ha.fst -ot $model \ || $dir/Ha.fst -ot $lang/tmp/ilabels_${N}_${P} ]]; then make-h-transducer --disambig-syms-out=$dir/disambig_tid.list \ - --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \ + $lang/tmp/ilabels_${N}_${P} $tree $model \ > $dir/Ha.fst || exit 1; fi @@ -114,13 +111,10 @@ if [[ ! -f $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \ fi if [[ ! 
-f $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then - add-self-loops --self-loop-scale=$loopscale --reorder=true \ + add-self-loops \ $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1; - if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then - # No point doing this test if transition-scale not 1, as it is bound to fail. - fstisstochastic $dir/HCLG.fst || echo "Final HCLG is not stochastic." - fi + fstisstochastic $dir/HCLG.fst || echo "Final HCLG is not stochastic." fi # keep a copy of the lexicon and a list of silence phones with HCLG... diff --git a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index 361879b4142..0a40bd33c66 100755 --- a/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/heroico/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -249,7 +249,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 \ + \ data/lang_test \ $tree_dir \ $tree_dir/graph || exit 1; diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh index 290bd4c7970..147195d18b7 100755 --- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1a.sh @@ -236,7 +236,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 \ + \ data/lang_test \ $tree_dir \ $tree_dir/graph || exit 1; diff --git a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh index cfb4dc1f697..3591f11d228 100755 --- a/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/heroico/s5/local/chain/tuning/run_tdnn_1b.sh @@ -232,7 +232,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 \ + \ data/lang_test \ $tree_dir \ $tree_dir/graph || exit 1; diff --git a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh index c62b776de2b..c79606dcfd1 100755 --- a/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh +++ b/egs/hkust/s5/local/chain/tuning/run_tdnn_2a.sh @@ -207,7 +207,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh data/lang_test $dir $dir/graph fi graph_dir=$dir/graph diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index d1b657a2d74..e2a51260ff5 100755 --- a/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/hub4_spanish/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -254,7 +254,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/langp_test \ + data/langp_test \ $tree_dir $dir/graph || exit 1; fi diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh index 40bbbe1ae79..25b2224a855 100755 --- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1a.sh @@ -223,7 +223,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/langp_test \ + data/langp_test \ $tree_dir $dir/graph || exit 1; fi diff --git a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh index a498d8157f3..246adb1e45d 100755 --- a/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/hub4_spanish/s5/local/chain/tuning/run_tdnn_1b.sh @@ -240,7 +240,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/langp_test \ + data/langp_test \ $tree_dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh index ef1273f3961..a0655c6f247 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_1a.sh @@ -209,7 +209,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_decode \ + data/$lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh index bbcc55aa2b0..2b80fbcb4de 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -100,7 +100,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ $train_data_dir data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -206,7 +206,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_decode \ + data/$lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh index 401ffa14e19..ad7367b614e 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1b.sh @@ -98,7 +98,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ $train_data_dir data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -207,7 +207,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_decode \ + data/$lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh index 17209b9204f..3770eb0aa40 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1c.sh @@ -97,7 +97,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ $train_data_dir data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -213,7 +213,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_decode \ + data/$lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh index 89a40ed2a13..e5d12aabbb7 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_chainali_1d.sh @@ -101,7 +101,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ $train_data_dir data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -215,7 +215,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_decode \ + data/$lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 703d404159a..81399230b2e 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -94,7 +94,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -211,7 +211,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_decode \ + data/$lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 905c4661477..ecc93e9341a 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -91,7 +91,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ $train_data_dir data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -203,7 +203,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_decode \ + data/$lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh index 26b1aca0929..f7cf2d3ff59 100755 --- a/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v1/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -93,7 +93,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ $train_data_dir data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -206,7 +206,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 462ad0522de..72ad70e7dcd 100755 --- a/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/iam/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -138,7 +138,7 @@ if [ $stage -le 4 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v1/run_end2end.sh b/egs/iam/v1/run_end2end.sh index 0a8b014715f..e81c2eb54ba 100755 --- a/egs/iam/v1/run_end2end.sh +++ b/egs/iam/v1/run_end2end.sh @@ -114,7 +114,7 @@ if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + --scale-opts ' --acoustic-scale=1.0' \ data/$train_set data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh index 9a01688ba35..10a69265b3f 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -106,7 +106,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -231,7 +231,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh index 28aa246f334..ce3fda36052 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -108,7 +108,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -233,7 +233,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh index f158317950a..c253a796813 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1c.sh @@ -110,7 +110,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -234,7 +234,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh index 1c44057454a..dde868d6918 100755 --- a/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh +++ b/egs/iam/v2/local/chain/tuning/run_cnn_e2eali_1d.sh @@ -108,7 +108,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -233,7 +233,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh index cb2bfa0a82d..1758efd8f4d 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1a.sh @@ -158,7 +158,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh index d5f79602695..f02246503d1 100755 --- a/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh +++ b/egs/iam/v2/local/chain/tuning/run_e2e_cnn_1b.sh @@ -144,7 +144,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/iam/v2/run_end2end.sh b/egs/iam/v2/run_end2end.sh index c515c85fc72..51dc6737c86 100755 --- a/egs/iam/v2/run_end2end.sh +++ b/egs/iam/v2/run_end2end.sh @@ -136,7 +136,7 @@ if [ $stage -le 7 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ --use-gpu false \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + --scale-opts ' --acoustic-scale=1.0' \ data/train_aug data/lang exp/chain/e2e_cnn_1b exp/chain/e2e_ali_train fi diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh index 10650a18269..af7f6599f97 100755 --- a/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1a.sh @@ -223,7 +223,7 @@ if [ $stage -le 14 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). 
utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; fi diff --git a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh index db62e6f8a55..e657c9bc3f3 100755 --- a/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/iban/s5/local/chain/tuning/run_tdnn_1b.sh @@ -237,7 +237,7 @@ if [ $stage -le 14 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; fi diff --git a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh index b0ecd547741..d0c386e75ee 100755 --- a/egs/ifnenit/v1/local/chain/run_cnn_1a.sh +++ b/egs/ifnenit/v1/local/chain/run_cnn_1a.sh @@ -211,7 +211,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + data/$lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh index 7f3132d657e..7eeb6f4a15c 100755 --- a/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/ifnenit/v1/local/chain/run_cnn_chainali_1a.sh @@ -101,7 +101,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -216,7 +216,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/$lang_test \ + data/$lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh index 6bf3a139ad1..0e5a3410e31 100755 --- a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh +++ b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh @@ -125,7 +125,7 @@ if [ $stage -le 1 ]; then # have some stragglers. steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \ --online-ivector-dir $train_ivector_dir \ - --scale-opts "--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0" \ + --scale-opts "--acoustic-scale=1.0 " \ --nj $nj $train_data_dir $lang $srcdir ${srcdir}_ali${affix} ; fi @@ -139,7 +139,7 @@ if [ -z "$lats_dir" ]; then subsplit=40 # number of jobs that run per job (but 2 run at a time, so total jobs is 80, giving # total slots = 80 * 6 = 480. 
steps/nnet3/make_denlats.sh --cmd "$decode_cmd" \ - --self-loop-scale 1.0 --acwt 1.0 --determinize true \ + --acwt 1.0 --determinize true \ --online-ivector-dir $train_ivector_dir \ --nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.config \ $train_data_dir $lang $srcdir ${lats_dir} ; diff --git a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index db17a35be64..a191aba2db9 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -16,7 +16,7 @@ # WER on test(fglarge) 3.80 3.69 # WER on test(tglarge) 3.89 3.80 # WER on test(tgmed) 4.72 4.64 -# WER on test(tgsmall) 5.19 5.16 +# WER on test(tgsmall) 5.19 5.16 # WER on test_other(fglarge) 8.76 8.71 # WER on test_other(tglarge) 9.19 9.11 # WER on test_other(tgmed) 11.22 11.00 @@ -211,7 +211,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + utils/mkgraph.sh --remove-oov data/lang_test_tgsmall $dir $graph_dir fi iter_opts= @@ -226,7 +226,7 @@ if [ $stage -le 17 ]; then --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 steps/lmrescore_const_arpa.sh \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ @@ -271,4 +271,4 @@ if $test_online_decoding && [ $stage -le 18 ]; then fi exit 0; - + diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1a.sh index fb652a719a2..48184bca926 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1a.sh @@ -178,7 +178,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + utils/mkgraph.sh --remove-oov data/lang_test_tgsmall $dir $graph_dir fi @@ -194,7 +194,7 @@ if [ $stage -le 17 ]; then --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 steps/lmrescore_const_arpa.sh \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh index 48d6ddb804f..196f4b5a709 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh @@ -205,7 +205,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + utils/mkgraph.sh --remove-oov data/lang_test_tgsmall $dir $graph_dir fi @@ -221,7 +221,7 @@ if [ $stage -le 17 ]; then --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 steps/lmrescore_const_arpa.sh \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh index 101fd6a4c15..e1baf2c792d 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1c.sh @@ -196,7 +196,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + utils/mkgraph.sh --remove-oov data/lang_test_tgsmall $dir $graph_dir fi iter_opts= @@ -211,7 +211,7 @@ if [ $stage -le 17 ]; then --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 steps/lmrescore_const_arpa.sh \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh index 865b10dea0c..da6cfada36f 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_1d.sh @@ -296,7 +296,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + utils/mkgraph.sh --remove-oov data/lang_test_tgsmall $dir $graph_dir fi iter_opts= @@ -311,7 +311,7 @@ if [ $stage -le 17 ]; then --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 steps/lmrescore_const_arpa.sh \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 0e97e46194d..7237c1463c7 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -179,7 +179,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + utils/mkgraph.sh --remove-oov data/lang_test_tgsmall $dir $graph_dir fi @@ -200,7 +200,7 @@ if [ $stage -le 15 ]; then --frames-per-chunk "$frames_per_chunk_primary" \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 steps/lmrescore_const_arpa.sh \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ diff --git a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh index 0da813267fc..9ddd2457312 100755 --- a/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/librispeech/s5/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -214,7 +214,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $dir $graph_dir + utils/mkgraph.sh --remove-oov data/lang_test_tgsmall $dir $graph_dir fi @@ -235,7 +235,7 @@ if [ $stage -le 15 ]; then --frames-per-chunk "$frames_per_chunk_primary" \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_tgsmall || exit 1 - steps/lmrescore.sh --cmd "$decode_cmd" --self-loop-scale 1.0 data/lang_test_{tgsmall,tgmed} \ + steps/lmrescore.sh --cmd "$decode_cmd" data/lang_test_{tgsmall,tgmed} \ data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_{tgsmall,tgmed} || exit 1 steps/lmrescore_const_arpa.sh \ --cmd "$decode_cmd" data/lang_test_{tgsmall,tglarge} \ diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh index 892ee441516..33de4ae8b93 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_1a.sh @@ -194,7 +194,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh index 7ca7c652fd2..62ec4686ed6 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -86,7 +86,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -194,7 +194,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index a8bc1836ffe..59a167f1e64 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -87,7 +87,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -206,7 +206,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index 0828e051dcc..fdf6f994268 100755 --- a/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -100,7 +100,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -219,7 +219,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh index ccbb7119674..5403dd2af05 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_cnn_e2eali.sh @@ -94,7 +94,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts @@ -213,7 +213,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh index 3fca8cf5fdc..90ca63a971e 100755 --- a/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh +++ b/egs/madcat_ar/v1/local/tl/chain/run_e2e_cnn.sh @@ -150,7 +150,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_ar/v1/local/tl/run_text_localization.sh b/egs/madcat_ar/v1/local/tl/run_text_localization.sh index 8d12f7d802f..5066adc73dd 100755 --- a/egs/madcat_ar/v1/local/tl/run_text_localization.sh +++ b/egs/madcat_ar/v1/local/tl/run_text_localization.sh @@ -133,7 +133,7 @@ if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model...$(date)." 
steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ --use-gpu false \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + --scale-opts ' --acoustic-scale=1.0' \ data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi diff --git a/egs/madcat_ar/v1/run_end2end.sh b/egs/madcat_ar/v1/run_end2end.sh index 62f4eeb7c71..bb22e1b1a8e 100755 --- a/egs/madcat_ar/v1/run_end2end.sh +++ b/egs/madcat_ar/v1/run_end2end.sh @@ -119,7 +119,7 @@ fi if [ $stage -le 5 ] && $decode_e2e; then echo "$0: $(date) stage 5: decoding end2end setup..." - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode \ + utils/mkgraph.sh $lang_decode \ exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj $nj --cmd "$cmd" \ diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh index 164d62a7ad9..6affb1587aa 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_1a.sh @@ -206,7 +206,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh index be51bdcc3d1..46df193483c 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1a.sh @@ -92,7 +92,7 @@ if [ $stage -le 2 ]; then # Get the alignments as lattices (gives the chain training more freedom). # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -202,7 +202,7 @@ if [ $stage -le 6 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh index aa61620a92f..a478a63160f 100755 --- a/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh +++ b/egs/madcat_zh/v1/local/chain/tuning/run_cnn_chainali_1b.sh @@ -96,7 +96,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $chain_model_dir $lat_dir cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts fi @@ -210,7 +210,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/madcat_zh/v1/run_end2end.sh b/egs/madcat_zh/v1/run_end2end.sh index 7e0fc1e25d1..a89222fe1b4 100755 --- a/egs/madcat_zh/v1/run_end2end.sh +++ b/egs/madcat_zh/v1/run_end2end.sh @@ -96,7 +96,7 @@ fi if [ $stage -le 5 ] && $decode_e2e; then echo "$0: $(date) stage 5: decoding end2end setup..." 
- utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode \ + utils/mkgraph.sh $lang_decode \ exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj $nj --cmd "$cmd" \ diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh index 4f38ee886a7..f7c99ef08d5 100755 --- a/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/material/s5/local/chain/tuning/run_tdnn_1a.sh @@ -243,11 +243,11 @@ if [ $stage -le 12 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_combined_test \ + data/lang_combined_test \ $tree_dir ${tree_dir}/graph_combined || exit 1; fi diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh index 023cb34b43d..4c853eefa9f 100755 --- a/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/material/s5/local/chain/tuning/run_tdnn_1b.sh @@ -268,7 +268,7 @@ if [ $stage -le 12 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_combined_test \ + data/lang_combined_test \ $tree_dir ${tree_dir}/graph_combined || exit 1; fi diff --git a/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index af5a62dad0d..576d1146d63 100755 --- a/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/material/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -234,11 +234,11 @@ if [ $stage -le 12 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_combined_test \ + data/lang_combined_test \ $tree_dir ${tree_dir}/graph_combined || exit 1; fi diff --git a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh index 3d3056182ee..6b641a9235c 100755 --- a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh +++ b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_1a.sh @@ -246,7 +246,7 @@ if [ $stage -le 12 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_combined_test \ + data/lang_combined_test \ $tree_dir ${tree_dir}/graph_combined || exit 1; fi diff --git a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh index 37c957a3227..54bfa09b261 100755 --- a/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh +++ b/egs/material/s5/local/semisup/chain/tuning/run_tdnn_semisupervised_1a.sh @@ -124,7 +124,7 @@ done if [ $stage -le 1 ]; then if [ ! 
-f $graphdir/HCLG.fst ]; then - utils/mkgraph.sh --self-loop-scale 1.0 $unsup_decode_lang $sup_chain_dir $graphdir + utils/mkgraph.sh $unsup_decode_lang $sup_chain_dir $graphdir fi fi @@ -439,7 +439,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 ${test_lang} $dir $test_graph_dir + utils/mkgraph.sh ${test_lang} $dir $test_graph_dir fi if [ $stage -le 14 ]; then diff --git a/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh index 6300511e817..9b6caa24e09 100644 --- a/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/mgb5/s5/local/chain/tuning/run_tdnn_1a.sh @@ -224,7 +224,7 @@ if [ $stage -le 14 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test \ + data/lang_test \ $tree_dir $tree_dir/graph || exit 1; fi diff --git a/egs/mini_librispeech/s5/local/chain/run_att.sh b/egs/mini_librispeech/s5/local/chain/run_att.sh new file mode 120000 index 00000000000..bf5d5a0c0f1 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/run_att.sh @@ -0,0 +1 @@ +tuning/run_att_1a.sh \ No newline at end of file diff --git a/egs/mini_librispeech/s5/local/chain/tuning/run_att_1a.sh b/egs/mini_librispeech/s5/local/chain/tuning/run_att_1a.sh new file mode 100755 index 00000000000..2238e66f041 --- /dev/null +++ b/egs/mini_librispeech/s5/local/chain/tuning/run_att_1a.sh @@ -0,0 +1,300 @@ +#!/bin/bash + +# run_att_1a.sh is similar to run_tdnn_1h.sh but with some TDNN layers replaced +# with attention layers. + + +# Note: below, att1a and att1a2 are two different runs of the same script. +# +# local/chain/compare_wer.sh exp/chain/tdnn1h_sp exp/chain/att1a_sp exp/chain/att1a2_sp +# System tdnn1h_sp att1a_sp att1a2_sp +#WER dev_clean_2 (tgsmall) 12.27 12.16 12.65 +#WER dev_clean_2 (tglarge) 8.61 8.68 8.94 +# Final train prob -0.0462 -0.0434 -0.0425 +# Final valid prob -0.0814 -0.0807 -0.0814 +# Final train prob (xent) -1.1354 -1.0721 -1.0647 +# Final valid prob (xent) -1.3680 -1.3254 -1.3263 +# Num-params 5210944 4193064 4193064 + + +# steps/info/chain_dir_info.pl exp/chain/tdnn1h_sp exp/chain/att1a_sp +# exp/chain/tdnn1h_sp: num-iters=34 nj=2..5 num-params=5.2M dim=40+100->2336 combine=-0.049->-0.047 (over 3) xent:train/valid[21,33,final]=(-1.36,-1.16,-1.14/-1.57,-1.40,-1.37) logprob:train/valid[21,33,final]=(-0.061,-0.051,-0.046/-0.094,-0.089,-0.081) +# exp/chain/att1a_sp: num-iters=34 nj=2..5 num-params=4.2M dim=40+100->2336 combine=-0.046->-0.044 (over 4) xent:train/valid[21,33,final]=(-1.30,-1.10,-1.07/-1.53,-1.38,-1.33) logprob:train/valid[21,33,final]=(-0.057,-0.049,-0.043/-0.091,-0.087,-0.081) +# exp/chain/att1a2_sp: num-iters=34 nj=2..5 num-params=4.2M dim=40+100->2336 combine=-0.046->-0.044 (over 3) xent:train/valid[21,33,final]=(-1.30,-1.08,-1.06/-1.53,-1.36,-1.33) logprob:train/valid[21,33,final]=(-0.056,-0.048,-0.043/-0.091,-0.085,-0.081) + + +# Set -e here so that we catch if any executable fails immediately +set -euo pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +decode_nj=10 +train_set=train_clean_5 +test_sets=dev_clean_2 +gmm=tri3b +nnet3_affix= + +# The rest are configs specific to this script. 
Most of the parameters +# are just hardcoded at this level, in the commands below. +affix=1a # affix for the directory name +tree_affix= +train_stage=-10 +get_egs_stage=-10 +decode_iter= + +# training options +# training chunk-options +chunk_width=140,100,160 +dropout_schedule='0,0@0.20,0.3@0.50,0' +common_egs_dir= +xent_regularize=0.1 + +# training options +srand=0 +remove_egs=true +reporting_email= + +#decode options +test_online_decoding=true # if true, it will run the last decoding stage. + + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 11 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 75 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 12 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 13 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print (0.5/$xent_regularize)" | python) + + tdnn_opts="l2-regularize=0.03 dropout-proportion=0.0 dropout-per-dim-continuous=true" + + tdnnf_opts="l2-regularize=0.03 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.03 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.03" + output_opts="l2-regularize=0.015" + attention_opts="l2-regularize=0.01 bottleneck-dim=96 num-heads=4 value-dim=50 key-dim=50 time-stride=3 num-left-inputs=4 num-right-inputs=2 bypass-scale=0.66" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=768 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3 + attention-block name=att7 $attention_opts + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3 + attention-block name=att9 $attention_opts + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=768 bottleneck-dim=96 time-stride=3 
+ linear-component name=prefinal-l dim=192 $linear_opts + + ## adding the layers for chain branch + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts small-dim=192 big-dim=768 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + # adding the layers for xent branch + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts small-dim=192 big-dim=768 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 14 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/mini_librispeech-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient=0.1 \ + --chain.l2-regularize=0.0 \ + --chain.apply-deriv-weights=false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.add-option="--optimization.memory-compression-level=2" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=20 \ + --trainer.frames-per-iter=3000000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=5 \ + --trainer.optimization.initial-effective-lrate=0.002 \ + --trainer.optimization.final-effective-lrate=0.0002 \ + --trainer.num-chunk-per-minibatch=128,64 \ + --egs.chunk-width=$chunk_width \ + --egs.dir="$common_egs_dir" \ + --egs.opts="--frames-overlap-per-eg 0" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --reporting.email="$reporting_email" \ + --feat-dir=$train_data_dir \ + --tree-dir=$tree_dir \ + --lat-dir=$lat_dir \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 15 ]; then + # Note: it's not important to give mkgraph.sh the lang directory with the + # matched topology (since it gets the topology file from the model). + utils/mkgraph.sh \ + data/lang_test_tgsmall \ + $tree_dir $tree_dir/graph_tgsmall || exit 1; +fi + +if [ $stage -le 16 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l /dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l 3 EOF - utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar2b + utils/mkgraph.sh $lang $tree_dir $tree_dir/grammar2b # test that the binary 'compile-graph' does the same thing as mkgraph.sh. diff --git a/egs/mini_librispeech/s5/local/grammar/simple_demo_silprobs.sh b/egs/mini_librispeech/s5/local/grammar/simple_demo_silprobs.sh index 414227f2ad6..088b20eba1a 100755 --- a/egs/mini_librispeech/s5/local/grammar/simple_demo_silprobs.sh +++ b/egs/mini_librispeech/s5/local/grammar/simple_demo_silprobs.sh @@ -65,7 +65,7 @@ if [ $stage -le 2 ]; then 2 0.69314718055994 3 EOF - utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar1 + utils/mkgraph.sh $lang $tree_dir $tree_dir/grammar1 # test that the binary 'compile-graph' does the same thing as mkgraph.sh. 
compile-graph --read-disambig-syms=$lang/phones/disambig.int $tree_dir/tree $tree_dir/1.mdl $lang/L_disambig.fst $lang/G.fst $tree_dir/grammar1/HCLG2.fst @@ -94,7 +94,7 @@ if [ $stage -le 3 ]; then 2 0.69314718055994 3 EOF - utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar2a + utils/mkgraph.sh $lang $tree_dir $tree_dir/grammar2a # test that the binary 'compile-graph' does the same thing as mkgraph.sh. offset=$(grep nonterm_bos $lang/phones.txt | awk '{print $2}') # 364 @@ -121,7 +121,7 @@ if [ $stage -le 4 ]; then 2 3 #nonterm_end 3 EOF - utils/mkgraph.sh --self-loop-scale 1.0 $lang $tree_dir $tree_dir/grammar2b + utils/mkgraph.sh $lang $tree_dir $tree_dir/grammar2b # test that the binary 'compile-graph' does the same thing as mkgraph.sh. diff --git a/egs/mini_librispeech/s5/local/kws/create_hitlist.sh b/egs/mini_librispeech/s5/local/kws/create_hitlist.sh index be06a3b9312..6ad516607d5 100755 --- a/egs/mini_librispeech/s5/local/kws/create_hitlist.sh +++ b/egs/mini_librispeech/s5/local/kws/create_hitlist.sh @@ -17,7 +17,7 @@ cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 boost_silence=1.0 diff --git a/egs/multi_en/s5/local/chain/run_blstm_6h.sh b/egs/multi_en/s5/local/chain/run_blstm_6h.sh index 126d29350a1..81ab737ddd9 100644 --- a/egs/multi_en/s5/local/chain/run_blstm_6h.sh +++ b/egs/multi_en/s5/local/chain/run_blstm_6h.sh @@ -151,7 +151,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh index 96f5fdac8f3..31b467f9398 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_5b.sh @@ -235,7 +235,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${multi}_${gmm}_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_${multi}_${gmm}_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 62266334962..13156cfc118 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -252,7 +252,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 $lang_dir \ + utils/mkgraph.sh $lang_dir \ $dir $dir/graph${lang_suffix} fi diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 79cd3eb3014..be12f7cca45 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -241,7 +241,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${multi}_${gmm}_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_${multi}_${gmm}_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh index a7170af9431..6e45ade836d 100755 --- a/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh +++ b/egs/multi_en/s5/local/chain/tuning/run_tdnn_opgru_1b.sh @@ -237,7 +237,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${multi}_${gmm}_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg + utils/mkgraph.sh data/lang_${multi}_${gmm}_fsh_sw1_tg $dir $dir/graph_fsh_sw1_tg fi decode_suff=fsh_sw1_tg diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh index c8b4997161e..e68051c1770 100755 --- a/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_1a.sh @@ -223,7 +223,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; fi diff --git a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 4723400c76b..0d27842f18a 100755 --- a/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/reverb/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -239,7 +239,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang${lm_suffix}/ \ + data/lang${lm_suffix}/ \ $tree_dir $tree_dir/graph${lm_suffix} || exit 1; fi diff --git a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 33eb9dcb98c..f0303909c8e 100755 --- a/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/rimes/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -110,7 +110,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 --generate-ali-from-lats true \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -239,7 +239,7 @@ if [ $stage -le 7 ]; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh index 9d28a41316d..b6226ab5c2e 100755 --- a/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh +++ b/egs/rimes/v1/local/chain/tuning/run_e2e_cnn_1a.sh @@ -140,7 +140,7 @@ if [ $stage -le 4 ]; then # lang directory, one that contained a wordlist and LM of your choice, # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/rimes/v1/run_end2end.sh b/egs/rimes/v1/run_end2end.sh index d3e3da2be13..89ed4e656a6 100755 --- a/egs/rimes/v1/run_end2end.sh +++ b/egs/rimes/v1/run_end2end.sh @@ -103,7 +103,7 @@ fi if [ $stage -le 7 ]; then echo "$0: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj 50 --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \ + --scale-opts ' --acoustic-scale=1.0' \ data/$train_set data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh index c393a9aa28b..0464c073b2a 100755 --- a/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh +++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5g.sh @@ -134,7 +134,7 @@ if [ $stage -le 10 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --scoring-opts "--min-lmwt 1" \ --nj 20 --cmd "$decode_cmd" \ @@ -143,7 +143,7 @@ if [ $stage -le 10 ]; then fi if [ $stage -le 11 ]; then - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_ug $dir $dir/graph_ug + utils/mkgraph.sh data/lang_ug $dir $dir/graph_ug steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj 20 --cmd "$decode_cmd" \ --online-ivector-dir exp/nnet2_online/ivectors_test \ diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh index 131bcf98de9..c2869d20731 100755 --- a/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh +++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5n.sh @@ -136,7 +136,7 @@ if [ $stage -le 10 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --scoring-opts "--min-lmwt 1" \ --nj 20 --cmd "$decode_cmd" \ @@ -145,7 +145,7 @@ if [ $stage -le 10 ]; then fi if [ $stage -le 11 ]; then - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_ug $dir $dir/graph_ug + utils/mkgraph.sh data/lang_ug $dir $dir/graph_ug steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj 20 --cmd "$decode_cmd" \ --online-ivector-dir exp/nnet2_online/ivectors_test \ diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh index db5944fdbea..60cf4733185 100755 --- a/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh +++ b/egs/rm/s5/local/chain/tuning/run_tdnn_5o.sh @@ -166,7 +166,7 @@ if [ $stage -le 10 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --scoring-opts "--min-lmwt 1" \ --nj 20 --cmd "$decode_cmd" \ @@ -175,7 +175,7 @@ if [ $stage -le 10 ]; then fi if [ $stage -le 11 ]; then - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_ug $dir $dir/graph_ug + utils/mkgraph.sh data/lang_ug $dir $dir/graph_ug steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --nj 20 --cmd "$decode_cmd" \ --online-ivector-dir exp/nnet2_online/ivectors_test \ diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh index 2fd2556c19b..df9c020bd30 100755 --- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh +++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh @@ -198,7 +198,7 @@ if [ $stage -le 9 ]; then if $use_ivector;then ivec_opt="--online-ivector-dir exp/nnet2${nnet_affix}/ivectors_test" fi - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --scoring-opts "--min-lmwt 1" \ --nj 20 --cmd "$decode_cmd" $ivec_opt \ diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh index 3e8d5717d4b..f21aec5c29a 100755 --- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh +++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1b.sh @@ -211,7 +211,7 @@ if [ $stage -le 8 ]; then # the lang directory. 
ivec_opt="" if $use_ivector;then ivec_opt="--online-ivector-dir exp/nnet2${nnet_affix}/ivectors_test" ; fi - utils/mkgraph.sh --self-loop-scale 1.0 $lang_src_tgt $dir $dir/graph + utils/mkgraph.sh $lang_src_tgt $dir $dir/graph steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --scoring-opts "--min-lmwt 1" \ --nj 20 --cmd "$decode_cmd" $ivec_opt \ diff --git a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh index 611aede371d..f6f4d9b0193 100755 --- a/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh +++ b/egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1c.sh @@ -147,7 +147,7 @@ if [ $stage -le 4 ]; then --generate-ali-from-lats true \ --acoustic-scale 1.0 --extra-left-context-initial 0 --extra-right-context-final 0 \ --frames-per-chunk 150 \ - --scale-opts "--transition-scale=1.0 --self-loop-scale=1.0" \ + --scale-opts "" \ data/train_hires $lang_src_tgt $src_mdl_dir $lat_dir || exit 1; rm $lat_dir/fsts.*.gz # save space fi @@ -219,7 +219,7 @@ if [ $stage -le 8 ]; then tes_ivec_opt="" if $use_ivector;then test_ivec_opt="--online-ivector-dir exp/nnet2${nnet_affix}/ivectors_test" ; fi - utils/mkgraph.sh --self-loop-scale 1.0 $lang_src_tgt $dir $dir/graph + utils/mkgraph.sh $lang_src_tgt $dir $dir/graph steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ --scoring-opts "--min-lmwt 1" \ --nj 20 --cmd "$decode_cmd" $test_ivec_opt \ diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh index 47557f93696..8ebf33af18f 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1a.sh @@ -232,7 +232,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh index 7afa1b7f902..6414898fca7 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1b.sh @@ -233,7 +233,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh index e69e499e152..8ce19734a22 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1c.sh @@ -231,7 +231,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh index 86e0352828c..cc51f149446 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1d.sh @@ -244,7 +244,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh index 313f899a471..b6b8ffb9885 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_lstm_1e.sh @@ -232,7 +232,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph + utils/mkgraph.sh data/lang_test_tg $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh index 4991326a86d..cf28beb7691 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1a.sh @@ -177,7 +177,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh index 600f27ddf86..f9ffab37b73 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_1b.sh @@ -215,7 +215,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph + utils/mkgraph.sh data/lang_test_tg $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index cedc448464a..6fc8735783f 100755 --- a/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/sprakbanken/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -231,7 +231,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_tg $dir $dir/graph + utils/mkgraph.sh data/lang_test_tg $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7f.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7f.sh index d317b1dc55a..25a66075419 100755 --- a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7f.sh +++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7f.sh @@ -221,7 +221,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh index 20dcab8eb50..2c9437e5343 100755 --- a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh +++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_7k.sh @@ -234,7 +234,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi graph_dir=$dir/graph_sw1_tg diff --git a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_aug_1a.sh b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_aug_1a.sh index 8762430ee7f..353a77d8668 100755 --- a/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_aug_1a.sh +++ b/egs/swbd/s5c/local/chain/multi_condition/run_tdnn_aug_1a.sh @@ -214,7 +214,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh index a1be44cdbbf..0c2c8ee1f54 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h.sh @@ -172,7 +172,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh index d7382d78dc6..ed051c1729d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6h_discriminative.sh @@ -132,7 +132,7 @@ if [ $stage -le 1 ]; then # have some stragglers. 
steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \ --online-ivector-dir $online_ivector_dir $context_opts \ - --scale-opts "--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0" \ + --scale-opts "--acoustic-scale=1.0 " \ --nj $nj $train_data_dir $lang $srcdir ${srcdir}_ali${affix} ; fi @@ -146,7 +146,7 @@ if [ -z "$lats_dir" ]; then subsplit=40 # number of jobs that run per job (but 2 run at a time, so total jobs is 80, giving # total slots = 80 * 6 = 480. steps/nnet3/make_denlats.sh --cmd "$decode_cmd" \ - --self-loop-scale 1.0 --acwt 1.0 --determinize true \ + --acwt 1.0 --determinize true \ --online-ivector-dir $online_ivector_dir $context_opts \ --nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.config \ $train_data_dir $lang $srcdir ${lats_dir} ; diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh index 1eac1c60c27..b633d64375b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6i.sh @@ -178,7 +178,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh index acdae844b65..426e8647cd8 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6j.sh @@ -203,7 +203,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh index bbd8cb63697..7e36d47f1f9 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh @@ -203,7 +203,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh index 16f2ea211d0..5377ca916a1 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6l.sh @@ -213,7 +213,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh index 09f7d72434c..692b2240aa0 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6m.sh @@ -211,7 +211,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh index 8e44d0bc114..86e2a7786ce 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6n.sh @@ -217,7 +217,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh index 6a836e81b09..f0a746ca362 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6o.sh @@ -219,7 +219,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh index d1a61360f85..f4dee51dd03 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -207,7 +207,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh index ac22e858aea..459e9b4b00d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6h.sh @@ -177,7 +177,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh index aa48db04841..12646f07897 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6i.sh @@ -177,7 +177,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh index 48db81f586f..3b8a4b6f104 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6j.sh @@ -197,7 +197,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh index 021eab09506..65ef23f8ce3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh @@ -204,7 +204,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh index f219167f9ec..b8e08c61ddf 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6l.sh @@ -215,7 +215,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh index 551be099390..532eb3ede7e 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_d.sh @@ -182,7 +182,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2a.sh index c584bbe29a6..318d026b8d6 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2a.sh @@ -220,8 +220,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2b.sh index 227a74067d4..e7b574f0ea3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2b.sh @@ -211,8 +211,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2c.sh index 9fc08f27d45..556325b5bdc 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2c.sh @@ -202,7 +202,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2d.sh index 2ef8c374514..4c9b497f2fa 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2d.sh @@ -207,7 +207,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2e.sh index 2db9a59c2e2..4c31608c8f9 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2e.sh @@ -230,7 +230,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2f.sh index f510fccd882..d940e81efab 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2f.sh @@ -212,7 +212,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2g.sh index 65b48b43685..8f43562e4e3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2g.sh @@ -215,7 +215,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2h.sh index d86233ff83b..5154834004b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2h.sh @@ -217,7 +217,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2i.sh index cab9dd957a3..53e8e2a9ed6 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2i.sh @@ -215,7 +215,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2j.sh index 0eca2ff10ff..d5b5a80bef4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2j.sh @@ -216,7 +216,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2k.sh index 7e127c10917..1eb99309edd 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2k.sh @@ -225,7 +225,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2l.sh index fbe45761996..53ad7c17e14 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2l.sh @@ -235,7 +235,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2m.sh index 93db16408cc..345446f3cc8 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2m.sh @@ -236,7 +236,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2n.sh index 57eb66dac35..1ee2a92f494 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2n.sh @@ -276,7 +276,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2o.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2o.sh index ae085c9804f..e5111d13bf2 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2o.sh @@ -235,7 +235,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2p.sh index 4c6ad3b9761..b7ae60e2449 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2p.sh @@ -250,7 +250,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2q.sh index 54b03fb2296..872a2d5bd04 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2q.sh @@ -244,7 +244,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2r.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2r.sh index 4bdc61ef0e5..657c8925e60 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2r.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2r.sh @@ -210,7 +210,7 @@ if [ $stage -le 11 ]; then # needed, as in this type of topology we only have a single pdf-class, # numbered zero. steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --cluster-phones-opts "--pdf-class-list=0" \ + --cluster-phones-opts "--pdf-class-list=1" \ --leftmost-questions-truncate $leftmost_questions_truncate \ --cmd "$train_cmd" 6000 data/$train_set data/lang_chain_2r $ali_dir $treedir fi @@ -248,7 +248,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2s.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2s.sh index 3e829e246f3..f326b2e286a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2s.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2s.sh @@ -236,7 +236,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2t.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2t.sh index 4a322e1a8fa..8bd8111537d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2t.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2t.sh @@ -240,7 +240,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2u.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2u.sh index 9ec5bf81d3d..1d506714faa 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2u.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2u.sh @@ -252,7 +252,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2v.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2v.sh index cd009cfcc12..7c3b7cccead 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2v.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2v.sh @@ -257,7 +257,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2w.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2w.sh index 687093c98c5..674be6e15e1 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2w.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2w.sh @@ -252,7 +252,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2x.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2x.sh index e2d6204af0c..df5513245f6 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2x.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2x.sh @@ -258,7 +258,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2y.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2y.sh index c1211feae64..1169193dfce 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_2y.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_2y.sh @@ -243,7 +243,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3c.sh index 01ff8079f2a..b5c73f9d700 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3c.sh @@ -250,7 +250,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3d.sh index 0cb513c84f1..0759e406e56 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3d.sh @@ -262,7 +262,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3e.sh index 687f684a68c..422505200ea 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3e.sh @@ -251,7 +251,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3f.sh index 0a4b935485a..32ae7f8ed07 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3f.sh @@ -258,7 +258,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3g.sh index 077a84d31e9..b1ebcfb706a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3g.sh @@ -278,7 +278,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3h.sh index dcda3a00383..2c7cad476d2 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3h.sh @@ -264,7 +264,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3i.sh index 996795c9aee..3ca650c496f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3i.sh @@ -285,7 +285,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3j.sh index 66e44fb6f04..752f6635e6c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3j.sh @@ -271,7 +271,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k.sh index 5369b5251d1..fe474c82053 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k.sh @@ -285,7 +285,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k2.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k2.sh index 1902213402f..00fe73eacb8 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k2.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3k2.sh @@ -331,7 +331,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3l.sh index ae36ab2b65f..d15e25a50ec 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3l.sh @@ -281,7 +281,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3m.sh index 49656fb8aa7..5035a31837e 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3m.sh @@ -285,7 +285,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3n.sh index e2b0b0ebb10..06c6f360ab8 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3n.sh @@ -280,7 +280,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3o.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3o.sh index 298eb913ff3..23ae76103e5 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3o.sh @@ -284,7 +284,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3p.sh index 6ec9c6fe4b8..b907ad73b1a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3p.sh @@ -308,7 +308,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3q.sh index 4c911ba867e..8c81726d34c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3q.sh @@ -290,7 +290,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3r.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3r.sh index fba4ef6d15f..6a3a5a5d871 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3r.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3r.sh @@ -296,7 +296,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3s.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3s.sh index daab4cad318..790eb939b79 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3s.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3s.sh @@ -315,7 +315,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3t.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3t.sh index 034f2bafd70..ba3e714a158 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3t.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3t.sh @@ -311,7 +311,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3u.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3u.sh index 97c44ad55fc..a767afefd5e 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3u.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3u.sh @@ -305,7 +305,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3v.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3v.sh index 381a9e8686f..57957ab4585 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3v.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3v.sh @@ -303,7 +303,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3w.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3w.sh index 9f13b10753d..d35dfd8a18a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3w.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3w.sh @@ -307,7 +307,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3x.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3x.sh index 25db1450265..71399ca4fb7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3x.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3x.sh @@ -316,7 +316,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3y.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3y.sh index 3376652f3c2..b328a38f564 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3y.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3y.sh @@ -321,7 +321,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3z.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3z.sh index 25a68263dc7..50148dc8378 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_3z.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_3z.sh @@ -325,7 +325,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4a.sh index 0be490863dc..908ccb0c6e4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4a.sh @@ -324,7 +324,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4b.sh index 40ede7c5982..6955905caa7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4b.sh @@ -321,7 +321,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4c.sh index be9043c0527..badda8a057b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4c.sh @@ -332,7 +332,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4d.sh index 7f58fbebbfc..9ac4f3aeac3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4d.sh @@ -321,7 +321,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4e.sh index 8625cfa52c8..84d890912bd 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4e.sh @@ -337,7 +337,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4f.sh index 7ba4e8c6cb7..43242092b69 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4f.sh @@ -341,7 +341,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4g.sh index f1059f0091f..dbce2b1cb1c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4g.sh @@ -340,7 +340,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4n.sh index 62154dd5d71..4b57c4d072d 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4n.sh @@ -361,7 +361,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4p.sh index 0120c2c507d..224708375fc 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4p.sh @@ -356,7 +356,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4q.sh index 7d920092c30..57d13be22ce 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4q.sh @@ -148,7 +148,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4r.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4r.sh index 591b79352ab..9722c7c6704 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4r.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4r.sh @@ -355,7 +355,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4s.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4s.sh index fea6a776dbf..3c020375985 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4s.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4s.sh @@ -355,7 +355,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4t.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4t.sh index 0173b586700..d8ebc5b150f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4t.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4t.sh @@ -357,7 +357,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4u.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4u.sh index ac15f232500..86811c0c886 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4u.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4u.sh @@ -359,7 +359,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4v.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4v.sh index 0682615acf3..9fb9d849e75 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4v.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4v.sh @@ -369,7 +369,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4w.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4w.sh index 77d5013d91f..5baa90022ed 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4w.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4w.sh @@ -372,7 +372,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4x.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4x.sh index 9c59137bbfc..c6520ee0f1b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_4x.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_4x.sh @@ -371,7 +371,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5a.sh index 1d44637a8c8..e5a605af399 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5a.sh @@ -376,7 +376,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5b.sh index cdb769fb959..79ee6ce0aa4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5b.sh @@ -379,7 +379,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5c.sh index 17d8c41a82e..7f916c32993 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5c.sh @@ -384,7 +384,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5d.sh index f3b92944f1a..c898f769f5f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5d.sh @@ -382,7 +382,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5e.sh index 5a64c967907..4db6f59a868 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5e.sh @@ -392,7 +392,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5f.sh index c40f2ada0d3..4e8b1632a56 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5f.sh @@ -398,7 +398,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5g.sh index 5f59e146f65..18a9669daa7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5g.sh @@ -452,7 +452,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5h.sh index f8dc8886eb5..b4976c1c83a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5h.sh @@ -409,7 +409,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5i.sh index 7b7f67125c3..af92e3d1a94 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5i.sh @@ -407,7 +407,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5j.sh index bf1787c4373..f88e5885bb3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5j.sh @@ -403,7 +403,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5k.sh index 93f9bffdd12..fec6d8d2a36 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5k.sh @@ -430,7 +430,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5l.sh index f0c66c3a7cd..5b7580e8022 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5l.sh @@ -440,7 +440,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5m.sh index dc0f19e9261..aac8f038360 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5m.sh @@ -405,7 +405,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5n.sh index 51a3f6e7723..efe8193cc1b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5n.sh @@ -435,7 +435,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5o.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5o.sh index 4e2e6033d29..a8497ce24c9 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5o.sh @@ -443,7 +443,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5p.sh index 36056efce7a..47acc829fe8 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5p.sh @@ -396,7 +396,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5q.sh index 01a9e867b57..691f575ab34 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5q.sh @@ -400,7 +400,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5r.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5r.sh index a20ca2da3de..f48f8500710 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5r.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5r.sh @@ -402,7 +402,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5s.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5s.sh index df981a478c0..00f00103be7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5s.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5s.sh @@ -416,7 +416,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5t.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5t.sh index ddd08de7707..8a69ec95d9b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5t.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5t.sh @@ -420,7 +420,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5u.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5u.sh index 28333fd912e..998300ca1a2 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5u.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5u.sh @@ -481,7 +481,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5v.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5v.sh index 2cdb0bb988c..507d2257adc 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5v.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5v.sh @@ -434,7 +434,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5w.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5w.sh index 5a33622645a..3d263c066d6 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5w.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5w.sh @@ -445,7 +445,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5x.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5x.sh index 0b76fe60a7b..608f14634c1 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5x.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5x.sh @@ -452,7 +452,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5y.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5y.sh index 3fd623e163f..eb5581f6144 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5y.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5y.sh @@ -451,7 +451,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5z.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5z.sh index ff3528d9660..0b0b8ada738 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_5z.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_5z.sh @@ -443,7 +443,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6a.sh index 194245be1e3..c8b02f54172 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6a.sh @@ -465,7 +465,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6b.sh index d4194a5afe4..8449fb3b76a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6b.sh @@ -455,7 +455,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6c.sh index 89021098c49..927a33616ea 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6c.sh @@ -443,7 +443,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6d.sh index 354640e0258..8909f643de0 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6d.sh @@ -445,7 +445,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6e.sh index 80fea19e7a2..a8d82682d24 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6e.sh @@ -439,7 +439,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6f.sh index f92048cfeb4..a661b5b59fb 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6f.sh @@ -445,7 +445,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6g.sh index fbc5e0c54b5..c14f2fca4d7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6g.sh @@ -466,7 +466,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h.sh index 5449671d131..5a1a7d40474 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h.sh @@ -469,7 +469,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh index 6db0a4f5ac4..4e47c28b9f7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh @@ -159,7 +159,7 @@ if [ $stage -le 1 ]; then # have some stragglers. steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \ --online-ivector-dir $online_ivector_dir \ - --scale-opts "--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0" \ + --scale-opts "--acoustic-scale=1.0 " \ --nj $nj $train_data_dir $lang $srcdir ${srcdir}_ali${affix} ; fi @@ -173,7 +173,7 @@ if [ -z "$lats_dir" ]; then subsplit=40 # number of jobs that run per job (but 2 run at a time, so total jobs is 80, giving # total slots = 80 * 6 = 480. steps/nnet3/make_denlats.sh --cmd "$decode_cmd" \ - --self-loop-scale 1.0 --acwt 1.0 --determinize true \ + --acwt 1.0 --determinize true \ --online-ivector-dir $online_ivector_dir \ --nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats" --config conf/decode.config \ $train_data_dir $lang $srcdir ${lats_dir} ; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_py.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_py.sh index 32631f4d348..f763c5f95b3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_py.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_py.sh @@ -152,7 +152,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6i.sh index 093bceb2717..a00b762695f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6i.sh @@ -473,7 +473,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6j.sh index cf98106ea04..416ae831c21 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6j.sh @@ -458,7 +458,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6k.sh index 5d518aeab2a..99b00b50e1a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6k.sh @@ -485,7 +485,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6l.sh index c76f5a9efd3..6e30c9a62e1 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6l.sh @@ -497,7 +497,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6m.sh index 39d6d3cb449..d352a63c944 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6m.sh @@ -473,7 +473,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6n.sh index 0911711e73c..04adf6805d5 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6n.sh @@ -475,7 +475,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6o.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6o.sh index c07cb35ed33..959067206fe 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6o.sh @@ -484,7 +484,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6p.sh index 5710dbe2ef9..38065eba75d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6p.sh @@ -479,7 +479,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6q.sh index 3e93d79b799..b2ab4a581d5 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6q.sh @@ -469,7 +469,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6r.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6r.sh index 0415f4e0fb9..dda85a77550 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6r.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6r.sh @@ -468,7 +468,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6s.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6s.sh index 0564c0a858f..4de17d1c875 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6s.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6s.sh @@ -478,7 +478,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6t.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6t.sh index 98ecd477a1d..787ff971b9d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6t.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6t.sh @@ -488,7 +488,7 @@ if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6u.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6u.sh index 9e8afc3c5b8..67da19429a2 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6u.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6u.sh @@ -499,7 +499,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6v.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6v.sh index 732b60d7c95..9c0de1c8597 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6v.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6v.sh @@ -191,7 +191,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6w.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6w.sh index a625859f7d4..e0db69bf5a5 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6w.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6w.sh @@ -199,7 +199,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6x.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6x.sh index 2e79e24ddb6..90a6793254c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6x.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6x.sh @@ -194,7 +194,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6y.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6y.sh index 5cf1cead63f..fd5a9342c40 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6y.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6y.sh @@ -192,7 +192,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6z.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6z.sh index baa42a087b7..a03cf67b4f7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6z.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6z.sh @@ -196,7 +196,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7a.sh index 5dd430ded8d..1ce8dcef65f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7a.sh @@ -227,7 +227,7 @@ if [ $stage -le 16 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7b.sh index 47dbe843d8e..c3bc49b783d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7b.sh @@ -211,7 +211,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7c.sh index 3335ef788a4..8006dc6051e 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7c.sh @@ -211,7 +211,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7d.sh index dba1b99582a..a83027b2a05 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7d.sh @@ -181,7 +181,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7e.sh index 704411b6a76..a3dcb68c92d 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7e.sh @@ -182,7 +182,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh index a7a5a11dc7a..c07b5f3b936 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7f.sh @@ -183,7 +183,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh index 0623d26a9e4..9a0cfe0b301 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7g.sh @@ -200,7 +200,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh index dbbe3c1e6fd..13323fa559f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh @@ -200,7 +200,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh index 2a8a658bf6b..483b08c6938 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7i.sh @@ -193,7 +193,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh index a9eba36ddaa..a9e91f78dd1 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh @@ -193,7 +193,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh index 8e0b290cf87..9843df8945b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7k.sh @@ -194,7 +194,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh index bb9ddf209d6..09dd37fe61d 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7l.sh @@ -188,7 +188,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh index 97f92c14f1f..ef99c6d42f7 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh @@ -204,7 +204,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh index d9fe106e5d7..fdbbdae43b6 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m25l.sh @@ -541,7 +541,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh index 99e43443f99..ee3b0a70974 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7n.sh @@ -207,7 +207,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh index 44ca3b3d279..26448a96ffc 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh @@ -231,7 +231,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh index d19a4ef4c0b..d68475c6fad 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7p.sh @@ -219,7 +219,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh index cea0891d5d7..6fbdd992d02 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7q.sh @@ -207,7 +207,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_a.sh index 96046ac23c1..a18133a5609 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_a.sh @@ -122,8 +122,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_a2.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_a2.sh index 3a8e41a8315..94921a094bd 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_a2.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_a2.sh @@ -121,8 +121,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh index d4febd61e94..df1b86fee25 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_attention_1a.sh @@ -202,7 +202,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_b.sh index 8c623a7c01b..e02d283a171 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_b.sh @@ -124,8 +124,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh index 4414147bf0e..460a9081d28 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh @@ -210,7 +210,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh index cd9d4dc6f2b..e60697f1300 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1b.sh @@ -213,7 +213,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh index 18b660b4080..53485466f13 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1c.sh @@ -211,7 +211,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh index be615e0e361..f9474506d86 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1d.sh @@ -205,7 +205,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_c.sh index ec4634acf69..cbc6254575c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_c.sh @@ -132,8 +132,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_d.sh index 3a66a8cd556..94241dfc833 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_d.sh @@ -136,8 +136,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_e.sh index d30a513181e..f19df0c7190 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_e.sh @@ -142,8 +142,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_f.sh index 12450c2ae62..047cf298021 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_f.sh @@ -147,8 +147,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_g.sh index 70845684262..340d8fd7b61 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_g.sh @@ -149,8 +149,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_h.sh index 01f8743f585..91999d8459e 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_h.sh @@ -163,8 +163,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_i.sh index 82d91bbd33e..0b17e772a95 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_i.sh @@ -157,8 +157,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_j.sh index 334eec7e872..8fa842cb5da 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_j.sh @@ -164,8 +164,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_k.sh index b64318ec4bb..0ee01ac3de4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_k.sh @@ -159,8 +159,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_l.sh index 6de6c79affc..9fbfcf68b01 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_l.sh @@ -163,8 +163,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh index 43855e6f7ce..15e194d9081 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -205,7 +205,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh index 5c82ed0eb11..afcb48ff04a 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -201,7 +201,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh index c3df0bf2b2c..c6e8625ff0b 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -208,7 +208,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh index 3d353387239..ab301178c87 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -231,7 +231,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh index 2a2d508ecdd..3d221ad03f2 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -226,7 +226,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh index 5af5463b372..9affa2aeaf2 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -243,7 +243,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh index 28105a587ec..1921c9735e5 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -225,7 +225,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh index d6e81f2d8eb..579ccb45140 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -222,7 +222,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh index 060d98c9d05..4e8dddab481 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -243,7 +243,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh index 9bd39a262c5..5f7818689d4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -210,7 +210,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh index ccd6138da6e..4383dacb5ff 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -220,7 +220,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh index f702033377a..3b070d0a35b 100644 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -208,7 +208,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh index b43577bd76c..375b00a70bd 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -220,7 +220,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh index 5bb6e7da152..eb7cf854982 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -217,7 +217,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_m.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_m.sh index 8d357db0217..5290de020eb 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_m.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_m.sh @@ -164,8 +164,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_n.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_n.sh index a190a1d56dd..cdc7b8749fa 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_n.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_n.sh @@ -174,8 +174,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_o.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_o.sh index 5b80665268d..1e4ffbaf665 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_o.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_o.sh @@ -178,8 +178,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh index 4db38d74508..7dee86b1741 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -225,7 +225,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh index 7e9dec67068..0a83ab53e6c 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_opgru_1b.sh @@ -223,7 +223,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_p.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_p.sh index d401790449d..cc2dfe960bb 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_p.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_p.sh @@ -171,8 +171,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_q.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_q.sh index c6758a62fa5..1204cdb0eed 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_q.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_q.sh @@ -181,8 +181,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_r.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_r.sh index 73cadcc622c..95bef5b5317 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_r.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_r.sh @@ -181,8 +181,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_s.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_s.sh index ae10b53824f..f1ffc479eb2 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_s.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_s.sh @@ -183,8 +183,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_t.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_t.sh index dabb2a6db87..7b7698a730f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_t.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_t.sh @@ -186,8 +186,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_u.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_u.sh index c83274499fa..d2f69394557 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_u.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_u.sh @@ -191,8 +191,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_v.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_v.sh index 38f31269d33..fe195ab33ee 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_v.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_v.sh @@ -197,8 +197,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_w.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_w.sh index 35d1ddd8052..cea2fd8d4c4 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_w.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_w.sh @@ -190,8 +190,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_x.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_x.sh index 0f294033489..6cb8a63b478 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_x.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_x.sh @@ -191,8 +191,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_y.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_y.sh index 09217d1b196..4ce697ad42f 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_y.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_y.sh @@ -201,8 +201,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_z.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_z.sh index 0c8524a2c90..04ed8cfb022 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_z.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_z.sh @@ -191,8 +191,8 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --transition-scale 0.0 \ - --self-loop-scale 0.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg + utils/mkgraph.sh \ + data/lang_sw1_tg $dir $dir/graph_sw1_tg fi decode_suff=sw1_tg diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh index 02e637286b5..ed1927c648e 100755 --- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh @@ -138,7 +138,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir + utils/mkgraph.sh data/lang_sw1_tg $dir $graph_dir fi if [ $stage -le 14 ]; then diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh index 67fd3c03d27..59b63a6fcee 100755 --- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh @@ -140,7 +140,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir + utils/mkgraph.sh data/lang_sw1_tg $dir $graph_dir fi if [ $stage -le 14 ]; then diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh index 260116666a0..e6cc4e8bed1 100755 --- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh @@ -137,7 +137,7 @@ if [ $stage -le 13 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir + utils/mkgraph.sh data/lang_sw1_tg $dir $graph_dir fi if [ $stage -le 14 ]; then diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh index e1d0f06affe..2c8c6a57669 100755 --- a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh @@ -130,7 +130,7 @@ if [ $stage -le 1 ]; then # hardcode no-GPU for alignment, although you could use GPU [you wouldn't # get excellent GPU utilization though.] steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \ - --scale-opts '--transition-scale=1.0 --acoustic-scale=0.333 --self-loop-scale=0.333' \ + --scale-opts '--acoustic-scale=0.333' \ --frames-per-chunk $frames_per_chunk_decoding \ --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ --extra-left-context-initial 0 --extra-right-context-final 0 \ @@ -150,7 +150,7 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_degs.sh \ --cmd "$decode_cmd --mem 10G" --num-threads 3 \ - --self-loop-scale 0.333 --acwt 0.333 \ + --acwt 0.333 \ --max-copy-jobs $max_copy_jobs \ --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ diff --git a/egs/tedlium/s5/local/chain/run_tdnn.sh b/egs/tedlium/s5/local/chain/run_tdnn.sh index 545294dd035..96fee897a56 100755 --- a/egs/tedlium/s5/local/chain/run_tdnn.sh +++ b/egs/tedlium/s5/local/chain/run_tdnn.sh @@ -173,7 +173,7 @@ if [ $stage -le 14 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test $dir $dir/graph + utils/mkgraph.sh data/lang_test $dir $dir/graph fi graph_dir=$dir/graph diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh index 2ac8c09dad1..a53e2016f8b 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_blstm_1a.sh @@ -222,7 +222,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh index 47557f93696..8ebf33af18f 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1a.sh @@ -232,7 +232,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh index 7afa1b7f902..6414898fca7 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1b.sh @@ -233,7 +233,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh index e69e499e152..8ce19734a22 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1c.sh @@ -231,7 +231,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh index 86e0352828c..cc51f149446 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1d.sh @@ -244,7 +244,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh index 0fdb2b3b63e..b39f94865f3 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_lstm_1e.sh @@ -234,7 +234,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1a.sh index 70e72ee1914..36921a1ea9f 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1a.sh @@ -177,7 +177,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh index 492d3efb804..8839ecf14a7 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1b.sh @@ -223,7 +223,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh index 01768c3875f..6eaf886ef5b 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh @@ -240,7 +240,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh index bb5007f4c9f..1ee826d0e5c 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh @@ -231,7 +231,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh index 1476ed1fd40..f3b0e654813 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1e.sh @@ -223,7 +223,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh index 47f939fea1c..368b10e4ca7 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1f.sh @@ -222,7 +222,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh index f02025674e8..7c41c7e2a5d 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1g.sh @@ -226,7 +226,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh index b03da27e760..a13e767e767 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -242,7 +242,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh index e896a7867b3..936129704bb 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -257,7 +257,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh index 00f72fab796..15745ba14a0 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -249,7 +249,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh index 80a9ed1c4d0..d827d2d789b 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -253,7 +253,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh index 031978f878a..563e8071df1 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -301,7 +301,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh index 0d64c75aea8..dd26c9f0bf3 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh @@ -135,7 +135,7 @@ if [ $stage -le 1 ]; then # hardcode no-GPU for alignment, although you could use GPU [you wouldn't # get excellent GPU utilization though.] 
steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \ - --scale-opts "--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0" \ + --scale-opts "--acoustic-scale=1.0 " \ --frames-per-chunk $frames_per_chunk_egs \ --extra-left-context $extra_left_context --extra-right-context $extra_right_context \ --extra-left-context-initial 0 --extra-right-context-final 0 \ @@ -156,7 +156,7 @@ if [ -z "$degs_dir" ]; then steps/nnet3/get_degs.sh \ --cmd "$decode_cmd --mem 10G" --num-threads 3 \ --max-copy-jobs $max_copy_jobs \ - --self-loop-scale 1.0 --acwt 1.0 \ + --acwt 1.0 \ --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ --extra-left-context-initial 0 --extra-right-context-final 0 \ diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh index c60b8f7fefc..d3d6ca36816 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -255,7 +255,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh index 2d2048a6869..638cbec690a 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -258,7 +258,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh index a074e128270..dcf463156a6 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -256,7 +256,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh index 3bfe175806f..0a4c678c606 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -277,7 +277,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh index acbef783823..c1d9c4dad71 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -274,7 +274,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh index 173be863608..212967ca356 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -279,7 +279,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh index 94955d0472c..dbb2a8acbe1 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -270,7 +270,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh index efd3bc98725..189f8a2cb79 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -270,7 +270,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh index c0559e8d389..27dd0331dd2 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -280,7 +280,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh index 5a6dbaef8af..89de6799c27 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh @@ -284,7 +284,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh index dd38d56759f..5209710dbc4 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh @@ -279,7 +279,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh index 1378d2d176d..bb083315157 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1s.sh @@ -240,7 +240,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh index 3c4882ec2c6..82eca8f19af 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1t.sh @@ -240,7 +240,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh index 23ea14ae151..df96469ad28 100644 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1u.sh @@ -234,7 +234,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh index 7c44d963504..a10913133ad 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1v.sh @@ -239,7 +239,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh index 042ef346578..6c95f819dd2 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_1a.sh @@ -248,7 +248,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh index 905e1845183..fc3e45dc7b0 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1a.sh @@ -254,7 +254,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh index 7bd96e7d82c..273068b6b28 100755 --- a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_attention_bs_1b.sh @@ -241,7 +241,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh index f0220b17376..06b4b2451a4 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh @@ -172,7 +172,7 @@ if [ $stage -le 16 ]; then # as long as phones.txt was compatible. 
utils/lang/check_phones_compatible.sh data/lang/phones.txt $lang/phones.txt - utils/mkgraph.sh --self-loop-scale 0.333 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh index 3e8509bf4ac..d867c99a185 100755 --- a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh @@ -222,7 +222,7 @@ if [ $stage -le 16 ]; then # as long as phones.txt was compatible. utils/lang/check_phones_compatible.sh data/lang/phones.txt $lang/phones.txt - utils/mkgraph.sh --self-loop-scale 0.333 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 17 ]; then diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh index 1204ff6ce4c..0289ff9dd16 100755 --- a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1a.sh @@ -224,7 +224,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh index 744c964db2f..a5250f4ff9d 100755 --- a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1b.sh @@ -231,7 +231,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh index faac365af54..1c6c3709bdf 100755 --- a/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/tedlium/s5_r3/local/chain/tuning/run_tdnn_1c.sh @@ -224,7 +224,7 @@ if [ $stage -le 19 ]; then # Note: it might appear that this data/lang_chain directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph + utils/mkgraph.sh data/lang $dir $dir/graph fi if [ $stage -le 20 ]; then diff --git a/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh index ab68ba6fb68..166b6d842a0 100755 --- a/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/tunisian_msa/s5/local/chain/tuning/run_tdnn_1a.sh @@ -223,7 +223,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). 
utils/mkgraph.sh \ - --self-loop-scale 1.0 \ + \ data/lang_test \ $tree_dir \ $tree_dir/graph || exit 1; diff --git a/egs/uw3/v1/local/chain/run_cnn_1a.sh b/egs/uw3/v1/local/chain/run_cnn_1a.sh index e3548609da7..401d79e7217 100755 --- a/egs/uw3/v1/local/chain/run_cnn_1a.sh +++ b/egs/uw3/v1/local/chain/run_cnn_1a.sh @@ -216,7 +216,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_test \ + $lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh b/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh index 844ccf80677..d68ac82ce6c 100755 --- a/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/vystadial_cz/s5b/local/chain/tuning/run_tdnn_1a.sh @@ -234,7 +234,7 @@ if [ $stage -le 15 ]; then # Note: it's not important to give mkgraph.sh the lang directory with the # matched topology (since it gets the topology file from the model). utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_sp_test \ + data/lang_sp_test \ $tree_dir $tree_dir/graph || exit 1; fi diff --git a/egs/wsj/s5/local/chain/e2e/run_tdnn_flatstart.sh b/egs/wsj/s5/local/chain/e2e/run_tdnn_flatstart.sh index 1ddb3c305ac..0d07afa4519 100755 --- a/egs/wsj/s5/local/chain/e2e/run_tdnn_flatstart.sh +++ b/egs/wsj/s5/local/chain/e2e/run_tdnn_flatstart.sh @@ -175,13 +175,13 @@ if [ $stage -le 4 ]; then utils/lang/check_phones_compatible.sh \ data/lang_nosp_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_nosp_test_tgpr \ + data/lang_nosp_test_tgpr \ $dir $treedir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_nosp_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_nosp_test_bd_tgpr \ + data/lang_nosp_test_bd_tgpr \ $dir $treedir/graph_bd_tgpr || exit 1; fi @@ -203,7 +203,7 @@ if [ $stage -le 5 ]; then $treedir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_nosp_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/e2e/run_tdnn_lstm_flatstart.sh b/egs/wsj/s5/local/chain/e2e/run_tdnn_lstm_flatstart.sh index be82e80d5fe..714ace8a633 100755 --- a/egs/wsj/s5/local/chain/e2e/run_tdnn_lstm_flatstart.sh +++ b/egs/wsj/s5/local/chain/e2e/run_tdnn_lstm_flatstart.sh @@ -189,13 +189,13 @@ if [ $stage -le 4 ]; then utils/lang/check_phones_compatible.sh \ data/lang_char_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_tgpr \ + data/lang_char_test_tgpr \ $dir $treedir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_char_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_bd_tgpr \ + data/lang_char_test_bd_tgpr \ $dir $treedir/graph_bd_tgpr || exit 1; fi @@ -219,7 +219,7 @@ if [ $stage -le 5 ]; then $treedir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_char_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1a.sh 
b/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1a.sh index 4ab0cf58d53..70f22f39903 100755 --- a/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1a.sh +++ b/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1a.sh @@ -181,13 +181,13 @@ if [ $stage -le 4 ]; then utils/lang/check_phones_compatible.sh \ data/lang_char_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_tgpr \ + data/lang_char_test_tgpr \ $dir $treedir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_char_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_bd_tgpr \ + data/lang_char_test_bd_tgpr \ $dir $treedir/graph_bd_tgpr || exit 1; fi @@ -209,7 +209,7 @@ if [ $stage -le 5 ]; then $treedir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_char_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1b.sh b/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1b.sh index 4e66fae8baa..615c2535f7d 100755 --- a/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1b.sh +++ b/egs/wsj/s5/local/chain/e2e/tuning/run_tdnnf_flatstart_char1b.sh @@ -183,13 +183,13 @@ if [ $stage -le 4 ]; then utils/lang/check_phones_compatible.sh \ data/lang_char_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_tgpr \ + data/lang_char_test_tgpr \ $dir $treedir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_char_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_char_test_bd_tgpr \ + data/lang_char_test_bd_tgpr \ $dir $treedir/graph_bd_tgpr || exit 1; fi @@ -211,7 +211,7 @@ if [ $stage -le 5 ]; then $treedir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_char_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh index e656b67e529..5a8a20496cd 100755 --- a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh +++ b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh @@ -269,13 +269,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -300,7 +300,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -338,7 +338,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then 
$tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh index 9db76e94430..e5265d3b31d 100755 --- a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh +++ b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh @@ -272,13 +272,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -303,7 +303,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -341,7 +341,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh index 36ec5bb61af..e1394fb65da 100755 --- a/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh +++ b/egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1c.sh @@ -260,13 +260,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -287,7 +287,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -325,7 +325,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh index 8d44db6f917..a3950d71d1f 100755 --- 
a/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh @@ -276,13 +276,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -307,7 +307,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -345,7 +345,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh index 544b9b04a0a..26a88900b0f 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh @@ -252,13 +252,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -283,7 +283,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -321,7 +321,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh index b268ed7feda..654fc25a49a 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1c.sh @@ -253,13 +253,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + 
data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -283,7 +283,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -321,7 +321,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh index d1a7f9d0663..a8549470006 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1d.sh @@ -255,13 +255,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -285,7 +285,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -323,7 +323,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh index e20069fbfa1..585399e1367 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1e.sh @@ -264,13 +264,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -294,7 +294,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -332,7 +332,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} 
${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh index 86df0779841..ad97730bf01 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1f.sh @@ -258,13 +258,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -288,7 +288,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -326,7 +326,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh index 9927a0c28d3..9ba6dfb912d 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_1g.sh @@ -266,13 +266,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -296,7 +296,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -334,7 +334,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh index 6e4f220c1f2..58c31f67ff7 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh +++ 
b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh @@ -276,13 +276,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -307,7 +307,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -343,7 +343,7 @@ if [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_looped_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_looped_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -377,7 +377,7 @@ if $test_online_decoding && [ $stage -le 20 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh index 2d113e58a93..41389e4d07a 100755 --- a/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh +++ b/egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -567,13 +567,13 @@ if [ $stage -le 17 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgpr \ + data/lang_test_tgpr \ $tree_dir $tree_dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_bd_tgpr \ + data/lang_test_bd_tgpr \ $tree_dir $tree_dir/graph_bd_tgpr || exit 1; fi @@ -598,7 +598,7 @@ if [ $stage -le 18 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -634,7 +634,7 @@ if [ $stage -le 19 ]; then $tree_dir/graph_${lmtype} data/${data}_hires ${dir}/decode_looped_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_looped_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -668,7 +668,7 @@ if $test_online_decoding && [ $stage -le 20 ]; then $tree_dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 1.0 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 
steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh b/egs/wsj/s5/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh index f2a4ed37ae5..03554c61b57 100755 --- a/egs/wsj/s5/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh +++ b/egs/wsj/s5/local/nnet3/tuning/run_tdnn_lstm_lfr_1a.sh @@ -218,12 +218,12 @@ if [ $stage -le 16 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgpr/phones.txt $lang/phones.txt - utils/mkgraph.sh --self-loop-scale 0.333 data/lang_test_tgpr \ + utils/mkgraph.sh data/lang_test_tgpr \ $dir $dir/graph_tgpr || exit 1; utils/lang/check_phones_compatible.sh \ data/lang_test_bd_tgpr/phones.txt $lang/phones.txt - utils/mkgraph.sh --self-loop-scale 0.333 data/lang_test_bd_tgpr \ + utils/mkgraph.sh data/lang_test_bd_tgpr \ $dir $dir/graph_bd_tgpr || exit 1; fi @@ -248,7 +248,7 @@ if [ $stage -le 17 ]; then $dir/graph_${lmtype} data/${data}_hires ${dir}/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 0.333 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -284,7 +284,7 @@ if [ $stage -le 18 ]; then $dir/graph_${lmtype} data/${data}_hires ${dir}/decode_looped_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 0.333 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}/decode_looped_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ @@ -318,7 +318,7 @@ if $test_online_decoding && [ $stage -le 19 ]; then $dir/graph_${lmtype} data/${data} ${dir}_online/decode_${lmtype}_${data_affix} || exit 1 done steps/lmrescore.sh \ - --self-loop-scale 0.333 \ + \ --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \ data/${data}_hires ${dir}_online/decode_{tgpr,tg}_${data_affix} || exit 1 steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ diff --git a/egs/wsj/s5/steps/align_basis_fmllr.sh b/egs/wsj/s5/steps/align_basis_fmllr.sh index e5510c5ab7e..a9ceab29b4f 100755 --- a/egs/wsj/s5/steps/align_basis_fmllr.sh +++ b/egs/wsj/s5/steps/align_basis_fmllr.sh @@ -19,7 +19,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" basis_fmllr_opts="--fmllr-min-count=22 --num-iters=10 --size-scale=0.2 --step-size-iters=3" beam=10 retry_beam=40 diff --git a/egs/wsj/s5/steps/align_basis_fmllr_lats.sh b/egs/wsj/s5/steps/align_basis_fmllr_lats.sh index 426168496cc..dd3db90bd76 100755 --- a/egs/wsj/s5/steps/align_basis_fmllr_lats.sh +++ b/egs/wsj/s5/steps/align_basis_fmllr_lats.sh @@ -16,7 +16,6 @@ stage=0 nj=4 cmd=run.pl # Begin configuration. 
-scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" acoustic_scale=0.1 beam=10 retry_beam=40 @@ -112,18 +111,18 @@ if [ $stage -le 0 ]; then echo "$0: compiling training graphs" tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ - compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $scale_opts $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; fi if [ $stage -le 1 ]; then - # Note: we need to set --transition-scale=0.0 --self-loop-scale=0.0 because, + # Note: we need to set because, # as explained above, we compiled the transition probs into the training # graphs. echo "$0: aligning data in $data using $alimdl and speaker-independent features." $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ - gmm-align-compiled --transition-scale=0.0 --self-loop-scale=0.0 --acoustic-scale=$acoustic_scale \ + gmm-align-compiled --acoustic-scale=$acoustic_scale \ --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \ "ark:gunzip -c $dir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; fi diff --git a/egs/wsj/s5/steps/align_fmllr.sh b/egs/wsj/s5/steps/align_fmllr.sh index 327978e680f..c1ec67ec7dc 100755 --- a/egs/wsj/s5/steps/align_fmllr.sh +++ b/egs/wsj/s5/steps/align_fmllr.sh @@ -18,7 +18,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 careful=false diff --git a/egs/wsj/s5/steps/align_fmllr_lats.sh b/egs/wsj/s5/steps/align_fmllr_lats.sh index b331b40d73c..e561a6f0d29 100755 --- a/egs/wsj/s5/steps/align_fmllr_lats.sh +++ b/egs/wsj/s5/steps/align_fmllr_lats.sh @@ -12,7 +12,6 @@ stage=0 nj=4 cmd=run.pl # Begin configuration. -scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" acoustic_scale=0.1 beam=10 retry_beam=40 @@ -100,18 +99,18 @@ if [ $stage -le 0 ]; then echo "$0: compiling training graphs" tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|"; $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ - compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $scale_opts $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ + compile-train-graphs --read-disambig-syms=$lang/phones/disambig.int $dir/tree $dir/final.mdl $lang/L.fst "$tra" \ "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1; fi if [ $stage -le 1 ]; then - # Note: we need to set --transition-scale=0.0 --self-loop-scale=0.0 because, + # Note: we need to set because, # as explained above, we compiled the transition probs into the training # graphs. echo "$0: aligning data in $data using $alimdl and speaker-independent features." $cmd JOB=1:$nj $dir/log/align_pass1.JOB.log \ - gmm-align-compiled --transition-scale=0.0 --self-loop-scale=0.0 --acoustic-scale=$acoustic_scale \ + gmm-align-compiled --acoustic-scale=$acoustic_scale \ --beam=$beam --retry-beam=$retry_beam "$alimdl_cmd" \ "ark:gunzip -c $dir/fsts.JOB.gz|" "$sifeats" "ark:|gzip -c >$dir/pre_ali.JOB.gz" || exit 1; fi diff --git a/egs/wsj/s5/steps/align_lvtln.sh b/egs/wsj/s5/steps/align_lvtln.sh index 9efba2b9096..671c3e45c71 100755 --- a/egs/wsj/s5/steps/align_lvtln.sh +++ b/egs/wsj/s5/steps/align_lvtln.sh @@ -13,7 +13,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. 
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10.0 retry_beam=40 boost_silence=1.0 # factor by which to boost silence during alignment. diff --git a/egs/wsj/s5/steps/align_raw_fmllr.sh b/egs/wsj/s5/steps/align_raw_fmllr.sh index 639dde559a4..5cec25c096a 100755 --- a/egs/wsj/s5/steps/align_raw_fmllr.sh +++ b/egs/wsj/s5/steps/align_raw_fmllr.sh @@ -18,7 +18,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 boost_silence=1.0 # factor by which to boost silence during alignment. diff --git a/egs/wsj/s5/steps/align_sgmm2.sh b/egs/wsj/s5/steps/align_sgmm2.sh index d2f829f7e3e..951e241284e 100755 --- a/egs/wsj/s5/steps/align_sgmm2.sh +++ b/egs/wsj/s5/steps/align_sgmm2.sh @@ -18,7 +18,7 @@ use_gselect=false # use gselect info from srcdir [regardless, we use # Gaussian-selection info, we might have to compute it though.] gselect=15 # Number of Gaussian-selection indices for SGMMs. # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 transform_dir= # directory to find fMLLR transforms in. diff --git a/egs/wsj/s5/steps/align_si.sh b/egs/wsj/s5/steps/align_si.sh index 0bfebe6b0fc..749124dfadf 100755 --- a/egs/wsj/s5/steps/align_si.sh +++ b/egs/wsj/s5/steps/align_si.sh @@ -15,7 +15,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 careful=false diff --git a/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh index cc8da298d2f..67d92e0b73a 100755 --- a/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh +++ b/egs/wsj/s5/steps/cleanup/clean_and_segment_data_nnet3.sh @@ -111,10 +111,6 @@ cp $srcdir/frame_subsampling_factor $dir 2>/dev/null || true if [ -f $srcdir/frame_subsampling_factor ]; then echo "$0: guessing that this is a chain system, checking parameters." - if [ -z $scale_opts ]; then - echo "$0: setting scale_opts" - scale_opts="--self-loop-scale=1.0 --transition-scale=1.0" - fi if [ $acwt == 0.1 ]; then echo "$0: setting acwt=1.0" acwt=1.0 diff --git a/egs/wsj/s5/steps/cleanup/find_bad_utts.sh b/egs/wsj/s5/steps/cleanup/find_bad_utts.sh index 9bb67abeff9..27136be6fb5 100755 --- a/egs/wsj/s5/steps/cleanup/find_bad_utts.sh +++ b/egs/wsj/s5/steps/cleanup/find_bad_utts.sh @@ -12,7 +12,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. -scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" +scale_opts="" acoustic_scale=0.1 beam=15.0 lattice_beam=8.0 @@ -197,4 +197,3 @@ if [ $stage -le 3 ]; then sort -i -b -k1,1 -k4,4nr -k2,2 -k3,3 > $dir/analysis/ops_details.txt fi - diff --git a/egs/wsj/s5/steps/cleanup/find_bad_utts_nnet.sh b/egs/wsj/s5/steps/cleanup/find_bad_utts_nnet.sh index b18efe35a3c..68b8497f4e2 100755 --- a/egs/wsj/s5/steps/cleanup/find_bad_utts_nnet.sh +++ b/egs/wsj/s5/steps/cleanup/find_bad_utts_nnet.sh @@ -12,7 +12,7 @@ nj=8 cmd=run.pl use_graphs=false # Begin configuration. 
-scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" +scale_opts="" acoustic_scale=0.1 beam=15.0 lattice_beam=8.0 diff --git a/egs/wsj/s5/steps/cleanup/make_biased_lm_graphs.sh b/egs/wsj/s5/steps/cleanup/make_biased_lm_graphs.sh index d957ce4d5c7..9233d142946 100755 --- a/egs/wsj/s5/steps/cleanup/make_biased_lm_graphs.sh +++ b/egs/wsj/s5/steps/cleanup/make_biased_lm_graphs.sh @@ -17,7 +17,7 @@ set -e # Begin configuration section. nj=10 cmd=run.pl -scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" +scale_opts="" top_n_words=100 # Number of common words that we compile into each graph (most frequent # in $data/text.orig. top_n_words_weight=1.0 # this weight is before renormalization; it can be more @@ -49,7 +49,7 @@ if [ $# != 4 ]; then echo "Main options (for others, see top of script file)" echo " --scale-opts # Options relating to language" echo " # model scale; default is " - echo " # '--transition-scale=1.0 --self-loop-scale=0.1'" + echo " # ''" echo " --top-n-words # Number of most-common-words to add with" echo " # unigram probabilities into graph (default: 100)" echo " --top-n-words-weight # Weight given to top-n-words portion of graph" diff --git a/egs/wsj/s5/steps/cleanup/make_segmentation_graph.sh b/egs/wsj/s5/steps/cleanup/make_segmentation_graph.sh index 6705ab6db54..ab18d801c2e 100755 --- a/egs/wsj/s5/steps/cleanup/make_segmentation_graph.sh +++ b/egs/wsj/s5/steps/cleanup/make_segmentation_graph.sh @@ -6,8 +6,6 @@ # Begin configuration section. nj=4 cmd=run.pl -tscale=1.0 # transition scale. -loopscale=0.1 # scale for self-loops. cleanup=true ngram_order=1 srilm_options="-wbdiscount" # By default, use Witten-Bell discounting in SRILM @@ -35,8 +33,6 @@ if [ $# -ne 4 ]; then echo "Options:" echo " --ngram-order # order of n-gram language model" echo " --srilm-options # options for ngram-count in SRILM tool" - echo " --tscale # transition scale" - echo " --loopscale # scale for self-loops" echo " --cleanup # if true, removes the intermediate files" exit 1; fi @@ -87,7 +83,7 @@ fi mkdir -p $graph_dir/split$nj mkdir -p $graph_dir/log - + split_texts="" for n in $(seq $nj); do mkdir -p $graph_dir/split$nj/$n @@ -97,7 +93,6 @@ utils/split_scp.pl $data/text.orig $split_texts $cmd JOB=1:$nj $graph_dir/log/make_utterance_graph.JOB.log \ steps/cleanup/make_utterance_graph.sh --cleanup $cleanup \ - --tscale $tscale --loopscale $loopscale \ --ngram-order $ngram_order --srilm-options "$srilm_options" \ $graph_dir/split$nj/JOB/text $lang \ $model_dir $graph_dir/split$nj/JOB || exit 1; diff --git a/egs/wsj/s5/steps/cleanup/make_utterance_graph.sh b/egs/wsj/s5/steps/cleanup/make_utterance_graph.sh index 277c5a2da1c..a784c8777a8 100755 --- a/egs/wsj/s5/steps/cleanup/make_utterance_graph.sh +++ b/egs/wsj/s5/steps/cleanup/make_utterance_graph.sh @@ -4,8 +4,6 @@ # Apache 2.0 # Begin configuration section. -tscale=1.0 # transition scale. -loopscale=0.1 # scale for self-loops. cleanup=true ngram_order=1 srilm_options="-wbdiscount" # By default, use Witten-Bell discounting in SRILM @@ -34,8 +32,6 @@ if [ $# -ne 4 ]; then echo "Options:" echo " --ngram-order # order of n-gram language model" echo " --srilm-options # options for ngram-count in SRILM tool" - echo " --tscale # transition scale" - echo " --loopscale # scale for self-loops" echo " --cleanup # if true, removes the intermediate files" exit 1; fi @@ -134,7 +130,7 @@ cat $text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ fstisstochastic $wdir/CLG.fst || echo "$0: $uttid/CLG.fst not stochastic." 
make-h-transducer --disambig-syms-out=$wdir/disambig_tid.int \ - --transition-scale=$tscale $wdir/ilabels_${N}_${P} \ + $wdir/ilabels_${N}_${P} \ $model_dir/tree $model_dir/final.mdl > $wdir/Ha.fst # Builds HCLGa.fst @@ -145,13 +141,10 @@ cat $text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \ fstisstochastic $wdir/HCLGa.fst ||\ echo "$0: $uttid/HCLGa.fst is not stochastic" - add-self-loops --self-loop-scale=$loopscale --reorder=true \ - $model_dir/final.mdl < $wdir/HCLGa.fst > $wdir/HCLG.fst + add-self-loops $model_dir/final.mdl < $wdir/HCLGa.fst > $wdir/HCLG.fst - if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then - fstisstochastic $wdir/HCLG.fst ||\ - echo "$0: $uttid/HCLG.fst is not stochastic." - fi + fstisstochastic $wdir/HCLG.fst ||\ + echo "$0: $uttid/HCLG.fst is not stochastic." echo "$uttid $wdir/HCLG.fst" >> $graph_dir/sub_graphs/HCLG.fsts.scp echo diff --git a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh index f0df1e7730c..d42cda1b9d3 100755 --- a/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh +++ b/egs/wsj/s5/steps/cleanup/segment_long_utterances_nnet3.sh @@ -171,7 +171,7 @@ if [ -f $srcdir/frame_subsampling_factor ]; then echo "$0: guessing that this is a chain system, checking parameters." if [ -z $scale_opts ]; then echo "$0: setting scale_opts" - scale_opts="--self-loop-scale=1.0 --transition-scale=1.0" + scale_opts=" " fi if [ $acwt == 0.1 ]; then echo "$0: setting acwt=1.0" diff --git a/egs/wsj/s5/steps/decode_basis_fmllr.sh b/egs/wsj/s5/steps/decode_basis_fmllr.sh index afb914e7f0d..7e39048f463 100755 --- a/egs/wsj/s5/steps/decode_basis_fmllr.sh +++ b/egs/wsj/s5/steps/decode_basis_fmllr.sh @@ -37,7 +37,7 @@ acwt=0.083333 # Acoustic weight used in getting fMLLR transforms, and also in # lattice generation. # Parameters in alignment of training data -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" align_beam=10 retry_beam=40 diff --git a/egs/wsj/s5/steps/decode_fromlats.sh b/egs/wsj/s5/steps/decode_fromlats.sh index ee719c0e132..73c8954fb48 100755 --- a/egs/wsj/s5/steps/decode_fromlats.sh +++ b/egs/wsj/s5/steps/decode_fromlats.sh @@ -22,7 +22,7 @@ beam=20.0 lattice_beam=7.0 acwt=0.083333 batch_size=75 # Limits memory blowup in compile-train-graphs-fsts -scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" +scale_opts="" skip_scoring=false # End configuration. diff --git a/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh b/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh index 1cdd9885314..a953aeb90e7 100755 --- a/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh +++ b/egs/wsj/s5/steps/decode_sgmm2_fromlats.sh @@ -33,7 +33,7 @@ vecs_beam=4.0 # Beam we use to prune lattices while getting posteriors for use_fmllr=false fmllr_iters=10 fmllr_min_count=1000 -scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" +scale_opts="" skip_scoring=false # End configuration section. diff --git a/egs/wsj/s5/steps/get_fmllr_basis.sh b/egs/wsj/s5/steps/get_fmllr_basis.sh index 9b60af1fa51..3f145714ef1 100755 --- a/egs/wsj/s5/steps/get_fmllr_basis.sh +++ b/egs/wsj/s5/steps/get_fmllr_basis.sh @@ -8,7 +8,7 @@ stage=0 # Parameters in alignment of training data -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" per_utt=true # If true, then treat each utterance as a separate speaker for purposes of # basis training... 
this is recommended if the number of actual speakers in your # training set is less than (feature-dim) * (feature-dim+1). diff --git a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py index 4a39ed9dae6..144bc879e51 100644 --- a/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py +++ b/egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/acoustic_model.py @@ -76,12 +76,10 @@ def prepare_initial_acoustic_model(dir, alidir, run_opts, common_train_lib.prepare_initial_network(dir, run_opts, srand=srand) - # Convert to .mdl, train the transitions, set the priors. + # Convert to .mdl, set the priors. common_lib.execute_command( """{command} {dir}/log/init_mdl.log \ - nnet3-am-init {alidir}/final.mdl {raw_mdl} - \| \ - nnet3-am-train-transitions - \ - "ark:gunzip -c {alidir}/ali.*.gz|" {dir}/0.mdl + nnet3-am-init {alidir}/final.mdl {raw_mdl} {dir}/0.mdl """.format(command=run_opts.command, dir=dir, alidir=alidir, raw_mdl=(input_model if input_model is not None diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py index db4cb392f10..21874ad6923 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/attention.py @@ -247,3 +247,284 @@ def _add_components(self, input_desc, input_dim, nonlinearities): configs.append(line) cur_node = '{0}.{1}'.format(self.name, nonlinearity) return configs + + +# This class is for parsing lines like +# 'attention-block dim=768 bottleneck-dim=128 num-heads=8 value-dim=50 key-dim=50 time-stride=3 num-left-inputs=30 num-right-inputs=10 bypass-scale=0.66' +# +# It is a little like a TDNNF-layer, but with attention in the middle and no +# ReLU. Note: as of now, there is no nonlinearity other than what comes from +# the attention component itself (it has a softmax). Imagine the input and +# output dim of the layer is largish, like 768. +# +# So we go, 768 --(linear with orthonormal)--> 128 --(affine)--> attention-input-dim --(attention)--> (50+context-dim)*8 \ +# --(linear with orthonormal)-->128 -->(linear) 768 -> batchnorm, then add residual connection from original 768-dim input. +# +# ... where attention-input-dim equals value-dim + 2*key-dim + context-dim +# and context-dim = (num-left-inputs + 1 + num-right-inputs + 1) +# in this case it's 50 + 2*50 + (30+10+1) = 191. +# +# +# Parameters of the class, and their defaults: +# input='[-1]' [Descriptor giving the input of the layer.] +# bottleneck-dim=-1 [bottleneck dimension, e.g. 128.] +# num-heads=-1 [Number of attention heads, e.g. 8] +# value-dim=-1 [Dimension of values (the things which get weighted-averaged +# and then output. E.g. 50] +# key-dim=-1 [Dimension of the keys, e.g. 50. Affects the query +# dimension, but that's larger by context_dim, +# where context_dim == num-left-inputs+1+num-right-inputs. +# That's for the encoding of the position of the input frame.] +# dim=-1 [Dimension of the output of this layer (after the bottleneck; +# e.g. 768]. Defaults to the dimension of the input.] +# time-stride=1 [Time stride, dictates the spacing of the inputs to this +# layer. E.g. might be 3 in typical TDNN-F setups.] +# num-left-inputs=-1 [Number of inputs to the left that we use. Must be specified.] +# num-right-inputs=-1 [Number of inputs to the right that we use. Must be specified.] +# num-left-inputs-required: -1 [This affects the left/right context that the network will +# have, i.e. 
how many frames of input it will insist on having. +# It affects the behavior at chunk boundaries; larger will tend +# to be slower but more accurate. Note: the default of -1 means: +# use the same as num-left-inputs]. +# num-right-inputs-required: -1 [See comment for num-left-inputs-required] +# output-context: True [If true, the softmax weights will be an additional +# output of the attention heads.] +# key-scale: 0.0 [If >0.0, becomes a scaling factor on the keys. Otherwise, we +# use the default value of 1.0 / sqrt(key-dim).] +# +# +# bypass-scale : 0.66 [Scale on the input in the residual connection.] +# target-rms: 1.0 [Scaling on the output of the batchnorm] +# +# Extra configs that are passed into the affine and linear components: +# learning-rate-factor=1.0 [This can be used to make the affine component +# train faster or slower]. +# max-change=0.75 [maximum change per iteration, per component] +# l2-regularize=0.0 [l2 regularization constant for linear and affine components.] +# +# use-relu=False [If true, add relu] +# +# Documentation for the rest of the parameters (related to the +# attention component) can be found in nnet-attention-component.h + + +class XconfigAttentionBlock(XconfigLayerBase): + def __init__(self, first_token, key_to_value, prev_names = None): + # Here we just list some likely combinations.. you can just add any + # combinations you want to use, to this list. + assert first_token == 'attention-block' + XconfigLayerBase.__init__(self, first_token, key_to_value, prev_names) + + def set_default_configs(self): + # note: self.config['input'] is a descriptor, '[-1]' means output + # the most recent layer. + self.config = { 'input':'[-1]', + 'dim': -1, + 'bottleneck-dim': -1, + 'num-heads': -1, + 'value-dim': -1, + 'key-dim': -1, + 'dim': -1, + 'time-stride': 1, + 'num-left-inputs': -1, + 'num-right-inputs': -1, + 'learning-rate-factor': 1.0, + 'max-change' : 0.75, + 'ng-affine-options' : '', + 'l2-regularize': 0.0, + 'num-left-inputs-required': -1, + 'num-right-inputs-required': -1, + 'output-context': True, + 'target-rms': 1.0, + 'key-scale': 0.0, + 'bypass-scale': 0.66, + 'use-relu': False} + + + def check_configs(self): + for x in [ 'bottleneck-dim', 'num-heads', 'value-dim', 'key-dim' ]: + if self.config[x] <= 0: + raise RuntimeError("Expected {} to be positive, got {}".format(x, self.config[x])) + for x in ['num-left-inputs', 'num-right-inputs' ]: + if self.config[x] < 0: + raise RuntimeError("Expected {} to be nonnegative, got {}".format(x, self.config[x])) + # Not checking everything here. 
+ if self.config['learning-rate-factor'] <= 0.0: + raise RuntimeError("learning-rate-factor has invalid value {0}" + .format(self.config['learning-rate-factor'])) + if self.config['key-scale'] == 0.0: + self.config['key-scale'] = 1.0 / math.sqrt(self.config['key-dim']) + + def output_name(self, auxiliary_output=None): + # at a later stage we might want to expose even the pre-nonlinearity + # vectors + return '{0}.noop'.format(self.name) + + def attention_input_dim(self): + context_dim = (self.config['num-left-inputs'] + + self.config['num-right-inputs'] + 1) + num_heads = self.config['num-heads'] + key_dim = self.config['key-dim'] + value_dim = self.config['value-dim'] + query_dim = key_dim + context_dim; + return num_heads * (key_dim + value_dim + query_dim) + + def attention_output_dim(self): + context_dim = (self.config['num-left-inputs'] + + self.config['num-right-inputs'] + 1) + num_heads = self.config['num-heads'] + value_dim = self.config['value-dim'] + return (num_heads * + (value_dim + + (context_dim if self.config['output-context'] else 0))) + + def output_dim(self, auxiliary_output = None): + dim = self.config['dim'] + if dim > 0: + return dim + else: + return self.descriptors['input']['dim'] + + def get_full_config(self): + ans = [] + config_lines = self._generate_config() + + for line in config_lines: + for config_name in ['ref', 'final']: + # we do not support user specified matrices in this layer + # so 'ref' and 'final' configs are the same. + ans.append((config_name, line)) + return ans + + + def _generate_config(self): + # by 'descriptor_final_string' we mean a string that can appear in + # config-files, i.e. it contains the 'final' names of nodes. + input_desc = self.descriptors['input']['final-string'] + input_dim = self.descriptors['input']['dim'] + output_dim = self.config['dim'] + if output_dim <= 0: + output_dim = input_dim + bottleneck_dim = self.config['bottleneck-dim'] + attention_input_dim = self.attention_input_dim() + attention_output_dim = self.attention_output_dim() + target_rms = self.config['target-rms'] + max_change = self.config['max-change'] + l2_regularize = self.config['l2-regularize'] + learning_rate_factor=self.config['learning-rate-factor'] + + learning_rate_option=('learning-rate-factor={0}'.format(learning_rate_factor) + if learning_rate_factor != 1.0 else '') + l2_regularize_option = ('l2-regularize={0} '.format(l2_regularize) + if l2_regularize != 0.0 else '') + + common_options=("{lroption} {l2option} max-change={max_change} " + "".format(lroption = learning_rate_option, + l2option = l2_regularize_option, + max_change = max_change)) + + + configs = [] + + + # The first linear component + line = ('component name={0}.linear1 type=LinearComponent ' + 'input-dim={1} output-dim={2} ' + '{3} orthonormal-constraint=-1 ' + ''.format(self.name, input_dim, bottleneck_dim, + common_options)) + + configs.append(line) + line = ('component-node name={0}.linear1 component={0}.linear1 input={1} ' + ''.format(self.name, input_desc)) + configs.append(line) + + # The first affine component + line = ('component name={0}.affine1 type=NaturalGradientAffineComponent ' + 'input-dim={1} output-dim={2} ' + '{3}'.format(self.name, bottleneck_dim, attention_input_dim, + common_options)) + configs.append(line) + line = ('component-node name={0}.affine1 component={0}.affine1 input={0}.linear1' + ''.format(self.name, input_desc)) + configs.append(line) + + + line = ('component name={0}.layernorm1 type=NormalizeComponent dim={1} ' + ' '.format(self.name, 
attention_input_dim)) + configs.append(line) + line = ('component-node name={0}.layernorm1 component={0}.layernorm1 ' + 'input={0}.affine1 '.format(self.name)) + configs.append(line) + cur_name='layernorm1' + + # The attention component + line = ('component name={name}.attention type=RestrictedAttentionComponent ' + 'value-dim={v} key-dim={k} num-left-inputs={nl} ' + 'num-right-inputs={nr} num-left-inputs-required={nlr}' + ' num-right-inputs-required={nrr} output-context={oc}' + ' time-stride={ts} num-heads={nh} key-scale={ks}' + ''.format(name=self.name, + v=self.config['value-dim'], k=self.config['key-dim'], + nl=self.config['num-left-inputs'], + nr=self.config['num-right-inputs'], + nlr=self.config['num-left-inputs-required'], + nrr=self.config['num-right-inputs-required'], + oc=self.config['output-context'], + ts=self.config['time-stride'], + nh=self.config['num-heads'], + ks=self.config['key-scale'])) + configs.append(line) + line = ('component-node name={0}.attention component={0}.attention input={0}.{1}' + ''.format(self.name, cur_name)) + configs.append(line) + + # The second linear component + line = ('component name={0}.linear2 type=LinearComponent ' + 'input-dim={1} output-dim={2} orthonormal-constraint=-1 ' + '{3}'.format(self.name, attention_output_dim, bottleneck_dim, + common_options)) + configs.append(line) + line = ('component-node name={0}.linear2 component={0}.linear2 ' + 'input={0}.attention '.format(self.name)) + configs.append(line) + + # The third linear component + line = ('component name={0}.linear3 type=LinearComponent ' + 'input-dim={1} output-dim={2} ' + '{3}'.format(self.name, bottleneck_dim, output_dim, + common_options)) + configs.append(line) + line = ('component-node name={0}.linear3 component={0}.linear3 ' + 'input={0}.linear2 '.format(self.name)) + configs.append(line) + + + if self.config['use-relu']: + line = ('component name={0}.relu type=RectifiedLinearComponent dim={1} ' + ''.format(self.name, output_dim)) + configs.append(line) + line = ('component-node name={0}.relu component={0}.relu ' + 'input={0}.linear3 '.format(self.name)) + configs.append(line) + cur_name = 'relu' + else: + cur_name = 'linear3' + + + line = ('component name={0}.layernorm2 type=NormalizeComponent dim={1} ' + 'target-rms={2} '.format(self.name, output_dim, target_rms)) + configs.append(line) + line = ('component-node name={0}.layernorm2 component={0}.layernorm2 ' + 'input={0}.{1} '.format(self.name, cur_name)) + configs.append(line) + + + line = ('component name={0}.noop type=NoOpComponent dim={1}'.format( + self.name, output_dim)) + configs.append(line) + line = ('component-node name={name}.noop component={name}.noop input=Sum(Scale({b}, {i}), {name}.layernorm2)' + ''.format(name=self.name, b=self.config['bypass-scale'], i=input_desc)) + configs.append(line) + + return configs diff --git a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py index 5e21c4c0274..1fdf4759ec1 100644 --- a/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py +++ b/egs/wsj/s5/steps/libs/nnet3/xconfig/parser.py @@ -86,6 +86,7 @@ 'dim-range-component': xlayers.XconfigDimRangeComponent, 'offset-component': xlayers.XconfigPerElementOffsetComponent, 'combine-feature-maps-layer': xlayers.XconfigCombineFeatureMapsLayer, + 'attention-block': xlayers.XconfigAttentionBlock, 'delta-layer': xlayers.XconfigDeltaLayer } diff --git a/egs/wsj/s5/steps/lmrescore.sh b/egs/wsj/s5/steps/lmrescore.sh index 88db8ae15dc..abe5e740166 100755 --- a/egs/wsj/s5/steps/lmrescore.sh +++ 
b/egs/wsj/s5/steps/lmrescore.sh @@ -7,7 +7,6 @@ mode=4 # mode can be 1 through 5. They should all give roughly similar results # See the comments in the case statement for more details. cmd=run.pl skip_scoring=false -self_loop_scale=0.1 # only matters for mode 4. acoustic_scale=0.1 # only matters for mode 5. # End configuration section. @@ -22,8 +21,6 @@ if [ $# != 5 ]; then echo " --cmd # How to run commands (e.g. run.pl, queue.pl)" echo " --mode (1|2|3|4|5) # Mode of LM rescoring to use (default: 4)." echo " # These should give very similar results." - echo " --self-loop-scale # Self-loop-scale, only relevant in mode 4." - echo " # Default: 0.1." echo " --acoustic-scale # Acoustic scale, only relevant in mode 5." echo " # Default: 0.1." exit 1; @@ -109,8 +106,6 @@ case "$mode" in # grammar and transition weights. mdl=`dirname $indir`/final.mdl [ ! -f $mdl ] && echo No such model $mdl && exit 1; - [[ -f `dirname $indir`/frame_subsampling_factor && "$self_loop_scale" == 0.1 ]] && \ - echo "$0: WARNING: chain models need '--self-loop-scale 1.0'"; $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ gunzip -c $indir/lat.JOB.gz \| \ lattice-scale --lm-scale=0.0 ark:- ark:- \| \ @@ -118,8 +113,7 @@ case "$mode" in lattice-compose ark:- $outdir/Ldet.fst ark:- \| \ lattice-determinize ark:- ark:- \| \ lattice-compose --phi-label=$phi ark:- $newlm ark:- \| \ - lattice-add-trans-probs --transition-scale=1.0 --self-loop-scale=$self_loop_scale \ - $mdl ark:- ark:- \| \ + lattice-add-trans-probs $mdl ark:- ark:- \| \ gzip -c \>$outdir/lat.JOB.gz || exit 1; ;; 5) # Mode 5 uses the binary lattice-lmrescore-pruned to do the LM rescoring diff --git a/egs/wsj/s5/steps/make_phone_graph.sh b/egs/wsj/s5/steps/make_phone_graph.sh index aaf88cc66d2..c7b57374f1c 100755 --- a/egs/wsj/s5/steps/make_phone_graph.sh +++ b/egs/wsj/s5/steps/make_phone_graph.sh @@ -15,8 +15,6 @@ stage=0 cmd=run.pl N=3 # change N and P for non-trigram systems. P=1 -tscale=1.0 # transition scale. -loopscale=0.1 # scale for self-loops. # End configuration section. echo "$0 $@" # Print the command line for logging @@ -117,7 +115,7 @@ fi if [ $stage -le 5 ]; then echo "$0: creating Ha.fst" make-h-transducer --disambig-syms-out=$dir/phone_graph/disambig_tid.int \ - --transition-scale=$tscale $dir/phone_graph/ilabels_${N}_${P} $dir/tree $dir/final.mdl \ + $dir/phone_graph/ilabels_${N}_${P} $dir/tree $dir/final.mdl \ > $dir/phone_graph/Ha.fst fi @@ -131,13 +129,9 @@ if [ $stage -le 6 ]; then fi if [ $stage -le 7 ]; then - add-self-loops --self-loop-scale=$loopscale --reorder=true \ - $dir/final.mdl < $dir/phone_graph/HCLGa.fst > $dir/phone_graph/HCLG.fst || exit 1; + add-self-loops $dir/final.mdl < $dir/phone_graph/HCLGa.fst > $dir/phone_graph/HCLG.fst || exit 1; - if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then - # No point doing this test if transition-scale not 1, as it is bound to fail. - fstisstochastic $dir/phone_graph/HCLG.fst || echo "[info]: final HCLG is not stochastic." - fi + fstisstochastic $dir/phone_graph/HCLG.fst || echo "[info]: final HCLG is not stochastic." # $lang/phones.txt is the symbol table that corresponds to the output # symbols on the graph; decoding scripts expect it as words.txt. diff --git a/egs/wsj/s5/steps/nnet/align.sh b/egs/wsj/s5/steps/nnet/align.sh index f976711fa58..1a05e620394 100755 --- a/egs/wsj/s5/steps/nnet/align.sh +++ b/egs/wsj/s5/steps/nnet/align.sh @@ -10,7 +10,7 @@ nj=4 cmd=run.pl stage=0 # Begin configuration. 
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 nnet_forward_opts="--no-softmax=true --prior-scale=1.0" @@ -19,7 +19,7 @@ text= # (optional) transcipts we align to, align_to_lats=false # optionally produce alignment in lattice format lats_decode_opts="--acoustic-scale=0.1 --beam=20 --lattice_beam=10" - lats_graph_scales="--transition-scale=1.0 --self-loop-scale=0.1" + lats_graph_scales="" use_gpu="no" # yes|no|optionaly # End configuration options. diff --git a/egs/wsj/s5/steps/nnet2/align.sh b/egs/wsj/s5/steps/nnet2/align.sh index fa040d692ad..5b89655ef37 100755 --- a/egs/wsj/s5/steps/nnet2/align.sh +++ b/egs/wsj/s5/steps/nnet2/align.sh @@ -9,7 +9,7 @@ nj=4 cmd=run.pl # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 transform_dir= diff --git a/egs/wsj/s5/steps/nnet3/align.sh b/egs/wsj/s5/steps/nnet3/align.sh index aa2de2ee1a5..75102f2c1d7 100755 --- a/egs/wsj/s5/steps/nnet3/align.sh +++ b/egs/wsj/s5/steps/nnet3/align.sh @@ -13,7 +13,7 @@ nj=4 cmd=run.pl # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 iter=final @@ -104,10 +104,10 @@ if [ -f $srcdir/frame_subsampling_factor ]; then frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor" cp $srcdir/frame_subsampling_factor $dir if [ "$frame_subsampling_factor" -gt 1 ] && \ - [ "$scale_opts" == "--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" ]; then + [ "$scale_opts" == "--acoustic-scale=0.1" ]; then echo "$0: frame-subsampling-factor is not 1 (so likely a chain system)," echo "... but the scale opts are the defaults. You probably want" - echo "--scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0'" + echo "--scale-opts '--acoustic-scale=1.0 '" sleep 1 fi fi diff --git a/egs/wsj/s5/steps/nnet3/align_lats.sh b/egs/wsj/s5/steps/nnet3/align_lats.sh index 201cc3552ba..c3b55e8547c 100755 --- a/egs/wsj/s5/steps/nnet3/align_lats.sh +++ b/egs/wsj/s5/steps/nnet3/align_lats.sh @@ -13,7 +13,6 @@ nj=4 cmd=run.pl stage=-1 # Begin configuration. -scale_opts="--transition-scale=1.0 --self-loop-scale=0.1" acoustic_scale=0.1 beam=20 iter=final @@ -94,13 +93,10 @@ if [ -f $srcdir/frame_subsampling_factor ]; then cp $srcdir/frame_subsampling_factor $dir if [[ $frame_subsampling_factor -gt 1 ]]; then # Assume a chain system, check agrument sanity. - if [[ ! ($scale_opts == *--self-loop-scale=1.0* && - $scale_opts == *--transition-scale=1.0* && - $acoustic_scale = '1.0') ]]; then + if [[ $acoustic_scale = '1.0') ]]; then echo "$0: ERROR: frame-subsampling-factor is not 1, assuming a chain system." echo "... You should pass the following options to this script:" - echo " --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0'" \ - "--acoustic_scale 1.0" + echo "--acoustic_scale 1.0" fi fi fi @@ -122,7 +118,6 @@ if [ $stage -le 0 ]; then ## because the other scripts write them without transition probs. 
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \ $prog --read-disambig-syms=$lang/phones/disambig.int \ - $scale_opts \ $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" \ "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1 fi diff --git a/egs/wsj/s5/steps/nnet3/chain/build_tree.sh b/egs/wsj/s5/steps/nnet3/chain/build_tree.sh index 757963f13a7..ebb9e24902f 100755 --- a/egs/wsj/s5/steps/nnet3/chain/build_tree.sh +++ b/egs/wsj/s5/steps/nnet3/chain/build_tree.sh @@ -23,7 +23,6 @@ context_opts= # e.g. set this to "--context-width 5 --central-position 2" for q cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves frame_subsampling_factor=1 alignment_subsampling_factor= -leftmost_questions_truncate=-1 # note: this option is deprecated and has no effect tree_stats_opts= cluster_phones_opts= repeat_frames=false diff --git a/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh b/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh index 07d5ee8cfb8..3503cc57f50 100755 --- a/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh +++ b/egs/wsj/s5/steps/nnet3/chain/e2e/prepare_e2e.sh @@ -20,7 +20,7 @@ type=mono # Can be either mono or biphone -- either way # the resulting tree is full (i.e. it doesn't do any tying) ci_silence=false # If true, silence phones will be treated as context independent -scale_opts="--transition-scale=0.0 --self-loop-scale=0.0" +scale_opts=" " tie=false # If true, gmm-init-biphone will do some tying when # creating the full biphone tree (it won't be full anymore). # Specifically, it will revert to monophone if the data diff --git a/egs/wsj/s5/steps/nnet3/chain/gen_topo.pl b/egs/wsj/s5/steps/nnet3/chain/gen_topo.pl deleted file mode 100755 index 32dfa272a97..00000000000 --- a/egs/wsj/s5/steps/nnet3/chain/gen_topo.pl +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env perl - -# Copyright 2012 Johns Hopkins University (author: Daniel Povey) - -# Generate a topology file. This allows control of the number of states in the -# non-silence HMMs, and in the silence HMMs. This is a modified version of -# 'utils/gen_topo.pl' that generates a different type of topology, one that we -# believe should be useful in the 'chain' model. Note: right now it doesn't -# have any real options, and it treats silence and nonsilence the same. The -# intention is that you write different versions of this script, or add options, -# if you experiment with it. - -if (@ARGV != 2) { - print STDERR "Usage: utils/gen_topo.pl \n"; - print STDERR "e.g.: utils/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n"; - exit (1); -} - -($nonsil_phones, $sil_phones) = @ARGV; - -$nonsil_phones =~ s/:/ /g; -$sil_phones =~ s/:/ /g; -$nonsil_phones =~ m/^\d[ \d]+$/ || die "$0: bad arguments @ARGV\n"; -$sil_phones =~ m/^\d[ \d]*$/ || die "$0: bad arguments @ARGV\n"; - -print "\n"; -print "\n"; -print "\n"; -print "$nonsil_phones $sil_phones\n"; -print "\n"; -# The next two lines may look like a bug, but they are as intended. State 0 has -# no self-loop, it happens exactly once. And it can go either to state 1 (with -# a self-loop) or to state 2, so we can have zero or more instances of state 1 -# following state 0. -# We make the transition-probs 0.5 so they normalize, to keep the code happy. -# In fact, we always set the transition probability scale to 0.0 in the 'chain' -# code, so they are never used. 
-print " 0 0 1 0.5 2 0.5 \n"; -print " 1 1 1 0.5 2 0.5 \n"; -print " 2 \n"; -print "\n"; -print "\n"; diff --git a/egs/wsj/s5/steps/nnet3/chain/gen_topo.py b/egs/wsj/s5/steps/nnet3/chain/gen_topo.py index 88def77451b..f587d1b8448 100755 --- a/egs/wsj/s5/steps/nnet3/chain/gen_topo.py +++ b/egs/wsj/s5/steps/nnet3/chain/gen_topo.py @@ -32,17 +32,15 @@ nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] all_phones = silence_phones + nonsilence_phones + print("") print("") print("") print(" ".join([str(x) for x in all_phones])) print("") -# We make the transition-probs 0.5 so they normalize, to keep the code happy. -# In fact, we always set the transition probability scale to 0.0 in the 'chain' -# code, so they are never used. -# Note: the will actually happen on the incoming arc because -# we always build the graph with "reorder=true". -print(" 0 0 1 0 0.5 1 0.5 ") -print(" 1 ") +print("0 1 1 0.0") +print("1 1 2 0.69314718055") +print("1 0.69314718055") +print("") print("") print("") diff --git a/egs/wsj/s5/steps/nnet3/chain/gen_topo2.py b/egs/wsj/s5/steps/nnet3/chain/gen_topo2.py deleted file mode 100755 index a33dab666e6..00000000000 --- a/egs/wsj/s5/steps/nnet3/chain/gen_topo2.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2012 Johns Hopkins University (author: Daniel Povey) - -# Generate a topology file. This allows control of the number of states in the -# non-silence HMMs, and in the silence HMMs. This is a modified version of -# 'utils/gen_topo.pl' that generates a different type of topology, one that we -# believe should be useful in the 'chain' model. Note: right now it doesn't -# have any real options, and it treats silence and nonsilence the same. The -# intention is that you write different versions of this script, or add options, -# if you experiment with it. - -from __future__ import print_function -import argparse - - -parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " - " " - "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", - epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); -parser.add_argument("nonsilence_phones", type=str, - help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); -parser.add_argument("silence_phones", type=str, - help="List of silence phones as integers, separated by colons, e.g. 1:2:3"); - -args = parser.parse_args() - -silence_phones = [ int(x) for x in args.silence_phones.split(":") ] -nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] -all_phones = silence_phones + nonsilence_phones - -print("") -print("") -print("") -print(" ".join([str(x) for x in all_phones])) -print("") - -# the pdf-classes are as follows: -# pdf-class 0 is in a 1-frame sequence, the initial and final state. -# pdf-class 1 is in a sequence with >=3 frames, the 'middle' states. (important that -# it be numbered 1, which is the default list of pdf-classes used in 'cluster-phones'). -# pdf-class 2 is the initial-state in a sequence with >= 2 frames. -# pdf-class 3 is the final-state in a sequence with >= 2 frames. -# state 0 is nonemitting in this topology. - -print(" 0 1 0.5 2 0.5 ") # initial nonemitting state. -print(" 1 0 5 1.0 ") # 1-frame sequence. -print(" 2 2 3 0.5 4 0.5 ") # 2 or more frames -print(" 3 1 3 0.5 4 0.5 ") # 3 or more frames -print(" 4 3 5 1.0 ") # 2 or more frames. 
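The replacement entry that the new steps/nnet3/chain/gen_topo.py prints above uses the FSA-style topology format: each line is "from-state to-state pdf-class cost", and a line with only "state cost" gives a final cost; the literal 0.69314718055 is -log(0.5). A minimal sketch of the same entry, with the cost checked (illustrative; the authoritative generator is gen_topo.py itself):

import math

def chain_topology_entry():
    # The first frame of a phone emits pdf-class 1; every additional frame
    # takes the self-loop and emits pdf-class 2; ending has probability 0.5.
    return ["0 1 1 0.0",
            "1 1 2 0.69314718055",
            "1 0.69314718055"]

assert abs(-math.log(0.5) - 0.69314718055) < 1e-10   # the cost is just -log(0.5)
print("\n".join(chain_topology_entry()))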
-print(" 5 ") # final nonemitting state - -print("") -print("") - diff --git a/egs/wsj/s5/steps/nnet3/chain/gen_topo3.py b/egs/wsj/s5/steps/nnet3/chain/gen_topo3.py deleted file mode 100755 index f43f5046813..00000000000 --- a/egs/wsj/s5/steps/nnet3/chain/gen_topo3.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2012 Johns Hopkins University (author: Daniel Povey) - -# Generate a topology file. This allows control of the number of states in the -# non-silence HMMs, and in the silence HMMs. This is a modified version of -# 'utils/gen_topo.pl' that generates a different type of topology, one that we -# believe should be useful in the 'chain' model. Note: right now it doesn't -# have any real options, and it treats silence and nonsilence the same. The -# intention is that you write different versions of this script, or add options, -# if you experiment with it. - -from __future__ import print_function -import argparse - - -parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " - " " - "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", - epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); -parser.add_argument("nonsilence_phones", type=str, - help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); -parser.add_argument("silence_phones", type=str, - help="List of silence phones as integers, separated by colons, e.g. 1:2:3"); - -args = parser.parse_args() - -silence_phones = [ int(x) for x in args.silence_phones.split(":") ] -nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] -all_phones = silence_phones + nonsilence_phones - -print("") -print("") -print("") -print(" ".join([str(x) for x in all_phones])) -print("") -print(" 0 0 0 0.5 1 0.5 ") -print(" 1 ") -print("") -print("") - diff --git a/egs/wsj/s5/steps/nnet3/chain/gen_topo4.py b/egs/wsj/s5/steps/nnet3/chain/gen_topo4.py deleted file mode 100755 index 6d88a6e4449..00000000000 --- a/egs/wsj/s5/steps/nnet3/chain/gen_topo4.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2012 Johns Hopkins University (author: Daniel Povey) - -# Generate a topology file. This allows control of the number of states in the -# non-silence HMMs, and in the silence HMMs. This is a modified version of -# 'utils/gen_topo.pl' that generates a different type of topology, one that we -# believe should be useful in the 'chain' model. Note: right now it doesn't -# have any real options, and it treats silence and nonsilence the same. The -# intention is that you write different versions of this script, or add options, -# if you experiment with it. - -from __future__ import print_function -import argparse - - -parser = argparse.ArgumentParser(description="Usage: steps/nnet3/chain/gen_topo.py " - " " - "e.g.: steps/nnet3/chain/gen_topo.pl 4:5:6:7:8:9:10 1:2:3\n", - epilog="See egs/swbd/s5c/local/chain/train_tdnn_a.sh for example of usage."); -parser.add_argument("nonsilence_phones", type=str, - help="List of non-silence phones as integers, separated by colons, e.g. 4:5:6:7:8:9"); -parser.add_argument("silence_phones", type=str, - help="List of silence phones as integers, separated by colons, e.g. 
1:2:3"); - -args = parser.parse_args() - -silence_phones = [ int(x) for x in args.silence_phones.split(":") ] -nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] -all_phones = silence_phones + nonsilence_phones - -print("") -print("") -print("") -print(" ".join([str(x) for x in all_phones])) -print("") -# state 0 is obligatory (occurs once) -print(" 0 0 1 0.3333 2 0.3333 3 0.3333 ") -# state 1 is used only when >2 frames -print(" 1 1 1 0.5 2 0.5 ") -# state 2 is used only when >=2 frames (and occurs once) -print(" 2 2 3 1.0 ") -print(" 3 ") # final nonemitting state -print("") -print("") - diff --git a/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py b/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py index 1583966b58c..9df502545a5 100755 --- a/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py +++ b/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py @@ -2,6 +2,9 @@ # Copyright 2012 Johns Hopkins University (author: Daniel Povey) +# This script was modified around 11.11.2016, when the code was extended to +# support having a different pdf-class on the self loop. + # Generate a topology file. This allows control of the number of states in the # non-silence HMMs, and in the silence HMMs. This is a modified version of # 'utils/gen_topo.pl' that generates a different type of topology, one that we @@ -29,22 +32,17 @@ nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] all_phones = silence_phones + nonsilence_phones + print("") print("") print("") print(" ".join([str(x) for x in all_phones])) print("") -# state 0 is nonemitting -print(" 0 1 0.5 2 0.5 ") -# state 1 is for when we traverse it in 1 state -print(" 1 0 4 1.0 ") -# state 2 is for when we traverse it in >1 state, for the first state. -print(" 2 2 3 1.0 ") -# state 3 is for the self-loop. Use pdf-class 1 here so that the default -# phone-class clustering (which uses only pdf-class 1 by default) gets only -# stats from longer phones. -print(" 3 1 3 0.5 4 0.5 ") -print(" 4 ") +print("0 1 1 0.69314718055") +print("0 2 3 0.69314718055") +print("1 1 2 0.69314718055") +print("1 0.69314718055") +print("2 0.0") +print("") print("") print("") - diff --git a/egs/wsj/s5/steps/nnet3/get_degs.sh b/egs/wsj/s5/steps/nnet3/get_degs.sh index 7853daa4563..4007419bc47 100755 --- a/egs/wsj/s5/steps/nnet3/get_degs.sh +++ b/egs/wsj/s5/steps/nnet3/get_degs.sh @@ -59,9 +59,6 @@ nj=200 iter=final -# decoding-graph option -self_loop_scale=0.1 # for decoding graph.. should be 1.0 for chain models. - # options relating to decoding. frames_per_chunk_decoding=150 beam=13.0 @@ -156,21 +153,10 @@ if [ -f $srcdir/frame_subsampling_factor ]; then # e.g. for 'chain' systems frame_subsampling_opt="--frame-subsampling-factor=$frame_subsampling_factor" cp $srcdir/frame_subsampling_factor $dir - if [ $frame_subsampling_factor -ne 1 ] && [ "$self_loop_scale" == "0.1" ]; then - echo "$0: warning: frame_subsampling_factor is not 1 (so likely a chain system)," - echo "... but self-loop-scale is 0.1. Make sure this is not a mistake." - sleep 1 - fi else frame_subsampling_factor=1 fi -if [ "$self_loop_scale" == "1.0" ] && [ "$acwt" == 0.1 ]; then - echo "$0: warning: you set --self-loop-scale=1.0 (so likely a chain system)", - echo " ... but the acwt is still 0.1 (you probably want --acwt 1.0)" - sleep 1 -fi - ## Make the decoding graph. 
if [ $stage -le 0 ]; then new_lang="$dir/"$(basename "$lang") @@ -183,7 +169,7 @@ if [ $stage -le 0 ]; then utils/make_unigram_grammar.pl | fstcompile | fstarcsort --sort_type=ilabel > $new_lang/G.fst \ || exit 1; - utils/mkgraph.sh --self-loop-scale $self_loop_scale $new_lang $srcdir $dir/dengraph || exit 1; + utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1; fi # copy alignments into ark,scp format which allows us to use different num-jobs diff --git a/egs/wsj/s5/steps/nnet3/make_denlats.sh b/egs/wsj/s5/steps/nnet3/make_denlats.sh index 36da179bbaf..29df25cbf91 100755 --- a/egs/wsj/s5/steps/nnet3/make_denlats.sh +++ b/egs/wsj/s5/steps/nnet3/make_denlats.sh @@ -17,7 +17,6 @@ sub_split=1 beam=13.0 frames_per_chunk=50 lattice_beam=7.0 -self_loop_scale=0.1 acwt=0.1 max_active=5000 min_active=200 @@ -102,7 +101,7 @@ else awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \ utils/make_unigram_grammar.pl | fstcompile | fstarcsort --sort_type=ilabel > $new_lang/G.fst \ || exit 1; - utils/mkgraph.sh --self-loop-scale $self_loop_scale $new_lang $srcdir $dir/dengraph || exit 1; + utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1; fi cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null` cp $srcdir/cmvn_opts $dir 2>/dev/null diff --git a/egs/wsj/s5/steps/online/nnet2/align.sh b/egs/wsj/s5/steps/online/nnet2/align.sh index c24bbf0291e..249947ecfd9 100755 --- a/egs/wsj/s5/steps/online/nnet2/align.sh +++ b/egs/wsj/s5/steps/online/nnet2/align.sh @@ -14,7 +14,7 @@ nj=4 cmd=run.pl # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 iter=final diff --git a/egs/wsj/s5/steps/segmentation/internal/prepare_sad_graph.py b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_graph.py index 12c9bb1e902..e1d6702212e 100755 --- a/egs/wsj/s5/steps/segmentation/internal/prepare_sad_graph.py +++ b/egs/wsj/s5/steps/segmentation/internal/prepare_sad_graph.py @@ -41,13 +41,6 @@ def get_args(): duration constraint.""", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--transition-scale", type=float, default=1.0, - help="""Scale on transition probabilities relative to - LM weights""") - parser.add_argument("--loopscale", type=float, default=0.1, - help="""Scale on self-loop log-probabilities relative - to LM weights""") - parser.add_argument("--min-silence-duration", type=float, default=0.03, help="""Minimum duration for silence""") parser.add_argument("--min-speech-duration", type=float, default=0.3, diff --git a/egs/wsj/s5/steps/tandem/align_fmllr.sh b/egs/wsj/s5/steps/tandem/align_fmllr.sh index 0b012e24146..12526f6f792 100755 --- a/egs/wsj/s5/steps/tandem/align_fmllr.sh +++ b/egs/wsj/s5/steps/tandem/align_fmllr.sh @@ -19,7 +19,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 boost_silence=1.0 # factor by which to boost silence during alignment. diff --git a/egs/wsj/s5/steps/tandem/align_sgmm2.sh b/egs/wsj/s5/steps/tandem/align_sgmm2.sh index 48eb1fbef43..ab41834dfcb 100755 --- a/egs/wsj/s5/steps/tandem/align_sgmm2.sh +++ b/egs/wsj/s5/steps/tandem/align_sgmm2.sh @@ -19,7 +19,7 @@ use_gselect=false # use gselect info from srcdir [regardless, we use # Gaussian-selection info, we might have to compute it though.] gselect=15 # Number of Gaussian-selection indices for SGMMs. # Begin configuration. 
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 transform_dir= # directory to find fMLLR transforms in. diff --git a/egs/wsj/s5/steps/tandem/align_si.sh b/egs/wsj/s5/steps/tandem/align_si.sh index 4e52c51e308..1cd9e534165 100755 --- a/egs/wsj/s5/steps/tandem/align_si.sh +++ b/egs/wsj/s5/steps/tandem/align_si.sh @@ -16,7 +16,7 @@ nj=4 cmd=run.pl use_graphs=false # Begin configuration. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 boost_silence=1.0 # Factor by which to boost silence during alignment. diff --git a/egs/wsj/s5/steps/tandem/train_deltas.sh b/egs/wsj/s5/steps/tandem/train_deltas.sh index d6a1baa6623..70fb30dcb9c 100755 --- a/egs/wsj/s5/steps/tandem/train_deltas.sh +++ b/egs/wsj/s5/steps/tandem/train_deltas.sh @@ -8,7 +8,7 @@ stage=-4 # This allows restarting after partway, when something when wrong. config= cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; num_iters=35 # Number of iterations of training max_iter_inc=25 # Last iter to increase #Gauss on. diff --git a/egs/wsj/s5/steps/tandem/train_lda_mllt.sh b/egs/wsj/s5/steps/tandem/train_lda_mllt.sh index a5fa4ea8786..67ca80b11ff 100755 --- a/egs/wsj/s5/steps/tandem/train_lda_mllt.sh +++ b/egs/wsj/s5/steps/tandem/train_lda_mllt.sh @@ -8,7 +8,7 @@ cmd=run.pl config= stage=-5 -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; mllt_iters="2 4 6 12"; num_iters=35 # Number of iterations of training diff --git a/egs/wsj/s5/steps/tandem/train_mllt.sh b/egs/wsj/s5/steps/tandem/train_mllt.sh index 7d46074baec..e8796c8f5db 100755 --- a/egs/wsj/s5/steps/tandem/train_mllt.sh +++ b/egs/wsj/s5/steps/tandem/train_mllt.sh @@ -12,7 +12,7 @@ cmd=run.pl config= stage=-5 -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; mllt_iters="2 4 6 12"; num_iters=35 # Number of iterations of training diff --git a/egs/wsj/s5/steps/tandem/train_mono.sh b/egs/wsj/s5/steps/tandem/train_mono.sh index b5c55f6f369..486478709d6 100755 --- a/egs/wsj/s5/steps/tandem/train_mono.sh +++ b/egs/wsj/s5/steps/tandem/train_mono.sh @@ -11,7 +11,7 @@ # Begin configuration section. nj=4 cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" num_iters=40 # Number of iterations of training max_iter_inc=30 # Last iter to increase #Gauss on. totgauss=1000 # Target #Gaussians. 
diff --git a/egs/wsj/s5/steps/tandem/train_sat.sh b/egs/wsj/s5/steps/tandem/train_sat.sh index 09e3f625674..2bfd2130b55 100755 --- a/egs/wsj/s5/steps/tandem/train_sat.sh +++ b/egs/wsj/s5/steps/tandem/train_sat.sh @@ -14,7 +14,7 @@ stage=-5 fmllr_update_type=full cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment diff --git a/egs/wsj/s5/steps/tandem/train_sgmm2.sh b/egs/wsj/s5/steps/tandem/train_sgmm2.sh index daa0437b47b..2df69708c8a 100755 --- a/egs/wsj/s5/steps/tandem/train_sgmm2.sh +++ b/egs/wsj/s5/steps/tandem/train_sgmm2.sh @@ -16,7 +16,7 @@ cmd=run.pl stage=-6 # use this to resume partially finished training context_opts= # e.g. set it to "--context-width=5 --central-position=2" for a # quinphone system. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" num_iters=25 # Total number of iterations of training num_iters_alimdl=3 # Number of iterations for estimating alignment model. max_iter_inc=15 # Last iter to increase #substates on. diff --git a/egs/wsj/s5/steps/train_deltas.sh b/egs/wsj/s5/steps/train_deltas.sh index 7deace6b13e..452cb2852cd 100755 --- a/egs/wsj/s5/steps/train_deltas.sh +++ b/egs/wsj/s5/steps/train_deltas.sh @@ -7,7 +7,7 @@ stage=-4 # This allows restarting after partway, when something when wrong. config= cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; num_iters=35 # Number of iterations of training max_iter_inc=25 # Last iter to increase #Gauss on. diff --git a/egs/wsj/s5/steps/train_lda_mllt.sh b/egs/wsj/s5/steps/train_lda_mllt.sh index a1828aa6fcb..60ce32bbd35 100755 --- a/egs/wsj/s5/steps/train_lda_mllt.sh +++ b/egs/wsj/s5/steps/train_lda_mllt.sh @@ -14,7 +14,7 @@ cmd=run.pl config= stage=-5 -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; mllt_iters="2 4 6 12"; num_iters=35 # Number of iterations of training diff --git a/egs/wsj/s5/steps/train_lvtln.sh b/egs/wsj/s5/steps/train_lvtln.sh index 111e0598edf..264171da00a 100755 --- a/egs/wsj/s5/steps/train_lvtln.sh +++ b/egs/wsj/s5/steps/train_lvtln.sh @@ -17,7 +17,7 @@ stage=-6 # This allows restarting after partway, when something when wrong. config= cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; num_iters=35 # Number of iterations of training max_iter_inc=25 # Last iter to increase #Gauss on. diff --git a/egs/wsj/s5/steps/train_mono.sh b/egs/wsj/s5/steps/train_mono.sh index 5a0b79a4a1c..3b4744db2ed 100755 --- a/egs/wsj/s5/steps/train_mono.sh +++ b/egs/wsj/s5/steps/train_mono.sh @@ -11,7 +11,7 @@ # Begin configuration section. nj=4 cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" num_iters=40 # Number of iterations of training max_iter_inc=30 # Last iter to increase #Gauss on. initial_beam=6 # beam used in the first iteration (set smaller to speed up initialization) diff --git a/egs/wsj/s5/steps/train_quick.sh b/egs/wsj/s5/steps/train_quick.sh index 3325c4964e9..4e3c807484a 100755 --- a/egs/wsj/s5/steps/train_quick.sh +++ b/egs/wsj/s5/steps/train_quick.sh @@ -10,7 +10,7 @@ # Begin configuration.. 
cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 15"; # Only realign twice. num_iters=20 # Number of iterations of training maxiterinc=15 # Last iter to increase #Gauss on. diff --git a/egs/wsj/s5/steps/train_raw_sat.sh b/egs/wsj/s5/steps/train_raw_sat.sh index aa5e8813d71..615988096e7 100755 --- a/egs/wsj/s5/steps/train_raw_sat.sh +++ b/egs/wsj/s5/steps/train_raw_sat.sh @@ -14,7 +14,7 @@ # Begin configuration section. stage=-6 cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment diff --git a/egs/wsj/s5/steps/train_sat.sh b/egs/wsj/s5/steps/train_sat.sh index 92b744dc75c..4219b52f804 100755 --- a/egs/wsj/s5/steps/train_sat.sh +++ b/egs/wsj/s5/steps/train_sat.sh @@ -17,7 +17,7 @@ exit_stage=-100 # you can use this to require it to exit at the # supported. fmllr_update_type=full cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 careful=false diff --git a/egs/wsj/s5/steps/train_sat_basis.sh b/egs/wsj/s5/steps/train_sat_basis.sh index 5245ea0c619..c2b5591a773 100755 --- a/egs/wsj/s5/steps/train_sat_basis.sh +++ b/egs/wsj/s5/steps/train_sat_basis.sh @@ -13,7 +13,7 @@ # Begin configuration section. stage=-5 cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" beam=10 retry_beam=40 boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment diff --git a/egs/wsj/s5/steps/train_segmenter.sh b/egs/wsj/s5/steps/train_segmenter.sh index 515005c0257..64d006c2e75 100755 --- a/egs/wsj/s5/steps/train_segmenter.sh +++ b/egs/wsj/s5/steps/train_segmenter.sh @@ -8,7 +8,7 @@ stage=-4 # For restarting a process that went part way. config= cmd=run.pl -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" realign_iters="10 20 30"; num_iters=35 # Number of iterations of training max_iter_inc=25 # Last iter to increase #Gauss on. diff --git a/egs/wsj/s5/steps/train_sgmm2.sh b/egs/wsj/s5/steps/train_sgmm2.sh index 7f7df2e046a..812387599af 100755 --- a/egs/wsj/s5/steps/train_sgmm2.sh +++ b/egs/wsj/s5/steps/train_sgmm2.sh @@ -14,7 +14,7 @@ cmd=run.pl stage=-6 # use this to resume partially finished training context_opts= # e.g. set it to "--context-width=5 --central-position=2" for a # quinphone system. -scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" num_iters=25 # Total number of iterations of training num_iters_alimdl=3 # Number of iterations for estimating alignment model. max_iter_inc=15 # Last iter to increase #substates on. diff --git a/egs/wsj/s5/steps/train_sgmm2_group.sh b/egs/wsj/s5/steps/train_sgmm2_group.sh index 7263e2d5e8e..59cfb51e9ab 100755 --- a/egs/wsj/s5/steps/train_sgmm2_group.sh +++ b/egs/wsj/s5/steps/train_sgmm2_group.sh @@ -17,7 +17,7 @@ cmd=run.pl stage=-6 # use this to resume partially finished training context_opts= # e.g. set it to "--context-width=5 --central-position=2" for a # quinphone system. 
-scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1" +scale_opts="--acoustic-scale=0.1" num_iters=25 # Total number of iterations of training num_iters_alimdl=3 # Number of iterations for estimating alignment model. max_iter_inc=15 # Last iter to increase #substates on. diff --git a/egs/wsj/s5/utils/gen_topo.pl b/egs/wsj/s5/utils/gen_topo.pl index 1c02ed0eaeb..896f41d870d 100755 --- a/egs/wsj/s5/utils/gen_topo.pl +++ b/egs/wsj/s5/utils/gen_topo.pl @@ -4,6 +4,9 @@ # Generate a topology file. This allows control of the number of states in the # non-silence HMMs, and in the silence HMMs. +# This is the topology we use for GMM training, which is, when configured +# with 3 states, the Bakis model. For chain (lattice-free MMI) training, see +# steps/chain/gen_topo.pl. if (@ARGV != 4) { print STDERR "Usage: utils/gen_topo.pl \n"; @@ -28,52 +31,75 @@ print "\n"; print "$nonsil_phones\n"; print "\n"; -for ($state = 0; $state < $num_nonsil_states; $state++) { - $statep1 = $state+1; - print " $state $state $state 0.75 $statep1 0.25 \n"; +# The following is the single transition leaving the start-state. It has pdf-id +# 1, corresponding to state 1 which it enters.. The cost is 0.0 = log(1); there +# is only one choice here. Note: there are actually $num_nonsil_states + 1 +# states, but in HMM terms it's equivalent to $num_nonsil_states states; +# and that's the length of the shortest successful path. +print "0 1 1 0.0\n"; +for ($state = 1; $state <= $num_nonsil_states; $state++) { + $pdf_class = $state; + $next_state = $state + 1; + $next_pdf_class = $next_state; + # self-loop. + print "$state $state $pdf_class 0.6931471806\n"; + if ($next_state <= $num_nonsil_states) { + print "$state $next_state $next_pdf_class 0.6931471806\n"; + } else { + print "$state 0.6931471806\n"; # final-prob. + } } -print " $num_nonsil_states \n"; # non-emitting final state. +print "\n"; # terminate the FSA.. empty line marks its end. print "\n"; # Now silence phones. They have a different topology-- apart from the first and # last states, it's fully connected, as long as you have >= 3 states. +print "\n"; +print "\n"; +print "$sil_phones\n"; +print "\n"; + + +print "0 1 1 0.0\n"; if ($num_sil_states > 1) { - $transp = 1.0 / ($num_sil_states-1); - print "\n"; - print "\n"; - print "$sil_phones\n"; - print "\n"; - print " 0 0 "; - for ($nextstate = 0; $nextstate < $num_sil_states-1; $nextstate++) { # Transitions to all but last - # emitting state. - print " $nextstate $transp "; + # Note: actually it must be >= 3, we checked this above; + # 2 is disallowed (I know, it's odd). + # Also note: $num_sil_states is not actually the number of states + # in the FSA; it's the number of states in its HMM equivalent. + # the FSA has one extra state, state 0. + # we'll treat the final state, numbered $num_sil_states, + # separately; it doesn't have the transition back to + # lower-numbered states. + + $self_loop_cost = 0.6931471806; # -log(0.5) + $non_self_loop_cost = -log(0.5 / ($num_sil_states - 2)); + + $state = 1; + $pdf_id = $state; + print "$state $state $pdf_id $self_loop_cost\n"; + for ($next_state = 2; $next_state < $num_sil_states; $next_state++) { + $next_pdf_id = $next_state; + print "$state $next_state $next_pdf_id $non_self_loop_cost\n"; } - print "\n"; - for ($state = 1; $state < $num_sil_states-1; $state++) { # the central states all have transitions to - # themselves and to the last emitting state. 
- print " $state $state "; - for ($nextstate = 1; $nextstate < $num_sil_states; $nextstate++) { - print " $nextstate $transp "; + + for ($state = 2; $state < $num_sil_states; $state++) { + $pdf_id = $state; + for ($next_state = 2; $next_state <= $num_sil_states; $next_state++) { + my $cost = ($next_state == $state ? $self_loop_cost : $non_self_loop_cost); + $next_pdf_id = $next_state; + print "$state $next_state $next_pdf_id $cost\n"; } - print "\n"; } - # Final emitting state (non-skippable). - $state = $num_sil_states-1; - print " $state $state $state 0.75 $num_sil_states 0.25 \n"; - # Final nonemitting state: - print " $num_sil_states \n"; - print "\n"; + $final_state = $num_sil_states; + $pdf_id = $final_state; + print "$final_state $final_state $pdf_id $self_loop_cost\n"; + print "$final_state 0.6931471806\n"; + print "\n"; } else { - print "\n"; - print "\n"; - print "$sil_phones\n"; - print "\n"; - print " 0 0 "; - print " 0 0.75 "; - print " 1 0.25 "; - print "\n"; - print " $num_sil_states \n"; # non-emitting final state. - print "\n"; + print "0 0 1 0.6931471806\n"; + print "1 1 1 0.6931471806\n"; + print "1 0.6931471806\n"; + print "\n"; } - +print "\n"; print "\n"; diff --git a/egs/wsj/s5/utils/mkgraph.sh b/egs/wsj/s5/utils/mkgraph.sh index 31e86cd38f6..8346c69ffb7 100755 --- a/egs/wsj/s5/utils/mkgraph.sh +++ b/egs/wsj/s5/utils/mkgraph.sh @@ -15,17 +15,12 @@ set -o pipefail -tscale=1.0 -loopscale=0.1 - remove_oov=false for x in `seq 4`; do [ "$1" == "--mono" -o "$1" == "--left-biphone" -o "$1" == "--quinphone" ] && shift && \ echo "WARNING: the --mono, --left-biphone and --quinphone options are now deprecated and ignored." [ "$1" == "--remove-oov" ] && remove_oov=true && shift; - [ "$1" == "--transition-scale" ] && tscale=$2 && shift 2; - [ "$1" == "--self-loop-scale" ] && loopscale=$2 && shift 2; done if [ $# != 3 ]; then @@ -34,8 +29,6 @@ if [ $# != 3 ]; then echo " Options:" echo " --remove-oov # If true, any paths containing the OOV symbol (obtained from oov.int" echo " # in the lang directory) are removed from the G.fst during compilation." - echo " --transition-scale # Scaling factor on transition probabilities." - echo " --self-loop-scale # Please see: http://kaldi-asr.org/doc/hmm.html#hmm_scale." echo "Note: the --mono, --left-biphone and --quinphone options are now deprecated" echo "and will be ignored." exit 1; @@ -75,8 +68,6 @@ fi N=$(tree-info $tree | grep "context-width" | cut -d' ' -f2) || { echo "Error when getting context-width"; exit 1; } P=$(tree-info $tree | grep "central-position" | cut -d' ' -f2) || { echo "Error when getting central-position"; exit 1; } -[[ -f $2/frame_subsampling_factor && "$loopscale" == "0.1" ]] && \ - echo "$0: WARNING: chain models need '--self-loop-scale 1.0'"; if [ -f $lang/phones/nonterm_phones_offset.int ]; then if [[ $N != 2 || $P != 1 ]]; then @@ -124,7 +115,7 @@ trap "rm -f $dir/Ha.fst.$$" EXIT HUP INT PIPE TERM if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model \ || $dir/Ha.fst -ot $lang/tmp/ilabels_${N}_${P} ]]; then make-h-transducer $nonterm_opt --disambig-syms-out=$dir/disambig_tid.int \ - --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \ + $lang/tmp/ilabels_${N}_${P} $tree $model \ > $dir/Ha.fst.$$ || exit 1; mv $dir/Ha.fst.$$ $dir/Ha.fst fi @@ -146,14 +137,11 @@ fi trap "rm -f $dir/HCLG.fst.$$" EXIT HUP INT PIPE TERM if [[ ! 
-s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then - add-self-loops --self-loop-scale=$loopscale --reorder=true $model $dir/HCLGa.fst | \ + add-self-loops $model $dir/HCLGa.fst | \ $prepare_grammar_command | \ fstconvert --fst_type=const > $dir/HCLG.fst.$$ || exit 1; mv $dir/HCLG.fst.$$ $dir/HCLG.fst - if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then - # No point doing this test if transition-scale not 1, as it is bound to fail. - fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic." - fi + fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic." fi # note: the empty FST has 66 bytes. this check is for whether the final FST diff --git a/egs/wsj/s5/utils/validate_lang.pl b/egs/wsj/s5/utils/validate_lang.pl index 8dba2a0ca69..88230d428e1 100755 --- a/egs/wsj/s5/utils/validate_lang.pl +++ b/egs/wsj/s5/utils/validate_lang.pl @@ -627,19 +627,20 @@ sub check_summation { %phones_in_topo_int_hash = ( ); %phones_in_topo_hash = ( ); while () { - chomp; - next if (m/^<.*>[ ]*$/); - foreach $i (split(" ", $_)) { - if (defined $phones_in_topo_int_hash{$i}) { - $topo_ok = 0; - $exit = 1; print "--> ERROR: $lang/topo has phone $i twice\n"; - } - if (!defined $pint2sym{$i}) { - $topo_ok = 0; - $exit = 1; print "--> ERROR: $lang/topo has phone $i which is not in phones.txt\n"; + if (m//) { + my $line = ; + foreach $phone (split(" ", $line)) { + if (defined $phones_in_topo_int_hash{$phone}) { + $topo_ok = 0; + $exit = 1; print "--> ERROR: $lang/topo has phone $phone twice\n"; + } + if (!defined $pint2sym{$phone}) { + $topo_ok = 0; + $exit = 1; print "--> ERROR: $lang/topo has phone $phone which is not in phones.txt\n"; + } + $phones_in_topo_int_hash{$phone} = 1; + $phones_in_topo_hash{$pint2sym{$phone}} = 1; } - $phones_in_topo_int_hash{$i} = 1; - $phones_in_topo_hash{$pint2sym{$i}} = 1; } } close(T); @@ -816,8 +817,8 @@ sub check_summation { # Check validity of L.fst, L_disambig.fst, and word_boundary.int. # First we generate a random word/subword sequence. We then compile it into fst and compose it with L.fst/L_disambig.fst. -# For subword case the last subword of the sequence must be a end-subword -# (i.e. the subword can only be at the end of word or is a single word itself) +# For subword case the last subword of the sequence must be a end-subword +# (i.e. the subword can only be at the end of word or is a single word itself) # to guarantee the composition would not fail. # We then get the corresponging phones sequence and apply a transition matrix on it to get the number of valid boundaries. # In word case, the number of valid boundaries should be equal to the number of words. 
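For the topo-reading loop changed earlier in this validate_lang.pl hunk, here is a rough Python rendering of the check it performs, under the assumption (based on the FSA-style topo files generated above) that the phone integers sit on the line following the ForPhones marker:

def check_topo_phones(topo_lines, pint2sym):
    # Each phone id may be listed under a ForPhones block at most once, and every
    # id must exist in phones.txt (represented here by the pint2sym mapping).
    seen = set()
    errors = []
    lines = iter(topo_lines)
    for line in lines:
        if "<ForPhones>" in line:
            for phone in next(lines, "").split():
                if phone in seen:
                    errors.append("topo has phone {0} twice".format(phone))
                if phone not in pint2sym:
                    errors.append("topo has phone {0} which is not in phones.txt"
                                  .format(phone))
                seen.add(phone)
    return errors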
@@ -883,14 +884,14 @@ sub check_summation { $end_subword ++; } } - } + } # generate the last word (subword) $id = int(rand(scalar(keys %wint2sym))); if ($subword_check) { $subword = $wint2sym{$id}; $suffix = substr($subword, -$separator_length, $separator_length); - # the last subword can not followed by separator + # the last subword can not followed by separator while (defined $wdisambig_words_hash{$id} or $wint2sym{$id} eq "" or $wint2sym{$id} eq "" or $wint2sym{$id} =~ m/^#nonterm/ or $id == 0 or $suffix eq $separator) { @@ -952,7 +953,7 @@ sub check_summation { } } if (!$exit) { - if ($subword_check) { + if ($subword_check) { $wlen = $end_subword; } if ($num_words != $wlen) { diff --git a/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh index 700b57d9fce..90fae4d4015 100755 --- a/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_fa/v1/local/chain/run_cnn_e2eali_1b.sh @@ -102,7 +102,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} $data_dir/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts @@ -227,7 +227,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $data_dir/$lang_test \ + $data_dir/$lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_fa/v1/local/chain/run_flatstart_cnn1a.sh b/egs/yomdle_fa/v1/local/chain/run_flatstart_cnn1a.sh index bb5352943f6..6adde439b00 100755 --- a/egs/yomdle_fa/v1/local/chain/run_flatstart_cnn1a.sh +++ b/egs/yomdle_fa/v1/local/chain/run_flatstart_cnn1a.sh @@ -155,7 +155,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $data_dir/$lang_test \ + $data_dir/$lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_fa/v1/run.sh b/egs/yomdle_fa/v1/run.sh index a7547b1ee69..da75679a8b5 100755 --- a/egs/yomdle_fa/v1/run.sh +++ b/egs/yomdle_fa/v1/run.sh @@ -99,7 +99,7 @@ if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model..." echo "Date: $(date)." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \ + \ $data_dir/train_aug $data_dir/lang $exp_dir/chain/e2e_cnn_1a $exp_dir/chain/e2e_ali_train fi diff --git a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 03333f6d229..ad00b8d4774 100755 --- a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -98,7 +98,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -216,7 +216,7 @@ if [ $stage -le 6 ] && $decode_chain; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index fd9cdc8921d..3e9197e7e42 100755 --- a/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_korean/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -95,7 +95,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi diff --git a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh index f6b2c1bac42..5fa8d3a0d29 100755 --- a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh @@ -100,7 +100,7 @@ for f in data/$supervised_set/feats.scp \ done if [ ! -f $graphdir/HCLG.fst ]; then - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $sup_chain_dir $graphdir + utils/mkgraph.sh $lang_decode $sup_chain_dir $graphdir fi # Decode unsupervised data and write lattices in non-compact @@ -312,7 +312,7 @@ if [ $stage -le 17 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $dir $dir/graph + utils/mkgraph.sh $lang_decode $dir $dir/graph fi if [ $stage -le 18 ]; then diff --git a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh index 8185fa2645d..cef080071b1 100755 --- a/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh +++ b/egs/yomdle_korean/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh @@ -99,7 +99,7 @@ for f in data/$supervised_set/feats.scp \ done if [ ! -f $graphdir/HCLG.fst ]; then - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $sup_chain_dir $graphdir + utils/mkgraph.sh $lang_decode $sup_chain_dir $graphdir fi # Decode unsupervised data and write lattices in non-compact @@ -310,7 +310,7 @@ if [ $stage -le 17 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $dir $dir/graph + utils/mkgraph.sh $lang_decode $dir $dir/graph fi if [ $stage -le 18 ]; then diff --git a/egs/yomdle_korean/v1/run_end2end.sh b/egs/yomdle_korean/v1/run_end2end.sh index 65f5beb4b08..193e6eebff3 100755 --- a/egs/yomdle_korean/v1/run_end2end.sh +++ b/egs/yomdle_korean/v1/run_end2end.sh @@ -127,7 +127,7 @@ fi if [ $stage -le 7 ]; then echo "$(date) stage 7: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \ + \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi @@ -152,7 +152,7 @@ if [ $stage -le 10 ] && $decode_e2e; then echo "$(date) stage 10: decoding end2end setup..." 
utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ @@ -170,7 +170,7 @@ if [ $stage -le 11 ] && $decode_chain; then echo "$(date) stage 11: decoding chain alignment setup..." utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ exp/chain/cnn_e2eali_1a/ exp/chain/cnn_e2eali_1a/graph || exit 1; frames_per_chunk=$(echo $chunk_width | cut -d, -f1) diff --git a/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index cd582472993..969f50dc857 100755 --- a/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/yomdle_russian/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -90,7 +90,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi diff --git a/egs/yomdle_russian/v1/run_end2end.sh b/egs/yomdle_russian/v1/run_end2end.sh index 12beebeaa05..03525a22d54 100755 --- a/egs/yomdle_russian/v1/run_end2end.sh +++ b/egs/yomdle_russian/v1/run_end2end.sh @@ -127,7 +127,7 @@ fi if [ $stage -le 7 ]; then echo "$0: $(date) stage 7: Aligning the training data using the e2e chain model..." steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ - --scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \ + \ data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi @@ -152,7 +152,7 @@ if [ $stage -le 10 ] && $decode_e2e; then echo "$0: $(date) stage 10: decoding end2end setup..." utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ exp/chain/e2e_cnn_1a/ exp/chain/e2e_cnn_1a/graph || exit 1; steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ @@ -170,7 +170,7 @@ if [ $stage -le 11 ] && $decode_chain; then echo "$0: $(date) stage 11: decoding chain alignment setup..." utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ exp/chain/cnn_e2eali_1a/ exp/chain/cnn_e2eali_1a/graph || exit 1; frames_per_chunk=$(echo $chunk_width | cut -d, -f1) diff --git a/egs/yomdle_tamil/v1/local/chain/run_e2e_cnn.sh b/egs/yomdle_tamil/v1/local/chain/run_e2e_cnn.sh index f553467d4a6..7145dd365a4 100755 --- a/egs/yomdle_tamil/v1/local/chain/run_e2e_cnn.sh +++ b/egs/yomdle_tamil/v1/local/chain/run_e2e_cnn.sh @@ -141,7 +141,7 @@ if [ $stage -le 4 ] && $decode_e2e; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh index 03333f6d229..ad00b8d4774 100755 --- a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh +++ b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1a.sh @@ -98,7 +98,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -216,7 +216,7 @@ if [ $stage -le 6 ] && $decode_chain; then # as long as phones.txt was compatible. 
utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh index fb15ce10dde..a531d966dad 100755 --- a/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_tamil/v1/local/chain/tuning/run_cnn_e2eali_1b.sh @@ -96,7 +96,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} data/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts fi @@ -216,7 +216,7 @@ if [ $stage -le 6 ] && $decode_chain; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $lang_decode \ + $lang_decode \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh index f6b2c1bac42..5fa8d3a0d29 100755 --- a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh +++ b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1a.sh @@ -100,7 +100,7 @@ for f in data/$supervised_set/feats.scp \ done if [ ! -f $graphdir/HCLG.fst ]; then - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $sup_chain_dir $graphdir + utils/mkgraph.sh $lang_decode $sup_chain_dir $graphdir fi # Decode unsupervised data and write lattices in non-compact @@ -312,7 +312,7 @@ if [ $stage -le 17 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $dir $dir/graph + utils/mkgraph.sh $lang_decode $dir $dir/graph fi if [ $stage -le 18 ]; then diff --git a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh index 17d59642b05..dae34d51f20 100755 --- a/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh +++ b/egs/yomdle_tamil/v1/local/semisup/chain/run_cnn_chainali_semisupervised_1b.sh @@ -99,7 +99,7 @@ for f in data/$supervised_set/feats.scp \ done if [ ! -f $graphdir/HCLG.fst ]; then - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $sup_chain_dir $graphdir + utils/mkgraph.sh $lang_decode $sup_chain_dir $graphdir fi # Decode unsupervised data and write lattices in non-compact @@ -308,7 +308,7 @@ if [ $stage -le 17 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $lang_decode $dir $dir/graph + utils/mkgraph.sh $lang_decode $dir $dir/graph fi if [ $stage -le 18 ]; then diff --git a/egs/yomdle_tamil/v1/run_end2end.sh b/egs/yomdle_tamil/v1/run_end2end.sh index e6a8e0a4432..55a4d7bc83d 100755 --- a/egs/yomdle_tamil/v1/run_end2end.sh +++ b/egs/yomdle_tamil/v1/run_end2end.sh @@ -155,7 +155,7 @@ if [ $stage -le 8 ]; then echo "$(date) stage 8: Aligning the training data using the e2e chain model..." 
steps/nnet3/align.sh --nj $nj --cmd "$cmd" \ --use-gpu false \ - --scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \ + \ data/train_aug data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train fi diff --git a/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh index 357ce6a1f8e..d12366f7923 100755 --- a/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/yomdle_zh/v1/local/chain/run_cnn_e2eali_1b.sh @@ -89,7 +89,7 @@ if [ $stage -le 2 ]; then # use the same num-jobs as the alignments steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \ --acoustic-scale 1.0 \ - --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \ + \ ${train_data_dir} $data_dir/lang $e2echain_model_dir $lat_dir echo "" >$lat_dir/splice_opts @@ -215,7 +215,7 @@ if [ $stage -le 6 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $data_dir/$lang_test \ + $data_dir/$lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_zh/v1/local/chain/run_flatstart_cnn1a.sh b/egs/yomdle_zh/v1/local/chain/run_flatstart_cnn1a.sh index 28ea2863e38..9ed5df36729 100755 --- a/egs/yomdle_zh/v1/local/chain/run_flatstart_cnn1a.sh +++ b/egs/yomdle_zh/v1/local/chain/run_flatstart_cnn1a.sh @@ -156,7 +156,7 @@ if [ $stage -le 4 ]; then # as long as phones.txt was compatible. utils/mkgraph.sh \ - --self-loop-scale 1.0 $data_dir/$lang_test \ + $data_dir/$lang_test \ $dir $dir/graph || exit 1; fi diff --git a/egs/yomdle_zh/v1/run.sh b/egs/yomdle_zh/v1/run.sh index eb8e9e11927..75f11b13dcb 100755 --- a/egs/yomdle_zh/v1/run.sh +++ b/egs/yomdle_zh/v1/run.sh @@ -102,7 +102,7 @@ if [ $stage -le 6 ]; then echo "$0: Aligning the training data using the e2e chain model..." echo "Date: $(date)." 
steps/nnet3/align.sh --nj $nj --cmd "$cmd" --use-gpu false \ - --scale-opts '--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0' \ + \ $data_dir/train_aug $data_dir/lang $exp_dir/chain/e2e_cnn_1a $exp_dir/chain/e2e_ali_train fi diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh index 14b9a8d6c8e..02706d98602 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_1a.sh @@ -252,7 +252,7 @@ if [ $stage -le 13 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgsmall/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ + data/lang_test_tgsmall \ $tree_dir $tree_dir/graph_tgsmall || exit 1; fi diff --git a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh index 28b36243ba3..5372a5862fa 100755 --- a/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh +++ b/egs/zeroth_korean/s5/local/chain/tuning/run_tdnn_opgru_1a.sh @@ -260,7 +260,7 @@ if [ $stage -le 13 ]; then utils/lang/check_phones_compatible.sh \ data/lang_test_tgsmall/phones.txt $lang/phones.txt utils/mkgraph.sh \ - --self-loop-scale 1.0 data/lang_test_tgsmall \ + data/lang_test_tgsmall \ $tree_dir $tree_dir/graph_tgsmall || exit 1; fi diff --git a/kaldi b/kaldi new file mode 120000 index 00000000000..e8310385c56 --- /dev/null +++ b/kaldi @@ -0,0 +1 @@ +src \ No newline at end of file diff --git a/src/Makefile b/src/Makefile index 07b7947f3b1..a287fc3d25a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -5,16 +5,16 @@ SHELL := /bin/bash SUBDIRS = base matrix util feat cudafeat tree gmm transform \ - fstext hmm lm decoder lat kws cudamatrix nnet \ + fstext hmm lm decoder lat kws cudamatrix \ bin fstbin gmmbin fgmmbin featbin cudafeatbin \ - nnetbin latbin sgmm2 sgmm2bin nnet2 nnet3 rnnlm chain nnet3bin nnet2bin kwsbin \ + latbin nnet3 rnnlm chain nnet3bin kwsbin \ ivector ivectorbin online2 online2bin lmbin chainbin rnnlmbin \ - cudadecoder cudadecoderbin + cudadecoder cudadecoderbin cblasext MEMTESTDIRS = base matrix util feat cudafeat tree gmm transform \ - fstext hmm lm decoder lat nnet kws chain \ + fstext hmm lm decoder lat kws chain \ bin fstbin gmmbin fgmmbin featbin cudafeatbin \ - nnetbin latbin sgmm2 nnet2 nnet3 rnnlm nnet2bin nnet3bin sgmm2bin kwsbin \ + latbin nnet3 rnnlm nnet3bin kwsbin \ ivector ivectorbin online2 online2bin lmbin CUDAMEMTESTDIR = cudamatrix @@ -23,9 +23,6 @@ SUBDIRS_LIB = $(filter-out %bin, $(SUBDIRS)) KALDI_SONAME ?= libkaldi.so -# Optional subdirectories -EXT_SUBDIRS = online onlinebin # python-kaldi-decoding -EXT_SUBDIRS_LIB = $(filter-out %bin, $(EXT_SUBDIRS)) include kaldi.mk @@ -72,19 +69,6 @@ endif endif endif -biglibext: $(EXT_SUBDIRS_LIB) -ifeq ($(KALDI_FLAVOR), dynamic) -ifeq ($(shell uname), Darwin) - $(CXX) -dynamiclib -o $(KALDILIBDIR)/libkaldi_ext.dylib -install_name @rpath/libkaldi_ext.dylib -framework Accelerate $(LDFLAGS) $(EXT_SUBDIRS_LIB:=/*.dylib) -else -ifeq ($(shell uname), Linux) - #$(warning The following command will probably fail, in that case add -fPIC to your CXXFLAGS and remake all.) - $(CXX) -shared -o $(KALDILIBDIR)/libkaldi_ext.so -Wl,-soname=libkaldi_ext.so,--whole-archive $(EXT_SUBDIRS_LIB:=/kaldi-*.a) -Wl,--no-whole-archive -else - $(error Dynamic libraries not supported on this platform. Run configure with --static flag. 
) -endif -endif -endif kaldi.mk: @[ -f kaldi.mk ] || { echo "kaldi.mk does not exist; you have to run ./configure"; exit 1; } @@ -143,39 +127,37 @@ $(EXT_SUBDIRS) : checkversion kaldi.mk mklibdir ext_depend ### Dependency list ### # this is necessary for correct parallel compilation #1)The tools depend on all the libraries -bin fstbin gmmbin fgmmbin sgmm2bin featbin cudafeatbin nnetbin nnet2bin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin rnnlmbin cudadecoderbin: \ - base matrix util feat cudafeat tree gmm transform sgmm2 fstext hmm \ - lm decoder lat cudamatrix nnet nnet2 nnet3 ivector chain kws online2 rnnlm \ - cudadecoder + +bin fstbin gmmbin fgmmbin featbin cudafeatbin nnet3bin chainbin latbin ivectorbin lmbin kwsbin online2bin rnnlmbin cudadecoderbin: \ + base matrix util feat cudafeat tree gmm transform fstext hmm \ + lm decoder lat cudadecoder cudamatrix nnet3 ivector chain kws online2 rnnlm #2)The libraries have inter-dependencies base: base/.depend.mk -matrix: base -util: base matrix -feat: base matrix util gmm transform tree -tree: base util matrix -gmm: base util matrix tree -transform: base util matrix gmm tree -sgmm2: base util matrix gmm tree transform hmm -fstext: base util matrix tree -hmm: base tree matrix util -lm: base util matrix fstext -decoder: base util matrix gmm hmm tree transform lat fstext -lat: base util hmm tree matrix -cudamatrix: base util matrix -nnet: base util hmm tree matrix cudamatrix -nnet2: base util matrix lat gmm hmm tree transform cudamatrix -nnet3: base util matrix decoder lat gmm hmm tree transform cudamatrix chain fstext -rnnlm: base util matrix cudamatrix nnet3 lm hmm -chain: lat hmm tree fstext matrix cudamatrix util base -ivector: base util matrix transform tree gmm -#3)Dependencies for optional parts of Kaldi -onlinebin: base matrix util feat tree gmm transform sgmm2 fstext hmm lm decoder lat cudamatrix nnet nnet2 online -# python-kaldi-decoding: base matrix util feat tree gmm transform sgmm2 fstext hmm decoder lat online -cudafeat: base matrix util gmm transform tree feat cudamatrix online2 -cudafeatbin: base matrix util gmm transform tree feat cudamatrix cudafeat online2 -online: decoder gmm transform feat matrix util base lat hmm tree -online2: decoder gmm transform feat matrix util base lat hmm tree ivector cudamatrix nnet2 nnet3 chain -kws: base util hmm tree matrix lat +cblasext: base +matrix: base cblasext +util: base matrix cblasext +feat: base cudamatrix matrix cblasext util gmm transform tree +tree: base util matrix cblasext +gmm: base util matrix cblasext tree +transform: base util matrix cblasext gmm tree +fstext: base util matrix cblasext tree +hmm: base tree matrix cblasext util +lm: base util matrix cblasext fstext +decoder: base util matrix cblasext gmm hmm tree transform lat fstext +lat: base util hmm tree matrix cblasext +cudamatrix: base util matrix cblasext +nnet3: base util matrix cblasext decoder lat gmm hmm tree transform cudamatrix chain fstext +rnnlm: base util matrix cblasext cudamatrix nnet3 lm hmm +chain: lat hmm tree fstext matrix cblasext cudamatrix util base +ivector: base util matrix cblasext transform tree gmm +cudafeat: base cudamatrix matrix cblasext util gmm transform tree feat cudamatrix online2 +onlinebin: base cudamatrix matrix cblasext util feat tree gmm transform fstext hmm lm decoder lat cudamatrix online +# python-kaldi-decoding: base cudamatrix matrix cblasext util feat tree gmm transform fstext hmm decoder lat online + +cudafeatbin: base cudamatrix matrix cblasext util gmm transform 
tree feat cudamatrix cudafeat online2 +online: decoder gmm transform feat matrix cblasext util base lat hmm tree +online2: decoder gmm transform feat matrix cblasext util base lat hmm tree ivector cudamatrix nnet3 chain +kws: base util hmm tree matrix cblasext lat cudadecoder: cudamatrix cudafeat online2 nnet3 ivector feat fstext lat chain transform -cudadecoderbin: cudadecoder cudafeat cudamatrix online2 nnet3 ivector feat fstext lat chain transform + diff --git a/src/bin/Makefile b/src/bin/Makefile index 7cb01b50120..855a43bf350 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -1,6 +1,5 @@ all: - -rm -f arpa2fst EXTRA_CXXFLAGS = -Wno-sign-compare include ../kaldi.mk @@ -22,7 +21,7 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \ matrix-sum build-pfile-from-ali get-post-on-ali tree-info am-info \ vector-sum matrix-sum-rows est-pca sum-lda-accs sum-mllt-accs \ transform-vec align-text matrix-dim post-to-smat compile-graph \ - compare-int-vector + compare-int-vector cuda-gpu-available OBJFILES = @@ -30,9 +29,9 @@ OBJFILES = ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../lm/kaldi-lm.a \ ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ - ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a - + ../tree/kaldi-tree.a ../cudamatrix/kaldi-cudamatrix.a \ + ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ + ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a TESTFILES = diff --git a/src/bin/acc-lda.cc b/src/bin/acc-lda.cc index b664135bdc7..a0451218513 100644 --- a/src/bin/acc-lda.cc +++ b/src/bin/acc-lda.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/posterior.h" #include "transform/lda-estimate.h" @@ -57,7 +57,7 @@ int main(int argc, char *argv[]) { std::string posteriors_rspecifier = po.GetArg(3); std::string acc_wxfilename = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_rxfilename, &binary_read); diff --git a/src/bin/acc-tree-stats.cc b/src/bin/acc-tree-stats.cc index 8b9ce9065b4..c0eb31f6064 100644 --- a/src/bin/acc-tree-stats.cc +++ b/src/bin/acc-tree-stats.cc @@ -22,7 +22,7 @@ #include "util/common-utils.h" #include "tree/context-dep.h" #include "tree/build-tree-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/tree-accu.h" /** @brief Accumulate tree statistics for decision tree training. The @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) { AccumulateTreeStatsInfo acc_tree_stats_info(opts); - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_filename, &binary); diff --git a/src/bin/add-self-loops.cc b/src/bin/add-self-loops.cc index b223dfe317d..ebaf219aff1 100644 --- a/src/bin/add-self-loops.cc +++ b/src/bin/add-self-loops.cc @@ -18,7 +18,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. 
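The bin/ hunks above (acc-lda.cc, acc-tree-stats.cc, and the files that follow) replace the old TransitionModel class and its hmm/transition-model.h header with the new Transitions class in hmm/transitions.h. A minimal sketch of the updated read pattern those tools now share; the standalone main() and the "final.mdl" path are illustrative, not part of the patch:

    #include "hmm/transitions.h"
    #include "util/common-utils.h"

    int main() {
      using namespace kaldi;
      Transitions trans_model;                     // was: TransitionModel trans_model;
      ReadKaldiObject("final.mdl", &trans_model);  // same I/O helper as before
      KALDI_LOG << "transition-ids: " << trans_model.NumTransitionIds()
                << ", pdfs: " << trans_model.NumPdfs();
      return 0;
    }

Note that the old notion of a "transition-state" is gone, which is why am-info and hmm-info below stop printing a transition-state count.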
-#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "tree/context-dep.h" #include "util/common-utils.h" @@ -46,20 +46,14 @@ int main(int argc, char *argv[]) { "is recommended as the decoding will in that case be faster.\n" "Usage: add-self-loops [options] transition-gmm/acoustic-model [fst-in] [fst-out]\n" "e.g.: \n" - " add-self-loops --self-loop-scale=0.1 1.mdl HCLGa.fst HCLG.fst\n" - "or: add-self-loops --self-loop-scale=0.1 1.mdl HCLG.fst\n"; + " add-self-loops1.mdl HCLGa.fst HCLG.fst\n" + "or: add-self-loops 1.mdl HCLG.fst\n"; - BaseFloat self_loop_scale = 1.0; - bool reorder = true; std::string disambig_in_filename; ParseOptions po(usage); - po.Register("self-loop-scale", &self_loop_scale, - "Scale for self-loop probabilities relative to LM."); po.Register("disambig-syms", &disambig_in_filename, "List of disambiguation symbols on input of fst-in [input file]"); - po.Register("reorder", &reorder, - "If true, reorder symbols for more decoding efficiency"); po.Read(argc, argv); if (po.NumArgs() < 1 || po.NumArgs() > 3) { @@ -88,7 +82,7 @@ int main(int argc, char *argv[]) { "standard input" : disambig_in_filename); } - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_in_filename, &trans_model); @@ -97,13 +91,14 @@ int main(int argc, char *argv[]) { if (!fst) KALDI_ERR << "add-self-loops: error reading input FST."; - bool check_no_self_loops = true; + BaseFloat self_loop_scale = 1.0; + bool currently_self_loop_free = true; // The work gets done here. AddSelfLoops(trans_model, disambig_syms_in, self_loop_scale, - reorder, check_no_self_loops, fst); + currently_self_loop_free, fst); if (! fst->Write(fst_out_filename) ) KALDI_ERR << "add-self-loops: error writing FST to " diff --git a/src/bin/ali-to-pdf.cc b/src/bin/ali-to-pdf.cc index 61b5138cf31..3c978ca62f0 100644 --- a/src/bin/ali-to-pdf.cc +++ b/src/bin/ali-to-pdf.cc @@ -21,7 +21,7 @@ */ #include "base/kaldi-common.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "util/common-utils.h" #include "fst/fstlib.h" @@ -48,7 +48,7 @@ int main(int argc, char *argv[]) { alignments_rspecifier = po.GetArg(2), pdfs_wspecifier = po.GetArg(3); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_filename, &trans_model); SequentialInt32VectorReader reader(alignments_rspecifier); @@ -60,7 +60,7 @@ int main(int argc, char *argv[]) { std::vector alignment = reader.Value(); for (size_t i = 0; i < alignment.size(); i++) - alignment[i] = trans_model.TransitionIdToPdf(alignment[i]); + alignment[i] = trans_model.TransitionIdToPdfFast(alignment[i]); writer.Write(key, alignment); num_done++; diff --git a/src/bin/ali-to-phones.cc b/src/bin/ali-to-phones.cc index 602e32e9768..ed7f99758cd 100644 --- a/src/bin/ali-to-phones.cc +++ b/src/bin/ali-to-phones.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "util/common-utils.h" #include "fst/fstlib.h" @@ -68,7 +68,7 @@ int main(int argc, char *argv[]) { std::string model_filename = po.GetArg(1), alignments_rspecifier = po.GetArg(2); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_filename, &trans_model); SequentialInt32VectorReader reader(alignments_rspecifier); @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) { BaseFloat phone_start = 0.0; for (size_t i = 0; i < split.size(); i++) { 
KALDI_ASSERT(!split[i].empty()); - int32 phone = trans_model.TransitionIdToPhone(split[i][0]); + int32 phone = trans_model.InfoForTransitionId(split[i][0]).phone; int32 num_repeats = split[i].size(); ctm_writer.Stream() << key << " 1 " << phone_start << " " << (frame_shift * num_repeats) << " " << phone << std::endl; @@ -108,7 +108,7 @@ int main(int argc, char *argv[]) { std::vector phones; for (size_t i = 0; i < split.size(); i++) { KALDI_ASSERT(!split[i].empty()); - int32 phone = trans_model.TransitionIdToPhone(split[i][0]); + int32 phone = trans_model.InfoForTransitionId(split[i][0]).phone; int32 num_repeats = split[i].size(); //KALDI_ASSERT(num_repeats!=0); if (per_frame) @@ -122,7 +122,7 @@ int main(int argc, char *argv[]) { std::vector > pairs; for (size_t i = 0; i < split.size(); i++) { KALDI_ASSERT(split[i].size() > 0); - int32 phone = trans_model.TransitionIdToPhone(split[i][0]); + int32 phone = trans_model.InfoForTransitionId(split[i][0]).phone; int32 num_repeats = split[i].size(); //KALDI_ASSERT(num_repeats!=0); pairs.push_back(std::make_pair(phone, num_repeats)); diff --git a/src/bin/ali-to-post.cc b/src/bin/ali-to-post.cc index ac87d676c06..00c026c0692 100644 --- a/src/bin/ali-to-post.cc +++ b/src/bin/ali-to-post.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "hmm/posterior.h" diff --git a/src/bin/align-compiled-mapped.cc b/src/bin/align-compiled-mapped.cc index 98ffebd6eaa..a47231f7b5a 100644 --- a/src/bin/align-compiled-mapped.cc +++ b/src/bin/align-compiled-mapped.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" @@ -50,17 +50,11 @@ int main(int argc, char *argv[]) { AlignConfig align_config; bool binary = true; BaseFloat acoustic_scale = 1.0; - BaseFloat transition_scale = 1.0; - BaseFloat self_loop_scale = 1.0; align_config.Register(&po); po.Register("binary", &binary, "Write output in binary mode"); - po.Register("transition-scale", &transition_scale, - "Transition-probability scale [relative to acoustics]"); po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods"); - po.Register("self-loop-scale", &self_loop_scale, - "Scale of self-loop versus non-self-loop log probs [relative to acoustics]"); po.Read(argc, argv); if (po.NumArgs() < 4 || po.NumArgs() > 5) { @@ -74,7 +68,7 @@ int main(int argc, char *argv[]) { std::string alignment_wspecifier = po.GetArg(4); std::string scores_wspecifier = po.GetOptArg(5); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_in_filename, &trans_model); SequentialBaseFloatMatrixReader loglikes_reader(feature_rspecifier); @@ -110,13 +104,6 @@ int main(int argc, char *argv[]) { continue; } - { // Add transition-probs to the FST. - std::vector disambig_syms; // empty. 
- AddTransitionProbs(trans_model, disambig_syms, - transition_scale, self_loop_scale, - &decode_fst); - } - DecodableMatrixScaledMapped decodable(trans_model, loglikes, acoustic_scale); AlignUtteranceWrapper(align_config, utt, diff --git a/src/bin/align-equal-compiled.cc b/src/bin/align-equal-compiled.cc index c4ab9d4205a..f5900727aef 100644 --- a/src/bin/align-equal-compiled.cc +++ b/src/bin/align-equal-compiled.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/training-graph-compiler.h" diff --git a/src/bin/align-equal.cc b/src/bin/align-equal.cc index a3bc40dc236..80caff00168 100644 --- a/src/bin/align-equal.cc +++ b/src/bin/align-equal.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/training-graph-compiler.h" @@ -65,13 +65,13 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; ReadKaldiObject(tree_in_filename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_in_filename, &trans_model); // need VectorFst because we will change it by adding subseq symbol. VectorFst *lex_fst = fst::ReadFstKaldi(lex_in_filename); - TrainingGraphCompilerOptions gc_opts(1.0, true); // true -> Dan style graph. + TrainingGraphCompilerOptions gc_opts; std::vector disambig_syms; if (disambig_rxfilename != "") diff --git a/src/bin/align-mapped.cc b/src/bin/align-mapped.cc index c78401fffdd..e8249c4a123 100644 --- a/src/bin/align-mapped.cc +++ b/src/bin/align-mapped.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" #include "decoder/training-graph-compiler.h" @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; ReadKaldiObject(tree_in_filename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_in_filename, &trans_model); VectorFst *lex_fst = fst::ReadFstKaldi(lex_in_filename); diff --git a/src/bin/am-info.cc b/src/bin/am-info.cc index 6afb0c5014e..f2516c436f8 100644 --- a/src/bin/am-info.cc +++ b/src/bin/am-info.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { @@ -45,7 +45,7 @@ int main(int argc, char *argv[]) { std::string model_in_filename = po.GetArg(1); - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); @@ -56,8 +56,6 @@ int main(int argc, char *argv[]) { std::cout << "number of pdfs " << trans_model.NumPdfs() << '\n'; std::cout << "number of transition-ids " << trans_model.NumTransitionIds() << '\n'; - std::cout << "number of transition-states " - << trans_model.NumTransitionStates() << '\n'; } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; diff --git a/src/bin/build-pfile-from-ali.cc b/src/bin/build-pfile-from-ali.cc index fadb873825f..e1967c77d8c 100644 --- a/src/bin/build-pfile-from-ali.cc +++ b/src/bin/build-pfile-from-ali.cc @@ -25,7 +25,7 @@ using std::vector; #include "base/kaldi-common.h" #include 
"gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "util/common-utils.h" @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) { feature_rspecifier = po.GetArg(3), pfile_wspecifier = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; @@ -115,7 +115,7 @@ int main(int argc, char *argv[]) { } // Output the class label ss << " "; - ss << trans_model.TransitionIdToPdf(alignment[i]); + ss << trans_model.TransitionIdToPdfFast(alignment[i]); ko.Stream() << ss.str().c_str(); ko.Stream() << "\n"; diff --git a/src/bin/build-tree-two-level.cc b/src/bin/build-tree-two-level.cc index c7cd553484e..005c5d80532 100644 --- a/src/bin/build-tree-two-level.cc +++ b/src/bin/build-tree-two-level.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/hmm-topology.h" +#include "hmm/topology.h" #include "tree/context-dep.h" #include "tree/build-tree.h" #include "tree/build-tree-utils.h" @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) { ReadRootsFile(ki.Stream(), &phone_sets, &is_shared_root, &is_split_root); } - HmmTopology topo; + Topology topo; ReadKaldiObject(topo_filename, &topo); BuildTreeStatsType stats; diff --git a/src/bin/build-tree.cc b/src/bin/build-tree.cc index 72774900d61..b37c9c7d184 100644 --- a/src/bin/build-tree.cc +++ b/src/bin/build-tree.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/hmm-topology.h" +#include "hmm/topology.h" #include "tree/context-dep.h" #include "tree/build-tree.h" #include "tree/build-tree-utils.h" @@ -91,7 +91,7 @@ int main(int argc, char *argv[]) { ReadRootsFile(ki.Stream(), &phone_sets, &is_shared_root, &is_split_root); } - HmmTopology topo; + Topology topo; ReadKaldiObject(topo_filename, &topo); BuildTreeStatsType stats; diff --git a/src/bin/cluster-phones.cc b/src/bin/cluster-phones.cc index 1d5b3824252..24627ca3bfc 100644 --- a/src/bin/cluster-phones.cc +++ b/src/bin/cluster-phones.cc @@ -49,7 +49,7 @@ int main(int argc, char *argv[]) { // bool binary = true; int32 P = 1, N = 3; // Note: N does not matter. - std::string pdf_class_list_str = "1"; // 1 is just the central position of 3. + std::string pdf_class_list_str = "2"; // 2 is just the central position of 3. 
std::string mode = "questions"; int32 num_classes = -1; @@ -57,7 +57,7 @@ int main(int argc, char *argv[]) { // po.Register("binary", &binary, "Write output in binary mode"); po.Register("central-position", &P, "Central position in context window [must match acc-tree-stats]"); po.Register("context-width", &N, "Does not have any effect-- included for scripting convenience."); - po.Register("pdf-class-list", &pdf_class_list_str, "Colon-separated list of HMM positions to consider [Default = 1: just central position for 3-state models]."); + po.Register("pdf-class-list", &pdf_class_list_str, "Colon-separated list of HMM positions to consider [Default = 2: just central position for 3-state models]."); po.Register("mode", &mode, "Mode of operation: \"questions\"->sets suitable for decision trees; \"k-means\"->k-means algorithm, output k classes (set num-classes options)\n"); po.Register("num-classes", &num_classes, "For k-means mode, number of classes."); @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) { std::vector pdf_class_list; if (!SplitStringToIntegers(pdf_class_list_str, ":", false, &pdf_class_list) || pdf_class_list.empty()) { - KALDI_ERR << "Invalid pdf-class-list string [expecting colon-separated list of integers]: " + KALDI_ERR << "Invalid pdf-class-list string [expecting colon-separated list of integers]: " << pdf_class_list_str; } diff --git a/src/bin/compile-graph.cc b/src/bin/compile-graph.cc index 7174fdf8113..dea332aced0 100644 --- a/src/bin/compile-graph.cc +++ b/src/bin/compile-graph.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "fstext/fstext-lib.h" #include "fstext/push-special.h" @@ -48,19 +48,12 @@ int main(int argc, char *argv[]) { ParseOptions po(usage); - BaseFloat transition_scale = 1.0; - BaseFloat self_loop_scale = 1.0; // Caution: the script default is 0.1. int32 nonterm_phones_offset = -1; std::string disambig_rxfilename; po.Register("read-disambig-syms", &disambig_rxfilename, "File containing " "list of disambiguation symbols in phone symbol table"); - po.Register("transition-scale", &transition_scale, "Scale of transition " - "probabilities (excluding self-loops)."); - po.Register("self-loop-scale", &self_loop_scale, "Scale of self-loop vs. " - "non-self-loop probability mass. Caution: the default of " - "mkgraph.sh is 0.1, but this defaults to 1.0."); po.Register("nonterm-phones-offset", &nonterm_phones_offset, "Integer " "value of symbol #nonterm_bos in phones.txt, if present. " "(Only relevant for grammar decoding)."); @@ -81,7 +74,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; // the tree. ReadKaldiObject(tree_rxfilename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_rxfilename, &trans_model); VectorFst *lex_fst = fst::ReadFstKaldi(lex_rxfilename), @@ -141,20 +134,18 @@ int main(int argc, char *argv[]) { lg_fst.DeleteStates(); HTransducerConfig h_cfg; - h_cfg.transition_scale = transition_scale; h_cfg.nonterm_phones_offset = nonterm_phones_offset; std::vector disambig_syms_h; // disambiguation symbols on // input side of H. - VectorFst *h_fst = GetHTransducer(ilabels, - ctx_dep, - trans_model, - h_cfg, - &disambig_syms_h); + std::unique_ptr> h_fst = GetHTransducer(ilabels, + ctx_dep, + trans_model, + h_cfg, + &disambig_syms_h); VectorFst hclg_fst; // transition-id to word. 
TableCompose(*h_fst, clg_fst, &hclg_fst); clg_fst.DeleteStates(); - delete h_fst; KALDI_ASSERT(hclg_fst.Start() != fst::kNoStateId); @@ -170,13 +161,12 @@ int main(int argc, char *argv[]) { MinimizeEncoded(&hclg_fst); std::vector disambig; - bool check_no_self_loops = true, - reorder = true; + bool currently_self_loop_free = true, + use_weights = true; AddSelfLoops(trans_model, disambig, - self_loop_scale, - reorder, - check_no_self_loops, + currently_self_loop_free, + use_weights, &hclg_fst); if (nonterm_phones_offset >= 0) diff --git a/src/bin/compile-questions.cc b/src/bin/compile-questions.cc index f9694140ae8..bf734ac01da 100644 --- a/src/bin/compile-questions.cc +++ b/src/bin/compile-questions.cc @@ -19,12 +19,12 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/hmm-topology.h" +#include "hmm/topology.h" #include "tree/build-tree-questions.h" namespace kaldi { -int32 ProcessTopo(const HmmTopology &topo, const std::vector > &questions) { +int32 ProcessTopo(const Topology &topo, const std::vector > &questions) { std::vector seen_phones; // ids of phones seen in questions. for (size_t i = 0; i < questions.size(); i++) for (size_t j= 0; j < questions[i].size(); j++) seen_phones.push_back(questions[i][j]); @@ -93,7 +93,7 @@ int main(int argc, char *argv[]) { questions_rxfilename = po.GetArg(2), questions_out_filename = po.GetArg(3); - HmmTopology topo; // just needed for checking, and to get the + Topology topo; // just needed for checking, and to get the // largest number of pdf-classes for any phone. ReadKaldiObject(topo_filename, &topo); @@ -130,13 +130,13 @@ int main(int argc, char *argv[]) { } QuestionsForKey pdfclass_opts(num_iters_refine); - std::vector > pdfclass_questions(max_num_pdfclasses-1); - for (int32 i = 0; i < max_num_pdfclasses - 1; i++) - for (int32 j = 0; j <= i; j++) - pdfclass_questions[i].push_back(j); - // E.g. if max_num_pdfclasses == 3, pdfclass_questions is now [ [0], [0, 1] ]. + std::vector > pdfclass_questions(max_num_pdfclasses - 1); + for (int32 i = 1; i <= max_num_pdfclasses - 1; i++) + for (int32 j = 1; j <= i; j++) + pdfclass_questions[i-1].push_back(j); + // E.g. if max_num_pdfclasses == 3, pdfclass_questions is now [ 1], [1, 2] ]. pdfclass_opts.initial_questions = pdfclass_questions; - KALDI_LOG << "Setting questions for hmm-position [hmm-position ranges from 0 to "<< (max_num_pdfclasses-1) <<"]"; + KALDI_LOG << "Setting questions for pdf-class [pdf-class ranges from 1 to "<< max_num_pdfclasses <<"]"; qo.SetQuestionsOf(kPdfClass, pdfclass_opts); WriteKaldiObject(qo, questions_out_filename, binary); diff --git a/src/bin/compile-train-graphs-fsts.cc b/src/bin/compile-train-graphs-fsts.cc index 00ec1038943..8d0203c0a5e 100644 --- a/src/bin/compile-train-graphs-fsts.cc +++ b/src/bin/compile-train-graphs-fsts.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/training-graph-compiler.h" @@ -52,9 +52,6 @@ int main(int argc, char *argv[]) { TrainingGraphCompilerOptions gopts; int32 batch_size = 250; - gopts.transition_scale = 0.0; // Change the default to 0.0 since we will generally add the - // transition probs in the alignment phase (since they change each time) - gopts.self_loop_scale = 0.0; // Ditto for self-loop probs. std::string disambig_rxfilename; gopts.Register(&po); @@ -63,7 +60,7 @@ int main(int argc, char *argv[]) { "more memory. E.g. 
500"); po.Register("read-disambig-syms", &disambig_rxfilename, "File containing " "list of disambiguation symbols in phone symbol table"); - + po.Read(argc, argv); if (po.NumArgs() != 5) { @@ -80,7 +77,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; // the tree. ReadKaldiObject(tree_rxfilename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_rxfilename, &trans_model); // need VectorFst because we will change it by adding subseq symbol. @@ -103,7 +100,7 @@ int main(int argc, char *argv[]) { SequentialTableReader fst_reader(fsts_rspecifier); TableWriter fst_writer(fsts_wspecifier); - + int num_succeed = 0, num_fail = 0; if (batch_size == 1) { // We treat batch_size of 1 as a special case in order diff --git a/src/bin/compile-train-graphs.cc b/src/bin/compile-train-graphs.cc index 874d079376e..3e3532fbd98 100644 --- a/src/bin/compile-train-graphs.cc +++ b/src/bin/compile-train-graphs.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/training-graph-compiler.h" @@ -46,9 +46,7 @@ int main(int argc, char *argv[]) { TrainingGraphCompilerOptions gopts; int32 batch_size = 250; - gopts.transition_scale = 0.0; // Change the default to 0.0 since we will generally add the - // transition probs in the alignment phase (since they change eacm time) - gopts.self_loop_scale = 0.0; // Ditto for self-loop probs. + std::string disambig_rxfilename; gopts.Register(&po); @@ -57,7 +55,7 @@ int main(int argc, char *argv[]) { "more memory. E.g. 500"); po.Register("read-disambig-syms", &disambig_rxfilename, "File containing " "list of disambiguation symbols in phone symbol table"); - + po.Read(argc, argv); if (po.NumArgs() != 5) { @@ -74,7 +72,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; // the tree. ReadKaldiObject(tree_rxfilename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_rxfilename, &trans_model); // need VectorFst because we will change it by adding subseq symbol. @@ -85,7 +83,7 @@ int main(int argc, char *argv[]) { if (!ReadIntegerVectorSimple(disambig_rxfilename, &disambig_syms)) KALDI_ERR << "fstcomposecontext: Could not read disambiguation symbols from " << disambig_rxfilename; - + TrainingGraphCompiler gc(trans_model, ctx_dep, lex_fst, disambig_syms, gopts); lex_fst = NULL; // we gave ownership to gc. 
diff --git a/src/bin/convert-ali.cc b/src/bin/convert-ali.cc index 89fe838638c..d245d93a0f8 100644 --- a/src/bin/convert-ali.cc +++ b/src/bin/convert-ali.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "hmm/tree-accu.h" // for ReadPhoneMap @@ -38,7 +38,6 @@ int main(int argc, char *argv[]) { " convert-ali old/final.mdl new/0.mdl new/tree ark:old/ali.1 ark:new/ali.1\n"; int32 frame_subsampling_factor = 1; - bool reorder = true; bool repeat_frames = false; std::string phone_map_rxfilename; @@ -46,9 +45,6 @@ int main(int argc, char *argv[]) { po.Register("phone-map", &phone_map_rxfilename, "File name containing old->new phone mapping (each line is: " "old-integer-id new-integer-id)"); - po.Register("reorder", &reorder, - "True if you want the converted alignments to be 'reordered' " - "versus the way they appear in the HmmTopology object"); po.Register("repeat-frames", &repeat_frames, "Only relevant when frame-subsampling-factor != 1. If true, " "repeat frames of alignment by 'frame-subsampling-factor' " @@ -79,10 +75,10 @@ int main(int argc, char *argv[]) { SequentialInt32VectorReader alignment_reader(old_alignments_rspecifier); Int32VectorWriter alignment_writer(new_alignments_wspecifier); - TransitionModel old_trans_model; + Transitions old_trans_model; ReadKaldiObject(old_model_filename, &old_trans_model); - TransitionModel new_trans_model; + Transitions new_trans_model; ReadKaldiObject(new_model_filename, &new_trans_model); if (!(old_trans_model.GetTopo() == new_trans_model.GetTopo())) @@ -105,7 +101,6 @@ int main(int argc, char *argv[]) { old_alignment, frame_subsampling_factor, repeat_frames, - reorder, (phone_map_rxfilename != "" ? &phone_map : NULL), &new_alignment)) { alignment_writer.Write(key, new_alignment); diff --git a/src/bin/copy-gselect.cc b/src/bin/copy-gselect.cc index e6c92013b58..ee427d59b8e 100644 --- a/src/bin/copy-gselect.cc +++ b/src/bin/copy-gselect.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { diff --git a/src/bin/copy-transition-model.cc b/src/bin/copy-transition-model.cc index 62a5d0c51dd..b05c64d28bf 100644 --- a/src/bin/copy-transition-model.cc +++ b/src/bin/copy-transition-model.cc @@ -17,7 +17,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. 
-#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fst/fstlib.h" #include "util/common-utils.h" @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { transition_model_wxfilename = po.GetArg(2); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(transition_model_rxfilename, &trans_model); WriteKaldiObject(trans_model, transition_model_wxfilename, binary); diff --git a/src/bin/copy-tree.cc b/src/bin/copy-tree.cc index c412366b151..69ab0c309ad 100644 --- a/src/bin/copy-tree.cc +++ b/src/bin/copy-tree.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/hmm-topology.h" +#include "hmm/topology.h" #include "tree/context-dep.h" #include "tree/clusterable-classes.h" #include "util/text-utils.h" diff --git a/src/bin/cuda-gpu-available.cc b/src/bin/cuda-gpu-available.cc new file mode 100644 index 00000000000..67063fc0f96 --- /dev/null +++ b/src/bin/cuda-gpu-available.cc @@ -0,0 +1,111 @@ +// nnetbin/cuda-gpu-available.cc + +// Copyright 2015 Brno University of Technology (author: Karel Vesely) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef _MSC_VER + #include + #include +#endif + +#include "base/kaldi-common.h" +#include "util/parse-options.h" +#include "cudamatrix/cu-device.h" +#include "cudamatrix/cu-matrix.h" + +using namespace kaldi; + +#if HAVE_CUDA == 1 +/** + * With incorrect CUDA setup, this will trigger "invalid device function" error. + */ +void TestGpuComputation() { + CuMatrix m(100,100); + m.SetRandn(); + m.ApplySoftMaxPerRow(m); +} +#endif + +int main(int argc, char *argv[]) try { + + /* only for Doxygen documentation, never shown in command line */ + const char *usage = + "Test if there is a GPU available, and if the GPU setup is correct.\n" + "A GPU is acquired and a small computation is done\n" + "(generating a random matrix and computing softmax for its rows).\n" + "\n" + "exit-code: 0 = success, 1 = compiled without GPU support, -1 = error\n" + "\n" + "Usage: cuda-gpu-available\n"; + + ParseOptions po(usage); + po.Read(argc, argv); + + char hostname[100] = "UNKNOWN-HOSTNAME"; +#if !defined(_MSC_VER) && !defined(__CYGWIN__) + if (gethostname(hostname, 100)) { + KALDI_WARN << "Cannot get hostname, " << strerror(errno); + } +#endif + KALDI_LOG << "\n\n### IS CUDA GPU AVAILABLE? '" << hostname << "' ###"; +#if HAVE_CUDA == 1 + CuDevice::Instantiate().SelectGpuId("yes"); + fprintf(stderr, "### HURRAY, WE GOT A CUDA GPU FOR COMPUTATION!!! ##\n\n"); + fprintf(stderr, "### Testing CUDA setup with a small computation " + "(setup = cuda-toolkit + gpu-driver + kaldi):\n"); + // the test of setup by computation, + try { + TestGpuComputation(); + } catch (const std::exception &e) { + fprintf(stderr, "%s\n", e.what()); + KALDI_LOG << "...\n" + << "### The CUDA setup is wrong! 
" + << "(\"invalid device function\" == problem with 'compute capability' " + << "in compiled kaldi)\n" + << "### Before posting the error to forum, please try following:\n" + << "### 1) update kaldi & cuda-toolkit (& GPU driver),\n" + << "### 2) re-run 'src/configure',\n" + << "### 3) re-compile kaldi by 'make clean; make -j depend; make -j'\n" + << "###\n" + << "### If the problem persists, please send us your:\n" + << "### - GPU model name, cuda-toolkit version, driver version " + << "(run nvidia-smi), variable $(CUDA_ARCH) from src/kaldi.mk"; + return -1; + } + fprintf(stderr, "### Test OK!\n"); + return 0; +#else + std::cerr + << "### CUDA WAS NOT COMPILED IN! ###\n" + << "To support CUDA, you must run 'configure' on a machine " + << "that has the CUDA compiler 'nvcc' available.\n"; + return 1; +#endif +} catch (const std::exception &e) { + fprintf(stderr, "%s\n", e.what()); + KALDI_LOG << "...\n" + << "### WE DID NOT GET A CUDA GPU!!! ###\n" + << "### If your system has a 'free' CUDA GPU, try re-installing " + << "latest 'CUDA toolkit' from NVidia (this updates GPU drivers too).\n" + << "### Otherwise 'nvidia-smi' shows the status of GPUs:\n" + << "### - The versions should match ('NVIDIA-SMI' and 'Driver Version'), " + << "otherwise reboot or reload kernel module,\n" + << "### - The GPU should be unused " + << "(no 'process' in list, low 'memory-usage' (<100MB), low 'gpu-fan' (<30%)),\n" + << "### - You should see your GPU (burnt GPUs may disappear from the list until reboot),"; + return -1; +} diff --git a/src/bin/decode-faster-mapped.cc b/src/bin/decode-faster-mapped.cc index c7411592504..4606933411f 100644 --- a/src/bin/decode-faster-mapped.cc +++ b/src/bin/decode-faster-mapped.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/faster-decoder.h" #include "decoder/decodable-matrix.h" @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetArg(4), alignment_wspecifier = po.GetOptArg(5); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_in_filename, &trans_model); Int32VectorWriter words_writer(words_wspecifier); diff --git a/src/bin/decode-faster.cc b/src/bin/decode-faster.cc index cbcdb771d56..a1e112b129f 100644 --- a/src/bin/decode-faster.cc +++ b/src/bin/decode-faster.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/faster-decoder.h" #include "decoder/decodable-matrix.h" diff --git a/src/bin/draw-tree.cc b/src/bin/draw-tree.cc index d107ab1cfac..f95478e7c52 100644 --- a/src/bin/draw-tree.cc +++ b/src/bin/draw-tree.cc @@ -33,7 +33,7 @@ void MakeEvent(std::string &qry, fst::SymbolTable *phone_syms, EventValueType value; if (key == kPdfClass) { value = static_cast(atoi(valstr.c_str())); - if (value < 0) { // not valid pdf-class + if (value < 1) { // not valid pdf-class KALDI_ERR << "Bad query: invalid pdf-class (" << valstr << ')'; } } diff --git a/src/bin/est-mllt.cc b/src/bin/est-mllt.cc index 48021304b80..2a01f0dbb78 100644 --- a/src/bin/est-mllt.cc +++ b/src/bin/est-mllt.cc @@ -20,7 +20,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/mllt.h" int 
main(int argc, char *argv[]) { diff --git a/src/bin/get-post-on-ali.cc b/src/bin/get-post-on-ali.cc index 6d6dfd0d3df..471bbfbfff2 100644 --- a/src/bin/get-post-on-ali.cc +++ b/src/bin/get-post-on-ali.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "hmm/posterior.h" diff --git a/src/bin/hmm-info.cc b/src/bin/hmm-info.cc index 4ece5e88171..6daa0bc6385 100644 --- a/src/bin/hmm-info.cc +++ b/src/bin/hmm-info.cc @@ -19,7 +19,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { @@ -43,7 +43,7 @@ int main(int argc, char *argv[]) { std::string model_in_filename = po.GetArg(1); - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); @@ -54,8 +54,6 @@ int main(int argc, char *argv[]) { std::cout << "number of pdfs " << trans_model.NumPdfs() << '\n'; std::cout << "number of transition-ids " << trans_model.NumTransitionIds() << '\n'; - std::cout << "number of transition-states " - << trans_model.NumTransitionStates() << '\n'; } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; diff --git a/src/bin/latgen-faster-mapped-parallel.cc b/src/bin/latgen-faster-mapped-parallel.cc index 4479ec8b73e..415fd1a3584 100644 --- a/src/bin/latgen-faster-mapped-parallel.cc +++ b/src/bin/latgen-faster-mapped-parallel.cc @@ -24,7 +24,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" #include "decoder/decodable-matrix.h" @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetOptArg(5), alignment_wspecifier = po.GetOptArg(6); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_in_filename, &trans_model); bool determinize = config.determinize_lattice; diff --git a/src/bin/latgen-faster-mapped.cc b/src/bin/latgen-faster-mapped.cc index 610d9aa6d7d..3a65d78be04 100644 --- a/src/bin/latgen-faster-mapped.cc +++ b/src/bin/latgen-faster-mapped.cc @@ -23,7 +23,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" #include "decoder/decodable-matrix.h" @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetOptArg(5), alignment_wspecifier = po.GetOptArg(6); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_in_filename, &trans_model); bool determinize = config.determinize_lattice; diff --git a/src/bin/logprob-to-post.cc b/src/bin/logprob-to-post.cc index f221580a484..0edfba0189d 100644 --- a/src/bin/logprob-to-post.cc +++ b/src/bin/logprob-to-post.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "hmm/posterior.h" diff --git a/src/bin/make-h-transducer.cc b/src/bin/make-h-transducer.cc index c54b9250cf7..e3a66a99536 100644 --- a/src/bin/make-h-transducer.cc +++ b/src/bin/make-h-transducer.cc @@ -16,7 +16,7 @@ // See the Apache 2 License for the specific 
language governing permissions and // limitations under the License. -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "tree/context-dep.h" #include "util/common-utils.h" @@ -71,17 +71,18 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; ReadKaldiObject(tree_filename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_filename, &trans_model); std::vector disambig_syms_out; // The work gets done here. - fst::VectorFst *H = GetHTransducer (ilabel_info, - ctx_dep, - trans_model, - hcfg, - &disambig_syms_out); + std::unique_ptr> + H = GetHTransducer (ilabel_info, + ctx_dep, + trans_model, + hcfg, + &disambig_syms_out); #if _MSC_VER if (fst_out_filename == "") _setmode(_fileno(stdout), _O_BINARY); @@ -101,7 +102,6 @@ int main(int argc, char *argv[]) { << (fst_out_filename == "" ? "standard output" : fst_out_filename); - delete H; return 0; } catch(const std::exception &e) { std::cerr << e.what(); diff --git a/src/bin/make-ilabel-transducer.cc b/src/bin/make-ilabel-transducer.cc index a78cefafd3a..70a5d6d4e18 100644 --- a/src/bin/make-ilabel-transducer.cc +++ b/src/bin/make-ilabel-transducer.cc @@ -16,7 +16,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "tree/context-dep.h" #include "util/common-utils.h" @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; ReadKaldiObject(tree_filename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_filename, &trans_model); diff --git a/src/bin/make-pdf-to-tid-transducer.cc b/src/bin/make-pdf-to-tid-transducer.cc index 907380a974d..b4ed45192e6 100644 --- a/src/bin/make-pdf-to-tid-transducer.cc +++ b/src/bin/make-pdf-to-tid-transducer.cc @@ -16,7 +16,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "util/common-utils.h" #include "fst/fstlib.h" @@ -47,10 +47,11 @@ int main(int argc, char *argv[]) { std::string trans_model_filename = po.GetArg(1); std::string fst_out_filename = po.GetOptArg(2); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(trans_model_filename, &trans_model); - fst::VectorFst *fst = GetPdfToTransitionIdTransducer(trans_model); + std::unique_ptr> fst = + GetPdfToTransitionIdTransducer(trans_model); #if _MSC_VER if (fst_out_filename == "") @@ -60,7 +61,6 @@ int main(int argc, char *argv[]) { if (!fst->Write(fst_out_filename)) KALDI_ERR << "Error writing fst to " << (fst_out_filename == "" ? 
"standard output" : fst_out_filename); - delete fst; } catch(const std::exception &e) { std::cerr << e.what(); return -1; diff --git a/src/bin/phones-to-prons.cc b/src/bin/phones-to-prons.cc index 0d7ab12c232..23c17a58385 100644 --- a/src/bin/phones-to-prons.cc +++ b/src/bin/phones-to-prons.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "util/common-utils.h" #include "fst/fstlib.h" diff --git a/src/bin/post-to-pdf-post.cc b/src/bin/post-to-pdf-post.cc index 99aa5770aa5..6c2227806b4 100644 --- a/src/bin/post-to-pdf-post.cc +++ b/src/bin/post-to-pdf-post.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "hmm/posterior.h" @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) { posteriors_rspecifier = po.GetArg(2), posteriors_wspecifier = po.GetArg(3); - TransitionModel trans_model; + Transitions trans_model; { bool binary_in; Input ki(model_rxfilename, &binary_in); diff --git a/src/bin/post-to-phone-post.cc b/src/bin/post-to-phone-post.cc index 871f03a91a1..cf97c631243 100644 --- a/src/bin/post-to-phone-post.cc +++ b/src/bin/post-to-phone-post.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/posterior.h" int main(int argc, char *argv[]) { @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) { kaldi::SequentialPosteriorReader posterior_reader(post_rspecifier); kaldi::PosteriorWriter posterior_writer(phone_post_wspecifier); - TransitionModel trans_model; + Transitions trans_model; { bool binary_in; Input ki(model_rxfilename, &binary_in); @@ -98,11 +98,11 @@ int main(int argc, char *argv[]) { for (int32 i = 1; i <= num_tids; i++) { BaseFloat count = transition_counts(i); - int32 phone = trans_model.TransitionIdToPhone(i), - pdf_id = trans_model.TransitionIdToPdf(i); + const Transitions::TransitionIdInfo + &info = trans_model.InfoForTransitionId(i); // Relying on C++11 value-initialization thingies that should make the // map's elements default to zero. 
- pdf_to_phones[pdf_id][phone] += count; + pdf_to_phones[info.pdf_id][info.phone] += count; } for (int32 i = 0; i < num_pdfs; i++) { diff --git a/src/bin/post-to-tacc.cc b/src/bin/post-to-tacc.cc index afa5315d6b4..842356f8ffb 100644 --- a/src/bin/post-to-tacc.cc +++ b/src/bin/post-to-tacc.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/posterior.h" int main(int argc, char *argv[]) { @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { bool binary_in; Input ki(model_rxfilename, &binary_in); - TransitionModel trans_model; + Transitions trans_model; trans_model.Read(ki.Stream(), binary_in); num_transition_ids = trans_model.NumTransitionIds(); @@ -90,7 +90,7 @@ int main(int argc, char *argv[]) { int32 num_pdf_ids = trans_model.NumPdfs(); Vector pdf_accs(num_pdf_ids); for (int32 i = 1; i < num_transition_ids; i++) { - int32 pid = trans_model.TransitionIdToPdf(i); + int32 pid = trans_model.TransitionIdToPdfFast(i); pdf_accs(pid) += transition_accs(i); } Vector pdf_accs_float(pdf_accs); diff --git a/src/bin/prob-to-post.cc b/src/bin/prob-to-post.cc index 4266d34ca47..7bdff6f1e78 100644 --- a/src/bin/prob-to-post.cc +++ b/src/bin/prob-to-post.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "hmm/posterior.h" diff --git a/src/bin/prons-to-wordali.cc b/src/bin/prons-to-wordali.cc index a6331043500..8579c79ea02 100644 --- a/src/bin/prons-to-wordali.cc +++ b/src/bin/prons-to-wordali.cc @@ -19,7 +19,7 @@ #include "base/kaldi-common.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "util/common-utils.h" #include "fst/fstlib.h" diff --git a/src/bin/show-alignments.cc b/src/bin/show-alignments.cc index 06bc907005f..f8c79d2d79b 100644 --- a/src/bin/show-alignments.cc +++ b/src/bin/show-alignments.cc @@ -19,7 +19,7 @@ #include "base/kaldi-common.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "util/common-utils.h" #include "fst/fstlib.h" @@ -47,7 +47,7 @@ int main(int argc, char *argv[]) { model_filename = po.GetArg(2), alignments_rspecifier = po.GetArg(3); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_filename, &trans_model); fst::SymbolTable *phones_symtab = NULL; @@ -80,8 +80,7 @@ int main(int argc, char *argv[]) { split_str[i] = ss.str(); int32 tid = split[i][0], - tstate = trans_model.TransitionIdToTransitionState(tid), - phone = trans_model.TransitionStateToPhone(tstate); + phone = trans_model.InfoForTransitionId(tid).phone; split_str_phones[i] = phones_symtab->Find(phone) + " "; std::string space; diff --git a/src/bin/show-transitions.cc b/src/bin/show-transitions.cc index bdc780b060a..db72d47f988 100644 --- a/src/bin/show-transitions.cc +++ b/src/bin/show-transitions.cc @@ -18,7 +18,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. 
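The post-to-phone-post.cc, post-to-tacc.cc and show-alignments.cc hunks above show the new per-transition-id lookups: per-id data such as the phone and pdf-id now comes from InfoForTransitionId(), the pdf-id-only lookup is TransitionIdToPdfFast(), and the old TransitionIdToPhone() / TransitionIdToTransitionState() + TransitionStateToPhone() combinations are gone. A minimal sketch of the pattern; the standalone main() and the "final.mdl" path are placeholders:

    #include "hmm/transitions.h"
    #include "util/common-utils.h"

    int main() {
      using namespace kaldi;
      Transitions trans_model;
      ReadKaldiObject("final.mdl", &trans_model);
      for (int32 tid = 1; tid <= trans_model.NumTransitionIds(); tid++) {
        const Transitions::TransitionIdInfo &info =
            trans_model.InfoForTransitionId(tid);
        int32 phone = info.phone;                               // was TransitionIdToPhone(tid)
        int32 pdf_id = trans_model.TransitionIdToPdfFast(tid);  // was TransitionIdToPdf(tid)
        KALDI_ASSERT(phone > 0 && pdf_id == info.pdf_id);
      }
      return 0;
    }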
-#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fst/fstlib.h" #include "util/common-utils.h" @@ -59,7 +59,7 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < syms->NumSymbols(); i++) names[i] = syms->Find(i); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(transition_model_filename, &trans_model); Vector occs; diff --git a/src/bin/tree-info.cc b/src/bin/tree-info.cc index ce3c5c9cfc1..a1f4f21e983 100644 --- a/src/bin/tree-info.cc +++ b/src/bin/tree-info.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/hmm-topology.h" +#include "hmm/topology.h" #include "tree/context-dep.h" int main(int argc, char *argv[]) { diff --git a/src/bin/weight-silence-post.cc b/src/bin/weight-silence-post.cc index dba935d1cd3..3c8478752c8 100644 --- a/src/bin/weight-silence-post.cc +++ b/src/bin/weight-silence-post.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "hmm/posterior.h" @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) { KALDI_WARN <<"No silence phones, this will have no effect"; ConstIntegerSet silence_set(silence_phones); // faster lookup. - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(model_rxfilename, &trans_model); int32 num_posteriors = 0; diff --git a/src/cblasext/Makefile b/src/cblasext/Makefile new file mode 100644 index 00000000000..a3d684cdee7 --- /dev/null +++ b/src/cblasext/Makefile @@ -0,0 +1,21 @@ + + +all: + +OPENFST_CXXFLAGS = +OPENFST_LDLIBS = + +include ../kaldi.mk + + +# you can uncomment matrix-lib-speed-test if you want to do the speed tests. + +TESTFILES = + +OBJFILES = cblas-extensions.o + +LIBNAME = kaldi-cblasext + +ADDLIBS = ../base/kaldi-base.a + +include ../makefiles/default_rules.mk diff --git a/src/cblasext/cblas-extensions.cc b/src/cblasext/cblas-extensions.cc new file mode 100644 index 00000000000..8d23ae6ab2d --- /dev/null +++ b/src/cblasext/cblas-extensions.cc @@ -0,0 +1,162 @@ +// cblasext/cblas-extensions.cc + +// Copyright 2019 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+ +#include "cblasext/cblas-wrappers.h" +#include "cblasext/cblas-extensions.h" + +namespace kaldi { + +template +void cblasext_Xgemv_sparsevec(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, Real alpha, const Real *Mdata, + KaldiBlasInt stride, const Real *xdata, + KaldiBlasInt incX, Real beta, Real *ydata, + KaldiBlasInt incY) { + if (trans == CblasNoTrans) { + if (beta != 1.0) cblas_Xscal(num_rows, beta, ydata, incY); + for (KaldiBlasInt i = 0; i < num_cols; i++) { + Real x_i = xdata[i * incX]; + if (x_i == 0.0) continue; + // Add to ydata, the i'th column of M, times alpha * x_i + cblas_Xaxpy(num_rows, x_i * alpha, Mdata + i, stride, ydata, incY); + } + } else { + if (beta != 1.0) cblas_Xscal(num_cols, beta, ydata, incY); + for (KaldiBlasInt i = 0; i < num_rows; i++) { + Real x_i = xdata[i * incX]; + if (x_i == 0.0) continue; + // Add to ydata, the i'th row of M, times alpha * x_i + cblas_Xaxpy(num_cols, x_i * alpha, + Mdata + (i * stride), 1, ydata, incY); + } + } +} + + +template +void cblasext_Xgemv_sparsevec(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, float alpha, const float *Mdata, + KaldiBlasInt stride, const float *xdata, + KaldiBlasInt incX, float beta, float *ydata, + KaldiBlasInt incY); +template +void cblasext_Xgemv_sparsevec(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, double alpha, const double *Mdata, + KaldiBlasInt stride, const double *xdata, + KaldiBlasInt incX, double beta, double *ydata, + KaldiBlasInt incY); + + +template +void cblasext_mul_elements_vec( + const KaldiBlasInt dim, + const Real *a, + Real *b) { // does b *= a, elementwise. + Real c1, c2, c3, c4; + KaldiBlasInt i; + for (i = 0; i + 4 <= dim; i += 4) { + c1 = a[i] * b[i]; + c2 = a[i+1] * b[i+1]; + c3 = a[i+2] * b[i+2]; + c4 = a[i+3] * b[i+3]; + b[i] = c1; + b[i+1] = c2; + b[i+2] = c3; + b[i+3] = c4; + } + for (; i < dim; i++) + b[i] *= a[i]; +} + +template void cblasext_mul_elements_vec(const KaldiBlasInt dim, + const float *a, float *b); +template void cblasext_mul_elements_vec(const KaldiBlasInt dim, + const double *a, double *b); + + +template +void cblasext_mul_elements_mat( + const Real *Adata, + KaldiBlasInt a_num_rows, + KaldiBlasInt a_num_cols, + KaldiBlasInt a_stride, + Real *Bdata, + KaldiBlasInt b_stride) { + if (a_num_cols == a_stride && a_num_cols == b_stride) { + cblasext_mul_elements_vec(a_num_rows * a_num_cols, Adata, Bdata); + } else { + for (KaldiBlasInt i = 0; i < a_num_rows; i++) { + cblasext_mul_elements_vec(a_num_cols, Adata, Bdata); + Adata += a_stride; + Bdata += b_stride; + } + } +} + + +template void cblasext_mul_elements_mat( + const float *Adata, KaldiBlasInt a_num_rows, + KaldiBlasInt a_num_cols, KaldiBlasInt a_stride, + float *Bdata, KaldiBlasInt b_stride); +template void cblasext_mul_elements_mat( + const double *Adata, KaldiBlasInt a_num_rows, + KaldiBlasInt a_num_cols, KaldiBlasInt a_stride, + double *Bdata, KaldiBlasInt b_stride); + + +template +Real cblasext_trace_mat_mat( + const Real *a_data, + KaldiBlasInt a_num_rows, KaldiBlasInt a_num_cols, + KaldiBlasInt a_stride, KaldiBlasInt a_col_stride, + const Real *b_data, CBLAS_TRANSPOSE b_trans, + KaldiBlasInt b_stride, KaldiBlasInt b_col_stride) { + Real ans = 0.0; + if (b_trans == CblasNoTrans) { + for (KaldiBlasInt i = 0; i < a_num_rows; + i++, a_data += a_stride, b_data += b_col_stride) { + ans += cblas_Xdot(a_num_cols, a_data, a_col_stride, b_data, b_stride); + } + return ans; + } else { + for (KaldiBlasInt i = 0; i < a_num_rows; + i++, 
a_data += a_stride, b_data += b_stride) { + ans += cblas_Xdot(a_num_cols, a_data, a_col_stride, + b_data, b_col_stride); + } + return ans; + } +} + +template float cblasext_trace_mat_mat( + const float *a_data, + KaldiBlasInt a_num_rows, KaldiBlasInt a_num_cols, + KaldiBlasInt a_stride, KaldiBlasInt a_col_stride, + const float *b_data, CBLAS_TRANSPOSE b_trans, + KaldiBlasInt b_stride, KaldiBlasInt b_col_stride); +template double cblasext_trace_mat_mat( + const double *a_data, + KaldiBlasInt a_num_rows, KaldiBlasInt a_num_cols, + KaldiBlasInt a_stride, KaldiBlasInt a_col_stride, + const double *b_data, CBLAS_TRANSPOSE b_trans, + KaldiBlasInt b_stride, KaldiBlasInt b_col_stride); + + + +} // namespace kaldi diff --git a/src/cblasext/cblas-extensions.h b/src/cblasext/cblas-extensions.h new file mode 100644 index 00000000000..f3c3dbe3be9 --- /dev/null +++ b/src/cblasext/cblas-extensions.h @@ -0,0 +1,110 @@ +// cblasext/cblas-extensions.h + +// Copyright 2012-2019 Johns Hopkins University (author: Daniel Povey); +// Haihua Xu; Wei Shi + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_MATRIX_CBLAS_EXTENSIONS_H_ +#define KALDI_MATRIX_CBLAS_EXTENSIONS_H_ 1 + + +#include "cblasext/kaldi-blas.h" +#include "cblasext/cblas-wrappers.h" + +// In directories other than this directory, this file is intended to mostly be +// included from .cc files, not from headers, since it includes cblas headers +// (via kaldi-blas.h) and those can be quite polluting. + +// This file contains templated wrappers for CBLAS functions, which enable C++ +// code calling these functions to be templated. +namespace kaldi { + + + +// This has the same interface as cblas_Xgemv, i.e. it does y = alpha M x + beta y; +// it is just specialized for the case where the vector 'x' has a lot of zeros. +template +void cblasext_Xgemv_sparsevec(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, Real alpha, const Real *Mdata, + KaldiBlasInt stride, const Real *xdata, + KaldiBlasInt incX, Real beta, Real *ydata, + KaldiBlasInt incY); + + + +/** + Does, elementwise for 0 <= i < dim, + b[i] *= a[i]. +*/ +template +void cblasext_mul_elements_vec( + const KaldiBlasInt dim, + const Real *a, + Real *b); + + +/** + Does b *= where a and b are matrices of the same dimension. + Does not currently support transpose. + + Requires that a and b do not overlap (but this is not checked). +*/ +template +void cblasext_mul_elements_mat( + const Real *Adata, + KaldiBlasInt a_num_rows, KaldiBlasInt a_num_cols, KaldiBlasInt a_stride, + Real *Bdata, + KaldiBlasInt b_stride); + +/** + For matrices A and B (possibly with column strides as well as + row strides): if transB = false, compute + tr(A B) = \sum_{i,j} A(i, j) B(j, i) + or if transB = true, compute + tr(A B) = \sum_{i,j} A(i, j) B(i, j). 
+ @param [in] Adata Data pointer of matrix A + @param [in] a_num_rows Number of rows of matrix A + @param [in] a_num_cols Number of columns of matrix A + @param [in] a_stride Row stride of matrix A; may have any value. + @param [in] a_col_stride Column stride of A, would be 1 for + a normal matrix; must be positive. + @param [in] b_data Data pointer of matrix B; may be + the same as Adata. + @param [in] b_trans True if B is transposed. Note: the + expression would have the same value + if the transpose was applied to A + instead. + @param [in] b_stride Row stride of matrix B; may have any + value. + @param [in] b_col_stride Column stride of matrix B; must be + positive, will normally be 1. + */ +template +Real cblasext_trace_mat_mat( + const Real *a_data, + KaldiBlasInt a_num_rows, KaldiBlasInt a_num_cols, + KaldiBlasInt a_stride, KaldiBlasInt a_col_stride, + const Real *b_data, CBLAS_TRANSPOSE b_trans, + KaldiBlasInt b_stride, KaldiBlasInt b_col_stride); + + + + + +} +// namespace kaldi + +#endif diff --git a/src/cblasext/cblas-wrappers.h b/src/cblasext/cblas-wrappers.h new file mode 100644 index 00000000000..39fa12931ca --- /dev/null +++ b/src/cblasext/cblas-wrappers.h @@ -0,0 +1,408 @@ +// matrix/cblas-wrappers.h + +// Copyright 2012-2019 Johns Hopkins University (author: Daniel Povey); +// Haihua Xu; Wei Shi + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at + +// http://www.apache.org/licenses/LICENSE-2.0 + +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. +#ifndef KALDI_MATRIX_CBLAS_WRAPPERS_H_ +#define KALDI_MATRIX_CBLAS_WRAPPERS_H_ 1 + + +#include "cblasext/kaldi-blas.h" + +// In directories other than this directory, this file is intended to mostly be +// included from .cc files, not from headers, since it includes cblas headers +// (via kaldi-blas.h) and those can be quite polluting. + +// This file contains templated wrappers for CBLAS functions, which enable C++ +// code calling these functions to be templated. 
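As the file-level comment above says, these wrappers overload on float and double so that higher-level code, like the cblasext_ helpers just shown, can be templated on the real type; the wrapper bodies themselves follow below. A minimal sketch of both uses, assuming contiguous row-major 3x4 matrices for the trace call (the helper names here are illustrative, not part of the patch):

#include "cblasext/cblas-wrappers.h"
#include "cblasext/cblas-extensions.h"

namespace kaldi {
// Compiles for Real = float or double because cblas_Xdot resolves by overload.
template <typename Real>
Real SquaredNorm(const Real *x, KaldiBlasInt dim) {
  return cblas_Xdot(dim, x, 1, x, 1);
}

// With b_trans = CblasTrans this returns \sum_{i,j} A(i,j) * B(i,j); for a
// contiguous row-major matrix the row stride equals the number of columns and
// the column stride is 1.
double TraceExample(const double *A, const double *B) {
  return cblasext_trace_mat_mat(A, /*a_num_rows=*/3, /*a_num_cols=*/4,
                                /*a_stride=*/4, /*a_col_stride=*/1,
                                B, CblasTrans,
                                /*b_stride=*/4, /*b_col_stride=*/1);
}
}  // namespace kaldi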
+namespace kaldi { + + +inline void cblas_Xcopy(const KaldiBlasInt N, const float *X, const KaldiBlasInt incX, float *Y, + const KaldiBlasInt incY) { + cblas_scopy(N, X, incX, Y, incY); +} + +inline void cblas_Xcopy(const KaldiBlasInt N, const double *X, const KaldiBlasInt incX, double *Y, + const KaldiBlasInt incY) { + cblas_dcopy(N, X, incX, Y, incY); +} + +inline float cblas_Xasum(const KaldiBlasInt N, const float *X, const KaldiBlasInt incX) { + return cblas_sasum(N, X, incX); +} + +inline double cblas_Xasum(const KaldiBlasInt N, const double *X, const KaldiBlasInt incX) { + return cblas_dasum(N, X, incX); +} + +inline void cblas_Xrot(const KaldiBlasInt N, float *X, const KaldiBlasInt incX, float *Y, + const KaldiBlasInt incY, const float c, const float s) { + cblas_srot(N, X, incX, Y, incY, c, s); +} +inline void cblas_Xrot(const KaldiBlasInt N, double *X, const KaldiBlasInt incX, double *Y, + const KaldiBlasInt incY, const double c, const double s) { + cblas_drot(N, X, incX, Y, incY, c, s); +} +inline float cblas_Xdot(const KaldiBlasInt N, const float *const X, + const KaldiBlasInt incX, const float *const Y, + const KaldiBlasInt incY) { + return cblas_sdot(N, X, incX, Y, incY); +} +inline double cblas_Xdot(const KaldiBlasInt N, const double *const X, + const KaldiBlasInt incX, const double *const Y, + const KaldiBlasInt incY) { + return cblas_ddot(N, X, incX, Y, incY); +} +inline void cblas_Xaxpy(const KaldiBlasInt N, const float alpha, const float *X, + const KaldiBlasInt incX, float *Y, const KaldiBlasInt incY) { + cblas_saxpy(N, alpha, X, incX, Y, incY); +} +inline void cblas_Xaxpy(const KaldiBlasInt N, const double alpha, const double *X, + const KaldiBlasInt incX, double *Y, const KaldiBlasInt incY) { + cblas_daxpy(N, alpha, X, incX, Y, incY); +} +inline void cblas_Xscal(const KaldiBlasInt N, const float alpha, float *data, + const KaldiBlasInt inc) { + cblas_sscal(N, alpha, data, inc); +} +inline void cblas_Xscal(const KaldiBlasInt N, const double alpha, double *data, + const KaldiBlasInt inc) { + cblas_dscal(N, alpha, data, inc); +} +inline void cblas_Xtpmv(CBLAS_TRANSPOSE trans, const float *Mdata, + const KaldiBlasInt num_rows, float *y, const KaldiBlasInt y_inc) { + cblas_stpmv(CblasRowMajor, CblasLower, static_cast(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} +inline void cblas_Xtpmv(CBLAS_TRANSPOSE trans, const double *Mdata, + const KaldiBlasInt num_rows, double *y, const KaldiBlasInt y_inc) { + cblas_dtpmv(CblasRowMajor, CblasLower, static_cast(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} + + +inline void cblas_Xtpsv(CBLAS_TRANSPOSE trans, const float *Mdata, + const KaldiBlasInt num_rows, float *y, const KaldiBlasInt y_inc) { + cblas_stpsv(CblasRowMajor, CblasLower, static_cast(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} +inline void cblas_Xtpsv(CBLAS_TRANSPOSE trans, const double *Mdata, + const KaldiBlasInt num_rows, double *y, const KaldiBlasInt y_inc) { + cblas_dtpsv(CblasRowMajor, CblasLower, static_cast(trans), + CblasNonUnit, num_rows, Mdata, y, y_inc); +} + +// x = alpha * M * y + beta * x +inline void cblas_Xspmv(KaldiBlasInt dim, float alpha, const float *Mdata, + const float *ydata, KaldiBlasInt ystride, + float beta, float *xdata, KaldiBlasInt xstride) { + cblas_sspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata, + ydata, ystride, beta, xdata, xstride); +} +inline void cblas_Xspmv(KaldiBlasInt dim, double alpha, const double *Mdata, + const double *ydata, KaldiBlasInt ystride, + double beta, double *xdata, KaldiBlasInt 
xstride) { + cblas_dspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata, + ydata, ystride, beta, xdata, xstride); +} + +// Implements A += alpha * (x y' + y x'); A is symmetric matrix. +inline void cblas_Xspr2(KaldiBlasInt dim, float alpha, const float *Xdata, + KaldiBlasInt incX, const float *Ydata, KaldiBlasInt incY, + float *Adata) { + cblas_sspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata, + incX, Ydata, incY, Adata); +} +inline void cblas_Xspr2(KaldiBlasInt dim, double alpha, const double *Xdata, + KaldiBlasInt incX, const double *Ydata, KaldiBlasInt incY, + double *Adata) { + cblas_dspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata, + incX, Ydata, incY, Adata); +} + +// Implements A += alpha * (x x'); A is symmetric matrix. +inline void cblas_Xspr(KaldiBlasInt dim, float alpha, const float *Xdata, + KaldiBlasInt incX, float *Adata) { + cblas_sspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata); +} +inline void cblas_Xspr(KaldiBlasInt dim, double alpha, const double *Xdata, + KaldiBlasInt incX, double *Adata) { + cblas_dspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata); +} + +// sgemv,dgemv: y = alpha M x + beta y. +inline void cblas_Xgemv(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, float alpha, const float *Mdata, + KaldiBlasInt stride, const float *xdata, + KaldiBlasInt incX, float beta, float *ydata, KaldiBlasInt incY) { + cblas_sgemv(CblasRowMajor, static_cast(trans), num_rows, + num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY); +} +inline void cblas_Xgemv(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, double alpha, const double *Mdata, + KaldiBlasInt stride, const double *xdata, + KaldiBlasInt incX, double beta, double *ydata, KaldiBlasInt incY) { + cblas_dgemv(CblasRowMajor, static_cast(trans), num_rows, + num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY); +} + +// sgbmv, dgmmv: y = alpha M x + + beta * y. +inline void cblas_Xgbmv(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, KaldiBlasInt num_below, + KaldiBlasInt num_above, float alpha, const float *Mdata, + KaldiBlasInt stride, const float *xdata, + KaldiBlasInt incX, float beta, float *ydata, KaldiBlasInt incY) { + cblas_sgbmv(CblasRowMajor, static_cast(trans), num_rows, + num_cols, num_below, num_above, alpha, Mdata, stride, xdata, + incX, beta, ydata, incY); +} +inline void cblas_Xgbmv(CBLAS_TRANSPOSE trans, KaldiBlasInt num_rows, + KaldiBlasInt num_cols, KaldiBlasInt num_below, + KaldiBlasInt num_above, double alpha, const double *Mdata, + KaldiBlasInt stride, const double *xdata, + KaldiBlasInt incX, double beta, double *ydata, KaldiBlasInt incY) { + cblas_dgbmv(CblasRowMajor, static_cast(trans), num_rows, + num_cols, num_below, num_above, alpha, Mdata, stride, xdata, + incX, beta, ydata, incY); +} + +inline void cblas_Xgemm(const float alpha, + CBLAS_TRANSPOSE transA, + const float *Adata, + KaldiBlasInt a_num_rows, KaldiBlasInt a_num_cols, KaldiBlasInt a_stride, + CBLAS_TRANSPOSE transB, + const float *Bdata, KaldiBlasInt b_stride, + const float beta, + float *Mdata, + KaldiBlasInt num_rows, KaldiBlasInt num_cols,KaldiBlasInt stride) { + cblas_sgemm(CblasRowMajor, static_cast(transA), + static_cast(transB), + num_rows, num_cols, transA == CblasNoTrans ? 
a_num_cols : a_num_rows, + alpha, Adata, a_stride, Bdata, b_stride, + beta, Mdata, stride); +} +inline void cblas_Xgemm(const double alpha, + CBLAS_TRANSPOSE transA, + const double *Adata, + KaldiBlasInt a_num_rows, KaldiBlasInt a_num_cols, KaldiBlasInt a_stride, + CBLAS_TRANSPOSE transB, + const double *Bdata, KaldiBlasInt b_stride, + const double beta, + double *Mdata, + KaldiBlasInt num_rows, KaldiBlasInt num_cols,KaldiBlasInt stride) { + cblas_dgemm(CblasRowMajor, static_cast(transA), + static_cast(transB), + num_rows, num_cols, transA == CblasNoTrans ? a_num_cols : a_num_rows, + alpha, Adata, a_stride, Bdata, b_stride, + beta, Mdata, stride); +} + + +inline void cblas_Xsymm(const float alpha, + KaldiBlasInt sz, + const float *Adata,KaldiBlasInt a_stride, + const float *Bdata,KaldiBlasInt b_stride, + const float beta, + float *Mdata, KaldiBlasInt stride) { + cblas_ssymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata, + a_stride, Bdata, b_stride, beta, Mdata, stride); +} +inline void cblas_Xsymm(const double alpha, + KaldiBlasInt sz, + const double *Adata,KaldiBlasInt a_stride, + const double *Bdata,KaldiBlasInt b_stride, + const double beta, + double *Mdata, KaldiBlasInt stride) { + cblas_dsymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata, + a_stride, Bdata, b_stride, beta, Mdata, stride); +} +// ger: M += alpha x y^T. +inline void cblas_Xger(KaldiBlasInt num_rows, KaldiBlasInt num_cols, float alpha, + const float *xdata, KaldiBlasInt incX, const float *ydata, + KaldiBlasInt incY, float *Mdata, KaldiBlasInt stride) { + cblas_sger(CblasRowMajor, num_rows, num_cols, alpha, xdata, 1, ydata, 1, + Mdata, stride); +} +inline void cblas_Xger(KaldiBlasInt num_rows, KaldiBlasInt num_cols, double alpha, + const double *xdata, KaldiBlasInt incX, const double *ydata, + KaldiBlasInt incY, double *Mdata, KaldiBlasInt stride) { + cblas_dger(CblasRowMajor, num_rows, num_cols, alpha, xdata, 1, ydata, 1, + Mdata, stride); +} + +// syrk: symmetric rank-k update. +// if trans==CblasNoTrans, then C = alpha A A^T + beta C +// else C = alpha A^T A + beta C. +// note: dim_c is dim(C), other_dim_a is the "other" dimension of A, i.e. +// num-cols(A) if CblasNoTrans, or num-rows(A) if CblasTrans. +// We only need the row-major and lower-triangular option of this, and this +// is hard-coded. +inline void cblas_Xsyrk ( + const CBLAS_TRANSPOSE trans, const KaldiBlasInt dim_c, + const KaldiBlasInt other_dim_a, const float alpha, const float *A, + const KaldiBlasInt a_stride, const float beta, float *C, + const KaldiBlasInt c_stride) { + cblas_ssyrk(CblasRowMajor, CblasLower, static_cast(trans), + dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride); +} + +inline void cblas_Xsyrk( + const CBLAS_TRANSPOSE trans, const KaldiBlasInt dim_c, + const KaldiBlasInt other_dim_a, const double alpha, const double *A, + const KaldiBlasInt a_stride, const double beta, double *C, + const KaldiBlasInt c_stride) { + cblas_dsyrk(CblasRowMajor, CblasLower, static_cast(trans), + dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride); +} + +/// matrix-vector multiply using a banded matrix; we always call this +/// with b = 1 meaning we're multiplying by a diagonal matrix. This is used for +/// elementwise multiplication. We miss some of the arguments out of this +/// wrapper. 
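The comment above is the reason these last two wrappers exist: with zero bandwidth, sbmv reduces to multiplication by a diagonal matrix, which gives an elementwise multiply-accumulate. A hedged sketch of that reading (the wrapper definitions themselves follow immediately below; the helper here is purely illustrative):

#include "cblasext/cblas-wrappers.h"

namespace kaldi {
// Computes y[i] = beta * y[i] + alpha * a[i] * x[i] for i in [0, dim), by
// treating 'a' as the diagonal of a banded matrix with zero bandwidth.
inline void MulElementsAddDouble(KaldiBlasInt dim, double alpha,
                                 const double *a, const double *x,
                                 double beta, double *y) {
  cblas_Xsbmv1(dim, a, alpha, x, beta, y);
}
}  // namespace kaldi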
+inline void cblas_Xsbmv1( + const KaldiBlasInt dim, + const double *A, + const double alpha, + const double *x, + const double beta, + double *y) { + cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A, + 1, x, 1, beta, y, 1); +} + +inline void cblas_Xsbmv1( + const KaldiBlasInt dim, + const float *A, + const float alpha, + const float *x, + const float beta, + float *y) { + cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A, + 1, x, 1, beta, y, 1); +} + + +// add clapack here +#if !defined(HAVE_ATLAS) +inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) { + stptri_(const_cast("U"), const_cast("N"), num_rows, Mdata, result); +} +inline void clapack_Xtptri(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *result) { + dtptri_(const_cast("U"), const_cast("N"), num_rows, Mdata, result); +} +// +inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols, + float *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot, + KaldiBlasInt *result) { + sgetrf_(num_rows, num_cols, Mdata, stride, pivot, result); +} +inline void clapack_Xgetrf2(KaldiBlasInt *num_rows, KaldiBlasInt *num_cols, + double *Mdata, KaldiBlasInt *stride, KaldiBlasInt *pivot, + KaldiBlasInt *result) { + dgetrf_(num_rows, num_cols, Mdata, stride, pivot, result); +} + +// +inline void clapack_Xgetri2(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride, + KaldiBlasInt *pivot, float *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + sgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result); +} +inline void clapack_Xgetri2(KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride, + KaldiBlasInt *pivot, double *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + dgetri_(num_rows, Mdata, stride, pivot, p_work, l_work, result); +} +// +inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols, + KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *stride, + float *sv, float *Vdata, KaldiBlasInt *vstride, + float *Udata, KaldiBlasInt *ustride, float *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + sgesvd_(v, u, + num_cols, num_rows, Mdata, stride, + sv, Vdata, vstride, Udata, ustride, + p_work, l_work, result); +} +inline void clapack_Xgesvd(char *v, char *u, KaldiBlasInt *num_cols, + KaldiBlasInt *num_rows, double *Mdata, KaldiBlasInt *stride, + double *sv, double *Vdata, KaldiBlasInt *vstride, + double *Udata, KaldiBlasInt *ustride, double *p_work, + KaldiBlasInt *l_work, KaldiBlasInt *result) { + dgesvd_(v, u, + num_cols, num_rows, Mdata, stride, + sv, Vdata, vstride, Udata, ustride, + p_work, l_work, result); +} +// +void inline clapack_Xsptri(KaldiBlasInt *num_rows, float *Mdata, + KaldiBlasInt *ipiv, float *work, KaldiBlasInt *result) { + ssptri_(const_cast("U"), num_rows, Mdata, ipiv, work, result); +} +void inline clapack_Xsptri(KaldiBlasInt *num_rows, double *Mdata, + KaldiBlasInt *ipiv, double *work, KaldiBlasInt *result) { + dsptri_(const_cast("U"), num_rows, Mdata, ipiv, work, result); +} +// +void inline clapack_Xsptrf(KaldiBlasInt *num_rows, float *Mdata, + KaldiBlasInt *ipiv, KaldiBlasInt *result) { + ssptrf_(const_cast("U"), num_rows, Mdata, ipiv, result); +} +void inline clapack_Xsptrf(KaldiBlasInt *num_rows, double *Mdata, + KaldiBlasInt *ipiv, KaldiBlasInt *result) { + dsptrf_(const_cast("U"), num_rows, Mdata, ipiv, result); +} +#else +inline void clapack_Xgetrf(KaldiBlasInt num_rows, KaldiBlasInt num_cols, + float *Mdata, KaldiBlasInt stride, + KaldiBlasInt *pivot, KaldiBlasInt *result) { + *result = 
clapack_sgetrf(CblasColMajor, num_rows, num_cols, + Mdata, stride, pivot); +} + +inline void clapack_Xgetrf(KaldiBlasInt num_rows, KaldiBlasInt num_cols, + double *Mdata, KaldiBlasInt stride, + KaldiBlasInt *pivot, KaldiBlasInt *result) { + *result = clapack_dgetrf(CblasColMajor, num_rows, num_cols, + Mdata, stride, pivot); +} +// +inline KaldiBlasInt clapack_Xtrtri(KaldiBlasInt num_rows, float *Mdata, KaldiBlasInt stride) { + return clapack_strtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows, + Mdata, stride); +} + +inline KaldiBlasInt clapack_Xtrtri(KaldiBlasInt num_rows, double *Mdata, KaldiBlasInt stride) { + return clapack_dtrtri(CblasColMajor, CblasUpper, CblasNonUnit, num_rows, + Mdata, stride); +} +// +inline void clapack_Xgetri(KaldiBlasInt num_rows, float *Mdata, KaldiBlasInt stride, + KaldiBlasInt *pivot, KaldiBlasInt *result) { + *result = clapack_sgetri(CblasColMajor, num_rows, Mdata, stride, pivot); +} +inline void clapack_Xgetri(KaldiBlasInt num_rows, double *Mdata, KaldiBlasInt stride, + KaldiBlasInt *pivot, KaldiBlasInt *result) { + *result = clapack_dgetri(CblasColMajor, num_rows, Mdata, stride, pivot); +} +#endif + +} +// namespace kaldi + +#endif diff --git a/src/matrix/kaldi-blas.h b/src/cblasext/kaldi-blas.h similarity index 96% rename from src/matrix/kaldi-blas.h rename to src/cblasext/kaldi-blas.h index 8a06540bba2..88ba12a0be1 100644 --- a/src/matrix/kaldi-blas.h +++ b/src/cblasext/kaldi-blas.h @@ -122,10 +122,8 @@ typedef integer KaldiBlasInt; #ifdef HAVE_MKL typedef MKL_INT KaldiBlasInt; #endif - #ifdef HAVE_ATLAS -// in this case there is no need for KaldiBlasInt-- this typedef is only needed -// for Svd code which is not included in ATLAS (we re-implement it). +typedef int KaldiBlasInt; #endif diff --git a/src/chain/Makefile b/src/chain/Makefile index fbad28f7de6..dd4859f5449 100644 --- a/src/chain/Makefile +++ b/src/chain/Makefile @@ -18,7 +18,7 @@ LIBNAME = kaldi-chain ADDLIBS = ../cudamatrix/kaldi-cudamatrix.a ../lat/kaldi-lat.a \ ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \ - ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a + ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a # Make sure we have CUDA_ARCH from kaldi.mk, ifeq ($(CUDA), true) diff --git a/src/chain/chain-den-graph.cc b/src/chain/chain-den-graph.cc index 11c851091bd..36e82c1baf6 100644 --- a/src/chain/chain-den-graph.cc +++ b/src/chain/chain-den-graph.cc @@ -162,7 +162,7 @@ void DenominatorGraph::GetNormalizationFst(const fst::StdVectorFst &ifst, } -void MapFstToPdfIdsPlusOne(const TransitionModel &trans_model, +void MapFstToPdfIdsPlusOne(const Transitions &trans_model, fst::StdVectorFst *fst) { int32 num_states = fst->NumStates(); for (int32 s = 0; s < num_states; s++) { @@ -171,7 +171,7 @@ void MapFstToPdfIdsPlusOne(const TransitionModel &trans_model, fst::StdArc arc = aiter.Value(); KALDI_ASSERT(arc.ilabel == arc.olabel); if (arc.ilabel > 0) { - arc.ilabel = trans_model.TransitionIdToPdf(arc.ilabel) + 1; + arc.ilabel = trans_model.TransitionIdToPdfFast(arc.ilabel) + 1; arc.olabel = arc.ilabel; aiter.SetValue(arc); } @@ -295,7 +295,7 @@ static void CheckDenominatorFst(int32 num_pdfs, } void CreateDenominatorFst(const ContextDependency &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::StdVectorFst &phone_lm_in, fst::StdVectorFst *den_fst) { using fst::StdVectorFst; @@ -336,31 +336,26 @@ void CreateDenominatorFst(const ContextDependency &ctx_dep, << 
context_dep_lm.NumStates() << " and " << NumArcs(context_dep_lm); std::vector disambig_syms_h; // disambiguation symbols on input side - // of H -- will be empty. + // of H -- will be empty. + HTransducerConfig h_config; - // the default is 1, but just document that we want this to stay as one. - // we'll use the same value in test time. Consistency is the key here. - h_config.transition_scale = 1.0; - - StdVectorFst *h_fst = GetHTransducer(inv_cfst.IlabelInfo(), - ctx_dep, - trans_model, - h_config, - &disambig_syms_h); + std::unique_ptr h_fst = GetHTransducer(inv_cfst.IlabelInfo(), + ctx_dep, + trans_model, + h_config, + &disambig_syms_h); KALDI_ASSERT(disambig_syms_h.empty()); StdVectorFst transition_id_fst; TableCompose(*h_fst, context_dep_lm, &transition_id_fst); - delete h_fst; - BaseFloat self_loop_scale = 1.0; // We have to be careful to use the same - // value in test time. // 'reorder' must always be set to true for chain models. - bool reorder = true; - bool check_no_self_loops = true; + bool currently_self_loop_free = true, + use_weights = true; // add self-loops to the FST with transition-ids as its labels. - AddSelfLoops(trans_model, disambig_syms_h, self_loop_scale, reorder, - check_no_self_loops, &transition_id_fst); + AddSelfLoops(trans_model, disambig_syms_h, + currently_self_loop_free, use_weights, + &transition_id_fst); // at this point transition_id_fst will have transition-ids as its ilabels and // context-dependent phones (indexes into IlabelInfo()) as its olabels. // Discard the context-dependent phones by projecting on the input, keeping diff --git a/src/chain/chain-den-graph.h b/src/chain/chain-den-graph.h index b2510651f39..baf5ac2c6f1 100644 --- a/src/chain/chain-den-graph.h +++ b/src/chain/chain-den-graph.h @@ -32,7 +32,7 @@ #include "lat/kaldi-lattice.h" #include "matrix/kaldi-matrix.h" #include "chain/chain-datastruct.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "cudamatrix/cu-matrix.h" #include "cudamatrix/cu-vector.h" #include "cudamatrix/cu-array.h" @@ -149,7 +149,7 @@ void MinimizeAcceptorNoPush(fst::StdVectorFst *fst); // transition-ids to pdf-ids plus one. Assumes 'fst' // is an acceptor, but does not check this (only looks at its // ilabels). -void MapFstToPdfIdsPlusOne(const TransitionModel &trans_model, +void MapFstToPdfIdsPlusOne(const Transitions &trans_model, fst::StdVectorFst *fst); // Starting from an acceptor on phones that represents some kind of compiled @@ -157,7 +157,7 @@ void MapFstToPdfIdsPlusOne(const TransitionModel &trans_model, // denominator-graph. Note: there is similar code in chain-supervision.cc, when // creating the supervision graph. 
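The CreateDenominatorFst() hunk above shows the two graph-building API changes that recur through the rest of this patch: GetHTransducer() now returns a smart pointer rather than a raw FST the caller must delete, and AddSelfLoops() takes two booleans (whether the graph is currently self-loop-free, and whether to apply weights) in place of the old self_loop_scale/reorder/check_no_self_loops arguments. A sketch of the new call pattern follows; the pointee type of the unique_ptr is an assumption (the template argument is elided in the hunk) and the wrapper function itself is illustrative only. The matching declaration changes in chain-den-graph.h continue just below.

#include <memory>
#include <vector>
#include "hmm/hmm-utils.h"
#include "hmm/transitions.h"
#include "fstext/fstext-lib.h"
#include "tree/context-dep.h"

namespace kaldi {
void BuildTransitionIdFst(const ContextDependency &ctx_dep,
                          const Transitions &trans_model,
                          const std::vector<std::vector<int32> > &ilabel_info,
                          const fst::StdVectorFst &context_dep_lm,
                          fst::StdVectorFst *transition_id_fst) {
  std::vector<int32> disambig_syms_h;  // stays empty for the denominator graph.
  HTransducerConfig h_config;
  std::unique_ptr<fst::StdVectorFst> h_fst =
      GetHTransducer(ilabel_info, ctx_dep, trans_model, h_config,
                     &disambig_syms_h);  // no manual 'delete h_fst' any more.
  TableCompose(*h_fst, context_dep_lm, transition_id_fst);
  bool currently_self_loop_free = true,  // H was built without self-loops here,
       use_weights = true;               // and we do want transition weights.
  AddSelfLoops(trans_model, disambig_syms_h,
               currently_self_loop_free, use_weights, transition_id_fst);
}
}  // namespace kaldi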
void CreateDenominatorFst(const ContextDependency &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::StdVectorFst &phone_lm, fst::StdVectorFst *den_graph); diff --git a/src/chain/chain-denominator.h b/src/chain/chain-denominator.h index 217b7447621..68e6e32682d 100644 --- a/src/chain/chain-denominator.h +++ b/src/chain/chain-denominator.h @@ -31,7 +31,7 @@ #include "tree/context-dep.h" #include "lat/kaldi-lattice.h" #include "matrix/kaldi-matrix.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "cudamatrix/cu-matrix.h" #include "cudamatrix/cu-array.h" #include "chain/chain-den-graph.h" diff --git a/src/chain/chain-generic-numerator.h b/src/chain/chain-generic-numerator.h index fc5e00b2c63..8c542d6049c 100644 --- a/src/chain/chain-generic-numerator.h +++ b/src/chain/chain-generic-numerator.h @@ -32,7 +32,7 @@ #include "tree/context-dep.h" #include "lat/kaldi-lattice.h" #include "matrix/kaldi-matrix.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "chain/chain-supervision.h" #include "cudamatrix/cu-matrix.h" #include "cudamatrix/cu-array.h" diff --git a/src/chain/chain-numerator.h b/src/chain/chain-numerator.h index 15cb31e0571..c4ea4774b53 100644 --- a/src/chain/chain-numerator.h +++ b/src/chain/chain-numerator.h @@ -31,7 +31,7 @@ #include "tree/context-dep.h" #include "lat/kaldi-lattice.h" #include "matrix/kaldi-matrix.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "chain/chain-supervision.h" #include "cudamatrix/cu-matrix.h" #include "cudamatrix/cu-array.h" diff --git a/src/chain/chain-supervision-test.cc b/src/chain/chain-supervision-test.cc index 7ee5ee117b0..8af77af5d12 100644 --- a/src/chain/chain-supervision-test.cc +++ b/src/chain/chain-supervision-test.cc @@ -57,7 +57,7 @@ void ComputeExamplePhoneLanguageModel(const std::vector &phones, void ComputeExampleDenFst(const ContextDependency &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, fst::StdVectorFst *den_graph) { using fst::StdVectorFst; using fst::StdArc; @@ -151,7 +151,7 @@ void TestSupervisionNumerator(const Supervision &supervision) { } -void TestSupervisionAppend(const TransitionModel &trans_model, +void TestSupervisionAppend(const Transitions &trans_model, const Supervision &supervision) { int32 num_append = RandInt(1,5); std::vector input(num_append); @@ -180,7 +180,7 @@ void TestSupervisionAppend(const TransitionModel &trans_model, output.Check(trans_model); } -void TestSupervisionReattached(const TransitionModel &trans_model, +void TestSupervisionReattached(const Transitions &trans_model, const Supervision &supervision, const Supervision &reattached_supervision) { using namespace fst; @@ -333,7 +333,7 @@ void ChainTrainingTest(const DenominatorGraph &den_graph, } void TestSupervisionSplitting(const ContextDependency &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const Supervision &supervision) { fst::StdVectorFst den_fst, normalization_fst; ComputeExampleDenFst(ctx_dep, trans_model, &den_fst); @@ -456,7 +456,7 @@ void ChainDenominatorTest(const DenominatorGraph &den_graph) { void ChainSupervisionTest() { ContextDependency *ctx_dep; - TransitionModel *trans_model = GenRandTransitionModel(&ctx_dep); + Transitions *trans_model = GenRandTransitions(&ctx_dep); const std::vector &phones = trans_model->GetPhones(); int32 subsample_factor = RandInt(1, 3); diff --git a/src/chain/chain-supervision.cc b/src/chain/chain-supervision.cc index 
f8a2c1d11cc..a99592aa403 100644 --- a/src/chain/chain-supervision.cc +++ b/src/chain/chain-supervision.cc @@ -21,6 +21,7 @@ #include "lat/lattice-functions.h" #include "util/text-utils.h" #include "hmm/hmm-utils.h" +#include "fstext/fstext-utils.h" #include namespace kaldi { @@ -229,7 +230,7 @@ bool PhoneLatticeToProtoSupervision(const SupervisionOptions &opts, bool TimeEnforcerFst::GetArc(StateId s, Label ilabel, fst::StdArc* oarc) { // the following call will do the range-check on 'ilabel'. - int32 phone = trans_model_.TransitionIdToPhone(ilabel); + int32 phone = trans_model_.InfoForTransitionId(ilabel).phone; KALDI_ASSERT(static_cast(s) <= allowed_phones_.size()); if (static_cast(s) == allowed_phones_.size()) { // No arcs come from the final state.a @@ -240,7 +241,7 @@ bool TimeEnforcerFst::GetArc(StateId s, Label ilabel, fst::StdArc* oarc) { oarc->ilabel = ilabel; if (convert_to_pdfs_) { // the olabel will be a pdf-id plus one, not a transition-id. - int32 pdf_id = trans_model_.TransitionIdToPdf(ilabel); + int32 pdf_id = trans_model_.TransitionIdToPdfFast(ilabel); oarc->olabel = pdf_id + 1; } else { oarc->olabel = ilabel; @@ -255,7 +256,7 @@ bool TimeEnforcerFst::GetArc(StateId s, Label ilabel, fst::StdArc* oarc) { bool TrainingGraphToSupervisionE2e( const fst::StdVectorFst &training_graph, - const TransitionModel &trans_model, + const Transitions &trans_model, int32 num_frames, Supervision *supervision) { using fst::VectorFst; @@ -276,7 +277,7 @@ bool TrainingGraphToSupervisionE2e( } KALDI_ASSERT(arc.ilabel != 0); StdArc arc2(arc); - arc2.ilabel = arc2.olabel = trans_model.TransitionIdToPdf(arc.ilabel) + 1; + arc2.ilabel = arc2.olabel = trans_model.TransitionIdToPdfFast(arc.ilabel) + 1; aiter.SetValue(arc2); } } @@ -292,7 +293,7 @@ bool TrainingGraphToSupervisionE2e( bool ProtoSupervisionToSupervision( const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const ProtoSupervision &proto_supervision, bool convert_to_pdfs, Supervision *supervision) { @@ -332,39 +333,27 @@ bool ProtoSupervisionToSupervision( // disambiguation symbols on the output. HTransducerConfig h_cfg; - - // We don't want to add any transition probabilities as they will be added - // when we compose with the denominator graph. - h_cfg.transition_scale = 0.0; - - VectorFst *h_fst = GetHTransducer(inv_cfst.IlabelInfo(), - ctx_dep, - trans_model, - h_cfg, - &disambig_syms_h); + h_cfg.include_self_loops = true; + std::unique_ptr> h_fst = GetHTransducer(inv_cfst.IlabelInfo(), + ctx_dep, + trans_model, + h_cfg, + &disambig_syms_h); KALDI_ASSERT(disambig_syms_h.empty()); + // We don't want to include any transition probabilities as they will be added + // when we compose with the normalization FST. + fst::RemoveWeights(h_fst.get()); + VectorFst transition_id_fst; TableCompose(*h_fst, context_dep_fst, &transition_id_fst); - delete h_fst; - - // We don't want to add any transition probabilities as they will be added - // when we compose with the denominator graph. - BaseFloat self_loop_scale = 0.0; - - // You should always set reorder to true; for the current chain-model - // topologies, it will affect results if you are inconsistent about this. - bool reorder = true, - check_no_self_loops = true; - // add self-loops to the FST with transition-ids as its labels. 
- AddSelfLoops(trans_model, disambig_syms_h, self_loop_scale, reorder, - check_no_self_loops, &transition_id_fst); // at this point transition_id_fst will have transition-ids as its ilabels and // context-dependent phones (indexes into ILabelInfo()) as its olabels. // Discard the context-dependent phones by projecting on the input, keeping // only the transition-ids. fst::Project(&transition_id_fst, fst::PROJECT_INPUT); + if (transition_id_fst.Properties(fst::kIEpsilons, true) != 0) { // remove epsilons, if there are any. fst::RmEpsilon(&transition_id_fst); @@ -906,7 +895,7 @@ bool Supervision::operator == (const Supervision &other) const { label_dim == other.label_dim && fst::Equal(fst, other.fst); } -void Supervision::Check(const TransitionModel &trans_mdl) const { +void Supervision::Check(const Transitions &trans_mdl) const { if (weight <= 0.0) KALDI_ERR << "Weight should be positive."; if (frames_per_sequence <= 0) @@ -970,7 +959,7 @@ void GetWeightsForRanges(int32 range_length, } bool ConvertSupervisionToUnconstrained( - const TransitionModel &trans_mdl, + const Transitions &trans_mdl, Supervision *supervision) { KALDI_ASSERT(supervision->label_dim == trans_mdl.NumTransitionIds() && supervision->fst.NumStates() > 0 && @@ -1000,7 +989,7 @@ bool ConvertSupervisionToUnconstrained( } for (int32 i = 0; i < supervision->frames_per_sequence; i++) { supervision->alignment_pdfs[i] = - trans_mdl.TransitionIdToPdf(supervision->alignment_pdfs[i]); + trans_mdl.TransitionIdToPdfFast(supervision->alignment_pdfs[i]); } } @@ -1027,7 +1016,7 @@ bool ConvertSupervisionToUnconstrained( // because these graphs are always built with reorder == true; if it was // built with reorder == false, we'd have to treat the last, not first, // frame specially.) - if (trans_mdl.IsSelfLoop(transition_id) && s != start_state) + if (trans_mdl.InfoForTransitionId(transition_id).is_self_loop && s != start_state) arc.ilabel = 0; aiter.SetValue(arc); } @@ -1062,19 +1051,18 @@ bool ConvertSupervisionToUnconstrained( // There are be no disambiguation symbols here. std::vector disambig_syms; - // We're not adding transition probabilities; we rely on compsition with the + // We're not adding transition probabilities; we rely on composition with the // normalization FST for that. (note: all transition probabilities are just // 0.5 anyway, for the typical chain topology). - BaseFloat self_loop_scale = 0.0; - // 'reorder' must always be true for chain models. - bool reorder = true; - // The FST we're about to call AddSelfLoops() on will have self-loops, on - // the first frame, so disable the check that the FST was originally - // self-loop-free. - bool check_no_self_loops = false; + // + // The FST we're about to call AddSelfLoops() on will already have one + // self-loop, on the first frame, so tell that to AddSelfLoops(). + bool currently_self_loop_free = false, + use_weights = false; supervision->e2e_fsts.resize(1); - AddSelfLoops(trans_mdl, disambig_syms, self_loop_scale, - reorder, check_no_self_loops, &(supervision->e2e_fsts[0])); + AddSelfLoops(trans_mdl, disambig_syms, + currently_self_loop_free, use_weights, + &(supervision->e2e_fsts[0])); } { // Convert transition-ids to pdf-ids+1 on the FST labels, @@ -1089,7 +1077,7 @@ bool ConvertSupervisionToUnconstrained( // AddSelfLoops() works (it calls MakePrecedingInputSymbolsSame(), which // adds epsilons). zero olabels. 
if (arc.ilabel != 0) { - int32 pdf_id_plus_one = trans_mdl.TransitionIdToPdf(arc.ilabel) + 1; + int32 pdf_id_plus_one = trans_mdl.TransitionIdToPdfFast(arc.ilabel) + 1; arc.ilabel = pdf_id_plus_one; arc.olabel = pdf_id_plus_one; aiter.SetValue(arc); diff --git a/src/chain/chain-supervision.h b/src/chain/chain-supervision.h index f1a796dc2f8..0b8a760f1e6 100644 --- a/src/chain/chain-supervision.h +++ b/src/chain/chain-supervision.h @@ -29,7 +29,7 @@ #include "util/common-utils.h" #include "lat/kaldi-lattice.h" #include "fstext/deterministic-fst.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" namespace kaldi { namespace chain { @@ -181,7 +181,7 @@ class TimeEnforcerFst: typedef fst::StdArc::StateId StateId; typedef fst::StdArc::Label Label; - TimeEnforcerFst(const TransitionModel &trans_model, + TimeEnforcerFst(const Transitions &trans_model, bool convert_to_pdfs, const std::vector > &allowed_phones): trans_model_(trans_model), @@ -204,7 +204,7 @@ class TimeEnforcerFst: virtual bool GetArc(StateId s, Label ilabel, fst::StdArc* oarc); private: - const TransitionModel &trans_model_; + const Transitions &trans_model_; // if convert_to_pdfs_ is true, this FST will map from transition-id (on the // input side) to pdf-id plus one (on the output side); if false, both sides' // labels will be transition-id. @@ -234,10 +234,10 @@ struct Supervision { // the maximum possible value of the labels in 'fst' (which go from 1 to // label_dim). For fully-processed examples this will equal the NumPdfs() in the - // TransitionModel object, but for newer-style "unconstrained" examples + // Transitions object, but for newer-style "unconstrained" examples // that have been output by chain-get-supervision but not yet processed // by nnet3-chain-get-egs, it will be the NumTransitionIds() of the - // TransitionModel object. + // Transitions object. int32 label_dim; // This is an epsilon-free unweighted acceptor that is sorted in increasing @@ -297,7 +297,7 @@ struct Supervision { // This function checks that this supervision object satifsies some // of the properties we expect of it, and calls KALDI_ERR if not. - void Check(const TransitionModel &trans_model) const; + void Check(const Transitions &trans_model) const; void Write(std::ostream &os, bool binary) const; void Read(std::istream &is, bool binary); @@ -317,7 +317,7 @@ struct Supervision { */ bool ProtoSupervisionToSupervision( const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const ProtoSupervision &proto_supervision, bool convert_to_pdfs, Supervision *supervision); @@ -333,7 +333,7 @@ bool ProtoSupervisionToSupervision( */ bool TrainingGraphToSupervisionE2e( const fst::StdVectorFst& training_graph, - const TransitionModel& trans_model, + const Transitions& trans_model, int32 num_frames, Supervision *supervision); @@ -484,7 +484,7 @@ void GetWeightsForRanges(int32 range_length, /// It returns true on success, and false if some kind of error happened /// (this is not expected). 
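Several of the hunks above (MapFstToPdfIdsPlusOne, TrainingGraphToSupervisionE2e, and the unconstrained-supervision code) use the same relabeling idiom: every non-epsilon transition-id label becomes its pdf-id plus one, keeping label 0 free for epsilon. A standalone sketch of that idiom, mirroring MapFstToPdfIdsPlusOne from the hunk above but under an illustrative name; the header declaration changes continue below.

#include "fst/fstlib.h"
#include "hmm/transitions.h"

namespace kaldi {
void MapLabelsToPdfIdsPlusOne(const Transitions &trans_model,
                              fst::StdVectorFst *fst) {
  for (fst::StateIterator<fst::StdVectorFst> siter(*fst);
       !siter.Done(); siter.Next()) {
    for (fst::MutableArcIterator<fst::StdVectorFst> aiter(fst, siter.Value());
         !aiter.Done(); aiter.Next()) {
      fst::StdArc arc = aiter.Value();
      if (arc.ilabel > 0) {  // leave epsilons untouched.
        arc.ilabel = trans_model.TransitionIdToPdfFast(arc.ilabel) + 1;
        arc.olabel = arc.ilabel;  // these graphs are acceptors.
        aiter.SetValue(arc);
      }
    }
  }
}
}  // namespace kaldi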
bool ConvertSupervisionToUnconstrained( - const TransitionModel &trans_mdl, + const Transitions &trans_mdl, Supervision *supervision); diff --git a/src/chain/chain-training.h b/src/chain/chain-training.h index 3e7efbb59a1..cd243ff06ba 100644 --- a/src/chain/chain-training.h +++ b/src/chain/chain-training.h @@ -31,7 +31,7 @@ #include "tree/context-dep.h" #include "lat/kaldi-lattice.h" #include "matrix/kaldi-matrix.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "chain/chain-den-graph.h" #include "chain/chain-supervision.h" diff --git a/src/chainbin/Makefile b/src/chainbin/Makefile index 41ac7342d17..519c2bbf77d 100644 --- a/src/chainbin/Makefile +++ b/src/chainbin/Makefile @@ -25,7 +25,7 @@ ADDLIBS = ../nnet3/kaldi-nnet3.a ../chain/kaldi-chain.a \ ../cudamatrix/kaldi-cudamatrix.a ../decoder/kaldi-decoder.a \ ../lat/kaldi-lat.a ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ - ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ + ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/chainbin/chain-get-supervision.cc b/src/chainbin/chain-get-supervision.cc index 1ac89d4630b..8a4904843be 100644 --- a/src/chainbin/chain-get-supervision.cc +++ b/src/chainbin/chain-get-supervision.cc @@ -30,7 +30,7 @@ namespace chain { // This wrapper function does all the job of processing the features and // lattice into ChainSupervision objects, and writing them out. -static bool ProcessSupervision(const TransitionModel &trans_model, +static bool ProcessSupervision(const Transitions &trans_model, const ContextDependencyInterface &ctx_dep, const ProtoSupervision &proto_sup, const std::string &key, @@ -97,7 +97,7 @@ int main(int argc, char *argv[]) { phone_durs_or_lat_rspecifier = po.GetArg(3), supervision_wspecifier = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(trans_model_rxfilename, &trans_model); ContextDependency ctx_dep; diff --git a/src/chainbin/chain-make-den-fst.cc b/src/chainbin/chain-make-den-fst.cc index 0d8d249242b..dc2b41a369d 100644 --- a/src/chainbin/chain-make-den-fst.cc +++ b/src/chainbin/chain-make-den-fst.cc @@ -56,7 +56,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; - TransitionModel trans_model; + Transitions trans_model; fst::StdVectorFst phone_lm; ReadKaldiObject(tree_rxfilename, &ctx_dep); diff --git a/src/chainbin/nnet3-chain-acc-lda-stats.cc b/src/chainbin/nnet3-chain-acc-lda-stats.cc index 693eb2dad86..0cf2d449d76 100644 --- a/src/chainbin/nnet3-chain-acc-lda-stats.cc +++ b/src/chainbin/nnet3-chain-acc-lda-stats.cc @@ -19,7 +19,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "lat/lattice-functions.h" #include "nnet3/nnet-nnet.h" #include "nnet3/nnet-chain-example.h" diff --git a/src/chainbin/nnet3-chain-copy-egs.cc b/src/chainbin/nnet3-chain-copy-egs.cc index 0117fe2200f..46744b239d0 100644 --- a/src/chainbin/nnet3-chain-copy-egs.cc +++ b/src/chainbin/nnet3-chain-copy-egs.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "nnet3/nnet-chain-example.h" namespace kaldi { diff --git a/src/chainbin/nnet3-chain-e2e-get-egs.cc b/src/chainbin/nnet3-chain-e2e-get-egs.cc index 8cdda8deb32..31b14cb7b0f 100644 --- 
a/src/chainbin/nnet3-chain-e2e-get-egs.cc +++ b/src/chainbin/nnet3-chain-e2e-get-egs.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "hmm/posterior.h" #include "nnet3/nnet-example.h" @@ -74,7 +74,7 @@ static int32 FindMinimumLengthPath( */ static bool ProcessFile(const ExampleGenerationConfig &opts, - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::StdVectorFst &normalization_fst, const MatrixBase &feats, const MatrixBase *ivector_feats, @@ -285,7 +285,7 @@ int main(int argc, char *argv[]) { KALDI_ASSERT(normalization_fst.NumStates() > 0); } - TransitionModel trans_model; + Transitions trans_model; ReadKaldiObject(trans_model_rxfilename, &trans_model); RandomAccessBaseFloatMatrixReader feat_reader(feature_rspecifier); diff --git a/src/chainbin/nnet3-chain-get-egs.cc b/src/chainbin/nnet3-chain-get-egs.cc index 1032b7e2125..2c506c5b460 100644 --- a/src/chainbin/nnet3-chain-get-egs.cc +++ b/src/chainbin/nnet3-chain-get-egs.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/posterior.h" #include "nnet3/nnet-example.h" #include "nnet3/nnet-chain-example.h" @@ -86,7 +86,7 @@ namespace nnet3 { **/ -static bool ProcessFile(const TransitionModel *trans_mdl, +static bool ProcessFile(const Transitions *trans_mdl, const fst::StdVectorFst &normalization_fst, const GeneralMatrix &feats, const MatrixBase *ivector_feats, @@ -345,8 +345,8 @@ int main(int argc, char *argv[]) { UtteranceSplitter utt_splitter(eg_config); - const TransitionModel *trans_mdl_ptr = NULL; - TransitionModel trans_mdl; + const Transitions *trans_mdl_ptr = NULL; + Transitions trans_mdl; if (!trans_mdl_rxfilename.empty()) { ReadKaldiObject(trans_mdl_rxfilename, &trans_mdl); diff --git a/src/chainbin/nnet3-chain-merge-egs.cc b/src/chainbin/nnet3-chain-merge-egs.cc index a3686d2fc30..14bdbe55115 100644 --- a/src/chainbin/nnet3-chain-merge-egs.cc +++ b/src/chainbin/nnet3-chain-merge-egs.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "nnet3/nnet-chain-example.h" diff --git a/src/chainbin/nnet3-chain-normalize-egs.cc b/src/chainbin/nnet3-chain-normalize-egs.cc index a97797e3246..70f6852e963 100644 --- a/src/chainbin/nnet3-chain-normalize-egs.cc +++ b/src/chainbin/nnet3-chain-normalize-egs.cc @@ -19,7 +19,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "nnet3/nnet-chain-example.h" #include "chain/chain-supervision.h" diff --git a/src/chainbin/nnet3-chain-shuffle-egs.cc b/src/chainbin/nnet3-chain-shuffle-egs.cc index 7ab6e28f607..94ba30799b0 100644 --- a/src/chainbin/nnet3-chain-shuffle-egs.cc +++ b/src/chainbin/nnet3-chain-shuffle-egs.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "nnet3/nnet-chain-example.h" int main(int argc, char *argv[]) { diff --git a/src/configure b/src/configure index e6ffdf337af..c727948962e 100755 --- a/src/configure +++ b/src/configure @@ -502,8 +502,8 @@ function configure_cuda { echo CUDA = true >> kaldi.mk echo CUDATKDIR = $CUDATKDIR >> kaldi.mk echo "CUDA_ARCH = $CUDA_ARCH" >> kaldi.mk - - + + echo >> kaldi.mk # 64bit/32bit? 
We do not support cross compilation with CUDA so, use direct @@ -524,7 +524,7 @@ WARNING: CUDA will not be used! CUDA is not supported with 32-bit builds." exit 1; fi - + #add cusolver flags for newer toolkits if [ "$CUSOLVER" == "true" ]; then echo "CUDA_LDLIBS += -lcusolver" >> kaldi.mk @@ -1346,6 +1346,9 @@ if [ -n "$ENV_CXXFLAGS" ]; then echo "CXXFLAGS += $ENV_CXXFLAGS" >> kaldi.mk; fi if [ -n "$ENV_LDFLAGS" ]; then echo "LDFLAGS += $ENV_LDFLAGS" >> kaldi.mk; fi if [ -n "$ENV_LDLIBS" ]; then echo "LDLIBS += $ENV_LDLIBS" >> kaldi.mk; fi +echo "# The following makes it possible to include as kaldi/foo/bar.h" >> kaldi.mk +echo "CXXFLAGS += -I ../.." >> kaldi.mk + # We check for slow exp implementation just before we exit. This check uses # and possibly modifies the kaldi.mk file that we just generated. check_for_slow_expf; diff --git a/src/cudadecoder/Makefile b/src/cudadecoder/Makefile index 166f72e060f..6b3f4129a18 100644 --- a/src/cudadecoder/Makefile +++ b/src/cudadecoder/Makefile @@ -20,8 +20,8 @@ LDLIBS += $(CUDA_LDLIBS) LIBNAME = kaldi-cudadecoder -ADDLIBS = ../cudamatrix/kaldi-cudamatrix.a ../base/kaldi-base.a ../matrix/kaldi-matrix.a \ - ../lat/kaldi-lat.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../gmm/kaldi-gmm.a \ +ADDLIBS = ../cudamatrix/kaldi-cudamatrix.a ../base/kaldi-base.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ + ../lat/kaldi-lat.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../gmm/kaldi-gmm.a \ ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a ../gmm/kaldi-gmm.a ../transform/kaldi-transform.a \ ../tree/kaldi-tree.a ../online2/kaldi-online2.a ../nnet3/kaldi-nnet3.a \ ../cudafeat/kaldi-cudafeat.a diff --git a/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.cc b/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.cc index d3ad909d80a..0007234016d 100644 --- a/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.cc +++ b/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.cc @@ -28,7 +28,7 @@ namespace cuda_decoder { void BatchedThreadedNnet3CudaPipeline::Initialize( const fst::Fst &decode_fst, const nnet3::AmNnetSimple &am_nnet, - const TransitionModel &trans_model) { + const Transitions &trans_model) { KALDI_LOG << "BatchedThreadedNnet3CudaPipeline Initialize with " << config_.num_control_threads << " control threads, " << config_.num_worker_threads << " worker threads" diff --git a/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.h b/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.h index 6401b24b7db..79bc6d69de6 100644 --- a/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.h +++ b/src/cudadecoder/batched-threaded-nnet3-cuda-pipeline.h @@ -343,7 +343,7 @@ class BatchedThreadedNnet3CudaPipeline { BatchedThreadedNnet3CudaPipelineConfig config_; CudaFst cuda_fst_; - const TransitionModel *trans_model_; + const Transitions *trans_model_; const nnet3::AmNnetSimple *am_nnet_; nnet3::DecodableNnetSimpleLoopedInfo *decodable_info_; OnlineNnet2FeaturePipelineInfo *feature_info_; diff --git a/src/cudadecoder/cuda-fst.cc b/src/cudadecoder/cuda-fst.cc index 6f899d87321..70f745f286a 100644 --- a/src/cudadecoder/cuda-fst.cc +++ b/src/cudadecoder/cuda-fst.cc @@ -113,21 +113,21 @@ void CudaFst::PopulateArcs(const fst::Fst &fst) { h_arc_id_ilabels_[idx] = arc.ilabel; // For now we consider id indexing == pdf indexing // If the two are differents, we'll call ApplyTransModelOnIlabels with a - // TransitionModel + // Transitions h_arc_pdf_ilabels_[idx] = arc.ilabel; h_arc_olabels_[idx] = arc.olabel; } } } -void 
CudaFst::ApplyTransitionModelOnIlabels( - const TransitionModel &trans_model) { +void CudaFst::ApplyTransitionsOnIlabels( + const Transitions &trans_model) { // Converting ilabel here, to avoid reindexing when reading nnet3 output // We only need to convert the emitting arcs // The emitting arcs are the first e_count_ arcs for (int iarc = 0; iarc < e_count_; ++iarc) h_arc_pdf_ilabels_[iarc] = - trans_model.TransitionIdToPdf(h_arc_id_ilabels_[iarc]); + trans_model.InfoForTransitionId(h_arc_id_ilabels_[iarc]).pdf_id; } void CudaFst::CopyDataToDevice() { @@ -153,7 +153,7 @@ void CudaFst::CopyDataToDevice() { } void CudaFst::Initialize(const fst::Fst &fst, - const TransitionModel *trans_model) { + const Transitions *trans_model) { nvtxRangePushA("CudaFst constructor"); start_ = fst.Start(); @@ -164,7 +164,7 @@ void CudaFst::Initialize(const fst::Fst &fst, // at the end of Initialize h_arc_pdf_ilabels_.resize(arc_count_); PopulateArcs(fst); - if (trans_model) ApplyTransitionModelOnIlabels(*trans_model); + if (trans_model) ApplyTransitionsOnIlabels(*trans_model); KALDI_ASSERT(d_e_offsets_); KALDI_ASSERT(d_ne_offsets_); diff --git a/src/cudadecoder/cuda-fst.h b/src/cudadecoder/cuda-fst.h index 1dac627755b..8c07bb4936d 100644 --- a/src/cudadecoder/cuda-fst.h +++ b/src/cudadecoder/cuda-fst.h @@ -20,7 +20,7 @@ #include "cudadecoder/cuda-decoder-common.h" #include "cudamatrix/cu-device.h" #include "lat/kaldi-lattice.h" -#include "nnet3/decodable-online-looped.h" // TransitionModel +#include "nnet3/decodable-online-looped.h" // Transitions namespace kaldi { namespace cuda_decoder { @@ -52,13 +52,13 @@ class CudaFst { d_final_(nullptr){}; // Creates a CSR representation of the FST, // then copies it to the GPU - // If a TransitionModel is passed, we'll use it to convert the ilabels id + // If a Transitions is passed, we'll use it to convert the ilabels id // indexes into pdf indexes - // If no TransitionModel is passed, we'll assume TransitionModel == identity - // Important: The CudaDecodable won't apply the TransitionModel. If you use a - // TransitionModel, you need to apply it now + // If no Transitions is passed, we'll assume Transitions == identity + // Important: The CudaDecodable won't apply the Transitions. 
If you use a + // Transitions, you need to apply it now void Initialize(const fst::Fst &fst, - const TransitionModel *trans_model = NULL); + const Transitions *trans_model = NULL); void Finalize(); inline uint32_t NumStates() const { return num_states_; } @@ -75,7 +75,7 @@ class CudaFst { // Converting the id ilabels into pdf ilabels using the transition model // It allows the CudaDecoder to read the acoustic model loglikelihoods at the // right indexes - void ApplyTransitionModelOnIlabels(const TransitionModel &trans_model); + void ApplyTransitionsOnIlabels(const Transitions &trans_model); // Copies fst to device into the pre-allocated datastructures void CopyDataToDevice(); // Total number of states diff --git a/src/cudadecoder/decodable-cumatrix.cc b/src/cudadecoder/decodable-cumatrix.cc index d7c1d0359a5..4704238852c 100644 --- a/src/cudadecoder/decodable-cumatrix.cc +++ b/src/cudadecoder/decodable-cumatrix.cc @@ -24,7 +24,7 @@ namespace kaldi { namespace cuda_decoder { DecodableCuMatrixMapped::DecodableCuMatrixMapped( - const TransitionModel &tm, const CuMatrixBase &likes, + const Transitions &tm, const CuMatrixBase &likes, int32 frame_offset) : trans_model_(tm), likes_(&likes), frame_offset_(frame_offset) { if (likes.NumCols() != tm.NumPdfs()) diff --git a/src/cudadecoder/decodable-cumatrix.h b/src/cudadecoder/decodable-cumatrix.h index d34079cc9c7..aaef4c9fd3f 100644 --- a/src/cudadecoder/decodable-cumatrix.h +++ b/src/cudadecoder/decodable-cumatrix.h @@ -35,7 +35,7 @@ class DecodableCuMatrixMapped : public CudaDecodableInterface { // This constructor creates an object that will not delete "likes" when done. // the frame_offset is the frame the row 0 of 'likes' corresponds to, would be // greater than one if this is not the first chunk of likelihoods. 
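The comment above defines the frame_offset convention for DecodableCuMatrixMapped: row 0 of 'likes' corresponds to frame_offset, which is nonzero when likelihoods arrive in chunks, and the constructor checks that the matrix has one column per pdf. A small standalone sketch of that chunked lookup (the class below is a toy, not the Kaldi decodable):

    // Sketch: chunked likelihood lookup with a frame offset.
    #include <cassert>
    #include <iostream>
    #include <vector>

    class ChunkedLoglikes {
     public:
      ChunkedLoglikes(const std::vector<std::vector<float>> &likes, int frame_offset)
          : likes_(likes), frame_offset_(frame_offset) {}

      // 'frame' is a global frame index; row 0 of likes_ holds frame_offset_.
      float LogLikelihood(int frame, int pdf_id) const {
        int row = frame - frame_offset_;
        assert(row >= 0 && row < static_cast<int>(likes_.size()));
        return likes_[row][pdf_id];
      }

     private:
      std::vector<std::vector<float>> likes_;
      int frame_offset_;
    };

    int main() {
      // Second chunk for a 3-pdf model: rows hold frames 100 and 101.
      ChunkedLoglikes chunk({{-1.0f, -2.0f, -0.5f}, {-0.3f, -1.7f, -2.2f}}, 100);
      std::cout << chunk.LogLikelihood(101, 0) << "\n";  // prints -0.3
      return 0;
    }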
- DecodableCuMatrixMapped(const TransitionModel &tm, + DecodableCuMatrixMapped(const Transitions &tm, const CuMatrixBase &likes, int32 frame_offset = 0); @@ -57,7 +57,7 @@ class DecodableCuMatrixMapped : public CudaDecodableInterface { virtual BaseFloat *GetLogLikelihoodsCudaPointer(int32 subsampled_frame); private: - const TransitionModel &trans_model_; // for tid to pdf mapping + const Transitions &trans_model_; // for tid to pdf mapping const CuMatrixBase *likes_; int32 frame_offset_; diff --git a/src/cudadecoderbin/Makefile b/src/cudadecoderbin/Makefile index 6a31a52ceca..b0867e75c7f 100644 --- a/src/cudadecoderbin/Makefile +++ b/src/cudadecoderbin/Makefile @@ -15,12 +15,12 @@ TESTFILES = ADDLIBS = ../cudadecoder/kaldi-cudadecoder.a ../cudafeat/kaldi-cudafeat.a \ ../online2/kaldi-online2.a ../ivector/kaldi-ivector.a \ -../nnet3/kaldi-nnet3.a ../chain/kaldi-chain.a ../nnet2/kaldi-nnet2.a \ +../nnet3/kaldi-nnet3.a ../chain/kaldi-chain.a \ ../cudamatrix/kaldi-cudamatrix.a ../decoder/kaldi-decoder.a \ ../lat/kaldi-lat.a ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \ ../feat/kaldi-feat.a ../transform/kaldi-transform.a \ ../gmm/kaldi-gmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \ -../matrix/kaldi-matrix.a ../base/kaldi-base.a +../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a endif diff --git a/src/cudadecoderbin/batched-wav-nnet3-cuda.cc b/src/cudadecoderbin/batched-wav-nnet3-cuda.cc index df6810ee2c8..10a58699ed6 100644 --- a/src/cudadecoderbin/batched-wav-nnet3-cuda.cc +++ b/src/cudadecoderbin/batched-wav-nnet3-cuda.cc @@ -169,7 +169,7 @@ int main(int argc, char *argv[]) { std::string nnet3_rxfilename = po.GetArg(1), fst_rxfilename = po.GetArg(2), wav_rspecifier = po.GetArg(3), clat_wspecifier = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; nnet3::AmNnetSimple am_nnet; // read transition model and nnet diff --git a/src/cudafeat/Makefile b/src/cudafeat/Makefile index 33aca4eedaa..7dfe3c41cea 100644 --- a/src/cudafeat/Makefile +++ b/src/cudafeat/Makefile @@ -5,17 +5,17 @@ all: include ../kaldi.mk ifeq ($(CUDA), true) -TESTFILES = +TESTFILES = ifeq ($(CUDA), true) OBJFILES += feature-window-cuda.o feature-spectral-cuda.o feature-online-cmvn-cuda.o \ - online-ivector-feature-cuda-kernels.o online-ivector-feature-cuda.o \ - online-cuda-feature-pipeline.o + online-ivector-feature-cuda-kernels.o online-ivector-feature-cuda.o \ + online-cuda-feature-pipeline.o endif LIBNAME = kaldi-cudafeat -ADDLIBS = ../feat/kaldi-feat.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ +ADDLIBS = ../feat/kaldi-feat.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ ../base/kaldi-base.a ../cudamatrix/kaldi-cudamatrix.a \ ../gmm/kaldi-gmm.a ../ivector/kaldi-ivector.a ../online2/kaldi-online2.a diff --git a/src/cudafeat/feature-spectral-cuda.h b/src/cudafeat/feature-spectral-cuda.h index 8683372098c..ba9f8ebea0f 100644 --- a/src/cudafeat/feature-spectral-cuda.h +++ b/src/cudafeat/feature-spectral-cuda.h @@ -22,7 +22,6 @@ #include #endif -#include "cudafeat/feature-window-cuda.h" #include "cudamatrix/cu-matrix.h" #include "cudamatrix/cu-vector.h" #include "feat/feature-fbank.h" @@ -38,8 +37,8 @@ struct CudaSpectralFeatureOptions { SpectralFeatureType feature_type; CudaSpectralFeatureOptions(MfccOptions opts_in) : mfcc_opts(opts_in), - use_log_fbank(true), - use_power(true), + use_log_fbank(true), + use_power(true), use_dct(true), feature_type(MFCC) {} CudaSpectralFeatureOptions(FbankOptions opts){ @@ -75,13 +74,13 @@ class 
CudaSpectralFeatures : public MfccComputer { ~CudaSpectralFeatures(); CudaSpectralFeatureOptions cumfcc_opts_; int32 Dim() - // The dimension of the output is different for MFCC and Fbank. + // The dimension of the output is different for MFCC and Fbank. // This returns the appropriate value depending on the feature // extraction algorithm { if (cumfcc_opts_.feature_type == MFCC) return MfccComputer::Dim(); //If we're running fbank, we need to set the dimension right - else return cumfcc_opts_.mfcc_opts.mel_opts.num_bins + + else return cumfcc_opts_.mfcc_opts.mel_opts.num_bins + (cumfcc_opts_.mfcc_opts.use_energy ? 1 : 0); } diff --git a/src/cudafeat/feature-window-cuda.h b/src/cudafeat/feature-window-cuda.h deleted file mode 100644 index ff749a855b9..00000000000 --- a/src/cudafeat/feature-window-cuda.h +++ /dev/null @@ -1,38 +0,0 @@ -// cudafeat/feature-window-cuda.h -// -// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -// Justin Luitjens -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_CUDAFEAT_FEATURE_WINDOW_CUDA_H_ -#define KALDI_CUDAFEAT_FEATURE_WINDOW_CUDA_H_ - -#include "cudamatrix/cu-matrix.h" -#include "cudamatrix/cu-vector.h" -#include "feat/feature-window.h" - -namespace kaldi { - -// This struct stores a feature window on the device. -// Behind the scense it just computes a feature window on -// the host and then copies it into device memory. -struct CudaFeatureWindowFunction { - CudaFeatureWindowFunction() {} - explicit CudaFeatureWindowFunction(const FrameExtractionOptions &opts); - CuVector cu_window; -}; - -} // namespace kaldi - -#endif // KALDI_CUDAFEAT_FEATURE_WINDOW_CUDA_H_ diff --git a/src/cudafeatbin/Makefile b/src/cudafeatbin/Makefile index 105ece3c67f..b154623b1fb 100644 --- a/src/cudafeatbin/Makefile +++ b/src/cudafeatbin/Makefile @@ -22,6 +22,7 @@ ADDLIBS = ../cudafeat/kaldi-cudafeat.a ../online2/kaldi-online2.a \ ../hmm/kaldi-hmm.a ../feat/kaldi-feat.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ + ../cblasext/kaldi-cblasext.a \ ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/cudamatrix/Makefile b/src/cudamatrix/Makefile index 45c2ba44fd7..5c0b4e7680c 100644 --- a/src/cudamatrix/Makefile +++ b/src/cudamatrix/Makefile @@ -18,7 +18,7 @@ endif LIBNAME = kaldi-cudamatrix -ADDLIBS = ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a +ADDLIBS = ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a # Make sure we have CUDA_ARCH from kaldi.mk, ifeq ($(CUDA), true) diff --git a/src/cudamatrix/cu-common.cc b/src/cudamatrix/cu-common.cc index c788a621a85..d285699edc7 100644 --- a/src/cudamatrix/cu-common.cc +++ b/src/cudamatrix/cu-common.cc @@ -26,7 +26,7 @@ // files in this directory. 
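The Dim() hunk above returns the MFCC dimension for MFCC features and num_bins plus an optional energy slot for fbank. A tiny sketch of that branch (the function and values below are illustrative):

    // Sketch of the output-dimension logic: MFCC uses the cepstral dimension,
    // fbank uses one value per mel bin plus an optional energy term.
    #include <iostream>

    enum FeatureType { MFCC, FBANK };

    int OutputDim(FeatureType type, int num_ceps, int num_bins, bool use_energy) {
      if (type == MFCC) return num_ceps;        // MfccComputer::Dim() equivalent
      return num_bins + (use_energy ? 1 : 0);   // fbank path
    }

    int main() {
      std::cout << OutputDim(MFCC, 13, 23, false) << "\n";  // 13
      std::cout << OutputDim(FBANK, 13, 40, true) << "\n";  // 41
      return 0;
    }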
#include #include "base/kaldi-common.h" -#include "matrix/kaldi-blas.h" +#include "cblasext/kaldi-blas.h" #include "cudamatrix/cu-device.h" #include "cudamatrix/cu-common.h" #include "cudamatrix/cu-matrixdim.h" diff --git a/src/cudamatrix/cu-kernels.cu b/src/cudamatrix/cu-kernels.cu index 21468ca9f63..514d129d56d 100644 --- a/src/cudamatrix/cu-kernels.cu +++ b/src/cudamatrix/cu-kernels.cu @@ -990,7 +990,7 @@ static void _trace_mat_mat_trans(const Real* A, const Real* B, MatrixDim dA, } ssum[tid] = tsum; __syncthreads(); - + // Block reduce # pragma unroll for (int shift = CU1DBLOCK / 2; shift > warpSize; shift >>= 1) { @@ -1655,7 +1655,7 @@ static void _transform_reduce_mat_rows( Real tdata = op.InitValue(); for (int i = tid; i < d.rows; i += CU1DBLOCK) { //Note the loads of mat are uncoalesced. We could eliminate these - //with shared memory but at the matrix sizes we are currently looking + //with shared memory but at the matrix sizes we are currently looking //at it probably would not help much and would add a lot of complexity. //Alternatively we could look at something like trov to help loads. tdata = op.Reduce(tdata, op.Transform(mat[i * d.stride + j])); @@ -3618,7 +3618,7 @@ template __global__ void _cuda_mat_copy_range_clamped( int32_t row_start, int32_t row_end, int32_t num_cols, - const Real * __restrict__ src, int32_t lds, + const Real * __restrict__ src, int32_t lds, int32_t clamp_low, int32_t clamp_high, Real * __restrict__ dst, int32_t ldd) { int32_t rid = blockIdx.y*blockDim.y+threadIdx.y; @@ -3641,7 +3641,7 @@ void _cuda_mat_copy_range_clamped( } } -template +template struct MatrixCopyDesc { const Real *input; Real *output; @@ -3652,7 +3652,7 @@ struct MatrixCopyDesc { template struct BatchedMatrixCopyDesc { //maximum size allowed in formal parameter list - static const int32_t MAX_BATCH_SIZE=128; + static const int32_t MAX_BATCH_SIZE=128; MatrixCopyDesc batch[MAX_BATCH_SIZE]; }; @@ -3660,12 +3660,12 @@ struct BatchedMatrixCopyDesc { // grid dim x,y expands to fill out average in x/y across batches // grid dim.z is batch template -__global__ +__global__ void _cuda_batch_copy_mats(BatchedMatrixCopyDesc batch_desc) { int32_t rid = blockIdx.y * blockDim.y + threadIdx.y; int32_t cid = blockIdx.x * blockDim.x + threadIdx.x; - int32_t bid = blockIdx.z; // batch id + int32_t bid = blockIdx.z; // batch id // read copy parameters MatrixCopyDesc desc = batch_desc.batch[bid]; @@ -5466,7 +5466,7 @@ void cuda_legacy_noop() { void cudaF_mat_copy_range_clamped( int32_t row_start, int32_t row_end, int32_t num_cols, - const float *src, int32_t lds, + const float *src, int32_t lds, int32_t clamp_low, int32_t clamp_high, float *dst, int32_t ldd) { @@ -5480,7 +5480,7 @@ void cudaF_mat_copy_range_clamped( void cudaD_mat_copy_range_clamped( int32_t row_start, int32_t row_end, int32_t num_cols, - const double *src, int32_t lds, + const double *src, int32_t lds, int32_t clamp_low, int32_t clamp_high, double *dst, int32_t ldd) { @@ -5498,14 +5498,14 @@ void cudaF_batched_copy_mats(int32_t num_mats, int32_t *num_rows, dim3 threads(32,32); int32_t total_rows=0, total_cols=0; - - BatchedMatrixCopyDesc batch_desc; + + BatchedMatrixCopyDesc batch_desc; const int32_t MAX_BATCH_SIZE=batch_desc.MAX_BATCH_SIZE; int i; for (i = 0; i < num_mats; i++) { int b = i%MAX_BATCH_SIZE; - + // fill in desc MatrixCopyDesc &desc = batch_desc.batch[b]; desc.num_rows = num_rows[i]; @@ -5523,12 +5523,12 @@ void cudaF_batched_copy_mats(int32_t num_mats, int32_t *num_rows, int32_t rows = ceilf(total_rows / 
(float)MAX_BATCH_SIZE); int32_t cols = ceilf(total_cols / (float)MAX_BATCH_SIZE); dim3 blocks((cols + 31) / 32, - (rows + 31) / 32, + (rows + 31) / 32, MAX_BATCH_SIZE); // no memcpy needed here. Memory will be passed down directly // through paramter passing and live in constant memory - + // launch batch _cuda_batch_copy_mats<<>>(batch_desc); @@ -5544,9 +5544,9 @@ void cudaF_batched_copy_mats(int32_t num_mats, int32_t *num_rows, // compute average number of rows/cols across batch int32_t rows = ceilf(total_rows / (float)remaining); int32_t cols = ceilf(total_cols / (float)remaining); - + dim3 blocks((cols + 31) / 32, - (rows + 31) / 32, + (rows + 31) / 32, remaining); // no memcpy needed here. Memory will be passed down directly @@ -5563,14 +5563,14 @@ void cudaD_batched_copy_mats(int32_t num_mats, int32_t *num_rows, dim3 threads(32,32); int32_t total_rows=0, total_cols=0; - - BatchedMatrixCopyDesc batch_desc; + + BatchedMatrixCopyDesc batch_desc; const int32_t MAX_BATCH_SIZE=batch_desc.MAX_BATCH_SIZE; int i; for (i = 0; i < num_mats; i++) { int b = i%MAX_BATCH_SIZE; - + // fill in desc MatrixCopyDesc &desc = batch_desc.batch[b]; desc.num_rows = num_rows[i]; @@ -5588,12 +5588,12 @@ void cudaD_batched_copy_mats(int32_t num_mats, int32_t *num_rows, int32_t rows = ceilf(total_rows / (float)MAX_BATCH_SIZE); int32_t cols = ceilf(total_cols / (float)MAX_BATCH_SIZE); dim3 blocks((cols + 31) / 32, - (rows + 31) / 32, + (rows + 31) / 32, MAX_BATCH_SIZE); // no memcpy needed here. Memory will be passed down directly // through paramter passing and live in constant memory - + // launch batch _cuda_batch_copy_mats<<>>(batch_desc); @@ -5611,9 +5611,9 @@ void cudaD_batched_copy_mats(int32_t num_mats, int32_t *num_rows, int32_t cols = ceilf(total_cols / (float)remaining); dim3 blocks((cols + 31) / 32, - (rows + 31) / 32, + (rows + 31) / 32, remaining); - + // no memcpy needed here. 
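The cudaF_/cudaD_batched_copy_mats wrappers above pack one copy descriptor per matrix into a fixed-size struct of at most MAX_BATCH_SIZE entries, launch once per full batch, and issue one final launch for the remainder; the descriptors travel by value in the kernel's parameter list, so no explicit memcpy is needed. A host-side sketch of that chunking pattern, with a plain function standing in for the kernel launch (nothing below is CUDA or Kaldi API):

    // Sketch: batch work descriptors into fixed-size groups before launching.
    #include <iostream>
    #include <vector>

    struct CopyDesc { int num_rows, num_cols; };

    struct BatchedCopyDesc {
      static constexpr int kMaxBatchSize = 128;  // bounded by parameter-list size
      CopyDesc batch[kMaxBatchSize];
    };

    // Stand-in for the kernel launch: just reports how much work it was given.
    void LaunchBatch(const BatchedCopyDesc &desc, int count) {
      std::cout << "launch with " << count << " matrices\n";
    }

    void BatchedCopy(const std::vector<CopyDesc> &mats) {
      BatchedCopyDesc desc;
      int i = 0;
      for (; i < static_cast<int>(mats.size()); ++i) {
        int b = i % BatchedCopyDesc::kMaxBatchSize;
        desc.batch[b] = mats[i];
        if (b == BatchedCopyDesc::kMaxBatchSize - 1)   // batch full: launch it
          LaunchBatch(desc, BatchedCopyDesc::kMaxBatchSize);
      }
      int remaining = i % BatchedCopyDesc::kMaxBatchSize;
      if (remaining > 0)                               // leftover partial batch
        LaunchBatch(desc, remaining);                  // only 'remaining' entries valid
    }

    int main() {
      BatchedCopy(std::vector<CopyDesc>(300, CopyDesc{32, 64}));  // 128 + 128 + 44
      return 0;
    }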
Memory will be passed down directly // through paramter passing and live in constant memory diff --git a/src/cudamatrix/cu-kernels.h b/src/cudamatrix/cu-kernels.h index 1df1626fc6d..a706b317cdd 100644 --- a/src/cudamatrix/cu-kernels.h +++ b/src/cudamatrix/cu-kernels.h @@ -1558,7 +1558,7 @@ inline void cuda_mat_uncompress(dim3 Gr, dim3 Bl, BaseFloat *dest, inline void cuda_mat_copy_range_clamped( int32_t row_start, int32_t row_end, int32_t num_cols, - const double *src, int32_t lds, + const double *src, int32_t lds, int32_t clamp_low, int32_t clamp_high, double *dst, int32_t ldd) { cudaD_mat_copy_range_clamped(row_start, row_end, num_cols, @@ -1567,7 +1567,7 @@ inline void cuda_mat_copy_range_clamped( inline void cuda_mat_copy_range_clamped( int32_t row_start, int32_t row_end, int32_t num_cols, - const float *src, int32_t lds, + const float *src, int32_t lds, int32_t clamp_low, int32_t clamp_high, float *dst, int32_t ldd) { cudaF_mat_copy_range_clamped(row_start, row_end, num_cols, @@ -1587,7 +1587,7 @@ inline void cuda_batched_copy_mats(int32_t num_mats, int32_t *num_rows, cudaD_batched_copy_mats(num_mats, num_rows, num_cols, inputs, ldi, outputs, ldo); } - + } // namespace kaldi diff --git a/src/decoder/Makefile b/src/decoder/Makefile index fbd8386f005..b74f45a38a4 100644 --- a/src/decoder/Makefile +++ b/src/decoder/Makefile @@ -13,7 +13,7 @@ LIBNAME = kaldi-decoder ADDLIBS = ../lat/kaldi-lat.a ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ - ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a + ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrixp/kaldi-matrix.a \ + ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/decoder/decodable-matrix.cc b/src/decoder/decodable-matrix.cc index 3cc7b87f2d7..98cd75d1ede 100644 --- a/src/decoder/decodable-matrix.cc +++ b/src/decoder/decodable-matrix.cc @@ -22,7 +22,7 @@ namespace kaldi { DecodableMatrixMapped::DecodableMatrixMapped( - const TransitionModel &tm, + const Transitions &tm, const MatrixBase &likes, int32 frame_offset): trans_model_(tm), likes_(&likes), likes_to_delete_(NULL), @@ -32,12 +32,12 @@ DecodableMatrixMapped::DecodableMatrixMapped( if (likes.NumCols() != tm.NumPdfs()) KALDI_ERR << "Mismatch, matrix has " - << likes.NumCols() << " rows but transition-model has " + << likes.NumCols() << " rows but transitions.has " << tm.NumPdfs() << " pdf-ids."; } DecodableMatrixMapped::DecodableMatrixMapped( - const TransitionModel &tm, const Matrix *likes, + const Transitions &tm, const Matrix *likes, int32 frame_offset): trans_model_(tm), likes_(likes), likes_to_delete_(likes), frame_offset_(frame_offset) { @@ -45,7 +45,7 @@ DecodableMatrixMapped::DecodableMatrixMapped( raw_data_ = likes->Data() - (stride_ * frame_offset_); if (likes->NumCols() != tm.NumPdfs()) KALDI_ERR << "Mismatch, matrix has " - << likes->NumCols() << " rows but transition-model has " + << likes->NumCols() << " rows but transitions.has " << tm.NumPdfs() << " pdf-ids."; } diff --git a/src/decoder/decodable-matrix.h b/src/decoder/decodable-matrix.h index 30b8b467c2e..c7d52c8ff10 100644 --- a/src/decoder/decodable-matrix.h +++ b/src/decoder/decodable-matrix.h @@ -24,7 +24,7 @@ #include #include "base/kaldi-common.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "itf/decodable-itf.h" #include "matrix/kaldi-matrix.h" @@ -34,26 +34,26 @@ namespace kaldi { class DecodableMatrixScaledMapped: public 
DecodableInterface { public: // This constructor creates an object that will not delete "likes" when done. - DecodableMatrixScaledMapped(const TransitionModel &tm, + DecodableMatrixScaledMapped(const Transitions &tm, const Matrix &likes, BaseFloat scale): trans_model_(tm), likes_(&likes), scale_(scale), delete_likes_(false) { if (likes.NumCols() != tm.NumPdfs()) KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has " - << likes.NumCols() << " rows but transition-model has " + << likes.NumCols() << " rows but transitions.has " << tm.NumPdfs() << " pdf-ids."; } // This constructor creates an object that will delete "likes" // when done. - DecodableMatrixScaledMapped(const TransitionModel &tm, + DecodableMatrixScaledMapped(const Transitions &tm, BaseFloat scale, const Matrix *likes): trans_model_(tm), likes_(likes), scale_(scale), delete_likes_(true) { if (likes->NumCols() != tm.NumPdfs()) KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has " - << likes->NumCols() << " rows but transition-model has " + << likes->NumCols() << " rows but transitions.has " << tm.NumPdfs() << " pdf-ids."; } @@ -76,7 +76,7 @@ class DecodableMatrixScaledMapped: public DecodableInterface { if (delete_likes_) delete likes_; } private: - const TransitionModel &trans_model_; // for tid to pdf mapping + const Transitions &trans_model_; // for tid to pdf mapping const Matrix *likes_; BaseFloat scale_; bool delete_likes_; @@ -100,13 +100,13 @@ class DecodableMatrixMapped: public DecodableInterface { // This constructor creates an object that will not delete "likes" when done. // the frame_offset is the frame the row 0 of 'likes' corresponds to, would be // greater than one if this is not the first chunk of likelihoods. - DecodableMatrixMapped(const TransitionModel &tm, + DecodableMatrixMapped(const Transitions &tm, const MatrixBase &likes, int32 frame_offset = 0); // This constructor creates an object that will delete "likes" // when done. - DecodableMatrixMapped(const TransitionModel &tm, + DecodableMatrixMapped(const Transitions &tm, const Matrix *likes, int32 frame_offset = 0); @@ -122,7 +122,7 @@ class DecodableMatrixMapped: public DecodableInterface { virtual ~DecodableMatrixMapped(); private: - const TransitionModel &trans_model_; // for tid to pdf mapping + const Transitions &trans_model_; // for tid to pdf mapping const MatrixBase *likes_; const Matrix *likes_to_delete_; int32 frame_offset_; @@ -151,7 +151,7 @@ class DecodableMatrixMapped: public DecodableInterface { */ class DecodableMatrixMappedOffset: public DecodableInterface { public: - DecodableMatrixMappedOffset(const TransitionModel &tm): + DecodableMatrixMappedOffset(const Transitions &tm): trans_model_(tm), frame_offset_(0), input_is_finished_(false) { } // this is not part of the generic Decodable interface. @@ -192,7 +192,7 @@ class DecodableMatrixMappedOffset: public DecodableInterface { // nothing special to do in destructor. 
virtual ~DecodableMatrixMappedOffset() { } private: - const TransitionModel &trans_model_; // for tid to pdf mapping + const Transitions &trans_model_; // for tid to pdf mapping Matrix loglikes_; int32 frame_offset_; bool input_is_finished_; diff --git a/src/decoder/decoder-wrappers.cc b/src/decoder/decoder-wrappers.cc index 588274e113b..b684b6bcda4 100644 --- a/src/decoder/decoder-wrappers.cc +++ b/src/decoder/decoder-wrappers.cc @@ -32,7 +32,7 @@ namespace kaldi { DecodeUtteranceLatticeFasterClass::DecodeUtteranceLatticeFasterClass( LatticeFasterDecoder *decoder, DecodableInterface *decodable, - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, const std::string &utt, BaseFloat acoustic_scale, @@ -201,7 +201,7 @@ template bool DecodeUtteranceLatticeFaster( LatticeFasterDecoderTpl &decoder, // not const but is really an input. DecodableInterface &decodable, // not const but is really an input. - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, @@ -299,7 +299,7 @@ bool DecodeUtteranceLatticeFaster( template bool DecodeUtteranceLatticeFaster( LatticeFasterDecoderTpl > &decoder, DecodableInterface &decodable, - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, @@ -314,7 +314,7 @@ template bool DecodeUtteranceLatticeFaster( template bool DecodeUtteranceLatticeFaster( LatticeFasterDecoderTpl &decoder, DecodableInterface &decodable, - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, @@ -331,7 +331,7 @@ template bool DecodeUtteranceLatticeFaster( bool DecodeUtteranceLatticeSimple( LatticeSimpleDecoder &decoder, // not const but is really an input. DecodableInterface &decodable, // not const but is really an input. - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, diff --git a/src/decoder/decoder-wrappers.h b/src/decoder/decoder-wrappers.h index 17592d0282b..3e440cea1e5 100644 --- a/src/decoder/decoder-wrappers.h +++ b/src/decoder/decoder-wrappers.h @@ -103,7 +103,7 @@ template bool DecodeUtteranceLatticeFaster( LatticeFasterDecoderTpl &decoder, // not const but is really an input. DecodableInterface &decodable, // not const but is really an input. - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, @@ -129,7 +129,7 @@ class DecodeUtteranceLatticeFasterClass { DecodeUtteranceLatticeFasterClass( LatticeFasterDecoder *decoder, DecodableInterface *decodable, - const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, const std::string &utt, BaseFloat acoustic_scale, @@ -150,7 +150,7 @@ class DecodeUtteranceLatticeFasterClass { // The following variables correspond to inputs: LatticeFasterDecoder *decoder_; DecodableInterface *decodable_; - const TransitionModel *trans_model_; + const Transitions *trans_model_; const fst::SymbolTable *word_syms_; std::string utt_; BaseFloat acoustic_scale_; @@ -183,7 +183,7 @@ class DecodeUtteranceLatticeFasterClass { bool DecodeUtteranceLatticeSimple( LatticeSimpleDecoder &decoder, // not const but is really an input. DecodableInterface &decodable, // not const but is really an input. 
- const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, diff --git a/src/decoder/training-graph-compiler.cc b/src/decoder/training-graph-compiler.cc index 191d02f1720..a59e83dee43 100644 --- a/src/decoder/training-graph-compiler.cc +++ b/src/decoder/training-graph-compiler.cc @@ -23,7 +23,7 @@ namespace kaldi { -TrainingGraphCompiler::TrainingGraphCompiler(const TransitionModel &trans_model, +TrainingGraphCompiler::TrainingGraphCompiler(const Transitions &trans_model, const ContextDependency &ctx_dep, // Does not maintain reference to this. fst::VectorFst *lex_fst, const std::vector &disambig_syms, @@ -98,15 +98,17 @@ bool TrainingGraphCompiler::CompileGraph(const fst::VectorFst &word KALDI_ASSERT(ctx2word_fst.Start() != kNoStateId); HTransducerConfig h_cfg; - h_cfg.transition_scale = opts_.transition_scale; std::vector disambig_syms_h; // disambiguation symbols on - // input side of H. - VectorFst *H = GetHTransducer(inv_cfst.IlabelInfo(), - ctx_dep_, - trans_model_, - h_cfg, - &disambig_syms_h); + // input side of H. + + std::unique_ptr> H = GetHTransducer(inv_cfst.IlabelInfo(), + ctx_dep_, + trans_model_, + h_cfg, + &disambig_syms_h); + + RemoveWeights(H.get()); VectorFst &trans2word_fst = *out_fst; // transition-id to word. TableCompose(*H, ctx2word_fst, &trans2word_fst); @@ -129,15 +131,15 @@ bool TrainingGraphCompiler::CompileGraph(const fst::VectorFst &word MinimizeEncoded(&trans2word_fst); std::vector disambig; - bool check_no_self_loops = true; + bool currently_self_loop_free = true, + use_weights = false; + AddSelfLoops(trans_model_, disambig, - opts_.self_loop_scale, - opts_.reorder, - check_no_self_loops, + currently_self_loop_free, + use_weights, &trans2word_fst); - delete H; return true; } @@ -195,14 +197,13 @@ bool TrainingGraphCompiler::CompileGraphs( } HTransducerConfig h_cfg; - h_cfg.transition_scale = opts_.transition_scale; std::vector disambig_syms_h; - VectorFst *H = GetHTransducer(inv_cfst.IlabelInfo(), - ctx_dep_, - trans_model_, - h_cfg, - &disambig_syms_h); + std::unique_ptr> H = GetHTransducer(inv_cfst.IlabelInfo(), + ctx_dep_, + trans_model_, + h_cfg, + &disambig_syms_h); for (size_t i = 0; i < out_fsts->size(); i++) { VectorFst &ctx2word_fst = *((*out_fsts)[i]); @@ -216,25 +217,21 @@ bool TrainingGraphCompiler::CompileGraphs( if (opts_.rm_eps) RemoveEpsLocal(&trans2word_fst); } - - // Encoded minimization. 
MinimizeEncoded(&trans2word_fst); std::vector disambig; - bool check_no_self_loops = true; + bool currently_self_loop_free = true, + use_weights = true; AddSelfLoops(trans_model_, disambig, - opts_.self_loop_scale, - opts_.reorder, - check_no_self_loops, + currently_self_loop_free, + use_weights, &trans2word_fst); KALDI_ASSERT(trans2word_fst.Start() != kNoStateId); *((*out_fsts)[i]) = trans2word_fst; } - - delete H; return true; } diff --git a/src/decoder/training-graph-compiler.h b/src/decoder/training-graph-compiler.h index ee56c6dfb3d..989accb2a05 100644 --- a/src/decoder/training-graph-compiler.h +++ b/src/decoder/training-graph-compiler.h @@ -21,7 +21,7 @@ #define KALDI_DECODER_TRAINING_GRAPH_COMPILER_H_ #include "base/kaldi-common.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fst/fstlib.h" #include "fstext/fstext-lib.h" #include "tree/context-dep.h" @@ -31,34 +31,20 @@ namespace kaldi { struct TrainingGraphCompilerOptions { - BaseFloat transition_scale; - BaseFloat self_loop_scale; bool rm_eps; - bool reorder; // (Dan-style graphs) - explicit TrainingGraphCompilerOptions(BaseFloat transition_scale = 1.0, - BaseFloat self_loop_scale = 1.0, - bool b = true) : - transition_scale(transition_scale), - self_loop_scale(self_loop_scale), - rm_eps(false), - reorder(b) { } + explicit TrainingGraphCompilerOptions(): rm_eps(false) { } void Register(OptionsItf *opts) { - opts->Register("transition-scale", &transition_scale, "Scale of transition " - "probabilities (excluding self-loops)"); - opts->Register("self-loop-scale", &self_loop_scale, "Scale of self-loop vs. " - "non-self-loop probability mass "); - opts->Register("reorder", &reorder, "Reorder transition ids for greater decoding efficiency."); - opts->Register("rm-eps", &rm_eps, "Remove [most] epsilons before minimization (only applicable " - "if disambig symbols present)"); + opts->Register("rm-eps", &rm_eps, "Remove [most] epsilons before minimization (only " + "matters if disambig symbols present)"); } }; class TrainingGraphCompiler { public: - TrainingGraphCompiler(const TransitionModel &trans_model, // Maintains reference to this object. + TrainingGraphCompiler(const Transitions &trans_model, // Maintains reference to this object. const ContextDependency &ctx_dep, // And this. fst::VectorFst *lex_fst, // Takes ownership of this object. // It should not contain disambiguation symbols or subsequential symbol, @@ -93,7 +79,7 @@ class TrainingGraphCompiler { ~TrainingGraphCompiler() { delete lex_fst_; } private: - const TransitionModel &trans_model_; + const Transitions &trans_model_; const ContextDependency &ctx_dep_; fst::VectorFst *lex_fst_; // lexicon FST (an input; we take // ownership as we need to modify it). diff --git a/src/doc/online_decoding.dox b/src/doc/online_decoding.dox index 9bcc2575be1..dc04d9bef4e 100644 --- a/src/doc/online_decoding.dox +++ b/src/doc/online_decoding.dox @@ -444,25 +444,22 @@ The program to run the TCP sever is online2-tcp-nnet3-decode-faster located in t ~/src/online2bin folder. The usage is as follows: \verbatim -online2-tcp-nnet3-decode-faster +online2-tcp-nnet3-decode-faster \endverbatim For example: \verbatim -online2-tcp-nnet3-decode-faster model/final.mdl graph/HCLG.fst graph/words.txt +online2-tcp-nnet3-decode-faster model/final.mdl graph/HCLG.fst graph/words.txt 5050 \endverbatim The word symbol table is mandatory (unlike other nnet3 online decoding programs) because the server outputs word strings. 
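In the training-graph-compiler hunks above, GetHTransducer now returns a std::unique_ptr, which is why the trailing "delete H;" statements disappear from both CompileGraph and CompileGraphs. A generic sketch of that ownership change (the Make* functions below are toys, not Kaldi code):

    // Sketch: returning unique_ptr instead of a raw owning pointer removes the
    // need for a manual delete on every exit path.
    #include <iostream>
    #include <memory>
    #include <vector>

    struct Fst { std::vector<int> arcs; };

    Fst *MakeFstOld() { return new Fst{{1, 2, 3}}; }     // caller must delete

    std::unique_ptr<Fst> MakeFstNew() {                  // ownership lives in the type
      return std::make_unique<Fst>(Fst{{1, 2, 3}});
    }

    int main() {
      {
        Fst *h = MakeFstOld();
        std::cout << "old: " << h->arcs.size() << " arcs\n";
        delete h;                      // easy to forget, e.g. on an early return
      }
      {
        std::unique_ptr<Fst> h = MakeFstNew();
        std::cout << "new: " << h->arcs.size() << " arcs\n";
      }                                // freed automatically here
      return 0;
    }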
Endpointing is mandatory to make the operation of the program reasonable. Other, non-standard options include: - - port-num - the port the server listens on (by default 5050) - samp-freq - sampling frequency of audio (usually 8000 for telephony and 16000 for other uses) - chunk-length - length of signal being processed by decoder at each step - output-period - how often we check for changes in the decoding (ie. output refresh rate, default 1s) - num-threads-startup - number of threads used when initializing iVector extractor - - read-timeout - it the program doesn't receive data during this timeout, the server terminates the connection. - Use -1 to disable this feature. The TCP protocol simply takes RAW signal on input (16-bit signed integer encoding at chosen sampling frequency) and outputs simple text using the following @@ -482,25 +479,9 @@ command should look like this: \verbatim online2-tcp-nnet3-decode-faster --samp-freq=8000 --frames-per-chunk=20 --extra-left-context-initial=0 --frame-subsampling-factor=3 --config=model/conf/online.conf --min-active=200 --max-active=7000 - --beam=15.0 --lattice-beam=6.0 --acoustic-scale=1.0 --port-num=5050 model/final.mdl graph/HCLG.fst graph/words.txt + --beam=15.0 --lattice-beam=6.0 --acoustic-scale=1.0 model/final.mdl graph/HCLG.fst graph/words.txt 5050 \endverbatim -Note in order to make the communication as simple as possible, the server has to accept -any data on input and cannot figure out when the stream is over. It will therefore not -be able to terminate the connection and it is the client's resposibility to disconnect -when it is ready to do so. As a fallback for certain situations, the read-timeout option -was added, which will automatically disconnect if a chosen amount of seconds has passed. -Keep in mind, that this is not an ideal solution and it's a better idea to design your -client to properly disconnect the connection when neccessary. - -For testing purposes, we will use the netcat program. We will also use sox to reeoncode the -files properly from any source. Netcat has an issue that, similarly to what was stated above -about the server, it cannot always interpret the data and usually it won't automatically -disconnect the TCP connection. To get around this, we will use the '-N' switch, which kills -the connection once streaming of the file is complete, but this can have a small sideffect of -not reading the whole output from the Kaldi server if the discconect comes too fast. Just -keep this in mind if you intend to implement any of these programs into a production environment. 
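As described above, the server reads raw 16-bit signed PCM at the configured sampling frequency and writes plain text back, and the port is now a positional argument rather than an option. A minimal POSIX client sketch of that protocol, assuming a server on 127.0.0.1:5050 and a placeholder file utt.raw; how the server reacts to a half-closed connection is not specified here, so treat the shutdown step as an assumption:

    // Sketch: stream raw 16-bit PCM to the TCP server, print the text replies.
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #include <cstdio>

    int main() {
      int sock = socket(AF_INET, SOCK_STREAM, 0);
      sockaddr_in addr{};
      addr.sin_family = AF_INET;
      addr.sin_port = htons(5050);                      // assumed server port
      inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);  // assumed server address
      if (sock < 0 || connect(sock, reinterpret_cast<sockaddr *>(&addr), sizeof(addr)) != 0) {
        perror("connect");
        return 1;
      }

      // Raw audio: 16-bit signed samples at the --samp-freq the server expects.
      FILE *f = std::fopen("utt.raw", "rb");            // placeholder file name
      char buf[4096];
      size_t n;
      while (f && (n = std::fread(buf, 1, sizeof(buf), f)) > 0)
        send(sock, buf, n, 0);
      if (f) std::fclose(f);
      shutdown(sock, SHUT_WR);                          // signal end of audio (assumption)

      ssize_t r;                                        // server replies with text lines
      while ((r = recv(sock, buf, sizeof(buf), 0)) > 0)
        std::fwrite(buf, 1, static_cast<size_t>(r), stdout);
      close(sock);
      return 0;
    }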
- To send a WAV file into the server, it first needs to be decoded into raw audio, then it can be sent to the socket: \verbatim diff --git a/src/feat/Makefile b/src/feat/Makefile index dcd029f7f94..4396caaf409 100644 --- a/src/feat/Makefile +++ b/src/feat/Makefile @@ -4,19 +4,19 @@ all: include ../kaldi.mk -TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \ +TESTFILES = feature-mfcc-test \ feature-functions-test pitch-functions-test feature-sdc-test \ resample-test online-feature-test signal-test wave-reader-test -OBJFILES = feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \ - feature-spectrogram.o mel-computations.o wave-reader.o \ +OBJFILES = feature-functions.o feature-mfcc.o feature-fbank.o \ + mel-computations.o wave-reader.o \ pitch-functions.o resample.o online-feature.o signal.o \ feature-window.o LIBNAME = kaldi-feat ADDLIBS = ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ - ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a + ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ + ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/feat/feature-common-inl.h b/src/feat/feature-common-inl.h index 26127a4dc4d..10bfe5cdfd1 100644 --- a/src/feat/feature-common-inl.h +++ b/src/feat/feature-common-inl.h @@ -70,15 +70,12 @@ void OfflineFeatureTpl::Compute( } output->Resize(rows_out, cols_out); Vector window; // windowed waveform. - bool use_raw_log_energy = computer_.NeedRawLogEnergy(); for (int32 r = 0; r < rows_out; r++) { // r is frame index. - BaseFloat raw_log_energy = 0.0; ExtractWindow(0, wave, r, computer_.GetFrameOptions(), - feature_window_function_, &window, - (use_raw_log_energy ? &raw_log_energy : NULL)); + feature_window_function_, &window); SubVector output_row(*output, r); - computer_.Compute(raw_log_energy, vtln_warp, &window, &output_row); + computer_.Compute(vtln_warp, &window, &output_row); } } diff --git a/src/feat/feature-common.h b/src/feat/feature-common.h index 3c2fbd37381..04cdca6d8bf 100644 --- a/src/feat/feature-common.h +++ b/src/feat/feature-common.h @@ -115,8 +115,10 @@ class OfflineFeatureTpl { // Note: feature_window_function_ is the windowing function, which initialized // using the options class, that we cache at this level. 
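The Compute() hunk in feature-common-inl.h above drops the raw-log-energy plumbing: each frame is simply extracted with the cached window function and handed to the computer. A standalone sketch of that per-frame loop, using a Hamming window and a trivial log-energy "computer" as stand-ins (frame counting is simplified relative to Kaldi's frame-extraction options):

    // Sketch: framewise feature extraction with a window vector cached once.
    #include <cmath>
    #include <iostream>
    #include <vector>

    std::vector<float> MakeHammingWindow(int n) {
      const double kPi = 3.14159265358979323846;
      std::vector<float> w(n);
      for (int i = 0; i < n; ++i)
        w[i] = 0.54 - 0.46 * std::cos(2.0 * kPi * i / (n - 1));
      return w;
    }

    // Stand-in for the per-frame computer: here, just the frame's log-energy.
    float ComputeFrameFeature(const std::vector<float> &frame) {
      float e = 1e-10f;
      for (float s : frame) e += s * s;
      return std::log(e);
    }

    int main() {
      const int frame_len = 400, frame_shift = 160;   // 25 ms / 10 ms at 16 kHz
      std::vector<float> wave(16000, 0.01f);          // 1 s of dummy audio
      std::vector<float> window = MakeHammingWindow(frame_len);  // cached once

      int num_frames = 1 + (static_cast<int>(wave.size()) - frame_len) / frame_shift;
      for (int r = 0; r < num_frames; ++r) {
        std::vector<float> frame(frame_len);
        for (int i = 0; i < frame_len; ++i)           // extract frame and window it
          frame[i] = wave[r * frame_shift + i] * window[i];
        float feat = ComputeFrameFeature(frame);
        if (r < 3) std::cout << "frame " << r << ": " << feat << "\n";
      }
      return 0;
    }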
OfflineFeatureTpl(const Options &opts): - computer_(opts), - feature_window_function_(computer_.GetFrameOptions()) { } + computer_(opts) { + InitFeatureWindowFunction(computer_.GetFrameOptions(), + &feature_window_function_); + } // Internal (and back-compatibility) interface for computing features, which // requires that the user has already checked that the sampling frequency @@ -164,7 +166,7 @@ class OfflineFeatureTpl { OfflineFeatureTpl &operator =(const OfflineFeatureTpl &other); F computer_; - FeatureWindowFunction feature_window_function_; + Vector feature_window_function_; }; /// @} End of "addtogroup feat" diff --git a/src/feat/feature-fbank-test.cc b/src/feat/feature-fbank-test.cc index 47b7b1c4244..9298b47eba4 100644 --- a/src/feat/feature-fbank-test.cc +++ b/src/feat/feature-fbank-test.cc @@ -29,431 +29,6 @@ using namespace kaldi; -static void UnitTestReadWave() { - - std::cout << "=== UnitTestReadWave() ===\n"; - - Vector v, v2; - - std::cout << "<<<=== Reading waveform\n"; - - { - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - const Matrix data(wave.Data()); - KALDI_ASSERT(data.NumRows() == 1); - v.Resize(data.NumCols()); - v.CopyFromVec(data.Row(0)); - } - - std::cout << "<<<=== Reading Vector waveform, prepared by matlab\n"; - std::ifstream input( - "test_data/test_matlab.ascii" - ); - KALDI_ASSERT(input.good()); - v2.Read(input, false); - input.close(); - - std::cout << "<<<=== Comparing freshly read waveform to 'libsndfile' waveform\n"; - KALDI_ASSERT(v.Dim() == v2.Dim()); - for (int32 i = 0; i < v.Dim(); i++) { - KALDI_ASSERT(v(i) == v2(i)); - } - std::cout << "<<<=== Comparing done\n"; - - // std::cout << "== The Waveform Samples == \n"; - // std::cout << v; - - std::cout << "Test passed :)\n\n"; - -} - - - -/** - */ -static void UnitTestSimple() { - std::cout << "=== UnitTestSimple() ===\n"; - - Vector v(100000); - Matrix m; - - // init with noise - for (int32 i = 0; i < v.Dim(); i++) { - v(i) = (abs( i * 433024253 ) % 65535) - (65535 / 2); - } - - std::cout << "<<<=== Just make sure it runs... Nothing is compared\n"; - // the parametrization object - FbankOptions op; - // trying to have same opts as baseline. - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "rectangular"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; - op.htk_compat = true; - op.use_energy = true; - - Fbank fbank(op); - // use default parameters - - // compute fbanks. - fbank.Compute(v, 1.0, &m); - - // possibly dump - // std::cout << "== Output features == \n" << m; - std::cout << "Test passed :)\n\n"; -} - - -static void UnitTestHTKCompare1() { - std::cout << "=== UnitTestHTKCompare1() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fbank_htk.1", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use fbank with default configuration... 
- FbankOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; - op.htk_compat = true; - op.mel_opts.htk_mode = true; - op.use_energy = false; // C0 not energy. - - Fbank fbank(op); - - // calculate kaldi features - Matrix kaldi_features; - fbank.Compute(waveform, 1.0, &kaldi_features); - - - std::cout << "<<<=== Compare with HTK features...\n"; - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - /// THE FEATURES ARE ALMOST IDENTICAL WITH HTK!!! (SEE THE TOLERANCE!) - if ((std::abs(b - a)) > 0.001) { //<< TOLERANCE TO DIFFERENCES!!!!! - // print the non-matching data only once per-line - if (i_old != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 000007 // FBANK - }; - { - std::ofstream os("tmp.test.wav.fbank_kaldi.1", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fbank_kaldi.1"); -} - - -static void UnitTestHTKCompare2() { - std::cout << "=== UnitTestHTKCompare2() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fbank_htk.2", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use fbank with default configuration... - FbankOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 25.0; - op.htk_compat = true; - op.mel_opts.htk_mode = true; - op.use_energy = false; // C0 not energy. - - Fbank fbank(op); - - // calculate kaldi features - Matrix kaldi_features; - fbank.Compute(waveform, 1.0, &kaldi_features); - - - std::cout << "<<<=== Compare with HTK features...\n"; - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. 
- for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - /// THE FEATURES ARE ALMOST IDENTICAL WITH HTK!!! (SEE THE TOLERANCE!) - if ((std::abs(b - a)) > 0.001) { //<< TOLERANCE TO DIFFERENCES!!!!! - // print the non-matching data only once per-line - if (i_old != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 000007 // FBANK - }; - { - std::ofstream os("tmp.test.wav.fbank_kaldi.1", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fbank_kaldi.1"); -} - -static void UnitTestHTKCompare3() { - std::cout << "=== UnitTestHTKCompare3() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fbank_htk.3", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use fbank with default configuration... - FbankOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 25.0; - op.htk_compat = true; - op.mel_opts.htk_mode = true; - op.use_energy = false; // C0 not energy. - - op.mel_opts.vtln_low = 100.0; - op.mel_opts.vtln_high = 7500.0; - BaseFloat vtln_warp = 0.9; - - Fbank fbank(op); - - // calculate kaldi features - Matrix kaldi_features; - fbank.Compute(waveform, vtln_warp, &kaldi_features); - - - std::cout << "<<<=== Compare with HTK features...\n"; - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - /// THE FEATURES ARE ALMOST IDENTICAL WITH HTK!!! (SEE THE TOLERANCE!) - if ((std::abs(b - a)) > 0.001) { //<< TOLERANCE TO DIFFERENCES!!!!! - // print the non-matching data only once per-line - if (i_old != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - if (j < 20) passed = false; // We know the last couple of filterbanks differ. We let this slide. 
- else KALDI_WARN << "Ignoring difference in last fbanks, we know the algorithms differ."; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 000007 // FBANK - }; - { - std::ofstream os("tmp.test.wav.fbank_kaldi.1", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fbank_kaldi.1"); -} - - -static void UnitTestHTKCompare4() { - std::cout << "=== UnitTestHTKCompare4() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fbank_htk.4", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use fbank with default configuration... - FbankOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 25.0; - op.htk_compat = true; - op.mel_opts.htk_mode = true; - op.use_energy = false; // C0 not energy. - - op.mel_opts.vtln_low = 100.0; - op.mel_opts.vtln_high = 7500.0; - BaseFloat vtln_warp = 1.1; - - Fbank fbank(op); - - // calculate kaldi features - Matrix kaldi_features; - fbank.Compute(waveform, vtln_warp, &kaldi_features); - - - std::cout << "<<<=== Compare with HTK features...\n"; - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - /// THE FEATURES ARE ALMOST IDENTICAL WITH HTK!!! (SEE THE TOLERANCE!) - if ((std::abs(b - a)) > 0.01) { //<< TOLERANCE TO DIFFERENCES!!!!! 
- // print the non-matching data only once per-line - if (i_old != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 000007 // FBANK - }; - { - std::ofstream os("tmp.test.wav.fbank_kaldi.1", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fbank_kaldi.1"); -} - - - - -static void UnitTestFeat() { - UnitTestReadWave(); - UnitTestSimple(); - UnitTestHTKCompare1(); - UnitTestHTKCompare2(); - UnitTestHTKCompare3(); - UnitTestHTKCompare4(); -} - - - int main() { try { @@ -466,5 +41,3 @@ int main() { return 1; } } - - diff --git a/src/feat/feature-fbank.cc b/src/feat/feature-fbank.cc index d9ac03e5920..df10712f956 100644 --- a/src/feat/feature-fbank.cc +++ b/src/feat/feature-fbank.cc @@ -24,28 +24,22 @@ namespace kaldi { FbankComputer::FbankComputer(const FbankOptions &opts): - opts_(opts), srfft_(NULL) { - if (opts.energy_floor > 0.0) - log_energy_floor_ = Log(opts.energy_floor); - - int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); - if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two... - srfft_ = new SplitRadixRealFft(padded_window_size); - + opts_(opts), + srfft_(new SplitRadixRealFft(opts.frame_opts.PaddedWindowSize())) { + KALDI_ASSERT(opts.energy_floor > 0.0 && "Nonzero energy floor is required."); // We'll definitely need the filterbanks info for VTLN warping factor 1.0. // [note: this call caches it.] GetMelBanks(1.0); } FbankComputer::FbankComputer(const FbankComputer &other): - opts_(other.opts_), log_energy_floor_(other.log_energy_floor_), - mel_banks_(other.mel_banks_), srfft_(NULL) { + opts_(other.opts_), + mel_banks_(other.mel_banks_), + srfft_(new SplitRadixRealFft(*(other.srfft_))) { for (std::map::iterator iter = mel_banks_.begin(); iter != mel_banks_.end(); ++iter) iter->second = new MelBanks(*(iter->second)); - if (other.srfft_) - srfft_ = new SplitRadixRealFft(*(other.srfft_)); } FbankComputer::~FbankComputer() { @@ -69,8 +63,7 @@ const MelBanks* FbankComputer::GetMelBanks(BaseFloat vtln_warp) { return this_mel_banks; } -void FbankComputer::Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, +void FbankComputer::Compute(BaseFloat vtln_warp, VectorBase *signal_frame, VectorBase *feature) { @@ -80,45 +73,40 @@ void FbankComputer::Compute(BaseFloat signal_raw_log_energy, feature->Dim() == this->Dim()); - // Compute energy after window function (not the raw one). - if (opts_.use_energy && !opts_.raw_energy) - signal_raw_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::epsilon())); + BaseFloat signal_log_energy = 0.0; + if (opts_.use_energy) + signal_log_energy = Log(std::max( + VecVec(*signal_frame, *signal_frame), + opts_.energy_floor * opts_.frame_opts.WindowSize())); - if (srfft_ != NULL) // Compute FFT using split-radix algorithm. - srfft_->Compute(signal_frame->Data(), true); - else // An alternative algorithm that works for non-powers-of-two. - RealFft(signal_frame, true); + // Compute FFT using split-radix algorithm. 
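The rewritten fbank Compute() above floors the frame energy at energy_floor * window_size, and just below it floors each power-spectrum bin at energy_floor * sqrt(window_size); the new default energy_floor of 1.0e-09 is roughly (1.0/32768.0)^2, i.e. one 16-bit LSB once waveforms are scaled to +-1. A small numeric sketch of that arithmetic (values are illustrative):

    // Sketch of the energy-floor arithmetic in the rewritten fbank computation.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
      const double energy_floor = 1.0e-09;   // default; ~ (1.0/32768.0)^2
      const int window_size = 400;           // 25 ms at 16 kHz

      // A nearly silent frame under the +-1.0 waveform convention.
      std::vector<double> frame(window_size, 1.0 / 32768.0);

      double energy = 0.0;
      for (double s : frame) energy += s * s;

      // Frame log-energy, floored as in the Compute() change above.
      double log_energy = std::log(std::max(energy, energy_floor * window_size));

      // Per-FFT-bin floor applied to the power spectrum.
      double bin_floor = energy_floor * std::sqrt(static_cast<double>(window_size));

      std::printf("energy=%g floor=%g log-energy=%g bin-floor=%g\n",
                  energy, energy_floor * window_size, log_energy, bin_floor);
      return 0;
    }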
+ srfft_->Compute(signal_frame->Data(), true); // Convert the FFT into a power spectrum. ComputePowerSpectrum(signal_frame); SubVector power_spectrum(*signal_frame, 0, signal_frame->Dim() / 2 + 1); - // Use magnitude instead of power if requested. - if (!opts_.use_power) - power_spectrum.ApplyPow(0.5); + // The energy_floor has the scale for the energy of a single sample, and the + // FFT has a higher dynamic range (it's not the orthogonal FFT)... the sqrt + // expression is to correct for that. + BaseFloat floor = opts_.energy_floor * + std::sqrt(BaseFloat(opts_.frame_opts.WindowSize())); + power_spectrum.ApplyFloor(floor); - int32 mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0); + int32 mel_offset = (opts_.use_energy ? 1 : 0); SubVector mel_energies(*feature, mel_offset, opts_.mel_opts.num_bins); // Sum with mel fiterbanks over the power spectrum mel_banks.Compute(power_spectrum, &mel_energies); - if (opts_.use_log_fbank) { - // Avoid log of zero (which should be prevented anyway by dithering). - mel_energies.ApplyFloor(std::numeric_limits::epsilon()); - mel_energies.ApplyLog(); // take the log. - } - // Copy energy as first value (or the last, if htk_compat == true). + mel_energies.ApplyLog(); // take the log. + + // Copy energy as first value if (opts_.use_energy) { - if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) { - signal_raw_log_energy = log_energy_floor_; - } - int32 energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0; - (*feature)(energy_index) = signal_raw_log_energy; + (*feature)(0) = signal_log_energy; } } diff --git a/src/feat/feature-fbank.h b/src/feat/feature-fbank.h index f57d185a41c..665a087fcaa 100644 --- a/src/feat/feature-fbank.h +++ b/src/feat/feature-fbank.h @@ -42,41 +42,26 @@ struct FbankOptions { FrameExtractionOptions frame_opts; MelBanksOptions mel_opts; bool use_energy; // append an extra dimension with energy to the filter banks - BaseFloat energy_floor; - bool raw_energy; // If true, compute energy before preemphasis and windowing - bool htk_compat; // If true, put energy last (if using energy) - bool use_log_fbank; // if true (default), produce log-filterbank, else linear - bool use_power; // if true (default), use power in filterbank analysis, else magnitude. + BaseFloat energy_floor; // Floor on energy, to avoid log(0.0), which will be + // multiplied by sqrt(window-length-in-frames) and + // applied per FFT bin. The value of 1.0e-09 is + // approximately (1.0/32768.0)^2, like a signal value + // of +- 1 in a 16-bit recording. FbankOptions(): mel_opts(23), - // defaults the #mel-banks to 23 for the FBANK computations. - // this seems to be common for 16khz-sampled data, - // but for 8khz-sampled data, 15 may be better. - use_energy(false), - energy_floor(0.0), - raw_energy(true), - htk_compat(false), - use_log_fbank(true), - use_power(true) {} + use_energy(false), + energy_floor(1.0e-09) { } void Register(OptionsItf *opts) { frame_opts.Register(opts); mel_opts.Register(opts); opts->Register("use-energy", &use_energy, - "Add an extra dimension with energy to the FBANK output."); + "Add an extra dimension with energy to the filterbank " + "output."); opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in FBANK computation. " - "Only makes a difference if --use-energy=true; only necessary if " - "--dither=0.0. 
Suggested values: 0.1 or 1.0"); - opts->Register("raw-energy", &raw_energy, - "If true, compute energy before preemphasis and windowing"); - opts->Register("htk-compat", &htk_compat, "If true, put energy last. " - "Warning: not sufficient to get HTK compatible features (need " - "to change other parameters)."); - opts->Register("use-log-fbank", &use_log_fbank, - "If true, produce log-filterbank, else produce linear."); - opts->Register("use-power", &use_power, - "If true, use power, else use magnitude."); + "Floor on energy expressed as a squared-signal value per " + "frame. The default value represents about +-1 in int16 " + "representation."); } }; @@ -94,8 +79,6 @@ class FbankComputer { return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0); } - bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; } - const FrameExtractionOptions &GetFrameOptions() const { return opts_.frame_opts; } @@ -104,11 +87,6 @@ class FbankComputer { Function that computes one frame of features from one frame of signal. - @param [in] signal_raw_log_energy The log-energy of the frame of the signal - prior to windowing and pre-emphasis, or - log(numeric_limits::min()), whichever is greater. Must be - ignored by this function if this class returns false from - this->NeedsRawLogEnergy(). @param [in] vtln_warp The VTLN warping factor that the user wants to be applied when computing features for this utterance. Will normally be 1.0, meaning no warping is to be done. The value will @@ -121,8 +99,7 @@ class FbankComputer { @param [out] feature Pointer to a vector of size this->Dim(), to which the computed feature will be written. */ - void Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, + void Compute(BaseFloat vtln_warp, VectorBase *signal_frame, VectorBase *feature); @@ -133,7 +110,6 @@ class FbankComputer { FbankOptions opts_; - BaseFloat log_energy_floor_; std::map mel_banks_; // BaseFloat is VTLN coefficient. SplitRadixRealFft *srfft_; // Disallow assignment. diff --git a/src/feat/feature-functions.cc b/src/feat/feature-functions.cc index 76500ccf87a..36c20df6f84 100644 --- a/src/feat/feature-functions.cc +++ b/src/feat/feature-functions.cc @@ -29,13 +29,8 @@ namespace kaldi { void ComputePowerSpectrum(VectorBase *waveform) { int32 dim = waveform->Dim(); - // no, letting it be non-power-of-two for now. - // KALDI_ASSERT(dim > 0 && (dim & (dim-1) == 0)); // make sure a power of two.. actually my FFT code - // does not require this (dan) but this is better in case we use different code [dan]. - - // RealFft(waveform, true); // true == forward (not inverse) FFT; makes no difference here, - // as we just want power spectrum. - + // make sure a power of two. + KALDI_ASSERT(dim > 0 && ((dim & (dim-1)) == 0)); // now we have in waveform, first half of complex spectrum // it's stored as [real0, realN/2, real1, im1, real2, im2, ...] int32 half_dim = dim/2; @@ -46,8 +41,9 @@ void ComputePowerSpectrum(VectorBase *waveform) { (*waveform)(i) = real*real + im*im; } (*waveform)(0) = first_energy; - (*waveform)(half_dim) = last_energy; // Will actually never be used, and anyway - // if the signal has been bandlimited sensibly this should be zero. + (*waveform)(half_dim) = last_energy; + // Will actually never be used, and anyway if the signal has been bandlimited + // sensibly this should be zero. 
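ComputePowerSpectrum above now asserts a power-of-two dimension and works on the packed real-FFT layout [real0, realN/2, real1, im1, real2, im2, ...], producing N/2+1 power values. A standalone sketch of that unpacking (toy input, not Kaldi code):

    // Sketch: power spectrum from the packed real-FFT layout described above.
    #include <cassert>
    #include <cstdio>
    #include <vector>

    std::vector<float> PowerSpectrumFromPackedFft(const std::vector<float> &fft) {
      int dim = static_cast<int>(fft.size());
      assert(dim > 0 && (dim & (dim - 1)) == 0);   // must be a power of two
      int half_dim = dim / 2;
      std::vector<float> power(half_dim + 1);
      power[0] = fft[0] * fft[0];                  // DC bin (purely real)
      power[half_dim] = fft[1] * fft[1];           // Nyquist bin (purely real)
      for (int i = 1; i < half_dim; ++i) {
        float re = fft[i * 2], im = fft[i * 2 + 1];
        power[i] = re * re + im * im;
      }
      return power;
    }

    int main() {
      std::vector<float> fft = {4.0f, 0.0f, 1.0f, -1.0f, 0.5f, 0.5f, 2.0f, 0.0f};
      std::vector<float> p = PowerSpectrumFromPackedFft(fft);
      for (std::size_t i = 0; i < p.size(); ++i)
        std::printf("bin %zu: %g\n", i, p[i]);
      return 0;
    }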
} diff --git a/src/feat/feature-mfcc-test.cc b/src/feat/feature-mfcc-test.cc index c4367139707..280e2155c86 100644 --- a/src/feat/feature-mfcc-test.cc +++ b/src/feat/feature-mfcc-test.cc @@ -53,6 +53,7 @@ static void UnitTestReadWave() { ); KALDI_ASSERT(input.good()); v2.Read(input, false); + v2.Scale(BaseFloat(1.0 / 32768.0)); input.close(); std::cout << "<<<=== Comparing freshly read waveform to 'libsndfile' waveform\n"; @@ -71,551 +72,7 @@ static void UnitTestReadWave() { -/** - */ -static void UnitTestSimple() { - std::cout << "=== UnitTestSimple() ===\n"; - Vector v(100000); - Matrix m; - - // init with noise - for (int32 i = 0; i < v.Dim(); i++) { - v(i) = (abs( i * 433024253 ) % 65535) - (65535 / 2); - } - - std::cout << "<<<=== Just make sure it runs... Nothing is compared\n"; - // the parametrization object - MfccOptions op; - // trying to have same opts as baseline. - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "rectangular"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; - op.mel_opts.htk_mode = true; - op.htk_compat = true; - - Mfcc mfcc(op); - // use default parameters - - // compute mfccs. - mfcc.Compute(v, 1.0, &m); - - // possibly dump - // std::cout << "== Output features == \n" << m; - std::cout << "Test passed :)\n\n"; -} - - -static void UnitTestHTKCompare1() { - std::cout << "=== UnitTestHTKCompare1() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fea_htk.1", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use mfcc with default configuration... - MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; - op.mel_opts.htk_mode = true; - op.htk_compat = true; - op.use_energy = false; // C0 not energy. - - Mfcc mfcc(op); - - // calculate kaldi features - Matrix kaldi_raw_features; - mfcc.Compute(waveform, 1.0, &kaldi_raw_features); - - DeltaFeaturesOptions delta_opts; - Matrix kaldi_features; - ComputeDeltas(delta_opts, - kaldi_raw_features, - &kaldi_features); - - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - if ((std::abs(b - a)) > 1.0) { //<< TOLERANCE TO DIFFERENCES!!!!! 
- // print the non-matching data only once per-line - if (i_old != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 021406 // MFCC_D_A_0 - }; - { - std::ofstream os("tmp.test.wav.fea_kaldi.1", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fea_kaldi.1"); -} - - -static void UnitTestHTKCompare2() { - std::cout << "=== UnitTestHTKCompare2() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fea_htk.2", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use mfcc with default configuration... - MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; - op.mel_opts.htk_mode = true; - op.htk_compat = true; - op.use_energy = true; // Use energy. - - Mfcc mfcc(op); - - // calculate kaldi features - Matrix kaldi_raw_features; - mfcc.Compute(waveform, 1.0, &kaldi_raw_features); - - DeltaFeaturesOptions delta_opts; - Matrix kaldi_features; - ComputeDeltas(delta_opts, - kaldi_raw_features, - &kaldi_features); - - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - if ((std::abs(b - a)) > 1.0) { //<< TOLERANCE TO DIFFERENCES!!!!! 
- // print the non-matching data only once per-line - if (i_old != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 021406 // MFCC_D_A_0 - }; - { - std::ofstream os("tmp.test.wav.fea_kaldi.2", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fea_kaldi.2"); -} - - -static void UnitTestHTKCompare3() { - std::cout << "=== UnitTestHTKCompare3() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fea_htk.3", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use mfcc with default configuration... - MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.htk_compat = true; - op.use_energy = true; // Use energy. - op.mel_opts.low_freq = 20.0; - //op.mel_opts.debug_mel = true; - op.mel_opts.htk_mode = true; - - Mfcc mfcc(op); - - // calculate kaldi features - Matrix kaldi_raw_features; - mfcc.Compute(waveform, 1.0, &kaldi_raw_features); - - DeltaFeaturesOptions delta_opts; - Matrix kaldi_features; - ComputeDeltas(delta_opts, - kaldi_raw_features, - &kaldi_features); - - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - if ((std::abs(b - a)) > 1.0) { //<< TOLERANCE TO DIFFERENCES!!!!! 
- // print the non-matching data only once per-line - if (static_cast(i_old) != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 021406 // MFCC_D_A_0 - }; - { - std::ofstream os("tmp.test.wav.fea_kaldi.3", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fea_kaldi.3"); -} - - -static void UnitTestHTKCompare4() { - std::cout << "=== UnitTestHTKCompare4() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fea_htk.4", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use mfcc with default configuration... - MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; - op.htk_compat = true; - op.use_energy = true; // Use energy. - op.mel_opts.htk_mode = true; - - Mfcc mfcc(op); - - // calculate kaldi features - Matrix kaldi_raw_features; - mfcc.Compute(waveform, 1.0, &kaldi_raw_features); - - DeltaFeaturesOptions delta_opts; - Matrix kaldi_features; - ComputeDeltas(delta_opts, - kaldi_raw_features, - &kaldi_features); - - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - if ((std::abs(b - a)) > 1.0) { //<< TOLERANCE TO DIFFERENCES!!!!! 
- // print the non-matching data only once per-line - if (static_cast(i_old) != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 021406 // MFCC_D_A_0 - }; - { - std::ofstream os("tmp.test.wav.fea_kaldi.4", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fea_kaldi.4"); -} - - -static void UnitTestHTKCompare5() { - std::cout << "=== UnitTestHTKCompare5() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fea_htk.5", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use mfcc with default configuration... - MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.htk_compat = true; - op.use_energy = true; // Use energy. - op.mel_opts.low_freq = 0.0; - op.mel_opts.vtln_low = 100.0; - op.mel_opts.vtln_high = 7500.0; - op.mel_opts.htk_mode = true; - - BaseFloat vtln_warp = 1.1; // our approach identical to htk for warp factor >1, - // differs slightly for higher mel bins if warp_factor <0.9 - - Mfcc mfcc(op); - - // calculate kaldi features - Matrix kaldi_raw_features; - mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features); - - DeltaFeaturesOptions delta_opts; - Matrix kaldi_features; - ComputeDeltas(delta_opts, - kaldi_raw_features, - &kaldi_features); - - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - if ((std::abs(b - a)) > 1.0) { //<< TOLERANCE TO DIFFERENCES!!!!! 
- // print the non-matching data only once per-line - if (static_cast(i_old) != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 021406 // MFCC_D_A_0 - }; - { - std::ofstream os("tmp.test.wav.fea_kaldi.5", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fea_kaldi.5"); -} - -static void UnitTestHTKCompare6() { - std::cout << "=== UnitTestHTKCompare6() ===\n"; - - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.fea_htk.6", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use mfcc with default configuration... - MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.97; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.num_bins = 24; - op.mel_opts.low_freq = 125.0; - op.mel_opts.high_freq = 7800.0; - op.htk_compat = true; - op.use_energy = false; // C0 not energy. - - Mfcc mfcc(op); - - // calculate kaldi features - Matrix kaldi_raw_features; - mfcc.Compute(waveform, 1.0, &kaldi_raw_features); - - DeltaFeaturesOptions delta_opts; - Matrix kaldi_features; - ComputeDeltas(delta_opts, - kaldi_raw_features, - &kaldi_features); - - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - if ((std::abs(b - a)) > 1.0) { //<< TOLERANCE TO DIFFERENCES!!!!! - // print the non-matching data only once per-line - if (static_cast(i_old) != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 021406 // MFCC_D_A_0 - }; - { - std::ofstream os("tmp.test.wav.fea_kaldi.6", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.fea_kaldi.6"); -} void UnitTestVtln() { // Test the function VtlnWarpFreq. 
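With the HTK-comparison tests deleted (they depended on the removed htk_compat option), the remaining tests only exercise the simplified front end. A minimal sketch of that flow, assuming the offline wrapper keeps the Compute(waveform, vtln_warp, &output) signature used by the surviving tests; the file path and option values are illustrative:

#include <fstream>

#include "feat/feature-mfcc.h"
#include "feat/wave-reader.h"

using namespace kaldi;

int main() {
  std::ifstream is("test_data/test.wav", std::ios_base::binary);
  WaveData wave;
  wave.Read(is);                                  // samples on the new roughly +-1 scale
                                                  // (cf. the 1/32768 scaling added in UnitTestReadWave)
  SubVector<BaseFloat> waveform(wave.Data(), 0);  // first (only) channel

  MfccOptions op;          // defaults: 23 mel bins, 13 cepstra, energy_floor = 1e-09
  op.use_energy = false;   // keep C0 instead of the log-energy

  Mfcc mfcc(op);
  Matrix<BaseFloat> feats;
  mfcc.Compute(waveform, 1.0 /* vtln_warp */, &feats);  // num_frames x 13
  return 0;
}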
@@ -656,16 +113,6 @@ void UnitTestVtln() { static void UnitTestFeat() { UnitTestVtln(); UnitTestReadWave(); - UnitTestSimple(); - UnitTestHTKCompare1(); - UnitTestHTKCompare2(); - // commenting out this one as it doesn't compare right now I normalized - // the way the FFT bins are treated (removed offset of 0.5)... this seems - // to relate to the way frequency zero behaves. - UnitTestHTKCompare3(); - UnitTestHTKCompare4(); - UnitTestHTKCompare5(); - UnitTestHTKCompare6(); std::cout << "Tests succeeded.\n"; } @@ -682,5 +129,3 @@ int main() { return 1; } } - - diff --git a/src/feat/feature-mfcc.cc b/src/feat/feature-mfcc.cc index 73ab4b312c4..79e02ca5db2 100644 --- a/src/feat/feature-mfcc.cc +++ b/src/feat/feature-mfcc.cc @@ -25,62 +25,57 @@ namespace kaldi { -void MfccComputer::Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, +// Compute liftering coefficients (scaling on cepstral coeffs) +// coeffs are numbered slightly differently from HTK: the zeroth +// index is C0, which is not affected. +static void ComputeLifterCoeffs(BaseFloat Q, VectorBase *coeffs) { + for (int32 i = 0; i < coeffs->Dim(); i++) + (*coeffs)(i) = 1.0 + 0.5 * Q * sin (M_PI * i / Q); +} + + +void MfccComputer::Compute(BaseFloat vtln_warp, VectorBase *signal_frame, VectorBase *feature) { KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && feature->Dim() == this->Dim()); + BaseFloat signal_log_energy; + if (opts_.use_energy) + signal_log_energy = Log(std::max( + VecVec(*signal_frame, *signal_frame), + opts_.energy_floor * opts_.frame_opts.WindowSize())); const MelBanks &mel_banks = *(GetMelBanks(vtln_warp)); - if (opts_.use_energy && !opts_.raw_energy) - signal_raw_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::epsilon())); - - if (srfft_ != NULL) // Compute FFT using the split-radix algorithm. - srfft_->Compute(signal_frame->Data(), true); - else // An alternative algorithm that works for non-powers-of-two. - RealFft(signal_frame, true); + srfft_->Compute(signal_frame->Data(), true); // Convert the FFT into a power spectrum. ComputePowerSpectrum(signal_frame); SubVector power_spectrum(*signal_frame, 0, signal_frame->Dim() / 2 + 1); - mel_banks.Compute(power_spectrum, &mel_energies_); + // The energy_floor has the scale for the energy of a single sample, and the + // FFT has a higher dynamic range (it's not the orthogonal FFT)... the sqrt + // expression is to correct for that. + BaseFloat floor = opts_.energy_floor * + std::sqrt(BaseFloat(opts_.frame_opts.WindowSize())); + power_spectrum.ApplyFloor(floor); - // avoid log of zero (which should be prevented anyway by dithering). - mel_energies_.ApplyFloor(std::numeric_limits::epsilon()); - mel_energies_.ApplyLog(); // take the log. + mel_banks.Compute(power_spectrum, &mel_energies_); + mel_energies_.ApplyLog(); feature->SetZero(); // in case there were NaNs. 
// feature = dct_matrix_ * mel_energies [which now have log] feature->AddMatVec(1.0, dct_matrix_, kNoTrans, mel_energies_, 0.0); + feature->MulElements(lifter_coeffs_); - if (opts_.cepstral_lifter != 0.0) - feature->MulElements(lifter_coeffs_); - - if (opts_.use_energy) { - if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) - signal_raw_log_energy = log_energy_floor_; - (*feature)(0) = signal_raw_log_energy; - } - - if (opts_.htk_compat) { - BaseFloat energy = (*feature)(0); - for (int32 i = 0; i < opts_.num_ceps - 1; i++) - (*feature)(i) = (*feature)(i+1); - if (!opts_.use_energy) - energy *= M_SQRT2; // scale on C0 (actually removing a scale - // we previously added that's part of one common definition of - // the cosine transform.) - (*feature)(opts_.num_ceps - 1) = energy; - } + if (opts_.use_energy) + (*feature)(0) = signal_log_energy; } MfccComputer::MfccComputer(const MfccOptions &opts): - opts_(opts), srfft_(NULL), + opts_(opts), + srfft_(new SplitRadixRealFft(opts.frame_opts.PaddedWindowSize())), mel_energies_(opts.mel_opts.num_bins) { int32 num_bins = opts.mel_opts.num_bins; @@ -92,22 +87,16 @@ MfccComputer::MfccComputer(const MfccOptions &opts): Matrix dct_matrix(num_bins, num_bins); ComputeDctMatrix(&dct_matrix); + lifter_coeffs_.Resize(opts.num_ceps); + ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_); + + // Note that we include zeroth dct in either case. If using the // energy we replace this with the energy. This means a different // ordering of features than HTK. SubMatrix dct_rows(dct_matrix, 0, opts.num_ceps, 0, num_bins); dct_matrix_.Resize(opts.num_ceps, num_bins); dct_matrix_.CopyFromMat(dct_rows); // subset of rows. - if (opts.cepstral_lifter != 0.0) { - lifter_coeffs_.Resize(opts.num_ceps); - ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_); - } - if (opts.energy_floor > 0.0) - log_energy_floor_ = Log(opts.energy_floor); - - int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); - if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two... - srfft_ = new SplitRadixRealFft(padded_window_size); // We'll definitely need the filterbanks info for VTLN warping factor 1.0. // [note: this call caches it.] @@ -117,15 +106,12 @@ MfccComputer::MfccComputer(const MfccOptions &opts): MfccComputer::MfccComputer(const MfccComputer &other): opts_(other.opts_), lifter_coeffs_(other.lifter_coeffs_), dct_matrix_(other.dct_matrix_), - log_energy_floor_(other.log_energy_floor_), mel_banks_(other.mel_banks_), - srfft_(NULL), + srfft_(new SplitRadixRealFft(*(other.srfft_))), mel_energies_(other.mel_energies_.Dim(), kUndefined) { for (std::map::iterator iter = mel_banks_.begin(); iter != mel_banks_.end(); ++iter) iter->second = new MelBanks(*(iter->second)); - if (other.srfft_ != NULL) - srfft_ = new SplitRadixRealFft(*(other.srfft_)); } diff --git a/src/feat/feature-mfcc.h b/src/feat/feature-mfcc.h index dbfb9d60364..993d0dc777e 100644 --- a/src/feat/feature-mfcc.h +++ b/src/feat/feature-mfcc.h @@ -1,7 +1,7 @@ // feat/feature-mfcc.h // Copyright 2009-2011 Karel Vesely; Petr Motlicek; Saarland University -// 2014-2016 Johns Hopkins University (author: Daniel Povey) +// 2014-2019 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // @@ -39,25 +39,24 @@ struct MfccOptions { FrameExtractionOptions frame_opts; MelBanksOptions mel_opts; int32 num_ceps; // e.g. 13: num cepstral coeffs, counting zero. 
- bool use_energy; // use energy; else C0 - BaseFloat energy_floor; // 0 by default; set to a value like 1.0 or 0.1 if - // you disable dithering. - bool raw_energy; // If true, compute energy before preemphasis and windowing - BaseFloat cepstral_lifter; // Scaling factor on cepstra for HTK compatibility. - // if 0.0, no liftering is done. - bool htk_compat; // if true, put energy/C0 last and introduce a factor of - // sqrt(2) on C0 to be the same as HTK. + bool use_energy; // if true, use energy; else C0 + BaseFloat energy_floor; // Floor on energy, to avoid log(0.0), which will be + // multiplied by sqrt(window-length-in-frames) and + // applied per FFT bin. The value of 1.0e-09 is + // approximately (1.0/32768.0)^2, like a signal value + // of +- 1 in a 16-bit recording. + // cepstral_lifter controls a scaling factor on the cepstra that helps give + // all the MFCC coeffs a similar dynamic range by scaling up the + // higher-frequency coefficients. It's a rather odd formula involving + // a sine. We don't make it configurable. + BaseFloat cepstral_lifter; MfccOptions() : mel_opts(23), - // defaults the #mel-banks to 23 for the MFCC computations. - // this seems to be common for 16khz-sampled data, - // but for 8khz-sampled data, 15 may be better. num_ceps(13), use_energy(true), - energy_floor(0.0), - raw_energy(true), - cepstral_lifter(22.0), - htk_compat(false) {} + energy_floor(1.0e-09), + cepstral_lifter(22.0) { } + void Register(OptionsItf *opts) { frame_opts.Register(opts); @@ -67,17 +66,8 @@ struct MfccOptions { opts->Register("use-energy", &use_energy, "Use energy (not C0) in MFCC computation"); opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in MFCC computation. " - "Only makes a difference if --use-energy=true; only necessary if " - "--dither=0.0. Suggested values: 0.1 or 1.0"); - opts->Register("raw-energy", &raw_energy, - "If true, compute energy before preemphasis and windowing"); - opts->Register("cepstral-lifter", &cepstral_lifter, - "Constant that controls scaling of MFCCs"); - opts->Register("htk-compat", &htk_compat, - "If true, put energy or C0 last and use a factor of sqrt(2) on " - "C0. Warning: not sufficient to get HTK compatible features " - "(need to change other parameters)."); + "Floor on energy (absolute, not relative) of mel bins etc. " + "in MFCC computation. "); } }; @@ -96,17 +86,10 @@ class MfccComputer { int32 Dim() const { return opts_.num_ceps; } - bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; } - /** Function that computes one frame of features from one frame of signal. - @param [in] signal_raw_log_energy The log-energy of the frame of the signal - prior to windowing and pre-emphasis, or - log(numeric_limits::min()), whichever is greater. Must be - ignored by this function if this class returns false from - this->NeedsRawLogEnergy(). @param [in] vtln_warp The VTLN warping factor that the user wants to be applied when computing features for this utterance. Will normally be 1.0, meaning no warping is to be done. The value will @@ -119,8 +102,7 @@ class MfccComputer { @param [out] feature Pointer to a vector of size this->Dim(), to which the computed feature will be written. */ - void Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, + void Compute(BaseFloat vtln_warp, VectorBase *signal_frame, VectorBase *feature); @@ -135,7 +117,6 @@ class MfccComputer { MfccOptions opts_; Vector lifter_coeffs_; Matrix dct_matrix_; // matrix we left-multiply by to perform DCT.
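For reference, the "rather odd formula" mentioned above is the one implemented by ComputeLifterCoeffs earlier in this diff, worked out here for illustration (sample values rounded):

  lifter(i) = 1.0 + 0.5 * Q * sin(M_PI * i / Q),   with Q = cepstral_lifter = 22.0

so lifter(0) = 1.0 (C0 is untouched), lifter(1) is about 2.57, and the scaling peaks at lifter(11) = 12.0; each cepstral coefficient is then multiplied by the corresponding lifter value via feature->MulElements(lifter_coeffs_).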
- BaseFloat log_energy_floor_; std::map mel_banks_; // BaseFloat is VTLN coefficient. SplitRadixRealFft *srfft_; diff --git a/src/feat/feature-plp-test.cc b/src/feat/feature-plp-test.cc deleted file mode 100644 index ad872cffcd0..00000000000 --- a/src/feat/feature-plp-test.cc +++ /dev/null @@ -1,177 +0,0 @@ -// feat/feature-plp-test.cc - -// Copyright 2009-2011 Karel Vesely; Petr Motlicek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include - -#include "feat/feature-plp.h" -#include "base/kaldi-math.h" -#include "matrix/kaldi-matrix-inl.h" -#include "feat/wave-reader.h" - -using namespace kaldi; - - - - - -/** - */ -static void UnitTestSimple() { - std::cout << "=== UnitTestSimple() ===\n"; - - Vector v(100000); - Matrix m; - - // init with noise - for (int32 i = 0; i < v.Dim(); i++) { - v(i) = (abs( i * 433024253 ) % 65535) - (65535 / 2); - } - - std::cout << "<<<=== Just make sure it runs... Nothing is compared\n"; - // the parametrization object - PlpOptions op; - // trying to have same opts as baseline. - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "rectangular"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; -// op.htk_compat = true; - - Plp plp(op); - // use default parameters - - // compute mfccs. - plp.Compute(v, 1.0, &m); - - // possibly dump - // std::cout << "== Output features == \n" << m; - std::cout << "Test passed :)\n\n"; -} - - -static void UnitTestHTKCompare1() { - std::cout << "=== UnitTestHTKCompare1() ===\n"; - - std::ifstream is("test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // read the HTK features - Matrix htk_features; - { - std::ifstream is("test_data/test.wav.plp_htk.1", - std::ios::in | std::ios_base::binary); - bool ans = ReadHtk(is, &htk_features, 0); - KALDI_ASSERT(ans); - } - - // use plp with default configuration... - PlpOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.mel_opts.low_freq = 0.0; - op.htk_compat = true; - op.use_energy = false; // C0 not energy. 
- op.cepstral_scale = 1.0; - - Plp plp(op); - - // calculate kaldi features - Matrix kaldi_raw_features; - plp.Compute(waveform, 1.0, &kaldi_raw_features); - - DeltaFeaturesOptions delta_opts; - Matrix kaldi_features; - ComputeDeltas(delta_opts, - kaldi_raw_features, - &kaldi_features); - - // compare the results - bool passed = true; - int32 i_old = -1; - KALDI_ASSERT(kaldi_features.NumRows() == htk_features.NumRows()); - KALDI_ASSERT(kaldi_features.NumCols() == htk_features.NumCols()); - // Ignore ends-- we make slightly different choices than - // HTK about how to treat the deltas at the ends. - for (int32 i = 10; i+10 < kaldi_features.NumRows(); i++) { - for (int32 j = 0; j < kaldi_features.NumCols(); j++) { - BaseFloat a = kaldi_features(i, j), b = htk_features(i, j); - if ((std::abs(b - a)) > 0.10) { //<< TOLERANCE TO DIFFERENCES!!!!! - // print the non-matching data only once per-line - if (i_old != i) { - std::cout << "\n\n\n[HTK-row: " << i << "] " << htk_features.Row(i) << "\n"; - std::cout << "[Kaldi-row: " << i << "] " << kaldi_features.Row(i) << "\n\n\n"; - i_old = i; - } - // print indices of non-matching cells - std::cout << "[" << i << ", " << j << "]"; - passed = false; - }}} - if (!passed) KALDI_ERR << "Test failed"; - - // write the htk features for later inspection - HtkHeader header = { - kaldi_features.NumRows(), - 100000, // 10ms - static_cast(sizeof(float)*kaldi_features.NumCols()), - 021413 // PLP_D_A_0 - }; - { - std::ofstream os("tmp.test.wav.plp_kaldi.1", - std::ios::out|std::ios::binary); - WriteHtk(os, kaldi_features, header); - } - - std::cout << "Test passed :)\n\n"; - - unlink("tmp.test.wav.plp_kaldi.1"); -} - - - - -static void UnitTestFeat() { - UnitTestSimple(); - UnitTestHTKCompare1(); -} - - - - -int main() { - try { - for (int i = 0; i < 5; i++) - UnitTestFeat(); - std::cout << "Tests succeeded.\n"; - return 0; - } catch (const std::exception &e) { - std::cerr << e.what(); - return 1; - } -} - - diff --git a/src/feat/feature-plp.cc b/src/feat/feature-plp.cc deleted file mode 100644 index e0c270c7061..00000000000 --- a/src/feat/feature-plp.cc +++ /dev/null @@ -1,191 +0,0 @@ -// feat/feature-plp.cc - -// Copyright 2009-2011 Petr Motlicek; Karel Vesely -// 2016 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- - -#include "feat/feature-plp.h" - -namespace kaldi { - -PlpComputer::PlpComputer(const PlpOptions &opts): - opts_(opts), srfft_(NULL), - mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined), - autocorr_coeffs_(opts_.lpc_order + 1, kUndefined), - lpc_coeffs_(opts_.lpc_order, kUndefined), - raw_cepstrum_(opts_.lpc_order, kUndefined) { - - if (opts.cepstral_lifter != 0.0) { - lifter_coeffs_.Resize(opts.num_ceps); - ComputeLifterCoeffs(opts.cepstral_lifter, &lifter_coeffs_); - } - InitIdftBases(opts_.lpc_order + 1, opts_.mel_opts.num_bins + 2, - &idft_bases_); - - if (opts.energy_floor > 0.0) - log_energy_floor_ = Log(opts.energy_floor); - - int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); - if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two... - srfft_ = new SplitRadixRealFft(padded_window_size); - - // We'll definitely need the filterbanks info for VTLN warping factor 1.0. - // [note: this call caches it.] - GetMelBanks(1.0); -} - -PlpComputer::PlpComputer(const PlpComputer &other): - opts_(other.opts_), lifter_coeffs_(other.lifter_coeffs_), - idft_bases_(other.idft_bases_), log_energy_floor_(other.log_energy_floor_), - mel_banks_(other.mel_banks_), equal_loudness_(other.equal_loudness_), - srfft_(NULL), - mel_energies_duplicated_(opts_.mel_opts.num_bins + 2, kUndefined), - autocorr_coeffs_(opts_.lpc_order + 1, kUndefined), - lpc_coeffs_(opts_.lpc_order, kUndefined), - raw_cepstrum_(opts_.lpc_order, kUndefined) { - for (std::map::iterator iter = mel_banks_.begin(); - iter != mel_banks_.end(); ++iter) - iter->second = new MelBanks(*(iter->second)); - for (std::map*>::iterator - iter = equal_loudness_.begin(); - iter != equal_loudness_.end(); ++iter) - iter->second = new Vector(*(iter->second)); - if (other.srfft_ != NULL) - srfft_ = new SplitRadixRealFft(*(other.srfft_)); -} - -PlpComputer::~PlpComputer() { - for (std::map::iterator iter = mel_banks_.begin(); - iter != mel_banks_.end(); ++iter) - delete iter->second; - for (std::map* >::iterator - iter = equal_loudness_.begin(); - iter != equal_loudness_.end(); ++iter) - delete iter->second; - delete srfft_; -} - -const MelBanks *PlpComputer::GetMelBanks(BaseFloat vtln_warp) { - MelBanks *this_mel_banks = NULL; - std::map::iterator iter = mel_banks_.find(vtln_warp); - if (iter == mel_banks_.end()) { - this_mel_banks = new MelBanks(opts_.mel_opts, - opts_.frame_opts, - vtln_warp); - mel_banks_[vtln_warp] = this_mel_banks; - } else { - this_mel_banks = iter->second; - } - return this_mel_banks; -} - -const Vector *PlpComputer::GetEqualLoudness(BaseFloat vtln_warp) { - const MelBanks *this_mel_banks = GetMelBanks(vtln_warp); - Vector *ans = NULL; - std::map*>::iterator iter - = equal_loudness_.find(vtln_warp); - if (iter == equal_loudness_.end()) { - ans = new Vector; - GetEqualLoudnessVector(*this_mel_banks, ans); - equal_loudness_[vtln_warp] = ans; - } else { - ans = iter->second; - } - return ans; -} - -void PlpComputer::Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, - VectorBase *signal_frame, - VectorBase *feature) { - KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && - feature->Dim() == this->Dim()); - - const MelBanks &mel_banks = *GetMelBanks(vtln_warp); - const Vector &equal_loudness = *GetEqualLoudness(vtln_warp); - - - KALDI_ASSERT(opts_.num_ceps <= opts_.lpc_order+1); // our num-ceps includes C0. 
- - - if (opts_.use_energy && !opts_.raw_energy) - signal_raw_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::min())); - - if (srfft_ != NULL) // Compute FFT using split-radix algorithm. - srfft_->Compute(signal_frame->Data(), true); - else // An alternative algorithm that works for non-powers-of-two. - RealFft(signal_frame, true); - - // Convert the FFT into a power spectrum. - ComputePowerSpectrum(signal_frame); // elements 0 ... signal_frame->Dim()/2 - - SubVector power_spectrum(*signal_frame, - 0, signal_frame->Dim() / 2 + 1); - - int32 num_mel_bins = opts_.mel_opts.num_bins; - - SubVector mel_energies(mel_energies_duplicated_, 1, num_mel_bins); - - mel_banks.Compute(power_spectrum, &mel_energies); - - mel_energies.MulElements(equal_loudness); - - mel_energies.ApplyPow(opts_.compress_factor); - - // duplicate first and last elements - mel_energies_duplicated_(0) = mel_energies_duplicated_(1); - mel_energies_duplicated_(num_mel_bins + 1) = - mel_energies_duplicated_(num_mel_bins); - - autocorr_coeffs_.SetZero(); // In case of NaNs or infs - autocorr_coeffs_.AddMatVec(1.0, idft_bases_, kNoTrans, - mel_energies_duplicated_, 0.0); - - BaseFloat residual_log_energy = ComputeLpc(autocorr_coeffs_, &lpc_coeffs_); - - residual_log_energy = std::max(residual_log_energy, - std::numeric_limits::min()); - - Lpc2Cepstrum(opts_.lpc_order, lpc_coeffs_.Data(), raw_cepstrum_.Data()); - feature->Range(1, opts_.num_ceps - 1).CopyFromVec( - raw_cepstrum_.Range(0, opts_.num_ceps - 1)); - (*feature)(0) = residual_log_energy; - - if (opts_.cepstral_lifter != 0.0) - feature->MulElements(lifter_coeffs_); - - if (opts_.cepstral_scale != 1.0) - feature->Scale(opts_.cepstral_scale); - - if (opts_.use_energy) { - if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) - signal_raw_log_energy = log_energy_floor_; - (*feature)(0) = signal_raw_log_energy; - } - - if (opts_.htk_compat) { // reorder the features. - BaseFloat log_energy = (*feature)(0); - for (int32 i = 0; i < opts_.num_ceps-1; i++) - (*feature)(i) = (*feature)(i+1); - (*feature)(opts_.num_ceps-1) = log_energy; - } -} - - -} // namespace kaldi diff --git a/src/feat/feature-plp.h b/src/feat/feature-plp.h deleted file mode 100644 index 4f156ca1e88..00000000000 --- a/src/feat/feature-plp.h +++ /dev/null @@ -1,176 +0,0 @@ -// feat/feature-plp.h - -// Copyright 2009-2011 Petr Motlicek; Karel Vesely - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FEAT_FEATURE_PLP_H_ -#define KALDI_FEAT_FEATURE_PLP_H_ - -#include -#include - -#include "feat/feature-common.h" -#include "feat/feature-functions.h" -#include "feat/feature-window.h" -#include "feat/mel-computations.h" -#include "itf/options-itf.h" - -namespace kaldi { -/// @addtogroup feat FeatureExtraction -/// @{ - - - -/// PlpOptions contains basic options for computing PLP features. 
-/// It only includes things that can be done in a "stateless" way, i.e. -/// it does not include energy max-normalization. -/// It does not include delta computation. -struct PlpOptions { - FrameExtractionOptions frame_opts; - MelBanksOptions mel_opts; - int32 lpc_order; - int32 num_ceps; // num cepstra including zero - bool use_energy; // use energy; else C0 - BaseFloat energy_floor; - bool raw_energy; // If true, compute energy before preemphasis and windowing - BaseFloat compress_factor; - int32 cepstral_lifter; - BaseFloat cepstral_scale; - - bool htk_compat; // if true, put energy/C0 last and introduce a factor of - // sqrt(2) on C0 to be the same as HTK. - - PlpOptions() : mel_opts(23), - // default number of mel-banks for the PLP computation; this - // seems to be common for 16kHz-sampled data. For 8kHz-sampled - // data, 15 may be better. - lpc_order(12), - num_ceps(13), - use_energy(true), - energy_floor(0.0), - raw_energy(true), - compress_factor(0.33333), - cepstral_lifter(22), - cepstral_scale(1.0), - htk_compat(false) {} - - void Register(OptionsItf *opts) { - frame_opts.Register(opts); - mel_opts.Register(opts); - opts->Register("lpc-order", &lpc_order, - "Order of LPC analysis in PLP computation"); - opts->Register("num-ceps", &num_ceps, - "Number of cepstra in PLP computation (including C0)"); - opts->Register("use-energy", &use_energy, - "Use energy (not C0) for zeroth PLP feature"); - opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in PLP computation. " - "Only makes a difference if --use-energy=true; only necessary if " - "--dither=0.0. Suggested values: 0.1 or 1.0"); - opts->Register("raw-energy", &raw_energy, - "If true, compute energy before preemphasis and windowing"); - opts->Register("compress-factor", &compress_factor, - "Compression factor in PLP computation"); - opts->Register("cepstral-lifter", &cepstral_lifter, - "Constant that controls scaling of PLPs"); - opts->Register("cepstral-scale", &cepstral_scale, - "Scaling constant in PLP computation"); - opts->Register("htk-compat", &htk_compat, - "If true, put energy or C0 last. Warning: not sufficient " - "to get HTK compatible features (need to change other " - "parameters)."); - } -}; - - -/// This is the new-style interface to the PLP computation. -class PlpComputer { - public: - typedef PlpOptions Options; - explicit PlpComputer(const PlpOptions &opts); - PlpComputer(const PlpComputer &other); - - const FrameExtractionOptions &GetFrameOptions() const { - return opts_.frame_opts; - } - - int32 Dim() const { return opts_.num_ceps; } - - bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; } - - /** - Function that computes one frame of features from - one frame of signal. - - @param [in] signal_raw_log_energy The log-energy of the frame of the signal - prior to windowing and pre-emphasis, or - log(numeric_limits::min()), whichever is greater. Must be - ignored by this function if this class returns false from - this->NeedsRawLogEnergy(). - @param [in] vtln_warp The VTLN warping factor that the user wants - to be applied when computing features for this utterance. Will - normally be 1.0, meaning no warping is to be done. The value will - be ignored for feature types that don't support VLTN, such as - spectrogram features. - @param [in] signal_frame One frame of the signal, - as extracted using the function ExtractWindow() using the options - returned by this->GetFrameOptions(). 
The function will use the - vector as a workspace, which is why it's a non-const pointer. - @param [out] feature Pointer to a vector of size this->Dim(), to which - the computed feature will be written. - */ - void Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, - VectorBase *signal_frame, - VectorBase *feature); - - ~PlpComputer(); - private: - - const MelBanks *GetMelBanks(BaseFloat vtln_warp); - - const Vector *GetEqualLoudness(BaseFloat vtln_warp); - - PlpOptions opts_; - Vector lifter_coeffs_; - Matrix idft_bases_; - BaseFloat log_energy_floor_; - std::map mel_banks_; // BaseFloat is VTLN coefficient. - std::map* > equal_loudness_; - SplitRadixRealFft *srfft_; - - // temporary vector used inside Compute; size is opts_.mel_opts.num_bins + 2 - Vector mel_energies_duplicated_; - // temporary vector used inside Compute; size is opts_.lpc_order + 1 - Vector autocorr_coeffs_; - // temporary vector used inside Compute; size is opts_.lpc_order - Vector lpc_coeffs_; - // temporary vector used inside Compute; size is opts_.lpc_order - Vector raw_cepstrum_; - - // Disallow assignment. - PlpComputer &operator =(const PlpComputer &other); -}; - -typedef OfflineFeatureTpl Plp; - -/// @} End of "addtogroup feat" - -} // namespace kaldi - - -#endif // KALDI_FEAT_FEATURE_PLP_H_ diff --git a/src/feat/feature-sdc-test.cc b/src/feat/feature-sdc-test.cc index 4b99c65fef8..42370ce4715 100644 --- a/src/feat/feature-sdc-test.cc +++ b/src/feat/feature-sdc-test.cc @@ -45,7 +45,7 @@ static void UnitTestCompareWithDeltaFeatures(Matrix &raw_features, in int32 dd_num_rows = deltas_features.NumRows(); int32 sdc_num_rows = shifted_deltas_features.NumRows(); int32 num_features = raw_features.NumCols(); - + // Number of rows will be equal, but not // columns, in general. KALDI_ASSERT(dd_num_rows == sdc_num_rows); @@ -60,7 +60,7 @@ static void UnitTestCompareWithDeltaFeatures(Matrix &raw_features, in } } -static void UnitTestParams(Matrix &raw_features, int32 window, +static void UnitTestParams(Matrix &raw_features, int32 window, int32 shift, int32 n_blocks) { std::cout << "=== UnitTestSDCParams() ===\n"; ShiftedDeltaFeaturesOptions shifted_deltas_opts; @@ -78,8 +78,8 @@ static void UnitTestParams(Matrix &raw_features, int32 window, int32 sdc_num_cols = shifted_deltas_features.NumCols(); KALDI_ASSERT(sdc_num_cols == raw_num_cols * (n_blocks + 1)); - - /* For every coefficient in the raw feature vector a + + /* For every coefficient in the raw feature vector a delta is calculated and appended to the new feature vector, as is done normally in a delta-deltas computation. In addition, n_blocks delta in advance are also appended. @@ -89,7 +89,7 @@ static void UnitTestParams(Matrix &raw_features, int32 window, mapping from these additional deltas to where they would appear in a delta-deltas computation and verfies these values' equality. 
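For example (illustrative numbers, not taken from the test): with raw_num_cols = 13 and shift = 3, the extra block of columns starting at j = 2 * 13 = 26 is checked against the ordinary delta features taken at row i + (26/13 - 1) * 3 = i + 3, the next block (j = 39) against row i + 6, and so on.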
*/ - for (int32 i = 0; i < sdc_num_rows; i++) { + for (int32 i = 0; i < sdc_num_rows; i++) { for (int32 j = 2 * raw_num_cols; j < sdc_num_cols; j += raw_num_cols) { for (int32 k = 0; k < raw_num_cols; k++) { int32 row = i + (j/raw_num_cols - 1) * shift; @@ -103,7 +103,7 @@ static void UnitTestParams(Matrix &raw_features, int32 window, } } -static void UnitTestEndEffects(Matrix &raw_features, int32 window, +static void UnitTestEndEffects(Matrix &raw_features, int32 window, int32 shift, int32 n_blocks) { std::cout << "=== UnitTestSDCEndEffects() ===\n"; ShiftedDeltaFeaturesOptions shifted_deltas_opts; @@ -118,7 +118,7 @@ static void UnitTestEndEffects(Matrix &raw_features, int32 window, int32 raw_num_cols = raw_features.NumCols(); int32 sdc_num_rows = shifted_deltas_features.NumRows(); int32 sdc_num_cols = shifted_deltas_features.NumCols(); - + // If the entire window is out-of-bounds the delta should be zero. for (int32 i = sdc_num_rows - n_blocks + 1; i < sdc_num_rows; i++) { for (int32 j = 2 * raw_num_cols; j < sdc_num_cols; j += raw_num_cols) { @@ -126,7 +126,7 @@ static void UnitTestEndEffects(Matrix &raw_features, int32 window, if (i + (j/raw_num_cols - 1) * shift - window/2 > sdc_num_rows) KALDI_ASSERT(shifted_deltas_features(i, j + k) <= 0.00001); } - } + } } } @@ -137,13 +137,7 @@ int main() { KALDI_ASSERT(wave.Data().NumRows() == 1); SubVector waveform(wave.Data(), 0); - // mfcc with default configuration... MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; op.mel_opts.low_freq = 0.0; op.use_energy = false; Mfcc mfcc(op); @@ -165,6 +159,5 @@ int main() { static_cast(e); return 1; } - -} +} diff --git a/src/feat/feature-spectrogram.cc b/src/feat/feature-spectrogram.cc deleted file mode 100644 index 7eee2643cf5..00000000000 --- a/src/feat/feature-spectrogram.cc +++ /dev/null @@ -1,82 +0,0 @@ -// feat/feature-spectrogram.cc - -// Copyright 2009-2012 Karel Vesely -// Copyright 2012 Navdeep Jaitly - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- - -#include "feat/feature-spectrogram.h" - - -namespace kaldi { - -SpectrogramComputer::SpectrogramComputer(const SpectrogramOptions &opts) - : opts_(opts), srfft_(NULL) { - if (opts.energy_floor > 0.0) - log_energy_floor_ = Log(opts.energy_floor); - - int32 padded_window_size = opts.frame_opts.PaddedWindowSize(); - if ((padded_window_size & (padded_window_size-1)) == 0) // Is a power of two - srfft_ = new SplitRadixRealFft(padded_window_size); -} - -SpectrogramComputer::SpectrogramComputer(const SpectrogramComputer &other): - opts_(other.opts_), log_energy_floor_(other.log_energy_floor_), srfft_(NULL) { - if (other.srfft_ != NULL) - srfft_ = new SplitRadixRealFft(*other.srfft_); -} - -SpectrogramComputer::~SpectrogramComputer() { - delete srfft_; -} - -void SpectrogramComputer::Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, - VectorBase *signal_frame, - VectorBase *feature) { - KALDI_ASSERT(signal_frame->Dim() == opts_.frame_opts.PaddedWindowSize() && - feature->Dim() == this->Dim()); - - - // Compute energy after window function (not the raw one) - if (!opts_.raw_energy) - signal_raw_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::epsilon())); - - if (srfft_ != NULL) // Compute FFT using split-radix algorithm. - srfft_->Compute(signal_frame->Data(), true); - else // An alternative algorithm that works for non-powers-of-two - RealFft(signal_frame, true); - - // Convert the FFT into a power spectrum. - ComputePowerSpectrum(signal_frame); - SubVector power_spectrum(*signal_frame, - 0, signal_frame->Dim() / 2 + 1); - - power_spectrum.ApplyFloor(std::numeric_limits::epsilon()); - power_spectrum.ApplyLog(); - - feature->CopyFromVec(power_spectrum); - - if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) - signal_raw_log_energy = log_energy_floor_; - // The zeroth spectrogram component is always set to the signal energy, - // instead of the square of the constant component of the signal. - (*feature)(0) = signal_raw_log_energy; -} - -} // namespace kaldi diff --git a/src/feat/feature-spectrogram.h b/src/feat/feature-spectrogram.h deleted file mode 100644 index 132a6875e00..00000000000 --- a/src/feat/feature-spectrogram.h +++ /dev/null @@ -1,117 +0,0 @@ -// feat/feature-spectrogram.h - -// Copyright 2009-2012 Karel Vesely -// Copyright 2012 Navdeep Jaitly - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_FEAT_FEATURE_SPECTROGRAM_H_ -#define KALDI_FEAT_FEATURE_SPECTROGRAM_H_ - - -#include - -#include "feat/feature-common.h" -#include "feat/feature-functions.h" -#include "feat/feature-window.h" - -namespace kaldi { -/// @addtogroup feat FeatureExtraction -/// @{ - - -/// SpectrogramOptions contains basic options for computing spectrogram -/// features. 
-struct SpectrogramOptions { - FrameExtractionOptions frame_opts; - BaseFloat energy_floor; - bool raw_energy; // If true, compute energy before preemphasis and windowing - - SpectrogramOptions() : - energy_floor(0.0), - raw_energy(true) {} - - void Register(OptionsItf *opts) { - frame_opts.Register(opts); - opts->Register("energy-floor", &energy_floor, - "Floor on energy (absolute, not relative) in Spectrogram " - "computation. Caution: this floor is applied to the zeroth " - "component, representing the total signal energy. The " - "floor on the individual spectrogram elements is fixed at " - "std::numeric_limits::epsilon()."); - opts->Register("raw-energy", &raw_energy, - "If true, compute energy before preemphasis and windowing"); - } -}; - -/// Class for computing spectrogram features. -class SpectrogramComputer { - public: - typedef SpectrogramOptions Options; - explicit SpectrogramComputer(const SpectrogramOptions &opts); - SpectrogramComputer(const SpectrogramComputer &other); - - const FrameExtractionOptions& GetFrameOptions() const { - return opts_.frame_opts; - } - - int32 Dim() const { return opts_.frame_opts.PaddedWindowSize() / 2 + 1; } - - bool NeedRawLogEnergy() const { return opts_.raw_energy; } - - - /** - Function that computes one frame of spectrogram features from - one frame of signal. - - @param [in] signal_raw_log_energy The log-energy of the frame of the signal - prior to windowing and pre-emphasis, or - log(numeric_limits::min()), whichever is greater. Must be - ignored by this function if this class returns false from - this->NeedsRawLogEnergy(). - @param [in] vtln_warp This is ignored by this function, it's only - needed for interface compatibility. - @param [in] signal_frame One frame of the signal, - as extracted using the function ExtractWindow() using the options - returned by this->GetFrameOptions(). The function will use the - vector as a workspace, which is why it's a non-const pointer. - @param [out] feature Pointer to a vector of size this->Dim(), to which - the computed feature will be written. - */ - void Compute(BaseFloat signal_raw_log_energy, - BaseFloat vtln_warp, - VectorBase *signal_frame, - VectorBase *feature); - - ~SpectrogramComputer(); - - private: - SpectrogramOptions opts_; - BaseFloat log_energy_floor_; - SplitRadixRealFft *srfft_; - - // Disallow assignment. 
- SpectrogramComputer &operator=(const SpectrogramComputer &other); -}; - -typedef OfflineFeatureTpl Spectrogram; - - -/// @} End of "addtogroup feat" -} // namespace kaldi - - -#endif // KALDI_FEAT_FEATURE_SPECTROGRAM_H_ diff --git a/src/feat/feature-window.cc b/src/feat/feature-window.cc index c5d4cc29831..cd7b1a26326 100644 --- a/src/feat/feature-window.cc +++ b/src/feat/feature-window.cc @@ -1,7 +1,7 @@ // feat/feature-window.cc // Copyright 2009-2011 Karel Vesely; Petr Motlicek; Microsoft Corporation -// 2013-2016 Johns Hopkins University (author: Daniel Povey) +// 2013-2019 Johns Hopkins University (author: Daniel Povey) // 2014 IMSL, PKU-HKUST (author: Wei Shi) // See ../../COPYING for clarification regarding multiple authors @@ -30,13 +30,9 @@ namespace kaldi { int64 FirstSampleOfFrame(int32 frame, const FrameExtractionOptions &opts) { int64 frame_shift = opts.WindowShift(); - if (opts.snip_edges) { - return frame * frame_shift; - } else { - int64 midpoint_of_frame = frame_shift * frame + frame_shift / 2, - beginning_of_frame = midpoint_of_frame - opts.WindowSize() / 2; - return beginning_of_frame; - } + int64 midpoint_of_frame = frame_shift * frame + frame_shift / 2, + beginning_of_frame = midpoint_of_frame - opts.WindowSize() / 2; + return beginning_of_frame; } int32 NumFrames(int64 num_samples, @@ -44,85 +40,54 @@ int32 NumFrames(int64 num_samples, bool flush) { int64 frame_shift = opts.WindowShift(); int64 frame_length = opts.WindowSize(); - if (opts.snip_edges) { - // with --snip-edges=true (the default), we use a HTK-like approach to - // determining the number of frames-- all frames have to fit completely into - // the waveform, and the first frame begins at sample zero. - if (num_samples < frame_length) - return 0; - else - return (1 + ((num_samples - frame_length) / frame_shift)); - // You can understand the expression above as follows: 'num_samples - - // frame_length' is how much room we have to shift the frame within the - // waveform; 'frame_shift' is how much we shift it each time; and the ratio - // is how many times we can shift it (integer arithmetic rounds down). - } else { - // if --snip-edges=false, the number of frames is determined by rounding the - // (file-length / frame-shift) to the nearest integer. The point of this - // formula is to make the number of frames an obvious and predictable - // function of the frame shift and signal length, which makes many - // segmentation-related questions simpler. - // - // Because integer division in C++ rounds toward zero, we add (half the - // frame-shift minus epsilon) before dividing, to have the effect of - // rounding towards the closest integer. - int32 num_frames = (num_samples + (frame_shift / 2)) / frame_shift; - - if (flush) - return num_frames; - - // note: 'end' always means the last plus one, i.e. one past the last. - int64 end_sample_of_last_frame = FirstSampleOfFrame(num_frames - 1, opts) - + frame_length; - - // the following code is optimized more for clarity than efficiency. - // If flush == false, we can't output frames that extend past the end - // of the signal. - while (num_frames > 0 && end_sample_of_last_frame > num_samples) { - num_frames--; - end_sample_of_last_frame -= frame_shift; - } + + // The number of frames is determined by rounding the + // (file-length / frame-shift) to the nearest integer. 
The point of this + // formula is to make the number of frames an obvious and predictable + // function of the frame shift and signal length, which makes many + // segmentation-related questions simpler. + // + // Because integer division in C++ rounds toward zero, we add (half the + // frame-shift minus epsilon) before dividing, to have the effect of + // rounding towards the closest integer. + int32 num_frames = (num_samples + (frame_shift / 2)) / frame_shift; + + if (flush) return num_frames; - } -} + // note: 'end' always means the last plus one, i.e. one past the last. + int64 end_sample_of_last_frame = FirstSampleOfFrame(num_frames - 1, opts) + + frame_length; -void Dither(VectorBase *waveform, BaseFloat dither_value) { - if (dither_value == 0.0) - return; - int32 dim = waveform->Dim(); - BaseFloat *data = waveform->Data(); - RandomState rstate; - for (int32 i = 0; i < dim; i++) - data[i] += RandGauss(&rstate) * dither_value; + // the following code is optimized more for clarity than efficiency. + // If flush == false, we can't output frames that extend past the end + // of the signal. + while (num_frames > 0 && end_sample_of_last_frame > num_samples) { + num_frames--; + end_sample_of_last_frame -= frame_shift; + } + return num_frames; } -void Preemphasize(VectorBase *waveform, BaseFloat preemph_coeff) { - if (preemph_coeff == 0.0) return; - KALDI_ASSERT(preemph_coeff >= 0.0 && preemph_coeff <= 1.0); - for (int32 i = waveform->Dim()-1; i > 0; i--) - (*waveform)(i) -= preemph_coeff * (*waveform)(i-1); - (*waveform)(0) -= preemph_coeff * (*waveform)(0); -} - -FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts) { +void InitFeatureWindowFunction(const FrameExtractionOptions &opts, + Vector *window_function) { int32 frame_length = opts.WindowSize(); KALDI_ASSERT(frame_length > 0); - window.Resize(frame_length); + window_function->Resize(frame_length); double a = M_2PI / (frame_length-1); for (int32 i = 0; i < frame_length; i++) { double i_fl = static_cast(i); if (opts.window_type == "hanning") { - window(i) = 0.5 - 0.5*cos(a * i_fl); + (*window_function)(i) = 0.5 - 0.5*cos(a * i_fl); } else if (opts.window_type == "hamming") { - window(i) = 0.54 - 0.46*cos(a * i_fl); + (*window_function)(i) = 0.54 - 0.46*cos(a * i_fl); } else if (opts.window_type == "povey") { // like hamming but goes to zero at edges. 
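// Illustrative sketch, not part of the patch: the frame-counting and
// frame-placement formulas described in the comments above, written as
// standalone functions (plain int64 arithmetic, no Kaldi types).  The real
// NumFrames() additionally drops trailing frames when flush == false.
#include <cstdint>

// The number of frames now depends only on the frame shift:
// round(num_samples / frame_shift), done with integer arithmetic.
inline int64_t SketchNumFrames(int64_t num_samples, int64_t frame_shift) {
  return (num_samples + frame_shift / 2) / frame_shift;
}

// Frame 'f' is centered on sample f * frame_shift + frame_shift / 2, so it
// starts half a window earlier; near the start this can be negative, and the
// calling code reflects the signal at the edge.
inline int64_t SketchFirstSampleOfFrame(int64_t f, int64_t frame_shift,
                                        int64_t frame_length) {
  int64_t midpoint = f * frame_shift + frame_shift / 2;
  return midpoint - frame_length / 2;
}
// Example: 16000 samples at a 160-sample shift -> 100 frames; with a
// 400-sample window, frame 0 starts at sample -120.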
- window(i) = pow(0.5 - 0.5*cos(a * i_fl), 0.85); + (*window_function)(i) = pow(0.5 - 0.5*cos(a * i_fl), 0.85); } else if (opts.window_type == "rectangular") { - window(i) = 1.0; + (*window_function)(i) = 1.0; } else if (opts.window_type == "blackman") { - window(i) = opts.blackman_coeff - 0.5*cos(a * i_fl) + + (*window_function)(i) = opts.blackman_coeff - 0.5*cos(a * i_fl) + (0.5 - opts.blackman_coeff) * cos(2 * a * i_fl); } else { KALDI_ERR << "Invalid window type " << opts.window_type; @@ -131,54 +96,34 @@ FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts) } void ProcessWindow(const FrameExtractionOptions &opts, - const FeatureWindowFunction &window_function, - VectorBase *window, - BaseFloat *log_energy_pre_window) { + const VectorBase &window_function, + VectorBase *window) { int32 frame_length = opts.WindowSize(); KALDI_ASSERT(window->Dim() == frame_length); - if (opts.dither != 0.0) - Dither(window, opts.dither); - - if (opts.remove_dc_offset) - window->Add(-window->Sum() / frame_length); - - if (log_energy_pre_window != NULL) { - BaseFloat energy = std::max(VecVec(*window, *window), - std::numeric_limits::epsilon()); - *log_energy_pre_window = Log(energy); - } - if (opts.preemph_coeff != 0.0) - Preemphasize(window, opts.preemph_coeff); + /* This was formerly enabled by the --remove-dc-offset option. It is now + done unconditionally. */ + window->Add(-window->Sum() / frame_length); - window->MulElements(window_function.window); + window->MulElements(window_function); } // ExtractWindow extracts a windowed frame of waveform with a power-of-two, -// padded size. It does mean subtraction, pre-emphasis and dithering as -// requested. +// padded size. It does mean subtraction (removal of the DC offset). void ExtractWindow(int64 sample_offset, const VectorBase &wave, int32 f, // with 0 <= f < NumFrames(feats, opts) const FrameExtractionOptions &opts, - const FeatureWindowFunction &window_function, - Vector *window, - BaseFloat *log_energy_pre_window) { + const VectorBase &window_function, + Vector *window) { KALDI_ASSERT(sample_offset >= 0 && wave.Dim() != 0); int32 frame_length = opts.WindowSize(), frame_length_padded = opts.PaddedWindowSize(); - int64 num_samples = sample_offset + wave.Dim(), - start_sample = FirstSampleOfFrame(f, opts), - end_sample = start_sample + frame_length; + int64 start_sample = FirstSampleOfFrame(f, opts); - if (opts.snip_edges) { - KALDI_ASSERT(start_sample >= sample_offset && - end_sample <= num_samples); - } else { - KALDI_ASSERT(sample_offset == 0 || start_sample >= sample_offset); - } + KALDI_ASSERT(sample_offset == 0 || start_sample >= sample_offset); if (window->Dim() != frame_length_padded) window->Resize(frame_length_padded, kUndefined); @@ -216,7 +161,8 @@ void ExtractWindow(int64 sample_offset, SubVector frame(*window, 0, frame_length); - ProcessWindow(opts, window_function, &frame, log_energy_pre_window); + ProcessWindow(opts, window_function, &frame); + } } // namespace kaldi diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h index a7abba50eca..979a6cac249 100644 --- a/src/feat/feature-window.h +++ b/src/feat/feature-window.h @@ -36,17 +36,12 @@ struct FrameExtractionOptions { BaseFloat samp_freq; BaseFloat frame_shift_ms; // in milliseconds. BaseFloat frame_length_ms; // in milliseconds. - BaseFloat dither; // Amount of dithering, 0.0 means no dither. - BaseFloat preemph_coeff; // Preemphasis coefficient. - bool remove_dc_offset; // Subtract mean of wave before FFT. std::string window_type; // e.g.
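// Illustrative sketch, not part of the patch: with dithering and pre-emphasis
// gone, per-frame processing reduces to removing the DC offset and applying
// the window function, as in ProcessWindow() above.  This standalone version
// uses std::vector<float> instead of kaldi::VectorBase.
#include <cstddef>
#include <vector>

inline void SketchProcessWindow(const std::vector<float> &window_function,
                                std::vector<float> *frame) {
  // Remove the DC offset (formerly optional via --remove-dc-offset).
  double sum = 0.0;
  for (float s : *frame) sum += s;
  const float mean = static_cast<float>(sum / frame->size());
  for (std::size_t i = 0; i < frame->size(); i++) {
    (*frame)[i] -= mean;
    (*frame)[i] *= window_function[i];  // e.g. a "povey" or Hamming window
  }
}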
Hamming window // May be "hamming", "rectangular", "povey", "hanning", "blackman" // "povey" is a window I made to be similar to Hamming but to go to zero at the // edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85) // I just don't think the Hamming window makes sense as a windowing function. - bool round_to_power_of_two; BaseFloat blackman_coeff; - bool snip_edges; bool allow_downsample; bool allow_upsample; int max_feature_vectors; @@ -54,17 +49,11 @@ struct FrameExtractionOptions { samp_freq(16000), frame_shift_ms(10.0), frame_length_ms(25.0), - dither(1.0), - preemph_coeff(0.97), - remove_dc_offset(true), window_type("povey"), - round_to_power_of_two(true), blackman_coeff(0.42), - snip_edges(true), allow_downsample(false), allow_upsample(false), - max_feature_vectors(-1) - { } + max_feature_vectors(-1) { } void Register(OptionsItf *opts) { opts->Register("sample-frequency", &samp_freq, @@ -72,26 +61,11 @@ struct FrameExtractionOptions { "if specified there)"); opts->Register("frame-length", &frame_length_ms, "Frame length in milliseconds"); opts->Register("frame-shift", &frame_shift_ms, "Frame shift in milliseconds"); - opts->Register("preemphasis-coefficient", &preemph_coeff, - "Coefficient for use in signal preemphasis"); - opts->Register("remove-dc-offset", &remove_dc_offset, - "Subtract mean from waveform on each frame"); - opts->Register("dither", &dither, "Dithering constant (0.0 means no dither). " - "If you turn this off, you should set the --energy-floor " - "option, e.g. to 1.0 or 0.1"); opts->Register("window-type", &window_type, "Type of window " "(\"hamming\"|\"hanning\"|\"povey\"|\"rectangular\"" "|\"blackmann\")"); opts->Register("blackman-coeff", &blackman_coeff, "Constant coefficient for generalized Blackman window."); - opts->Register("round-to-power-of-two", &round_to_power_of_two, - "If true, round window size to power of two by zero-padding " - "input to FFT."); - opts->Register("snip-edges", &snip_edges, - "If true, end effects will be handled by outputting only frames that " - "completely fit in the file, and the number of frames depends on the " - "frame-length. If false, the number of frames depends only on the " - "frame-shift, and we reflect the data at the ends."); opts->Register("allow-downsample", &allow_downsample, "If true, allow the input waveform to have a higher frequency than " "the specified --sample-frequency (and we'll downsample)."); @@ -110,19 +84,16 @@ struct FrameExtractionOptions { return static_cast(samp_freq * 0.001 * frame_length_ms); } int32 PaddedWindowSize() const { - return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize()) : - WindowSize()); + return RoundUpToNearestPowerOfTwo(WindowSize()); } }; -struct FeatureWindowFunction { - FeatureWindowFunction() {} - explicit FeatureWindowFunction(const FrameExtractionOptions &opts); - FeatureWindowFunction(const FeatureWindowFunction &other): - window(other.window) { } - Vector window; -}; +// Sets up the feature window function (e.g. Hamming) as specified by the +// options. +void InitFeatureWindowFunction( + const FrameExtractionOptions &opts, + Vector *window_function); /** @@ -135,8 +106,7 @@ struct FeatureWindowFunction { @param [in] flush True if we are asserting that this number of samples is 'all there is', false if we expecting more data to possibly come - in. This only makes a difference to the answer if opts.snips_edges - == false. For offline feature extraction you always want flush == + in. For offline feature extraction you always want flush == true. 
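// Illustrative sketch, not part of the patch: with --round-to-power-of-two
// removed, the FFT size is always the window length rounded up to a power of
// two.  With the defaults above (16 kHz sample rate, 25 ms window, 10 ms
// shift): WindowSize() = 400 samples, WindowShift() = 160 samples, and the
// padded (FFT) size is 512.  A standalone version of that rounding:
#include <cstdint>

inline int32_t SketchRoundUpToPowerOfTwo(int32_t n) {
  int32_t p = 1;
  while (p < n) p *= 2;  // e.g. 400 -> 512
  return p;
}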
In an online-decoding context, once you know (or decide) that no more data is coming in, you'd call it with flush == true at the end to flush out any remaining data. @@ -146,25 +116,27 @@ int32 NumFrames(int64 num_samples, bool flush = true); /* - This function returns the index of the first sample of the frame indexed - 'frame'. If snip-edges=true, it just returns frame * opts.WindowShift(); if - snip-edges=false, the formula is a little more complicated and the result may - be negative. + This function returns the sample-index of the first sample of the frame + indexed 'frame'. + @param [in] frame frame index frame >= 0 + @param [in] opts Options class, used for window width, and frame + shift. + @return Returns the sample index of the first sample of + this frame. Note: this may be negative if + `frame` is close to zero. The calling code + will handle this by reflecting the signal in + the boundary. */ int64 FirstSampleOfFrame(int32 frame, const FrameExtractionOptions &opts); -void Dither(VectorBase *waveform, BaseFloat dither_value); - -void Preemphasize(VectorBase *waveform, BaseFloat preemph_coeff); - /** - This function does all the windowing steps after actually - extracting the windowed signal: depending on the - configuration, it does dithering, dc offset removal, - preemphasis, and multiplication by the windowing function. + This function does all the windowing steps after actually extracting the + windowed signal: depending on the configuration, it does dc offset removal and + multiplication by the windowing function. + @param [in] opts The options class to be used @param [in] window_function The windowing function-- should have been initialized using 'opts'. @@ -173,14 +145,10 @@ void Preemphasize(VectorBase *waveform, BaseFloat preemph_coeff); opts.PaddedWindowSize(), with the remaining samples zero, as the FFT code is more efficient if it operates on data with power-of-two size. - @param [out] log_energy_pre_window If non-NULL, then after dithering and - DC offset removal, this function will write to this pointer the log of - the total energy (i.e. sum-squared) of the frame. */ void ProcessWindow(const FrameExtractionOptions &opts, - const FeatureWindowFunction &window_function, - VectorBase *window, - BaseFloat *log_energy_pre_window = NULL); + const VectorBase &window_function, + VectorBase *window); /* @@ -202,18 +170,15 @@ void ProcessWindow(const FrameExtractionOptions &opts, @param [in] window_function The windowing function, as derived from the options class. @param [out] window The windowed, possibly-padded waveform to be - extracted. Will be resized as needed. - @param [out] log_energy_pre_window If non-NULL, the log-energy of - the signal prior to pre-emphasis and multiplying by - the windowing function will be written to here. + extracted. Will be resized as needed.
*/ void ExtractWindow(int64 sample_offset, const VectorBase &wave, int32 f, const FrameExtractionOptions &opts, - const FeatureWindowFunction &window_function, - Vector *window, - BaseFloat *log_energy_pre_window = NULL); + const VectorBase &window_function, + Vector *window); + /// @} End of "addtogroup feat" diff --git a/src/feat/mel-computations.cc b/src/feat/mel-computations.cc index bb5e9f9acff..be050b386ee 100644 --- a/src/feat/mel-computations.cc +++ b/src/feat/mel-computations.cc @@ -32,8 +32,7 @@ namespace kaldi { MelBanks::MelBanks(const MelBanksOptions &opts, const FrameExtractionOptions &frame_opts, - BaseFloat vtln_warp_factor): - htk_mode_(opts.htk_mode) { + BaseFloat vtln_warp_factor) { int32 num_bins = opts.num_bins; if (num_bins < 3) KALDI_ERR << "Must have at least 3 mel bins"; BaseFloat sample_freq = frame_opts.samp_freq; @@ -128,10 +127,6 @@ MelBanks::MelBanks(const MelBanksOptions &opts, bins_[bin].second.Resize(size); bins_[bin].second.CopyFromVec(this_bin.Range(first_index, size)); - // Replicate a bug in HTK, for testing purposes. - if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0) - bins_[bin].second(0) = 0.0; - } if (debug_) { for (size_t i = 0; i < bins_.size(); i++) { @@ -144,8 +139,7 @@ MelBanks::MelBanks(const MelBanksOptions &opts, MelBanks::MelBanks(const MelBanks &other): center_freqs_(other.center_freqs_), bins_(other.bins_), - debug_(other.debug_), - htk_mode_(other.htk_mode_) { } + debug_(other.debug_) { } BaseFloat MelBanks::VtlnWarpFreq(BaseFloat vtln_low_cutoff, // upper+lower frequency cutoffs for VTLN. BaseFloat vtln_high_cutoff, @@ -232,8 +226,6 @@ void MelBanks::Compute(const VectorBase &power_spectrum, int32 offset = bins_[i].first; const Vector &v(bins_[i].second); BaseFloat energy = VecVec(v, power_spectrum.Range(offset, v.Dim())); - // HTK-like flooring- for testing purposes (we prefer dither) - if (htk_mode_ && energy < 1.0) energy = 1.0; (*mel_energies_out)(i) = energy; // The following assert was added due to a problem with OpenBlas that @@ -250,91 +242,7 @@ void MelBanks::Compute(const VectorBase &power_spectrum, } } -void ComputeLifterCoeffs(BaseFloat Q, VectorBase *coeffs) { - // Compute liftering coefficients (scaling on cepstral coeffs) - // coeffs are numbered slightly differently from HTK: the zeroth - // index is C0, which is not affected. 
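// Illustrative sketch, not part of the patch: each mel energy computed by
// MelBanks::Compute() above is a dot product of one triangular filter with
// the corresponding slice of the power spectrum (linear, not log, energies).
#include <cstddef>
#include <vector>

inline float SketchMelBinEnergy(const std::vector<float> &bin_weights,   // triangular filter
                                const std::vector<float> &power_spectrum,
                                std::size_t offset) {  // first FFT bin covered by the filter
  float energy = 0.0f;
  for (std::size_t i = 0; i < bin_weights.size(); i++)
    energy += bin_weights[i] * power_spectrum[offset + i];
  return energy;
}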
- for (int32 i = 0; i < coeffs->Dim(); i++) - (*coeffs)(i) = 1.0 + 0.5 * Q * sin (M_PI * i / Q); -} - - -// Durbin's recursion - converts autocorrelation coefficients to the LPC -// pTmp - temporal place [n] -// pAC - autocorrelation coefficients [n + 1] -// pLP - linear prediction coefficients [n] (predicted_sn = sum_1^P{a[i-1] * s[n-i]}}) -// F(z) = 1 / (1 - A(z)), 1 is not stored in the demoninator -BaseFloat Durbin(int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp) { - BaseFloat ki; // reflection coefficient - int i; - int j; - - BaseFloat E = pAC[0]; - - for (i = 0; i < n; i++) { - // next reflection coefficient - ki = pAC[i + 1]; - for (j = 0; j < i; j++) - ki += pLP[j] * pAC[i - j]; - ki = ki / E; - - // new error - BaseFloat c = 1 - ki * ki; - if (c < 1.0e-5) // remove NaNs for constan signal - c = 1.0e-5; - E *= c; - - // new LP coefficients - pTmp[i] = -ki; - for (j = 0; j < i; j++) - pTmp[j] = pLP[j] - ki * pLP[i - j - 1]; - - for (j = 0; j <= i; j++) - pLP[j] = pTmp[j]; - } - return E; -} - - -void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst) { - for (int32 i = 0; i < n; i++) { - double sum = 0.0; - int j; - for (j = 0; j < i; j++) { - sum += static_cast(i - j) * pLPC[j] * pCepst[i - j - 1]; - } - pCepst[i] = -pLPC[i] - sum / static_cast(i + 1); - } -} - -void GetEqualLoudnessVector(const MelBanks &mel_banks, - Vector *ans) { - int32 n = mel_banks.NumBins(); - // Central frequency of each mel bin. - const Vector &f0 = mel_banks.GetCenterFreqs(); - ans->Resize(n); - for (int32 i = 0; i < n; i++) { - BaseFloat fsq = f0(i) * f0(i); - BaseFloat fsub = fsq / (fsq + 1.6e5); - (*ans)(i) = fsub * fsub * ((fsq + 1.44e6) / (fsq + 9.61e6)); - } -} - - -// Compute LP coefficients from autocorrelation coefficients. -BaseFloat ComputeLpc(const VectorBase &autocorr_in, - Vector *lpc_out) { - int32 n = autocorr_in.Dim() - 1; - KALDI_ASSERT(lpc_out->Dim() == n); - Vector tmp(n); - BaseFloat ans = Durbin(n, autocorr_in.Data(), - lpc_out->Data(), - tmp.Data()); - if (ans <= 0.0) - KALDI_WARN << "Zero energy in LPC computation"; - return -Log(1.0 / ans); // forms the C0 value -} } // namespace kaldi diff --git a/src/feat/mel-computations.h b/src/feat/mel-computations.h index 0c1d41ca45c..6822debc242 100644 --- a/src/feat/mel-computations.h +++ b/src/feat/mel-computations.h @@ -1,7 +1,7 @@ // feat/mel-computations.h // Copyright 2009-2011 Phonexia s.r.o.; Microsoft Corporation -// 2016 Johns Hopkins University (author: Daniel Povey) +// 2016-2019 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // @@ -44,18 +44,14 @@ struct MelBanksOptions { int32 num_bins; // e.g. 25; number of triangular bins BaseFloat low_freq; // e.g. 20; lower frequency cutoff BaseFloat high_freq; // an upper frequency cutoff; 0 -> no cutoff, negative - // ->added to the Nyquist frequency to get the cutoff. + // ->added to the Nyquist frequency to get the cutoff. BaseFloat vtln_low; // vtln lower cutoff of warping function. BaseFloat vtln_high; // vtln upper cutoff of warping function: if negative, added // to the Nyquist frequency to get the cutoff. bool debug_mel; - // htk_mode is a "hidden" config, it does not show up on command line. - // Enables more exact compatibility with HTK, for testing purposes. Affects - // mel-energy flooring and reproduces a bug in HTK. 
- bool htk_mode; explicit MelBanksOptions(int num_bins = 25) : num_bins(num_bins), low_freq(20), high_freq(0), vtln_low(100), - vtln_high(-500), debug_mel(false), htk_mode(false) {} + vtln_high(-500), debug_mel(false) { } void Register(OptionsItf *opts) { opts->Register("num-mel-bins", &num_bins, @@ -87,10 +83,9 @@ class MelBanks { } static BaseFloat VtlnWarpFreq(BaseFloat vtln_low_cutoff, - BaseFloat vtln_high_cutoff, // discontinuities in warp func + BaseFloat vtln_high_cutoff, BaseFloat low_freq, - BaseFloat high_freq, // upper+lower frequency cutoffs in - // the mel computation + BaseFloat high_freq, BaseFloat vtln_warp_factor, BaseFloat freq); @@ -106,7 +101,7 @@ class MelBanks { const FrameExtractionOptions &frame_opts, BaseFloat vtln_warp_factor); - /// Compute Mel energies (note: not log enerties). + /// Compute Mel energies (note: not log energies). /// At input, "fft_energies" contains the FFT energies (not log). void Compute(const VectorBase &fft_energies, VectorBase *mel_energies_out) const; @@ -135,36 +130,9 @@ class MelBanks { std::vector > > bins_; bool debug_; - bool htk_mode_; }; -// Compute liftering coefficients (scaling on cepstral coeffs) -// coeffs are numbered slightly differently from HTK: the zeroth -// index is C0, which is not affected. -void ComputeLifterCoeffs(BaseFloat Q, VectorBase *coeffs); - - -// Durbin's recursion - converts autocorrelation coefficients to the LPC -// pTmp - temporal place [n] -// pAC - autocorrelation coefficients [n + 1] -// pLP - linear prediction coefficients [n] (predicted_sn = sum_1^P{a[i-1] * s[n-i]}}) -// F(z) = 1 / (1 - A(z)), 1 is not stored in the denominator -// Returns log energy of residual (I think) -BaseFloat Durbin(int n, const BaseFloat *pAC, BaseFloat *pLP, BaseFloat *pTmp); - -// Compute LP coefficients from autocorrelation coefficients. -// Returns log energy of residual (I think) -BaseFloat ComputeLpc(const VectorBase &autocorr_in, - Vector *lpc_out); - -void Lpc2Cepstrum(int n, const BaseFloat *pLPC, BaseFloat *pCepst); - - - -void GetEqualLoudnessVector(const MelBanks &mel_banks, - Vector *ans); - /// @} End of "addtogroup feat" } // namespace kaldi diff --git a/src/feat/online-feature-test.cc b/src/feat/online-feature-test.cc index 7ba6c7c32be..3e7834d6423 100644 --- a/src/feat/online-feature-test.cc +++ b/src/feat/online-feature-test.cc @@ -152,17 +152,11 @@ void TestOnlineMfcc() { // the parametrization object MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; + op.frame_opts.samp_freq = wave.SampFreq(); op.mel_opts.low_freq = 0.0; - op.htk_compat = false; op.use_energy = false; // C0 not energy. - if (RandInt(0, 1) == 0) - op.frame_opts.snip_edges = false; Mfcc mfcc(op); // compute mfcc offline @@ -195,55 +189,6 @@ void TestOnlineMfcc() { } } -void TestOnlinePlp() { - std::ifstream is("../feat/test_data/test.wav", std::ios_base::binary); - WaveData wave; - wave.Read(is); - KALDI_ASSERT(wave.Data().NumRows() == 1); - SubVector waveform(wave.Data(), 0); - - // the parametrization object - PlpOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; - op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; - op.frame_opts.samp_freq = wave.SampFreq(); - op.mel_opts.low_freq = 0.0; - op.htk_compat = false; - op.use_energy = false; // C0 not energy. 
- Plp plp(op); - - // compute plp offline - Matrix plp_feats; - plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported - - // compare - // The test waveform is about 1.44s long, so - // we try to break it into from 5 pieces to 9(not essential to do so) - for (int32 num_piece = 5; num_piece < 10; num_piece++) { - OnlinePlp online_plp(op); - std::vector piece_length(num_piece); - bool ret = RandomSplit(waveform.Dim(), &piece_length, num_piece); - KALDI_ASSERT(ret); - - int32 offset_start = 0; - for (int32 i = 0; i < num_piece; i++) { - Vector wave_piece( - waveform.Range(offset_start, piece_length[i])); - online_plp.AcceptWaveform(wave.SampFreq(), wave_piece); - offset_start += piece_length[i]; - } - online_plp.InputFinished(); - - Matrix online_plp_feats; - GetOutput(&online_plp, &online_plp_feats); - - AssertEqual(plp_feats, online_plp_feats); - } -} - void TestOnlineTransform() { std::ifstream is("../feat/test_data/test.wav", std::ios_base::binary); WaveData wave; @@ -253,14 +198,10 @@ void TestOnlineTransform() { // build online feature interface, take OnlineMfcc as an example MfccOptions op; - op.frame_opts.dither = 0.0; - op.frame_opts.preemph_coeff = 0.0; op.frame_opts.window_type = "hamming"; - op.frame_opts.remove_dc_offset = false; - op.frame_opts.round_to_power_of_two = true; + op.frame_opts.samp_freq = wave.SampFreq(); op.mel_opts.low_freq = 0.0; - op.htk_compat = false; op.use_energy = false; // C0 not energy. OnlineMfcc online_mfcc(op); @@ -296,14 +237,9 @@ void TestOnlineAppendFeature() { // the parametrization object for 1st stream mfcc feature MfccOptions mfcc_op; - mfcc_op.frame_opts.dither = 0.0; - mfcc_op.frame_opts.preemph_coeff = 0.0; mfcc_op.frame_opts.window_type = "hamming"; - mfcc_op.frame_opts.remove_dc_offset = false; - mfcc_op.frame_opts.round_to_power_of_two = true; mfcc_op.frame_opts.samp_freq = wave.SampFreq(); mfcc_op.mel_opts.low_freq = 0.0; - mfcc_op.htk_compat = false; mfcc_op.use_energy = false; // C0 not energy. Mfcc mfcc(mfcc_op); @@ -311,30 +247,13 @@ void TestOnlineAppendFeature() { Matrix mfcc_feats; mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported - // the parametrization object for 2nd stream plp feature - PlpOptions plp_op; - plp_op.frame_opts.dither = 0.0; - plp_op.frame_opts.preemph_coeff = 0.0; - plp_op.frame_opts.window_type = "hamming"; - plp_op.frame_opts.remove_dc_offset = false; - plp_op.frame_opts.round_to_power_of_two = true; - plp_op.frame_opts.samp_freq = wave.SampFreq(); - plp_op.mel_opts.low_freq = 0.0; - plp_op.htk_compat = false; - plp_op.use_energy = false; // C0 not energy. 
- Plp plp(plp_op); - - // compute plp offline - Matrix plp_feats; - plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported - // compare // The test waveform is about 1.44s long, so // we try to break it into from 5 pieces to 9(not essential to do so) for (int32 num_piece = 5; num_piece < 10; num_piece++) { - OnlineMfcc online_mfcc(mfcc_op); - OnlinePlp online_plp(plp_op); - OnlineAppendFeature online_mfcc_plp(&online_mfcc, &online_plp); + OnlineMfcc online_mfcc(mfcc_op), + online_mfcc2(mfcc_op); + OnlineAppendFeature online_mfcc_doubled(&online_mfcc, &online_mfcc2); std::vector piece_length(num_piece); bool ret = RandomSplit(waveform.Dim(), &piece_length, num_piece); @@ -344,32 +263,27 @@ void TestOnlineAppendFeature() { Vector wave_piece( waveform.Range(offset_start, piece_length[i])); online_mfcc.AcceptWaveform(wave.SampFreq(), wave_piece); - online_plp.AcceptWaveform(wave.SampFreq(), wave_piece); + online_mfcc2.AcceptWaveform(wave.SampFreq(), wave_piece); offset_start += piece_length[i]; } online_mfcc.InputFinished(); - online_plp.InputFinished(); + online_mfcc2.InputFinished(); - Matrix online_mfcc_plp_feats; - GetOutput(&online_mfcc_plp, &online_mfcc_plp_feats); + Matrix online_mfcc_doubled_feats; + GetOutput(&online_mfcc_doubled, &online_mfcc_doubled_feats); - // compare mfcc_feats & plp_features with online_mfcc_plp_feats - KALDI_ASSERT(mfcc_feats.NumRows() == online_mfcc_plp_feats.NumRows() - && plp_feats.NumRows() == online_mfcc_plp_feats.NumRows() - && mfcc_feats.NumCols() + plp_feats.NumCols() - == online_mfcc_plp_feats.NumCols()); - for (MatrixIndexT i = 0; i < online_mfcc_plp_feats.NumRows(); i++) { + // compare mfcc_feats & plp_features with online_mfcc_doubled_feats + KALDI_ASSERT(mfcc_feats.NumRows() == online_mfcc_doubled_feats.NumRows() && + online_mfcc_doubled_feats.NumCols() == 2 * mfcc_feats.NumCols()); + for (MatrixIndexT i = 0; i < online_mfcc_doubled_feats.NumRows(); i++) { for (MatrixIndexT j = 0; j < mfcc_feats.NumCols(); j++) { - KALDI_ASSERT(std::abs(mfcc_feats(i, j) - online_mfcc_plp_feats(i, j)) - < 0.0001*std::max(1.0, static_cast(std::abs(mfcc_feats(i, j)) - + std::abs(online_mfcc_plp_feats(i, j))))); - } - for (MatrixIndexT k = 0; k < plp_feats.NumCols(); k++) { - KALDI_ASSERT( - std::abs(plp_feats(i, k) - - online_mfcc_plp_feats(i, mfcc_feats.NumCols() + k)) - < 0.0001*std::max(1.0, static_cast(std::abs(plp_feats(i, k)) - +std::abs(online_mfcc_plp_feats(i, mfcc_feats.NumCols() + k))))); + MatrixIndexT jj = j; + for (int count = 0; count < 2; count++) { + KALDI_ASSERT(std::abs(mfcc_feats(i, j) - online_mfcc_doubled_feats(i, jj)) + < 0.0001*std::max(1.0, static_cast(std::abs(mfcc_feats(i, j)) + + std::abs(online_mfcc_doubled_feats(i, jj))))); + jj += mfcc_feats.NumCols(); + } } } } @@ -423,7 +337,6 @@ int main() { TestOnlineDeltaFeature(); TestOnlineSpliceFrames(); TestOnlineMfcc(); - TestOnlinePlp(); TestOnlineTransform(); TestOnlineAppendFeature(); TestRecyclingVector(); diff --git a/src/feat/online-feature.cc b/src/feat/online-feature.cc index 6f5ce6ee95b..b2c4799dacf 100644 --- a/src/feat/online-feature.cc +++ b/src/feat/online-feature.cc @@ -69,14 +69,12 @@ void OnlineGenericBaseFeature::GetFrame(int32 frame, template OnlineGenericBaseFeature::OnlineGenericBaseFeature( const typename C::Options &opts): - computer_(opts), window_function_(computer_.GetFrameOptions()), + computer_(opts), features_(opts.frame_opts.max_feature_vectors), - input_finished_(false), waveform_offset_(0) { - // RE the following assert: search for ONLINE_IVECTOR_LIMIT in - 
// online-ivector-feature.cc. - // Casting to uint32, an unsigned type, means that -1 would be treated - // as `very large`. - KALDI_ASSERT(static_cast(opts.frame_opts.max_feature_vectors) > 200); + input_finished_(false), + waveform_offset_(0) { + InitFeatureWindowFunction(computer_.GetFrameOptions(), + &window_function_); } @@ -168,17 +166,14 @@ void OnlineGenericBaseFeature::ComputeFeatures() { KALDI_ASSERT(num_frames_new >= num_frames_old); Vector window; - bool need_raw_log_energy = computer_.NeedRawLogEnergy(); for (int32 frame = num_frames_old; frame < num_frames_new; frame++) { - BaseFloat raw_log_energy = 0.0; ExtractWindow(waveform_offset_, waveform_remainder_, frame, - frame_opts, window_function_, &window, - need_raw_log_energy ? &raw_log_energy : NULL); + frame_opts, window_function_, &window); Vector *this_feature = new Vector(computer_.Dim(), kUndefined); // note: this online feature-extraction code does not support VTLN. BaseFloat vtln_warp = 1.0; - computer_.Compute(raw_log_energy, vtln_warp, &window, this_feature); + computer_.Compute(vtln_warp, &window, this_feature); features_.PushBack(this_feature); } // OK, we will now discard any portion of the signal that will not be @@ -205,7 +200,6 @@ void OnlineGenericBaseFeature::ComputeFeatures() { // instantiate the templates defined here for MFCC, PLP and filterbank classes. template class OnlineGenericBaseFeature; -template class OnlineGenericBaseFeature; template class OnlineGenericBaseFeature; OnlineCmvnState::OnlineCmvnState(const OnlineCmvnState &other): diff --git a/src/feat/online-feature.h b/src/feat/online-feature.h index f2ebe45bf3e..0c34c2de5dc 100644 --- a/src/feat/online-feature.h +++ b/src/feat/online-feature.h @@ -32,7 +32,6 @@ #include "base/kaldi-error.h" #include "feat/feature-functions.h" #include "feat/feature-mfcc.h" -#include "feat/feature-plp.h" #include "feat/feature-fbank.h" #include "itf/online-feature-itf.h" @@ -72,7 +71,7 @@ class RecyclingVector { /// This is a templated class for online feature extraction; -/// it's templated on a class like MfccComputer or PlpComputer +/// it's templated on a class like MfccComputer /// that does the basic feature extraction. template class OnlineGenericBaseFeature: public OnlineBaseFeature { @@ -126,15 +125,15 @@ class OnlineGenericBaseFeature: public OnlineBaseFeature { void MaybeCreateResampler(BaseFloat sampling_rate); - C computer_; // class that does the MFCC or PLP or filterbank computation + C computer_; // class that does the MFCC or filterbank computation // resampler in cases when the input sampling frequency is not equal to // the expected sampling rate std::unique_ptr resampler_; - FeatureWindowFunction window_function_; + Vector window_function_; - // features_ is the Mfcc or Plp or Fbank features that we have already computed. + // features_ is the Mfcc or Fbank features that we have already computed. RecyclingVector features_; @@ -156,7 +155,6 @@ class OnlineGenericBaseFeature: public OnlineBaseFeature { }; typedef OnlineGenericBaseFeature OnlineMfcc; -typedef OnlineGenericBaseFeature OnlinePlp; typedef OnlineGenericBaseFeature OnlineFbank; @@ -597,7 +595,7 @@ class OnlineCacheFeature: public OnlineFeatureInterface { /// This online-feature class implements combination of two feature -/// streams (such as pitch, plp) into one stream. +/// streams (such as pitch) into one stream. 
class OnlineAppendFeature: public OnlineFeatureInterface { public: virtual int32 Dim() const { return src1_->Dim() + src2_->Dim(); } diff --git a/src/feat/pitch-functions-test.cc b/src/feat/pitch-functions-test.cc index 0e481c18674..e3953acb884 100644 --- a/src/feat/pitch-functions-test.cc +++ b/src/feat/pitch-functions-test.cc @@ -25,7 +25,6 @@ #include #include "base/kaldi-math.h" -#include "feat/feature-plp.h" #include "feat/pitch-functions.h" #include "feat/wave-reader.h" #include "sys/stat.h" diff --git a/src/feat/test_data/README b/src/feat/test_data/README index 8deadd273a4..e44395c6bad 100644 --- a/src/feat/test_data/README +++ b/src/feat/test_data/README @@ -7,24 +7,4 @@ #1) convert 16kHz,lin16 wav to KALDI ASCII vector format cat prepare_wav_in_ascii.m | matlab -#2) perform reference feature extraction by HTK -# we used HCopy from HTK V3.4 -HCopy -C hcopy1.conf test.wav test.wav.fea_htk.1 - -HCopy -C hcopy2.conf test.wav test.wav.fea_htk.2 - -HCopy -C hcopy3.conf test.wav test.wav.fea_htk.3 - -HCopy -C hcopy4.conf test.wav test.wav.fea_htk.4 - -HCopy -C hcopy5.conf test.wav test.wav.fea_htk.5 - -HCopy -C hcopy6.conf test.wav test.wav.fea_htk.6 - -HCopy -C plp1.conf test.wav test.wav.plp_htk.1 - -HCopy -C fbank1.conf test.wav test.wav.fbank_htk.1 - -HCopy -C fbank2.conf test.wav test.wav.fbank_htk.2 - -HCopy -C fbank3.conf test.wav test.wav.fbank_htk.3 +#2) perform reference feature extraction by HTK - HTK support is deprecated. diff --git a/src/feat/test_data/fbank1.conf b/src/feat/test_data/fbank1.conf deleted file mode 100644 index b751b61d6d1..00000000000 --- a/src/feat/test_data/fbank1.conf +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = FBANK - -LOFREQ = 0 -HIFREQ = 8000 -#WARPLCUTOFF = 100 -#WARPUCUTOFF = 7500 -#WARPFREQ = 1.0 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - - -PREEMCOEF = 0.0 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - diff --git a/src/feat/test_data/fbank2.conf b/src/feat/test_data/fbank2.conf deleted file mode 100644 index 604819a88c4..00000000000 --- a/src/feat/test_data/fbank2.conf +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = FBANK - -LOFREQ = 25 -HIFREQ = 8000 -#WARPLCUTOFF = 100 -#WARPUCUTOFF = 7500 -#WARPFREQ = 1.0 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - - -PREEMCOEF = 0.0 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - diff --git a/src/feat/test_data/fbank3.conf b/src/feat/test_data/fbank3.conf deleted file mode 100644 index f86fec4b248..00000000000 --- a/src/feat/test_data/fbank3.conf +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = FBANK - -LOFREQ = 25 -HIFREQ = 8000 -WARPLCUTOFF = 100 -WARPUCUTOFF = 7500 -WARPFREQ = 0.9 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - - -PREEMCOEF = 0.0 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - 
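// Illustrative sketch, not part of the patch: rough usage of the online
// interface exercised by the modified test above -- feed waveform chunks into
// two OnlineMfcc streams and read back the appended features.  Option values
// and the two-piece split are arbitrary examples.
#include "feat/online-feature.h"

void SketchAppendTwoMfccStreams(const kaldi::VectorBase<kaldi::BaseFloat> &wave,
                                kaldi::BaseFloat samp_freq) {
  using namespace kaldi;
  MfccOptions opts;
  opts.frame_opts.samp_freq = samp_freq;
  OnlineMfcc mfcc_a(opts), mfcc_b(opts);
  OnlineAppendFeature both(&mfcc_a, &mfcc_b);  // Dim() == 2 * MFCC dim

  // Feed the waveform in two pieces, then flush.
  int32 half = wave.Dim() / 2;
  SubVector<BaseFloat> piece1(wave, 0, half),
      piece2(wave, half, wave.Dim() - half);
  mfcc_a.AcceptWaveform(samp_freq, piece1);
  mfcc_b.AcceptWaveform(samp_freq, piece1);
  mfcc_a.AcceptWaveform(samp_freq, piece2);
  mfcc_b.AcceptWaveform(samp_freq, piece2);
  mfcc_a.InputFinished();
  mfcc_b.InputFinished();

  Vector<BaseFloat> frame(both.Dim());
  for (int32 t = 0; t < both.NumFramesReady(); t++)
    both.GetFrame(t, &frame);  // the two halves are identical here
}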
diff --git a/src/feat/test_data/fbank4.conf b/src/feat/test_data/fbank4.conf deleted file mode 100644 index a19679f8375..00000000000 --- a/src/feat/test_data/fbank4.conf +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = FBANK - -LOFREQ = 25 -HIFREQ = 8000 -WARPLCUTOFF = 100 -WARPUCUTOFF = 7500 -WARPFREQ = 1.1 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - - -PREEMCOEF = 0.0 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - diff --git a/src/feat/test_data/hcopy1.conf b/src/feat/test_data/hcopy1.conf deleted file mode 100644 index 25230348076..00000000000 --- a/src/feat/test_data/hcopy1.conf +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = MFCC_D_A_0 - -LOFREQ = 0 -HIFREQ = 8000 -#WARPLCUTOFF = 100 -#WARPUCUTOFF = 7500 -#WARPFREQ = 1.0 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - - -PREEMCOEF = 0.0 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - -CEPLIFTER = 22 -NUMCEPS = 12 diff --git a/src/feat/test_data/hcopy2.conf b/src/feat/test_data/hcopy2.conf deleted file mode 100644 index 36c7d97d459..00000000000 --- a/src/feat/test_data/hcopy2.conf +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = MFCC_D_A_E - -LOFREQ = 0 -HIFREQ = 8000 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - -PREEMCOEF = 0 # no preemphase -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - -CEPLIFTER = 22 -NUMCEPS = 12 diff --git a/src/feat/test_data/hcopy3.conf b/src/feat/test_data/hcopy3.conf deleted file mode 100644 index 6ed093af685..00000000000 --- a/src/feat/test_data/hcopy3.conf +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = MFCC_D_A_E - -LOFREQ = 20 -HIFREQ = 8000 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - -PREEMCOEF = 0 # no preemphase -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - -CEPLIFTER = 22 -NUMCEPS = 12 diff --git a/src/feat/test_data/hcopy4.conf b/src/feat/test_data/hcopy4.conf deleted file mode 100644 index e51a361cccd..00000000000 --- a/src/feat/test_data/hcopy4.conf +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = MFCC_D_A_E - -LOFREQ = 0 -HIFREQ = 8000 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - -PREEMCOEF = 0.97 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - -CEPLIFTER = 22 -NUMCEPS = 12 diff --git a/src/feat/test_data/hcopy5.conf b/src/feat/test_data/hcopy5.conf 
deleted file mode 100644 index d280548b91f..00000000000 --- a/src/feat/test_data/hcopy5.conf +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = MFCC_D_A_E - -LOFREQ = 0 -HIFREQ = 8000 -WARPLCUTOFF = 100 -WARPUCUTOFF = 7500 -WARPFREQ = 1.1 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - -PREEMCOEF = 0.97 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - -CEPLIFTER = 22 -NUMCEPS = 12 diff --git a/src/feat/test_data/hcopy6.conf b/src/feat/test_data/hcopy6.conf deleted file mode 100644 index 5e305c9d445..00000000000 --- a/src/feat/test_data/hcopy6.conf +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = MFCC_D_A_0 - -LOFREQ = 125 -HIFREQ = 7800 -#WARPLCUTOFF = 100 -#WARPUCUTOFF = 7500 -#WARPFREQ = 1.0 -NUMCHANS = 24 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -RAWENERGY = T -ENORMALISE = F - -PREEMCOEF = 0.97 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - -CEPLIFTER = 22 -NUMCEPS = 12 diff --git a/src/feat/test_data/plp1.conf b/src/feat/test_data/plp1.conf deleted file mode 100644 index 3465bd20d18..00000000000 --- a/src/feat/test_data/plp1.conf +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh - -SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAV -SOURCERATE = 625 -BYTEORDER = VAX -TARGETFORMAT = HTK -TARGETKIND = PLP_D_A_0 - -LOFREQ = 0 -HIFREQ = 8000 -NUMCHANS = 23 # number of critical bands -USEPOWER = T # using power spectrum -USEHAMMING = T # use hamming window on speech frame -COMPRESSFACT = 0.33 - -PREEMCOEF = 0 -TARGETRATE = 100000 # 10 ms frame rate -WINDOWSIZE = 250000 # 25 ms window -SAVEWITHCRC = F - -CEPLIFTER = 22 -NUMCEPS = 12 diff --git a/src/feat/test_data/test.wav.fbank_htk.1 b/src/feat/test_data/test.wav.fbank_htk.1 deleted file mode 100644 index cd6083c130d..00000000000 Binary files a/src/feat/test_data/test.wav.fbank_htk.1 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fbank_htk.2 b/src/feat/test_data/test.wav.fbank_htk.2 deleted file mode 100644 index 4e95c15a308..00000000000 Binary files a/src/feat/test_data/test.wav.fbank_htk.2 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fbank_htk.3 b/src/feat/test_data/test.wav.fbank_htk.3 deleted file mode 100644 index fb3ab2258eb..00000000000 Binary files a/src/feat/test_data/test.wav.fbank_htk.3 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fbank_htk.4 b/src/feat/test_data/test.wav.fbank_htk.4 deleted file mode 100644 index ec9fae638c0..00000000000 Binary files a/src/feat/test_data/test.wav.fbank_htk.4 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fea_htk.1 b/src/feat/test_data/test.wav.fea_htk.1 deleted file mode 100644 index 0cc28939ef2..00000000000 Binary files a/src/feat/test_data/test.wav.fea_htk.1 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fea_htk.2 b/src/feat/test_data/test.wav.fea_htk.2 deleted file mode 100644 index 5d68a63d43b..00000000000 Binary files a/src/feat/test_data/test.wav.fea_htk.2 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fea_htk.3 b/src/feat/test_data/test.wav.fea_htk.3 deleted file mode 100644 index c1c577d5749..00000000000 Binary files 
a/src/feat/test_data/test.wav.fea_htk.3 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fea_htk.4 b/src/feat/test_data/test.wav.fea_htk.4 deleted file mode 100644 index 0b8667e1fa3..00000000000 Binary files a/src/feat/test_data/test.wav.fea_htk.4 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fea_htk.5 b/src/feat/test_data/test.wav.fea_htk.5 deleted file mode 100644 index d5164ad76a4..00000000000 Binary files a/src/feat/test_data/test.wav.fea_htk.5 and /dev/null differ diff --git a/src/feat/test_data/test.wav.fea_htk.6 b/src/feat/test_data/test.wav.fea_htk.6 deleted file mode 100644 index c7d52ce013c..00000000000 Binary files a/src/feat/test_data/test.wav.fea_htk.6 and /dev/null differ diff --git a/src/feat/test_data/test.wav.plp_htk.1 b/src/feat/test_data/test.wav.plp_htk.1 deleted file mode 100644 index 3485889d10d..00000000000 Binary files a/src/feat/test_data/test.wav.plp_htk.1 and /dev/null differ diff --git a/src/feat/wave-reader-test.cc b/src/feat/wave-reader-test.cc index f9a71e8af34..ce8299446be 100644 --- a/src/feat/wave-reader-test.cc +++ b/src/feat/wave-reader-test.cc @@ -72,6 +72,10 @@ static void UnitTestStereo8K() { std::istringstream ies(expect_mat, std::ios::in); Matrix expected; expected.Read(ies, false /* text */); + // WaveData scales data to the range [-1, 1], so do the same. Don't + // put the scaled values in the string expect_mat, since + // representing floating point as text losslessly is tricky. + expected.Scale(BaseFloat(1.0 / 32768.0)); AssertEqual(wave.SampFreq(), hz, 0); AssertEqual(wave.Duration(), 3.0 /* samples */ / hz /* Hz */, 1E-6); @@ -118,6 +122,7 @@ static void UnitTestMono22K() { std::istringstream ies(expect_mat, std::ios::in); Matrix expected; expected.Read(ies, false /* text */); + expected.Scale(BaseFloat(1.0 / 32768.0)); AssertEqual(wave.SampFreq(), hz, 0); AssertEqual(wave.Duration(), 5.0 /* samples */ / hz /* Hz */, 1E-6); @@ -157,6 +162,7 @@ static void UnitTestEndless1() { std::istringstream ies(expect_mat, std::ios::in); Matrix expected; expected.Read(ies, false /* text */); + expected.Scale(BaseFloat(1.0 / 32768.0)); AssertEqual(wave.Data(), expected); } @@ -194,6 +200,7 @@ static void UnitTestEndless2() { std::istringstream ies(expect_mat, std::ios::in); Matrix expected; expected.Read(ies, false /* text */); + expected.Scale(BaseFloat(1.0 / 32768.0)); AssertEqual(wave.Data(), expected); } diff --git a/src/feat/wave-reader.cc b/src/feat/wave-reader.cc index f8259a3a82e..bd35b1cff43 100644 --- a/src/feat/wave-reader.cc +++ b/src/feat/wave-reader.cc @@ -308,7 +308,11 @@ void WaveData::Read(std::istream &is) { uint16 *data_ptr = reinterpret_cast(&buffer[0]); - // The matrix is arranged row per channel, column per sample. + // Scale the wave data to the range [-1, 1]. Prior to kaldi-10, + // it was in the range [-32768.0, 32768.0]. + const BaseFloat scale = 1.0 / 32768.0; + + // The row-indexes are channels; column-indexes are samples. data_.Resize(header.NumChannels(), buffer.size() / header.BlockAlign()); for (uint32 i = 0; i < data_.NumCols(); ++i) { @@ -316,7 +320,7 @@ void WaveData::Read(std::istream &is) { int16 k = *data_ptr++; if (header.ReverseBytes()) KALDI_SWAP2(k); - data_(j, i) = k; + data_(j, i) = k * scale; } } } @@ -358,9 +362,13 @@ void WaveData::Write(std::ostream &os) const { int32 stride = data_.Stride(); int num_clipped = 0; + + // This scaling factor is because we are writing 16-bit data.
+ const BaseFloat scale = 32768.0; + for (int32 i = 0; i < num_samp; i++) { for (int32 j = 0; j < num_chan; j++) { - int32 elem = static_cast(trunc(data_ptr[j * stride + i])); + int32 elem = static_cast(trunc(data_ptr[j * stride + i] * scale)); int16 elem_16 = static_cast(elem); if (elem < std::numeric_limits::min()) { elem_16 = std::numeric_limits::min(); diff --git a/src/feat/wave-reader.h b/src/feat/wave-reader.h index dae74139232..2c07bda2728 100644 --- a/src/feat/wave-reader.h +++ b/src/feat/wave-reader.h @@ -2,7 +2,7 @@ // Copyright 2009-2011 Karel Vesely; Microsoft Corporation // 2013 Florent Masson -// 2013 Johns Hopkins University (author: Daniel Povey) +// 2013-2019 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // @@ -57,10 +57,6 @@ namespace kaldi { -/// For historical reasons, we scale waveforms to the range -/// (2^15-1)*[-1, 1], not the usual default DSP range [-1, 1]. -const BaseFloat kWaveSampleMax = 32768.0; - /// This class reads and hold wave file header information. class WaveInfo { public: @@ -121,6 +117,8 @@ class WaveData { // This function returns the wave data-- it's in a matrix // because there may be multiple channels. In the normal case // there's just one channel so Data() will have one row. + // This data will be in the range [-1, 1]. This is a difference + // from pre-kaldi10. const Matrix &Data() const { return data_; } BaseFloat SampFreq() const { return samp_freq_; } diff --git a/src/featbin/Makefile b/src/featbin/Makefile index 861ba3f7a93..bb11b797e69 100644 --- a/src/featbin/Makefile +++ b/src/featbin/Makefile @@ -8,11 +8,10 @@ BINFILES = add-deltas add-deltas-sdc append-post-to-feats \ compose-transforms compute-and-process-kaldi-pitch-feats \ compute-cmvn-stats compute-cmvn-stats-two-channel \ compute-fbank-feats compute-kaldi-pitch-feats compute-mfcc-feats \ - compute-plp-feats compute-spectrogram-feats concat-feats copy-feats \ + concat-feats copy-feats \ copy-feats-to-htk copy-feats-to-sphinx extend-transform-dim \ extract-feature-segments extract-segments feat-to-dim \ - feat-to-len fmpe-acc-stats fmpe-apply-transform fmpe-est \ - fmpe-init fmpe-sum-accs get-full-lda-mat interpolate-pitch \ + feat-to-len get-full-lda-mat interpolate-pitch \ modify-cmvn-stats paste-feats post-to-feats \ process-kaldi-pitch-feats process-pitch-feats \ select-feats shift-feats splice-feats subsample-feats \ @@ -25,7 +24,7 @@ TESTFILES = ADDLIBS = ../hmm/kaldi-hmm.a ../feat/kaldi-feat.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ - ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a + ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ + ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/featbin/compute-plp-feats.cc b/src/featbin/compute-plp-feats.cc deleted file mode 100644 index 5c3b9843b4d..00000000000 --- a/src/featbin/compute-plp-feats.cc +++ /dev/null @@ -1,192 +0,0 @@ -// featbin/compute-plp-feats.cc - -// Copyright 2009-2012 Microsoft Corporation -// Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
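// Illustrative sketch, not part of the patch: the int16 <-> float conversions
// implied by the new scaling in WaveData::Read() and WaveData::Write() above;
// samples are held in [-1, 1] in memory and scaled back (with clipping) when
// writing 16-bit data.
#include <algorithm>
#include <cstdint>

inline float SketchInt16ToFloat(int16_t s) {
  return s * (1.0f / 32768.0f);                    // read: scale into [-1, 1]
}

inline int16_t SketchFloatToInt16(float x) {
  float y = x * 32768.0f;                          // write: scale back to the 16-bit range
  y = std::max(-32768.0f, std::min(32767.0f, y));  // clip instead of wrapping
  return static_cast<int16_t>(y);
}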
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "feat/feature-plp.h" -#include "feat/wave-reader.h" -#include "util/common-utils.h" - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - const char *usage = - "Create PLP feature files.\n" - "Usage: compute-plp-feats [options...] " - "\n"; - - // Construct all the global objects. - ParseOptions po(usage); - PlpOptions plp_opts; - // Define defaults for global options. - bool subtract_mean = false; - BaseFloat vtln_warp = 1.0; - std::string vtln_map_rspecifier; - std::string utt2spk_rspecifier; - int32 channel = -1; - BaseFloat min_duration = 0.0; - std::string output_format = "kaldi"; - std::string utt2dur_wspecifier; - - // Register the options. - po.Register("output-format", &output_format, "Format of the output " - "files [kaldi, htk]"); - po.Register("subtract-mean", &subtract_mean, "Subtract mean of each " - "feature file [CMS]. "); - po.Register("vtln-warp", &vtln_warp, "Vtln warp factor (only applicable " - "if vtln-map not specified)"); - po.Register("vtln-map", &vtln_map_rspecifier, "Map from utterance or " - "speaker-id to vtln warp factor (rspecifier)"); - po.Register("utt2spk", &utt2spk_rspecifier, "Utterance to speaker-id " - "map (if doing VTLN and you have warps per speaker)"); - po.Register("channel", &channel, "Channel to extract (-1 -> expect mono, " - "0 -> left, 1 -> right)"); - po.Register("min-duration", &min_duration, "Minimum duration of segments " - "to process (in seconds)."); - po.Register("write-utt2dur", &utt2dur_wspecifier, "Wspecifier to write " - "duration of each utterance in seconds, e.g. 'ark,t:utt2dur'."); - - plp_opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - - std::string wav_rspecifier = po.GetArg(1); - - std::string output_wspecifier = po.GetArg(2); - - Plp plp(plp_opts); - - if (utt2spk_rspecifier != "" && vtln_map_rspecifier != "") - KALDI_ERR << ("The --utt2spk option is only needed if " - "the --vtln-map option is used."); - RandomAccessBaseFloatReaderMapped vtln_map_reader(vtln_map_rspecifier, - utt2spk_rspecifier); - - SequentialTableReader reader(wav_rspecifier); - BaseFloatMatrixWriter kaldi_writer; // typedef to TableWriter. 
- TableWriter htk_writer; - - if (output_format == "kaldi") { - if (!kaldi_writer.Open(output_wspecifier)) - KALDI_ERR << "Could not initialize output with wspecifier " - << output_wspecifier; - } else if (output_format == "htk") { - if (!htk_writer.Open(output_wspecifier)) - KALDI_ERR << "Could not initialize output with wspecifier " - << output_wspecifier; - } else { - KALDI_ERR << "Invalid output_format string " << output_format; - } - - DoubleWriter utt2dur_writer(utt2dur_wspecifier); - - int32 num_utts = 0, num_success = 0; - for (; !reader.Done(); reader.Next()) { - num_utts++; - std::string utt = reader.Key(); - const WaveData &wave_data = reader.Value(); - if (wave_data.Duration() < min_duration) { - KALDI_WARN << "File: " << utt << " is too short (" - << wave_data.Duration() << " sec): producing no output."; - continue; - } - int32 num_chan = wave_data.Data().NumRows(), this_chan = channel; - { // This block works out the channel (0=left, 1=right...). - KALDI_ASSERT(num_chan > 0); // This should have been caught in - // reading code if no channels. - if (channel == -1) { - this_chan = 0; - if (num_chan != 1) - KALDI_WARN << "Channel not specified but you have data with " - << num_chan << " channels; defaulting to zero"; - } else { - if (this_chan >= num_chan) { - KALDI_WARN << "File with id " << utt << " has " - << num_chan << " channels but you specified channel " - << channel << ", producing no output."; - continue; - } - } - } - BaseFloat vtln_warp_local; // Work out VTLN warp factor. - if (vtln_map_rspecifier != "") { - if (!vtln_map_reader.HasKey(utt)) { - KALDI_WARN << "No vtln-map entry for utterance-id (or speaker-id) " - << utt; - continue; - } - vtln_warp_local = vtln_map_reader.Value(utt); - } else { - vtln_warp_local = vtln_warp; - } - - SubVector waveform(wave_data.Data(), this_chan); - Matrix features; - try { - plp.ComputeFeatures(waveform, wave_data.SampFreq(), - vtln_warp_local, &features); - } catch (...) { - KALDI_WARN << "Failed to compute features for utterance " << utt; - continue; - } - if (subtract_mean) { - Vector mean(features.NumCols()); - mean.AddRowSumMat(1.0, features); - mean.Scale(1.0 / features.NumRows()); - for (size_t i = 0; i < features.NumRows(); i++) - features.Row(i).AddVec(-1.0, mean); - } - if (output_format == "kaldi") { - kaldi_writer.Write(utt, features); - } else { - std::pair, HtkHeader> p; - p.first.Resize(features.NumRows(), features.NumCols()); - p.first.CopyFromMat(features); - HtkHeader header = { - features.NumRows(), - 100000, // 10ms shift - static_cast(sizeof(float)*features.NumCols()), - 013 | // PLP - 020000 // C0 [no option currently to use energy in PLP. - }; - p.second = header; - htk_writer.Write(utt, p); - } - if (utt2dur_writer.IsOpen()) { - utt2dur_writer.Write(utt, wave_data.Duration()); - } - if (num_utts % 10 == 0) - KALDI_LOG << "Processed " << num_utts << " utterances"; - KALDI_VLOG(2) << "Processed features for key " << utt; - num_success++; - } - KALDI_LOG << " Done " << num_success << " out of " << num_utts - << " utterances."; - return (num_success != 0 ? 
0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/featbin/compute-spectrogram-feats.cc b/src/featbin/compute-spectrogram-feats.cc deleted file mode 100644 index 67932915278..00000000000 --- a/src/featbin/compute-spectrogram-feats.cc +++ /dev/null @@ -1,164 +0,0 @@ -// featbin/compute-spectrogram-feats.cc - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "feat/feature-spectrogram.h" -#include "feat/wave-reader.h" -#include "util/common-utils.h" - - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - const char *usage = - "Create spectrogram feature files.\n" - "Usage: compute-spectrogram-feats [options...] " - "\n"; - - // Construct all the global objects. - ParseOptions po(usage); - SpectrogramOptions spec_opts; - // Define defaults for global options. - bool subtract_mean = false; - int32 channel = -1; - BaseFloat min_duration = 0.0; - std::string output_format = "kaldi"; - std::string utt2dur_wspecifier; - - // Register the option struct - spec_opts.Register(&po); - // Register the options - po.Register("output-format", &output_format, - "Format of the output files [kaldi, htk]"); - po.Register("subtract-mean", &subtract_mean, "Subtract mean of each " - "feature file [CMS]; not recommended to do it this way. "); - po.Register("channel", &channel, "Channel to extract (-1 -> expect mono, " - "0 -> left, 1 -> right)"); - po.Register("min-duration", &min_duration, "Minimum duration of segments " - "to process (in seconds)."); - po.Register("write-utt2dur", &utt2dur_wspecifier, "Wspecifier to write " - "duration of each utterance in seconds, e.g. 'ark,t:utt2dur'."); - - po.Read(argc, argv); - - if (po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - - std::string wav_rspecifier = po.GetArg(1); - - std::string output_wspecifier = po.GetArg(2); - - Spectrogram spec(spec_opts); - - SequentialTableReader reader(wav_rspecifier); - BaseFloatMatrixWriter kaldi_writer; // typedef to TableWriter. 
- TableWriter htk_writer; - - if (output_format == "kaldi") { - if (!kaldi_writer.Open(output_wspecifier)) - KALDI_ERR << "Could not initialize output with wspecifier " - << output_wspecifier; - } else if (output_format == "htk") { - if (!htk_writer.Open(output_wspecifier)) - KALDI_ERR << "Could not initialize output with wspecifier " - << output_wspecifier; - } else { - KALDI_ERR << "Invalid output_format string " << output_format; - } - - DoubleWriter utt2dur_writer(utt2dur_wspecifier); - - int32 num_utts = 0, num_success = 0; - for (; !reader.Done(); reader.Next()) { - num_utts++; - std::string utt = reader.Key(); - const WaveData &wave_data = reader.Value(); - if (wave_data.Duration() < min_duration) { - KALDI_WARN << "File: " << utt << " is too short (" - << wave_data.Duration() << " sec): producing no output."; - continue; - } - int32 num_chan = wave_data.Data().NumRows(), this_chan = channel; - { // This block works out the channel (0=left, 1=right...) - KALDI_ASSERT(num_chan > 0); // should have been caught in - // reading code if no channels. - if (channel == -1) { - this_chan = 0; - if (num_chan != 1) - KALDI_WARN << "Channel not specified but you have data with " - << num_chan << " channels; defaulting to zero"; - } else { - if (this_chan >= num_chan) { - KALDI_WARN << "File with id " << utt << " has " - << num_chan << " channels but you specified channel " - << channel << ", producing no output."; - continue; - } - } - } - - SubVector waveform(wave_data.Data(), this_chan); - Matrix features; - try { - spec.ComputeFeatures(waveform, wave_data.SampFreq(), 1.0, &features); - } catch (...) { - KALDI_WARN << "Failed to compute features for utterance " << utt; - continue; - } - if (subtract_mean) { - Vector mean(features.NumCols()); - mean.AddRowSumMat(1.0, features); - mean.Scale(1.0 / features.NumRows()); - for (int32 i = 0; i < features.NumRows(); i++) - features.Row(i).AddVec(-1.0, mean); - } - if (output_format == "kaldi") { - kaldi_writer.Write(utt, features); - } else { - std::pair, HtkHeader> p; - p.first.Resize(features.NumRows(), features.NumCols()); - p.first.CopyFromMat(features); - int32 frame_shift = spec_opts.frame_opts.frame_shift_ms * 10000; - HtkHeader header = { - features.NumRows(), - frame_shift, - static_cast(sizeof(float)*features.NumCols()), - 007 | 020000 - }; - p.second = header; - htk_writer.Write(utt, p); - } - if (utt2dur_writer.IsOpen()) { - utt2dur_writer.Write(utt, wave_data.Duration()); - } - if(num_utts % 10 == 0) - KALDI_LOG << "Processed " << num_utts << " utterances"; - KALDI_VLOG(2) << "Processed features for key " << utt; - num_success++; - } - KALDI_LOG << " Done " << num_success << " out of " << num_utts - << " utterances."; - return (num_success != 0 ? 0 : 1); - } catch(const std::exception& e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/featbin/fmpe-acc-stats.cc b/src/featbin/fmpe-acc-stats.cc deleted file mode 100644 index c69e95b6b59..00000000000 --- a/src/featbin/fmpe-acc-stats.cc +++ /dev/null @@ -1,108 +0,0 @@ -// featbin/fmpe-acc-stats.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
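Aside: the HtkHeader initializers in the two deleted tools (013 | 020000 for compute-plp-feats, 007 | 020000 here) follow the standard 12-byte HTK parameter-file header: int32 frame count, int32 sample period in 100 ns units (so 100000 is a 10 ms shift), int16 bytes per frame, int16 parameter kind plus qualifier bits; octal 013 is HTK's PLP kind, 007 is FBANK, and 020000 is the "_0" qualifier. A standalone sketch of packing such a header big-endian, independent of Kaldi:

#include <cstdint>
#include <fstream>

// Write the 12-byte HTK parameter-file header (big-endian), with the same
// fields the deleted tools filled in: nSamples, sampPeriod (100 ns units),
// sampSize (bytes per frame), parmKind (base kind | qualifier bits).
static void WriteBE32(std::ostream &os, uint32_t v) {
  unsigned char b[4] = { (unsigned char)(v >> 24), (unsigned char)(v >> 16),
                         (unsigned char)(v >> 8),  (unsigned char)(v) };
  os.write(reinterpret_cast<char*>(b), 4);
}
static void WriteBE16(std::ostream &os, uint16_t v) {
  unsigned char b[2] = { (unsigned char)(v >> 8), (unsigned char)(v) };
  os.write(reinterpret_cast<char*>(b), 2);
}

void WriteHtkHeader(std::ostream &os, int num_frames, int feat_dim,
                    float frame_shift_ms, uint16_t parm_kind) {
  WriteBE32(os, (uint32_t)num_frames);
  WriteBE32(os, (uint32_t)(frame_shift_ms * 10000));    // ms -> 100 ns units.
  WriteBE16(os, (uint16_t)(sizeof(float) * feat_dim));  // bytes per frame.
  WriteBE16(os, parm_kind);
}

int main() {
  std::ofstream os("example.htk", std::ios::binary);
  // 013 (PLP) | 020000 (_0 qualifier), as in the deleted compute-plp-feats.
  WriteHtkHeader(os, /*num_frames=*/500, /*feat_dim=*/13,
                 /*frame_shift_ms=*/10.0f, 013 | 020000);
  return 0;
}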
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "transform/fmpe.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; - using kaldi::int32; - try { - const char *usage = - "Compute statistics for fMPE training\n" - "Usage: fmpe-acc-stats [options...] " - " \n" - "Note: gmm-fmpe-acc-stats avoids computing the features an extra time\n"; - - ParseOptions po(usage); - bool binary = true; - po.Register("binary", &binary, "If true, output stats in binary mode."); - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - std::string fmpe_rxfilename = po.GetArg(1), - feat_rspecifier = po.GetArg(2), - feat_diff_rspecifier = po.GetArg(3), - gselect_rspecifier = po.GetArg(4), - stats_wxfilename = po.GetArg(5); - - Fmpe fmpe; - ReadKaldiObject(fmpe_rxfilename, &fmpe); - - SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier); - RandomAccessBaseFloatMatrixReader diff_reader(feat_diff_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - - // fmpe stats... - FmpeStats fmpe_stats(fmpe); - - int32 num_done = 0, num_err = 0; - - for (; !feat_reader.Done(); feat_reader.Next()) { - std::string key = feat_reader.Key(); - const Matrix feat_in(feat_reader.Value()); - if (!gselect_reader.HasKey(key)) { - KALDI_WARN << "No gselect information for key " << key; - num_err++; - continue; - } - const std::vector > &gselect = - gselect_reader.Value(key); - if (static_cast(gselect.size()) != feat_in.NumRows()) { - KALDI_WARN << "gselect information has wrong size"; - num_err++; - continue; - } - if (!diff_reader.HasKey(key)) { - KALDI_WARN << "No gradient information for key " << key; - num_err++; - continue; - } - const Matrix &feat_deriv = diff_reader.Value(key); - - if (feat_deriv.NumCols() == feat_in.NumCols()) { // Only direct derivative. - fmpe.AccStats(feat_in, gselect, feat_deriv, NULL, &fmpe_stats); - } else if (feat_deriv.NumCols() == feat_in.NumCols() * 2) { // +indirect. - SubMatrix direct_deriv(feat_deriv, 0, feat_deriv.NumRows(), - 0, feat_in.NumCols()), - indirect_deriv(feat_deriv, 0, feat_deriv.NumRows(), - feat_in.NumCols(), feat_in.NumCols()); - fmpe.AccStats(feat_in, gselect, direct_deriv, &indirect_deriv, &fmpe_stats); - } else { - KALDI_ERR << "Mismatch in dimension of feature derivative."; - } - num_done++; - } - - KALDI_LOG << " Done " << num_done << " utterances, " << num_err - << " had errors."; - - WriteKaldiObject(fmpe_stats, stats_wxfilename, binary); - - return (num_done != 0 ? 
0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/featbin/fmpe-apply-transform.cc b/src/featbin/fmpe-apply-transform.cc deleted file mode 100644 index 9473e5f287b..00000000000 --- a/src/featbin/fmpe-apply-transform.cc +++ /dev/null @@ -1,89 +0,0 @@ -// featbin/fmpe-apply-transform.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "transform/fmpe.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; - using kaldi::int32; - try { - const char *usage = - "Apply fMPE transform to features\n" - "Usage: fmpe-apply-transform [options...] " - " \n"; - - ParseOptions po(usage); - bool add_to_features = true; - po.Register("add-to-features", &add_to_features, "If true, add original " - "features to fMPE offsets (false useful for diagnostics)"); - // no non-default options. - po.Read(argc, argv); - - if (po.NumArgs() != 4) { - po.PrintUsage(); - exit(1); - } - - std::string fmpe_rxfilename = po.GetArg(1), - feat_rspecifier = po.GetArg(2), - gselect_rspecifier = po.GetArg(3), - feat_wspecifier = po.GetArg(4); - - Fmpe fmpe; - ReadKaldiObject(fmpe_rxfilename, &fmpe); - - SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - BaseFloatMatrixWriter feat_writer(feat_wspecifier); - - int32 num_done = 0, num_err = 0; - - for (; !feat_reader.Done(); feat_reader.Next()) { - std::string key = feat_reader.Key(); - const Matrix feat_in(feat_reader.Value()); - if (!gselect_reader.HasKey(key)) { - KALDI_WARN << "No gselect information for key " << key; - num_err++; - continue; - } - const std::vector > &gselect = - gselect_reader.Value(key); - if (static_cast(gselect.size()) != feat_in.NumRows()) { - KALDI_WARN << "gselect information has wrong size"; - num_err++; - continue; - } - Matrix feat_out(feat_in.NumRows(), feat_in.NumCols()); - fmpe.ComputeFeatures(feat_in, gselect, &feat_out); - if (add_to_features) // feat_out += feat_in. - feat_out.AddMat(1.0, feat_in, kNoTrans); - - feat_writer.Write(key, feat_out); - num_done++; - } - KALDI_LOG << " Done " << num_done << " utterances, " << num_err - << " had errors."; - return (num_done != 0 ? 
0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/featbin/fmpe-est.cc b/src/featbin/fmpe-est.cc deleted file mode 100644 index 76463c32782..00000000000 --- a/src/featbin/fmpe-est.cc +++ /dev/null @@ -1,67 +0,0 @@ -// featbin/fmpe-est.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "transform/fmpe.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - const char *usage = - "Do one iteration of learning (modified gradient descent)\n" - "on fMPE transform\n" - "Usage: fmpe-est [options...] \n" - "E.g. fmpe-est 1.fmpe 1.accs 2.fmpe\n"; - - ParseOptions po(usage); - FmpeUpdateOptions opts; - bool binary = true; - po.Register("binary", &binary, "If true, output fMPE object in " - "binary mode."); - opts.Register(&po); - po.Read(argc, argv); - - if (po.NumArgs() != 3) { - po.PrintUsage(); - exit(1); - } - - std::string fmpe_rxfilename = po.GetArg(1), - stats_rxfilename = po.GetArg(2), - fmpe_wxfilename = po.GetArg(3); - - Fmpe fmpe; - ReadKaldiObject(fmpe_rxfilename, &fmpe); - FmpeStats stats; - ReadKaldiObject(stats_rxfilename, &stats); - - stats.DoChecks(); // checks certain checksums. - fmpe.Update(opts, stats); - - WriteKaldiObject(fmpe, fmpe_wxfilename, binary); - - KALDI_LOG << "Updated fMPE object and wrote to " - << fmpe_wxfilename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/featbin/fmpe-init.cc b/src/featbin/fmpe-init.cc deleted file mode 100644 index 5f4455f44fc..00000000000 --- a/src/featbin/fmpe-init.cc +++ /dev/null @@ -1,63 +0,0 @@ -// featbin/fmpe-init.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "transform/fmpe.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; - try { - const char *usage = - "Initialize fMPE transform (to zero)\n" - "Usage: fmpe-init [options...] \n" - "E.g. 
fmpe-init 1.ubm 1.fmpe\n"; - - ParseOptions po(usage); - FmpeOptions opts; - bool binary = true; - po.Register("binary", &binary, "If true, output fMPE object in binary mode."); - opts.Register(&po); - po.Read(argc, argv); - - if (po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - - std::string dgmm_rxfilename = po.GetArg(1), - fmpe_wxfilename = po.GetArg(2); - - DiagGmm dgmm; - ReadKaldiObject(dgmm_rxfilename, &dgmm); - - - Fmpe fmpe(dgmm, opts); - - Output ko(fmpe_wxfilename, binary); - fmpe.Write(ko.Stream(), binary); - - KALDI_LOG << "Initialized fMPE object and wrote to " - << fmpe_wxfilename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/featbin/fmpe-sum-accs.cc b/src/featbin/fmpe-sum-accs.cc deleted file mode 100644 index e2976abe5ff..00000000000 --- a/src/featbin/fmpe-sum-accs.cc +++ /dev/null @@ -1,63 +0,0 @@ -// featbin/fmpe-sum-accs.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "transform/fmpe.h" - -int main(int argc, char *argv[]) { - using namespace kaldi; - using kaldi::int32; - try { - const char *usage = - "Sum fMPE stats\n" - "Usage: fmpe-sum-accs [options...] ... \n" - "E.g. fmpe-sum-accs 1.accs 1.1.accs 1.2.accs 1.3.accs 1.4.accs\n"; - - ParseOptions po(usage); - bool binary = true; - po.Register("binary", &binary, "If true, output fMPE stats in " - "binary mode."); - po.Read(argc, argv); - - if (po.NumArgs() < 2) { - po.PrintUsage(); - exit(1); - } - - std::string stats_wxfilename = po.GetArg(1); - - FmpeStats stats; - for (int32 arg = 2; arg <= po.NumArgs(); arg++) { - std::string stats_rxfilename = po.GetArg(arg); - bool binary; - Input ki(stats_rxfilename, &binary); - stats.Read(ki.Stream(), binary, true); // true == sum accs. 
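Aside: fmpe-sum-accs relies on the common Kaldi accumulator convention that Read(stream, binary, add=true) adds the incoming stats into the object instead of replacing them, so summing is just a loop over input files. A minimal standalone illustration of that read-with-add pattern; the Accs type here is a toy, not the real FmpeStats.

#include <fstream>
#include <iostream>
#include <vector>

// Toy accumulator: a fixed-size vector of counts stored as text.
struct Accs {
  std::vector<double> counts;
  // If add == true, accumulate into the existing counts (the fmpe-sum-accs
  // pattern); otherwise start from zero.
  void Read(std::istream &is, bool add) {
    size_t n; is >> n;
    if (!add || counts.size() != n) counts.assign(n, 0.0);
    for (size_t i = 0; i < n; i++) { double c; is >> c; counts[i] += c; }
  }
  void Write(std::ostream &os) const {
    os << counts.size();
    for (double c : counts) os << ' ' << c;
    os << '\n';
  }
};

int main(int argc, char *argv[]) {
  if (argc < 3) { std::cerr << "usage: sum-accs <out> <in1> [<in2> ...]\n"; return 1; }
  Accs stats;
  for (int i = 2; i < argc; i++) {
    std::ifstream is(argv[i]);
    stats.Read(is, /*add=*/true);   // true == sum accs, as in fmpe-sum-accs.
  }
  std::ofstream os(argv[1]);
  stats.Write(os);
  return 0;
}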
- } - - WriteKaldiObject(stats, stats_wxfilename, binary); - - KALDI_LOG << "Summed " << (po.NumArgs()-1) << " fMPE stats and wrote to " - << stats_wxfilename; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/fgmmbin/Makefile b/src/fgmmbin/Makefile index 5db252477b5..060c5e06957 100644 --- a/src/fgmmbin/Makefile +++ b/src/fgmmbin/Makefile @@ -18,6 +18,6 @@ TESTFILES = ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a \ ../feat/kaldi-feat.a ../transform/kaldi-transform.a \ ../gmm/kaldi-gmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \ - ../matrix/kaldi-matrix.a ../base/kaldi-base.a + ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/fgmmbin/fgmm-global-info.cc b/src/fgmmbin/fgmm-global-info.cc index e00384fe13f..867db3bdc50 100644 --- a/src/fgmmbin/fgmm-global-info.cc +++ b/src/fgmmbin/fgmm-global-info.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/full-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { diff --git a/src/fgmmbin/fgmm-gselect.cc b/src/fgmmbin/fgmm-gselect.cc index ab36af74275..3d962972127 100644 --- a/src/fgmmbin/fgmm-gselect.cc +++ b/src/fgmmbin/fgmm-gselect.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/full-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { diff --git a/src/fstbin/Makefile b/src/fstbin/Makefile index a22c014a7d5..c91e405e2c2 100644 --- a/src/fstbin/Makefile +++ b/src/fstbin/Makefile @@ -26,6 +26,6 @@ TESTFILES = LIBFILE = ADDLIBS = ../decoder/kaldi-decoder.a ../fstext/kaldi-fstext.a \ - ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a + ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/fstext/Makefile b/src/fstext/Makefile index b76bd413c42..655437dc52d 100644 --- a/src/fstext/Makefile +++ b/src/fstext/Makefile @@ -24,7 +24,7 @@ LIBNAME = kaldi-fstext # tree and matrix archives needed for test-context-fst # matrix archive needed for push-special. -ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ +ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/fstext/fstext-utils-inl.h b/src/fstext/fstext-utils-inl.h index 37d50fa5d80..1c0e0cbf16a 100644 --- a/src/fstext/fstext-utils-inl.h +++ b/src/fstext/fstext-utils-inl.h @@ -532,18 +532,17 @@ void MakePrecedingInputSymbolsSame(bool start_is_epsilon, MutableFst *fst) template void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst *fst, const F &f) { - typedef typename F::Result ClassType; typedef typename Arc::StateId StateId; typedef typename Arc::Weight Weight; - std::vector classes; - ClassType noClass = f(kNoLabel); - ClassType epsClass = f(0); + std::vector classes; + int32 no_class = f(kNoLabel), + eps_class = f(0); if (start_is_epsilon) { // treat having-start-state as epsilon in-transition. StateId start_state = fst->Start(); if (start_state < 0 || start_state == kNoStateId) // empty FST. 
return; - classes.resize(start_state+1, noClass); - classes[start_state] = epsClass; + classes.resize(start_state+1, no_class); + classes[start_state] = eps_class; } // Find bad states (states with multiple input-symbols into them). @@ -553,8 +552,8 @@ void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst * for (ArcIterator > aiter(*fst, s); !aiter.Done(); aiter.Next()) { const Arc &arc = aiter.Value(); if (classes.size() <= static_cast(arc.nextstate)) - classes.resize(arc.nextstate+1, noClass); - if (classes[arc.nextstate] == noClass) + classes.resize(arc.nextstate+1, no_class); + if (classes[arc.nextstate] == no_class) classes[arc.nextstate] = f(arc.ilabel); else if (classes[arc.nextstate] != f(arc.ilabel)) @@ -562,6 +561,7 @@ void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst * } } if (bad_states.empty()) return; // Nothing to do. + kaldi::ConstIntegerSet bad_states_ciset(bad_states); // faster lookup. // Work out list of arcs we have to change as (state, arc-offset). @@ -579,7 +579,7 @@ void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst * } KALDI_ASSERT(!arcs_to_change.empty()); // since !bad_states.empty(). - std::map, StateId> state_map; + std::map, StateId> state_map; // state_map is a map from (bad-state, input-symbol-class) to dummy-state. for (size_t i = 0; i < arcs_to_change.size(); i++) { @@ -590,7 +590,7 @@ void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst * // Transition is non-eps transition to "bad" state. Introduce new state (or find // existing one). - std::pair p(arc.nextstate, f(arc.ilabel)); + std::pair p(arc.nextstate, f(arc.ilabel)); if (state_map.count(p) == 0) { StateId newstate = state_map[p] = fst->AddState(); fst->AddArc(newstate, Arc(0, 0, Weight::One(), arc.nextstate)); @@ -606,65 +606,6 @@ void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst * } } -template -void MakeFollowingInputSymbolsSame(bool end_is_epsilon, MutableFst *fst) { - IdentityFunction f; - MakeFollowingInputSymbolsSameClass(end_is_epsilon, fst, f); -} - -template -void MakeFollowingInputSymbolsSameClass(bool end_is_epsilon, MutableFst *fst, const F &f) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef typename F::Result ClassType; - std::vector bad_states; - ClassType noClass = f(kNoLabel); - ClassType epsClass = f(0); - for (StateIterator > siter(*fst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - ClassType c = noClass; - bool bad = false; - for (ArcIterator > aiter(*fst, s); !aiter.Done(); aiter.Next()) { - const Arc &arc = aiter.Value(); - if (c == noClass) - c = f(arc.ilabel); - else - if (c != f(arc.ilabel)) { - bad = true; - break; - } - } - if (end_is_epsilon && c != noClass && - c != epsClass && fst->Final(s) != Weight::Zero()) - bad = true; - if (bad) - bad_states.push_back(s); - } - std::vector my_arcs; - for (size_t i = 0; i < bad_states.size(); i++) { - StateId s = bad_states[i]; - my_arcs.clear(); - for (ArcIterator > aiter(*fst, s); !aiter.Done(); aiter.Next()) - my_arcs.push_back(aiter.Value()); - - for (size_t j = 0; j < my_arcs.size(); j++) { - Arc &arc = my_arcs[j]; - if (arc.ilabel != 0) { - StateId newstate = fst->AddState(); - // Create a new state for each non-eps arc in original FST, out of each bad state. 
- // Not as optimal as it could be, but does avoid some complicated weight-pushing - // issues in which, to maintain stochasticity, we would have to know which semiring - // we want to maintain stochasticity in. - fst->AddArc(newstate, Arc(arc.ilabel, 0, Weight::One(), arc.nextstate)); - MutableArcIterator > maiter(fst, s); - maiter.Seek(j); - maiter.SetValue(Arc(0, arc.olabel, arc.weight, newstate)); - } - } - } -} - - template VectorFst* MakeLoopFst(const std::vector *> &fsts) { typedef typename Arc::Weight Weight; diff --git a/src/fstext/fstext-utils-test.cc b/src/fstext/fstext-utils-test.cc index 4ce296f093a..ac02b7ec305 100644 --- a/src/fstext/fstext-utils-test.cc +++ b/src/fstext/fstext-utils-test.cc @@ -217,19 +217,13 @@ template void TestAcceptorMinimize() { template void TestMakeSymbolsSame() { VectorFst *fst = RandFst(); - bool foll = (kaldi::Rand() % 2 == 0); bool is_symbol = (kaldi::Rand() % 2 == 0); VectorFst fst2(*fst); - if (foll) { - MakeFollowingInputSymbolsSame(is_symbol, &fst2); - assert(FollowingInputSymbolsAreSame(is_symbol, fst2)); - } else { - MakePrecedingInputSymbolsSame(is_symbol, &fst2); - assert(PrecedingInputSymbolsAreSame(is_symbol, fst2)); - } + MakePrecedingInputSymbolsSame(is_symbol, &fst2); + assert(PrecedingInputSymbolsAreSame(is_symbol, fst2)); assert(RandEquivalent(*fst, fst2, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); @@ -254,20 +248,14 @@ struct TestFunctor { template void TestMakeSymbolsSameClass() { VectorFst *fst = RandFst(); - bool foll = (kaldi::Rand() % 2 == 0); bool is_symbol = (kaldi::Rand() % 2 == 0); VectorFst fst2(*fst); TestFunctor f; - if (foll) { - MakeFollowingInputSymbolsSameClass(is_symbol, &fst2, f); - assert(FollowingInputSymbolsAreSameClass(is_symbol, fst2, f)); - } else { - MakePrecedingInputSymbolsSameClass(is_symbol, &fst2, f); - assert(PrecedingInputSymbolsAreSameClass(is_symbol, fst2, f)); - } + MakePrecedingInputSymbolsSameClass(is_symbol, &fst2, f); + assert(PrecedingInputSymbolsAreSameClass(is_symbol, fst2, f)); assert(RandEquivalent(*fst, fst2, 5/*paths*/, 0.01/*delta*/, kaldi::Rand()/*seed*/, 100/*path length-- max?*/)); diff --git a/src/fstext/fstext-utils.h b/src/fstext/fstext-utils.h index fb55ad69f72..950c0c87c60 100644 --- a/src/fstext/fstext-utils.h +++ b/src/fstext/fstext-utils.h @@ -230,7 +230,7 @@ bool PrecedingInputSymbolsAreSame(bool start_is_epsilon, const Fst &fst); /// F::Result F::operator() (F::Arg a) const; /// where F::Result is an integer type and F::Arc can be constructed from Arc::Label. /// this must apply to valid labels and also to kNoLabel (so we can have a marker for -/// the invalid labels. +/// the invalid labels). template bool PrecedingInputSymbolsAreSameClass(bool start_is_epsilon, const Fst &fst, const F &f); @@ -258,29 +258,13 @@ template void MakePrecedingInputSymbolsSame(bool start_is_epsilon, MutableFst *fst); -/// As MakePrecedingInputSymbolsSame, but takes a functor object that maps labels to classes. +/// As MakePrecedingInputSymbolsSame, but takes a functor object that maps +/// labels to (int32) classes. Caution: it must not map kNoLabel (-1) to the +/// same value as any real symbol (it should generally map -1 to -1). template void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst *fst, const F &f); -/// MakeFollowingInputSymbolsSame ensures that all arcs exiting any given fst -/// state have the same input symbol. 
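Aside: with this change MakePrecedingInputSymbolsSameClass takes a functor returning int32 rather than a user-defined F::Result, and the only contract is the one stated in the updated comment: f(kNoLabel), i.e. f(-1), must not collide with the class of any real label, and f(0) identifies epsilon. A small standalone sketch of a conforming functor (plain C++; kNoLabel here is simply OpenFst's value of -1):

#include <cassert>
#include <cstdint>

typedef int32_t int32;
const int32 kNoLabel = -1;  // the value OpenFst uses for "no label".

// Example class function: epsilon is its own class, other labels fall into a
// coarse non-negative bucket, and kNoLabel maps to -1, which no real label
// can produce -- the requirement stated for MakePrecedingInputSymbolsSameClass.
struct LabelBucketFunction {
  int32 operator()(int32 label) const {
    if (label == kNoLabel) return -1;
    if (label == 0) return 0;        // epsilon class.
    return 1 + (label % 100);        // arbitrary non-negative bucket.
  }
};

int main() {
  LabelBucketFunction f;
  assert(f(kNoLabel) == -1);
  assert(f(0) == 0);
  assert(f(250) == 51);
  return 0;
}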
It does this by detecting states that have -/// differing input symbols on arcs that exit it, and inserting, for each of the -/// following arcs with non-epsilon input symbol, a new dummy state that has an -/// input-epsilon link from the fst state. The output symbol and weight stay on the -/// link to the dummy state (in order to keep the FST output-deterministic and -/// stochastic, if it already was). -/// If end_is_epsilon, treat "being a final-state" like having an epsilon output -/// link. -template -void MakeFollowingInputSymbolsSame(bool end_is_epsilon, MutableFst *fst); - -/// As MakeFollowingInputSymbolsSame, but takes a functor object that maps labels to classes. -template -void MakeFollowingInputSymbolsSameClass(bool end_is_epsilon, MutableFst *fst, const F &f); - - - /// MakeLoopFst creates an FST that has a state that is both initial and /// final (weight == Weight::One()), and for each non-NULL pointer fsts[i], diff --git a/src/fstext/kaldi-fst-io-inl.h b/src/fstext/kaldi-fst-io-inl.h index b6bae4b9dc9..4870acdd0cc 100644 --- a/src/fstext/kaldi-fst-io-inl.h +++ b/src/fstext/kaldi-fst-io-inl.h @@ -163,7 +163,84 @@ void ReadFstKaldi(std::istream &is, bool binary, } } +template +void ReadFsaKaldi(std::istream &is, VectorFst *fst) { + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + // Consume the \r on Windows, the \n that the text-form FSA format starts + // with, and any extra spaces that might have got in there somehow. + while (std::isspace(is.peek()) && is.peek() != '\n') is.get(); + if (is.peek() == '\n') is.get(); // consume the newline. + else { // saw spaces but no newline.. this is not expected. + KALDI_ERR << "Reading FSA: unexpected sequence of spaces " + << " at file position " << is.tellg(); + } + using std::string; + using std::vector; + using kaldi::SplitStringToIntegers; + using kaldi::ConvertStringToInteger; + fst->DeleteStates(); + string line; + size_t nline = 0; + string separator = FLAGS_fst_field_separator + "\r\n"; + while (std::getline(is, line)) { + nline++; + vector col; + // on Windows we'll write in text and read in binary mode. + kaldi::SplitStringToVector(line, separator.c_str(), true, &col); + if (col.size() == 0) break; // Empty line is a signal to stop, in our + // archive format. 
+ if (col.size() > 4) { + KALDI_ERR << "Bad line in FSA: " << line; + } + StateId s; + if (!ConvertStringToInteger(col[0], &s)) { + KALDI_ERR << "Bad line in FSA: " << line; + } + while (s >= fst->NumStates()) + fst->AddState(); + if (nline == 1) fst->SetStart(s); + bool ok = true; + Arc arc; + Weight w; + StateId d = s; + switch (col.size()) { + case 1: + fst->SetFinal(s, Weight::One()); + break; + case 2: + if (!StrToWeight(col[1], true, &w)) ok = false; + else fst->SetFinal(s, w); + break; + case 3: + ok = ConvertStringToInteger(col[1], &arc.nextstate) && + ConvertStringToInteger(col[2], &arc.ilabel); + arc.olabel = arc.ilabel; + if (ok) { + d = arc.nextstate; + arc.weight = Weight::One(); + fst->AddArc(s, arc); + } + break; + case 4: + ok = ConvertStringToInteger(col[1], &arc.nextstate) && + ConvertStringToInteger(col[2], &arc.ilabel) && + StrToWeight(col[3], false, &arc.weight); + arc.olabel = arc.ilabel; + if (ok) { + d = arc.nextstate; + fst->AddArc(s, arc); + } + break; + default: + ok = false; + } + while (d >= fst->NumStates()) fst->AddState(); + if (!ok) + KALDI_ERR << "Bad line in FSA: " << line; + } +} template // static diff --git a/src/fstext/kaldi-fst-io.h b/src/fstext/kaldi-fst-io.h index 206dc71238a..5de870481ea 100644 --- a/src/fstext/kaldi-fst-io.h +++ b/src/fstext/kaldi-fst-io.h @@ -81,6 +81,12 @@ template void ReadFstKaldi(std::istream &is, bool binary, VectorFst *fst); +// A generic Kaldi-type-IO mechanism of reading FSAs from streams, +// supporting text-mode reading. +// Note that this assumes the input is an acceptor. +template +void ReadFsaKaldi(std::istream &is, VectorFst *fst); + // Read an FST file for LM (G.fst) and make it an acceptor, // and make sure it is sorted on labels fst::VectorFst *ReadAndPrepareLmFst(std::string rxfilename); diff --git a/src/gmm/Makefile b/src/gmm/Makefile index caee6734afe..64fe320507a 100644 --- a/src/gmm/Makefile +++ b/src/gmm/Makefile @@ -9,13 +9,13 @@ TESTFILES = diag-gmm-test mle-diag-gmm-test full-gmm-test mle-full-gmm-test \ OBJFILES = diag-gmm.o diag-gmm-normal.o mle-diag-gmm.o am-diag-gmm.o \ mle-am-diag-gmm.o full-gmm.o full-gmm-normal.o mle-full-gmm.o \ - model-common.o decodable-am-diag-gmm.o model-test-common.o \ - ebw-diag-gmm.o indirect-diff-diag-gmm.o + model-common.o decodable-am-diag-gmm.o model-test-common.o \ + ebw-diag-gmm.o indirect-diff-diag-gmm.o LIBNAME = kaldi-gmm -ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a +ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ + ../base/kaldi-base.a diff --git a/src/gmm/decodable-am-diag-gmm.h b/src/gmm/decodable-am-diag-gmm.h index 745b4f61b14..f2e03005708 100644 --- a/src/gmm/decodable-am-diag-gmm.h +++ b/src/gmm/decodable-am-diag-gmm.h @@ -26,11 +26,9 @@ #include "base/kaldi-common.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "itf/decodable-itf.h" -#include "transform/regression-tree.h" -#include "transform/regtree-fmllr-diag-gmm.h" -#include "transform/regtree-mllr-diag-gmm.h" + namespace kaldi { @@ -46,13 +44,13 @@ class DecodableAmDiagGmmUnmapped : public DecodableInterface { public: /// If you set log_sum_exp_prune to a value greater than 0 it will prune /// in the LogSumExp operation (larger = more exact); I suggest 5. - /// This is advisable if it's spending a long time doing exp - /// operations. + /// This is advisable if it's spending a long time doing exp + /// operations. 
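Aside: the new ReadFsaKaldi accepts the usual OpenFst text format restricted to acceptors: each line is either "state [final-weight]" or "src dst ilabel [weight]", the state on the first line becomes the start state, the output label is copied from the input label, and a blank line terminates the FSA (the archive convention). A hedged usage sketch, assuming kaldi-fst-io.h pulls in the -inl definitions the same way the existing ReadFstKaldi template does:

#include <iostream>
#include <sstream>
#include "fstext/kaldi-fst-io.h"   // declares the new template ReadFsaKaldi().

int main() {
  // A 3-state acceptor: 0 -> 1 on label 5, 1 -> 2 on label 7 with weight 0.5,
  // state 2 is final.  There is no output-label column: this is the FSA format.
  // The leading newline matches what the text-archive reader leaves in the
  // stream before the FSA body.
  std::istringstream is("\n0 1 5\n1 2 7 0.5\n2\n\n");
  fst::VectorFst<fst::StdArc> fst;
  fst::ReadFsaKaldi(is, &fst);
  std::cout << "states: " << fst.NumStates() << "\n";  // expect 3.
  return 0;
}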
DecodableAmDiagGmmUnmapped(const AmDiagGmm &am, const Matrix &feats, BaseFloat log_sum_exp_prune = -1.0): acoustic_model_(am), feature_matrix_(feats), - previous_frame_(-1), log_sum_exp_prune_(log_sum_exp_prune), + previous_frame_(-1), log_sum_exp_prune_(log_sum_exp_prune), data_squared_(feats.NumCols()) { ResetLogLikeCache(); } @@ -63,7 +61,7 @@ class DecodableAmDiagGmmUnmapped : public DecodableInterface { return LogLikelihoodZeroBased(frame, state_index - 1); } virtual int32 NumFramesReady() const { return feature_matrix_.NumRows(); } - + // Indices are one-based! This is for compatibility with OpenFst. virtual int32 NumIndices() const { return acoustic_model_.NumPdfs(); } @@ -98,7 +96,7 @@ class DecodableAmDiagGmmUnmapped : public DecodableInterface { class DecodableAmDiagGmm: public DecodableAmDiagGmmUnmapped { public: DecodableAmDiagGmm(const AmDiagGmm &am, - const TransitionModel &tm, + const Transitions &tm, const Matrix &feats, BaseFloat log_sum_exp_prune = -1.0) : DecodableAmDiagGmmUnmapped(am, feats, log_sum_exp_prune), @@ -107,21 +105,21 @@ class DecodableAmDiagGmm: public DecodableAmDiagGmmUnmapped { // Note, frames are numbered from zero. virtual BaseFloat LogLikelihood(int32 frame, int32 tid) { return LogLikelihoodZeroBased(frame, - trans_model_.TransitionIdToPdf(tid)); + trans_model_.TransitionIdToPdfFast(tid)); } // Indices are one-based! This is for compatibility with OpenFst. virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); } - const TransitionModel *TransModel() { return &trans_model_; } + const Transitions *TransModel() { return &trans_model_; } private: // want to access public to have pdf id information - const TransitionModel &trans_model_; // for tid to pdf mapping + const Transitions &trans_model_; // for tid to pdf mapping KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmDiagGmm); }; class DecodableAmDiagGmmScaled: public DecodableAmDiagGmmUnmapped { public: DecodableAmDiagGmmScaled(const AmDiagGmm &am, - const TransitionModel &tm, + const Transitions &tm, const Matrix &feats, BaseFloat scale, BaseFloat log_sum_exp_prune = -1.0): @@ -131,7 +129,7 @@ class DecodableAmDiagGmmScaled: public DecodableAmDiagGmmUnmapped { // This version of the initializer takes ownership of the pointer // "feats" and will delete it when this class is destroyed. DecodableAmDiagGmmScaled(const AmDiagGmm &am, - const TransitionModel &tm, + const Transitions &tm, BaseFloat scale, BaseFloat log_sum_exp_prune, Matrix *feats): @@ -140,20 +138,20 @@ class DecodableAmDiagGmmScaled: public DecodableAmDiagGmmUnmapped { // Note, frames are numbered from zero but transition-ids from one. virtual BaseFloat LogLikelihood(int32 frame, int32 tid) { - return scale_*LogLikelihoodZeroBased(frame, - trans_model_.TransitionIdToPdf(tid)); + return scale_ * LogLikelihoodZeroBased( + frame, trans_model_.TransitionIdToPdfFast(tid)); } // Indices are one-based! This is for compatibility with OpenFst. 
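Aside: the "Indices are one-based! This is for compatibility with OpenFst" convention is why LogLikelihood(frame, index) subtracts one before indexing, and why the scaled variant multiplies the zero-based log-likelihood by an acoustic scale. A toy standalone illustration of just that index-and-scale plumbing (not the real Kaldi DecodableInterface):

#include <cassert>
#include <vector>

// Toy "decodable": loglikes_[frame][pdf] holds per-frame log-likelihoods.
class ToyDecodable {
 public:
  ToyDecodable(std::vector<std::vector<double> > loglikes, double scale)
      : loglikes_(loglikes), scale_(scale) {}
  // 'index' is one-based, following the OpenFst convention above: label 0 is
  // reserved for epsilon, so real indices start at 1.
  double LogLikelihood(int frame, int index) const {
    return scale_ * LogLikelihoodZeroBased(frame, index - 1);
  }
  int NumIndices() const {
    return loglikes_.empty() ? 0 : (int)loglikes_[0].size();
  }
 private:
  double LogLikelihoodZeroBased(int frame, int pdf) const {
    return loglikes_[frame][pdf];
  }
  std::vector<std::vector<double> > loglikes_;
  double scale_;
};

int main() {
  ToyDecodable d({{-1.0, -2.0}, {-3.0, -4.0}}, /*scale=*/0.1);
  assert(d.NumIndices() == 2);
  assert(d.LogLikelihood(/*frame=*/1, /*index=*/2) == 0.1 * -4.0);
  return 0;
}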
virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); } - const TransitionModel *TransModel() { return &trans_model_; } + const Transitions *TransModel() { return &trans_model_; } virtual ~DecodableAmDiagGmmScaled() { delete delete_feats_; } - + private: // want to access it public to have pdf id information - const TransitionModel &trans_model_; // for transition-id to pdf mapping + const Transitions &trans_model_; // for transition-id to pdf mapping BaseFloat scale_; Matrix *delete_feats_; KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableAmDiagGmmScaled); diff --git a/src/gmmbin/Makefile b/src/gmmbin/Makefile index 82d10abe9ce..f43dfa96ccb 100644 --- a/src/gmmbin/Makefile +++ b/src/gmmbin/Makefile @@ -6,25 +6,24 @@ include ../kaldi.mk BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \ gmm-decode-faster gmm-decode-simple gmm-align-compiled \ - gmm-sum-accs gmm-est-regtree-fmllr gmm-acc-stats-twofeats \ + gmm-sum-accs gmm-acc-stats-twofeats \ gmm-acc-stats gmm-init-lvtln gmm-est-lvtln-trans gmm-train-lvtln-special \ gmm-acc-mllt gmm-mixup gmm-init-model gmm-transform-means \ - gmm-make-regtree gmm-decode-faster-regtree-fmllr gmm-post-to-gpost \ - gmm-est-fmllr-gpost gmm-est-fmllr gmm-est-regtree-fmllr-ali \ - gmm-est-regtree-mllr gmm-compute-likes \ - gmm-decode-faster-regtree-mllr gmm-latgen-simple \ + gmm-post-to-gpost \ + gmm-est-fmllr-gpost gmm-est-fmllr gmm-compute-likes \ + gmm-latgen-simple \ gmm-rescore-lattice gmm-decode-biglm-faster \ gmm-est-gaussians-ebw gmm-est-weights-ebw gmm-latgen-faster gmm-copy \ gmm-global-acc-stats gmm-global-est gmm-global-sum-accs gmm-gselect \ gmm-latgen-biglm-faster gmm-ismooth-stats gmm-global-get-frame-likes \ gmm-global-est-fmllr gmm-global-to-fgmm gmm-global-acc-stats-twofeats \ - gmm-global-copy gmm-fmpe-acc-stats gmm-acc-stats2 gmm-init-model-flat gmm-info \ + gmm-global-copy gmm-acc-stats2 gmm-init-model-flat gmm-info \ gmm-get-stats-deriv gmm-est-rescale gmm-boost-silence \ gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \ gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \ gmm-est-basis-fmllr-gpost gmm-latgen-faster-parallel \ - gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \ - gmm-global-info gmm-latgen-faster-regtree-fmllr gmm-est-fmllr-global \ + gmm-global-init-from-feats \ + gmm-global-info gmm-est-fmllr-global \ gmm-acc-mllt-global gmm-transform-means-global gmm-global-get-post \ gmm-global-gselect-to-post gmm-global-est-lvtln-trans gmm-init-biphone @@ -37,8 +36,8 @@ TESTFILES = ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a \ ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a ../feat/kaldi-feat.a \ ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ - ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a + ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ + ../base/kaldi-base.a include ../makefiles/default_rules.mk diff --git a/src/gmmbin/gmm-acc-mllt-global.cc b/src/gmmbin/gmm-acc-mllt-global.cc index bed91c053d3..ac3ec2237c9 100644 --- a/src/gmmbin/gmm-acc-mllt-global.cc +++ b/src/gmmbin/gmm-acc-mllt-global.cc @@ -23,7 +23,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/mllt.h" #include "hmm/posterior.h" diff --git a/src/gmmbin/gmm-acc-mllt.cc b/src/gmmbin/gmm-acc-mllt.cc index 6e57f082a62..be0d501b3f5 100644 --- 
a/src/gmmbin/gmm-acc-mllt.cc +++ b/src/gmmbin/gmm-acc-mllt.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/mllt.h" #include "hmm/posterior.h" @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) { typedef kaldi::int32 int32; AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_filename, &binary); diff --git a/src/gmmbin/gmm-acc-stats-ali.cc b/src/gmmbin/gmm-acc-stats-ali.cc index 5552d45738e..baee5f8b814 100644 --- a/src/gmmbin/gmm-acc-stats-ali.cc +++ b/src/gmmbin/gmm-acc-stats-ali.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" @@ -53,7 +53,7 @@ int main(int argc, char *argv[]) { accs_wxfilename = po.GetArg(4); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_filename, &binary); @@ -61,8 +61,6 @@ int main(int argc, char *argv[]) { am_gmm.Read(ki.Stream(), binary); } - Vector transition_accs; - trans_model.InitStats(&transition_accs); AccumAmDiagGmm gmm_accs; gmm_accs.Init(am_gmm, kGmmAll); @@ -94,8 +92,7 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < alignment.size(); i++) { int32 tid = alignment[i], // transition identifier. - pdf_id = trans_model.TransitionIdToPdf(tid); - trans_model.Accumulate(1.0, tid, &transition_accs); + pdf_id = trans_model.TransitionIdToPdfFast(tid); tot_like_this_file += gmm_accs.AccumulateForGmm(am_gmm, mat.Row(i), pdf_id, 1.0); } @@ -117,7 +114,6 @@ int main(int argc, char *argv[]) { { Output ko(accs_wxfilename, binary); - transition_accs.Write(ko.Stream(), binary); gmm_accs.Write(ko.Stream(), binary); } KALDI_LOG << "Written accs."; diff --git a/src/gmmbin/gmm-acc-stats-twofeats.cc b/src/gmmbin/gmm-acc-stats-twofeats.cc index 05f94ff5ef6..7b3cadfdb9b 100644 --- a/src/gmmbin/gmm-acc-stats-twofeats.cc +++ b/src/gmmbin/gmm-acc-stats-twofeats.cc @@ -23,7 +23,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" #include "hmm/posterior.h" @@ -59,7 +59,7 @@ int main(int argc, char *argv[]) { typedef kaldi::int32 int32; AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_filename, &binary); @@ -67,8 +67,6 @@ int main(int argc, char *argv[]) { am_gmm.Read(ki.Stream(), binary); } - Vector transition_accs; - trans_model.InitStats(&transition_accs); int32 new_dim = 0; AccumAmDiagGmm gmm_accs; // will initialize once we know new_dim. @@ -129,13 +127,6 @@ int main(int argc, char *argv[]) { weight); tot_weight_this_file += weight; } - - // Accumulates for transitions. 
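Aside: after this patch the accumulation loops no longer keep per-transition counts; each aligned transition-id is only mapped to its pdf-id and the frame is accumulated against that pdf. A standalone toy version of that loop, with the tid-to-pdf mapping passed in as a plain function standing in for Transitions::TransitionIdToPdfFast:

#include <functional>
#include <iostream>
#include <map>
#include <vector>

// Accumulate per-pdf frame counts from an alignment of transition-ids.
// 'tid_to_pdf' stands in for Transitions::TransitionIdToPdfFast().
std::map<int, int> CountFramesPerPdf(const std::vector<int> &alignment,
                                     const std::function<int(int)> &tid_to_pdf) {
  std::map<int, int> counts;
  for (size_t i = 0; i < alignment.size(); i++) {
    int pdf_id = tid_to_pdf(alignment[i]);
    counts[pdf_id] += 1;   // the real code accumulates GMM stats here instead.
  }
  return counts;
}

int main() {
  // Toy mapping: pretend even transition-ids belong to pdf 0, odd ones to pdf 1.
  auto tid_to_pdf = [](int tid) { return tid % 2; };
  std::vector<int> alignment = {2, 2, 3, 4, 5, 5, 5};
  for (const auto &kv : CountFramesPerPdf(alignment, tid_to_pdf))
    std::cout << "pdf " << kv.first << ": " << kv.second << " frames\n";
  // Expect: pdf 0: 3 frames, pdf 1: 4 frames.
  return 0;
}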
- for (size_t j = 0; j < posterior[i].size(); j++) { - int32 tid = posterior[i][j].first; - BaseFloat weight = posterior[i][j].second; - trans_model.Accumulate(weight, tid, &transition_accs); - } } KALDI_LOG << "Average like for this file is " << (tot_like_this_file/tot_weight_this_file) << " over " @@ -157,7 +148,6 @@ int main(int argc, char *argv[]) { { Output ko(accs_wxfilename, binary); - transition_accs.Write(ko.Stream(), binary); gmm_accs.Write(ko.Stream(), binary); } KALDI_LOG << "Written accs."; diff --git a/src/gmmbin/gmm-acc-stats.cc b/src/gmmbin/gmm-acc-stats.cc index e213fffdeff..76a3528d4f4 100644 --- a/src/gmmbin/gmm-acc-stats.cc +++ b/src/gmmbin/gmm-acc-stats.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" #include "hmm/posterior.h" @@ -59,7 +59,7 @@ int main(int argc, char *argv[]) { AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_filename, &binary); @@ -67,8 +67,6 @@ int main(int argc, char *argv[]) { am_gmm.Read(ki.Stream(), binary); } - Vector transition_accs; - trans_model.InitStats(&transition_accs); AccumAmDiagGmm gmm_accs; gmm_accs.Init(am_gmm, StringToGmmFlags(update_flags_str)); @@ -110,13 +108,6 @@ int main(int argc, char *argv[]) { * weight; tot_weight += weight; } - - // Accumulates for transitions. - for (size_t j = 0; j < posterior[i].size(); j++) { - int32 tid = posterior[i][j].first; - BaseFloat weight = posterior[i][j].second; - trans_model.Accumulate(weight, tid, &transition_accs); - } } if (num_done % 50 == 0) { KALDI_LOG << "Processed " << num_done << " utterances; for utterance " @@ -136,7 +127,6 @@ int main(int argc, char *argv[]) { { Output ko(accs_wxfilename, binary); - transition_accs.Write(ko.Stream(), binary); gmm_accs.Write(ko.Stream(), binary); } KALDI_LOG << "Written accs."; diff --git a/src/gmmbin/gmm-acc-stats2.cc b/src/gmmbin/gmm-acc-stats2.cc index 70730c8ca7d..15e97d07b73 100644 --- a/src/gmmbin/gmm-acc-stats2.cc +++ b/src/gmmbin/gmm-acc-stats2.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" #include "hmm/posterior.h" @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) { AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_rxfilename, &binary); @@ -70,9 +70,6 @@ int main(int argc, char *argv[]) { am_gmm.Read(ki.Stream(), binary); } - Vector num_trans_accs, den_trans_accs; - trans_model.InitStats(&num_trans_accs); - trans_model.InitStats(&den_trans_accs); AccumAmDiagGmm num_gmm_accs, den_gmm_accs; num_gmm_accs.Init(am_gmm, StringToGmmFlags(update_flags_str)); den_gmm_accs.Init(am_gmm, StringToGmmFlags(update_flags_str)); @@ -110,11 +107,8 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < posterior.size(); i++) { for (size_t j = 0; j < posterior[i].size(); j++) { int32 tid = posterior[i][j].first, - pdf_id = trans_model.TransitionIdToPdf(tid); + pdf_id = trans_model.TransitionIdToPdfFast(tid); BaseFloat weight = posterior[i][j].second; - trans_model.Accumulate(fabs(weight), tid, - (weight > 0.0 ? - &num_trans_accs : &den_trans_accs)); tot_like_this_file += (weight > 0.0 ? 
&num_gmm_accs : &den_gmm_accs) -> AccumulateForGmm(am_gmm, mat.Row(i), pdf_id, fabs(weight)) * weight; @@ -136,12 +130,10 @@ int main(int argc, char *argv[]) { { Output ko(num_accs_wxfilename, binary); - num_trans_accs.Write(ko.Stream(), binary); num_gmm_accs.Write(ko.Stream(), binary); } { Output ko(den_accs_wxfilename, binary); - den_trans_accs.Write(ko.Stream(), binary); den_gmm_accs.Write(ko.Stream(), binary); } KALDI_LOG << "Written accs."; diff --git a/src/gmmbin/gmm-adapt-map.cc b/src/gmmbin/gmm-adapt-map.cc index ec3eb8cea9b..30fbc1e8d73 100644 --- a/src/gmmbin/gmm-adapt-map.cc +++ b/src/gmmbin/gmm-adapt-map.cc @@ -25,7 +25,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" #include "hmm/posterior.h" @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) { MapAmDiagGmmWriter map_am_writer(map_am_wspecifier); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input is(model_filename, &binary); diff --git a/src/gmmbin/gmm-align-compiled.cc b/src/gmmbin/gmm-align-compiled.cc index 36349774773..f8b5a11d504 100644 --- a/src/gmmbin/gmm-align-compiled.cc +++ b/src/gmmbin/gmm-align-compiled.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-utils.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" @@ -50,17 +50,11 @@ int main(int argc, char *argv[]) { ParseOptions po(usage); AlignConfig align_config; BaseFloat acoustic_scale = 1.0; - BaseFloat transition_scale = 1.0; - BaseFloat self_loop_scale = 1.0; std::string per_frame_acwt_wspecifier; align_config.Register(&po); - po.Register("transition-scale", &transition_scale, - "Transition-probability scale [relative to acoustics]"); po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods"); - po.Register("self-loop-scale", &self_loop_scale, - "Scale of self-loop versus non-self-loop log probs [relative to acoustics]"); po.Register("write-per-frame-acoustic-loglikes", &per_frame_acwt_wspecifier, "Wspecifier for table of vectors containing the acoustic log-likelihoods " "per frame for each utterance. E.g. ark:foo/per_frame_logprobs.1.ark"); @@ -77,7 +71,7 @@ int main(int argc, char *argv[]) { alignment_wspecifier = po.GetArg(4), scores_wspecifier = po.GetOptArg(5); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; @@ -114,13 +108,6 @@ int main(int argc, char *argv[]) { continue; } - { // Add transition-probs to the FST. - std::vector disambig_syms; // empty. 
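Aside: gmm-acc-stats2 routes each posterior entry by the sign of its weight: positive weights feed the numerator accumulator, negative weights the denominator, and only the magnitude fabs(weight) is accumulated. A standalone sketch of that routing logic; SimpleAccs is a toy stand-in for AccumAmDiagGmm.

#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

struct SimpleAccs { double tot_weight = 0.0; };

// Route one frame's posterior entries (pdf-id, signed weight) to the
// numerator or denominator accumulator, as gmm-acc-stats2 does.
void AccumulateFrame(const std::vector<std::pair<int, double> > &post,
                     SimpleAccs *num, SimpleAccs *den) {
  for (const auto &p : post) {
    double weight = p.second;
    SimpleAccs *accs = (weight > 0.0 ? num : den);
    accs->tot_weight += std::fabs(weight);  // magnitude only; the sign chose the accumulator.
  }
}

int main() {
  SimpleAccs num, den;
  AccumulateFrame({{10, 0.7}, {11, -0.3}, {12, 0.3}}, &num, &den);
  std::printf("num=%.1f den=%.1f\n", num.tot_weight, den.tot_weight);  // num=1.0 den=0.3
  return 0;
}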
- AddTransitionProbs(trans_model, disambig_syms, - transition_scale, self_loop_scale, - &decode_fst); - } - DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model, features, acoustic_scale); diff --git a/src/gmmbin/gmm-align.cc b/src/gmmbin/gmm-align.cc index c9c2fde11f6..e84a90cdb9a 100644 --- a/src/gmmbin/gmm-align.cc +++ b/src/gmmbin/gmm-align.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-utils.h" #include "decoder/decoder-wrappers.h" #include "decoder/training-graph-compiler.h" @@ -73,7 +73,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; ReadKaldiObject(tree_in_filename, &ctx_dep); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-basis-fmllr-accs-gpost.cc b/src/gmmbin/gmm-basis-fmllr-accs-gpost.cc index f8f7b5d3433..9001b64ae82 100644 --- a/src/gmmbin/gmm-basis-fmllr-accs-gpost.cc +++ b/src/gmmbin/gmm-basis-fmllr-accs-gpost.cc @@ -26,7 +26,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "transform/basis-fmllr-diag-gmm.h" #include "hmm/posterior.h" @@ -34,7 +34,7 @@ using std::vector; namespace kaldi { void AccumulateForUtterance(const Matrix &feats, const GaussPost &gpost, - const TransitionModel &trans_model, + const Transitions &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats) { for (size_t i = 0; i < gpost.size(); i++) { @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) { gpost_rspecifier = po.GetArg(3), accs_wspecifier = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-basis-fmllr-accs.cc b/src/gmmbin/gmm-basis-fmllr-accs.cc index 58b365318f0..d78d652dfc5 100644 --- a/src/gmmbin/gmm-basis-fmllr-accs.cc +++ b/src/gmmbin/gmm-basis-fmllr-accs.cc @@ -26,7 +26,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "transform/basis-fmllr-diag-gmm.h" #include "hmm/posterior.h" @@ -34,7 +34,7 @@ using std::vector; namespace kaldi { void AccumulateForUtterance(const Matrix &feats, const Posterior &post, - const TransitionModel &trans_model, + const Transitions &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats) { Posterior pdf_post; @@ -82,7 +82,7 @@ int main(int argc, char *argv[]) { post_rspecifier = po.GetArg(3), accs_wspecifier = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-basis-fmllr-training.cc b/src/gmmbin/gmm-basis-fmllr-training.cc index 3d93c3ca877..d433f6903f6 100644 --- a/src/gmmbin/gmm-basis-fmllr-training.cc +++ b/src/gmmbin/gmm-basis-fmllr-training.cc @@ -25,7 +25,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "transform/basis-fmllr-diag-gmm.h" @@ -53,7 +53,7 @@ int main(int argc, char *argv[]) { model_rxfilename = po.GetArg(1), basis_wspecifier = po.GetArg(2); - TransitionModel trans_model; + Transitions 
trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-boost-silence.cc b/src/gmmbin/gmm-boost-silence.cc index 7c9e4c82806..ef57f1190cb 100644 --- a/src/gmmbin/gmm-boost-silence.cc +++ b/src/gmmbin/gmm-boost-silence.cc @@ -19,7 +19,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/am-diag-gmm.h" int main(int argc, char *argv[]) { @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) { } AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_rxfilename, &binary_read); diff --git a/src/gmmbin/gmm-compute-likes.cc b/src/gmmbin/gmm-compute-likes.cc index 78c813e1c3b..c7101f1a9ae 100644 --- a/src/gmmbin/gmm-compute-likes.cc +++ b/src/gmmbin/gmm-compute-likes.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "base/timer.h" @@ -55,7 +55,7 @@ int main(int argc, char *argv[]) { AmDiagGmm am_gmm; { bool binary; - TransitionModel trans_model; // not needed. + Transitions trans_model; // not needed. Input ki(model_in_filename, &binary); trans_model.Read(ki.Stream(), binary); am_gmm.Read(ki.Stream(), binary); diff --git a/src/gmmbin/gmm-copy.cc b/src/gmmbin/gmm-copy.cc index 0b33bc6d67f..bd42aeb2a25 100644 --- a/src/gmmbin/gmm-copy.cc +++ b/src/gmmbin/gmm-copy.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) { model_out_filename = po.GetArg(2); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); diff --git a/src/gmmbin/gmm-decode-biglm-faster.cc b/src/gmmbin/gmm-decode-biglm-faster.cc index 6e47d68de3c..9e7845e7849 100644 --- a/src/gmmbin/gmm-decode-biglm-faster.cc +++ b/src/gmmbin/gmm-decode-biglm-faster.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/biglm-faster-decoder.h" #include "gmm/decodable-am-diag-gmm.h" @@ -111,7 +111,7 @@ int main(int argc, char *argv[]) alignment_wspecifier = po.GetOptArg(7), lattice_wspecifier = po.GetOptArg(8); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-decode-faster-regtree-fmllr.cc b/src/gmmbin/gmm-decode-faster-regtree-fmllr.cc deleted file mode 100644 index ca39cbe8cb7..00000000000 --- a/src/gmmbin/gmm-decode-faster-regtree-fmllr.cc +++ /dev/null @@ -1,290 +0,0 @@ -// gmmbin/gmm-decode-faster-regtree-fmllr.cc - -// Copyright 2009-2012 Microsoft Corporation; Saarland University; -// Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#include <string>
-#include <vector>
-
-#include "base/kaldi-common.h"
-#include "util/common-utils.h"
-#include "gmm/am-diag-gmm.h"
-#include "hmm/transition-model.h"
-#include "transform/regression-tree.h"
-#include "transform/regtree-fmllr-diag-gmm.h"
-#include "transform/fmllr-diag-gmm.h"
-#include "fstext/fstext-lib.h"
-#include "decoder/faster-decoder.h"
-#include "transform/decodable-am-diag-gmm-regtree.h"
-#include "base/timer.h"
-#include "lat/kaldi-lattice.h"  // for {Compact}LatticeArc
-
-using fst::SymbolTable;
-using fst::VectorFst;
-using fst::StdArc;
-using kaldi::BaseFloat;
-using std::string;
-using std::vector;
-using kaldi::LatticeWeight;
-using kaldi::LatticeArc;
-
-struct DecodeInfo {
- public:
-  DecodeInfo(const kaldi::AmDiagGmm &am,
-             const kaldi::TransitionModel &tm, kaldi::FasterDecoder *decoder,
-             BaseFloat scale, bool allow_partial,
-             const kaldi::Int32VectorWriter &wwriter,
-             const kaldi::Int32VectorWriter &awriter, fst::SymbolTable *wsyms)
-      : acoustic_model(am), trans_model(tm), decoder(decoder),
-        acoustic_scale(scale), allow_partial(allow_partial), words_writer(wwriter),
-        alignment_writer(awriter), word_syms(wsyms) {}
-
-  const kaldi::AmDiagGmm &acoustic_model;
-  const kaldi::TransitionModel &trans_model;
-  kaldi::FasterDecoder *decoder;
-  BaseFloat acoustic_scale;
-  bool allow_partial;
-  const kaldi::Int32VectorWriter &words_writer;
-  const kaldi::Int32VectorWriter &alignment_writer;
-  fst::SymbolTable *word_syms;
-
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(DecodeInfo);
-};
-
-bool DecodeUtterance(kaldi::FasterDecoder *decoder,
-                     kaldi::DecodableInterface *decodable,
-                     DecodeInfo *info,
-                     const string &uttid,
-                     int32 num_frames,
-                     BaseFloat *total_like) {
-  decoder->Decode(decodable);
-  KALDI_LOG << "Length of file is " << num_frames;
-
-  VectorFst<LatticeArc> decoded;  // linear FST.
-  if ( (info->allow_partial || decoder->ReachedFinal())
-       && decoder->GetBestPath(&decoded) ) {
-    if (!decoder->ReachedFinal())
-      KALDI_WARN << "Decoder did not reach end-state, outputting partial "
-          "traceback.";
-
-    vector<int32> alignment, words;
-    LatticeWeight weight;
-    GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
-
-    info->words_writer.Write(uttid, words);
-    if (info->alignment_writer.IsOpen())
-      info->alignment_writer.Write(uttid, alignment);
-    if (info->word_syms != NULL) {
-      std::ostringstream ss;
-      ss << uttid << ' ';
-      for (size_t i = 0; i < words.size(); i++) {
-        string s = info->word_syms->Find(words[i]);
-        if (s == "")
-          KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
-        ss << s << ' ';
-      }
-      ss << '\n';
-      KALDI_LOG << ss.str();
-    }
-
-    BaseFloat like = -weight.Value1() -weight.Value2();
-    KALDI_LOG << "Log-like per frame = " << (like/num_frames);
-    (*total_like) += like;
-    return true;
-  } else {
-    KALDI_WARN << "Did not successfully decode utterance, length = "
-               << num_frames;
-    return false;
-  }
-}
-
-int main(int argc, char *argv[]) {
-  try {
-    using namespace kaldi;
-    typedef kaldi::int32 int32;
-
-    const char *usage = "Decode features using GMM-based model.\n"
-        "Usage: gmm-decode-faster-regtree-fmllr [options] model-in fst-in "
-        "regtree-in features-rspecifier transforms-rspecifier "
-        "words-wspecifier [alignments-wspecifier]\n";
-    ParseOptions po(usage);
-    bool binary = true;
-    bool allow_partial = true;
-    BaseFloat acoustic_scale = 0.1;
-
-    std::string word_syms_filename, utt2spk_rspecifier;
-    FasterDecoderOptions decoder_opts;
-    decoder_opts.Register(&po, true);  // true == include obscure settings.
-    po.Register("utt2spk", &utt2spk_rspecifier, "rspecifier for utterance to "
-                "speaker map");
-    po.Register("binary", &binary, "Write output in binary mode");
-    po.Register("acoustic-scale", &acoustic_scale,
-                "Scaling factor for acoustic likelihoods");
-    po.Register("word-symbol-table", &word_syms_filename,
-                "Symbol table for words [for debug output]");
-    po.Register("allow-partial", &allow_partial,
-                "Produce output even when final state was not reached");
-    po.Read(argc, argv);
-
-    if (po.NumArgs() < 6 || po.NumArgs() > 7) {
-      po.PrintUsage();
-      exit(1);
-    }
-
-    std::string model_in_filename = po.GetArg(1),
-        fst_in_filename = po.GetArg(2),
-        regtree_filename = po.GetArg(3),
-        feature_rspecifier = po.GetArg(4),
-        xforms_rspecifier = po.GetArg(5),
-        words_wspecifier = po.GetArg(6),
-        alignment_wspecifier = po.GetOptArg(7);
-
-    TransitionModel trans_model;
-    AmDiagGmm am_gmm;
-    {
-      bool binary_read;
-      Input ki(model_in_filename, &binary_read);
-      trans_model.Read(ki.Stream(), binary_read);
-      am_gmm.Read(ki.Stream(), binary_read);
-    }
-
-    VectorFst<StdArc> *decode_fst = fst::ReadFstKaldi(fst_in_filename);
-
-    RegressionTree regtree;
-    {
-      bool binary_read;
-      Input in(regtree_filename, &binary_read);
-      regtree.Read(in.Stream(), binary_read, am_gmm);
-    }
-
-    RandomAccessRegtreeFmllrDiagGmmReaderMapped fmllr_reader(xforms_rspecifier,
-                                                             utt2spk_rspecifier);
-
-    Int32VectorWriter words_writer(words_wspecifier);
-
-    Int32VectorWriter alignment_writer(alignment_wspecifier);
-
-    fst::SymbolTable *word_syms = NULL;
-    if (word_syms_filename != "") {
-      word_syms = fst::SymbolTable::ReadText(word_syms_filename);
-      if (!word_syms) {
-        KALDI_ERR << "Could not read symbol table from file "
-                  << word_syms_filename;
-      }
-    }
-
-    BaseFloat tot_like = 0.0;
-    kaldi::int64 frame_count = 0;
-    int num_success = 0, num_fail = 0;
-    FasterDecoder decoder(*decode_fst,
-                          decoder_opts);
-
-    Timer timer;
-
-    DecodeInfo decode_info(am_gmm, trans_model, &decoder, acoustic_scale,
-                           allow_partial, words_writer, alignment_writer,
-                           word_syms);
-
-    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
-    for (; !feature_reader.Done(); feature_reader.Next()) {
-      string utt = feature_reader.Key();
-
-      Matrix<BaseFloat> features(feature_reader.Value());
-      feature_reader.FreeCurrent();
-      if (features.NumRows() == 0) {
-        KALDI_WARN << "Zero-length utterance: " << utt;
-        num_fail++;
-        continue;
-      }
-
-      if (!fmllr_reader.HasKey(utt)) {  // Decode without FMLLR if none found
-        KALDI_WARN << "No FMLLR transform for key " << utt <<
-            ", decoding without fMLLR.";
-        kaldi::DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model,
-                                                      features,
-                                                      acoustic_scale);
-        if (DecodeUtterance(&decoder, &gmm_decodable, &decode_info,
-                            utt, features.NumRows(), &tot_like)) {
-          frame_count += gmm_decodable.NumFramesReady();
-          num_success++;
-        } else {
-          num_fail++;
-        }
-        continue;
-      }
-
-      // If found, load the transforms for the current utterance.
-      RegtreeFmllrDiagGmm fmllr(fmllr_reader.Value(utt));
-      if (fmllr.NumRegClasses() == 1) {
-        Matrix<BaseFloat> xformed_features(features);
-        Matrix<BaseFloat> fmllr_matrix;
-        fmllr.GetXformMatrix(0, &fmllr_matrix);
-        for (int32 i = 0; i < xformed_features.NumRows(); i++) {
-          SubVector<BaseFloat> row(xformed_features, i);
-          ApplyAffineTransform(fmllr_matrix, &row);
-        }
-        kaldi::DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model,
-                                                      xformed_features,
-                                                      acoustic_scale);
-
-        if (DecodeUtterance(&decoder, &gmm_decodable, &decode_info,
-                            utt, xformed_features.NumRows(), &tot_like)) {
-          frame_count += gmm_decodable.NumFramesReady();
-          num_success++;
-        } else {
-          num_fail++;
-        }
-      } else {
-        kaldi::DecodableAmDiagGmmRegtreeFmllr gmm_decodable(am_gmm, trans_model,
-                                                            features, fmllr,
-                                                            regtree,
-                                                            acoustic_scale);
-        if (DecodeUtterance(&decoder, &gmm_decodable, &decode_info,
-                            utt, features.NumRows(), &tot_like)) {
-          frame_count += gmm_decodable.NumFramesReady();
-          num_success++;
-        } else {
-          num_fail++;
-        }
-      }
-    }  // end looping over all utterances
-
-    KALDI_LOG << "Average log-likelihood per frame is " << (tot_like
-        / frame_count) << " over " << frame_count << " frames.";
-
-    double elapsed = timer.Elapsed();
-    KALDI_LOG << "Time taken [excluding initialization] " << elapsed
-              << "s: real-time factor assuming 100 frames/sec is "
-              << (elapsed * 100.0 / frame_count);
-    KALDI_LOG << "Done " << num_success << " utterances, failed for "
-              << num_fail;
-
-    delete word_syms;
-    delete decode_fst;
-    if (num_success != 0)
-      return 0;
-    else
-      return 1;
-  }
-  catch(const std::exception &e) {
-    std::cerr << e.what();
-    return -1;
-  }
-}
-
-
diff --git a/src/gmmbin/gmm-decode-faster-regtree-mllr.cc b/src/gmmbin/gmm-decode-faster-regtree-mllr.cc
deleted file mode 100644
index 9a5d9486b9f..00000000000
--- a/src/gmmbin/gmm-decode-faster-regtree-mllr.cc
+++ /dev/null
@@ -1,267 +0,0 @@
-// gmmbin/gmm-decode-faster-regtree-mllr.cc
-
-// Copyright 2009-2013 Microsoft Corporation;  Saarland University;
-//                     Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" -#include "transform/regression-tree.h" -#include "transform/regtree-mllr-diag-gmm.h" -#include "fstext/fstext-lib.h" -#include "decoder/faster-decoder.h" -#include "transform/decodable-am-diag-gmm-regtree.h" -#include "base/timer.h" -#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc - -using fst::SymbolTable; -using fst::VectorFst; -using fst::StdArc; -using kaldi::BaseFloat; -using std::string; -using std::vector; -using kaldi::LatticeWeight; -using kaldi::LatticeArc; - -struct DecodeInfo { - public: - DecodeInfo(const kaldi::AmDiagGmm &am, - const kaldi::TransitionModel &tm, kaldi::FasterDecoder *decoder, - BaseFloat scale, bool allow_partial, - const kaldi::Int32VectorWriter &wwriter, - const kaldi::Int32VectorWriter &awriter, fst::SymbolTable *wsyms) - : acoustic_model(am), trans_model(tm), decoder(decoder), - acoustic_scale(scale), allow_partial(allow_partial), words_writer(wwriter), - alignment_writer(awriter), word_syms(wsyms) {} - - const kaldi::AmDiagGmm &acoustic_model; - const kaldi::TransitionModel &trans_model; - kaldi::FasterDecoder *decoder; - BaseFloat acoustic_scale; - bool allow_partial; - const kaldi::Int32VectorWriter &words_writer; - const kaldi::Int32VectorWriter &alignment_writer; - fst::SymbolTable *word_syms; - - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(DecodeInfo); -}; - -bool DecodeUtterance(kaldi::FasterDecoder *decoder, - kaldi::DecodableInterface *decodable, - DecodeInfo *info, - const string &uttid, - int32 num_frames, - BaseFloat *total_like) { - decoder->Decode(decodable); - KALDI_LOG << "Length of file is " << num_frames;; - - VectorFst decoded; // linear FST. 
- if ( (info->allow_partial || decoder->ReachedFinal()) - && decoder->GetBestPath(&decoded) ) { - if (!decoder->ReachedFinal()) - KALDI_WARN << "Decoder did not reach end-state, outputting partial " - "traceback."; - - vector alignment, words; - LatticeWeight weight; - GetLinearSymbolSequence(decoded, &alignment, &words, &weight); - - info->words_writer.Write(uttid, words); - if (info->alignment_writer.IsOpen()) - info->alignment_writer.Write(uttid, alignment); - if (info->word_syms != NULL) { - std::ostringstream ss; - ss << uttid << ' '; - for (size_t i = 0; i < words.size(); i++) { - string s = info->word_syms->Find(words[i]); - if (s == "") - KALDI_ERR << "Word-id " << words[i] << " not in symbol table."; - ss << s << ' '; - } - ss << '\n'; - KALDI_LOG << ss.str(); - } - - BaseFloat like = -weight.Value1() -weight.Value2(); - KALDI_LOG << "Log-like per frame = " << (like/num_frames); - (*total_like) += like; - return true; - } else { - KALDI_WARN << "Did not successfully decode utterance, length = " - << num_frames; - return false; - } -} - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - - const char *usage = "Decode features using GMM-based model.\n" - "Usage: gmm-decode-faster-regtree-mllr [options] model-in fst-in " - "regtree-in features-rspecifier transforms-rspecifier " - "words-wspecifier [alignments-wspecifier]\n"; - ParseOptions po(usage); - bool binary = true; - bool allow_partial = true; - BaseFloat acoustic_scale = 0.1; - - std::string word_syms_filename, utt2spk_rspecifier; - FasterDecoderOptions decoder_opts; - decoder_opts.Register(&po, true); // true == include obscure settings. - po.Register("utt2spk", &utt2spk_rspecifier, "rspecifier for utterance to " - "speaker map"); - po.Register("binary", &binary, "Write output in binary mode"); - po.Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - po.Register("word-symbol-table", &word_syms_filename, - "Symbol table for words [for debug output]"); - po.Register("allow-partial", &allow_partial, - "Produce output even when final state was not reached"); - po.Read(argc, argv); - - if (po.NumArgs() < 6 || po.NumArgs() > 7) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - fst_in_filename = po.GetArg(2), - regtree_filename = po.GetArg(3), - feature_rspecifier = po.GetArg(4), - xforms_rspecifier = po.GetArg(5), - words_wspecifier = po.GetArg(6), - alignment_wspecifier = po.GetOptArg(7); - - TransitionModel trans_model; - AmDiagGmm am_gmm; - { - bool binary_read; - Input ki(model_in_filename, &binary_read); - trans_model.Read(ki.Stream(), binary_read); - am_gmm.Read(ki.Stream(), binary_read); - } - - VectorFst *decode_fst = fst::ReadFstKaldi(fst_in_filename); - - RegressionTree regtree; - { - bool binary_read; - Input in(regtree_filename, &binary_read); - regtree.Read(in.Stream(), binary_read, am_gmm); - } - - RandomAccessRegtreeMllrDiagGmmReaderMapped mllr_reader(xforms_rspecifier, - utt2spk_rspecifier); - - Int32VectorWriter words_writer(words_wspecifier); - - Int32VectorWriter alignment_writer(alignment_wspecifier); - - fst::SymbolTable *word_syms = NULL; - if (word_syms_filename != "") { - word_syms = fst::SymbolTable::ReadText(word_syms_filename); - if (!word_syms) { - KALDI_ERR << "Could not read symbol table from file " - << word_syms_filename; - } - } - - BaseFloat tot_like = 0.0; - kaldi::int64 frame_count = 0; - int num_success = 0, num_fail = 0; - FasterDecoder decoder(*decode_fst, 
decoder_opts); - - Timer timer; - - DecodeInfo decode_info(am_gmm, trans_model, &decoder, acoustic_scale, - allow_partial, words_writer, alignment_writer, - word_syms); - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string utt = feature_reader.Key(); - - Matrix features(feature_reader.Value()); - feature_reader.FreeCurrent(); - if (features.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_fail++; - continue; - } - - if (!mllr_reader.HasKey(utt)) { // Decode without MLLR if none found - KALDI_WARN << "No MLLR transform for key " << utt << - ", decoding without MLLR."; - kaldi::DecodableAmDiagGmmScaled gmm_decodable(am_gmm, trans_model, - features, - acoustic_scale); - if (DecodeUtterance(&decoder, &gmm_decodable, &decode_info, - utt, features.NumRows(), &tot_like)) { - frame_count += gmm_decodable.NumFramesReady(); - num_success++; - } else { - num_fail++; - } - continue; - } - - // If found, load the transforms for the current utterance. - const RegtreeMllrDiagGmm &mllr = mllr_reader.Value(utt); - kaldi::DecodableAmDiagGmmRegtreeMllr gmm_decodable(am_gmm, trans_model, - features, mllr, - regtree, - acoustic_scale); - if (DecodeUtterance(&decoder, &gmm_decodable, &decode_info, - utt, features.NumRows(), &tot_like)) { - frame_count += gmm_decodable.NumFramesReady(); - num_success++; - } else { - num_fail++; - } - } // end looping over all utterances - - double elapsed = timer.Elapsed(); - KALDI_LOG << "Time taken [excluding initialization] " << elapsed - << "s: real-time factor assuming 100 frames/sec is " - << (elapsed * 100.0 / frame_count); - KALDI_LOG << "Done " << num_success << " utterances, failed for " - << num_fail; - KALDI_LOG << "Overall log-likelihood per frame is " - << (tot_like / frame_count) << " over " << frame_count - << " frames."; - - delete decode_fst; - if (num_success != 0) - return 0; - else - return 1; - } - catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/gmmbin/gmm-decode-faster.cc b/src/gmmbin/gmm-decode-faster.cc index 34c4ff2c37e..438e3d9c9d1 100644 --- a/src/gmmbin/gmm-decode-faster.cc +++ b/src/gmmbin/gmm-decode-faster.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/faster-decoder.h" #include "gmm/decodable-am-diag-gmm.h" @@ -101,7 +101,7 @@ int main(int argc, char *argv[]) { alignment_wspecifier = po.GetOptArg(5), lattice_wspecifier = po.GetOptArg(6); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-decode-simple.cc b/src/gmmbin/gmm-decode-simple.cc index 5ef35552dc0..ef87585cc1e 100644 --- a/src/gmmbin/gmm-decode-simple.cc +++ b/src/gmmbin/gmm-decode-simple.cc @@ -23,7 +23,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/simple-decoder.h" #include "gmm/decodable-am-diag-gmm.h" @@ -78,7 +78,7 @@ int main(int argc, char *argv[]) { alignment_wspecifier = po.GetOptArg(5), lattice_wspecifier = po.GetOptArg(6); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-est-basis-fmllr-gpost.cc b/src/gmmbin/gmm-est-basis-fmllr-gpost.cc index 
54b92d8aa61..3d864c88086 100644 --- a/src/gmmbin/gmm-est-basis-fmllr-gpost.cc +++ b/src/gmmbin/gmm-est-basis-fmllr-gpost.cc @@ -26,7 +26,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "transform/basis-fmllr-diag-gmm.h" #include "hmm/posterior.h" @@ -34,7 +34,7 @@ using std::vector; namespace kaldi { void AccumulateForUtterance(const Matrix &feats, const GaussPost &gpost, - const TransitionModel &trans_model, + const Transitions &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats) { for (size_t i = 0; i < gpost.size(); i++) { @@ -87,7 +87,7 @@ int main(int argc, char *argv[]) { gpost_rspecifier = po.GetArg(4), trans_wspecifier = po.GetArg(5); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-est-basis-fmllr.cc b/src/gmmbin/gmm-est-basis-fmllr.cc index 0d163169ce2..fe64a1b2166 100644 --- a/src/gmmbin/gmm-est-basis-fmllr.cc +++ b/src/gmmbin/gmm-est-basis-fmllr.cc @@ -26,7 +26,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "transform/basis-fmllr-diag-gmm.h" #include "hmm/posterior.h" @@ -34,7 +34,7 @@ using std::vector; namespace kaldi { void AccumulateForUtterance(const Matrix &feats, const Posterior &post, - const TransitionModel &trans_model, + const Transitions &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats) { Posterior pdf_post; @@ -89,7 +89,7 @@ int main(int argc, char *argv[]) { post_rspecifier = po.GetArg(4), trans_wspecifier = po.GetArg(5); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-est-fmllr-global.cc b/src/gmmbin/gmm-est-fmllr-global.cc index b3af0780aa5..d167ba25890 100644 --- a/src/gmmbin/gmm-est-fmllr-global.cc +++ b/src/gmmbin/gmm-est-fmllr-global.cc @@ -27,7 +27,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "hmm/posterior.h" diff --git a/src/gmmbin/gmm-est-fmllr-gpost.cc b/src/gmmbin/gmm-est-fmllr-gpost.cc index d1cae0d7f48..9d830737718 100644 --- a/src/gmmbin/gmm-est-fmllr-gpost.cc +++ b/src/gmmbin/gmm-est-fmllr-gpost.cc @@ -27,14 +27,14 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "hmm/posterior.h" namespace kaldi { void AccumulateForUtterance(const Matrix &feats, const GaussPost &gpost, - const TransitionModel &trans_model, + const Transitions &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats) { for (size_t i = 0; i < gpost.size(); i++) { @@ -81,7 +81,7 @@ int main(int argc, char *argv[]) { gpost_rspecifier = po.GetArg(3), trans_wspecifier = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-est-fmllr-raw-gpost.cc b/src/gmmbin/gmm-est-fmllr-raw-gpost.cc deleted file mode 100644 index 1f5a09f233b..00000000000 --- a/src/gmmbin/gmm-est-fmllr-raw-gpost.cc +++ /dev/null @@ -1,198 +0,0 @@ -// 
gmmbin/gmm-est-fmllr-raw-gpost.cc - -// Copyright 2013 Johns Hopkins University (author: Daniel Povey) -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "transform/fmllr-raw.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" -#include "util/common-utils.h" -#include "hmm/posterior.h" - -namespace kaldi { - - -void AccStatsForUtterance(const TransitionModel &trans_model, - const AmDiagGmm &am_gmm, - const GaussPost &gpost, - const Matrix &feats, - FmllrRawAccs *accs) { - for (size_t t = 0; t < gpost.size(); t++) { - for (size_t i = 0; i < gpost[t].size(); i++) { - int32 pdf = gpost[t][i].first; - const Vector &posterior(gpost[t][i].second); - accs->AccumulateFromPosteriors(am_gmm.GetPdf(pdf), - feats.Row(t), posterior); - } - } -} - - -} - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Estimate fMLLR transforms in the space before splicing and linear transforms\n" - "such as LDA+MLLT, but using models in the space transformed by these transforms\n" - "Requires the original spliced features, and the full LDA+MLLT (or similar) matrix\n" - "including the 'rejected' rows (see the program get-full-lda-mat). 
Reads in\n" - "Gaussian-level posteriors.\n" - "Usage: gmm-est-fmllr-raw-gpost [options] " - " \n"; - - - int32 raw_feat_dim = 13; - ParseOptions po(usage); - FmllrRawOptions opts; - std::string spk2utt_rspecifier; - po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to " - "utterance-list map"); - po.Register("raw-feat-dim", &raw_feat_dim, "Dimension of raw features " - "prior to splicing"); - opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - std::string model_rxfilename = po.GetArg(1), - full_lda_mat_rxfilename = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - gpost_rspecifier = po.GetArg(4), - transform_wspecifier = po.GetArg(5); - - AmDiagGmm am_gmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_rxfilename, &binary); - trans_model.Read(ki.Stream(), binary); - am_gmm.Read(ki.Stream(), binary); - } - - Matrix full_lda_mat; - ReadKaldiObject(full_lda_mat_rxfilename, &full_lda_mat); - - RandomAccessGaussPostReader gpost_reader(gpost_rspecifier); - BaseFloatMatrixWriter transform_writer(transform_wspecifier); - - double tot_auxf_impr = 0.0, tot_count = 0.0; - - int32 num_done = 0, num_err = 0; - if (!spk2utt_rspecifier.empty()) { // Adapting per speaker - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - FmllrRawAccs accs(raw_feat_dim, am_gmm.Dim(), full_lda_mat); - std::string spk = spk2utt_reader.Key(); - const std::vector &uttlist = spk2utt_reader.Value(); - for (size_t i = 0; i < uttlist.size(); i++) { - std::string utt = uttlist[i]; - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Features not found for utterance " << utt; - num_err++; - continue; - } - if (!gpost_reader.HasKey(utt)) { - KALDI_WARN << "Gaussian-level posteriors not found for utterance " << utt; - num_err++; - continue; - } - const Matrix &feats = feature_reader.Value(utt); - const GaussPost &gpost = gpost_reader.Value(utt); - if (static_cast(gpost.size()) != feats.NumRows()) { - KALDI_WARN << "Size mismatch between gposteriors " << gpost.size() - << " and features " << feats.NumRows(); - num_err++; - continue; - } - - AccStatsForUtterance(trans_model, am_gmm, gpost, feats, &accs); - num_done++; - } - - BaseFloat auxf_impr, count; - { - Matrix transform(raw_feat_dim, raw_feat_dim + 1); - transform.SetUnit(); - accs.Update(opts, &transform, &auxf_impr, &count); - transform_writer.Write(spk, transform); - } - KALDI_LOG << "For speaker " << spk << ", auxf-impr from raw fMLLR is " - << (auxf_impr/count) << " over " << count << " frames."; - tot_auxf_impr += auxf_impr; - tot_count += count; - } - } else { // --spk2utt option not given -> adapt per utterance. 
- SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!gpost_reader.HasKey(utt)) { - KALDI_WARN << "Gaussian-level posteriors not found for utterance " << utt; - num_err++; - continue; - } - const Matrix &feats = feature_reader.Value(); - const GaussPost &gpost = gpost_reader.Value(utt); - - if (static_cast(gpost.size()) != feats.NumRows()) { - KALDI_WARN << "Size mismatch between posteriors " << gpost.size() - << " and features " << feats.NumRows(); - num_err++; - continue; - } - - FmllrRawAccs accs(raw_feat_dim, am_gmm.Dim(), full_lda_mat); - - AccStatsForUtterance(trans_model, am_gmm, gpost, feats, &accs); - - BaseFloat auxf_impr, count; - { - Matrix transform(raw_feat_dim, raw_feat_dim + 1); - transform.SetUnit(); - accs.Update(opts, &transform, &auxf_impr, &count); - transform_writer.Write(utt, transform); - } - KALDI_LOG << "For utterance " << utt << ", auxf-impr from raw fMLLR is " - << (auxf_impr/count) << " over " << count << " frames."; - tot_auxf_impr += auxf_impr; - tot_count += count; - num_done++; - } - } - - KALDI_LOG << "Processed " << num_done << " utterances, " - << num_err << " had errors."; - KALDI_LOG << "Overall raw-fMLLR auxf impr per frame is " - << (tot_auxf_impr / tot_count) << " over " << tot_count - << " frames."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/gmmbin/gmm-est-fmllr-raw.cc b/src/gmmbin/gmm-est-fmllr-raw.cc deleted file mode 100644 index 5e83bfb1fb3..00000000000 --- a/src/gmmbin/gmm-est-fmllr-raw.cc +++ /dev/null @@ -1,199 +0,0 @@ -// gmmbin/gmm-est-fmllr-raw.cc - -// Copyright 2013 Johns Hopkins University (author: Daniel Povey) -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include "base/kaldi-common.h" -#include "transform/fmllr-raw.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" -#include "util/common-utils.h" -#include "hmm/posterior.h" - -namespace kaldi { - - -void AccStatsForUtterance(const TransitionModel &trans_model, - const AmDiagGmm &am_gmm, - const Posterior &post, - const Matrix &feats, - FmllrRawAccs *accs) { - Posterior pdf_post; - ConvertPosteriorToPdfs(trans_model, post, &pdf_post); - for (size_t t = 0; t < post.size(); t++) { - for (size_t i = 0; i < pdf_post[t].size(); i++) { - int32 pdf = pdf_post[t][i].first; - BaseFloat weight = pdf_post[t][i].second; - accs->AccumulateForGmm(am_gmm.GetPdf(pdf), - feats.Row(t), weight); - } - } -} - - -} - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Estimate fMLLR transforms in the space before splicing and linear transforms\n" - "such as LDA+MLLT, but using models in the space transformed by these transforms\n" - "Requires the original spliced features, and the full LDA+MLLT (or similar) matrix\n" - "including the 'rejected' rows (see the program get-full-lda-mat)\n" - "Usage: gmm-est-fmllr-raw [options] " - " \n"; - - - int32 raw_feat_dim = 13; - ParseOptions po(usage); - FmllrRawOptions opts; - std::string spk2utt_rspecifier; - po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to " - "utterance-list map"); - po.Register("raw-feat-dim", &raw_feat_dim, "Dimension of raw features " - "prior to splicing"); - opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - std::string model_rxfilename = po.GetArg(1), - full_lda_mat_rxfilename = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - post_rspecifier = po.GetArg(4), - transform_wspecifier = po.GetArg(5); - - AmDiagGmm am_gmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_rxfilename, &binary); - trans_model.Read(ki.Stream(), binary); - am_gmm.Read(ki.Stream(), binary); - } - - Matrix full_lda_mat; - ReadKaldiObject(full_lda_mat_rxfilename, &full_lda_mat); - - RandomAccessPosteriorReader post_reader(post_rspecifier); - BaseFloatMatrixWriter transform_writer(transform_wspecifier); - - double tot_auxf_impr = 0.0, tot_count = 0.0; - - int32 num_done = 0, num_err = 0; - if (!spk2utt_rspecifier.empty()) { // Adapting per speaker - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - FmllrRawAccs accs(raw_feat_dim, am_gmm.Dim(), full_lda_mat); - std::string spk = spk2utt_reader.Key(); - const std::vector &uttlist = spk2utt_reader.Value(); - for (size_t i = 0; i < uttlist.size(); i++) { - std::string utt = uttlist[i]; - if (!feature_reader.HasKey(utt)) { - KALDI_WARN << "Features not found for utterance " << utt; - num_err++; - continue; - } - if (!post_reader.HasKey(utt)) { - KALDI_WARN << "Posteriors not found for utterance " << utt; - num_err++; - continue; - } - const Matrix &feats = feature_reader.Value(utt); - const Posterior &post = post_reader.Value(utt); - if (static_cast(post.size()) != feats.NumRows()) { - KALDI_WARN << "Size mismatch between posteriors " << post.size() - << " and features " << feats.NumRows(); - num_err++; - continue; - } - - AccStatsForUtterance(trans_model, am_gmm, post, feats, &accs); - num_done++; - } - - BaseFloat auxf_impr, count; - { - Matrix transform(raw_feat_dim, raw_feat_dim + 1); - 
transform.SetUnit(); - accs.Update(opts, &transform, &auxf_impr, &count); - transform_writer.Write(spk, transform); - } - KALDI_LOG << "For speaker " << spk << ", auxf-impr from raw fMLLR is " - << (auxf_impr/count) << " over " << count << " frames."; - tot_auxf_impr += auxf_impr; - tot_count += count; - } - } else { // --spk2utt option not given -> adapt per utterance. - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - if (!post_reader.HasKey(utt)) { - KALDI_WARN << "Posteriors not found for utterance " << utt; - num_err++; - continue; - } - const Matrix &feats = feature_reader.Value(); - const Posterior &post = post_reader.Value(utt); - - if (static_cast(post.size()) != feats.NumRows()) { - KALDI_WARN << "Size mismatch between posteriors " << post.size() - << " and features " << feats.NumRows(); - num_err++; - continue; - } - - FmllrRawAccs accs(raw_feat_dim, am_gmm.Dim(), full_lda_mat); - - AccStatsForUtterance(trans_model, am_gmm, post, feats, &accs); - - BaseFloat auxf_impr, count; - { - Matrix transform(raw_feat_dim, raw_feat_dim + 1); - transform.SetUnit(); - accs.Update(opts, &transform, &auxf_impr, &count); - transform_writer.Write(utt, transform); - } - KALDI_LOG << "For utterance " << utt << ", auxf-impr from raw fMLLR is " - << (auxf_impr/count) << " over " << count << " frames."; - tot_auxf_impr += auxf_impr; - tot_count += count; - num_done++; - } - } - - KALDI_LOG << "Processed " << num_done << " utterances, " - << num_err << " had errors."; - KALDI_LOG << "Overall raw-fMLLR auxf impr per frame is " - << (tot_auxf_impr / tot_count) << " over " << tot_count - << " frames."; - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/gmmbin/gmm-est-fmllr.cc b/src/gmmbin/gmm-est-fmllr.cc index 9f8dfd89143..c44a284b2f8 100644 --- a/src/gmmbin/gmm-est-fmllr.cc +++ b/src/gmmbin/gmm-est-fmllr.cc @@ -27,14 +27,14 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "hmm/posterior.h" namespace kaldi { void AccumulateForUtterance(const Matrix &feats, const Posterior &post, - const TransitionModel &trans_model, + const Transitions &trans_model, const AmDiagGmm &am_gmm, FmllrDiagGmmAccs *spk_stats) { Posterior pdf_post; @@ -83,7 +83,7 @@ int main(int argc, char *argv[]) { post_rspecifier = po.GetArg(3), trans_wspecifier = po.GetArg(4); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-est-gaussians-ebw.cc b/src/gmmbin/gmm-est-gaussians-ebw.cc index bbd53c2bec0..cfbb8ece02d 100644 --- a/src/gmmbin/gmm-est-gaussians-ebw.cc +++ b/src/gmmbin/gmm-est-gaussians-ebw.cc @@ -21,7 +21,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/ebw-diag-gmm.h" int main(int argc, char *argv[]) { @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) { model_out_filename = po.GetArg(4); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); diff --git a/src/gmmbin/gmm-est-lvtln-trans.cc b/src/gmmbin/gmm-est-lvtln-trans.cc index abfc24a6585..849560dd437 100644 --- 
a/src/gmmbin/gmm-est-lvtln-trans.cc +++ b/src/gmmbin/gmm-est-lvtln-trans.cc @@ -26,7 +26,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/lvtln.h" #include "hmm/posterior.h" @@ -86,7 +86,7 @@ int main(int argc, char *argv[]) { { bool binary; Input ki(model_rxfilename, &binary); - TransitionModel trans_model; + Transitions trans_model; trans_model.Read(ki.Stream(), binary); am_gmm.Read(ki.Stream(), binary); } diff --git a/src/gmmbin/gmm-est-map.cc b/src/gmmbin/gmm-est-map.cc index 22ea8acda51..6cbb864fcf7 100644 --- a/src/gmmbin/gmm-est-map.cc +++ b/src/gmmbin/gmm-est-map.cc @@ -22,7 +22,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" int main(int argc, char *argv[]) { @@ -36,7 +36,6 @@ int main(int argc, char *argv[]) { "e.g.: gmm-est-map 1.mdl 1.acc 2.mdl\n"; bool binary_write = true; - MapTransitionUpdateConfig tcfg; MapDiagGmmOptions gmm_opts; std::string update_flags_str = "mvwt"; std::string occs_out_filename; @@ -47,7 +46,6 @@ int main(int argc, char *argv[]) { "update: subset of mvwt."); po.Register("write-occs", &occs_out_filename, "File to write state " "occupancies to."); - tcfg.Register(&po); gmm_opts.Register(&po); po.Read(argc, argv); @@ -65,7 +63,7 @@ int main(int argc, char *argv[]) { model_out_filename = po.GetArg(3); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); @@ -82,14 +80,6 @@ int main(int argc, char *argv[]) { gmm_accs.Read(ki.Stream(), binary, true); // true == add; doesn't matter here. } - if (update_flags & kGmmTransitions) { // Update transition model. - BaseFloat objf_impr, count; - trans_model.MapUpdate(transition_accs, tcfg, &objf_impr, &count); - KALDI_LOG << "Transition model update: Overall " << (objf_impr/count) - << " log-like improvement per frame over " << (count) - << " frames."; - } - { // Update GMMs. BaseFloat objf_impr, count; BaseFloat tot_like = gmm_accs.TotLogLike(), diff --git a/src/gmmbin/gmm-est-regtree-fmllr-ali.cc b/src/gmmbin/gmm-est-regtree-fmllr-ali.cc deleted file mode 100644 index 0158bae8298..00000000000 --- a/src/gmmbin/gmm-est-regtree-fmllr-ali.cc +++ /dev/null @@ -1,202 +0,0 @@ -// gmmbin/gmm-est-regtree-fmllr-ali.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- -#include -using std::string; -#include -using std::vector; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" -#include "transform/regtree-fmllr-diag-gmm.h" - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Compute FMLLR transforms per-utterance (default) or per-speaker for " - "the supplied set of speakers (spk2utt option). Note: writes RegtreeFmllrDiagGmm objects\n" - "Usage: gmm-est-regtree-fmllr-ali [options] " - " \n"; - - ParseOptions po(usage); - string spk2utt_rspecifier; - bool binary = true; - po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to " - "utterance-list map"); - po.Register("binary", &binary, "Write output in binary mode"); - // register other modules - RegtreeFmllrOptions opts; - opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - alignments_rspecifier = po.GetArg(3), - regtree_filename = po.GetArg(4), - xforms_wspecifier = po.GetArg(5); - - RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier); - RegtreeFmllrDiagGmmWriter fmllr_writer(xforms_wspecifier); - - AmDiagGmm am_gmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_gmm.Read(ki.Stream(), binary); - } - RegressionTree regtree; - { - bool binary; - Input in(regtree_filename, &binary); - regtree.Read(in.Stream(), binary, am_gmm); - } - - RegtreeFmllrDiagGmm fmllr_xforms; - RegtreeFmllrDiagGmmAccs fmllr_accs; - fmllr_accs.Init(regtree.NumBaseclasses(), am_gmm.Dim()); - - double tot_like = 0.0; - kaldi::int64 tot_t = 0; - - int32 num_done = 0, num_no_alignment = 0, num_other_error = 0; - double tot_objf_impr = 0.0, tot_t_objf = 0.0; - if (spk2utt_rspecifier != "") { // per-speaker adaptation - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - string spk = spk2utt_reader.Key(); - fmllr_accs.SetZero(); - const vector &uttlist = spk2utt_reader.Value(); - for (vector::const_iterator utt_itr = uttlist.begin(), - itr_end = uttlist.end(); utt_itr != itr_end; ++utt_itr) { - if (!feature_reader.HasKey(*utt_itr)) { - KALDI_WARN << "Did not find features for utterance " << *utt_itr; - continue; - } - if (!alignments_reader.HasKey(*utt_itr)) { - KALDI_WARN << "Did not find aligned transcription for utterance " - << *utt_itr; - num_no_alignment++; - continue; - } - const Matrix &feats = feature_reader.Value(*utt_itr); - const vector &alignment = alignments_reader.Value(*utt_itr); - if (static_cast(alignment.size()) != feats.NumRows()) { - KALDI_WARN << "Alignments has wrong size " << (alignment.size()) - << " vs. 
" << (feats.NumRows()); - num_other_error++; - continue; - } - - BaseFloat file_like = 0.0; - for (size_t i = 0; i < alignment.size(); i++) { - int32 pdf_id = trans_model.TransitionIdToPdf(alignment[i]); - file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm, - feats.Row(i), pdf_id, 1.0); - } - KALDI_VLOG(2) << "Average like for this file is " << (file_like - / alignment.size()) << " over " << alignment.size() - << " frames.\n"; - tot_like += file_like; - tot_t += alignment.size(); - num_done++; - if (num_done % 10 == 0) KALDI_VLOG(1) - << "Avg like per frame so far is " << (tot_like / tot_t) << '\n'; - } // end looping over all utterances of the current speaker - BaseFloat objf_impr, t; - fmllr_accs.Update(regtree, opts, &fmllr_xforms, &objf_impr, &t); - KALDI_LOG << "fMLLR objf improvement for speaker " << spk << " is " - << (objf_impr/(t+1.0e-10)) << " per frame over " << t - << " frames."; - tot_objf_impr += objf_impr; - tot_t_objf += t; - fmllr_writer.Write(spk, fmllr_xforms); - } // end looping over speakers - } else { // per-utterance adaptation - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string key = feature_reader.Key(); - if (!alignments_reader.HasKey(key)) { - KALDI_WARN << "Did not find aligned transcription for utterance " - << key; - num_no_alignment++; - continue; - } - const Matrix &feats = feature_reader.Value(); - const vector &alignment = alignments_reader.Value(key); - - if (static_cast(alignment.size()) != feats.NumRows()) { - KALDI_WARN << "Alignments has wrong size " << (alignment.size()) - << " vs. " << (feats.NumRows()); - num_other_error++; - continue; - } - - num_done++; - BaseFloat file_like = 0.0; - fmllr_accs.SetZero(); - for (size_t i = 0; i < alignment.size(); i++) { - int32 pdf_id = trans_model.TransitionIdToPdf(alignment[i]); - file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm, - feats.Row(i), pdf_id, 1.0); - } - KALDI_VLOG(2) << "Average like for this file is " << (file_like - / alignment.size()) << " over " << alignment.size() << " frames."; - tot_like += file_like; - tot_t += alignment.size(); - if (num_done % 10 == 0) KALDI_VLOG(1) - << "Avg like per frame so far is " << (tot_like / tot_t); - BaseFloat objf_impr, t; - fmllr_accs.Update(regtree, opts, &fmllr_xforms, &objf_impr, &t); - KALDI_LOG << "fMLLR objf improvement for utterance " << key << " is " - << (objf_impr/(t+1.0e-10)) << " per frame over " << t - << " frames."; - tot_objf_impr += objf_impr; - tot_t_objf += t; - fmllr_writer.Write(feature_reader.Key(), fmllr_xforms); - } - } - - KALDI_LOG << "Overall objf improvement from fMLLR is " - << (tot_objf_impr/tot_t_objf) - << " per frame over " << tot_t_objf << " frames."; - KALDI_LOG << "Done " << num_done << " files, " << num_no_alignment - << " with no alignments, " << num_other_error - << " with other errors."; - KALDI_LOG << "Overall acoustic like per frame = " << (tot_like / tot_t) - << " over " << tot_t << " frames."; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/gmmbin/gmm-est-regtree-fmllr.cc b/src/gmmbin/gmm-est-regtree-fmllr.cc deleted file mode 100644 index ca807f07fd4..00000000000 --- a/src/gmmbin/gmm-est-regtree-fmllr.cc +++ /dev/null @@ -1,216 +0,0 @@ -// gmmbin/gmm-est-regtree-fmllr.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the 
Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; -#include -using std::vector; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" -#include "hmm/posterior.h" -#include "transform/regtree-fmllr-diag-gmm.h" - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Compute FMLLR transforms per-utterance (default) or per-speaker for " - "the supplied set of speakers (spk2utt option). Note: writes RegtreeFmllrDiagGmm objects\n" - "Usage: gmm-est-regtree-fmllr [options] " - " \n"; - - ParseOptions po(usage); - string spk2utt_rspecifier; - bool binary = true; - po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to " - "utterance-list map"); - po.Register("binary", &binary, "Write output in binary mode"); - // register other modules - RegtreeFmllrOptions opts; - opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - posteriors_rspecifier = po.GetArg(3), - regtree_filename = po.GetArg(4), - xforms_wspecifier = po.GetArg(5); - - RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); - RegtreeFmllrDiagGmmWriter fmllr_writer(xforms_wspecifier); - - AmDiagGmm am_gmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_gmm.Read(ki.Stream(), binary); - } - RegressionTree regtree; - { - bool binary; - Input in(regtree_filename, &binary); - regtree.Read(in.Stream(), binary, am_gmm); - } - - RegtreeFmllrDiagGmm fmllr_xforms; - RegtreeFmllrDiagGmmAccs fmllr_accs; - fmllr_accs.Init(regtree.NumBaseclasses(), am_gmm.Dim()); - - double tot_like = 0.0, tot_t = 0; - - int32 num_done = 0, num_no_posterior = 0, num_other_error = 0; - double tot_objf_impr = 0.0, tot_t_objf = 0.0; - if (spk2utt_rspecifier != "") { // per-speaker adaptation - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - string spk = spk2utt_reader.Key(); - fmllr_accs.SetZero(); - const vector &uttlist = spk2utt_reader.Value(); - for (vector::const_iterator utt_itr = uttlist.begin(), - itr_end = uttlist.end(); utt_itr != itr_end; ++utt_itr) { - if (!feature_reader.HasKey(*utt_itr)) { - KALDI_WARN << "Did not find features for utterance " << *utt_itr; - continue; - } - if (!posteriors_reader.HasKey(*utt_itr)) { - KALDI_WARN << "Did not find posteriors for utterance " - << *utt_itr; - num_no_posterior++; - continue; - } - const Matrix &feats = feature_reader.Value(*utt_itr); - const Posterior &posterior = posteriors_reader.Value(*utt_itr); - if (static_cast(posterior.size()) != feats.NumRows()) { - KALDI_WARN << "Posteriors has wrong size " << 
(posterior.size()) - << " vs. " << (feats.NumRows()); - num_other_error++; - continue; - } - - BaseFloat file_like = 0.0, file_t = 0.0; - Posterior pdf_posterior; - ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior); - for (size_t i = 0; i < posterior.size(); i++) { - for (size_t j = 0; j < pdf_posterior[i].size(); j++) { - int32 pdf_id = pdf_posterior[i][j].first; - BaseFloat prob = pdf_posterior[i][j].second; - file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm, - feats.Row(i), pdf_id, - prob); - file_t += prob; - } - } - KALDI_VLOG(2) << "Average like for this file is " << (file_like/file_t) - << " over " << file_t << " frames."; - tot_like += file_like; - tot_t += file_t; - num_done++; - if (num_done % 10 == 0) - KALDI_VLOG(1) << "Avg like per frame so far is " - << (tot_like / tot_t); - } // end looping over all utterances of the current speaker - BaseFloat objf_impr, t; - fmllr_accs.Update(regtree, opts, &fmllr_xforms, &objf_impr, &t); - KALDI_LOG << "fMLLR objf improvement for speaker " << spk << " is " - << (objf_impr/(t+1.0e-10)) << " per frame over " << t - << " frames."; - tot_objf_impr += objf_impr; - tot_t_objf += t; - fmllr_writer.Write(spk, fmllr_xforms); - } // end looping over speakers - } else { // per-utterance adaptation - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string key = feature_reader.Key(); - if (!posteriors_reader.HasKey(key)) { - KALDI_WARN << "Did not find posteriors for utterance " - << key; - num_no_posterior++; - continue; - } - const Matrix &feats = feature_reader.Value(); - const Posterior &posterior = posteriors_reader.Value(key); - - if (static_cast(posterior.size()) != feats.NumRows()) { - KALDI_WARN << "Posteriors has wrong size " << (posterior.size()) - << " vs. 
" << (feats.NumRows()); - num_other_error++; - continue; - } - - num_done++; - BaseFloat file_like = 0.0, file_t = 0.0; - fmllr_accs.SetZero(); - Posterior pdf_posterior; - ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior); - for (size_t i = 0; i < posterior.size(); i++) { - for (size_t j = 0; j < pdf_posterior[i].size(); j++) { - int32 pdf_id = pdf_posterior[i][j].first; - BaseFloat prob = pdf_posterior[i][j].second; - file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm, - feats.Row(i), pdf_id, - prob); - file_t += prob; - } - } - KALDI_VLOG(2) << "Average like for this file is " << (file_like/file_t) - << " over " << file_t << " frames."; - tot_like += file_like; - tot_t += file_t; - if (num_done % 10 == 0) - KALDI_VLOG(1) << "Avg like per frame so far is " - << (tot_like / tot_t); - BaseFloat objf_impr, t; - fmllr_accs.Update(regtree, opts, &fmllr_xforms, &objf_impr, &t); - KALDI_LOG << "fMLLR objf improvement for utterance " << key << " is " - << (objf_impr/(t+1.0e-10)) << " per frame over " << t - << " frames."; - tot_objf_impr += objf_impr; - tot_t_objf += t; - fmllr_writer.Write(feature_reader.Key(), fmllr_xforms); - } - } - KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior - << " with no posteriors, " << num_other_error - << " with other errors."; - KALDI_LOG << "Overall objf improvement from MLLR is " << (tot_objf_impr/tot_t_objf) - << " per frame " << " over " << tot_t_objf << " frames."; - KALDI_LOG << "Overall acoustic likelihood was " << (tot_like/tot_t) - << " over " << tot_t << " frames."; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/gmmbin/gmm-est-regtree-mllr.cc b/src/gmmbin/gmm-est-regtree-mllr.cc deleted file mode 100644 index a4df5cc84c1..00000000000 --- a/src/gmmbin/gmm-est-regtree-mllr.cc +++ /dev/null @@ -1,215 +0,0 @@ -// gmmbin/gmm-est-regtree-mllr.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation -// 2014 Guoguo Chen - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -using std::string; -#include -using std::vector; - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" -#include "transform/regtree-mllr-diag-gmm.h" -#include "hmm/posterior.h" - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - using namespace kaldi; - const char *usage = - "Compute MLLR transforms per-utterance (default) or per-speaker for " - "the supplied set of speakers (spk2utt option). 
Note: writes RegtreeMllrDiagGmm objects\n" - "Usage: gmm-est-regtree-mllr [options] " - " \n"; - - ParseOptions po(usage); - string spk2utt_rspecifier; - bool binary = true; - po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to " - "utterance-list map"); - po.Register("binary", &binary, "Write output in binary mode"); - // register other modules - RegtreeMllrOptions opts; - opts.Register(&po); - - po.Read(argc, argv); - - if (po.NumArgs() != 5) { - po.PrintUsage(); - exit(1); - } - - string model_filename = po.GetArg(1), - feature_rspecifier = po.GetArg(2), - posteriors_rspecifier = po.GetArg(3), - regtree_filename = po.GetArg(4), - xforms_wspecifier = po.GetArg(5); - - RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); - RegtreeMllrDiagGmmWriter mllr_writer(xforms_wspecifier); - - AmDiagGmm am_gmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_gmm.Read(ki.Stream(), binary); - } - RegressionTree regtree; - { - bool binary; - Input in(regtree_filename, &binary); - regtree.Read(in.Stream(), binary, am_gmm); - } - - RegtreeMllrDiagGmm mllr_xforms; - RegtreeMllrDiagGmmAccs mllr_accs; - mllr_accs.Init(regtree.NumBaseclasses(), am_gmm.Dim()); - - double tot_like = 0.0, tot_t = 0; - - int32 num_done = 0, num_no_posterior = 0, num_other_error = 0; - double tot_objf_impr = 0.0, tot_t_objf = 0.0; - if (spk2utt_rspecifier != "") { // per-speaker adaptation - SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) { - string spk = spk2utt_reader.Key(); - mllr_accs.SetZero(); - const vector &uttlist = spk2utt_reader.Value(); - for (vector::const_iterator utt_itr = uttlist.begin(), - itr_end = uttlist.end(); utt_itr != itr_end; ++utt_itr) { - if (!feature_reader.HasKey(*utt_itr)) { - KALDI_WARN << "Did not find features for utterance " << *utt_itr; - continue; - } - if (!posteriors_reader.HasKey(*utt_itr)) { - KALDI_WARN << "Did not find posteriors for utterance " - << *utt_itr; - num_no_posterior++; - continue; - } - const Matrix &feats = feature_reader.Value(*utt_itr); - const Posterior &posterior = posteriors_reader.Value(*utt_itr); - if (posterior.size() != feats.NumRows()) { - KALDI_WARN << "Posteriors has wrong size " << (posterior.size()) - << " vs. 
" << (feats.NumRows()); - num_other_error++; - continue; - } - - BaseFloat file_like = 0.0, file_t = 0.0; - Posterior pdf_posterior; - ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior); - for (size_t i = 0; i < posterior.size(); i++) { - for (size_t j = 0; j < pdf_posterior[i].size(); j++) { - int32 pdf_id = pdf_posterior[i][j].first; - BaseFloat prob = pdf_posterior[i][j].second; - file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm, - feats.Row(i), pdf_id, - prob); - file_t += prob; - } - } - KALDI_VLOG(2) << "Average like for this file is " << (file_like/file_t) - << " over " << file_t << " frames."; - tot_like += file_like; - tot_t += file_t; - num_done++; - if (num_done % 10 == 0) - KALDI_VLOG(1) << "Avg like per frame so far is " - << (tot_like / tot_t); - } // end looping over all utterances of the current speaker - BaseFloat objf_impr, t; - mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t); - KALDI_LOG << "MLLR objf improvement for speaker " << spk << " is " - << (objf_impr/(t+1.0e-10)) << " per frame over " << t - << " frames."; - tot_objf_impr += objf_impr; - tot_t_objf += t; - mllr_writer.Write(spk, mllr_xforms); - } // end looping over speakers - } else { // per-utterance adaptation - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !feature_reader.Done(); feature_reader.Next()) { - string key = feature_reader.Key(); - if (!posteriors_reader.HasKey(key)) { - KALDI_WARN << "Did not find aligned transcription for utterance " - << key; - num_no_posterior++; - continue; - } - const Matrix &feats = feature_reader.Value(); - const Posterior &posterior = posteriors_reader.Value(key); - - if (posterior.size() != feats.NumRows()) { - KALDI_WARN << "Posteriors has wrong size " << (posterior.size()) - << " vs. 
" << (feats.NumRows()); - num_other_error++; - continue; - } - - num_done++; - BaseFloat file_like = 0.0, file_t = 0.0; - mllr_accs.SetZero(); - Posterior pdf_posterior; - ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior); - for (size_t i = 0; i < posterior.size(); i++) { - for (size_t j = 0; j < pdf_posterior[i].size(); j++) { - int32 pdf_id = pdf_posterior[i][j].first; - BaseFloat prob = pdf_posterior[i][j].second; - file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm, - feats.Row(i), pdf_id, - prob); - file_t += prob; - } - } - KALDI_VLOG(2) << "Average like for this file is " << (file_like/file_t) - << " over " << file_t << " frames."; - tot_like += file_like; - tot_t += file_t; - if (num_done % 10 == 0) - KALDI_VLOG(1) << "Avg like per frame so far is " << (tot_like / tot_t); - BaseFloat objf_impr, t; - mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t); - KALDI_LOG << "MLLR objf improvement for utterance " << key << " is " - << (objf_impr/(t+1.0e-10)) << " per frame over " << t - << " frames."; - tot_objf_impr += objf_impr; - tot_t_objf += t; - mllr_writer.Write(feature_reader.Key(), mllr_xforms); - } - } - KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior - << " with no posteriors, " << num_other_error - << " with other errors."; - KALDI_LOG << "Overall objf improvement from MLLR is " << (tot_objf_impr/tot_t_objf) - << " per frame " << " over " << tot_t_objf << " frames."; - KALDI_LOG << "Overall acoustic likelihood was " << (tot_like/tot_t) - << " over " << tot_t << " frames."; - return 0; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - diff --git a/src/gmmbin/gmm-est-rescale.cc b/src/gmmbin/gmm-est-rescale.cc index a432b3d77f6..1e9c1e2aa84 100644 --- a/src/gmmbin/gmm-est-rescale.cc +++ b/src/gmmbin/gmm-est-rescale.cc @@ -21,7 +21,7 @@ #include "util/common-utils.h" #include "gmm/indirect-diff-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { using namespace kaldi; @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) { model_wxfilename = po.GetArg(4); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_rxfilename, &binary_read); diff --git a/src/gmmbin/gmm-est-weights-ebw.cc b/src/gmmbin/gmm-est-weights-ebw.cc index f19343a7ac4..9cf2c2d7d04 100644 --- a/src/gmmbin/gmm-est-weights-ebw.cc +++ b/src/gmmbin/gmm-est-weights-ebw.cc @@ -21,7 +21,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/ebw-diag-gmm.h" int main(int argc, char *argv[]) { @@ -62,7 +62,7 @@ int main(int argc, char *argv[]) { model_out_filename = po.GetArg(4); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); diff --git a/src/gmmbin/gmm-est.cc b/src/gmmbin/gmm-est.cc index 18c836a1f50..5bde0923536 100644 --- a/src/gmmbin/gmm-est.cc +++ b/src/gmmbin/gmm-est.cc @@ -1,6 +1,7 @@ // gmmbin/gmm-est.cc // Copyright 2009-2011 Microsoft Corporation +// 2019 Johns Hopkins University (author: Daniel Povey) // See ../../COPYING for clarification regarding multiple authors // @@ -21,7 +22,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" int main(int argc, 
char *argv[]) { @@ -35,7 +36,6 @@ int main(int argc, char *argv[]) { "e.g.: gmm-est 1.mdl 1.acc 2.mdl\n"; bool binary_write = true; - MleTransitionUpdateConfig tcfg; MleDiagGmmOptions gmm_opts; int32 mixup = 0; int32 mixdown = 0; @@ -61,7 +61,6 @@ int main(int argc, char *argv[]) { "means by standard deviation times this factor."); po.Register("write-occs", &occs_out_filename, "File to write pdf " "occupation counts to."); - tcfg.Register(&po); gmm_opts.Register(&po); po.Read(argc, argv); @@ -79,7 +78,7 @@ int main(int argc, char *argv[]) { model_out_filename = po.GetArg(3); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); @@ -87,23 +86,13 @@ int main(int argc, char *argv[]) { am_gmm.Read(ki.Stream(), binary_read); } - Vector transition_accs; AccumAmDiagGmm gmm_accs; { bool binary; Input ki(stats_filename, &binary); - transition_accs.Read(ki.Stream(), binary); gmm_accs.Read(ki.Stream(), binary, true); // true == add; doesn't matter here. } - if (update_flags & kGmmTransitions) { // Update transition model. - BaseFloat objf_impr, count; - trans_model.MleUpdate(transition_accs, tcfg, &objf_impr, &count); - KALDI_LOG << "Transition model update: Overall " << (objf_impr/count) - << " log-like improvement per frame over " << (count) - << " frames."; - } - { // Update GMMs. BaseFloat objf_impr, count; BaseFloat tot_like = gmm_accs.TotLogLike(), @@ -143,12 +132,10 @@ int main(int argc, char *argv[]) { am_gmm.Write(ko.Stream(), binary_write); } - KALDI_LOG << "Written model to " << model_out_filename; + KALDI_LOG << "Wrote model to " << model_out_filename; return 0; } catch(const std::exception &e) { std::cerr << e.what() << '\n'; return -1; } } - - diff --git a/src/gmmbin/gmm-fmpe-acc-stats.cc b/src/gmmbin/gmm-fmpe-acc-stats.cc deleted file mode 100644 index 4868b63b6ae..00000000000 --- a/src/gmmbin/gmm-fmpe-acc-stats.cc +++ /dev/null @@ -1,155 +0,0 @@ -// gmmbin/gmm-fmpe-acc-stats.cc - -// Copyright 2012 Johns Hopkins University (Author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" -#include "transform/fmpe.h" - - -int main(int argc, char *argv[]) { - using namespace kaldi; - using kaldi::int32; - try { - const char *usage = - "Accumulate stats for fMPE training, using GMM model. Note: this could\n" - "be done using gmm-get-feat-deriv and fmpe-acc-stats (but you'd be computing\n" - "the features twice). 
Features input should be pre-fMPE features.\n" - "\n" - "Usage: gmm-fmpe-acc-stats [options] " - " \n" - "e.g.: \n" - " gmm-fmpe-acc-stats --model-derivative 1.accs 1.mdl 1.fmpe \"$feats\" ark:1.gselect ark:1.post 1.fmpe_stats\n"; - - ParseOptions po(usage); - bool binary = true; - std::string model_derivative_rxfilename; - po.Register("binary", &binary, "If true, write stats in binary mode."); - po.Register("model-derivative", &model_derivative_rxfilename, - "GMM-accs file containing model derivative [note: contains no transition stats]. Used for indirect differential. Warning: this will only work correctly in the case of MMI/BMMI objective function, with non-canceled stats."); - po.Read(argc, argv); - - if (po.NumArgs() != 6) { - po.PrintUsage(); - exit(1); - } - - std::string model_rxfilename = po.GetArg(1), - fmpe_rxfilename = po.GetArg(2), - feature_rspecifier = po.GetArg(3), - gselect_rspecifier = po.GetArg(4), - posteriors_rspecifier = po.GetArg(5), - stats_wxfilename = po.GetArg(6); - - AmDiagGmm am_gmm; - TransitionModel trans_model; - { - bool binary; - Input ki(model_rxfilename, &binary); - trans_model.Read(ki.Stream(), binary); - am_gmm.Read(ki.Stream(), binary); - } - - Fmpe fmpe; - ReadKaldiObject(fmpe_rxfilename, &fmpe); - - - bool have_indirect = (model_derivative_rxfilename != ""); - AccumAmDiagGmm model_derivative; - if (have_indirect) - ReadKaldiObject(model_derivative_rxfilename, &model_derivative); - - FmpeStats fmpe_stats(fmpe); - - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier); - RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); - - BaseFloat tot_like = 0.0; // tot like weighted by posterior. - int32 num_frames = 0; - int32 num_done = 0, num_err = 0; - - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string key = feature_reader.Key(); - if (!posteriors_reader.HasKey(key)) { - num_err++; - KALDI_WARN << "No posteriors for utterance " << key; - continue; - } - const Matrix &feat_in = feature_reader.Value(); - const Posterior &posterior = posteriors_reader.Value(key); - - if (static_cast(posterior.size()) != feat_in.NumRows()) { - KALDI_WARN << "Posterior vector has wrong size " << - (posterior.size()) << " vs. "<< (feat_in.NumRows()); - num_err++; - continue; - } - - if (!gselect_reader.HasKey(key)) { - KALDI_WARN << "No gselect information for key " << key; - num_err++; - continue; - } - const std::vector > &gselect = - gselect_reader.Value(key); - if (static_cast(gselect.size()) != feat_in.NumRows()) { - KALDI_WARN << "gselect information has wrong size"; - num_err++; - continue; - } - - num_done++; - Matrix fmpe_feat(feat_in.NumRows(), feat_in.NumCols()); - fmpe.ComputeFeatures(feat_in, gselect, &fmpe_feat); - fmpe_feat.AddMat(1.0, feat_in); - - Matrix direct_deriv, indirect_deriv; - - tot_like += ComputeAmGmmFeatureDeriv(am_gmm, trans_model, posterior, - fmpe_feat, &direct_deriv, - (have_indirect ? &model_derivative : NULL), - (have_indirect ? &indirect_deriv : NULL)); - num_frames += feat_in.NumRows(); - - fmpe.AccStats(feat_in, gselect, direct_deriv, - (have_indirect ? 
&indirect_deriv : NULL), &fmpe_stats); - - if (num_done % 100 == 0) - KALDI_LOG << "Processed " << num_done << " utterances."; - } - - KALDI_LOG << "Done " << num_done << " files, " << num_err - << " with errors."; - KALDI_LOG << "Overall weighted acoustic likelihood per frame is " - << (tot_like/num_frames) << " over " << num_frames << " frames."; - - Output ko(stats_wxfilename, binary); - fmpe_stats.Write(ko.Stream(), binary); - - return (num_done != 0 ? 0 : 1); - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} - - diff --git a/src/gmmbin/gmm-get-stats-deriv.cc b/src/gmmbin/gmm-get-stats-deriv.cc index 939fe260b34..a6fd9764719 100644 --- a/src/gmmbin/gmm-get-stats-deriv.cc +++ b/src/gmmbin/gmm-get-stats-deriv.cc @@ -21,7 +21,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/indirect-diff-diag-gmm.h" int main(int argc, char *argv[]) { @@ -64,7 +64,7 @@ int main(int argc, char *argv[]) { deriv_wxfilename = po.GetArg(5); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_rxfilename, &binary_read); diff --git a/src/gmmbin/gmm-global-est-fmllr.cc b/src/gmmbin/gmm-global-est-fmllr.cc index b1d5b68e594..951b8addf2d 100644 --- a/src/gmmbin/gmm-global-est-fmllr.cc +++ b/src/gmmbin/gmm-global-est-fmllr.cc @@ -25,7 +25,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" namespace kaldi { diff --git a/src/gmmbin/gmm-global-est-lvtln-trans.cc b/src/gmmbin/gmm-global-est-lvtln-trans.cc index 10bb5bec5d5..95b56503f2c 100644 --- a/src/gmmbin/gmm-global-est-lvtln-trans.cc +++ b/src/gmmbin/gmm-global-est-lvtln-trans.cc @@ -26,7 +26,7 @@ using std::vector; #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/lvtln.h" #include "hmm/posterior.h" diff --git a/src/gmmbin/gmm-global-info.cc b/src/gmmbin/gmm-global-info.cc index 7c21005b449..00222ef81c3 100644 --- a/src/gmmbin/gmm-global-info.cc +++ b/src/gmmbin/gmm-global-info.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { diff --git a/src/gmmbin/gmm-gselect.cc b/src/gmmbin/gmm-gselect.cc index a873b962591..357998e996d 100644 --- a/src/gmmbin/gmm-gselect.cc +++ b/src/gmmbin/gmm-gselect.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { diff --git a/src/gmmbin/gmm-info.cc b/src/gmmbin/gmm-info.cc index 31f7aea0921..689c68150ec 100644 --- a/src/gmmbin/gmm-info.cc +++ b/src/gmmbin/gmm-info.cc @@ -20,7 +20,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { try { @@ -46,7 +46,7 @@ int main(int argc, char *argv[]) { std::string model_in_filename = po.GetArg(1); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); @@ 
-58,8 +58,6 @@ int main(int argc, char *argv[]) { std::cout << "number of pdfs " << trans_model.NumPdfs() << '\n'; std::cout << "number of transition-ids " << trans_model.NumTransitionIds() << '\n'; - std::cout << "number of transition-states " - << trans_model.NumTransitionStates() << '\n'; std::cout << "feature dimension " << am_gmm.Dim() << '\n'; std::cout << "number of gaussians " << am_gmm.NumGauss() << '\n'; return 0; diff --git a/src/gmmbin/gmm-init-biphone.cc b/src/gmmbin/gmm-init-biphone.cc index 0775a5c7b23..10fc9ad4048 100644 --- a/src/gmmbin/gmm-init-biphone.cc +++ b/src/gmmbin/gmm-init-biphone.cc @@ -23,8 +23,8 @@ #include "gmm/am-diag-gmm.h" #include "tree/event-map.h" #include "tree/context-dep.h" -#include "hmm/hmm-topology.h" -#include "hmm/transition-model.h" +#include "hmm/topology.h" +#include "hmm/transitions.h" namespace kaldi { // This function reads a file like: @@ -314,7 +314,7 @@ int main(int argc, char *argv[]) { Vector glob_mean(dim); glob_mean.Set(1.0); - HmmTopology topo; + Topology topo; bool binary_in; Input ki(topo_filename, &binary_in); topo.Read(ki.Stream(), binary_in); @@ -375,7 +375,7 @@ int main(int argc, char *argv[]) { am_gmm.AddPdf(gmm); // Now the transition model: - TransitionModel trans_model(*ctx_dep, topo); + Transitions trans_model(*ctx_dep, topo); { Output ko(model_filename, binary); diff --git a/src/gmmbin/gmm-init-model-flat.cc b/src/gmmbin/gmm-init-model-flat.cc index fecd91f49fd..d41b99c35e6 100644 --- a/src/gmmbin/gmm-init-model-flat.cc +++ b/src/gmmbin/gmm-init-model-flat.cc @@ -21,7 +21,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" #include "tree/build-tree-utils.h" #include "tree/context-dep.h" @@ -104,7 +104,7 @@ int main(int argc, char *argv[]) { ContextDependency ctx_dep; ReadKaldiObject(tree_filename, &ctx_dep); - HmmTopology topo; + Topology topo; ReadKaldiObject(topo_filename, &topo); Vector global_inverse_var, global_mean; @@ -138,7 +138,7 @@ int main(int argc, char *argv[]) { for (int i = 0; i < num_pdfs; i++) am_gmm.AddPdf(gmm); - TransitionModel trans_model(ctx_dep, topo); + Transitions trans_model(ctx_dep, topo); { Output ko(model_out_filename, binary); diff --git a/src/gmmbin/gmm-init-model.cc b/src/gmmbin/gmm-init-model.cc index e2d943b19eb..a081f326b1c 100644 --- a/src/gmmbin/gmm-init-model.cc +++ b/src/gmmbin/gmm-init-model.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" #include "tree/build-tree-utils.h" #include "tree/context-dep.h" @@ -35,7 +35,7 @@ namespace kaldi { void InitAmGmm(const BuildTreeStatsType &stats, const EventMap &to_pdf_map, AmDiagGmm *am_gmm, - const TransitionModel &trans_model, + const Transitions &trans_model, BaseFloat var_floor) { // Get stats split by tree-leaf ( == pdf): std::vector split_stats; @@ -126,7 +126,7 @@ void InitAmGmmFromOld(const BuildTreeStatsType &stats, ContextDependency old_tree; { // Read old_gm_gmm bool binary_in; - TransitionModel old_trans_model; + Transitions old_trans_model; Input ki(old_model_rxfilename, &binary_in); old_trans_model.Read(ki.Stream(), binary_in); old_am_gmm.Read(ki.Stream(), binary_in); @@ -270,12 +270,12 @@ int main(int argc, char *argv[]) { } KALDI_LOG << "Number of separate statistics is " << stats.size(); - HmmTopology topo; + Topology topo; 
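Note: the TransitionModel -> Transitions and HmmTopology -> Topology renames in the hunks above and below (with hmm/transition-model.h -> hmm/transitions.h and hmm/hmm-topology.h -> hmm/topology.h) are mechanical across the gmmbin tools; every binary keeps the same two-object model file layout. A minimal sketch of the shared read pattern, assuming only the renamed headers from this patch (the helper name ReadAmAndTransitions is illustrative and not part of the patch):

  #include "gmm/am-diag-gmm.h"
  #include "hmm/transitions.h"
  #include "util/common-utils.h"

  // Reads the two-part model file these tools use: the Transitions object
  // comes first in the stream, then the AmDiagGmm, exactly as in the hunks above.
  void ReadAmAndTransitions(const std::string &model_rxfilename,
                            kaldi::AmDiagGmm *am_gmm,
                            kaldi::Transitions *trans_model) {
    bool binary_read;
    kaldi::Input ki(model_rxfilename, &binary_read);
    trans_model->Read(ki.Stream(), binary_read);
    am_gmm->Read(ki.Stream(), binary_read);
  }
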
ReadKaldiObject(topo_filename, &topo); const EventMap &to_pdf = ctx_dep.ToPdfMap(); // not owned here. - TransitionModel trans_model(ctx_dep, topo); + Transitions trans_model(ctx_dep, topo); // Now, the summed_stats will be used to initialize the GMM. AmDiagGmm am_gmm; diff --git a/src/gmmbin/gmm-init-mono.cc b/src/gmmbin/gmm-init-mono.cc index 3c370c36515..a91948e446b 100644 --- a/src/gmmbin/gmm-init-mono.cc +++ b/src/gmmbin/gmm-init-mono.cc @@ -21,8 +21,8 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/hmm-topology.h" -#include "hmm/transition-model.h" +#include "hmm/topology.h" +#include "hmm/transitions.h" #include "tree/context-dep.h" namespace kaldi { @@ -116,7 +116,7 @@ int main(int argc, char *argv[]) { glob_mean.CopyFromVec(mean_stats); } - HmmTopology topo; + Topology topo; bool binary_in; Input ki(topo_filename, &binary_in); topo.Read(ki.Stream(), binary_in); @@ -164,7 +164,7 @@ int main(int argc, char *argv[]) { } // Now the transition model: - TransitionModel trans_model(*ctx_dep, topo); + Transitions trans_model(*ctx_dep, topo); { Output ko(model_filename, binary); diff --git a/src/gmmbin/gmm-ismooth-stats.cc b/src/gmmbin/gmm-ismooth-stats.cc index b29e1efc1c3..a524d27b47b 100644 --- a/src/gmmbin/gmm-ismooth-stats.cc +++ b/src/gmmbin/gmm-ismooth-stats.cc @@ -21,7 +21,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/ebw-diag-gmm.h" int main(int argc, char *argv[]) { @@ -77,7 +77,7 @@ int main(int argc, char *argv[]) { stats.Write(ko.Stream(), binary_write); } else if (smooth_from_model) { // Smoothing from model... AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; Vector dst_transition_accs; AccumAmDiagGmm dst_stats; { // read src model diff --git a/src/gmmbin/gmm-latgen-biglm-faster.cc b/src/gmmbin/gmm-latgen-biglm-faster.cc index d4e0645b16c..0d881b41ebb 100644 --- a/src/gmmbin/gmm-latgen-biglm-faster.cc +++ b/src/gmmbin/gmm-latgen-biglm-faster.cc @@ -24,7 +24,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/lattice-biglm-faster-decoder.h" #include "gmm/decodable-am-diag-gmm.h" @@ -35,7 +35,7 @@ namespace kaldi { // Takes care of output. Returns true on success. bool DecodeUtterance(LatticeBiglmFasterDecoder &decoder, // not const but is really an input. DecodableInterface &decodable, // not const but is really an input. 
- const TransitionModel &trans_model, + const Transitions &trans_model, const fst::SymbolTable *word_syms, std::string utt, double acoustic_scale, @@ -186,7 +186,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetOptArg(7), alignment_wspecifier = po.GetOptArg(8); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-latgen-faster-parallel.cc b/src/gmmbin/gmm-latgen-faster-parallel.cc index 41f414bcb9c..8cc0aa5dad4 100644 --- a/src/gmmbin/gmm-latgen-faster-parallel.cc +++ b/src/gmmbin/gmm-latgen-faster-parallel.cc @@ -24,7 +24,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" #include "gmm/decodable-am-diag-gmm.h" @@ -82,7 +82,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetOptArg(5), alignment_wspecifier = po.GetOptArg(6); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-latgen-faster-regtree-fmllr.cc b/src/gmmbin/gmm-latgen-faster-regtree-fmllr.cc deleted file mode 100644 index 36031b13c1e..00000000000 --- a/src/gmmbin/gmm-latgen-faster-regtree-fmllr.cc +++ /dev/null @@ -1,218 +0,0 @@ -// gmmbin/gmm-latgen-faster-regtree-fmllr.cc - -// Copyright 2009-2012 Microsoft Corporation -// 2012-2013 Johns Hopkins University (author: Daniel Povey) -// 2014 Alpha Cephei Inc. - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. 
- - -#include "base/kaldi-common.h" -#include "util/common-utils.h" -#include "gmm/am-diag-gmm.h" -#include "tree/context-dep.h" -#include "hmm/transition-model.h" -#include "fstext/fstext-lib.h" -#include "decoder/decoder-wrappers.h" -#include "gmm/decodable-am-diag-gmm.h" -#include "base/timer.h" -#include "transform/regression-tree.h" -#include "transform/regtree-fmllr-diag-gmm.h" -#include "transform/decodable-am-diag-gmm-regtree.h" -#include "feat/feature-functions.h" // feature reversal - -int main(int argc, char *argv[]) { - try { - using namespace kaldi; - typedef kaldi::int32 int32; - using fst::SymbolTable; - using fst::Fst; - using fst::StdArc; - - const char *usage = - "Generate lattices using GMM-based model and RegTree-FMLLR adaptation.\n" - "Usage: gmm-latgen-faster-regtree-fmllr [options] model-in regtree-in (fst-in|fsts-rspecifier) features-rspecifier transform-rspecifier" - " lattice-wspecifier [ words-wspecifier [alignments-wspecifier] ]\n"; - ParseOptions po(usage); - Timer timer; - bool allow_partial = false; - BaseFloat acoustic_scale = 0.1; - LatticeFasterDecoderConfig config; - - std::string word_syms_filename, utt2spk_rspecifier; - config.Register(&po); - po.Register("utt2spk", &utt2spk_rspecifier, "rspecifier for utterance to " - "speaker map used to load the transform"); - po.Register("acoustic-scale", &acoustic_scale, - "Scaling factor for acoustic likelihoods"); - po.Register("word-symbol-table", &word_syms_filename, - "Symbol table for words [for debug output]"); - po.Register("allow-partial", &allow_partial, - "If true, produce output even if end state was not reached."); - - po.Read(argc, argv); - - if (po.NumArgs() < 4 || po.NumArgs() > 6) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - regtree_in_str = po.GetArg(2), - fst_in_str = po.GetArg(3), - feature_rspecifier = po.GetArg(4), - xforms_rspecifier = po.GetArg(5), - lattice_wspecifier = po.GetArg(6), - words_wspecifier = po.GetOptArg(7), - alignment_wspecifier = po.GetOptArg(8); - - TransitionModel trans_model; - AmDiagGmm am_gmm; - { - bool binary; - Input ki(model_in_filename, &binary); - trans_model.Read(ki.Stream(), binary); - am_gmm.Read(ki.Stream(), binary); - } - - RegressionTree regtree; - { - bool binary_read; - Input in(regtree_in_str, &binary_read); - regtree.Read(in.Stream(), binary_read, am_gmm); - } - - RandomAccessRegtreeFmllrDiagGmmReaderMapped fmllr_reader(xforms_rspecifier, - utt2spk_rspecifier); - - bool determinize = config.determinize_lattice; - CompactLatticeWriter compact_lattice_writer; - LatticeWriter lattice_writer; - if (! (determinize ? compact_lattice_writer.Open(lattice_wspecifier) - : lattice_writer.Open(lattice_wspecifier))) - KALDI_ERR << "Could not open table for writing lattices: " - << lattice_wspecifier; - - Int32VectorWriter words_writer(words_wspecifier); - - Int32VectorWriter alignment_writer(alignment_wspecifier); - - fst::SymbolTable *word_syms = NULL; - if (word_syms_filename != "") - if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename))) - KALDI_ERR << "Could not read symbol table from file " - << word_syms_filename; - - double tot_like = 0.0; - kaldi::int64 frame_count = 0; - int num_done = 0, num_err = 0; - - if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) { - SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); - // Input FST is just one FST, not a table of FSTs. 
- Fst *decode_fst = fst::ReadFstKaldiGeneric(fst_in_str); - - { - LatticeFasterDecoder decoder(*decode_fst, config); - - for (; !feature_reader.Done(); feature_reader.Next()) { - std::string utt = feature_reader.Key(); - Matrix features (feature_reader.Value()); - feature_reader.FreeCurrent(); - if (features.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_err++; - continue; - } - if (!fmllr_reader.HasKey(utt)) { - KALDI_WARN << "Not decoding utterance " << utt - << " because no transform available."; - num_err++; - continue; - } - - RegtreeFmllrDiagGmm fmllr(fmllr_reader.Value(utt)); - - kaldi::DecodableAmDiagGmmRegtreeFmllr gmm_decodable(am_gmm, trans_model, - features, fmllr, - regtree, - acoustic_scale); - double like; - if (DecodeUtteranceLatticeFaster( - decoder, gmm_decodable, trans_model, word_syms, utt, acoustic_scale, - determinize, allow_partial, &alignment_writer, &words_writer, - &compact_lattice_writer, &lattice_writer, &like)) { - tot_like += like; - frame_count += features.NumRows(); - num_done++; - } else num_err++; - } - } - delete decode_fst; // delete this only after decoder goes out of scope. - } else { // We have different FSTs for different utterances. - SequentialTableReader fst_reader(fst_in_str); - RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier); - for (; !fst_reader.Done(); fst_reader.Next()) { - std::string utt = fst_reader.Key(); - const Matrix &features = feature_reader.Value(utt); - if (features.NumRows() == 0) { - KALDI_WARN << "Zero-length utterance: " << utt; - num_err++; - continue; - } - if (!fmllr_reader.HasKey(utt)) { - KALDI_WARN << "Not decoding utterance " << utt - << " because no transform available."; - num_err++; - continue; - } - - RegtreeFmllrDiagGmm fmllr(fmllr_reader.Value(utt)); - kaldi::DecodableAmDiagGmmRegtreeFmllr gmm_decodable(am_gmm, trans_model, - features, fmllr, - regtree, - acoustic_scale); - - LatticeFasterDecoder decoder(fst_reader.Value(), config); - double like; - if (DecodeUtteranceLatticeFaster( - decoder, gmm_decodable, trans_model, word_syms, utt, acoustic_scale, - determinize, allow_partial, &alignment_writer, &words_writer, - &compact_lattice_writer, &lattice_writer, &like)) { - tot_like += like; - frame_count += features.NumRows(); - num_done++; - } else num_err++; - } - } - - double elapsed = timer.Elapsed(); - KALDI_LOG << "Time taken "<< elapsed - << "s: real-time factor assuming 100 frames/sec is " - << (elapsed*100.0/frame_count); - KALDI_LOG << "Done " << num_done << " utterances, failed for " - << num_err; - KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like/frame_count) << " over " - << frame_count << " frames."; - - delete word_syms; - if (num_done != 0) return 0; - else return 1; - } catch(const std::exception &e) { - std::cerr << e.what(); - return -1; - } -} diff --git a/src/gmmbin/gmm-latgen-faster.cc b/src/gmmbin/gmm-latgen-faster.cc index 6bc475d1b79..75a9d95aacd 100644 --- a/src/gmmbin/gmm-latgen-faster.cc +++ b/src/gmmbin/gmm-latgen-faster.cc @@ -24,7 +24,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" #include "gmm/decodable-am-diag-gmm.h" @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetOptArg(5), alignment_wspecifier = po.GetOptArg(6); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git 
a/src/gmmbin/gmm-latgen-map.cc b/src/gmmbin/gmm-latgen-map.cc index ccc15f5a20c..6717eaadacb 100644 --- a/src/gmmbin/gmm-latgen-map.cc +++ b/src/gmmbin/gmm-latgen-map.cc @@ -26,7 +26,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "gmm/mle-am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/fmllr-diag-gmm.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetOptArg(6), alignment_wspecifier = po.GetOptArg(7); - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input is(model_in_filename, &binary_read); diff --git a/src/gmmbin/gmm-latgen-simple.cc b/src/gmmbin/gmm-latgen-simple.cc index 812bee7fef4..d7ffe86c4ae 100644 --- a/src/gmmbin/gmm-latgen-simple.cc +++ b/src/gmmbin/gmm-latgen-simple.cc @@ -24,7 +24,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "fstext/fstext-lib.h" #include "decoder/decoder-wrappers.h" #include "gmm/decodable-am-diag-gmm.h" @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) { words_wspecifier = po.GetOptArg(5), alignment_wspecifier = po.GetOptArg(6); - TransitionModel trans_model; + Transitions trans_model; AmDiagGmm am_gmm; { bool binary; diff --git a/src/gmmbin/gmm-make-regtree.cc b/src/gmmbin/gmm-make-regtree.cc deleted file mode 100644 index 8c79d013e0d..00000000000 --- a/src/gmmbin/gmm-make-regtree.cc +++ /dev/null @@ -1,107 +0,0 @@ -// gmmbin/gmm-make-regtree.cc - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include "base/kaldi-common.h" -#include "util/kaldi-io.h" -#include "util/text-utils.h" -#include "gmm/mle-am-diag-gmm.h" -#include "tree/context-dep.h" -#include "hmm/transition-model.h" -#include "transform/regression-tree.h" - - -int main(int argc, char *argv[]) { - try { - typedef kaldi::int32 int32; - typedef kaldi::BaseFloat BaseFloat; - - const char *usage = - "Build regression class tree.\n" - "Usage: gmm-make-regtree [options] \n" - "E.g.: gmm-make-regtree --silphones=1:2:3 --state-occs=1.occs 1.mdl 1.regtree\n" - " [Note: state-occs come from --write-occs option of gmm-est]\n"; - - std::string occs_in_filename; - std::string sil_phones_str; - bool binary_write = true; - int32 max_leaves = 1; - kaldi::ParseOptions po(usage); - po.Register("state-occs", &occs_in_filename, "File containing state occupancies (use --write-occs in gmm-est)"); - po.Register("sil-phones", &sil_phones_str, "Colon-separated list of integer ids of silence phones, e.g. 
1:2:3; if used, create top-level speech/sil split (only one reg-class for silence)."); - po.Register("binary", &binary_write, "Write output in binary mode"); - po.Register("max-leaves", &max_leaves, "Maximum number of leaves in regression tree."); - po.Read(argc, argv); - - if (po.NumArgs() != 2) { - po.PrintUsage(); - exit(1); - } - - std::string model_in_filename = po.GetArg(1), - tree_out_filename = po.GetArg(2); - - kaldi::AmDiagGmm am_gmm; - kaldi::TransitionModel trans_model; - { - bool binary_read; - kaldi::Input ki(model_in_filename, &binary_read); - trans_model.Read(ki.Stream(), binary_read); - am_gmm.Read(ki.Stream(), binary_read); - } - - kaldi::Vector state_occs; - if (occs_in_filename != "") { - bool binary_read; - kaldi::Input ki(occs_in_filename, &binary_read); - state_occs.Read(ki.Stream(), binary_read); - } else { - KALDI_LOG << "--state-occs option not provided so using constant occupancies."; - state_occs.Resize(am_gmm.NumPdfs()); - state_occs.Set(1.0); - } - - std::vector sil_pdfs; - if (sil_phones_str != "") { - std::vector sil_phones; - if (!kaldi::SplitStringToIntegers(sil_phones_str, ":", false, &sil_phones)) - KALDI_ERR << "invalid sil-phones option " << sil_phones_str; - std::sort(sil_phones.begin(), sil_phones.end()); - bool ans = GetPdfsForPhones(trans_model, sil_phones, &sil_pdfs); - if (!ans) - KALDI_WARN << "Pdfs associated with silence phones are not only " - "associated with silence phones: your speech-silence split " - "may not be meaningful."; - } - - kaldi::RegressionTree regtree; - regtree.BuildTree(state_occs, sil_pdfs, am_gmm, max_leaves); - // Write out the regression tree - { - kaldi::Output ko(tree_out_filename, binary_write); - regtree.Write(ko.Stream(), binary_write); - } - - KALDI_LOG << "Written regression tree to " << tree_out_filename; - } catch(const std::exception &e) { - std::cerr << e.what() << '\n'; - return -1; - } -} - - diff --git a/src/gmmbin/gmm-mixup.cc b/src/gmmbin/gmm-mixup.cc index a76b3805d89..51919560b10 100644 --- a/src/gmmbin/gmm-mixup.cc +++ b/src/gmmbin/gmm-mixup.cc @@ -21,7 +21,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "gmm/mle-am-diag-gmm.h" int main(int argc, char *argv[]) { @@ -70,7 +70,7 @@ int main(int argc, char *argv[]) { model_out_filename = po.GetArg(3); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_filename, &binary_read); diff --git a/src/gmmbin/gmm-post-to-gpost.cc b/src/gmmbin/gmm-post-to-gpost.cc index 59da0f9a1ac..1260c9b922a 100644 --- a/src/gmmbin/gmm-post-to-gpost.cc +++ b/src/gmmbin/gmm-post-to-gpost.cc @@ -22,7 +22,7 @@ #include "base/kaldi-common.h" #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/posterior.h" int main(int argc, char *argv[]) { @@ -56,7 +56,7 @@ int main(int argc, char *argv[]) { typedef kaldi::int32 int32; AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_filename, &binary); diff --git a/src/gmmbin/gmm-rescore-lattice.cc b/src/gmmbin/gmm-rescore-lattice.cc index 54156442e64..36088cac304 100644 --- a/src/gmmbin/gmm-rescore-lattice.cc +++ b/src/gmmbin/gmm-rescore-lattice.cc @@ -22,7 +22,7 @@ #include "util/common-utils.h" #include "util/stl-utils.h" #include "gmm/am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include 
"fstext/fstext-lib.h" #include "lat/kaldi-lattice.h" #include "lat/lattice-functions.h" @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) { lats_wspecifier = po.GetArg(4); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary; Input ki(model_filename, &binary); diff --git a/src/gmmbin/gmm-sum-accs.cc b/src/gmmbin/gmm-sum-accs.cc index c9886e867f5..6d99c4a35c9 100644 --- a/src/gmmbin/gmm-sum-accs.cc +++ b/src/gmmbin/gmm-sum-accs.cc @@ -19,7 +19,7 @@ #include "util/common-utils.h" #include "gmm/mle-am-diag-gmm.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" int main(int argc, char *argv[]) { @@ -50,16 +50,12 @@ int main(int argc, char *argv[]) { std::string stats_in_filename = po.GetArg(i); bool binary_read; kaldi::Input ki(stats_in_filename, &binary_read); - transition_accs.Read(ki.Stream(), binary_read, true /*add read values*/); gmm_accs.Read(ki.Stream(), binary_read, true /*add read values*/); } // Write out the accs - { - kaldi::Output ko(stats_out_filename, binary); - transition_accs.Write(ko.Stream(), binary); - gmm_accs.Write(ko.Stream(), binary); - } + WriteKaldiObject(gmm_accs, stats_out_filename, binary); + KALDI_LOG << "Summed " << num_accs << " stats, total count " << gmm_accs.TotCount() << ", avg like/frame " << (gmm_accs.TotLogLike() / gmm_accs.TotCount()); @@ -70,5 +66,3 @@ int main(int argc, char *argv[]) { return -1; } } - - diff --git a/src/gmmbin/gmm-transform-means-global.cc b/src/gmmbin/gmm-transform-means-global.cc index 6b1a6be8330..857b602c19b 100644 --- a/src/gmmbin/gmm-transform-means-global.cc +++ b/src/gmmbin/gmm-transform-means-global.cc @@ -22,7 +22,7 @@ #include "util/common-utils.h" #include "gmm/diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/mllt.h" int main(int argc, char *argv[]) { diff --git a/src/gmmbin/gmm-transform-means.cc b/src/gmmbin/gmm-transform-means.cc index 5c08ec32b10..3a27d73a947 100644 --- a/src/gmmbin/gmm-transform-means.cc +++ b/src/gmmbin/gmm-transform-means.cc @@ -22,7 +22,7 @@ #include "util/common-utils.h" #include "gmm/am-diag-gmm.h" #include "tree/context-dep.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "transform/mllt.h" int main(int argc, char *argv[]) { @@ -55,7 +55,7 @@ int main(int argc, char *argv[]) { ReadKaldiObject(mat_rxfilename, &mat); AmDiagGmm am_gmm; - TransitionModel trans_model; + Transitions trans_model; { bool binary_read; Input ki(model_in_rxfilename, &binary_read); diff --git a/src/gst-plugin/gst-online-gmm-decode-faster.cc b/src/gst-plugin/gst-online-gmm-decode-faster.cc index 958bce41d80..094d398960a 100644 --- a/src/gst-plugin/gst-online-gmm-decode-faster.cc +++ b/src/gst-plugin/gst-online-gmm-decode-faster.cc @@ -389,7 +389,7 @@ gst_online_gmm_decode_faster_allocate(GstOnlineGmmDecodeFaster * filter) { Input ki(filter->lda_mat_rspecifier_, &binary_in); filter->lda_transform_->Read(ki.Stream(), binary_in); } - filter->trans_model_ = new TransitionModel(); + filter->trans_model_ = new Transitions(); filter->am_gmm_ = new AmDiagGmm(); { bool binary; diff --git a/src/gst-plugin/gst-online-gmm-decode-faster.h b/src/gst-plugin/gst-online-gmm-decode-faster.h index b950d1e0a12..529c510115a 100644 --- a/src/gst-plugin/gst-online-gmm-decode-faster.h +++ b/src/gst-plugin/gst-online-gmm-decode-faster.h @@ -65,7 +65,7 @@ struct _GstOnlineGmmDecodeFaster { OnlineFasterDecoder *decoder_; Matrix *lda_transform_; - TransitionModel *trans_model_; + Transitions 
*trans_model_; AmDiagGmm *am_gmm_; fst::Fst *decode_fst_; fst::SymbolTable *word_syms_; diff --git a/src/hmm/Makefile b/src/hmm/Makefile index 0ad5da74c28..fb8c57397c8 100644 --- a/src/hmm/Makefile +++ b/src/hmm/Makefile @@ -3,14 +3,13 @@ all: include ../kaldi.mk -TESTFILES = hmm-topology-test hmm-utils-test transition-model-test posterior-test +TESTFILES = topology-test hmm-utils-test transitions-test posterior-test -OBJFILES = hmm-topology.o transition-model.o hmm-utils.o tree-accu.o \ +OBJFILES = topology.o transitions.o hmm-utils.o tree-accu.o \ posterior.o hmm-test-utils.o LIBNAME = kaldi-hmm -ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \ - ../base/kaldi-base.a +ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \ + ../base/kaldi-base.a include ../makefiles/default_rules.mk - diff --git a/src/hmm/hmm-test-utils.cc b/src/hmm/hmm-test-utils.cc index ceca116c828..6eae1a119b2 100644 --- a/src/hmm/hmm-test-utils.cc +++ b/src/hmm/hmm-test-utils.cc @@ -23,7 +23,7 @@ namespace kaldi { -TransitionModel *GenRandTransitionModel(ContextDependency **ctx_dep_out) { +Transitions *GenRandTransitions(ContextDependency **ctx_dep_out) { std::vector phones; phones.push_back(1); for (int32 i = 2; i < 20; i++) @@ -38,16 +38,16 @@ TransitionModel *GenRandTransitionModel(ContextDependency **ctx_dep_out) { GenRandContextDependencyLarge(phones, N, P, true, &num_pdf_classes); - HmmTopology topo = GenRandTopology(phones, num_pdf_classes); + Topology topo = GenRandTopology(phones, num_pdf_classes); - TransitionModel *trans_model = new TransitionModel(*ctx_dep, topo); + Transitions *trans_model = new Transitions(*ctx_dep, topo); if (ctx_dep_out == NULL) delete ctx_dep; else *ctx_dep_out = ctx_dep; return trans_model; } -HmmTopology GetDefaultTopology(const std::vector &phones_in) { +Topology GetDefaultTopology(const std::vector &phones_in) { std::vector phones(phones_in); std::sort(phones.begin(), phones.end()); KALDI_ASSERT(IsSortedAndUniq(phones) && !phones.empty()); @@ -59,24 +59,19 @@ HmmTopology GetDefaultTopology(const std::vector &phones_in) { for (size_t i = 0; i < phones.size(); i++) topo_string << phones[i] << " "; - topo_string << "\n" - " 0 0\n" - " 0 0.5\n" - " 1 0.5\n" - " \n" - " 1 1 \n" - " 1 0.5\n" - " 2 0.5\n" - " \n" - " 2 2\n" - " 2 0.5\n" - " 3 0.5\n" - " \n" - " 3 \n" - " \n" - " \n"; - - HmmTopology topo; + topo_string << + "\n" + "0 1 1 0.0\n" + "1 1 1 0.693\n" + "1 2 2 0.693\n" + "2 2 2 0.693\n" + "2 3 3 0.693\n" + "3 3 3 0.693\n" + "3 0.693\n\n" + "\n" + "\n"; + + Topology topo; std::istringstream iss(topo_string.str()); topo.Read(iss, false); return topo; @@ -84,15 +79,15 @@ HmmTopology GetDefaultTopology(const std::vector &phones_in) { } -HmmTopology GenRandTopology(const std::vector &phones_in, - const std::vector &num_pdf_classes) { +Topology GenRandTopology(const std::vector &phones_in, + const std::vector &num_pdf_classes) { std::vector phones(phones_in); std::sort(phones.begin(), phones.end()); KALDI_ASSERT(IsSortedAndUniq(phones) && !phones.empty()); std::ostringstream topo_string; - std::map > num_pdf_classes_to_phones; + std::map > num_pdf_classes_to_phones; for (size_t i = 0; i < phones.size(); i++) { int32 p = phones[i]; KALDI_ASSERT(static_cast(p) < num_pdf_classes.size()); @@ -112,66 +107,43 @@ HmmTopology GenRandTopology(const std::vector &phones_in, const std::vector &phones = iter->second; for (size_t i = 0; i < phones.size(); i++) topo_string << phones[i] << " "; - topo_string << " "; 
- bool ergodic = (RandInt(0, 1) == 0); - if (ergodic) { - // Note, this type of topology is not something we ever use in practice- it - // has an initial nonemitting state (no PdfClass specified). But it's - // supported so we're testing it. - std::vector state_to_pdf_class; - state_to_pdf_class.push_back(-1); // state zero, nonemitting. - for (int32 i = 0; i < this_num_pdf_classes; i++) { - int32 num_states = RandInt(1, 2); - for (int32 j = 0; j < num_states; j++) - state_to_pdf_class.push_back(i); - } - state_to_pdf_class.push_back(-1); // final non-emitting state. - { // state zero is nonemitting. This is not something used in any current - // example script. - topo_string << " 0\n"; - BaseFloat prob = 1.0 / (state_to_pdf_class.size() - 2); - for (size_t i = 1; i + 1 < state_to_pdf_class.size(); i++) { - topo_string << " " << i << ' ' << prob << '\n'; - } - topo_string << "\n"; - } - // ergodic part. - for (size_t i = 1; i + 1 < state_to_pdf_class.size(); i++) { - BaseFloat prob = 1.0 / (state_to_pdf_class.size() - 1); - topo_string << " " << i << " " - << state_to_pdf_class[i] << '\n'; - for (size_t j = 1; j < state_to_pdf_class.size(); j++) - topo_string << " " << j << ' ' << prob << '\n'; - topo_string << "\n"; - } - // final, nonemitting state. No pdf-class, no transitions. - topo_string << " " << (state_to_pdf_class.size() - 1) << " \n"; - } else { - // feedforward topology. - int32 cur_state = 0; - for (int32 pdf_class = 0; pdf_class < this_num_pdf_classes; pdf_class++) { - int32 this_num_states = RandInt(1, 2); - for (int32 s = 0; s < this_num_states; s++) { - topo_string << " " << cur_state << " " << pdf_class - << "\n " << cur_state << " 0.5\n " - << (cur_state + 1) << " 0.5\n\n"; - cur_state++; - } - } - // final, non-emitting state. - topo_string << " " << cur_state << " \n"; + topo_string << "\n"; + + switch (this_num_pdf_classes) { + case 1: + topo_string << "0 1 1 0.0\n" + "1 1 1 0.693\n" + "1 0.693\n\n"; + break; + case 2: + topo_string << "0 1 1 0.0\n" + "1 1 1 0.693\n" + "1 2 2 0.693\n" + "2 2 2 0.693\n" + "2 0.693\n\n"; + break; + case 3: + topo_string << "0 1 1 0.0\n" + "1 1 1 0.693\n" + "1 2 2 0.693\n" + "2 3 3 0.0\n" // mix it up a bit. + "3 3 3 0.693\n" + "3 0.693\n\n"; + break; + default: + KALDI_ERR << "Un-handled num-pdf-classes\n"; } topo_string << "\n"; } topo_string << "\n"; - HmmTopology topo; + Topology topo; std::istringstream iss(topo_string.str()); topo.Read(iss, false); return topo; } -HmmTopology GenRandTopology() { +Topology GenRandTopology() { std::vector phones; phones.push_back(1); for (int32 i = 2; i < 20; i++) @@ -182,63 +154,54 @@ HmmTopology GenRandTopology() { } else { std::vector num_pdf_classes(phones.back() + 1, -1); for (int32 i = 0; i < phones.size(); i++) - num_pdf_classes[phones[i]] = RandInt(1, 5); + num_pdf_classes[phones[i]] = RandInt(1, 3); return GenRandTopology(phones, num_pdf_classes); } } -void GeneratePathThroughHmm(const HmmTopology &topology, - bool reorder, +void GeneratePathThroughHmm(const Topology &topology, int32 phone, std::vector > *path) { path->clear(); - const HmmTopology::TopologyEntry &this_entry = - topology.TopologyForPhone(phone); + auto const &this_entry = topology.TopologyForPhone(phone); // an FST int32 cur_state = 0; // start-state is always state zero. - int32 num_states = this_entry.size(), final_state = num_states - 1; + + // Note: final_state == num_states - 1 is actually not something + // that would be generally true, but it is true for the topologies we + // use in the test code. 
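Note: the numeric costs in the new-style topology strings above (see the switch on this_num_pdf_classes) appear to be negated natural-log probabilities rather than the raw 0.5-style probabilities of the old format: 0.693 is -log(0.5) and 0.0 is -log(1.0). A quick self-contained check, not part of the patch:

  #include <cmath>
  #include <cstdio>

  int main() {
    // Matches the 0.693 / 0.0 costs used in the test topology strings.
    std::printf("-log(0.5) = %.3f\n", -std::log(0.5));  // prints 0.693
    std::printf("-log(1.0) = %.3f\n", -std::log(1.0));  // prints 0.000
    return 0;
  }
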
+ int32 num_states = this_entry.NumStates(), final_state = num_states - 1; KALDI_ASSERT(num_states > 1); // there has to be a final nonemitting state // that's different from the start state. - std::vector > pending_self_loops; + while (cur_state != final_state) { - const HmmTopology::HmmState &cur_hmm_state = this_entry[cur_state]; - int32 num_transitions = cur_hmm_state.transitions.size(), - transition_index = RandInt(0, num_transitions - 1); - if (cur_hmm_state.forward_pdf_class != -1) { - std::pair pr(cur_state, transition_index); - if (!reorder) { - path->push_back(pr); - } else { - bool is_self_loop = (cur_state == - cur_hmm_state.transitions[transition_index].first); - if (is_self_loop) { // save these up, we'll put them after the forward - // transition. - pending_self_loops.push_back(pr); - } else { - // non-self-loop: output it and then flush out any self-loops we - // stored up. - path->push_back(pr); - path->insert(path->end(), pending_self_loops.begin(), - pending_self_loops.end()); - pending_self_loops.clear(); - } - } - } - cur_state = cur_hmm_state.transitions[transition_index].first; + int32 num_transitions = this_entry.NumArcs(cur_state), + arc_index = RandInt(0, num_transitions - 1); + fst::ArcIterator aiter(this_entry, cur_state); + aiter.Seek(arc_index); + auto const &arc(aiter.Value()); + KALDI_ASSERT(arc.ilabel > 0); + std::pair pr(cur_state, arc_index); + path->push_back(pr); + cur_state = arc.nextstate; } - KALDI_ASSERT(pending_self_loops.empty()); } void GenerateRandomAlignment(const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - bool reorder, + const Transitions &trans_model, const std::vector &phone_sequence, std::vector *alignment) { int32 context_width = ctx_dep.ContextWidth(), central_position = ctx_dep.CentralPosition(), num_phones = phone_sequence.size(); + + auto all_phones = trans_model.GetPhones(); + int32 model_max_phone = *std::max_element(all_phones.begin(), + all_phones.end()); alignment->clear(); for (int32 i = 0; i < num_phones; i++) { + KALDI_ASSERT(phone_sequence[i] > 0 + && phone_sequence[i] <= model_max_phone); std::vector context_window; context_window.reserve(context_width); for (int32 j = i - central_position; @@ -248,26 +211,35 @@ void GenerateRandomAlignment(const ContextDependencyInterface &ctx_dep, else context_window.push_back(0); // zero for out-of-window phones } // 'path' is the path through this phone's HMM, represented as - // (emitting-HMM-state, transition-index) pairs + // (source-HMM-state, transition-index) pairs std::vector > path; int32 phone = phone_sequence[i]; - GeneratePathThroughHmm(trans_model.GetTopo(), reorder, phone, &path); + GeneratePathThroughHmm(trans_model.GetTopo(), phone, &path); for (size_t k = 0; k < path.size(); k++) { - const HmmTopology::TopologyEntry &entry = - trans_model.GetTopo().TopologyForPhone(phone); + auto const &entry = trans_model.GetTopo().TopologyForPhone(phone); int32 hmm_state = path[k].first, - transition_index = path[k].second, - forward_pdf_class = entry[hmm_state].forward_pdf_class, - self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class, + arc_index = path[k].second, forward_pdf_id, self_loop_pdf_id; + fst::ArcIterator aiter(entry, hmm_state); + aiter.Seek(arc_index); + auto const &arc(aiter.Value()); + int32 forward_pdf_class = arc.ilabel, + self_loop_pdf_class = -1; + for (fst::ArcIterator aiter_next(entry, arc.nextstate); + !aiter_next.Done(); aiter_next.Next()) + if (aiter_next.Value().nextstate == arc.nextstate) + self_loop_pdf_class = 
aiter_next.Value().ilabel; + bool ans = ctx_dep.Compute(context_window, forward_pdf_class, &forward_pdf_id); KALDI_ASSERT(ans && "context-dependency computation failed."); - ans = ctx_dep.Compute(context_window, self_loop_pdf_class, &self_loop_pdf_id); - KALDI_ASSERT(ans && "context-dependency computation failed."); - int32 transition_state = trans_model.TupleToTransitionState( - phone, hmm_state, forward_pdf_id, self_loop_pdf_id), - transition_id = trans_model.PairToTransitionId(transition_state, - transition_index); + if (self_loop_pdf_class != -1) { + ans = ctx_dep.Compute(context_window, self_loop_pdf_class, &self_loop_pdf_id); + KALDI_ASSERT(ans && "context-dependency computation failed."); + } else { + self_loop_pdf_id = -1; + } + int32 transition_id = trans_model.TupleToTransitionId(phone, hmm_state, arc_index, + forward_pdf_id, self_loop_pdf_id); alignment->push_back(transition_id); } } diff --git a/src/hmm/hmm-test-utils.h b/src/hmm/hmm-test-utils.h index 4faaa92fa66..32c901c1791 100644 --- a/src/hmm/hmm-test-utils.h +++ b/src/hmm/hmm-test-utils.h @@ -21,38 +21,38 @@ #ifndef KALDI_HMM_HMM_TEST_UTILS_H_ #define KALDI_HMM_HMM_TEST_UTILS_H_ -#include "hmm/hmm-topology.h" -#include "hmm/transition-model.h" +#include "hmm/topology.h" +#include "hmm/transitions.h" #include "lat/kaldi-lattice.h" #include "tree/context-dep.h" namespace kaldi { -// Here we put a convenience function for generating a TransitionModel object -- +// Here we put a convenience function for generating a Transitions object -- // useful in test code. We may put other testing-related things here in time. -// This function returns a randomly generated TransitionModel object. +// This function returns a randomly generated Transitions object. // If 'ctx_dep' is not NULL, it outputs to *ctx_dep a pointer to the // tree that was used to generate the transition model. -TransitionModel *GenRandTransitionModel(ContextDependency **ctx_dep); +Transitions *GenRandTransitions(ContextDependency **ctx_dep); -/// This function returns a HmmTopology object giving a normal 3-state topology, +/// This function returns a Topology object giving a normal 3-state topology, /// covering all phones in the list "phones". This is mainly of use in testing /// code. -HmmTopology GetDefaultTopology(const std::vector &phones); +Topology GetDefaultTopology(const std::vector &phones); -/// This method of generating an arbitrary HmmTopology object allows you to +/// This method of generating an arbitrary Topology object allows you to /// specify the number of pdf-classes for each phone separately. /// 'num_pdf_classes' is indexed by the phone-index (so the length will be /// longer than the length of the 'phones' vector, which for example lacks the /// zero index and may have gaps). -HmmTopology GenRandTopology(const std::vector &phones, +Topology GenRandTopology(const std::vector &phones, const std::vector &num_pdf_classes); /// This version of GenRandTopology() generates the phone list and number of pdf /// classes randomly. -HmmTopology GenRandTopology(); +Topology GenRandTopology(); /// This function generates a random path through the HMM for the given /// phone. The 'path' output is a list of pairs (HMM-state, transition-index) @@ -60,8 +60,7 @@ HmmTopology GenRandTopology(); /// used in other test code. /// the 'reorder' option is as described in the documentation; if true, the /// self-loops from a state are reordered to come after the forward-transition. 
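Note: in the rewritten test helpers above, each phone's topology entry is walked as an FST: arcs are enumerated with fst::ArcIterator, the arc ilabel supplies the pdf-class, and a single TupleToTransitionId call replaces the old TupleToTransitionState / PairToTransitionId pair. A standalone sketch of the arc-walking idiom using plain OpenFst types (the actual TopologyEntry type in this patch may differ; RandomPath is illustrative only):

  #include <cstdlib>
  #include <utility>
  #include <vector>
  #include <fst/vector-fst.h>

  // Follow randomly chosen arcs from the start state until a final state is
  // reached, recording (state, arc-index) pairs as GeneratePathThroughHmm does.
  std::vector<std::pair<int, int> > RandomPath(const fst::StdVectorFst &entry) {
    std::vector<std::pair<int, int> > path;
    int cur_state = entry.Start();
    while (entry.Final(cur_state) == fst::TropicalWeight::Zero()) {
      int num_arcs = entry.NumArcs(cur_state);
      int arc_index = std::rand() % num_arcs;  // kaldi::RandInt in the real code.
      fst::ArcIterator<fst::StdVectorFst> aiter(entry, cur_state);
      aiter.Seek(arc_index);
      path.push_back(std::make_pair(cur_state, arc_index));
      cur_state = aiter.Value().nextstate;
    }
    return path;
  }
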
-void GeneratePathThroughHmm(const HmmTopology &topology, - bool reorder, +void GeneratePathThroughHmm(const Topology &topology, int32 phone, std::vector > *path); @@ -69,8 +68,7 @@ void GeneratePathThroughHmm(const HmmTopology &topology, /// For use in test code, this function generates an alignment (a sequence of /// transition-ids) corresponding to a given phone sequence. void GenerateRandomAlignment(const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - bool reorder, + const Transitions &trans_model, const std::vector &phone_sequence, std::vector *alignment); diff --git a/src/hmm/hmm-topology.cc b/src/hmm/hmm-topology.cc deleted file mode 100644 index 29634ecda0b..00000000000 --- a/src/hmm/hmm-topology.cc +++ /dev/null @@ -1,387 +0,0 @@ -// hmm/hmm-topology.cc - -// Copyright 2009-2011 Microsoft Corporation -// 2014 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "hmm/hmm-topology.h" -#include "util/text-utils.h" - - -namespace kaldi { - - - -void HmmTopology::GetPhoneToNumPdfClasses(std::vector *phone2num_pdf_classes) const { - KALDI_ASSERT(!phones_.empty()); - phone2num_pdf_classes->clear(); - phone2num_pdf_classes->resize(phones_.back() + 1, -1); - for (size_t i = 0; i < phones_.size(); i++) - (*phone2num_pdf_classes)[phones_[i]] = NumPdfClasses(phones_[i]); -} - -void HmmTopology::Read(std::istream &is, bool binary) { - ExpectToken(is, binary, ""); - if (!binary) { // Text-mode read, different "human-readable" format. - phones_.clear(); - phone2idx_.clear(); - entries_.clear(); - std::string token; - while ( ! (is >> token).fail() ) { - if (token == "") { break; } // finished parsing. - else if (token != "") { - KALDI_ERR << "Reading HmmTopology object, expected or , got "<"); - std::vector phones; - std::string s; - while (1) { - is >> s; - if (is.fail()) KALDI_ERR << "Reading HmmTopology object, unexpected end of file while expecting phones."; - if (s == "") break; - else { - int32 phone; - if (!ConvertStringToInteger(s, &phone)) - KALDI_ERR << "Reading HmmTopology object, expected " - << "integer, got instead " << s; - phones.push_back(phone); - } - } - - std::vector this_entry; - std::string token; - ReadToken(is, binary, &token); - while (token != "") { - if (token != "") - KALDI_ERR << "Expected or , got instead " << token; - int32 state; - ReadBasicType(is, binary, &state); - if (state != static_cast(this_entry.size())) - KALDI_ERR << "States are expected to be in order from zero, expected " - << this_entry.size() << ", got " << state; - ReadToken(is, binary, &token); - int32 forward_pdf_class = kNoPdf; // -1 by default, means no pdf. 
- if (token == "") { - ReadBasicType(is, binary, &forward_pdf_class); - this_entry.push_back(HmmState(forward_pdf_class)); - ReadToken(is, binary, &token); - if (token == "") - KALDI_ERR << "pdf classes should be defined using " - << "or / pair"; - } else if (token == "") { - int32 self_loop_pdf_class = kNoPdf; - ReadBasicType(is, binary, &forward_pdf_class); - ReadToken(is, binary, &token); - if (token != "") - KALDI_ERR << "Expected , got instead " << token; - ReadBasicType(is, binary, &self_loop_pdf_class); - this_entry.push_back(HmmState(forward_pdf_class, self_loop_pdf_class)); - ReadToken(is, binary, &token); - } else - this_entry.push_back(HmmState(forward_pdf_class)); - while (token == "") { - int32 dst_state; - BaseFloat trans_prob; - ReadBasicType(is, binary, &dst_state); - ReadBasicType(is, binary, &trans_prob); - this_entry.back().transitions.push_back(std::make_pair(dst_state, trans_prob)); - ReadToken(is, binary, &token); - } - if (token == "") // TODO: remove this clause after a while. - KALDI_ERR << "You are trying to read old-format topology with new Kaldi."; - if (token != "") - KALDI_ERR << "Expected , got instead " << token; - ReadToken(is, binary, &token); - } - int32 my_index = entries_.size(); - entries_.push_back(this_entry); - - for (size_t i = 0; i < phones.size(); i++) { - int32 phone = phones[i]; - if (static_cast(phone2idx_.size()) <= phone) - phone2idx_.resize(phone+1, -1); // -1 is invalid index. - KALDI_ASSERT(phone > 0); - if (phone2idx_[phone] != -1) - KALDI_ERR << "Phone with index "<<(i)<<" appears in multiple topology entries."; - phone2idx_[phone] = my_index; - phones_.push_back(phone); - } - } - } - std::sort(phones_.begin(), phones_.end()); - KALDI_ASSERT(IsSortedAndUniq(phones_)); - } else { // binary I/O, just read member objects directly from disk. - ReadIntegerVector(is, binary, &phones_); - ReadIntegerVector(is, binary, &phone2idx_); - int32 sz; - ReadBasicType(is, binary, &sz); - bool is_hmm = true; - if (sz == -1) { - is_hmm = false; - ReadBasicType(is, binary, &sz); - } - entries_.resize(sz); - for (int32 i = 0; i < sz; i++) { - int32 thist_sz; - ReadBasicType(is, binary, &thist_sz); - entries_[i].resize(thist_sz); - for (int32 j = 0 ; j < thist_sz; j++) { - ReadBasicType(is, binary, &(entries_[i][j].forward_pdf_class)); - if (is_hmm) - entries_[i][j].self_loop_pdf_class = entries_[i][j].forward_pdf_class; - else - ReadBasicType(is, binary, &(entries_[i][j].self_loop_pdf_class)); - int32 thiss_sz; - ReadBasicType(is, binary, &thiss_sz); - entries_[i][j].transitions.resize(thiss_sz); - for (int32 k = 0; k < thiss_sz; k++) { - ReadBasicType(is, binary, &(entries_[i][j].transitions[k].first)); - ReadBasicType(is, binary, &(entries_[i][j].transitions[k].second)); - } - } - } - ExpectToken(is, binary, ""); - } - Check(); // Will throw if not ok. -} - - -void HmmTopology::Write(std::ostream &os, bool binary) const { - bool is_hmm = IsHmm(); - WriteToken(os, binary, ""); - if (!binary) { // Text-mode write. 
- os << "\n"; - for (int32 i = 0; i < static_cast (entries_.size()); i++) { - WriteToken(os, binary, ""); - os << "\n"; - WriteToken(os, binary, ""); - os << "\n"; - for (size_t j = 0; j < phone2idx_.size(); j++) { - if (phone2idx_[j] == i) - os << j << " "; - } - os << "\n"; - WriteToken(os, binary, ""); - os << "\n"; - for (size_t j = 0; j < entries_[i].size(); j++) { - WriteToken(os, binary, ""); - WriteBasicType(os, binary, static_cast(j)); - if (entries_[i][j].forward_pdf_class != kNoPdf) { - if (is_hmm) { - WriteToken(os, binary, ""); - WriteBasicType(os, binary, entries_[i][j].forward_pdf_class); - } else { - WriteToken(os, binary, ""); - WriteBasicType(os, binary, entries_[i][j].forward_pdf_class); - KALDI_ASSERT(entries_[i][j].self_loop_pdf_class != kNoPdf); - WriteToken(os, binary, ""); - WriteBasicType(os, binary, entries_[i][j].self_loop_pdf_class); - } - } - for (size_t k = 0; k < entries_[i][j].transitions.size(); k++) { - WriteToken(os, binary, ""); - WriteBasicType(os, binary, entries_[i][j].transitions[k].first); - WriteBasicType(os, binary, entries_[i][j].transitions[k].second); - } - WriteToken(os, binary, ""); - os << "\n"; - } - WriteToken(os, binary, ""); - os << "\n"; - } - } else { - WriteIntegerVector(os, binary, phones_); - WriteIntegerVector(os, binary, phone2idx_); - // -1 is put here as a signal that the object has the new, - // extended format with SelfLoopPdfClass - if (!is_hmm) WriteBasicType(os, binary, static_cast(-1)); - WriteBasicType(os, binary, static_cast(entries_.size())); - for (size_t i = 0; i < entries_.size(); i++) { - WriteBasicType(os, binary, static_cast(entries_[i].size())); - for (size_t j = 0; j < entries_[i].size(); j++) { - WriteBasicType(os, binary, entries_[i][j].forward_pdf_class); - if (!is_hmm) WriteBasicType(os, binary, entries_[i][j].self_loop_pdf_class); - WriteBasicType(os, binary, static_cast(entries_[i][j].transitions.size())); - for (size_t k = 0; k < entries_[i][j].transitions.size(); k++) { - WriteBasicType(os, binary, entries_[i][j].transitions[k].first); - WriteBasicType(os, binary, entries_[i][j].transitions[k].second); - } - } - } - } - WriteToken(os, binary, ""); - if (!binary) os << "\n"; -} - -void HmmTopology::Check() { - if (entries_.empty() || phones_.empty() || phone2idx_.empty()) - KALDI_ERR << "HmmTopology::Check(), empty object."; - std::vector is_seen(entries_.size(), false); - for (size_t i = 0; i < phones_.size(); i++) { - int32 phone = phones_[i]; - if (static_cast(phone) >= phone2idx_.size() || - static_cast(phone2idx_[phone]) >= entries_.size()) - KALDI_ERR << "HmmTopology::Check(), phone has no valid index."; - is_seen[phone2idx_[phone]] = true; - } - for (size_t i = 0; i < entries_.size(); i++) { - if (!is_seen[i]) - KALDI_ERR << "HmmTopoloy::Check(), entry with no corresponding phones."; - int32 num_states = static_cast(entries_[i].size()); - if (num_states <= 1) - KALDI_ERR << "HmmTopology::Check(), cannot only have one state (i.e., must " - "have at least one emitting state)."; - if (!entries_[i][num_states-1].transitions.empty()) - KALDI_ERR << "HmmTopology::Check(), last state must have no transitions."; - // not sure how necessary this next stipulation is. - if (entries_[i][num_states-1].forward_pdf_class != kNoPdf) - KALDI_ERR << "HmmTopology::Check(), last state must not be emitting."; - - std::vector has_trans_in(num_states, false); - std::vector seen_pdf_classes; - - for (int32 j = 0; j < num_states; j++) { // j is the state-id. 
- BaseFloat tot_prob = 0.0; - if (entries_[i][j].forward_pdf_class != kNoPdf) { - seen_pdf_classes.push_back(entries_[i][j].forward_pdf_class); - seen_pdf_classes.push_back(entries_[i][j].self_loop_pdf_class); - } - std::set seen_transition; - for (int32 k = 0; - static_cast(k) < entries_[i][j].transitions.size(); - k++) { - tot_prob += entries_[i][j].transitions[k].second; - if (entries_[i][j].transitions[k].second <= 0.0) - KALDI_ERR << "HmmTopology::Check(), negative or zero transition prob."; - int32 dst_state = entries_[i][j].transitions[k].first; - // The commented code in the next few lines disallows a completely - // skippable phone, as this would cause to stop working some mechanisms - // that are being built, which enable the creation of phone-level lattices - // and rescoring these with a different lexicon and LM. - if (dst_state == num_states-1 // && j != 0 - && entries_[i][j].forward_pdf_class == kNoPdf) - KALDI_ERR << "We do not allow any state to be " - "nonemitting and have a transition to the final-state (this would " - "stop the SplitToPhones function from identifying the last state " - "of a phone."; - if (dst_state < 0 || dst_state >= num_states) - KALDI_ERR << "HmmTopology::Check(), invalid dest state " << (dst_state); - if (seen_transition.count(dst_state) != 0) - KALDI_ERR << "HmmTopology::Check(), duplicate transition found."; - if (dst_state == k) { // self_loop... - KALDI_ASSERT(entries_[i][j].self_loop_pdf_class != kNoPdf && - "Nonemitting states cannot have self-loops."); - } - seen_transition.insert(dst_state); - has_trans_in[dst_state] = true; - } - if (j+1 < num_states) { - KALDI_ASSERT(tot_prob > 0.0 && "Non-final state must have transitions out." - "(with nonzero probability)"); - if (fabs(tot_prob - 1.0) > 0.01) - KALDI_WARN << "Total probability for state " << j << - " in topology entry is " << tot_prob; - } else - KALDI_ASSERT(tot_prob == 0.0); - } - // make sure all but start state have input transitions. - for (int32 j = 1; j < num_states; j++) - if (!has_trans_in[j]) - KALDI_ERR << "HmmTopology::Check, state "<<(j)<<" has no input transitions."; - SortAndUniq(&seen_pdf_classes); - if (seen_pdf_classes.front() != 0 || - seen_pdf_classes.back() != static_cast(seen_pdf_classes.size()) - 1) { - KALDI_ERR << "HmmTopology::Check(), pdf_classes are expected to be " - "contiguous and start from zero."; - } - } -} - -bool HmmTopology::IsHmm() const { - const std::vector &phones = GetPhones(); - KALDI_ASSERT(!phones.empty()); - for (size_t i = 0; i < phones.size(); i++) { - int32 phone = phones[i]; - const TopologyEntry &entry = TopologyForPhone(phone); - for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... - int32 forward_pdf_class = entry[j].forward_pdf_class, - self_loop_pdf_class = entry[j].self_loop_pdf_class; - if (forward_pdf_class != self_loop_pdf_class) - return false; - } - } - return true; -} - -const HmmTopology::TopologyEntry& HmmTopology::TopologyForPhone(int32 phone) const { // Will throw if phone not covered. - if (static_cast(phone) >= phone2idx_.size() || phone2idx_[phone] == -1) { - KALDI_ERR << "TopologyForPhone(), phone "<<(phone)<<" not covered."; - } - return entries_[phone2idx_[phone]]; -} - -int32 HmmTopology::NumPdfClasses(int32 phone) const { - // will throw if phone not covered. 
-const HmmTopology::TopologyEntry& HmmTopology::TopologyForPhone(int32 phone) const {  // Will throw if phone not covered.
-  if (static_cast<size_t>(phone) >= phone2idx_.size() || phone2idx_[phone] == -1) {
-    KALDI_ERR << "TopologyForPhone(), phone "<<(phone)<<" not covered.";
-  }
-  return entries_[phone2idx_[phone]];
-}
-
-int32 HmmTopology::NumPdfClasses(int32 phone) const {
-  // will throw if phone not covered.
-  const TopologyEntry &entry = TopologyForPhone(phone);
-  int32 max_pdf_class = 0;
-  for (size_t i = 0; i < entry.size(); i++) {
-    max_pdf_class = std::max(max_pdf_class, entry[i].forward_pdf_class);
-    max_pdf_class = std::max(max_pdf_class, entry[i].self_loop_pdf_class);
-  }
-  return max_pdf_class+1;
-}
-
-int32 HmmTopology::MinLength(int32 phone) const {
-  const TopologyEntry &entry = TopologyForPhone(phone);
-  // min_length[state] gives the minimum length for sequences up to and
-  // including that state.
-  std::vector<int32> min_length(entry.size(),
-                                std::numeric_limits<int32>::max());
-  KALDI_ASSERT(!entry.empty());
-
-  min_length[0] = (entry[0].forward_pdf_class == -1 ? 0 : 1);
-  int32 num_states = min_length.size();
-  bool changed = true;
-  while (changed) {
-    changed = false;
-    for (int32 s = 0; s < num_states; s++) {
-      const HmmState &this_state = entry[s];
-      std::vector<std::pair<int32, BaseFloat> >::const_iterator
-          iter = this_state.transitions.begin(),
-          end = this_state.transitions.end();
-      for (; iter != end; ++iter) {
-        int32 next_state = iter->first;
-        KALDI_ASSERT(next_state < num_states);
-        int32 next_state_min_length = min_length[s] +
-            (entry[next_state].forward_pdf_class == -1 ? 0 : 1);
-        if (next_state_min_length < min_length[next_state]) {
-          min_length[next_state] = next_state_min_length;
-          if (next_state < s)
-            changed = true;
-          // the test of 'next_state < s' is an optimization for speed.
-        }
-      }
-    }
-  }
-  KALDI_ASSERT(min_length.back() != std::numeric_limits<int32>::max());
-  // the last state is the final-state.
-  return min_length.back();
-}
-
-} // End namespace kaldi
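MinLength() is a small fixed-point computation: min_length[s] is the minimum number of emitting states on any path from the start state up to and including s, and the sweep is repeated only when an earlier-numbered state improves (the 'next_state < s' test), which can only happen if a topology has backward transitions. For the standard 3-emitting-state topology shown in the header below, every emitting state is entered at least once, so the expected values are (illustrative sketch only, assuming a topology object `topo` whose entry for phone 1 is that standard one):

    KALDI_ASSERT(topo.MinLength(1) == 3);      // states 0, 1, 2 each emit at least one frame.
    KALDI_ASSERT(topo.NumPdfClasses(1) == 3);  // pdf-classes 0, 1 and 2.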
diff --git a/src/hmm/hmm-topology.h b/src/hmm/hmm-topology.h
deleted file mode 100644
index 750d35bcfe4..00000000000
--- a/src/hmm/hmm-topology.h
+++ /dev/null
@@ -1,194 +0,0 @@
-// hmm/hmm-topology.h
-
-// Copyright 2009-2011  Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_HMM_HMM_TOPOLOGY_H_
-#define KALDI_HMM_HMM_TOPOLOGY_H_
-
-#include "base/kaldi-common.h"
-#include "util/const-integer-set.h"
-
-
-namespace kaldi {
-
-
-/// \addtogroup hmm_group
-/// @{
-
-/*
-  // The following would be the text form for the "normal" HMM topology.
-  // Note that the first state is the start state, and the final state,
-  // which must have no output transitions and must be nonemitting, has
-  // an exit probability of one (no other state can have nonzero exit
-  // probability; you can treat the transition probability to the final
-  // state as an exit probability).
-  // Note also that it's valid to omit the "<PdfClass>" entry of the <State>, which
-  // will mean we won't have a pdf on that state [non-emitting state].  This is equivalent
-  // to setting the <PdfClass> to -1.  We do this normally just for the final state.
-  // The Topology object can have multiple <TopologyEntry> blocks.
-  // This is useful if there are multiple types of topology in the system.
-
-
-  <Topology>
-  <TopologyEntry>
-  <ForPhones> 1 2 3 4 5 6 7 8 </ForPhones>
-  <State> 0 <PdfClass> 0
-  <Transition> 0 0.5
-  <Transition> 1 0.5
-  </State>
-  <State> 1 <PdfClass> 1
-  <Transition> 1 0.5
-  <Transition> 2 0.5
-  </State>
-  <State> 2 <PdfClass> 2
-  <Transition> 2 0.5
-  <Transition> 3 0.5
-  <Final> 0.5
-  </State>
-  <State> 3
-  </State>
-  </TopologyEntry>
-  </Topology>
-*/
-
-// kNoPdf is used where pdf_class or pdf would be used, to indicate,
-// none is there.  Mainly useful in skippable models, but also used
-// for end states.
-// A caveat with nonemitting states is that their out-transitions
-// are not trainable, due to technical issues with the way
-// we decided to accumulate the stats.  Any transitions arising from (*)
-// HMM states with "kNoPdf" as the label are second-class transitions,
-// They do not have "transition-states" or "transition-ids" associated
-// with them.  They are used to create the FST version of the
-// HMMs, where they lead to epsilon arcs.
-// (*) "arising from" is a bit of a technical term here, due to the way
-// (if reorder == true), we put the transition-id associated with the
-// outward arcs of the state, on the input transition to the state.
-
-/// A constant used in the HmmTopology class as the \ref pdf_class "pdf-class"
-/// kNoPdf, which is used when a HMM-state is nonemitting (has no associated
-/// PDF).
-
-static const int32 kNoPdf = -1;
-
-/// A class for storing topology information for phones.  See \ref hmm for context.
-/// This object is sometimes accessed in a file by itself, but more often
-/// as a class member of the Transition class (this is for convenience to reduce
-/// the number of files programs have to access).
-
-class HmmTopology {
- public:
-  /// A structure defined inside HmmTopology to represent a HMM state.
-  struct HmmState {
-    /// The \ref pdf_class forward-pdf-class, typically 0, 1 or 2 (the same as the HMM-state index),
-    /// but may be different to enable us to hardwire sharing of state, and may be
-    /// equal to \ref kNoPdf == -1 in order to specify nonemitting states (unusual).
-    int32 forward_pdf_class;
-
-    /// The \ref pdf_class self-loop pdf-class, similar to \ref pdf_class forward-pdf-class.
-    /// They will either both be \ref kNoPdf, or neither be \ref kNoPdf.
-    int32 self_loop_pdf_class;
-
-    /// A list of transitions, indexed by what we call a 'transition-index'.
-    /// The first member of each pair is the index of the next HmmState, and the
-    /// second is the default transition probability (before training).
-    std::vector<std::pair<int32, BaseFloat> > transitions;
-
-    explicit HmmState(int32 pdf_class) {
-      this->forward_pdf_class = pdf_class;
-      this->self_loop_pdf_class = pdf_class;
-    }
-    explicit HmmState(int32 forward_pdf_class, int32 self_loop_pdf_class) {
-      KALDI_ASSERT((forward_pdf_class != kNoPdf && self_loop_pdf_class != kNoPdf) ||
-                   (forward_pdf_class == kNoPdf && self_loop_pdf_class == kNoPdf));
-      this->forward_pdf_class = forward_pdf_class;
-      this->self_loop_pdf_class = self_loop_pdf_class;
-    }
-
-    bool operator == (const HmmState &other) const {
-      return (forward_pdf_class == other.forward_pdf_class &&
-              self_loop_pdf_class == other.self_loop_pdf_class &&
-              transitions == other.transitions);
-    }
-
-    HmmState(): forward_pdf_class(-1), self_loop_pdf_class(-1) { }
-  };
-
-  /// TopologyEntry is a typedef that represents the topology of
-  /// a single (prototype) state.
-  typedef std::vector<HmmState> TopologyEntry;
-
-  void Read(std::istream &is, bool binary);
-  void Write(std::ostream &os, bool binary) const;
-
-  // Checks that the object is valid, and throw exception otherwise.
-  void Check();
-
-  /// Returns true if this HmmTopology is really 'hmm-like', i.e. the pdf-class on
-  /// the self-loops and forward transitions of all states are identical. [note: in HMMs,
-  /// the densities are associated with the states.] We have extended this to
-  /// support 'non-hmm-like' topologies (where those pdf-classes are different),
-  /// in order to make for more compact decoding graphs in our so-called 'chain models'
-  /// (AKA lattice-free MMI), where we use 1-state topologies that have different pdf-classes
-  /// for the self-loop and the forward transition.  Note that we always use the 'reorder=true'
-  /// option so the 'forward transition' actually comes before the self-loop.
-  bool IsHmm() const;
-
-  /// Returns the topology entry (i.e. vector of HmmState) for this phone;
-  /// will throw exception if phone not covered by the topology.
-  const TopologyEntry &TopologyForPhone(int32 phone) const;
-
-  /// Returns the number of \ref pdf_class "pdf-classes" for this phone;
-  /// throws exception if phone not covered by this topology.
-  int32 NumPdfClasses(int32 phone) const;
-
-  /// Returns a reference to a sorted, unique list of phones covered by
-  /// the topology (these phones will be positive integers, and usually
-  /// contiguous and starting from one but the toolkit doesn't assume
-  /// they are contiguous).
-  const std::vector<int32> &GetPhones() const { return phones_; };
-
-  /// Outputs a vector of int32, indexed by phone, that gives the
-  /// number of \ref pdf_class pdf-classes for the phones; this is
-  /// used by tree-building code such as BuildTree().
-  void GetPhoneToNumPdfClasses(std::vector<int32> *phone2num_pdf_classes) const;
-
-  // Returns the minimum number of frames it takes to traverse this model for
-  // this phone: e.g. 3 for the normal HMM topology.
-  int32 MinLength(int32 phone) const;
-
-  HmmTopology() {}
-
-  bool operator == (const HmmTopology &other) const {
-    return phones_ == other.phones_ && phone2idx_ == other.phone2idx_
-        && entries_ == other.entries_;
-  }
-  // Allow default assignment operator and copy constructor.
- private:
-  std::vector<int32> phones_;  // list of all phones we have topology for.  Sorted, uniq.  no epsilon (zero) phone.
-  std::vector<int32> phone2idx_;  // map from phones to indexes into the entries vector (or -1 for not present).
- std::vector entries_; -}; - - -/// @} end "addtogroup hmm_group" - - -} // end namespace kaldi - - -#endif diff --git a/src/hmm/hmm-utils-test.cc b/src/hmm/hmm-utils-test.cc index 69728cc8ca7..5d7f4fcc2c3 100644 --- a/src/hmm/hmm-utils-test.cc +++ b/src/hmm/hmm-utils-test.cc @@ -202,7 +202,7 @@ void TestAccumulateTreeStatsOptions() { void TestSplitToPhones() { ContextDependency *ctx_dep = NULL; - TransitionModel *trans_model = GenRandTransitionModel(&ctx_dep); + Transitions *trans_model = GenRandTransitions(&ctx_dep); std::vector phone_seq; int32 num_phones = RandInt(0, 10); const std::vector &phone_list = trans_model->GetPhones(); @@ -210,18 +210,18 @@ void TestSplitToPhones() { int32 rand_phone = phone_list[RandInt(0, phone_list.size() - 1)]; phone_seq.push_back(rand_phone); } - bool reorder = (RandInt(0, 1) == 0); std::vector alignment; - GenerateRandomAlignment(*ctx_dep, *trans_model, reorder, + GenerateRandomAlignment(*ctx_dep, *trans_model, phone_seq, &alignment); std::vector > split_alignment; - SplitToPhones(*trans_model, alignment, &split_alignment); + bool ans = SplitToPhones(*trans_model, alignment, &split_alignment); + KALDI_ASSERT(ans); KALDI_ASSERT(split_alignment.size() == phone_seq.size()); for (size_t i = 0; i < split_alignment.size(); i++) { KALDI_ASSERT(!split_alignment[i].empty()); for (size_t j = 0; j < split_alignment[i].size(); j++) { int32 transition_id = split_alignment[i][j]; - KALDI_ASSERT(trans_model->TransitionIdToPhone(transition_id) == + KALDI_ASSERT(trans_model->InfoForTransitionId(transition_id).phone == phone_seq[i]); } } @@ -230,18 +230,14 @@ void TestSplitToPhones() { } void TestConvertAlignment() { - bool old_reorder = (RandInt(0, 1) == 1), - new_reorder = (RandInt(0, 1) == 1), - new_tree = (RandInt(0, 1) == 1), + bool new_tree = (RandInt(0, 1) == 1), new_topology = (RandInt(0, 1) == 1); if (!new_tree) new_topology = true; int32 subsample_factor = RandInt(1, 3); - KALDI_LOG << " old-reorder = " << old_reorder - << ", new-reorder = " << new_reorder - << ", new-tree = " << new_tree + KALDI_LOG << ", new-tree = " << new_tree << ", subsample-factor = " << subsample_factor; std::vector phones; @@ -273,11 +269,11 @@ void TestConvertAlignment() { } - HmmTopology topo_old = GenRandTopology(phones, num_pdf_classes_old), + Topology topo_old = GenRandTopology(phones, num_pdf_classes_old), topo_new = (new_topology ? GenRandTopology(phones, num_pdf_classes_new) : topo_old); - TransitionModel trans_model_old(*ctx_dep_old, topo_old), + Transitions trans_model_old(*ctx_dep_old, topo_old), trans_model_new(*ctx_dep_new, topo_new); std::vector phone_sequence; @@ -286,15 +282,15 @@ void TestConvertAlignment() { phone_sequence.push_back(phones[RandInt(0, phones.size() - 1)]); std::vector old_alignment; GenerateRandomAlignment(*ctx_dep_old, trans_model_old, - old_reorder, phone_sequence, + phone_sequence, &old_alignment); std::vector new_alignment; bool ans = ConvertAlignment(trans_model_old, trans_model_new, *ctx_dep_new, old_alignment, subsample_factor, false, - new_reorder, NULL, &new_alignment); - if(!ans) { + NULL, &new_alignment); + if (!ans) { KALDI_WARN << "Alignment conversion failed"; // make sure it failed for a good reason. 
KALDI_ASSERT(new_topology || subsample_factor > 1); @@ -305,14 +301,14 @@ void TestConvertAlignment() { KALDI_ASSERT(b1 && b2); KALDI_ASSERT(old_split.size() == new_split.size()); for (size_t i = 0; i < new_split.size(); i++) - KALDI_ASSERT(trans_model_old.TransitionIdToPhone(old_split[i].front()) == - trans_model_new.TransitionIdToPhone(new_split[i].front())); + KALDI_ASSERT(trans_model_old.InfoForTransitionId(old_split[i].front()).phone == + trans_model_new.InfoForTransitionId(new_split[i].front()).phone); if (!new_topology && subsample_factor == 1) { // we should be able to convert back and it'll be the same. std::vector old_alignment_copy; bool ans = ConvertAlignment(trans_model_new, trans_model_old, *ctx_dep_old, new_alignment, subsample_factor, false, - old_reorder, NULL, &old_alignment_copy); + NULL, &old_alignment_copy); KALDI_ASSERT(ans); KALDI_ASSERT(old_alignment_copy == old_alignment); } @@ -336,4 +332,3 @@ int main() { kaldi::TestConvertAlignment(); std::cout << "Test OK.\n"; } - diff --git a/src/hmm/hmm-utils.cc b/src/hmm/hmm-utils.cc index 06edf8d5976..7bd6070f151 100644 --- a/src/hmm/hmm-utils.cc +++ b/src/hmm/hmm-utils.cc @@ -2,6 +2,7 @@ // Copyright 2009-2011 Microsoft Corporation // 2018 Johns Hopkins University (author: Daniel Povey) +// 2019 Daniel Galvez // See ../../COPYING for clarification regarding multiple authors // @@ -18,6 +19,7 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. +#include #include #include "hmm/hmm-utils.h" @@ -27,16 +29,12 @@ namespace kaldi { - - -fst::VectorFst *GetHmmAsFsa( - std::vector phone_window, +std::shared_ptr GetHmmAsFsa( + const std::vector &phone_window, const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - const HTransducerConfig &config, + const Transitions &trans_model, + bool include_self_loops, HmmCacheType *cache) { - using namespace fst; - if (static_cast(phone_window.size()) != ctx_dep.ContextWidth()) KALDI_ERR << "Context size mismatch, ilabel-info [from context FST is " << phone_window.size() << ", context-dependency object " @@ -48,16 +46,14 @@ fst::VectorFst *GetHmmAsFsa( KALDI_ERR << "phone == 0. Some mismatch happened, or there is " "a code error."; - const HmmTopology &topo = trans_model.GetTopo(); - const HmmTopology::TopologyEntry &entry = topo.TopologyForPhone(phone); + const Topology &topo = trans_model.GetTopo(); - // vector of the pdfs, indexed by pdf-class (pdf-classes must start from zero - // and be contiguous). - std::vector pdfs(topo.NumPdfClasses(phone)); - for (int32 pdf_class = 0; - pdf_class < static_cast(pdfs.size()); + // vector of the pdf-ids, indexed by pdf-class minus one. + std::vector pdf_ids(topo.NumPdfClasses(phone)); + for (int32 pdf_class = 1; + pdf_class <= static_cast(pdf_ids.size()); pdf_class++) { - if (! ctx_dep.Compute(phone_window, pdf_class, &(pdfs[pdf_class])) ) { + if (! 
ctx_dep.Compute(phone_window, pdf_class, &(pdf_ids[pdf_class - 1])) ) { std::ostringstream ctx_ss; for (size_t i = 0; i < phone_window.size(); i++) ctx_ss << phone_window[i] << ' '; @@ -70,80 +66,65 @@ fst::VectorFst *GetHmmAsFsa( " that general nature."; } } - std::pair > cache_index(phone, pdfs); + + std::pair > cache_index(phone, pdf_ids); if (cache != NULL) { HmmCacheType::iterator iter = cache->find(cache_index); if (iter != cache->end()) return iter->second; } - VectorFst *ans = new VectorFst; + using Arc = fst::StdArc; + using StateId = Arc::StateId; + using Weight = Arc::Weight; - typedef StdArc Arc; - typedef Arc::Weight Weight; - typedef Arc::StateId StateId; - typedef Arc::Label Label; + const fst::StdVectorFst &entry = topo.TopologyForPhone(phone); + // the elements correction_factors are factors only in the semiring; + // physically they are costs to be added. + std::vector correction_factors; + if (include_self_loops) + correction_factors.resize(entry.NumStates(), 0); + else + correction_factors = topo.CorrectionFactorsForPhone(phone); + const std::vector &self_loop_pdf_classes = + topo.SelfLoopPdfClassesForPhone(phone); + std::shared_ptr ans( + new fst::StdVectorFst()); + StateId num_states = entry.NumStates(); + for (StateId s = 0; s < num_states; s++) + ans->AddState(); + KALDI_PARANOID_ASSERT(entry.Start() == 0); // required by topology class. + ans->SetStart(0); - std::vector state_ids; - for (size_t i = 0; i < entry.size(); i++) - state_ids.push_back(ans->AddState()); - KALDI_ASSERT(state_ids.size() != 0); // Or empty topology entry. - ans->SetStart(state_ids[0]); - StateId final = state_ids.back(); - ans->SetFinal(final, Weight::One()); - - for (int32 hmm_state = 0; - hmm_state < static_cast(entry.size()); - hmm_state++) { - int32 forward_pdf_class = entry[hmm_state].forward_pdf_class, forward_pdf; - int32 self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class, self_loop_pdf; - if (forward_pdf_class == kNoPdf) { // nonemitting state. - forward_pdf = kNoPdf; - self_loop_pdf = kNoPdf; - } else { - KALDI_ASSERT(forward_pdf_class < static_cast(pdfs.size())); - KALDI_ASSERT(self_loop_pdf_class < static_cast(pdfs.size())); - forward_pdf = pdfs[forward_pdf_class]; - self_loop_pdf = pdfs[self_loop_pdf_class]; - } - int32 trans_idx; - for (trans_idx = 0; - trans_idx < static_cast(entry[hmm_state].transitions.size()); - trans_idx++) { - BaseFloat log_prob; - Label label; - int32 dest_state = entry[hmm_state].transitions[trans_idx].first; - bool is_self_loop = (dest_state == hmm_state); - if (is_self_loop) - continue; // We will add self-loops in at a later stage of processing, - // not in this function. - if (forward_pdf_class == kNoPdf) { - // no pdf, hence non-estimated probability. - // [would not happen with normal topology] . There is no transition-state - // involved in this case. - log_prob = Log(entry[hmm_state].transitions[trans_idx].second); - label = 0; - } else { // normal probability. - int32 trans_state = - trans_model.TupleToTransitionState(phone, hmm_state, forward_pdf, self_loop_pdf); - int32 trans_id = - trans_model.PairToTransitionId(trans_state, trans_idx); - log_prob = trans_model.GetTransitionLogProbIgnoringSelfLoops(trans_id); - // log_prob is a negative number (or zero)... - label = trans_id; - } - // Will add probability-scale later (we may want to push first). 
- ans->AddArc(state_ids[hmm_state], - Arc(label, label, Weight(-log_prob), state_ids[dest_state])); + for (StateId s = 0; s < num_states; s++) { + Weight correction_weight(correction_factors[s]); + ans->SetFinal(s, Times(correction_weight, entry.Final(s))); + + for (fst::ArcIterator aiter(entry, s); + !aiter.Done(); aiter.Next()) { + if (!include_self_loops && aiter.Value().nextstate == s) + continue; + Arc arc = aiter.Value(); + + // self_loop_pdf_class is the pdf-class of the self-loop of the destination + // state of this arc, if any, else -1. + int32 self_loop_pdf_class = self_loop_pdf_classes[arc.nextstate]; + // self_loop_pdf_id is the pdf-id of the self-loop in the destination + // state of this arc, if any, else -1. + int32 self_loop_pdf_id = (self_loop_pdf_class != -1 ? + pdf_ids[self_loop_pdf_class - 1] : -1); + int32 pdf_class = arc.ilabel, + pdf_id = pdf_ids[pdf_class - 1], + trans_id = trans_model.TupleToTransitionId( + phone, s, aiter.Position(), pdf_id, self_loop_pdf_id); + + arc.ilabel = trans_id; + arc.olabel = trans_id; + arc.weight = Times(correction_weight, arc.weight); + ans->AddArc(s, arc); } } - fst::RemoveEpsLocal(ans); // this is safe and will not blow up. - - // Now apply probability scale. - // We waited till after the possible weight-pushing steps, - // because weight-pushing needs "real" weights in order to work. - ApplyProbabilityScale(config.transition_scale, ans); if (cache != NULL) (*cache)[cache_index] = ans; return ans; @@ -151,95 +132,14 @@ fst::VectorFst *GetHmmAsFsa( -fst::VectorFst* -GetHmmAsFsaSimple(std::vector phone_window, - const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - BaseFloat prob_scale) { - using namespace fst; - - if (static_cast(phone_window.size()) != ctx_dep.ContextWidth()) - KALDI_ERR <<"Context size mismatch, ilabel-info [from context FST is " - <<(phone_window.size())<<", context-dependency object " - "expects "<<(ctx_dep.ContextWidth()); - - int P = ctx_dep.CentralPosition(); - int32 phone = phone_window[P]; - KALDI_ASSERT(phone != 0); - - const HmmTopology &topo = trans_model.GetTopo(); - const HmmTopology::TopologyEntry &entry = topo.TopologyForPhone(phone); - - VectorFst *ans = new VectorFst; - - // Create a mini-FST with a superfinal state [in case we have emitting - // final-states, which we usually will.] - typedef StdArc Arc; - typedef Arc::Weight Weight; - typedef Arc::StateId StateId; - typedef Arc::Label Label; - - std::vector state_ids; - for (size_t i = 0; i < entry.size(); i++) - state_ids.push_back(ans->AddState()); - KALDI_ASSERT(state_ids.size() > 1); // Or invalid topology entry. - ans->SetStart(state_ids[0]); - StateId final = state_ids.back(); - ans->SetFinal(final, Weight::One()); - - for (int32 hmm_state = 0; - hmm_state < static_cast(entry.size()); - hmm_state++) { - int32 forward_pdf_class = entry[hmm_state].forward_pdf_class, forward_pdf; - int32 self_loop_pdf_class = entry[hmm_state].self_loop_pdf_class, self_loop_pdf; - if (forward_pdf_class == kNoPdf) { // nonemitting state; not generally used. 
- forward_pdf = kNoPdf; - self_loop_pdf = kNoPdf; - } else { - bool ans = ctx_dep.Compute(phone_window, forward_pdf_class, &forward_pdf); - KALDI_ASSERT(ans && "Context-dependency computation failed."); - ans = ctx_dep.Compute(phone_window, self_loop_pdf_class, &self_loop_pdf); - KALDI_ASSERT(ans && "Context-dependency computation failed."); - } - int32 trans_idx; - for (trans_idx = 0; - trans_idx < static_cast(entry[hmm_state].transitions.size()); - trans_idx++) { - BaseFloat log_prob; - Label label; - int32 dest_state = entry[hmm_state].transitions[trans_idx].first; - if (forward_pdf_class == kNoPdf) { - // no pdf, hence non-estimated probability. very unusual case. [would - // not happen with normal topology] . There is no transition-state - // involved in this case. - KALDI_ASSERT(hmm_state != dest_state); - log_prob = Log(entry[hmm_state].transitions[trans_idx].second); - label = 0; - } else { // normal probability. - int32 trans_state = - trans_model.TupleToTransitionState(phone, hmm_state, forward_pdf, self_loop_pdf); - int32 trans_id = - trans_model.PairToTransitionId(trans_state, trans_idx); - log_prob = prob_scale * trans_model.GetTransitionLogProb(trans_id); - // log_prob is a negative number (or zero)... - label = trans_id; - } - ans->AddArc(state_ids[hmm_state], - Arc(label, label, Weight(-log_prob), state_ids[dest_state])); - } - } - return ans; -} - - - /// This utility function, used in GetHTransducer(), creates an FSA (finite /// state acceptor, i.e. an FST with ilabels equal to olabels) with a single /// successful path, with a single label on it. -static inline fst::VectorFst *MakeTrivialAcceptor(int32 label) { +static inline std::unique_ptr> +MakeTrivialAcceptor(int32 label) { typedef fst::StdArc Arc; typedef Arc::Weight Weight; - fst::VectorFst *ans = new fst::VectorFst; + std::unique_ptr> ans(new fst::VectorFst); ans->AddState(); ans->AddState(); ans->SetStart(0); @@ -251,11 +151,12 @@ static inline fst::VectorFst *MakeTrivialAcceptor(int32 label) { // The H transducer has a separate outgoing arc for each of the symbols in ilabel_info. -fst::VectorFst *GetHTransducer(const std::vector > &ilabel_info, - const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - const HTransducerConfig &config, - std::vector *disambig_syms_left) { +std::unique_ptr> +GetHTransducer(const std::vector > &ilabel_info, + const ContextDependencyInterface &ctx_dep, + const Transitions &trans_model, + const HTransducerConfig &config, + std::vector *disambig_syms_left) { KALDI_ASSERT(ilabel_info.size() >= 1 && ilabel_info[0].size() == 0); // make sure that eps == eps. HmmCacheType cache; // "cache" is an optimization that prevents GetHmmAsFsa repeating work @@ -266,7 +167,14 @@ fst::VectorFst *GetHTransducer(const std::vector typedef Arc::StateId StateId; typedef Arc::Label Label; - std::vector* > fsts(ilabel_info.size(), NULL); + // I would prefer to do this: + // std::vector>> fsts(ilabel_info.size(), std::unique_ptr(nullptr)); + // But the second arg of constructor (2) at https://en.cppreference.com/w/cpp/container/vector/vector + // must be able to be turned into a const-reference, which std::unique_ptr cannot be. + std::vector>> fsts; + for(std::size_t i = 0; i < ilabel_info.size(); ++i) { + fsts.emplace_back(std::unique_ptr>(nullptr)); + } std::vector phones = trans_model.GetPhones(); KALDI_ASSERT(disambig_syms_left != 0); @@ -315,26 +223,29 @@ fst::VectorFst *GetHTransducer(const std::vector } else { // Real phone-in-context. 
std::vector phone_window = ilabel_info[j]; - VectorFst *fst = GetHmmAsFsa(phone_window, - ctx_dep, - trans_model, - config, - &cache); - fsts[j] = fst; + std::shared_ptr> fst = GetHmmAsFsa(phone_window, + ctx_dep, + trans_model, + config.include_self_loops, + &cache); + std::unique_ptr> u_fst(fst->Copy()); + fsts[j] = std::move(u_fst); } } - VectorFst *ans = MakeLoopFst(fsts); - SortAndUniq(&fsts); // remove duplicate pointers, which we will have - // in general, since we used the cache. - DeletePointers(&fsts); + // fsts_bare is as fsts, but with bare pointers. + std::vector *> fsts_bare(fsts.size()); + for (size_t i = 0; i < fsts.size(); i++) + fsts_bare[i] = fsts[i].get(); + + std::unique_ptr> ans(MakeLoopFst(fsts_bare)); return ans; } void GetIlabelMapping (const std::vector > &ilabel_info_old, const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, std::vector *old2new_map) { KALDI_ASSERT(old2new_map != NULL); @@ -366,8 +277,8 @@ void GetIlabelMapping (const std::vector > &ilabel_info_old, int32 central_phone = vec[P]; int32 num_pdf_classes = trans_model.GetTopo().NumPdfClasses(central_phone); std::vector state_seq(num_pdf_classes); // Indexed by pdf-class - for (int32 pdf_class = 0; pdf_class < num_pdf_classes; pdf_class++) { - if (!ctx_dep.Compute(vec, pdf_class, &(state_seq[pdf_class]))) { + for (int32 pdf_class = 1; pdf_class <= num_pdf_classes; pdf_class++) { + if (!ctx_dep.Compute(vec, pdf_class, &(state_seq[pdf_class - 1]))) { std::ostringstream ss; WriteIntegerVector(ss, false, vec); KALDI_ERR << "tree did not succeed in converting phone window "< > &ilabel_info_old, -fst::VectorFst *GetPdfToTransitionIdTransducer(const TransitionModel &trans_model) { +std::unique_ptr> +GetPdfToTransitionIdTransducer(const Transitions &trans_model) { using namespace fst; - VectorFst *ans = new VectorFst; + std::unique_ptr> ans(new VectorFst); typedef VectorFst::Weight Weight; typedef StdArc Arc; ans->AddState(); ans->SetStart(0); ans->SetFinal(0, Weight::One()); for (int32 tid = 1; tid <= trans_model.NumTransitionIds(); tid++) { - int32 pdf = trans_model.TransitionIdToPdf(tid); - ans->AddArc(0, Arc(pdf+1, tid, Weight::One(), 0)); // note the offset of 1 on the pdfs. + int32 pdf = trans_model.TransitionIdToPdfFast(tid); + ans->AddArc(0, Arc(pdf+1, tid, Weight::One(), 0)); // note the offset of 1 on the pdf_ids. // it's because 0 is a valid pdf. 
} return ans; } +struct TransitionState { +public: + TransitionState(const Transitions::TransitionIdInfo& info): + info(info) { } + bool operator==(const TransitionState& other) const { + return info.phone == other.info.phone && + info.topo_state == other.info.topo_state && + info.pdf_id == other.info.pdf_id; + } + + bool operator!=(const TransitionState& other) const { + return !(*this == other); + } -class TidToTstateMapper { + TransitionState& operator=(TransitionState other) { +// TODO: Fix this bizarre error when I uncomment this: + // this->info = other.info; + KALDI_ASSERT(false); +// hmm-utils.cc: In member function ‘kaldi::TransitionState& kaldi::TransitionState::operator=(kaldi::TransitionState)’: +// hmm-utils.cc:351:24: error: passing ‘const kaldi::Transitions::TransitionIdInfo’ as ‘this’ argument discards qualifiers [-fpermissive] +// this->info = other.info; +// ^~~~ +// In file included from ../hmm/hmm-utils.h:27:0, +// from hmm-utils.cc:25: +// ../hmm/transitions.h:107:10: note: in call to ‘kaldi::Transitions::TransitionIdInfo& kaldi::Transitions::TransitionIdInfo::operator=(const kaldi::Transitions::TransitionIdInfo&)’ + + return *this; + } + + bool operator<(const TransitionState& other) const { + return info < other.info; + } + + const Transitions::TransitionIdInfo& info; +}; + +class TidToSelfLoopMapper { public: - // Function object used in MakePrecedingInputSymbolsSameClass and - // MakeFollowingInputSymbolsSameClass (as called by AddSelfLoopsReorder and - // AddSelfLoopsNoReorder). It maps transition-ids to transition-states (and - // -1 to -1, 0 to 0 and disambiguation symbols to 0). If check_no_self_loops - // == true, it also checks that there are no self-loops in the graph (i.e. in - // the labels it is called with). This is just a convenient place to put this - // check. - - // This maps valid transition-ids to transition states, maps kNoLabel to -1, and - // maps all other symbols (i.e. epsilon symbols, disambig symbols, and symbols - // with values over 100000/kNontermBigNumber) to zero. - // Its point is to provide an equivalence class on labels that's relevant to what - // the self-loop will be on the following (or preceding) state. - TidToTstateMapper(const TransitionModel &trans_model, - const std::vector &disambig_syms, - bool check_no_self_loops): + // Function object used in MakePrecedingInputSymbolsSameClass and. + // It maps a transition-ids t to the transition-id on the self-loop + // of the destination-state of t (or 0 if there is no self-loop). + // + // If currently_self_loop_free == true, it also checks that there are no + // self-loops in the graph (i.e. in the labels it is called with). This is + // just a convenient place to put this check. + + // This maps valid transition-ids to transition states, and maps all other + // symbols (i.e. epsilon symbols, disambig symbols, and symbols with values + // over 100000/kNontermBigNumber) to zero. (and -1 == kNoLabel to -1). + // Its purpose is to provide an + // equivalence class on labels that's relevant to what the self-loop will be + // on the following state. 
+ TidToSelfLoopMapper(const Transitions &trans_model, + const std::vector &disambig_syms, + bool currently_self_loop_free): trans_model_(trans_model), disambig_syms_(disambig_syms), - check_no_self_loops_(check_no_self_loops) { } - typedef int32 Result; - int32 operator() (int32 label) const { - if (label == static_cast(fst::kNoLabel)) return -1; // -1 -> -1 - else if (label >= 1 && label <= trans_model_.NumTransitionIds()) { - if (check_no_self_loops_ && trans_model_.IsSelfLoop(label)) + currently_self_loop_free_(currently_self_loop_free) { } + + int32 operator() (int32 tid) const { + if (tid > 0 && tid <= trans_model_.NumTransitionIds()) { + if (currently_self_loop_free_ && trans_model_.InfoForTransitionId(tid).is_self_loop) KALDI_ERR << "AddSelfLoops: graph already has self-loops."; - return trans_model_.TransitionIdToTransitionState(label); + return trans_model_.InfoForTransitionId(tid).self_loop_transition_id; + } else if (tid == fst::kNoLabel) { + return -1; } else { // 0 or (presumably) disambiguation symbol. Map to zero int32 big_number = fst::kNontermBigNumber; // 1000000 - if (label != 0 && label < big_number) + if (tid != 0 && tid < big_number) { KALDI_ASSERT(std::binary_search(disambig_syms_.begin(), disambig_syms_.end(), - label)); // or invalid label + tid) && + "It looks like you have an invalid symbol in your graph: "); + } return 0; } } private: - const TransitionModel &trans_model_; + const Transitions &trans_model_; const std::vector &disambig_syms_; // sorted. - bool check_no_self_loops_; + bool currently_self_loop_free_; }; -// This is the code that expands an FST from transition-states to -// transition-ids, in the case where reorder == true, i.e. the non-optional -// transition is before the self-loop. -static void AddSelfLoopsReorder(const TransitionModel &trans_model, - const std::vector &disambig_syms, - BaseFloat self_loop_scale, - bool check_no_self_loops, - fst::VectorFst *fst) { +// Returns true if the outgoing arcs of the state s sum to 1.0 +template +static bool StateIsStochastic(FST fst, typename FST::StateId s) { + using namespace fst; + using Arc = typename FST::Arc; + using Weight = typename Arc::Weight; + Weight total_prob = Weight::Zero(); + for (MutableArcIterator > aiter(&fst, s); + !aiter.Done(); + aiter.Next()) { + total_prob = Plus(total_prob, aiter.Value().weight); + } + return fst::ApproxEqual(total_prob, Weight::One()); +} + +void AddSelfLoops(const Transitions &trans_model, + const std::vector &disambig_syms, + bool currently_self_loop_free, + bool use_weights, + fst::VectorFst *fst) { + KALDI_ASSERT(fst->Start() != fst::kNoStateId); using namespace fst; typedef StdArc Arc; typedef Arc::Label Label; typedef Arc::StateId StateId; typedef Arc::Weight Weight; - TidToTstateMapper f(trans_model, disambig_syms, check_no_self_loops); + TidToSelfLoopMapper f(trans_model, disambig_syms, currently_self_loop_free); + // Duplicate states as necessary so that each state will require at most one // self-loop to be added to it. Approximately this means that if a // state has multiple different symbols on arcs entering it, it will be // duplicated, with one copy per incoming symbol. MakePrecedingInputSymbolsSameClass(true, fst, f); - int32 kNoTransState = f(kNoLabel); - KALDI_ASSERT(kNoTransState == -1); - - // use the following to keep track of the transition-state for each state. 
- std::vector state_in(fst->NumStates(), kNoTransState); - // This first loop just works out the label into each state, // and converts the transitions in the graph from transition-states // to transition-ids. + // state_in maps each state in the fst to its TransitionState - for (StateIterator > siter(*fst); - !siter.Done(); - siter.Next()) { - StateId s = siter.Value(); + + StateId num_states = fst->NumStates(); + // self_loop_transition_id gives the transition-id of the self-loop of this + // state, or zero if it doesn't require a self-loop. + // -1 is where we don't know the self-loop transition id (if any) + // for this state yet. + std::vector self_loop_transition_id(num_states, -1); + + for (StateId s = 0; s < num_states; s++) { for (MutableArcIterator > aiter(fst, s); !aiter.Done(); aiter.Next()) { - Arc arc = aiter.Value(); - int32 trans_state = f(arc.ilabel); - if (state_in[arc.nextstate] == kNoTransState) - state_in[arc.nextstate] = trans_state; - else { - KALDI_ASSERT(state_in[arc.nextstate] == trans_state); + const Arc &arc = aiter.Value(); + int32 next_state_self_loop_transition_id = f(arc.ilabel); + if (self_loop_transition_id[arc.nextstate] == -1) { + // Note: next_state_self_loop_transition_id could be + self_loop_transition_id[arc.nextstate] = + next_state_self_loop_transition_id; + } else { + KALDI_ASSERT(self_loop_transition_id[arc.nextstate] == + next_state_self_loop_transition_id); // or probably an error in MakePrecedingInputSymbolsSame. } } } - KALDI_ASSERT(state_in[fst->Start()] == kNoStateId || state_in[fst->Start()] == 0); - // or MakePrecedingInputSymbolsSame failed. - - // The next loop looks at each graph state, adds the self-loop [if needed] and - // multiples all the out-transitions' probs (and final-prob) by the - // forward-prob for that state (which is one minus self-loop-prob). We do it - // like this to maintain stochasticity (i.e. rather than multiplying the arcs - // with the corresponding labels on them by this probability). - - for (StateId s = 0; s < static_cast(state_in.size()); s++) { - if (state_in[s] > 0) { // defined, and not eps or a disambiguation symbol or a - // nonterminal-related sybol for grammar decoding... - int32 trans_state = static_cast(state_in[s]); - // First multiply all probabilities by "forward" probability. - BaseFloat log_prob = trans_model.GetNonSelfLoopLogProb(trans_state); - fst->SetFinal(s, Times(fst->Final(s), Weight(-log_prob*self_loop_scale))); - for (MutableArcIterator > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - arc.weight = Times(arc.weight, Weight(-log_prob*self_loop_scale)); - aiter.SetValue(arc); - } - // Now add self-loop, if needed. - int32 trans_id = trans_model.SelfLoopOf(trans_state); - if (trans_id != 0) { // has self-loop. - BaseFloat log_prob = trans_model.GetTransitionLogProb(trans_id); - fst->AddArc(s, Arc(trans_id, 0, Weight(-log_prob*self_loop_scale), s)); + if (!currently_self_loop_free) { + // there might be some self-loops present already, so make sure we don't + // duplicate them. + for (StateId s = 0; s < num_states; s++) { + for (MutableArcIterator > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + const Arc &arc = aiter.Value(); + int32 tid = arc.ilabel; + if (tid > 0 && tid <= trans_model.NumTransitionIds() && + trans_model.InfoForTransitionId(tid).is_self_loop) + self_loop_transition_id[s] = 0; } } + } else { + // We shouldn't have added a self-loop to the start state. 
+ KALDI_ASSERT(self_loop_transition_id[fst->Start()] <= 0); } -} - - -// this is the code that expands an FST from transition-states to -// transition-ids, in the case where reorder == false, i.e. non-optional -// transition is after the self-loop. -static void AddSelfLoopsNoReorder( - const TransitionModel &trans_model, - const std::vector &disambig_syms, - BaseFloat self_loop_scale, - bool check_no_self_loops, - fst::VectorFst *fst) { - using namespace fst; - typedef StdArc Arc; - typedef Arc::Label Label; - typedef Arc::StateId StateId; - typedef Arc::Weight Weight; - - // Duplicate states as necessary so that each state has at most one self-loop - // on it. - TidToTstateMapper f(trans_model, disambig_syms, check_no_self_loops); - MakeFollowingInputSymbolsSameClass(true, fst, f); - StateId num_states = fst->NumStates(); - for (StateId s = 0; s < num_states; s++) { - int32 my_trans_state = f(kNoLabel); - KALDI_ASSERT(my_trans_state == -1); - for (MutableArcIterator > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - if (my_trans_state == -1) my_trans_state = f(arc.ilabel); - else KALDI_ASSERT(my_trans_state == f(arc.ilabel)); // or MakeFollowingInputSymbolsSameClass failed. - if (my_trans_state > 0) { // transition-id; multiply weight... - BaseFloat log_prob = trans_model.GetNonSelfLoopLogProb(my_trans_state); - arc.weight = Times(arc.weight, Weight(-log_prob*self_loop_scale)); + // The next loop looks at each graph state, adds the self-loop [if needed] and + // multiples all the out-transitions' probs (and final-prob) by the inverse of + // the correction factor that we used when creating the no-self-loops graph. + // We do it like this to maintain stochasticity throughout the graph compilation + // process. + + if (use_weights) { + for (StateId s = 0; s < num_states; s++) { + int32 tid = self_loop_transition_id[s]; + if (tid <= 0) + continue; + const auto &info(trans_model.InfoForTransitionId(tid)); + + BaseFloat self_loop_cost = info.transition_cost, + correction_factor = trans_model.GetTopo().CorrectionFactorsForPhone( + info.phone)[info.topo_state]; + Weight correction(-correction_factor), + self_loop_weight(self_loop_cost); + + fst->SetFinal(s, Times(fst->Final(s), correction)); + for (MutableArcIterator > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + arc.weight = Times(arc.weight, correction); aiter.SetValue(arc); } + // Add self-loop. ilabel is `tid`, olabel is epsilon (0). + fst->AddArc(s, Arc(tid, 0, self_loop_weight, s)); } - if (fst->Final(s) != Weight::Zero()) { - KALDI_ASSERT(my_trans_state == kNoLabel || my_trans_state == 0); // or MakeFollowingInputSymbolsSameClass failed. - } - if (my_trans_state != kNoLabel && my_trans_state != 0) { - // a transition-state; add self-loop, if it has one. - int32 trans_id = trans_model.SelfLoopOf(my_trans_state); - if (trans_id != 0) { // has self-loop. 
- BaseFloat log_prob = trans_model.GetTransitionLogProb(trans_id); - fst->AddArc(s, Arc(trans_id, 0, Weight(-log_prob*self_loop_scale), s)); - } - } - } -} - -void AddSelfLoops(const TransitionModel &trans_model, - const std::vector &disambig_syms, - BaseFloat self_loop_scale, - bool reorder, - bool check_no_self_loops, - fst::VectorFst *fst) { - KALDI_ASSERT(fst->Start() != fst::kNoStateId); - if (reorder) - AddSelfLoopsReorder(trans_model, disambig_syms, self_loop_scale, - check_no_self_loops, fst); - else - AddSelfLoopsNoReorder(trans_model, disambig_syms, self_loop_scale, - check_no_self_loops, fst); -} - -// IsReordered returns true if the transitions were possibly reordered. This reordering -// can happen in AddSelfLoops, if the "reorder" option was true. -// This makes the out-transition occur before the self-loop transition. -// The function returns false (no reordering) if there is not enough information in -// the alignment to tell (i.e. no self-loop were taken), and in this case the calling -// code doesn't care what the answer is. -// The "alignment" vector contains a sequence of TransitionIds. - -static bool IsReordered(const TransitionModel &trans_model, - const std::vector &alignment) { - for (size_t i = 0; i + 1 < alignment.size(); i++) { - int32 tstate1 = trans_model.TransitionIdToTransitionState(alignment[i]), - tstate2 = trans_model.TransitionIdToTransitionState(alignment[i+1]); - if (tstate1 != tstate2) { - bool is_loop_1 = trans_model.IsSelfLoop(alignment[i]), - is_loop_2 = trans_model.IsSelfLoop(alignment[i+1]); - KALDI_ASSERT(!(is_loop_1 && is_loop_2)); // Invalid. - if (is_loop_1) return true; // Reordered. self-loop is last. - if (is_loop_2) return false; // Not reordered. self-loop is first. + } else { + for (StateId s = 0; s < num_states; s++) { + int32 tid = self_loop_transition_id[s]; + // Add self-loop. ilabel is `tid`, olabel is epsilon (0). + fst->AddArc(s, Arc(tid, 0, Weight::One(), s)); } } - - // Just one trans-state in whole sequence. - if (alignment.empty()) return false; - else { - bool is_loop_front = trans_model.IsSelfLoop(alignment.front()), - is_loop_back = trans_model.IsSelfLoop(alignment.back()); - if (is_loop_front) return false; // Not reordered. Self-loop is first. - if (is_loop_back) return true; // Reordered. Self-loop is last. - return false; // We really don't know in this case but calling code should - // not care. - } } // SplitToPhonesInternal takes as input the "alignment" vector containing @@ -656,9 +549,8 @@ static bool IsReordered(const TransitionModel &trans_model, // checks (if the input does not start at the start of a phone or does not // end at the end of a phone, we should expect that false will be returned). -static bool SplitToPhonesInternal(const TransitionModel &trans_model, +static bool SplitToPhonesInternal(const Transitions &trans_model, const std::vector &alignment, - bool reordered, std::vector > *split_output) { if (alignment.empty()) return true; // nothing to split. std::vector end_points; // points at which phones end [in an @@ -666,69 +558,44 @@ static bool SplitToPhonesInternal(const TransitionModel &trans_model, // each phone].. bool was_ok = true; - for (size_t i = 0; i < alignment.size(); i++) { + int32 prev_phone = trans_model.InfoForTransitionId(alignment[0]).phone; + // i = 0 can't be an end point, it's the start of the sequence, + // so we start with 1. 
+ for (size_t i = 1; i < alignment.size(); i++) { int32 trans_id = alignment[i]; - if (trans_model.IsFinal(trans_id)) { // is final-prob - if (!reordered) end_points.push_back(i+1); - else { // reordered. - while (i+1 < alignment.size() && - trans_model.IsSelfLoop(alignment[i+1])) { - KALDI_ASSERT(trans_model.TransitionIdToTransitionState(alignment[i]) == - trans_model.TransitionIdToTransitionState(alignment[i+1])); - i++; - } - end_points.push_back(i+1); - } - } else if (i+1 == alignment.size()) { - // need to have an end-point at the actual end. - // but this is an error- should have been detected already. + const auto &info = trans_model.InfoForTransitionId(trans_id); + if (info.is_initial) { + end_points.push_back(i); + } else if (info.phone != prev_phone) { + KALDI_WARN << "Not OK."; was_ok = false; - end_points.push_back(i+1); - } else { - int32 this_state = trans_model.TransitionIdToTransitionState(alignment[i]), - next_state = trans_model.TransitionIdToTransitionState(alignment[i+1]); - if (this_state == next_state) continue; // optimization. - int32 this_phone = trans_model.TransitionStateToPhone(this_state), - next_phone = trans_model.TransitionStateToPhone(next_state); - if (this_phone != next_phone) { - // The phone changed, but this is an error-- we should have detected this via the - // IsFinal check. - was_ok = false; - end_points.push_back(i+1); - } } + prev_phone = info.phone; + } + end_points.push_back(alignment.size()); + if (!trans_model.InfoForTransitionId(alignment.back()).is_final) { + KALDI_WARN << "Not OK."; + was_ok = false; } - size_t cur_point = 0; - for (size_t i = 0; i < end_points.size(); i++) { + size_t cur_start = 0; + for (int32 end_point: end_points) { split_output->push_back(std::vector()); - // The next if-statement checks if the initial trans-id at the current end - // point is the initial-state of the current phone if that initial-state - // is emitting (a cursory check that the alignment is plausible). - int32 trans_state = - trans_model.TransitionIdToTransitionState(alignment[cur_point]); - int32 phone = trans_model.TransitionStateToPhone(trans_state); - int32 forward_pdf_class = trans_model.GetTopo().TopologyForPhone(phone)[0].forward_pdf_class; - if (forward_pdf_class != kNoPdf) // initial-state of the current phone is emitting - if (trans_model.TransitionStateToHmmState(trans_state) != 0) - was_ok = false; - for (size_t j = cur_point; j < end_points[i]; j++) + for (size_t j = cur_start; j < end_point; j++) split_output->back().push_back(alignment[j]); - cur_point = end_points[i]; + cur_start = end_point; } return was_ok; } -bool SplitToPhones(const TransitionModel &trans_model, +bool SplitToPhones(const Transitions &trans_model, const std::vector &alignment, std::vector > *split_alignment) { KALDI_ASSERT(split_alignment != NULL); split_alignment->clear(); - bool is_reordered = IsReordered(trans_model, alignment); - return SplitToPhonesInternal(trans_model, alignment, - is_reordered, split_alignment); + return SplitToPhonesInternal(trans_model, alignment, split_alignment); } @@ -740,31 +607,32 @@ bool SplitToPhones(const TransitionModel &trans_model, 'subsample' value is not 1). 
*/ static inline void ConvertAlignmentForPhone( - const TransitionModel &old_trans_model, - const TransitionModel &new_trans_model, + const Transitions &old_trans_model, + const Transitions &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector &old_phone_alignment, const std::vector &new_phone_window, - bool old_is_reordered, - bool new_is_reordered, std::vector *new_phone_alignment) { + KALDI_ASSERT(!old_phone_alignment.empty()); int32 alignment_size = old_phone_alignment.size(); static bool warned_topology = false; int32 P = new_ctx_dep.CentralPosition(), - old_central_phone = old_trans_model.TransitionIdToPhone( - old_phone_alignment[0]), + old_central_phone = old_trans_model.InfoForTransitionId( + old_phone_alignment[0]).phone, new_central_phone = new_phone_window[P]; - const HmmTopology &old_topo = old_trans_model.GetTopo(), + const Topology &old_topo = old_trans_model.GetTopo(), &new_topo = new_trans_model.GetTopo(); - bool topology_mismatch = !(old_topo.TopologyForPhone(old_central_phone) == - new_topo.TopologyForPhone(new_central_phone)); - if (topology_mismatch) { - if (!warned_topology) { - warned_topology = true; - KALDI_WARN << "Topology mismatch detected; automatically converting. " - << "Won't warn again."; - } + // TODO(galv): Do we need the transition costs to be the same? Right + // now, I am assuming that we do, but it is unclear to me that we + // really need this. + bool topology_mismatch = !fst::Equal(old_topo.TopologyForPhone(old_central_phone), + new_topo.TopologyForPhone(new_central_phone), + 0.0); + if (topology_mismatch && !warned_topology) { + warned_topology = true; + KALDI_WARN << "Topology mismatch detected; automatically converting. " + << "Won't warn again."; } bool length_mismatch = (new_phone_alignment->size() != old_phone_alignment.size()); @@ -773,16 +641,12 @@ static inline void ConvertAlignmentForPhone( // old alignment. GetRandomAlignmentForPhone(new_ctx_dep, new_trans_model, new_phone_window, new_phone_alignment); - if (new_is_reordered) - ChangeReorderingOfAlignment(new_trans_model, new_phone_alignment); return; } - KALDI_ASSERT(!old_phone_alignment.empty()); - int32 new_num_pdf_classes = new_topo.NumPdfClasses(new_central_phone); - std::vector pdf_ids(new_num_pdf_classes); // Indexed by pdf-class - for (int32 pdf_class = 0; pdf_class < new_num_pdf_classes; pdf_class++) { + std::vector pdf_ids(new_num_pdf_classes + 1); // Indexed by pdf-class + for (int32 pdf_class = 1; pdf_class <= new_num_pdf_classes; pdf_class++) { if (!new_ctx_dep.Compute(new_phone_window, pdf_class, &(pdf_ids[pdf_class]))) { std::ostringstream ss; @@ -793,28 +657,23 @@ static inline void ConvertAlignmentForPhone( } // the topologies and lengths match -> we can directly transfer - // the alignment. + // the alignment (assume the pdf-classes are identical). 
for (int32 j = 0; j < alignment_size; j++) { - int32 old_tid = old_phone_alignment[j], - old_tstate = old_trans_model.TransitionIdToTransitionState(old_tid); - int32 forward_pdf_class = - old_trans_model.TransitionStateToForwardPdfClass(old_tstate), - self_loop_pdf_class = - old_trans_model.TransitionStateToSelfLoopPdfClass(old_tstate); - int32 hmm_state = old_trans_model.TransitionIdToHmmState(old_tid); - int32 trans_idx = old_trans_model.TransitionIdToTransitionIndex(old_tid); - int32 new_forward_pdf = pdf_ids[forward_pdf_class]; - int32 new_self_loop_pdf = pdf_ids[self_loop_pdf_class]; - int32 new_trans_state = - new_trans_model.TupleToTransitionState(new_central_phone, hmm_state, - new_forward_pdf, new_self_loop_pdf); + int32 old_tid = old_phone_alignment[j]; + auto&& info = old_trans_model.InfoForTransitionId(old_tid); + int32 old_pdf_class = old_trans_model.PdfClassForTid(old_tid); + int32 old_self_loop_pdf_class = ( + info.self_loop_pdf_id != -1 ? + old_trans_model.PdfClassForTid(info.self_loop_transition_id) : -1); + int32 new_pdf_id = pdf_ids[old_pdf_class]; + int32 new_self_loop_pdf_id = (old_self_loop_pdf_class != -1 ? + pdf_ids[old_self_loop_pdf_class] : -1); int32 new_tid = - new_trans_model.PairToTransitionId(new_trans_state, trans_idx); + new_trans_model.TupleToTransitionId(new_central_phone, info.topo_state, + info.arc_index, new_pdf_id, + new_self_loop_pdf_id); (*new_phone_alignment)[j] = new_tid; } - - if (new_is_reordered != old_is_reordered) - ChangeReorderingOfAlignment(new_trans_model, new_phone_alignment); } @@ -846,7 +705,7 @@ static inline void ConvertAlignmentForPhone( reduced-frame-rate system. @param new_lengths [out] The vector for storing new lengths. */ -static bool ComputeNewPhoneLengths(const HmmTopology &topology, +static bool ComputeNewPhoneLengths(const Topology &topology, const std::vector &mapped_phones, const std::vector &old_lengths, int32 conversion_shift, @@ -923,17 +782,16 @@ static bool ComputeNewPhoneLengths(const HmmTopology &topology, 'conversion_shift' is for. */ -static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model, - const TransitionModel &new_trans_model, - const ContextDependencyInterface &new_ctx_dep, - const std::vector &old_alignment, - int32 conversion_shift, - int32 subsample_factor, - bool new_is_reordered, - const std::vector *phone_map, - std::vector *new_alignment) { +static bool ConvertAlignmentInternal( + const Transitions &old_trans_model, + const Transitions &new_trans_model, + const ContextDependencyInterface &new_ctx_dep, + const std::vector &old_alignment, + int32 conversion_shift, + int32 subsample_factor, + const std::vector *phone_map, + std::vector *new_alignment) { KALDI_ASSERT(0 <= conversion_shift && conversion_shift < subsample_factor); - bool old_is_reordered = IsReordered(old_trans_model, old_alignment); KALDI_ASSERT(new_alignment != NULL); new_alignment->clear(); new_alignment->reserve(old_alignment.size()); @@ -944,7 +802,7 @@ static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model, std::vector mapped_phones(phone_sequence_length); for (size_t i = 0; i < phone_sequence_length; i++) { KALDI_ASSERT(!old_split[i].empty()); - mapped_phones[i] = old_trans_model.TransitionIdToPhone(old_split[i][0]); + mapped_phones[i] = old_trans_model.InfoForTransitionId(old_split[i][0]).phone; if (phone_map != NULL) { // Map the phone sequence. 
int32 sz = phone_map->size(); if (mapped_phones[i] < 0 || mapped_phones[i] >= sz || @@ -998,7 +856,6 @@ static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model, ConvertAlignmentForPhone(old_trans_model, new_trans_model, new_ctx_dep, old_alignment_for_phone, new_phone_window, - old_is_reordered, new_is_reordered, &new_alignment_for_phone); new_alignment->insert(new_alignment->end(), new_alignment_for_phone.begin(), @@ -1010,29 +867,35 @@ static bool ConvertAlignmentInternal(const TransitionModel &old_trans_model, return true; } -bool ConvertAlignment(const TransitionModel &old_trans_model, - const TransitionModel &new_trans_model, +bool ConvertAlignment(const Transitions &old_trans_model, + const Transitions &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector &old_alignment, int32 subsample_factor, bool repeat_frames, - bool new_is_reordered, const std::vector *phone_map, std::vector *new_alignment) { - if (!repeat_frames || subsample_factor == 1) { + if (subsample_factor == 1 && repeat_frames) + KALDI_WARN << "repeat_frames being set to true has no effect when " + "subsample_factor=1 (its default value)"; + + if (subsample_factor == 1 || !repeat_frames) { return ConvertAlignmentInternal(old_trans_model, new_trans_model, new_ctx_dep, old_alignment, - subsample_factor - 1, + subsample_factor - 1, // == 0 subsample_factor, - new_is_reordered, phone_map, new_alignment); // The value "subsample_factor - 1" for conversion_shift above ensures the // alignments have the same length as the output of 'subsample-feats' } else { + // either repeat_frames or subsample_factor >= 2. But if repeat_frames == True + // then and subsample_factor == 1, then it is the same as the above. std::vector > shifted_alignments(subsample_factor); + // We create alignments for all shifts from [subsample_factor -1 + // to 0], inclusive. for (int32 conversion_shift = subsample_factor - 1; conversion_shift >= 0; conversion_shift--) { if (!ConvertAlignmentInternal(old_trans_model, @@ -1041,7 +904,6 @@ bool ConvertAlignment(const TransitionModel &old_trans_model, old_alignment, conversion_shift, subsample_factor, - new_is_reordered, phone_map, &shifted_alignments[conversion_shift])) return false; @@ -1061,34 +923,9 @@ bool ConvertAlignment(const TransitionModel &old_trans_model, return true; } -// Returns the scaled, but not negated, log-prob, with the given scaling factors. 
-static BaseFloat GetScaledTransitionLogProb(const TransitionModel &trans_model, - int32 trans_id, - BaseFloat transition_scale, - BaseFloat self_loop_scale) { - if (transition_scale == self_loop_scale) { - return trans_model.GetTransitionLogProb(trans_id) * transition_scale; - } else { - if (trans_model.IsSelfLoop(trans_id)) { - return self_loop_scale * trans_model.GetTransitionLogProb(trans_id); - } else { - int32 trans_state = trans_model.TransitionIdToTransitionState(trans_id); - return self_loop_scale * trans_model.GetNonSelfLoopLogProb(trans_state) - + transition_scale * trans_model.GetTransitionLogProbIgnoringSelfLoops(trans_id); - // This could be simplified to - // (self_loop_scale - transition_scale) * trans_model.GetNonSelfLoopLogProb(trans_state) - // + trans_model.GetTransitionLogProb(trans_id); - // this simplifies if self_loop_scale == 0.0 - } - } -} - - - -void AddTransitionProbs(const TransitionModel &trans_model, +void AddTransitionProbs(const Transitions &trans_model, const std::vector &disambig_syms, // may be empty BaseFloat transition_scale, - BaseFloat self_loop_scale, fst::VectorFst *fst) { using namespace fst; KALDI_ASSERT(IsSortedAndUniq(disambig_syms)); @@ -1102,25 +939,21 @@ void AddTransitionProbs(const TransitionModel &trans_model, StdArc arc = aiter.Value(); StdArc::Label l = arc.ilabel; if (l >= 1 && l <= num_tids) { // a transition-id. - BaseFloat scaled_log_prob = GetScaledTransitionLogProb(trans_model, - l, - transition_scale, - self_loop_scale); + BaseFloat scaled_log_prob = + trans_model.InfoForTransitionId(l).transition_cost * transition_scale; arc.weight = Times(arc.weight, TropicalWeight(-scaled_log_prob)); - } else if (l != 0) { - if (!std::binary_search(disambig_syms.begin(), disambig_syms.end(), - arc.ilabel)) - KALDI_ERR << "AddTransitionProbs: invalid symbol " << arc.ilabel - << " on graph input side."; + } else if (l != 0 && !std::binary_search(disambig_syms.begin(), + disambig_syms.end(),l)) { + KALDI_ERR << "AddTransitionProbs: invalid symbol " << arc.ilabel + << " on graph input side."; } aiter.SetValue(arc); } } } -void AddTransitionProbs(const TransitionModel &trans_model, +void AddTransitionProbs(const Transitions &trans_model, BaseFloat transition_scale, - BaseFloat self_loop_scale, Lattice *lat) { using namespace fst; int num_tids = trans_model.NumTransitionIds(); @@ -1133,10 +966,8 @@ void AddTransitionProbs(const TransitionModel &trans_model, LatticeArc arc = aiter.Value(); LatticeArc::Label l = arc.ilabel; if (l >= 1 && l <= num_tids) { // a transition-id. - BaseFloat scaled_log_prob = GetScaledTransitionLogProb(trans_model, - l, - transition_scale, - self_loop_scale); + BaseFloat scaled_log_prob = + trans_model.InfoForTransitionId(l).transition_cost * transition_scale; // cost is negated log prob. arc.weight.SetValue1(arc.weight.Value1() - scaled_log_prob); } else if (l != 0) { @@ -1204,16 +1035,77 @@ bool ConvertPhnxToProns(const std::vector &phnx, } + + +void AddTransitionProbs(const Transitions &trans_model, + const std::vector &disambig_syms, // may be empty + fst::VectorFst *fst) { + using namespace fst; + KALDI_ASSERT(IsSortedAndUniq(disambig_syms)); + int num_tids = trans_model.NumTransitionIds(); + for (StateIterator > siter(*fst); + !siter.Done(); + siter.Next()) { + for (MutableArcIterator > aiter(fst, siter.Value()); + !aiter.Done(); + aiter.Next()) { + StdArc arc = aiter.Value(); + StdArc::Label l = arc.ilabel; + if (l >= 1 && l <= num_tids) { // a transition-id. 
+ BaseFloat cost = trans_model.InfoForTransitionId(l).transition_cost; + arc.weight = Times(arc.weight, TropicalWeight(cost)); + } else if (l != 0) { + if (!std::binary_search(disambig_syms.begin(), disambig_syms.end(), + arc.ilabel)) + KALDI_ERR << "AddTransitionProbs: invalid symbol " << arc.ilabel + << " on graph input side."; + } + aiter.SetValue(arc); + } + } +} + +void AddTransitionProbs(const Transitions &trans_model, + Lattice *lat) { + using namespace fst; + int num_tids = trans_model.NumTransitionIds(); + for (fst::StateIterator siter(*lat); + !siter.Done(); + siter.Next()) { + for (MutableArcIterator aiter(lat, siter.Value()); + !aiter.Done(); + aiter.Next()) { + LatticeArc arc = aiter.Value(); + LatticeArc::Label l = arc.ilabel; + if (l >= 1 && l <= num_tids) { // a transition-id. + BaseFloat cost = trans_model.InfoForTransitionId(l).transition_cost; + arc.weight.SetValue1(arc.weight.Value1() + cost); + } else if (l != 0) { + KALDI_ERR << "AddTransitionProbs: invalid symbol " << arc.ilabel + << " on lattice input side."; + } + aiter.SetValue(arc); + } + } +} + + + + + void GetRandomAlignmentForPhone(const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const std::vector &phone_window, std::vector *alignment) { typedef fst::StdArc Arc; int32 length = alignment->size(); - BaseFloat prob_scale = 0.0; - fst::VectorFst *fst = GetHmmAsFsaSimple(phone_window, ctx_dep, - trans_model, prob_scale); - fst::RmEpsilon(fst); + bool include_self_loops = true; + std::shared_ptr fst = + GetHmmAsFsa(phone_window, ctx_dep, + trans_model, + include_self_loops); + + fst::RmEpsilon(fst.get()); fst::VectorFst length_constraint_fst; { // set up length_constraint_fst. @@ -1253,41 +1145,10 @@ void GetRandomAlignmentForPhone(const ContextDependencyInterface &ctx_dep, bool ans = fst::GetLinearSymbolSequence( single_path_fst, &symbol_sequence, NULL, NULL); KALDI_ASSERT(ans && symbol_sequence.size() == length); + KALDI_PARANOID_ASSERT( + trans_model.InfoForTransitionId(symbol_sequence.front()).is_initial && + trans_model.InfoForTransitionId(symbol_sequence.back()).is_final); symbol_sequence.swap(*alignment); - delete fst; -} - -void ChangeReorderingOfAlignment(const TransitionModel &trans_model, - std::vector *alignment) { - int32 start_pos = 0, size = alignment->size(); - while (start_pos != size) { - int32 start_tid = (*alignment)[start_pos]; - int32 cur_tstate = trans_model.TransitionIdToTransitionState(start_tid); - bool start_is_self_loop = trans_model.IsSelfLoop(start_tid) ? 0 : 1; - int32 end_pos = start_pos + 1; - // If the first instance of this transition-state was a self-loop, then eat - // only non-self-loops of this state; if it was a non-self-loop, then eat - // only self-loops of this state. Imposing this condition on self-loops - // would only actually matter in the rare circumstances that phones can - // have length 1. - while (end_pos != size && - trans_model.TransitionIdToTransitionState((*alignment)[end_pos]) == - cur_tstate) { - bool this_is_self_loop = trans_model.IsSelfLoop((*alignment)[end_pos]); - if (!this_is_self_loop) { - if (start_is_self_loop) { - break; // stop before including this transition-id. - } else { - end_pos++; - break; // stop after including this transition-id. 
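The block above that sets up length_constraint_fst is elided in this hunk; the following self-contained sketch (with an assumed helper name, and allowed_labels standing in for the transition-ids of the phone FSA) shows one way such a constraint can be built: an acceptor with states 0..length in which only state `length` is final, so composing it with the phone FSA keeps exactly the paths of the requested length.

#include <vector>
#include <fst/fstlib.h>

// Illustrative only: accepts exactly `length` symbols drawn from allowed_labels.
fst::StdVectorFst MakeLengthConstraintFst(int length,
                                          const std::vector<int> &allowed_labels) {
  fst::StdVectorFst constraint;
  for (int i = 0; i <= length; i++)
    constraint.AddState();
  constraint.SetStart(0);
  constraint.SetFinal(length, fst::TropicalWeight::One());
  for (int i = 0; i < length; i++)
    for (int label : allowed_labels)
      constraint.AddArc(i, fst::StdArc(label, label,
                                       fst::TropicalWeight::One(), i + 1));
  return constraint;
}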
- } - } - end_pos++; - } - std::swap((*alignment)[start_pos], (*alignment)[end_pos - 1]); - start_pos = end_pos; - } } - } // namespace kaldi diff --git a/src/hmm/hmm-utils.h b/src/hmm/hmm-utils.h index a8ad846949e..bc9e3eaeaa7 100644 --- a/src/hmm/hmm-utils.h +++ b/src/hmm/hmm-utils.h @@ -1,6 +1,7 @@ // hmm/hmm-utils.h // Copyright 2009-2011 Microsoft Corporation +// 2019 Daniel Galvez // See ../../COPYING for clarification regarding multiple authors // @@ -20,8 +21,10 @@ #ifndef KALDI_HMM_HMM_UTILS_H_ #define KALDI_HMM_HMM_UTILS_H_ -#include "hmm/hmm-topology.h" -#include "hmm/transition-model.h" +#include + +#include "hmm/topology.h" +#include "hmm/transitions.h" #include "lat/kaldi-lattice.h" namespace kaldi { @@ -34,19 +37,16 @@ namespace kaldi { /// Configuration class for the GetHTransducer() function; see /// \ref hmm_graph_config for context. struct HTransducerConfig { - /// Transition log-prob scale, see \ref hmm_scale. - /// Note this doesn't apply to self-loops; GetHTransducer() does - /// not include self-loops. - BaseFloat transition_scale; int32 nonterm_phones_offset; + // We don't currently make `include_self_loops` configurable from the command + // line; it's included in order to make it obvious how to add the self loops. + bool include_self_loops; HTransducerConfig(): - transition_scale(1.0), - nonterm_phones_offset(-1) { } + nonterm_phones_offset(-1), + include_self_loops(false) { } void Register (OptionsItf *opts) { - opts->Register("transition-scale", &transition_scale, - "Scale of transition probs (relative to LM)"); opts->Register("nonterm-phones-offset", &nonterm_phones_offset, "The integer id of #nonterm_bos in phones.txt, if present. " "Only needs to be set if you are doing grammar decoding, " @@ -66,7 +66,7 @@ struct HmmCacheHash { /// HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used /// as cache in GetHmmAsFsa, as an optimization. typedef unordered_map >, - fst::VectorFst*, + std::shared_ptr, HmmCacheHash> HmmCacheType; @@ -76,38 +76,32 @@ typedef unordered_map >, /// "Fst". This acceptor does not include self-loops; you have to call /// AddSelfLoops() for that. (We do that at a later graph compilation phase, /// for efficiency). The labels on the FSA correspond to transition-ids. +/// But now we already have self-loops... Problematic? /// /// as the symbols. /// For documentation in context, see \ref hmm_graph_get_hmm_as_fst -/// @param context_window A vector representing the phonetic context; see +/// @param [in] context_window A vector representing the phonetic context; see /// \ref tree_window "here" for explanation. -/// @param ctx_dep The object that contains the phonetic decision-tree -/// @param trans_model The transition-model object, which provides +/// @param [in] ctx_dep The object that contains the phonetic decision-tree +/// @param [in] trans_model The transition-model object, which provides /// the mappings to transition-ids and also the transition /// probabilities. -/// @param config Configuration object, see \ref HTransducerConfig. +/// @param [in] include_self_loops. If true, self-loop arcs will be +/// included in the result; if false, they will be omitted and +/// the probabilities appropriately renormalized; you can +/// add them later using AddSelfLoops(). 
/// @param cache Object used as a lookaside buffer to save computation; /// if it finds that the object it needs is already there, it will -/// just return a pointer value from "cache"-- not that this means +/// just return a pointer value from "cache"-- note that this means /// you have to be careful not to delete things twice. -fst::VectorFst *GetHmmAsFsa( - std::vector context_window, +std::shared_ptr GetHmmAsFsa( + const std::vector &context_window, const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - const HTransducerConfig &config, + const Transitions &trans_model, + bool include_self_loops = false, HmmCacheType *cache = NULL); -/// Included mainly as a form of documentation, not used in any other code -/// currently. Creates the acceptor FST with self-loops, and with fewer -/// options. -fst::VectorFst* -GetHmmAsFsaSimple(std::vector context_window, - const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - BaseFloat prob_scale); - - /** * Returns the H tranducer; result owned by caller. Caution: our version of * the H transducer does not include self-loops; you have to add those later. @@ -123,10 +117,10 @@ GetHmmAsFsaSimple(std::vector context_window, * the input of the transducer (i.e. same symbol type as whatever is on the * input of the transducer */ -fst::VectorFst* +std::unique_ptr> GetHTransducer(const std::vector > &ilabel_info, const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const HTransducerConfig &config, std::vector *disambig_syms_left); @@ -148,7 +142,7 @@ GetHTransducer(const std::vector > &ilabel_info, */ void GetIlabelMapping(const std::vector > &ilabel_info_old, const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, std::vector *old2new_map); @@ -164,34 +158,30 @@ void GetIlabelMapping(const std::vector > &ilabel_info_old, * same as disambiguation symbols, assuming they are special symbols for * grammar decoding. * - * @param trans_model [in] Transition model - * @param disambig_syms [in] Sorted, uniq list of disambiguation symbols, required - * if the graph contains disambiguation symbols but only needed for sanity checks. - * @param self_loop_scale [in] Transition-probability scale for self-loops; c.f. - * \ref hmm_scale - * @param reorder [in] If true, reorders the transitions (see \ref hmm_reorder). - * You'll normally want this to be true. - * @param check_no_self_loops [in] If true, it will check that there are no - * self-loops in the original graph; you'll normally want - * this to be true. If false, it will allow them, and - * will add self-loops after the original self-loop - * transitions, assuming reorder==true... this happens to - * be what we want when converting normal to unconstrained - * chain examples. WARNING: this was added in 2018; - * if you get a compilation error, add this as 'true', - * which emulates the behavior of older code. - * @param fst [in, out] The FST to be modified. + * @param [in] trans_model Transition model + * @param [in] disambig_syms Sorted, unique list of disambiguation symbols, required + * if the graph contains disambiguation symbols but only needed for sanity checks. + * @param [in] currently_self_loop_free If true, we require (and check) that + * the graph was free of self-loops at entry. If + * false, it assumes that some states may already have + * self-loops, and will refrain from adding duplicate + * self-loop to them. 
+ * @param [in] use_weights If true, weights will be used (which + * includes a correction term to make things continue to + * sum to one); otherwise, we add the new self-loop arcs + * with probability One(). + * @param fst [in, out] The FST to be modified. This should normally be HCLG + * or any other FST with transition ids as its input + * labels. */ -void AddSelfLoops(const TransitionModel &trans_model, +void AddSelfLoops(const Transitions &trans_model, const std::vector &disambig_syms, // used as a check only. - BaseFloat self_loop_scale, - bool reorder, - bool check_no_self_loops, + bool currently_self_loop_free, + bool use_weights, fst::VectorFst *fst); /** - * Adds transition-probs, with the supplied - * scales (see \ref hmm_scale), to the graph. + * Adds transition-prob to the graph. * Useful if you want to create a graph without transition probs, then possibly * train the model (including the transition probs) but keep the graph fixed, * and add back in the transition probs. It assumes the fst has transition-ids @@ -200,36 +190,29 @@ void AddSelfLoops(const TransitionModel &trans_model, * @param disambig_syms [in] A list of disambiguation symbols, required if the * graph has disambiguation symbols on its input but only * used for checks. - * @param transition_scale [in] A scale on transition-probabilities apart from - * those involving self-loops; see \ref hmm_scale. - * @param self_loop_scale [in] A scale on self-loop transition probabilities; - * see \ref hmm_scale. * @param fst [in, out] The FST to be modified. */ -void AddTransitionProbs(const TransitionModel &trans_model, +void AddTransitionProbs(const Transitions &trans_model, const std::vector &disambig_syms, - BaseFloat transition_scale, - BaseFloat self_loop_scale, fst::VectorFst *fst); /** This is as AddSelfLoops(), but operates on a Lattice, where it affects the graph part of the weight (the first element of the pair). */ -void AddTransitionProbs(const TransitionModel &trans_model, - BaseFloat transition_scale, - BaseFloat self_loop_scale, +void AddTransitionProbs(const Transitions &trans_model, Lattice *lat); + /// Returns a transducer from pdfs plus one (input) to transition-ids (output). -/// Currenly of use only for testing. -fst::VectorFst* -GetPdfToTransitionIdTransducer(const TransitionModel &trans_model); +/// Currently of use only for testing. +std::unique_ptr> +GetPdfToTransitionIdTransducer(const Transitions &trans_model); /// Converts all transition-ids in the FST to pdfs plus one. /// Placeholder: not implemented yet! -void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model, +void ConvertTransitionIdsToPdfs(const Transitions &trans_model, const std::vector &disambig_syms, fst::VectorFst *fst); @@ -248,7 +231,7 @@ void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model, /// die or throw an exception. /// This function works out by itself whether the graph was created /// with "reordering", and just does the right thing. -bool SplitToPhones(const TransitionModel &trans_model, +bool SplitToPhones(const Transitions &trans_model, const std::vector &alignment, std::vector > *split_alignment); @@ -277,20 +260,16 @@ bool SplitToPhones(const TransitionModel &trans_model, 'subsample_factor' separately generated alignments, to keep the phone boundaries the same as the input where possible.] - @param reorder [in] True if you want the pdf-ids on the new alignment to - be 'reordered'. (vs. 
the way they appear in - the HmmTopology object) @param phone_map [in] If non-NULL, map from old to new phones. @param new_alignment [out] The converted alignment. */ -bool ConvertAlignment(const TransitionModel &old_trans_model, - const TransitionModel &new_trans_model, +bool ConvertAlignment(const Transitions &old_trans_model, + const Transitions &new_trans_model, const ContextDependencyInterface &new_ctx_dep, const std::vector &old_alignment, int32 subsample_factor, // 1 in the normal case -> no subsampling. bool repeat_frames, - bool reorder, const std::vector *phone_map, // may be NULL std::vector *new_alignment); @@ -319,16 +298,10 @@ bool ConvertPhnxToProns(const std::vector &phnx, The alignment will be without 'reordering'. */ void GetRandomAlignmentForPhone(const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, + const Transitions &trans_model, const std::vector &phone_window, std::vector *alignment); -/* - If the alignment was non-reordered makes it reordered, and vice versa. -*/ -void ChangeReorderingOfAlignment(const TransitionModel &trans_model, - std::vector *alignment); - /// @} end "addtogroup hmm_group" } // end namespace kaldi diff --git a/src/hmm/posterior.cc b/src/hmm/posterior.cc index 860a979a0ce..4742c0f7824 100644 --- a/src/hmm/posterior.cc +++ b/src/hmm/posterior.cc @@ -299,19 +299,19 @@ void AlignmentToPosterior(const std::vector &ali, } struct ComparePosteriorByPdfs { - const TransitionModel *tmodel_; - ComparePosteriorByPdfs(const TransitionModel &tmodel): tmodel_(&tmodel) {} + const Transitions *tmodel_; + ComparePosteriorByPdfs(const Transitions &tmodel): tmodel_(&tmodel) {} bool operator() (const std::pair &a, const std::pair &b) { - if (tmodel_->TransitionIdToPdf(a.first) - < tmodel_->TransitionIdToPdf(b.first)) + if (tmodel_->TransitionIdToPdfFast(a.first) + < tmodel_->TransitionIdToPdfFast(b.first)) return true; else return false; } }; -void SortPosteriorByPdfs(const TransitionModel &tmodel, +void SortPosteriorByPdfs(const Transitions &tmodel, Posterior *post) { ComparePosteriorByPdfs compare(tmodel); for (size_t i = 0; i < post->size(); i++) { @@ -319,7 +319,7 @@ void SortPosteriorByPdfs(const TransitionModel &tmodel, } } -void ConvertPosteriorToPdfs(const TransitionModel &tmodel, +void ConvertPosteriorToPdfs(const Transitions &tmodel, const Posterior &post_in, Posterior *post_out) { post_out->clear(); @@ -328,7 +328,7 @@ void ConvertPosteriorToPdfs(const TransitionModel &tmodel, unordered_map pdf_to_post; for (size_t j = 0; j < post_in[i].size(); j++) { int32 tid = post_in[i][j].first, - pdf_id = tmodel.TransitionIdToPdf(tid); + pdf_id = tmodel.TransitionIdToPdfFast(tid); BaseFloat post = post_in[i][j].second; if (pdf_to_post.count(pdf_id) == 0) pdf_to_post[pdf_id] = post; @@ -345,7 +345,7 @@ void ConvertPosteriorToPdfs(const TransitionModel &tmodel, } } -void ConvertPosteriorToPhones(const TransitionModel &tmodel, +void ConvertPosteriorToPhones(const Transitions &tmodel, const Posterior &post_in, Posterior *post_out) { post_out->clear(); @@ -354,7 +354,7 @@ void ConvertPosteriorToPhones(const TransitionModel &tmodel, std::map phone_to_post; for (size_t j = 0; j < post_in[i].size(); j++) { int32 tid = post_in[i][j].first, - phone_id = tmodel.TransitionIdToPhone(tid); + phone_id = tmodel.InfoForTransitionId(tid).phone; BaseFloat post = post_in[i][j].second; if (phone_to_post.count(phone_id) == 0) phone_to_post[phone_id] = post; @@ -372,7 +372,7 @@ void ConvertPosteriorToPhones(const TransitionModel &tmodel, } -void 
WeightSilencePost(const TransitionModel &trans_model, +void WeightSilencePost(const Transitions &trans_model, const ConstIntegerSet &silence_set, BaseFloat silence_scale, Posterior *post) { @@ -381,7 +381,7 @@ void WeightSilencePost(const TransitionModel &trans_model, this_post.reserve((*post)[i].size()); for (size_t j = 0; j < (*post)[i].size(); j++) { int32 tid = (*post)[i][j].first, - phone = trans_model.TransitionIdToPhone(tid); + phone = trans_model.InfoForTransitionId(tid).phone; BaseFloat weight = (*post)[i][j].second; if (silence_set.count(phone) != 0) { // is a silence. if (silence_scale != 0.0) @@ -395,7 +395,7 @@ void WeightSilencePost(const TransitionModel &trans_model, } -void WeightSilencePostDistributed(const TransitionModel &trans_model, +void WeightSilencePostDistributed(const Transitions &trans_model, const ConstIntegerSet &silence_set, BaseFloat silence_scale, Posterior *post) { @@ -405,7 +405,7 @@ void WeightSilencePostDistributed(const TransitionModel &trans_model, BaseFloat sil_weight = 0.0, nonsil_weight = 0.0; for (size_t j = 0; j < (*post)[i].size(); j++) { int32 tid = (*post)[i][j].first, - phone = trans_model.TransitionIdToPhone(tid); + phone = trans_model.InfoForTransitionId(tid).phone; BaseFloat weight = (*post)[i][j].second; if (silence_set.count(phone) != 0) sil_weight += weight; else nonsil_weight += weight; @@ -537,7 +537,7 @@ template void PosteriorToMatrix(const Posterior &post, template void PosteriorToPdfMatrix(const Posterior &post, - const TransitionModel &model, + const Transitions &model, Matrix *mat) { // Allocate the matrix, int32 num_rows = post.size(), @@ -546,7 +546,7 @@ void PosteriorToPdfMatrix(const Posterior &post, // Fill from Posterior, for (int32 t = 0; t < post.size(); t++) { for (int32 i = 0; i < post[t].size(); i++) { - int32 col = model.TransitionIdToPdf(post[t][i].first); + int32 col = model.TransitionIdToPdfFast(post[t][i].first); if (col >= num_cols) { KALDI_ERR << "Out-of-bound Posterior element with index " << col << ", higher than number of columns " << num_cols; @@ -557,10 +557,10 @@ void PosteriorToPdfMatrix(const Posterior &post, } // instantiate the template function, template void PosteriorToPdfMatrix(const Posterior &post, - const TransitionModel &model, + const Transitions &model, Matrix *mat); template void PosteriorToPdfMatrix(const Posterior &post, - const TransitionModel &model, + const Transitions &model, Matrix *mat); } // End namespace kaldi diff --git a/src/hmm/posterior.h b/src/hmm/posterior.h index e153c249740..7663cf0ce42 100644 --- a/src/hmm/posterior.h +++ b/src/hmm/posterior.h @@ -26,7 +26,7 @@ #include "base/kaldi-common.h" #include "util/const-integer-set.h" #include "util/kaldi-table.h" -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "matrix/kaldi-matrix.h" @@ -205,19 +205,19 @@ void AlignmentToPosterior(const std::vector &ali, /// Sorts posterior entries so that transition-ids with same pdf-id are next to /// each other. -void SortPosteriorByPdfs(const TransitionModel &tmodel, +void SortPosteriorByPdfs(const Transitions &tmodel, Posterior *post); /// Converts a posterior over transition-ids to be a posterior /// over pdf-ids. -void ConvertPosteriorToPdfs(const TransitionModel &tmodel, +void ConvertPosteriorToPdfs(const Transitions &tmodel, const Posterior &post_in, Posterior *post_out); /// Converts a posterior over transition-ids to be a posterior /// over phones. 
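As a standalone illustration of the merging that ConvertPosteriorToPdfs() and ConvertPosteriorToPhones() perform on each frame (simplified types and a hypothetical helper name, not the library API): all entries of a frame whose transition-ids map to the same target id have their weights summed.

#include <unordered_map>
#include <utility>
#include <vector>

// Sketch: collapse one frame of (transition-id, weight) pairs using a
// caller-supplied map, e.g. transition-id -> pdf-id or transition-id -> phone.
std::vector<std::pair<int, float>> MergeFrameByMappedId(
    const std::vector<std::pair<int, float>> &frame,
    const std::unordered_map<int, int> &tid_to_target) {
  std::unordered_map<int, float> summed;
  for (const auto &entry : frame)
    summed[tid_to_target.at(entry.first)] += entry.second;
  return std::vector<std::pair<int, float>>(summed.begin(), summed.end());
}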
-void ConvertPosteriorToPhones(const TransitionModel &tmodel, +void ConvertPosteriorToPhones(const Transitions &tmodel, const Posterior &post_in, Posterior *post_out); @@ -225,7 +225,7 @@ void ConvertPosteriorToPhones(const TransitionModel &tmodel, /// in the set "silence_set" by scale "silence_scale". /// The interface was changed in Feb 2014 to do the modification /// "in-place" rather than having separate input and output. -void WeightSilencePost(const TransitionModel &trans_model, +void WeightSilencePost(const Transitions &trans_model, const ConstIntegerSet &silence_set, BaseFloat silence_scale, Posterior *post); @@ -236,7 +236,7 @@ void WeightSilencePost(const TransitionModel &trans_model, /// has the effect that frames that are mostly silence get down-weighted. /// The interface was changed in Feb 2014 to do the modification /// "in-place" rather than having separate input and output. -void WeightSilencePostDistributed(const TransitionModel &trans_model, +void WeightSilencePostDistributed(const Transitions &trans_model, const ConstIntegerSet &silence_set, BaseFloat silence_scale, Posterior *post); @@ -250,11 +250,11 @@ void PosteriorToMatrix(const Posterior &post, /// This converts a Posterior to a Matrix. The number of matrix-rows is the same /// as the 'post.size()', the number of matrix-columns is defined by 'NumPdfs' -/// in the TransitionModel. +/// in the Transitions. /// The elements which are not specified in 'Posterior' are equal to zero. template void PosteriorToPdfMatrix(const Posterior &post, - const TransitionModel &model, + const Transitions &model, Matrix *mat); /// @} end "addtogroup posterior_group" diff --git a/src/hmm/hmm-topology-test.cc b/src/hmm/topology-test.cc similarity index 64% rename from src/hmm/hmm-topology-test.cc rename to src/hmm/topology-test.cc index 14081d2355d..7073ce94866 100644 --- a/src/hmm/hmm-topology-test.cc +++ b/src/hmm/topology-test.cc @@ -2,6 +2,7 @@ // Copyright 2009-2011 Microsoft Corporation // 2015 Johns Hopkins University (author: Daniel Povey) +// 2019 Hossein Hadian // See ../../COPYING for clarification regarding multiple authors // @@ -18,58 +19,47 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. -#include "hmm/hmm-topology.h" +#include "hmm/topology.h" #include "hmm/hmm-test-utils.h" namespace kaldi { -void TestHmmTopology() { - bool binary = (Rand()%2 == 0); +void TestTopology() { + bool binary = (Rand() % 2 == 0); std::string input_str = "\n" "\n" " 1 2 3 4 5 6 7 8 9 \n" - " 0 0\n" - " 0 0.5\n" - " 1 0.5\n" - " \n" - " 1 1 \n" - " 1 0.5\n" - " 2 0.5\n" - " \n" - " 2 2\n" - " 2 0.5\n" - " 3 0.5\n" - " \n" - " 3 \n" + " 0 1 1 0\n" + " 1 1 1 0.693\n" + " 1 2 2 0.693\n" + " 2 2 2 0.693\n" + " 2 3 3 0.693\n" + " 3 3 3 0.693\n" + " 3 0.693\n\n" " \n" - " \n" - " 10 11 13 \n" - " 0 0\n" - " 0 0.5\n" - " 1 0.5\n" - " \n" - " 1 1 \n" - " 1 0.5\n" - " 2 0.5\n" - " \n" - " 2 " - " \n" - " \n"; + + "\n" + " 10 11 13 \n" + // " 0 0 1 0.693\n" // disallowed! 
+ " 0 1 1 0\n" + " 1 1 2 0.693\n" + " 1 2 2 0.693\n" + " 2 \n\n" + "\n" + "\n"; std::string chain_input_str = "\n" "\n" " 1 2 3 4 5 6 7 8 9 \n" - " 0 0 1\n" - " 0 0.5\n" - " 1 0.5\n" - " \n" - " 1 \n" + " 0 1 1 0.0\n" + " 1 1 2 0.693\n" + " 1 0.693\n\n" "\n" "\n"; - HmmTopology topo; + Topology topo; if (RandInt(0, 1) == 0) { topo = GenRandTopology(); @@ -83,8 +73,7 @@ void TestHmmTopology() { std::ostringstream oss; topo.Write(oss, binary); - HmmTopology topo2; - // std::cout << oss.str() << '\n' << std::flush; + Topology topo2; std::istringstream iss2(oss.str()); topo2.Read(iss2, binary); @@ -96,7 +85,7 @@ void TestHmmTopology() { } { // test chain topology - HmmTopology chain_topo; + Topology chain_topo; std::istringstream chain_iss(chain_input_str); chain_topo.Read(chain_iss, false); KALDI_ASSERT(chain_topo.MinLength(3) == 1); @@ -116,8 +105,7 @@ void TestHmmTopology() { int main() { // repeat the test ten times for (int i = 0; i < 10; i++) { - kaldi::TestHmmTopology(); + kaldi::TestTopology(); } std::cout << "Test OK.\n"; } - diff --git a/src/hmm/topology.cc b/src/hmm/topology.cc new file mode 100644 index 00000000000..4a90a0d5414 --- /dev/null +++ b/src/hmm/topology.cc @@ -0,0 +1,366 @@ +// hmm/topology.cc + +// Copyright 2009-2011 Microsoft Corporation +// 2014-2019 Johns Hopkins University (author: Daniel Povey) +// 2019 Daniel Galvez +// 2019 Hossein Hadian + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "util/common-utils.h" +#include "hmm/topology.h" +#include "util/stl-utils.h" +#include "util/text-utils.h" +#include "fstext/kaldi-fst-io.h" +#include "fstext/fstext-utils.h" + + +namespace kaldi { + +void Topology::Read(std::istream &is, bool binary) { + ExpectToken(is, binary, ""); + if (!binary) { + phones_.clear(); + phone2idx_.clear(); + entries_.clear(); + std::string token; + while ( ! (is >> token).fail() ) { + if (token == "") { + break; // finished parsing. + } else if (token != "") { + KALDI_ERR << "Reading Topology object, expected or " + ", got "<"); + std::vector phones; + std::string s; + while (1) { + is >> s; + if (is.fail()) + KALDI_ERR << "Reading Topology object, unexpected end of file " + "while expecting phones."; + if (s == "") break; + else { + int32 phone; + if (!ConvertStringToInteger(s, &phone)) + KALDI_ERR << "Reading Topology object, expected " + << "integer, got instead " << s; + KALDI_ASSERT(phone > 0); + phones.push_back(phone); + } + } + + int32 entry_index = entries_.size(); + fst::StdVectorFst fst; + ReadFsaKaldi(is, &fst); + entries_.push_back(fst); + + for (int32 phone : phones) { + if (static_cast(phone2idx_.size()) <= phone) + phone2idx_.resize(phone + 1, -1); // -1 is invalid index. 
+ if (phone2idx_[phone] != -1) { + KALDI_ERR << "Phone " + << phone << " appears in multiple topology entries."; + } + phone2idx_[phone] = entry_index; + phones_.push_back(phone); + } + ExpectToken(is, binary, ""); + } + } + std::sort(phones_.begin(), phones_.end()); + KALDI_ASSERT(IsSortedAndUniq(phones_)); + } else { + ReadIntegerVector(is, binary, &phones_); + ReadIntegerVector(is, binary, &phone2idx_); + int32 number_topology_entries; + ReadBasicType(is, binary, &number_topology_entries); + for (size_t index = 0; index < number_topology_entries; ++index) { + fst::StdVectorFst fst; + ReadFstKaldi(is, binary, &fst); + entries_.push_back(fst); + } + ExpectToken(is, binary, ""); + } + ComputeDerived(); + Check(); +} + +// This function writes an FSA in text mode to an output stream. +template +static void WriteFsa(std::ostream &os, const fst::VectorFst &fst) { + os << '\n'; + bool acceptor = true, write_one = false; + fst::FstPrinter printer(fst, fst.InputSymbols(), fst.OutputSymbols(), + NULL, acceptor, write_one, "\t"); + printer.Print(&os, ""); + if (os.fail()) + KALDI_ERR << "Stream failure detected writing FST to stream."; + os << '\n'; + if (!os.good()) + KALDI_ERR << "Error writing FST to stream."; +} + +void Topology::Write(std::ostream &os, bool binary) const { + WriteToken(os, binary, ""); + if (!binary) { + for (int index = 0; index < entries_.size(); ++index) { + WriteToken(os, binary, ""); + WriteToken(os, binary, ""); + for (auto phone: phones_) + if (phone2idx_[phone] == index) + os << phone << " "; + os << ""; + WriteFsa(os, entries_[index]); + os << "\n"; + } + } else { + WriteIntegerVector(os, binary, phones_); + WriteIntegerVector(os, binary, phone2idx_); + int32 number_topology_entries = entries_.size(); + WriteBasicType(os, binary, number_topology_entries); + for (auto const& fst : entries_) + WriteFstKaldi(os, binary, fst); + } + WriteToken(os, binary, ""); +} + +void Topology::Check() { + if (entries_.empty() || phones_.empty() || phone2idx_.empty()) + KALDI_ERR << "Empty object."; + std::vector is_seen(entries_.size(), false); + for (size_t i = 0; i < phones_.size(); i++) { + int32 phone = phones_[i]; + if (static_cast(phone) >= phone2idx_.size() || + static_cast(phone2idx_[phone]) >= entries_.size()) + KALDI_ERR << "Phone " << phone << " has no valid index."; + is_seen[phone2idx_[phone]] = true; + } + if (!std::accumulate(is_seen.begin(), + is_seen.end(), true, std::logical_and())) + KALDI_ERR << "Entry with no corresponding phones."; + + for (auto const& entry: entries_) { + if (!fst::Verify(entry)) { + KALDI_ERR << "Ill-formed FST provided."; + } + if (entry.NumStates() <= 1) + KALDI_ERR << "Cannot only have one state (must have a " + << "final state and a start state)."; + bool has_final_state = false; + std::vector seen_pdf_classes; + for (fst::StateIterator state_iter(entry); + !state_iter.Done(); state_iter.Next()) { + StateId state = state_iter.Value(); + if (entry.Final(state) != Weight::Zero()) + has_final_state = true; + + BaseFloat outward_prob_sum = exp(-entry.Final(state).Value()); + for (fst::ArcIterator aiter(entry, state); + !aiter.Done(); aiter.Next()) { + const fst::StdArc &arc(aiter.Value()); + if (arc.ilabel != arc.olabel) + KALDI_ERR << "The topology must be an acceptor but ilabel != olabel."; + if (arc.ilabel == 0) + KALDI_ERR << "Epsilon arcs (pdf-class 0) are not allowed."; + if (arc.nextstate == entry.Start()) + KALDI_ERR << "Start state may not have any inward transitions."; + seen_pdf_classes.push_back(arc.ilabel); + outward_prob_sum += 
exp(-arc.weight.Value()); + } + if (!ApproxEqual(outward_prob_sum, 1.0)) + KALDI_WARN << "Outward transition probabilities should sum to 1.0 " + "for each state, value was: " << outward_prob_sum; + } + if (!has_final_state) { + KALDI_ERR << "Must have a final state."; + } + + if (entry.Final(entry.Start()) != Weight::Zero()) + KALDI_ERR << "Start state must not be a final state."; + + if (entry.Start() != 0) { + KALDI_ERR << "Topology::Check(), start state must be 0."; + } + + SortAndUniq(&seen_pdf_classes); + if (seen_pdf_classes.front() != 1 || + seen_pdf_classes.back() != static_cast(seen_pdf_classes.size())) + KALDI_ERR << "pdf_classes are expected to be " + "contiguous and start from 1."; + + int num_states = entry.NumStates(); + int num_arcs = NumArcs(entry); + fst::StdVectorFst fst(entry); // Call Connect on a copy. + fst::Connect(&fst); + if (entry.NumStates() == 0) + KALDI_ERR << "Some of the states in the topology are not reachable."; + if (fst.NumStates() != num_states || NumArcs(fst) != num_arcs) + KALDI_ERR << "Topology changed after calling Connect()."; + } + KALDI_ASSERT(self_loop_correction_factors_.size() == entries_.size() && + self_loop_pdf_classes_.size() == entries_.size()); +} + +const fst::StdVectorFst& Topology::TopologyForPhone(int32 phone) const { + if (static_cast(phone) >= phone2idx_.size() + || phone2idx_[phone] == -1) + KALDI_ERR << "TopologyForPhone(), phone " << phone << " not covered."; + return entries_[phone2idx_[phone]]; +} + +const std::vector& Topology::CorrectionFactorsForPhone(int32 phone) const { + if (static_cast(phone) >= phone2idx_.size() + || phone2idx_[phone] == -1) + KALDI_ERR << "TopologyForPhone(), phone " << phone << " not covered."; + return self_loop_correction_factors_[phone2idx_[phone]]; +} + +const std::vector& Topology::SelfLoopPdfClassesForPhone(int32 phone) const { + if (static_cast(phone) >= phone2idx_.size() + || phone2idx_[phone] == -1) { + KALDI_ERR << "TopologyForPhone(), phone " << phone << " not covered."; + } + return self_loop_pdf_classes_[phone2idx_[phone]]; +} + + +int32 Topology::NumPdfClasses(int32 phone) const { + // will throw if phone not covered. + const fst::StdVectorFst &entry = TopologyForPhone(phone); + + std::set pdfs; + for (fst::StateIterator siter(entry); + !siter.Done(); siter.Next()) { + StateId state_id = siter.Value(); + for (fst::ArcIterator aiter(entry, state_id); + !aiter.Done(); aiter.Next()) { + pdfs.insert(aiter.Value().ilabel); + } + } + return pdfs.size(); +} + +void Topology::GetPhoneToNumPdfClasses( + std::vector *phone2num_pdf_classes) const { + KALDI_ASSERT(!phones_.empty()); + phone2num_pdf_classes->clear(); + phone2num_pdf_classes->resize(phones_.back() + 1, -1); + for (auto phone: phones_) + (*phone2num_pdf_classes)[phone] = NumPdfClasses(phone); +} + +int32 Topology::MinLength(int32 phone) const { + using Weight = typename fst::StdFst::Weight; + using StateId = typename fst::StdFst::StateId; + const fst::StdVectorFst& this_topo = TopologyForPhone(phone); + // 1) Prepare a new FST with arc weight of 1.f and final state weight of 0.f + // (Note that 0.f == Weight::One() in Tropical Semiring).
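Since the rest of MinLength() relies on tropical-semiring conventions, here is a tiny self-contained reminder (illustrative, not part of the patch) of the identities being used: weights are costs, Times() adds costs, One() is cost 0, and Plus() takes the minimum, which is why unit arc costs turn a shortest-distance computation into a minimum path length.

#include <cassert>
#include <fst/fstlib.h>

int main() {
  using fst::TropicalWeight;
  assert(TropicalWeight::One().Value() == 0.0f);                              // "prob 1" is cost 0
  assert(Times(TropicalWeight(1.0f), TropicalWeight(2.0f)).Value() == 3.0f);  // costs add
  assert(Plus(TropicalWeight(1.0f), TropicalWeight(2.0f)).Value() == 1.0f);   // Plus == min
  return 0;
}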
+ // Copy the topology entry into a VectorFst so that we can mutate its + // arcs and final weights (the entry returned by TopologyForPhone() is const). + std::unique_ptr topo_copy(this_topo.Copy()); + + std::vector final_states; + for (fst::StateIterator siter(*topo_copy); + !siter.Done(); siter.Next()) { + StateId state_id = siter.Value(); + + if (topo_copy->Final(state_id) != Weight::Zero()) { + final_states.push_back(state_id); + topo_copy->SetFinal(state_id, Weight::One()); + } + + for (fst::MutableArcIterator aiter(topo_copy.get(), state_id); + !aiter.Done(); aiter.Next()) { + Arc original_arc = aiter.Value(); + Arc distance_one_arc(original_arc.ilabel, original_arc.olabel, + Weight(1.0f), original_arc.nextstate); + aiter.SetValue(distance_one_arc); + } + } + KALDI_ASSERT(!final_states.empty()); + // Now run single-source shortest-distance from the start state. + std::vector distances; + fst::ShortestDistance(*topo_copy, &distances); + fst::NaturalLess less; + auto min_final_state_iter = + std::min_element(final_states.begin(), final_states.end(), + [&distances, &less](StateId state1, StateId state2) { + return less(distances[state1], distances[state2]); + }); + Weight distance = distances[*min_final_state_iter]; + return static_cast(distance.Value()); +} + +bool Topology::operator==(const Topology &other) const { + if (phones_ != other.phones_ || phone2idx_ != other.phone2idx_ || + entries_.size() != other.entries_.size()) { + return false; + } else { + for (size_t i = 0; i < entries_.size(); ++i) { + if (!fst::Equal(entries_[i], other.entries_[i], /*delta=*/0, + fst::kEqualFsts)) { + return false; + } + } + return true; + } +} + + +void Topology::ComputeDerived() { + using Arc = fst::StdArc; + using StateId = Arc::StateId; + using Weight = Arc::Weight; + + self_loop_correction_factors_.resize(entries_.size()); + self_loop_pdf_classes_.resize(entries_.size()); + for (size_t i = 0; i < entries_.size(); i++) { + const fst::StdVectorFst &entry = entries_[i]; + std::vector &correction_factors( + self_loop_correction_factors_[i]); + std::vector &self_loop_pdf_classes( + self_loop_pdf_classes_[i]); + StateId num_states = entry.NumStates(); + correction_factors.resize(num_states); + self_loop_pdf_classes.resize(num_states, -1); + for (StateId s = 0; s < num_states; s++) { + float tot_prob = exp(-entry.Final(s).Value()), + self_loop_prob = 0.0; + for (fst::ArcIterator aiter(entry, s); + !aiter.Done(); aiter.Next()) { + const Arc& arc = aiter.Value(); + float this_prob = exp(-arc.weight.Value()); + tot_prob += this_prob; + if (arc.nextstate == s) { + self_loop_prob += this_prob; + KALDI_ASSERT(self_loop_pdf_classes[s] == -1 && + "State in topology has more than one self-loop"); + self_loop_pdf_classes[s] = arc.ilabel; + } + } + KALDI_ASSERT(tot_prob > 0 && "Invalid topology"); + // correction_factor is initialized with a number <= 0 that will be added + // to costs. It will result in properly normalized probs after removing + // the self-loop, assuming the topo was properly normalized before.
+ correction_factors[s] = log((tot_prob - self_loop_prob) / tot_prob); + } + } +} + +} // End namespace kaldi diff --git a/src/hmm/topology.h b/src/hmm/topology.h new file mode 100644 index 00000000000..55ec4dcf35c --- /dev/null +++ b/src/hmm/topology.h @@ -0,0 +1,186 @@ +// hmm/topology.h + +// Copyright 2009-2011 Microsoft Corporation +// 2019 Johns Hopkins University (author: Daniel Povey) +// 2019 Daniel Galvez + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_HMM_HMM_TOPOLOGY_H_ +#define KALDI_HMM_HMM_TOPOLOGY_H_ + +#include +#include "base/kaldi-common.h" + + +namespace kaldi { + + +/// \addtogroup hmm_group +/// @{ + +/* + The following would be the text form for the "normal" 3-state HMM topology/ + "bakis model", with the typical reordering that we do to improve the + compactness of the compiled FSTs. The format is the OpenFst acceptor format. + The fields are, for transitions, + + and, for final-states, + + + The may be interpreted as negative log probabilities. + We normally set them so as to sum to one, in order to keep the fully + compiled (HCLG) graph fairly stochastic (meaning: sum-to-one, like an + HMM). + + The integers on the arcs, which we call 'pdf-classes', define which + arcs share the same "pdf" and which ones are distinct. + + Preconditions on topology: + - pdf-classes (3rd field on arcs) must + form a contiguous list of numbers starting from 1, although + different arcs with the same pdf-class are allowed. (We avoid 0 + because it is "special" in OpenFST, it is used for epsilon). + - The start state must be state 0 and there must be no + transitions entering it. + - The start state must not be final. + - No phone (in the ... block) may have the value 0. + + + + + 1 2 3 4 5 6 7 8 + 0 1 1 0.0 + 1 1 1 0.693 + 1 2 2 0.693 + 2 2 2 0.693 + 2 3 3 0.693 + 3 3 3 0.693 + 3 0.693 + + +*/ + + +/// A class for storing topology information for phones. See \ref hmm for context. +/// This object is sometimes accessed in a file by itself, but more often +/// as a class member of the Transition class (this is for convenience to reduce +/// the number of files programs have to access). + +class Topology { + public: + + void Read(std::istream &is, bool binary); + void Write(std::ostream &os, bool binary) const; + + // Checks that the object is valid, and throw exception otherwise. + void Check(); + + /// Returns the topology entry for this phone; + /// will throw exception if phone not covered by the topology. 
+ const fst::StdVectorFst &TopologyForPhone(int32 phone) const; + + /// Returns a reference to a vector of floats of size + /// `TopologyForPhone(phone).NumStates()`; this contains numbers <= 0 which are to be + /// added to the final-costs and non-self-loop arc costs when creating graphs + /// without self-loops (we call it a correction factor because in the + /// semiring it's multiplied, although physically it is added); this + /// correction factor will ensure that the probability sum of the + /// non-self-loop arcs and final-prob of each state has the same value that it + /// did before removing the self-loop. It's used to make sure that + /// intermediate FSTs made during graph compilation are as stochastic as + /// possible. + /// The user could compute this themselves, but we provide it + /// directly for speed. + const std::vector &CorrectionFactorsForPhone(int32 phone) const; + + /// For each phone, this will return a vector of size + /// `TopologyForPhone(phone).NumStates()` containing, for each state + /// in this phone's topology entry, the pdf-class of the self-loop on + /// that state (if any), and otherwise, -1. This could be computed + /// by the user from the FST, but is provided for convenience. + const std::vector &SelfLoopPdfClassesForPhone(int32 phone) const; + + /// Returns the number of \ref pdf_class "pdf-classes" for this phone; + /// throws exception if phone not covered by this topology. + int32 NumPdfClasses(int32 phone) const; + + /// Returns a reference to a sorted, unique list of phones covered by + /// the topology (these phones will be positive integers, and usually + /// contiguous and starting from one but the toolkit doesn't assume + /// they are contiguous). + const std::vector &GetPhones() const { return phones_; }; + + /// Outputs a vector of int32, indexed by phone, that gives the + /// number of \ref pdf_class pdf-classes for the phones; this is + /// used by tree-building code such as BuildTree(). + void GetPhoneToNumPdfClasses(std::vector *phone2num_pdf_classes) const; + + // Returns the minimum number of arcs/frames it takes to traverse this model + // for this phone: e.g. 3 for the normal HMM topology. + int32 MinLength(int32 phone) const; + + Topology() {} + + bool operator == (const Topology &other) const; + + // Allow default assignment operator and copy constructor. + private: + + void ComputeDerived(); + + using Arc = typename fst::StdVectorFst::Arc; + using StateId = typename fst::StdVectorFst::StateId; + using Weight = typename fst::StdVectorFst::Weight; + + std::vector phones_; // list of all phones we have topology for. + // Sorted, uniq. no epsilon (zero) phone. + std::vector phone2idx_; // map from phones to indexes into the entries + // vector (or -1 for not present). + std::vector entries_; // list of topology entries, indexed + // by the elements of phone2idx_. + + // Below this point are 'derived quantities' (things not written to disk, + // that can be worked out from the information above). + + // This is a vector indexed by 'idx' (the same as the index into entries_) and + // then by state-id in the corresponding topology entry; it contains the + // correction factor that we add to the costs of arcs leaving that state (and + // its final-cost) if we remove the self-loop; it's a number <= 0. This will + // make the probability sum of this state have the same value it did before + // removing the self-loop, hopefully 1.0. (viewing the costs as negated + // log-probs, of course).
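A small worked example of the correction factor described above; the function name is illustrative and the formula follows Topology::ComputeDerived() earlier in this patch.

#include <cassert>
#include <cmath>

// Correction added to the costs of a state's non-self-loop arcs and final-cost
// once its self-loop is removed; it is <= 0 for a normalized state.
double SelfLoopCorrection(double tot_prob, double self_loop_prob) {
  assert(tot_prob > 0.0 && self_loop_prob >= 0.0 && self_loop_prob < tot_prob);
  return std::log((tot_prob - self_loop_prob) / tot_prob);
}

// Example: tot_prob = 1.0, self_loop_prob = 0.5 gives log(0.5) ~= -0.693.
// Adding -0.693 to each remaining cost (cost == -log p) doubles the remaining
// probabilities, so the state's outgoing mass again sums to 1.0.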
Doing this will make the no-self-loop FST + // stochastic if it was stochastic with the self-loops. + std::vector > self_loop_correction_factors_; + + // This is a vector indexed by 'idx' (the same as the index into entries_) and + // then by state-id in the corresponding topology entry; it contains the + // pdf-class of the self-loop of each state that had a self-loop, or -1 + // for the states that didn't have self-loops. Note: the pdf-class is + // a number >0 which is the label on the arc in the topology entries (ilabel + // or olabel; they are the same because the topology entries are + // acceptors). + std::vector > self_loop_pdf_classes_; +}; + + +/// @} end "addtogroup hmm_group" + + +} // end namespace kaldi + + +#endif diff --git a/src/hmm/transition-model.cc b/src/hmm/transition-model.cc deleted file mode 100644 index 420a94585ea..00000000000 --- a/src/hmm/transition-model.cc +++ /dev/null @@ -1,924 +0,0 @@ -// hmm/transition-model.cc - -// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) -// Johns Hopkins University (author: Guoguo Chen) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#include -#include "hmm/transition-model.h" -#include "tree/context-dep.h" - -namespace kaldi { - -void TransitionModel::ComputeTuples(const ContextDependencyInterface &ctx_dep) { - if (IsHmm()) - ComputeTuplesIsHmm(ctx_dep); - else - ComputeTuplesNotHmm(ctx_dep); - - // now tuples_ is populated with all possible tuples of (phone, hmm_state, pdf, self_loop_pdf). - std::sort(tuples_.begin(), tuples_.end()); // sort to enable reverse lookup. - // this sorting defines the transition-ids. -} - -void TransitionModel::ComputeTuplesIsHmm(const ContextDependencyInterface &ctx_dep) { - const std::vector &phones = topo_.GetPhones(); - KALDI_ASSERT(!phones.empty()); - - // this is the case for normal models. but not for chain models - std::vector > > pdf_info; - std::vector num_pdf_classes( 1 + *std::max_element(phones.begin(), phones.end()), -1); - for (size_t i = 0; i < phones.size(); i++) - num_pdf_classes[phones[i]] = topo_.NumPdfClasses(phones[i]); - ctx_dep.GetPdfInfo(phones, num_pdf_classes, &pdf_info); - // pdf_info is list indexed by pdf of which (phone, pdf_class) it - // can correspond to. - - std::map, std::vector > to_hmm_state_list; - // to_hmm_state_list is a map from (phone, pdf_class) to the list - // of hmm-states in the HMM for that phone that that (phone, pdf-class) - // can correspond to. - for (size_t i = 0; i < phones.size(); i++) { // setting up to_hmm_state_list. - int32 phone = phones[i]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(phone); - for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... 
- int32 pdf_class = entry[j].forward_pdf_class; - if (pdf_class != kNoPdf) { - to_hmm_state_list[std::make_pair(phone, pdf_class)].push_back(j); - } - } - } - - for (int32 pdf = 0; pdf < static_cast(pdf_info.size()); pdf++) { - for (size_t j = 0; j < pdf_info[pdf].size(); j++) { - int32 phone = pdf_info[pdf][j].first, - pdf_class = pdf_info[pdf][j].second; - const std::vector &state_vec = to_hmm_state_list[std::make_pair(phone, pdf_class)]; - KALDI_ASSERT(!state_vec.empty()); - // state_vec is a list of the possible HMM-states that emit this - // pdf_class. - for (size_t k = 0; k < state_vec.size(); k++) { - int32 hmm_state = state_vec[k]; - tuples_.push_back(Tuple(phone, hmm_state, pdf, pdf)); - } - } - } -} - -void TransitionModel::ComputeTuplesNotHmm(const ContextDependencyInterface &ctx_dep) { - const std::vector &phones = topo_.GetPhones(); - KALDI_ASSERT(!phones.empty()); - - // pdf_info is a set of lists indexed by phone. Each list is indexed by - // (pdf-class, self-loop pdf-class) of each state of that phone, and the element - // is a list of possible (pdf, self-loop pdf) pairs that (pdf-class, self-loop pdf-class) - // pair generates. - std::vector > > > pdf_info; - // pdf_class_pairs is a set of lists indexed by phone. Each list stores - // (pdf-class, self-loop pdf-class) of each state of that phone. - std::vector > > pdf_class_pairs; - pdf_class_pairs.resize(1 + *std::max_element(phones.begin(), phones.end())); - for (size_t i = 0; i < phones.size(); i++) { - int32 phone = phones[i]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(phone); - for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... - int32 forward_pdf_class = entry[j].forward_pdf_class, self_loop_pdf_class = entry[j].self_loop_pdf_class; - if (forward_pdf_class != kNoPdf) - pdf_class_pairs[phone].push_back(std::make_pair(forward_pdf_class, self_loop_pdf_class)); - } - } - ctx_dep.GetPdfInfo(phones, pdf_class_pairs, &pdf_info); - - std::vector, std::vector > > to_hmm_state_list; - to_hmm_state_list.resize(1 + *std::max_element(phones.begin(), phones.end())); - // to_hmm_state_list is a phone-indexed set of maps from (pdf-class, self-loop pdf_class) to the list - // of hmm-states in the HMM for that phone that that (pdf-class, self-loop pdf-class) - // can correspond to. - for (size_t i = 0; i < phones.size(); i++) { // setting up to_hmm_state_list. - int32 phone = phones[i]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(phone); - std::map, std::vector > phone_to_hmm_state_list; - for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... 
- int32 forward_pdf_class = entry[j].forward_pdf_class, self_loop_pdf_class = entry[j].self_loop_pdf_class; - if (forward_pdf_class != kNoPdf) { - phone_to_hmm_state_list[std::make_pair(forward_pdf_class, self_loop_pdf_class)].push_back(j); - } - } - to_hmm_state_list[phone] = phone_to_hmm_state_list; - } - - for (int32 i = 0; i < phones.size(); i++) { - int32 phone = phones[i]; - for (int32 j = 0; j < static_cast(pdf_info[phone].size()); j++) { - int32 pdf_class = pdf_class_pairs[phone][j].first, - self_loop_pdf_class = pdf_class_pairs[phone][j].second; - const std::vector &state_vec = - to_hmm_state_list[phone][std::make_pair(pdf_class, self_loop_pdf_class)]; - KALDI_ASSERT(!state_vec.empty()); - for (size_t k = 0; k < state_vec.size(); k++) { - int32 hmm_state = state_vec[k]; - for (size_t m = 0; m < pdf_info[phone][j].size(); m++) { - int32 pdf = pdf_info[phone][j][m].first, - self_loop_pdf = pdf_info[phone][j][m].second; - tuples_.push_back(Tuple(phone, hmm_state, pdf, self_loop_pdf)); - } - } - } - } -} - -void TransitionModel::ComputeDerived() { - state2id_.resize(tuples_.size()+2); // indexed by transition-state, which - // is one based, but also an entry for one past end of list. - - int32 cur_transition_id = 1; - num_pdfs_ = 0; - for (int32 tstate = 1; - tstate <= static_cast(tuples_.size()+1); // not a typo. - tstate++) { - state2id_[tstate] = cur_transition_id; - if (static_cast(tstate) <= tuples_.size()) { - int32 phone = tuples_[tstate-1].phone, - hmm_state = tuples_[tstate-1].hmm_state, - forward_pdf = tuples_[tstate-1].forward_pdf, - self_loop_pdf = tuples_[tstate-1].self_loop_pdf; - num_pdfs_ = std::max(num_pdfs_, 1 + forward_pdf); - num_pdfs_ = std::max(num_pdfs_, 1 + self_loop_pdf); - const HmmTopology::HmmState &state = topo_.TopologyForPhone(phone)[hmm_state]; - int32 my_num_ids = static_cast(state.transitions.size()); - cur_transition_id += my_num_ids; // # trans out of this state. - } - } - - id2state_.resize(cur_transition_id); // cur_transition_id is #transition-ids+1. - id2pdf_id_.resize(cur_transition_id); - for (int32 tstate = 1; tstate <= static_cast(tuples_.size()); tstate++) { - for (int32 tid = state2id_[tstate]; tid < state2id_[tstate+1]; tid++) { - id2state_[tid] = tstate; - if (IsSelfLoop(tid)) - id2pdf_id_[tid] = tuples_[tstate-1].self_loop_pdf; - else - id2pdf_id_[tid] = tuples_[tstate-1].forward_pdf; - } - } - - // The following statements put copies a large number in the region of memory - // past the end of the id2pdf_id_ array, while leaving the array as it was - // before. The goal of this is to speed up decoding by disabling a check - // inside TransitionIdToPdf() that the transition-id was within the correct - // range. - int32 num_big_numbers = std::min(2000, cur_transition_id); - id2pdf_id_.resize(cur_transition_id + num_big_numbers, - std::numeric_limits::max()); - id2pdf_id_.resize(cur_transition_id); -} - -void TransitionModel::InitializeProbs() { - log_probs_.Resize(NumTransitionIds()+1); // one-based array, zeroth element empty. 
- for (int32 trans_id = 1; trans_id <= NumTransitionIds(); trans_id++) { - int32 trans_state = id2state_[trans_id]; - int32 trans_index = trans_id - state2id_[trans_state]; - const Tuple &tuple = tuples_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(tuple.phone); - KALDI_ASSERT(static_cast(tuple.hmm_state) < entry.size()); - BaseFloat prob = entry[tuple.hmm_state].transitions[trans_index].second; - if (prob <= 0.0) - KALDI_ERR << "TransitionModel::InitializeProbs, zero " - "probability [should remove that entry in the topology]"; - if (prob > 1.0) - KALDI_WARN << "TransitionModel::InitializeProbs, prob greater than one."; - log_probs_(trans_id) = Log(prob); - } - ComputeDerivedOfProbs(); -} - -void TransitionModel::Check() const { - KALDI_ASSERT(NumTransitionIds() != 0 && NumTransitionStates() != 0); - { - int32 sum = 0; - for (int32 ts = 1; ts <= NumTransitionStates(); ts++) sum += NumTransitionIndices(ts); - KALDI_ASSERT(sum == NumTransitionIds()); - } - for (int32 tid = 1; tid <= NumTransitionIds(); tid++) { - int32 tstate = TransitionIdToTransitionState(tid), - index = TransitionIdToTransitionIndex(tid); - KALDI_ASSERT(tstate > 0 && tstate <=NumTransitionStates() && index >= 0); - KALDI_ASSERT(tid == PairToTransitionId(tstate, index)); - int32 phone = TransitionStateToPhone(tstate), - hmm_state = TransitionStateToHmmState(tstate), - forward_pdf = TransitionStateToForwardPdf(tstate), - self_loop_pdf = TransitionStateToSelfLoopPdf(tstate); - KALDI_ASSERT(tstate == TupleToTransitionState(phone, hmm_state, forward_pdf, self_loop_pdf)); - KALDI_ASSERT(log_probs_(tid) <= 0.0 && log_probs_(tid) - log_probs_(tid) == 0.0); - // checking finite and non-positive (and not out-of-bounds). - } -} - -bool TransitionModel::IsHmm() const { - const std::vector &phones = topo_.GetPhones(); - KALDI_ASSERT(!phones.empty()); - for (size_t i = 0; i < phones.size(); i++) { - int32 phone = phones[i]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(phone); - for (int32 j = 0; j < static_cast(entry.size()); j++) { // for each state... - if (entry[j].forward_pdf_class != entry[j].self_loop_pdf_class) - return false; - } - } - return true; -} - -TransitionModel::TransitionModel(const ContextDependencyInterface &ctx_dep, - const HmmTopology &hmm_topo): topo_(hmm_topo) { - // First thing is to get all possible tuples. - ComputeTuples(ctx_dep); - ComputeDerived(); - InitializeProbs(); - Check(); -} - -int32 TransitionModel::TupleToTransitionState(int32 phone, int32 hmm_state, int32 pdf, int32 self_loop_pdf) const { - Tuple tuple(phone, hmm_state, pdf, self_loop_pdf); - // Note: if this ever gets too expensive, which is unlikely, we can refactor - // this code to sort first on pdf, and then index on pdf, so those - // that have the same pdf are in a contiguous range. - std::vector::const_iterator iter = - std::lower_bound(tuples_.begin(), tuples_.end(), tuple); - if (iter == tuples_.end() || !(*iter == tuple)) { - KALDI_ERR << "TransitionModel::TupleToTransitionState, tuple not found." - << " (incompatible tree and model?)"; - } - // tuples_ is indexed by transition_state-1, so add one. 
- return static_cast((iter - tuples_.begin())) + 1; -} - - -int32 TransitionModel::NumTransitionIndices(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - return static_cast(state2id_[trans_state+1]-state2id_[trans_state]); -} - -int32 TransitionModel::TransitionIdToTransitionState(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && static_cast(trans_id) < id2state_.size()); - return id2state_[trans_id]; -} - -int32 TransitionModel::TransitionIdToTransitionIndex(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && static_cast(trans_id) < id2state_.size()); - return trans_id - state2id_[id2state_[trans_id]]; -} - -int32 TransitionModel::TransitionStateToPhone(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - return tuples_[trans_state-1].phone; -} - -int32 TransitionModel::TransitionStateToForwardPdf(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - return tuples_[trans_state-1].forward_pdf; -} - -int32 TransitionModel::TransitionStateToForwardPdfClass( - int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - const Tuple &t = tuples_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(t.phone); - KALDI_ASSERT(static_cast(t.hmm_state) < entry.size()); - return entry[t.hmm_state].forward_pdf_class; -} - - -int32 TransitionModel::TransitionStateToSelfLoopPdfClass( - int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - const Tuple &t = tuples_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(t.phone); - KALDI_ASSERT(static_cast(t.hmm_state) < entry.size()); - return entry[t.hmm_state].self_loop_pdf_class; -} - - -int32 TransitionModel::TransitionStateToSelfLoopPdf(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - return tuples_[trans_state-1].self_loop_pdf; -} - -int32 TransitionModel::TransitionStateToHmmState(int32 trans_state) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - return tuples_[trans_state-1].hmm_state; -} - -int32 TransitionModel::PairToTransitionId(int32 trans_state, int32 trans_index) const { - KALDI_ASSERT(static_cast(trans_state) <= tuples_.size()); - KALDI_ASSERT(trans_index < state2id_[trans_state+1] - state2id_[trans_state]); - return state2id_[trans_state] + trans_index; -} - -int32 TransitionModel::NumPhones() const { - int32 num_trans_state = tuples_.size(); - int32 max_phone_id = 0; - for (int32 i = 0; i < num_trans_state; ++i) { - if (tuples_[i].phone > max_phone_id) - max_phone_id = tuples_[i].phone; - } - return max_phone_id; -} - - -bool TransitionModel::IsFinal(int32 trans_id) const { - KALDI_ASSERT(static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - int32 trans_index = trans_id - state2id_[trans_state]; - const Tuple &tuple = tuples_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(tuple.phone); - KALDI_ASSERT(static_cast(tuple.hmm_state) < entry.size()); - KALDI_ASSERT(static_cast(tuple.hmm_state) < entry.size()); - KALDI_ASSERT(static_cast(trans_index) < - entry[tuple.hmm_state].transitions.size()); - // return true if the transition goes to the final state of the - // topology entry. 
- return (entry[tuple.hmm_state].transitions[trans_index].first + 1 == - static_cast(entry.size())); -} - - - -int32 TransitionModel::SelfLoopOf(int32 trans_state) const { // returns the self-loop transition-id, - KALDI_ASSERT(static_cast(trans_state-1) < tuples_.size()); - const Tuple &tuple = tuples_[trans_state-1]; - // or zero if does not exist. - int32 phone = tuple.phone, hmm_state = tuple.hmm_state; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(phone); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - for (int32 trans_index = 0; - trans_index < static_cast(entry[hmm_state].transitions.size()); - trans_index++) - if (entry[hmm_state].transitions[trans_index].first == hmm_state) - return PairToTransitionId(trans_state, trans_index); - return 0; // invalid transition id. -} - -void TransitionModel::ComputeDerivedOfProbs() { - non_self_loop_log_probs_.Resize(NumTransitionStates()+1); // this array indexed - // by transition-state with nothing in zeroth element. - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 tid = SelfLoopOf(tstate); - if (tid == 0) { // no self-loop - non_self_loop_log_probs_(tstate) = 0.0; // log(1.0) - } else { - BaseFloat self_loop_prob = Exp(GetTransitionLogProb(tid)), - non_self_loop_prob = 1.0 - self_loop_prob; - if (non_self_loop_prob <= 0.0) { - KALDI_WARN << "ComputeDerivedOfProbs(): non-self-loop prob is " << non_self_loop_prob; - non_self_loop_prob = 1.0e-10; // just so we can continue... - } - non_self_loop_log_probs_(tstate) = Log(non_self_loop_prob); // will be negative. - } - } -} - -void TransitionModel::Read(std::istream &is, bool binary) { - ExpectToken(is, binary, ""); - topo_.Read(is, binary); - std::string token; - ReadToken(is, binary, &token); - int32 size; - ReadBasicType(is, binary, &size); - tuples_.resize(size); - for (int32 i = 0; i < size; i++) { - ReadBasicType(is, binary, &(tuples_[i].phone)); - ReadBasicType(is, binary, &(tuples_[i].hmm_state)); - ReadBasicType(is, binary, &(tuples_[i].forward_pdf)); - if (token == "") - ReadBasicType(is, binary, &(tuples_[i].self_loop_pdf)); - else if (token == "") - tuples_[i].self_loop_pdf = tuples_[i].forward_pdf; - } - ReadToken(is, binary, &token); - KALDI_ASSERT(token == "" || token == ""); - ComputeDerived(); - ExpectToken(is, binary, ""); - log_probs_.Read(is, binary); - ExpectToken(is, binary, ""); - ExpectToken(is, binary, ""); - ComputeDerivedOfProbs(); - Check(); -} - -void TransitionModel::Write(std::ostream &os, bool binary) const { - bool is_hmm = IsHmm(); - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - topo_.Write(os, binary); - if (is_hmm) - WriteToken(os, binary, ""); - else - WriteToken(os, binary, ""); - WriteBasicType(os, binary, static_cast(tuples_.size())); - if (!binary) os << "\n"; - for (int32 i = 0; i < static_cast (tuples_.size()); i++) { - WriteBasicType(os, binary, tuples_[i].phone); - WriteBasicType(os, binary, tuples_[i].hmm_state); - WriteBasicType(os, binary, tuples_[i].forward_pdf); - if (!is_hmm) - WriteBasicType(os, binary, tuples_[i].self_loop_pdf); - if (!binary) os << "\n"; - } - if (is_hmm) - WriteToken(os, binary, ""); - else - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - log_probs_.Write(os, binary); - WriteToken(os, binary, ""); - if (!binary) os << "\n"; - WriteToken(os, binary, ""); - if (!binary) os << "\n"; -} - -BaseFloat TransitionModel::GetTransitionProb(int32 trans_id) const { - return Exp(log_probs_(trans_id)); -} - 
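To make the integer mappings concrete: the integer-mapping and probability-lookup accessors of the old TransitionModel (defined in this file and its header) compose as in the following standalone sketch. It is not part of the patch; the helper name DescribeTransitionId is hypothetical, and it assumes a TransitionModel that has already been read from disk.

#include "hmm/transition-model.h"

namespace kaldi {
// Unpacks one transition-id using only the mappings of the old interface:
// transition-id -> transition-state -> (phone, HMM-state), plus the
// pdf-id and log-prob lookups.
void DescribeTransitionId(const TransitionModel &trans_model, int32 trans_id) {
  int32 trans_state = trans_model.TransitionIdToTransitionState(trans_id);
  int32 phone = trans_model.TransitionStateToPhone(trans_state);
  int32 hmm_state = trans_model.TransitionStateToHmmState(trans_state);
  int32 pdf_id = trans_model.TransitionIdToPdf(trans_id);
  BaseFloat log_prob = trans_model.GetTransitionLogProb(trans_id);
  int32 self_loop_tid = trans_model.SelfLoopOf(trans_state);  // 0 if no self-loop.
  KALDI_LOG << "transition-id " << trans_id << ": phone " << phone
            << ", hmm-state " << hmm_state << ", pdf-id " << pdf_id
            << ", log-prob " << log_prob
            << (trans_id == self_loop_tid ? " [self-loop]" : "");
}
}  // namespace kaldi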
-BaseFloat TransitionModel::GetTransitionLogProb(int32 trans_id) const { - return log_probs_(trans_id); -} - -BaseFloat TransitionModel::GetNonSelfLoopLogProb(int32 trans_state) const { - KALDI_ASSERT(trans_state != 0); - return non_self_loop_log_probs_(trans_state); -} - -BaseFloat TransitionModel::GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0); - KALDI_PARANOID_ASSERT(!IsSelfLoop(trans_id)); - return log_probs_(trans_id) - GetNonSelfLoopLogProb(TransitionIdToTransitionState(trans_id)); -} - -// stats are counts/weights, indexed by transition-id. -void TransitionModel::MleUpdate(const Vector &stats, - const MleTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out) { - if (cfg.share_for_pdfs) { - MleUpdateShared(stats, cfg, objf_impr_out, count_out); - return; - } - BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; - int32 num_skipped = 0, num_floored = 0; - KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 n = NumTransitionIndices(tstate); - KALDI_ASSERT(n>=1); - if (n > 1) { // no point updating if only one transition... - Vector counts(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - counts(tidx) = stats(tid); - } - double tstate_tot = counts.Sum(); - count_sum += tstate_tot; - if (tstate_tot < cfg.mincount) { num_skipped++; } - else { - Vector old_probs(n), new_probs(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); - } - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = counts(tidx) / tstate_tot; - for (int32 i = 0; i < 3; i++) { // keep flooring+renormalizing for 3 times.. - new_probs.Scale(1.0 / new_probs.Sum()); - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = std::max(new_probs(tidx), cfg.floor); - } - // Compute objf change - for (int32 tidx = 0; tidx < n; tidx++) { - if (new_probs(tidx) == cfg.floor) num_floored++; - double objf_change = counts(tidx) * (Log(new_probs(tidx)) - - Log(old_probs(tidx))); - objf_impr_sum += objf_change; - } - // Commit updated values. - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - log_probs_(tid) = Log(new_probs(tidx)); - if (log_probs_(tid) - log_probs_(tid) != 0.0) - KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; - } - } - } - } - KALDI_LOG << "TransitionModel::Update, objf change is " - << (objf_impr_sum / count_sum) << " per frame over " << count_sum - << " frames. "; - KALDI_LOG << num_floored << " probabilities floored, " << num_skipped - << " out of " << NumTransitionStates() << " transition-states " - "skipped due to insuffient data (it is normal to have some skipped.)"; - if (objf_impr_out) *objf_impr_out = objf_impr_sum; - if (count_out) *count_out = count_sum; - ComputeDerivedOfProbs(); -} - - -// stats are counts/weights, indexed by transition-id. 
-void TransitionModel::MapUpdate(const Vector &stats, - const MapTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out) { - KALDI_ASSERT(cfg.tau > 0.0); - if (cfg.share_for_pdfs) { - MapUpdateShared(stats, cfg, objf_impr_out, count_out); - return; - } - BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; - KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 n = NumTransitionIndices(tstate); - KALDI_ASSERT(n>=1); - if (n > 1) { // no point updating if only one transition... - Vector counts(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - counts(tidx) = stats(tid); - } - double tstate_tot = counts.Sum(); - count_sum += tstate_tot; - Vector old_probs(n), new_probs(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); - } - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = (counts(tidx) + cfg.tau * old_probs(tidx)) / - (cfg.tau + tstate_tot); - // Compute objf change - for (int32 tidx = 0; tidx < n; tidx++) { - double objf_change = counts(tidx) * (Log(new_probs(tidx)) - - Log(old_probs(tidx))); - objf_impr_sum += objf_change; - } - // Commit updated values. - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - log_probs_(tid) = Log(new_probs(tidx)); - if (log_probs_(tid) - log_probs_(tid) != 0.0) - KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; - } - } - } - KALDI_LOG << "Objf change is " << (objf_impr_sum / count_sum) - << " per frame over " << count_sum - << " frames."; - if (objf_impr_out) *objf_impr_out = objf_impr_sum; - if (count_out) *count_out = count_sum; - ComputeDerivedOfProbs(); -} - - - -/// This version of the Update() function is for if the user specifies -/// --share-for-pdfs=true. We share the transitions for all states that -/// share the same pdf. -void TransitionModel::MleUpdateShared(const Vector &stats, - const MleTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out) { - KALDI_ASSERT(cfg.share_for_pdfs); - - BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; - int32 num_skipped = 0, num_floored = 0; - KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); - std::map > pdf_to_tstate; - - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 pdf = TransitionStateToForwardPdf(tstate); - pdf_to_tstate[pdf].insert(tstate); - if (!IsHmm()) { - pdf = TransitionStateToSelfLoopPdf(tstate); - pdf_to_tstate[pdf].insert(tstate); - } - } - std::map >::iterator map_iter; - for (map_iter = pdf_to_tstate.begin(); - map_iter != pdf_to_tstate.end(); - ++map_iter) { - // map_iter->first is pdf-id... not needed. - const std::set &tstates = map_iter->second; - KALDI_ASSERT(!tstates.empty()); - int32 one_tstate = *(tstates.begin()); - int32 n = NumTransitionIndices(one_tstate); - KALDI_ASSERT(n >= 1); - if (n > 1) { // Only update if >1 transition... 
- Vector counts(n); - for (std::set::const_iterator iter = tstates.begin(); - iter != tstates.end(); - ++iter) { - int32 tstate = *iter; - if (NumTransitionIndices(tstate) != n) - KALDI_ERR << "Mismatch in #transition indices: you cannot " - "use the --share-for-pdfs option with this topology " - "and sharing scheme."; - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - counts(tidx) += stats(tid); - } - } - double pdf_tot = counts.Sum(); - count_sum += pdf_tot; - if (pdf_tot < cfg.mincount) { num_skipped++; } - else { - // Note: when calculating objf improvement, we - // assume we previously had the same tying scheme so - // we can get the params from one_tstate and they're valid - // for all. - Vector old_probs(n), new_probs(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(one_tstate, tidx); - old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); - } - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = counts(tidx) / pdf_tot; - for (int32 i = 0; i < 3; i++) { // keep flooring+renormalizing for 3 times.. - new_probs.Scale(1.0 / new_probs.Sum()); - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = std::max(new_probs(tidx), cfg.floor); - } - // Compute objf change - for (int32 tidx = 0; tidx < n; tidx++) { - if (new_probs(tidx) == cfg.floor) num_floored++; - double objf_change = counts(tidx) * (Log(new_probs(tidx)) - - Log(old_probs(tidx))); - objf_impr_sum += objf_change; - } - // Commit updated values. - for (std::set::const_iterator iter = tstates.begin(); - iter != tstates.end(); - ++iter) { - int32 tstate = *iter; - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - log_probs_(tid) = Log(new_probs(tidx)); - if (log_probs_(tid) - log_probs_(tid) != 0.0) - KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; - } - } - } - } - } - KALDI_LOG << "Objf change is " << (objf_impr_sum / count_sum) - << " per frame over " << count_sum << " frames; " - << num_floored << " probabilities floored, " - << num_skipped << " pdf-ids skipped due to insuffient data."; - if (objf_impr_out) *objf_impr_out = objf_impr_sum; - if (count_out) *count_out = count_sum; - ComputeDerivedOfProbs(); -} - - -/// This version of the MapUpdate() function is for if the user specifies -/// --share-for-pdfs=true. We share the transitions for all states that -/// share the same pdf. -void TransitionModel::MapUpdateShared(const Vector &stats, - const MapTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out) { - KALDI_ASSERT(cfg.share_for_pdfs); - - BaseFloat count_sum = 0.0, objf_impr_sum = 0.0; - KALDI_ASSERT(stats.Dim() == NumTransitionIds()+1); - std::map > pdf_to_tstate; - - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - int32 pdf = TransitionStateToForwardPdf(tstate); - pdf_to_tstate[pdf].insert(tstate); - if (!IsHmm()) { - pdf = TransitionStateToSelfLoopPdf(tstate); - pdf_to_tstate[pdf].insert(tstate); - } - } - std::map >::iterator map_iter; - for (map_iter = pdf_to_tstate.begin(); - map_iter != pdf_to_tstate.end(); - ++map_iter) { - // map_iter->first is pdf-id... not needed. - const std::set &tstates = map_iter->second; - KALDI_ASSERT(!tstates.empty()); - int32 one_tstate = *(tstates.begin()); - int32 n = NumTransitionIndices(one_tstate); - KALDI_ASSERT(n >= 1); - if (n > 1) { // Only update if >1 transition... 
- Vector counts(n); - for (std::set::const_iterator iter = tstates.begin(); - iter != tstates.end(); - ++iter) { - int32 tstate = *iter; - if (NumTransitionIndices(tstate) != n) - KALDI_ERR << "Mismatch in #transition indices: you cannot " - "use the --share-for-pdfs option with this topology " - "and sharing scheme."; - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - counts(tidx) += stats(tid); - } - } - double pdf_tot = counts.Sum(); - count_sum += pdf_tot; - - // Note: when calculating objf improvement, we - // assume we previously had the same tying scheme so - // we can get the params from one_tstate and they're valid - // for all. - Vector old_probs(n), new_probs(n); - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(one_tstate, tidx); - old_probs(tidx) = new_probs(tidx) = GetTransitionProb(tid); - } - for (int32 tidx = 0; tidx < n; tidx++) - new_probs(tidx) = (counts(tidx) + old_probs(tidx) * cfg.tau) / - (pdf_tot + cfg.tau); - // Compute objf change - for (int32 tidx = 0; tidx < n; tidx++) { - double objf_change = counts(tidx) * (Log(new_probs(tidx)) - - Log(old_probs(tidx))); - objf_impr_sum += objf_change; - } - // Commit updated values. - for (std::set::const_iterator iter = tstates.begin(); - iter != tstates.end(); - ++iter) { - int32 tstate = *iter; - for (int32 tidx = 0; tidx < n; tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - log_probs_(tid) = Log(new_probs(tidx)); - if (log_probs_(tid) - log_probs_(tid) != 0.0) - KALDI_ERR << "Log probs is inf or NaN: error in update or bad stats?"; - } - } - } - } - KALDI_LOG << "Objf change is " << (objf_impr_sum / count_sum) - << " per frame over " << count_sum - << " frames."; - if (objf_impr_out) *objf_impr_out = objf_impr_sum; - if (count_out) *count_out = count_sum; - ComputeDerivedOfProbs(); -} - - -int32 TransitionModel::TransitionIdToPhone(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - return tuples_[trans_state-1].phone; -} - -int32 TransitionModel::TransitionIdToPdfClass(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - - const Tuple &t = tuples_[trans_state-1]; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(t.phone); - KALDI_ASSERT(static_cast(t.hmm_state) < entry.size()); - if (IsSelfLoop(trans_id)) - return entry[t.hmm_state].self_loop_pdf_class; - else - return entry[t.hmm_state].forward_pdf_class; -} - - -int32 TransitionModel::TransitionIdToHmmState(int32 trans_id) const { - KALDI_ASSERT(trans_id != 0 && static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - const Tuple &t = tuples_[trans_state-1]; - return t.hmm_state; -} - -void TransitionModel::Print(std::ostream &os, - const std::vector &phone_names, - const Vector *occs) { - if (occs != NULL) - KALDI_ASSERT(occs->Dim() == NumPdfs()); - bool is_hmm = IsHmm(); - for (int32 tstate = 1; tstate <= NumTransitionStates(); tstate++) { - const Tuple &tuple = tuples_[tstate-1]; - KALDI_ASSERT(static_cast(tuple.phone) < phone_names.size()); - std::string phone_name = phone_names[tuple.phone]; - - os << "Transition-state " << tstate << ": phone = " << phone_name - << " hmm-state = " << tuple.hmm_state; - if (is_hmm) - os << " pdf = " << tuple.forward_pdf << '\n'; - else - os << " forward-pdf = " << tuple.forward_pdf << " self-loop-pdf = " - << 
tuple.self_loop_pdf << '\n'; - for (int32 tidx = 0; tidx < NumTransitionIndices(tstate); tidx++) { - int32 tid = PairToTransitionId(tstate, tidx); - BaseFloat p = GetTransitionProb(tid); - os << " Transition-id = " << tid << " p = " << p; - if (occs != NULL) { - if (IsSelfLoop(tid)) - os << " count of pdf = " << (*occs)(tuple.self_loop_pdf); - else - os << " count of pdf = " << (*occs)(tuple.forward_pdf); - } - // now describe what it's a transition to. - if (IsSelfLoop(tid)) os << " [self-loop]\n"; - else { - int32 hmm_state = tuple.hmm_state; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(tuple.phone); - KALDI_ASSERT(static_cast(hmm_state) < entry.size()); - int32 next_hmm_state = entry[hmm_state].transitions[tidx].first; - KALDI_ASSERT(next_hmm_state != hmm_state); - os << " [" << hmm_state << " -> " << next_hmm_state << "]\n"; - } - } - } -} - -bool GetPdfsForPhones(const TransitionModel &trans_model, - const std::vector &phones, - std::vector *pdfs) { - KALDI_ASSERT(IsSortedAndUniq(phones)); - KALDI_ASSERT(pdfs != NULL); - pdfs->clear(); - for (int32 tstate = 1; tstate <= trans_model.NumTransitionStates(); tstate++) { - if (std::binary_search(phones.begin(), phones.end(), - trans_model.TransitionStateToPhone(tstate))) { - pdfs->push_back(trans_model.TransitionStateToForwardPdf(tstate)); - pdfs->push_back(trans_model.TransitionStateToSelfLoopPdf(tstate)); - } - } - SortAndUniq(pdfs); - - for (int32 tstate = 1; tstate <= trans_model.NumTransitionStates(); tstate++) - if ((std::binary_search(pdfs->begin(), pdfs->end(), - trans_model.TransitionStateToForwardPdf(tstate)) || - std::binary_search(pdfs->begin(), pdfs->end(), - trans_model.TransitionStateToSelfLoopPdf(tstate))) - && !std::binary_search(phones.begin(), phones.end(), - trans_model.TransitionStateToPhone(tstate))) - return false; - return true; -} - -bool GetPhonesForPdfs(const TransitionModel &trans_model, - const std::vector &pdfs, - std::vector *phones) { - KALDI_ASSERT(IsSortedAndUniq(pdfs)); - KALDI_ASSERT(phones != NULL); - phones->clear(); - for (int32 tstate = 1; tstate <= trans_model.NumTransitionStates(); tstate++) { - if (std::binary_search(pdfs.begin(), pdfs.end(), - trans_model.TransitionStateToForwardPdf(tstate)) || - std::binary_search(pdfs.begin(), pdfs.end(), - trans_model.TransitionStateToSelfLoopPdf(tstate))) - phones->push_back(trans_model.TransitionStateToPhone(tstate)); - } - SortAndUniq(phones); - - for (int32 tstate = 1; tstate <= trans_model.NumTransitionStates(); tstate++) - if (std::binary_search(phones->begin(), phones->end(), - trans_model.TransitionStateToPhone(tstate)) - && !(std::binary_search(pdfs.begin(), pdfs.end(), - trans_model.TransitionStateToForwardPdf(tstate)) && - std::binary_search(pdfs.begin(), pdfs.end(), - trans_model.TransitionStateToSelfLoopPdf(tstate))) ) - return false; - return true; -} - -bool TransitionModel::Compatible(const TransitionModel &other) const { - return (topo_ == other.topo_ && tuples_ == other.tuples_ && - state2id_ == other.state2id_ && id2state_ == other.id2state_ - && num_pdfs_ == other.num_pdfs_); -} - -bool TransitionModel::IsSelfLoop(int32 trans_id) const { - KALDI_ASSERT(static_cast(trans_id) < id2state_.size()); - int32 trans_state = id2state_[trans_id]; - int32 trans_index = trans_id - state2id_[trans_state]; - const Tuple &tuple = tuples_[trans_state-1]; - int32 phone = tuple.phone, hmm_state = tuple.hmm_state; - const HmmTopology::TopologyEntry &entry = topo_.TopologyForPhone(phone); - KALDI_ASSERT(static_cast(hmm_state) < 
entry.size()); - return (static_cast(trans_index) < entry[hmm_state].transitions.size() - && entry[hmm_state].transitions[trans_index].first == hmm_state); -} - -} // End namespace kaldi diff --git a/src/hmm/transition-model.h b/src/hmm/transition-model.h deleted file mode 100644 index c97980405c1..00000000000 --- a/src/hmm/transition-model.h +++ /dev/null @@ -1,371 +0,0 @@ -// hmm/transition-model.h - -// Copyright 2009-2012 Microsoft Corporation -// Johns Hopkins University (author: Guoguo Chen) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_HMM_TRANSITION_MODEL_H_ -#define KALDI_HMM_TRANSITION_MODEL_H_ - -#include "base/kaldi-common.h" -#include "util/const-integer-set.h" -#include "fst/fst-decl.h" // forward declarations. -#include "hmm/hmm-topology.h" -#include "itf/options-itf.h" -#include "itf/context-dep-itf.h" -#include "matrix/kaldi-vector.h" - -namespace kaldi { - -/// \addtogroup hmm_group -/// @{ - -// The class TransitionModel is a repository for the transition probabilities. -// It also handles certain integer mappings. -// The basic model is as follows. Each phone has a HMM topology defined in -// hmm-topology.h. Each HMM-state of each of these phones has a number of -// transitions (and final-probs) out of it. Each HMM-state defined in the -// HmmTopology class has an associated "pdf_class". This gets replaced with -// an actual pdf-id via the tree. The transition model associates the -// transition probs with the (phone, HMM-state, pdf-id). We associate with -// each such triple a transition-state. Each -// transition-state has a number of associated probabilities to estimate; -// this depends on the number of transitions/final-probs in the topology for -// that (phone, HMM-state). Each probability has an associated transition-index. -// We associate with each (transition-state, transition-index) a unique transition-id. -// Each individual probability estimated by the transition-model is associated with a -// transition-id. -// -// List of the various types of quantity referred to here and what they mean: -// phone: a phone index (1, 2, 3 ...) -// HMM-state: a number (0, 1, 2...) that indexes TopologyEntry (see hmm-topology.h) -// pdf-id: a number output by the Compute function of ContextDependency (it -// indexes pdf's, either forward or self-loop). Zero-based. -// transition-state: the states for which we estimate transition probabilities for transitions -// out of them. In some topologies, will map one-to-one with pdf-ids. -// One-based, since it appears on FSTs. -// transition-index: identifier of a transition (or final-prob) in the HMM. Indexes the -// "transitions" vector in HmmTopology::HmmState. [if it is out of range, -// equal to transitions.size(), it refers to the final-prob.] -// Zero-based. -// transition-id: identifier of a unique parameter of the TransitionModel. 
-// Associated with a (transition-state, transition-index) pair. -// One-based, since it appears on FSTs. -// -// List of the possible mappings TransitionModel can do: -// (phone, HMM-state, forward-pdf-id, self-loop-pdf-id) -> transition-state -// (transition-state, transition-index) -> transition-id -// Reverse mappings: -// transition-id -> transition-state -// transition-id -> transition-index -// transition-state -> phone -// transition-state -> HMM-state -// transition-state -> forward-pdf-id -// transition-state -> self-loop-pdf-id -// -// The main things the TransitionModel object can do are: -// Get initialized (need ContextDependency and HmmTopology objects). -// Read/write. -// Update [given a vector of counts indexed by transition-id]. -// Do the various integer mappings mentioned above. -// Get the probability (or log-probability) associated with a particular transition-id. - - -// Note: this was previously called TransitionUpdateConfig. -struct MleTransitionUpdateConfig { - BaseFloat floor; - BaseFloat mincount; - bool share_for_pdfs; // If true, share all transition parameters that have the same pdf. - MleTransitionUpdateConfig(BaseFloat floor = 0.01, - BaseFloat mincount = 5.0, - bool share_for_pdfs = false): - floor(floor), mincount(mincount), share_for_pdfs(share_for_pdfs) {} - - void Register (OptionsItf *opts) { - opts->Register("transition-floor", &floor, - "Floor for transition probabilities"); - opts->Register("transition-min-count", &mincount, - "Minimum count required to update transitions from a state"); - opts->Register("share-for-pdfs", &share_for_pdfs, - "If true, share all transition parameters where the states " - "have the same pdf."); - } -}; - -struct MapTransitionUpdateConfig { - BaseFloat tau; - bool share_for_pdfs; // If true, share all transition parameters that have the same pdf. - MapTransitionUpdateConfig(): tau(5.0), share_for_pdfs(false) { } - - void Register (OptionsItf *opts) { - opts->Register("transition-tau", &tau, "Tau value for MAP estimation of transition " - "probabilities."); - opts->Register("share-for-pdfs", &share_for_pdfs, - "If true, share all transition parameters where the states " - "have the same pdf."); - } -}; - -class TransitionModel { - - public: - /// Initialize the object [e.g. at the start of training]. - /// The class keeps a copy of the HmmTopology object, but not - /// the ContextDependency object. - TransitionModel(const ContextDependencyInterface &ctx_dep, - const HmmTopology &hmm_topo); - - - /// Constructor that takes no arguments: typically used prior to calling Read. - TransitionModel(): num_pdfs_(0) { } - - void Read(std::istream &is, bool binary); // note, no symbol table: topo object always read/written w/o symbols. - void Write(std::ostream &os, bool binary) const; - - - /// return reference to HMM-topology object. 
- const HmmTopology &GetTopo() const { return topo_; } - - /// \name Integer mapping functions - /// @{ - - int32 TupleToTransitionState(int32 phone, int32 hmm_state, int32 pdf, int32 self_loop_pdf) const; - int32 PairToTransitionId(int32 trans_state, int32 trans_index) const; - int32 TransitionIdToTransitionState(int32 trans_id) const; - int32 TransitionIdToTransitionIndex(int32 trans_id) const; - int32 TransitionStateToPhone(int32 trans_state) const; - int32 TransitionStateToHmmState(int32 trans_state) const; - int32 TransitionStateToForwardPdfClass(int32 trans_state) const; - int32 TransitionStateToSelfLoopPdfClass(int32 trans_state) const; - int32 TransitionStateToForwardPdf(int32 trans_state) const; - int32 TransitionStateToSelfLoopPdf(int32 trans_state) const; - int32 SelfLoopOf(int32 trans_state) const; // returns the self-loop transition-id, or zero if - // this state doesn't have a self-loop. - - inline int32 TransitionIdToPdf(int32 trans_id) const; - // TransitionIdToPdfFast is as TransitionIdToPdf but skips an assertion - // (unless we're in paranoid mode). - inline int32 TransitionIdToPdfFast(int32 trans_id) const; - - int32 TransitionIdToPhone(int32 trans_id) const; - int32 TransitionIdToPdfClass(int32 trans_id) const; - int32 TransitionIdToHmmState(int32 trans_id) const; - - /// @} - - bool IsFinal(int32 trans_id) const; // returns true if this trans_id goes to the final state - // (which is bound to be nonemitting). - bool IsSelfLoop(int32 trans_id) const; // return true if this trans_id corresponds to a self-loop. - - /// Returns the total number of transition-ids (note, these are one-based). - inline int32 NumTransitionIds() const { return id2state_.size()-1; } - - /// Returns the number of transition-indices for a particular transition-state. - /// Note: "Indices" is the plural of "index". Index is not the same as "id", - /// here. A transition-index is a zero-based offset into the transitions - /// out of a particular transition state. - int32 NumTransitionIndices(int32 trans_state) const; - - /// Returns the total number of transition-states (note, these are one-based). - int32 NumTransitionStates() const { return tuples_.size(); } - - // NumPdfs() actually returns the highest-numbered pdf we ever saw, plus one. - // In normal cases this should equal the number of pdfs in the system, but if you - // initialized this object with fewer than all the phones, and it happens that - // an unseen phone has the highest-numbered pdf, this might be different. - int32 NumPdfs() const { return num_pdfs_; } - - // This loops over the tuples and finds the highest phone index present. If - // the FST symbol table for the phones is created in the expected way, i.e.: - // starting from 1 ( is 0) and numbered contiguously till the last phone, - // this will be the total number of phones. - int32 NumPhones() const; - - /// Returns a sorted, unique list of phones. - const std::vector &GetPhones() const { return topo_.GetPhones(); } - - // Transition-parameter-getting functions: - BaseFloat GetTransitionProb(int32 trans_id) const; - BaseFloat GetTransitionLogProb(int32 trans_id) const; - - // The following functions are more specialized functions for getting - // transition probabilities, that are provided for convenience. - - /// Returns the log-probability of a particular non-self-loop transition - /// after subtracting the probability mass of the self-loop and renormalizing; - /// will crash if called on a self-loop. 
Specifically: - /// for non-self-loops it returns the log of (that prob divided by (1 minus - /// self-loop-prob-for-that-state)). - BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const; - - /// Returns the log-prob of the non-self-loop probability - /// mass for this transition state. (you can get the self-loop prob, if a self-loop - /// exists, by calling GetTransitionLogProb(SelfLoopOf(trans_state)). - BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const; - - /// Does Maximum Likelihood estimation. The stats are counts/weights, indexed - /// by transition-id. This was previously called Update(). - void MleUpdate(const Vector &stats, - const MleTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out); - - /// Does Maximum A Posteriori (MAP) estimation. The stats are counts/weights, - /// indexed by transition-id. - void MapUpdate(const Vector &stats, - const MapTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, - BaseFloat *count_out); - - /// Print will print the transition model in a human-readable way, for purposes of human - /// inspection. The "occs" are optional (they are indexed by pdf-id). - void Print(std::ostream &os, - const std::vector &phone_names, - const Vector *occs = NULL); - - - void InitStats(Vector *stats) const { stats->Resize(NumTransitionIds()+1); } - - void Accumulate(BaseFloat prob, int32 trans_id, Vector *stats) const { - KALDI_ASSERT(trans_id <= NumTransitionIds()); - (*stats)(trans_id) += prob; - // This is trivial and doesn't require class members, but leaves us more open - // to design changes than doing it manually. - } - - /// returns true if all the integer class members are identical (but does not - /// compare the transition probabilities. - bool Compatible(const TransitionModel &other) const; - - private: - void MleUpdateShared(const Vector &stats, - const MleTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, BaseFloat *count_out); - void MapUpdateShared(const Vector &stats, - const MapTransitionUpdateConfig &cfg, - BaseFloat *objf_impr_out, BaseFloat *count_out); - void ComputeTuples(const ContextDependencyInterface &ctx_dep); // called from constructor. initializes tuples_. - void ComputeTuplesIsHmm(const ContextDependencyInterface &ctx_dep); - void ComputeTuplesNotHmm(const ContextDependencyInterface &ctx_dep); - void ComputeDerived(); // called from constructor and Read function: computes state2id_ and id2state_. - void ComputeDerivedOfProbs(); // computes quantities derived from log-probs (currently just - // non_self_loop_log_probs_; called whenever log-probs change. - void InitializeProbs(); // called from constructor. 
- void Check() const; - bool IsHmm() const; - - struct Tuple { - int32 phone; - int32 hmm_state; - int32 forward_pdf; - int32 self_loop_pdf; - Tuple() { } - Tuple(int32 phone, int32 hmm_state, int32 forward_pdf, int32 self_loop_pdf): - phone(phone), hmm_state(hmm_state), forward_pdf(forward_pdf), self_loop_pdf(self_loop_pdf) { } - bool operator < (const Tuple &other) const { - if (phone < other.phone) return true; - else if (phone > other.phone) return false; - else if (hmm_state < other.hmm_state) return true; - else if (hmm_state > other.hmm_state) return false; - else if (forward_pdf < other.forward_pdf) return true; - else if (forward_pdf > other.forward_pdf) return false; - else return (self_loop_pdf < other.self_loop_pdf); - } - bool operator == (const Tuple &other) const { - return (phone == other.phone && hmm_state == other.hmm_state - && forward_pdf == other.forward_pdf && self_loop_pdf == other.self_loop_pdf); - } - }; - - HmmTopology topo_; - - /// Tuples indexed by transition state minus one; - /// the tuples are in sorted order which allows us to do the reverse mapping from - /// tuple to transition state - std::vector tuples_; - - /// Gives the first transition_id of each transition-state; indexed by - /// the transition-state. Array indexed 1..num-transition-states+1 (the last one - /// is needed so we can know the num-transitions of the last transition-state. - std::vector state2id_; - - /// For each transition-id, the corresponding transition - /// state (indexed by transition-id). - std::vector id2state_; - - std::vector id2pdf_id_; - - /// For each transition-id, the corresponding log-prob. Indexed by transition-id. - Vector log_probs_; - - /// For each transition-state, the log of (1 - self-loop-prob). Indexed by - /// transition-state. - Vector non_self_loop_log_probs_; - - /// This is actually one plus the highest-numbered pdf we ever got back from the - /// tree (but the tree numbers pdfs contiguously from zero so this is the number - /// of pdfs). - int32 num_pdfs_; - - KALDI_DISALLOW_COPY_AND_ASSIGN(TransitionModel); -}; - -inline int32 TransitionModel::TransitionIdToPdf(int32 trans_id) const { - KALDI_ASSERT( - static_cast(trans_id) < id2pdf_id_.size() && - "Likely graph/model mismatch (graph built from wrong model?)"); - return id2pdf_id_[trans_id]; -} - -inline int32 TransitionModel::TransitionIdToPdfFast(int32 trans_id) const { - // Note: it's a little dangerous to assert this only in paranoid mode. - // However, this function is called in the inner loop of decoders and - // the assertion likely takes a significant amount of time. We make - // sure that past the end of the id2pdf_id_ array there are big - // numbers, which will make the calling code more likely to segfault - // (rather than silently die) if this is called for out-of-range values. - KALDI_PARANOID_ASSERT( - static_cast(trans_id) < id2pdf_id_.size() && - "Likely graph/model mismatch (graph built from wrong model?)"); - return id2pdf_id_[trans_id]; -} - -/// Works out which pdfs might correspond to the given phones. Will return true -/// if these pdfs correspond *just* to these phones, false if these pdfs are also -/// used by other phones. -/// @param trans_model [in] Transition-model used to work out this information -/// @param phones [in] A sorted, uniq vector that represents a set of phones -/// @param pdfs [out] Will be set to a sorted, uniq list of pdf-ids that correspond -/// to one of this set of phones. 
-/// @return Returns true if all of the pdfs output to "pdfs" correspond to phones from -/// just this set (false if they may be shared with phones outside this set). -bool GetPdfsForPhones(const TransitionModel &trans_model, - const std::vector &phones, - std::vector *pdfs); - -/// Works out which phones might correspond to the given pdfs. Similar to the -/// above GetPdfsForPhones(, ,) -bool GetPhonesForPdfs(const TransitionModel &trans_model, - const std::vector &pdfs, - std::vector *phones); -/// @} - - -} // end namespace kaldi - - -#endif diff --git a/src/hmm/transition-model-test.cc b/src/hmm/transitions-test.cc similarity index 83% rename from src/hmm/transition-model-test.cc rename to src/hmm/transitions-test.cc index 841c714efb1..8e2fe403f34 100644 --- a/src/hmm/transition-model-test.cc +++ b/src/hmm/transitions-test.cc @@ -17,22 +17,20 @@ // See the Apache 2 License for the specific language governing permissions and // limitations under the License. -#include "hmm/transition-model.h" +#include "hmm/transitions.h" #include "hmm/hmm-test-utils.h" namespace kaldi { -void TestTransitionModel() { - - TransitionModel *trans_model = GenRandTransitionModel(NULL); - +void TestTransitions() { + Transitions *trans_model = GenRandTransitions(NULL); bool binary = (rand() % 2 == 0); std::ostringstream os; trans_model->Write(os, binary); - TransitionModel trans_model2; + Transitions trans_model2; std::istringstream is2(os.str()); trans_model2.Read(is2, binary); @@ -41,7 +39,7 @@ void TestTransitionModel() { trans_model->Write(os1, false); trans_model2.Write(os2, false); KALDI_ASSERT(os1.str() == os2.str()); - KALDI_ASSERT(trans_model->Compatible(trans_model2)); + KALDI_ASSERT(*trans_model == trans_model2); } delete trans_model; } @@ -50,7 +48,6 @@ void TestTransitionModel() { int main() { for (int i = 0; i < 2; i++) - kaldi::TestTransitionModel(); + kaldi::TestTransitions(); KALDI_LOG << "Test OK.\n"; } - diff --git a/src/hmm/transitions.cc b/src/hmm/transitions.cc new file mode 100644 index 00000000000..7319fe0063a --- /dev/null +++ b/src/hmm/transitions.cc @@ -0,0 +1,344 @@ +// hmm/transitions.cc + +// Copyright 2009-2012 Microsoft Corporation +// Johns Hopkins University (author: Guoguo Chen) +// 2012-2019 Johns Hopkins University (Author: Daniel Povey) +// 2019 Hossein Hadian + + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. 
+
+#include <algorithm>
+#include "hmm/transitions.h"
+#include "tree/context-dep.h"
+#include "util/common-utils.h"
+#include "fstext/fstext-utils.h"
+
+namespace kaldi {
+
+bool Transitions::operator == (const Transitions &other) const {
+  return topo_ == other.topo_ && info_ == other.info_ &&
+      num_pdfs_ == other.num_pdfs_;
+}
+
+bool Transitions::Compatible(const Transitions& other) const {
+  KALDI_ASSERT(false);
+  return false;
+}
+
+void Transitions::ComputeInfo(const ContextDependencyInterface &ctx_dep) {
+  using StateId = typename fst::StdFst::StateId;
+  const std::vector<int32> &phones = topo_.GetPhones();
+  KALDI_ASSERT(!phones.empty());
+
+  // pdf_class_pairs is a set of lists indexed by phone.  Each list stores
+  // all unique (pdf-class, self-loop pdf-class) pairs that that phone
+  // can have (on its arcs).
+  std::vector<std::vector<std::pair<int32, int32> > > pdf_class_pairs;
+  pdf_class_pairs.resize(1 + *std::max_element(phones.begin(), phones.end()));
+  // to_arc_list is a list indexed by phone.  For each phone, it has a map which
+  // maps a possible pdf class pair (pdf-class, self-loop pdf-class) to all
+  // the arcs in that phone that match that pdf class pair.  An arc is represented
+  // as a (topo-state, arc-index) pair.
+  std::vector<std::map<std::pair<int32, int32>,
+                       std::vector<std::pair<int32, int32> > > > to_arc_list;
+  to_arc_list.resize(1 + *std::max_element(phones.begin(), phones.end()));
+
+  for (size_t i = 0; i < phones.size(); i++) {
+    int32 phone = phones[i];
+    const fst::StdVectorFst &entry = topo_.TopologyForPhone(phone);
+    int num_states = entry.NumStates();
+
+    std::vector<int32> state_to_self_loop_pdf_class(num_states, kNoPdf);
+    for (StateId state = 0; state < num_states; ++state) {
+      for (fst::ArcIterator<fst::StdVectorFst> aiter(entry, state);
+           !aiter.Done(); aiter.Next()) {
+        const fst::StdArc &arc(aiter.Value());
+        if (arc.nextstate == state) {
+          if (state_to_self_loop_pdf_class[state] != kNoPdf)
+            KALDI_ERR << "State " << state << " in topology of phone "
+                      << phone << " has more than one self-loop.";
+          state_to_self_loop_pdf_class[state] = arc.ilabel;
+        }
+      }
+    }
+
+    std::map<std::pair<int32, int32>,
+             std::vector<std::pair<int32, int32> > > &this_to_arc_list(
+                 to_arc_list[phone]);
+    for (StateId state = 0; state < num_states; ++state) {
+      for (fst::ArcIterator<fst::StdVectorFst> aiter(entry, state);
+           !aiter.Done(); aiter.Next()) {
+        const fst::StdArc &arc(aiter.Value());
+        int32 forward_pdf_class = arc.ilabel,
+            self_loop_pdf_class = state_to_self_loop_pdf_class[arc.nextstate];
+        auto state_arc_pair = std::make_pair(state, int32(aiter.Position()));
+        auto pdf_class_pair = std::make_pair(forward_pdf_class, self_loop_pdf_class);
+        this_to_arc_list[pdf_class_pair].push_back(state_arc_pair);
+      }
+    }
+    for (auto const &pdf_class_to_arc: this_to_arc_list) {
+      pdf_class_pairs[phone].push_back(pdf_class_to_arc.first);
+    }
+  }
+  // pdf_info will be a set of lists indexed by phone.  Each list is indexed by
+  // the same index as we index into pdf_class_pairs[phone], and the element is
+  // a list of possible (pdf, self-loop pdf) pairs that that (pdf-class,
+  // self-loop pdf-class) pair generates.
+  std::vector<std::vector<std::vector<std::pair<int32, int32> > > > pdf_info;
+
+  ctx_dep.GetPdfInfo(phones, pdf_class_pairs, &pdf_info);
+
+  info_.push_back(TransitionIdInfo());  // transition-id is 1-based, add a
+                                        // dummy for element zero.
+ + for (int32 i = 0; i < phones.size(); i++) { + int32 phone = phones[i]; + for (int32 j = 0; j < static_cast(pdf_info[phone].size()); j++) { // loop on pdf-class pairs + int32 pdf_class = pdf_class_pairs[phone][j].first, + self_loop_pdf_class = pdf_class_pairs[phone][j].second; + auto const &state_arc_vec = + to_arc_list[phone][std::make_pair(pdf_class, self_loop_pdf_class)]; + KALDI_ASSERT(!state_arc_vec.empty()); + for (auto const& state_arc_pair: state_arc_vec) { // loop on all arcs matching this pdf-class pair + int32 topo_state = state_arc_pair.first, + arc_index = state_arc_pair.second; + for (size_t m = 0; m < pdf_info[phone][j].size(); m++) { // loop on all pdf pairs for this pdf-class pair + int32 pdf = pdf_info[phone][j][m].first, + self_loop_pdf = pdf_info[phone][j][m].second; + if (self_loop_pdf_class == -1) + self_loop_pdf = -1; + TransitionIdInfo tuple{.phone = phone, .topo_state = topo_state, + .arc_index = arc_index, .pdf_id = pdf, .self_loop_pdf_id = self_loop_pdf}; + info_.push_back(tuple); + } + } + } + } + + std::sort(info_.begin(), info_.end()); // sort to enable reverse lookup. +} + +void Transitions::ComputeDerived() { + pdf_ids_.resize(info_.size()); + for (int32 tid = 1; tid <= NumTransitionIds(); ++tid) { + TransitionIdInfo &transition = info_[tid]; + auto const &entry = topo_.TopologyForPhone(transition.phone); // an FST + fst::ArcIterator aiter(entry, transition.topo_state); + aiter.Seek(transition.arc_index); + auto const &arc(aiter.Value()); + + transition.is_self_loop = (arc.nextstate == transition.topo_state); + transition.is_initial = (transition.topo_state == 0); + transition.is_final = (entry.Final(arc.nextstate) != fst::StdFst::Weight::Zero()); + transition.transition_cost = arc.weight.Value(); + if (transition.self_loop_pdf_id == -1) + transition.self_loop_transition_id = 0; + else { + // Find the self-loop of the destination state: + int32 arc_index = -1; + for (fst::ArcIterator aiter_next(entry, arc.nextstate); + !aiter_next.Done(); aiter_next.Next()) + if (aiter_next.Value().nextstate == arc.nextstate) { // Found the self-loop + arc_index = aiter_next.Position(); + break; + } + KALDI_ASSERT(arc_index != -1); + transition.self_loop_transition_id = + TupleToTransitionId(transition.phone, arc.nextstate, + arc_index, transition.self_loop_pdf_id, + transition.self_loop_pdf_id); + } + pdf_ids_[tid] = transition.pdf_id; + } +} + +Transitions::Transitions(const ContextDependencyInterface &ctx_dep, + const Topology &topo): topo_(topo), + num_pdfs_(ctx_dep.NumPdfs()) { + // First thing is to get all possible tuples. + ComputeInfo(ctx_dep); + ComputeDerived(); + Check(); +} + +int32 Transitions::TupleToTransitionId(int32 phone, int32 topo_state, + int32 arc_index, int32 pdf_id, + int32 self_loop_pdf_id) const { + TransitionIdInfo tuple{.phone = phone, .topo_state = topo_state, + .arc_index = arc_index, .pdf_id = pdf_id, .self_loop_pdf_id = self_loop_pdf_id}; + // Note: if this ever gets too expensive, which is unlikely, we can refactor + // this code to sort first on pdf, and then index on pdf, so those + // that have the same pdf are in a contiguous range. + auto lowerbound = std::lower_bound(info_.begin(), info_.end(), tuple); + if (lowerbound == info_.end() || !(*lowerbound == tuple)) { + bool is_end = (lowerbound == info_.end()); + const TransitionIdInfo &this_tuple = *lowerbound; + KALDI_ERR << "Tuple not found. 
(incompatible tree and model?)" + << std::boolalpha << is_end + << ", this_tuple pdf_id " << this_tuple.pdf_id; + } + + return static_cast((lowerbound - info_.begin())); +} + +void Transitions::Read(std::istream &is, bool binary) { + ExpectToken(is, binary, ""); + topo_.Read(is, binary); + ExpectToken(is, binary, ""); + int32 size; + ReadBasicType(is, binary, &size); + info_.resize(size); + for (int32 i = 0; i < size; i++) { + ReadBasicType(is, binary, &(info_[i].phone)); + ReadBasicType(is, binary, &(info_[i].topo_state)); + ReadBasicType(is, binary, &(info_[i].arc_index)); + ReadBasicType(is, binary, &(info_[i].pdf_id)); + ReadBasicType(is, binary, &(info_[i].self_loop_pdf_id)); + } + ExpectToken(is, binary, ""); + ReadBasicType(is, binary, &num_pdfs_); + ExpectToken(is, binary, ""); + ComputeDerived(); + Check(); +} + +void Transitions::Write(std::ostream &os, bool binary) const { + WriteToken(os, binary, ""); + if (!binary) os << "\n"; + topo_.Write(os, binary); + WriteToken(os, binary, ""); + WriteBasicType(os, binary, static_cast(info_.size())); + if (!binary) os << "\n"; + for (int32 i = 0; i < static_cast (info_.size()); i++) { + WriteBasicType(os, binary, info_[i].phone); + WriteBasicType(os, binary, info_[i].topo_state); + WriteBasicType(os, binary, info_[i].arc_index); + WriteBasicType(os, binary, info_[i].pdf_id); + WriteBasicType(os, binary, info_[i].self_loop_pdf_id); + if (!binary) os << "\n"; + } + WriteToken(os, binary, ""); + if (!binary) os << "\n"; + WriteBasicType(os, binary, num_pdfs_); + WriteToken(os, binary, ""); + if (!binary) os << "\n"; +} + +void Transitions::Check() const { + +} +const Transitions::TransitionIdInfo& +Transitions::InfoForTransitionId(int32 transition_id) const { + KALDI_ASSERT(transition_id > 0 && transition_id < info_.size()); + return info_[transition_id]; +} +void Transitions::Print(std::ostream &os, + const std::vector &phone_names, + const Vector *occs) { + if (occs != NULL) + KALDI_ASSERT(occs->Dim() == NumPdfs()); + for (int32 tid = 1; tid <= NumTransitionIds(); tid++) { + auto const &transition = info_[tid]; + KALDI_ASSERT(static_cast(transition.phone) < phone_names.size()); + std::string phone_name = phone_names[transition.phone]; + + os << "Transition-id " << tid << ": phone = " << phone_name + << " topo-state = " << transition.topo_state + << " arc-index = " << transition.arc_index + << " forward-pdf = " << transition.pdf_id << " self-loop-pdf = " + << transition.self_loop_pdf_id + << " p = " << transition.transition_cost; + if (occs != NULL) { + if (transition.is_self_loop) + os << " count of pdf = " << (*occs)(transition.self_loop_pdf_id); + else + os << " count of pdf = " << (*occs)(transition.pdf_id); + } + if (transition.is_self_loop) os << " [self-loop]\n"; + else { + auto const &entry = topo_.TopologyForPhone(transition.phone); // an FST + fst::ArcIterator aiter(entry, transition.topo_state); + aiter.Seek(transition.arc_index); + auto const &arc(aiter.Value()); + os << " [" << transition.topo_state << " -> " << arc.nextstate << "]\n"; + } + } +} + +int32 Transitions::PdfClassForTid(int32 tid) const { + auto&& info = InfoForTransitionId(tid); + auto&& fst = GetTopo().TopologyForPhone(info.phone); + fst::ArcIterator > aiter(fst, info.topo_state); + aiter.Seek(info.arc_index); + int32 pdf_class = aiter.Value().ilabel; + return pdf_class; +} + +bool GetPdfsForPhones(const Transitions &trans_model, + const std::vector &phones, + std::vector *pdfs) { + KALDI_ASSERT(IsSortedAndUniq(phones)); + KALDI_ASSERT(pdfs != NULL); + 
pdfs->clear(); + for (int32 tid = 1; tid <= trans_model.NumTransitionIds(); tid++) { + auto const &transition = trans_model.InfoForTransitionId(tid); + if (std::binary_search(phones.begin(), phones.end(), transition.phone)) { + pdfs->push_back(transition.pdf_id); + pdfs->push_back(transition.self_loop_pdf_id); + } + } + SortAndUniq(pdfs); + + for (int32 tid = 1; tid <= trans_model.NumTransitionIds(); tid++) { + auto const &transition = trans_model.InfoForTransitionId(tid); + if ((std::binary_search(pdfs->begin(), pdfs->end(), + transition.pdf_id) || + std::binary_search(pdfs->begin(), pdfs->end(), + transition.self_loop_pdf_id)) + && !std::binary_search(phones.begin(), phones.end(), + transition.phone)) + return false; + } + return true; +} + +bool GetPhonesForPdfs(const Transitions &trans_model, + const std::vector &pdfs, + std::vector *phones) { + KALDI_ASSERT(IsSortedAndUniq(pdfs)); + KALDI_ASSERT(phones != NULL); + phones->clear(); + for (int32 tid = 1; tid <= trans_model.NumTransitionIds(); tid++) { + auto const &transition = trans_model.InfoForTransitionId(tid); + if (std::binary_search(pdfs.begin(), pdfs.end(), transition.pdf_id) || + std::binary_search(pdfs.begin(), pdfs.end(), transition.self_loop_pdf_id)) + phones->push_back(transition.phone); + } + SortAndUniq(phones); + + for (int32 tid = 1; tid <= trans_model.NumTransitionIds(); tid++) { + auto const &transition = trans_model.InfoForTransitionId(tid); + if (std::binary_search(phones->begin(), phones->end(), + transition.phone) + && !(std::binary_search(pdfs.begin(), pdfs.end(), + transition.pdf_id) && + std::binary_search(pdfs.begin(), pdfs.end(), + transition.self_loop_pdf_id))) + return false; + } + return true; +} + + +} // End namespace kaldi diff --git a/src/hmm/transitions.h b/src/hmm/transitions.h new file mode 100644 index 00000000000..6bab0e627dc --- /dev/null +++ b/src/hmm/transitions.h @@ -0,0 +1,279 @@ +// hmm/transitions.h + +// Copyright 2009-2012 Microsoft Corporation +// 2015 Guoguo Chen +// 2019 Johns Hopkins University (author: Daniel Povey) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef KALDI_HMM_TRANSITIONS_H_ +#define KALDI_HMM_TRANSITIONS_H_ + +#include "base/kaldi-common.h" +#include "util/const-integer-set.h" +#include "fst/fst-decl.h" // forward declarations. +#include "hmm/topology.h" +#include "itf/options-itf.h" +#include "itf/context-dep-itf.h" +#include "matrix/kaldi-vector.h" + +namespace kaldi { + +static const int kNoPdf = -1; + +// The class Transitions handles various integer mappings. +// It used to be the home for the trainable transitions, but these +// no longer exist. This class can be initialized from the +// tree and the topology. +// +// The topology of an individual phone is as defined in topology.h. 
+// +// This class basically defines the concept of a "transition-id", +// which is a construct that we use in compiled decoding graphs +// to make it easy to look up the 'pdf-id' (think of this as the +// distribution or neural net output column associated with this +// state) and also figure out which phone we are in and which +// arc in that phone. +// +// In the original Kaldi, this object contained trainable transition +// probabilities, but these have been removed to simplify things. +// +// A transition-id maps to a 4-tuple as follows: +// (pdf-id, phone, topo-state, arc-index) +// where 'topo-state' is the state index in the fst::StdFst +// for the topology, and 'arc-index' is the index of +// the arc leaving that state (zero for the first-listed one, +// one for the second, etc.) + + +// List of the various types of quantity referred to here and what they mean: +// phone: a phone index (1, 2, 3 ...) +// topo-state: a state index in the phone-topology FST (see topology.h) +// arc-index: The index of the arc leaving this topo-state: +// 0 for the first-listed one, 1 for the second. Will be used +// to Seek() in the ArcIterator. +// pdf-id: A number output by the Compute() function of ContextDependency (it +// indexes pdf's, either forward or self-loop). Zero-based. +// In DNN-based systems this would be the column index of +// the neural net output. +// Here, it's "this state". Presumably the source? +// (*)self-loop-pdf-id: The pdf-id associated with the self-loop of this state, +// if there is one (we do not allow >1), or -1 if there is no +// self-loop. This will be the same as 'pdf-id' if this transition +// *is* the self-loop. It might seem odd that we require this +// to get the transition-id for a non-self-loop arc; the reason +// why it's necessary is that we initially create the graph +// without self-loops (for efficiency) and we need to be able +// to look up the corresponding self-loop transition-id to +// add self-loops to the graph. Duh! That makes complete sense! +// +// transition-id: The numbers that we put on the decoding-graph arcs. +// Each transition-id is associated with a 4-tuple +// (pdf-id, phone, topo-state, arc-index). +// + + +class Transitions { + + public: + /// Initialize the object. This is deterministic, so initializing + /// from the same objects will give you an equivalent numbering. + /// The class keeps a copy of the Topology object, but not + /// the ContextDependency object. + Transitions(const ContextDependencyInterface &ctx_dep, + const Topology &topo); + + + /// Constructor that takes no arguments: typically used prior to calling Read. + Transitions(): num_pdfs_(0) { } + + void Read(std::istream &is, bool binary); + void Write(std::ostream &os, bool binary) const; + + // This struct is the information associated with one transition-id. + // You can work out the transition-id from the first 5 fields. + struct TransitionIdInfo { + + int32 phone; // The phone + int32 topo_state; // The state in the topology FST for this phone + int32 arc_index; // The arc-index leaving this state + int32 pdf_id; // The pdf-id associated with this arc (obtained from the + // tree and phonetic-context information, etc.) + + int32 self_loop_pdf_id; // The pdf-id associated with the self-loop + // transition (if any) leaving the *destination* + // state of this arc, or -1 if that state has no + // self-loop. Search for (*) above for + // explanation. 
+ + // The remaining fields are 'derived information' that are worked out + // from the information above and from the phone topology, and placed + // here for convenience. + + // is_self_loop is true if this is a self-loop (a transition to the same + // state). We often need to know this, so it's convenient to have this + // information here. + bool is_self_loop; + // is_initial is true if this is a transition leaving the + // initial state. + // you transition through the HMM (we check that the topology has no + // other transitions to the first HMM-state). + bool is_initial; + + // is_final is true if this is a transition entering a final + // state. This is used together with is_initial (and boundary + // information) to locate phone boundaries, e.g. for lattice + // word alignment: an 'is_final' transition-id followed by an + // 'is_initial' transition-id marks a phone boundary, which + // we know because we do not allow the start-state in + // topologies to be final. + bool is_final; + + // transition_cost is the cost (negative log-prob) of this transition). + BaseFloat transition_cost; + // The transition-id associated with the self-loop of the *destination* of + // this arc, if there is one, or 0 if there is no such self-loop. + int32 self_loop_transition_id; + + + bool operator < (const TransitionIdInfo &other) const { + if (phone < other.phone) return true; + else if (phone > other.phone) return false; + else if (topo_state < other.topo_state) return true; + else if (topo_state > other.topo_state) return false; + else if (arc_index < other.arc_index) return true; + else if (arc_index > other.arc_index) return false; + else if (pdf_id < other.pdf_id) return true; + else if (pdf_id > other.pdf_id) return false; + else return (self_loop_pdf_id < other.self_loop_pdf_id); + } + + // Compare all non-derived members. + bool operator == (const TransitionIdInfo &other) const { + return (phone == other.phone && topo_state == other.topo_state && + arc_index == other.arc_index && pdf_id == other.pdf_id && + self_loop_pdf_id == other.self_loop_pdf_id); + } + }; + + + /// return reference to HMM-topology object. + const Topology &GetTopo() const { return topo_; } + + const TransitionIdInfo &InfoForTransitionId(int32 transition_id) const; + + inline int32 TransitionIdToPdfFast(int32 trans_id) const; + + /// This allows you to look up a transition-id. It returns 0 if nothing + /// was found. + int32 TupleToTransitionId(int32 phone, int32 topo_state, int32 arc_index, + int32 pdf_id, int32 self_loop_pdf_id) const; + + + /// Returns the total number of transition-ids (note, these are one-based). + inline int32 NumTransitionIds() const { return info_.size() - 1; } + + // NumPdfs() returns the number of pdfs (pdf-ids) in the tree, + // as returned by ctx_dep.NumPdfs() for the tree passed to the constructor. + int32 NumPdfs() const { return num_pdfs_; } + + /// Returns a sorted, unique list of phones. + const std::vector &GetPhones() const { return topo_.GetPhones(); } + + + /// Print will print the transition model in a human-readable way, for purposes of human + /// inspection. The "occs" are optional (they are indexed by pdf-id). + void Print(std::ostream &os, + const std::vector &phone_names, + const Vector *occs = NULL); + + int32 PdfClassForTid(int32 tid) const; + + /// returns true if this is identical to 'other' + bool operator == (const Transitions &other) const; + + bool Compatible(const Transitions& other) const; + + private: + + // Called from constructor. 
+  // 5 fields); the implementation then has to call ComputeDerived()
+  // to initialize the rest.
+  void ComputeInfo(const ContextDependencyInterface &ctx_dep);
+
+  void ComputeDerived();  // Called from constructor and Read function.
+
+  void Check() const;
+
+
+  Topology topo_;
+
+  /// Information about transition-ids, indexed by transition-id.
+  /// The tuples are in lexicographically sorted order, which allows us to do
+  /// the reverse mapping from tuple to transition-id.
+  std::vector<TransitionIdInfo> info_;
+
+
+  /// Accessing pdf_ids_[i] allows us to look up info_[i].pdf_id in a way that
+  /// is more friendly to memory caches than accessing info_; this is done in
+  /// the inner loops of decoders so it makes sense to optimize for it.
+  std::vector<int32> pdf_ids_;
+
+  /// This is a copy of the NumPdfs() returned by the tree when we constructed
+  /// this object.  Note: pdf-ids are zero-based.
+  int32 num_pdfs_;
+
+  KALDI_DISALLOW_COPY_AND_ASSIGN(Transitions);
+};
+
+inline int32 Transitions::TransitionIdToPdfFast(int32 trans_id) const {
+  // Note: it's a little dangerous to assert this only in paranoid mode.
+  // However, this function is called in the inner loop of decoders and
+  // the assertion would likely take a significant amount of time.  We make
+  // sure that past the end of the pdf_ids_ array there are large values,
+  // which will make the calling code more likely to crash loudly (rather
+  // than silently produce wrong output) if this is called with
+  // out-of-range values.
+  KALDI_PARANOID_ASSERT(
+      static_cast<size_t>(trans_id) < pdf_ids_.size() &&
+      "Likely graph/model mismatch (graph built from wrong model?)");
+  return pdf_ids_[trans_id];
+}
+
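As a usage sketch (not part of the patch), the interface above might be exercised as follows; the function name ExamplePdfLookup is hypothetical, and a constructed (or Read) Transitions object is assumed:

    // Illustrative sketch only: looking up per-transition-id information.
    #include "base/kaldi-common.h"
    #include "hmm/transitions.h"

    namespace kaldi {

    int32 ExamplePdfLookup(const Transitions &trans, int32 tid) {
      // Fast path used in decoder inner loops: just the pdf-id.
      int32 pdf_id = trans.TransitionIdToPdfFast(tid);

      // Full information, e.g. for alignment post-processing.
      const Transitions::TransitionIdInfo &info = trans.InfoForTransitionId(tid);
      KALDI_ASSERT(info.pdf_id == pdf_id);

      // The 5-tuple maps back to the transition-id; 0 would mean "not found".
      int32 tid2 = trans.TupleToTransitionId(info.phone, info.topo_state,
                                             info.arc_index, info.pdf_id,
                                             info.self_loop_pdf_id);
      KALDI_ASSERT(tid2 == tid);
      return pdf_id;
    }

    }  // namespace kaldi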
+/// Works out which pdfs might correspond to the given phones.  Will return true
+/// if these pdfs correspond *just* to these phones, false if these pdfs are also
+/// used by other phones.
+/// @param trans_model [in] Transition-model used to work out this information
+/// @param phones [in] A sorted, unique vector that represents a set of phones
+/// @param pdfs [out] Will be set to a sorted, unique list of pdf-ids that
+///                   correspond to one of this set of phones.
+/// @return Returns true if all of the pdfs output to "pdfs" correspond to phones from
+///         just this set (false if they may be shared with phones outside this set).
+bool GetPdfsForPhones(const Transitions &trans_model,
+                      const std::vector<int32> &phones,
+                      std::vector<int32> *pdfs);
+
+/// Works out which phones might correspond to the given pdfs.  Similar to the
+/// above GetPdfsForPhones().
+bool GetPhonesForPdfs(const Transitions &trans_model,
+                      const std::vector<int32> &pdfs,
+                      std::vector<int32> *phones);
+/// @}
+
+
+}  // end namespace kaldi
+
+
+#endif
diff --git a/src/hmm/tree-accu.cc b/src/hmm/tree-accu.cc
index c8ce49d9bc7..18a613b8a5c 100644
--- a/src/hmm/tree-accu.cc
+++ b/src/hmm/tree-accu.cc
@@ -33,7 +33,7 @@ static int32 MapPhone(const std::vector<int32> &phone_map,
 }
 
-void AccumulateTreeStats(const TransitionModel &trans_model,
+void AccumulateTreeStats(const Transitions &trans_model,
                          const AccumulateTreeStatsInfo &info,
                          const std::vector<int32> &alignment,
                          const Matrix<BaseFloat> &features,
@@ -54,8 +54,8 @@ void AccumulateTreeStats(const TransitionModel &trans_model,
         i + info.central_position < static_cast<int32>(split_alignment.size())) {
       int32 central_phone = MapPhone(info.phone_map,
-                                     trans_model.TransitionIdToPhone(
-                                         split_alignment[i+info.central_position][0]));
+                                     trans_model.InfoForTransitionId(
+                                         split_alignment[i+info.central_position][0]).phone);
       bool is_ctx_dep = !std::binary_search(info.ci_phones.begin(),
                                             info.ci_phones.end(), central_phone);
@@ -65,7 +65,7 @@ void AccumulateTreeStats(const TransitionModel &trans_model,
         if (i + j >= 0 && i + j < static_cast<int32>(split_alignment.size()))
           phone = MapPhone(info.phone_map,
-                           trans_model.TransitionIdToPhone(split_alignment[i+j][0]));
+                           trans_model.InfoForTransitionId(split_alignment[i+j][0]).phone);
         else
           phone = 0;  // ContextDependency class uses 0 to mean "out of window";
                       // we also set the phone arbitrarily to 0
@@ -84,9 +84,8 @@ void AccumulateTreeStats(const TransitionModel &trans_model,
       for (int32 j = 0; j < static_cast<int32>(split_alignment[i+info.central_position].size());j++) {
         // for central phone of this window...
         EventType evec_more(evec);
-        int32 pdf_class = trans_model.TransitionIdToPdfClass(
-            split_alignment[i+info.central_position][j]);
-        // pdf_class will normally by 0, 1 or 2 for 3-state HMM.
+        int32 pdf_class = trans_model.PdfClassForTid(split_alignment[i+info.central_position][j]);
+        // pdf_class will normally be 0, 1 or 2 for 3-state HMM.
         std::pair<EventKeyType, EventValueType> pr(kPdfClass, pdf_class);
         evec_more.push_back(pr);
         std::sort(evec_more.begin(), evec_more.end());  // these must be sorted!
diff --git a/src/hmm/tree-accu.h b/src/hmm/tree-accu.h
index 92e83c535c7..fd3e09567b5 100644
--- a/src/hmm/tree-accu.h
+++ b/src/hmm/tree-accu.h
@@ -23,7 +23,7 @@
 #include <cctype>  // For isspace.
 #include <vector>
 #include "base/kaldi-common.h"
-#include "hmm/transition-model.h"
+#include "hmm/transitions.h"
 #include "tree/clusterable-classes.h"
 #include "tree/build-tree-questions.h"  // needed for this typedef:
 // typedef std::vector<std::pair<EventType, Clusterable*> > BuildTreeStatsType;
@@ -74,7 +74,7 @@ struct AccumulateTreeStatsInfo {
 /// "normal" way).  It adds to 'stats' the stats obtained from this file.  Any
 /// new GaussClusterable* pointers in "stats" will be allocated with "new".
-void AccumulateTreeStats(const TransitionModel &trans_model,
+void AccumulateTreeStats(const Transitions &trans_model,
                          const AccumulateTreeStatsInfo &info,
                          const std::vector<int32> &alignment,
                          const Matrix<BaseFloat> &features,
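The tree-accu.cc hunks above replace TransitionModel::TransitionIdToPhone() and TransitionIdToPdfClass() with lookups through the new TransitionIdInfo struct and PdfClassForTid(). A minimal sketch of the same migration pattern, for code that extracts a phone sequence from an alignment; the helper name AlignmentToPhones is hypothetical and only assumes the transitions.h interface declared above:

    // Hypothetical helper, not part of the patch: maps each frame of an
    // alignment to its phone using the new Transitions interface.
    #include <vector>
    #include "hmm/transitions.h"

    namespace kaldi {

    void AlignmentToPhones(const Transitions &trans,
                           const std::vector<int32> &alignment,
                           std::vector<int32> *phones) {
      phones->clear();
      phones->reserve(alignment.size());
      for (size_t i = 0; i < alignment.size(); i++) {
        // Old API: trans.TransitionIdToPhone(alignment[i]);
        // New API: look up the per-transition-id info struct and read .phone.
        phones->push_back(trans.InfoForTransitionId(alignment[i]).phone);
      }
    }

    }  // namespace kaldi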
diff --git a/src/itf/context-dep-itf.h b/src/itf/context-dep-itf.h
index b62bd11e11a..9db5a36c70c 100644
--- a/src/itf/context-dep-itf.h
+++ b/src/itf/context-dep-itf.h
@@ -62,9 +62,9 @@ class ContextDependencyInterface {
   /// GetPdfInfo returns a vector indexed by pdf-id, saying for each pdf which
   /// pairs of (phone, pdf-class) it can correspond to.  (Usually just one).
-  /// c.f. hmm/hmm-topology.h for meaning of pdf-class.
+  /// c.f. hmm/topology.h for meaning of pdf-class.
   /// This is the old, simpler interface of GetPdfInfo(), and that this one can
-  /// only be called if the HmmTopology object's IsHmm() function call returns
+  /// only be called if the Topology object's IsHmm() function call returns
   /// true.
   virtual void GetPdfInfo(
       const std::vector<int32> &phones,  // list of phones
diff --git a/src/ivector/Makefile b/src/ivector/Makefile
index 1154da6880b..ad53c9007b2 100644
--- a/src/ivector/Makefile
+++ b/src/ivector/Makefile
@@ -13,7 +13,7 @@ OBJFILES = ivector-extractor.o voice-activity-detection.o plda.o \
 LIBNAME = kaldi-ivector
 
 ADDLIBS = ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
-          ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \
+          ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \
           ../base/kaldi-base.a
diff --git a/src/ivectorbin/Makefile b/src/ivectorbin/Makefile
index 8dc3498b83b..c261ed3e28e 100644
--- a/src/ivectorbin/Makefile
+++ b/src/ivectorbin/Makefile
@@ -26,7 +26,7 @@ TESTFILES =
 
 ADDLIBS = ../ivector/kaldi-ivector.a ../hmm/kaldi-hmm.a ../gmm/kaldi-gmm.a \
-          ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \
+          ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \
           ../base/kaldi-base.a
 
 include ../makefiles/default_rules.mk
diff --git a/src/kws/Makefile b/src/kws/Makefile
index c4367eb2958..9dc7bddab70 100644
--- a/src/kws/Makefile
+++ b/src/kws/Makefile
@@ -10,7 +10,7 @@ OBJFILES = kws-functions.o kws-functions2.o kws-scoring.o
 LIBNAME = kaldi-kws
 
 ADDLIBS = ../lat/kaldi-lat.a ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \
-          ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a
+          ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a
 
 include ../makefiles/default_rules.mk
diff --git a/src/kwsbin/Makefile b/src/kwsbin/Makefile
index bcc2685b7f3..f03b0a07f92 100644
--- a/src/kwsbin/Makefile
+++ b/src/kwsbin/Makefile
@@ -17,6 +17,6 @@ TESTFILES =
 ADDLIBS = ../kws/kaldi-kws.a ../lat/kaldi-lat.a ../fstext/kaldi-fstext.a \
           ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \
-          ../matrix/kaldi-matrix.a ../base/kaldi-base.a
+          ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a
 
 include ../makefiles/default_rules.mk
diff --git a/src/lat/Makefile b/src/lat/Makefile
index 56521486826..3d4c6afcc79 100644
--- a/src/lat/Makefile
+++ b/src/lat/Makefile
@@ -16,7 +16,7 @@ OBJFILES = kaldi-lattice.o lattice-functions.o word-align-lattice.o \
 LIBNAME = kaldi-lat
 
 ADDLIBS = ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a \
-          ../matrix/kaldi-matrix.a ../base/kaldi-base.a
+          ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a ../base/kaldi-base.a
 
 include ../makefiles/default_rules.mk
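The next hunk (determinize-lattice-pruned.cc) removes a test that used TransitionIdToHmmState() and IsSelfLoop() to find the first transition of a phone. With the TransitionIdInfo fields documented in transitions.h, an equivalent check might look like the sketch below. This is illustrative only, assuming the is_initial/is_self_loop semantics described above; it is not necessarily the exact replacement used in the patch, and the helper name is hypothetical:

    // Illustrative sketch only: detecting the first (non-self-loop) transition
    // of a phone with the new interface, e.g. when inserting phone labels.
    #include "hmm/transitions.h"

    namespace kaldi {

    bool IsFirstTransitionOfPhone(const Transitions &trans, int32 tid) {
      const Transitions::TransitionIdInfo &info = trans.InfoForTransitionId(tid);
      // A transition that leaves the initial topology state and is not a
      // self-loop is seen exactly once per phone instance.
      return info.is_initial && !info.is_self_loop;
    }

    }  // namespace kaldi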
diff --git a/src/lat/determinize-lattice-pruned.cc b/src/lat/determinize-lattice-pruned.cc
index bdf8c3fabc8..cdc920ef95b 100644
--- a/src/lat/determinize-lattice-pruned.cc
+++ b/src/lat/determinize-lattice-pruned.cc
@@ -1294,7 +1294,7 @@ bool DeterminizeLatticePruned(const ExpandedFst<ArcTpl<Weight> > &ifst,
 
 template<class Weight>
 typename ArcTpl<Weight>::Label DeterminizeLatticeInsertPhones(
-    const kaldi::TransitionModel &trans_model,
+    const kaldi::Transitions &trans_model,
     MutableFst<ArcTpl<Weight> > *fst) {
   // Define some types.
   typedef ArcTpl<Weight> Arc;
@@ -1316,32 +1316,28 @@ typename ArcTpl<Weight>::Label DeterminizeLatticeInsertPhones(
        !aiter.Done(); aiter.Next()) {
       Arc arc = aiter.Value();
 
-      // Note: the words are on the input symbol side and transition-id's are on
+      // Note: the words are on the input symbol side and transition-ids are on
       // the output symbol side.
-      if ((arc.olabel != 0)
-          && (trans_model.TransitionIdToHmmState(arc.olabel) == 0)
-          && (!trans_model.IsSelfLoop(arc.olabel))) {
-        Label phone =
-            static_cast