NVIDIA · pratapaprasanna · Aug 27, 2019 · Sep 9, 2019 · Oct 20, 2019 · Nov 6, 2019
diff --git a/README.md b/README.md
@@ -65,3 +65,71 @@ If you use OpenSeq2Seq, please cite [this paper](https://arxiv.org/abs/1805.1038
     primaryClass={cs.CL}
 }
 ```
+
+## Install Decoders
+
+### Install boost/automake and bison
+```
+sudo apt-get install libboost-all-dev -y
+sudo apt-get install automake -y
+sudo apt-get install bison -y
+```
+
+### Install SWIG
+ ```
+ git clone https://github.com/swig/swig.git
+cd swig
+./autogen.sh
+./configure
+make
+sudo make install
+ ```
+ #### Test once
+ ```
+ $ swig
+ ```
+If you encounter the following error:
+```
+$ swig: error while loading shared libraries: libpcre.so.1: cannot open shared object file: No such file or directory
+```
+## Install PCRE
+```
+cd /usr/local/src
+sudo curl --remote-name ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.42.tar.gz
+
+tar -xzvf pcre-8.42.tar.gz
+cd pcre-8.42
+sudo ./configure --prefix=/usr/local/mac-dev-env/pcre-8.42
+sudo make
+sudo make install 
+sudo ln -s mac-dev-env/pcre-8.42 /usr/local/pcre
+echo 'export PATH=/usr/local/pcre/bin:$PATH' >> ~/.bash_profile
+source ~/.bash_profile
+cd .libs
+sudo mv -v libpcre.so.* /usr/lib/
+```
+If the above doesn't work, use the newer PCRE release as follows:
+
+```
+sudo curl --remote-name https://ftp.pcre.org/pub/pcre/pcre-8.43.tar.bz2
+tar xjf  pcre-8.43.tar.bz2 
+cd pcre-8.43/
+sudo ./configure --prefix=/usr/local/mac-dev-env/pcre-8.43
+sudo make
+sudo make install 
+sudo ln -s mac-dev-env/pcre-8.43 /usr/local/pcre
+echo 'export PATH=/usr/local/pcre/bin:$PATH' >> ~/.bash_profile
+source ~/.bash_profile
+cd .libs
+sudo mv -v libpcre.so.* /usr/lib/
+```
+
+If the symlink `/usr/local/pcre` already exists, either delete it or use a different symlink name.
+## Final Output
+
+```
+$ swig
+Must specify an input file. Use -help for available options.
+```
+
+### Thank you
diff --git a/docs/sources/source/getting-started/asr.rst b/docs/sources/source/getting-started/asr.rst
@@ -26,6 +26,7 @@ dataset size will be around 224GB (including archives and original compressed au
 Now, everything should be setup to train the model::
 
     python run.py --config_file=example_configs/speech2text/ds2_librispeech_larc_config.py --mode=train_eval
+    python run.py --config_file=example_configs/speech2text/ds2_librispeech_larc_config.py --mode=train_eval --infer_dataset=example_configs/datasets/infer.csv
 
 If you want to run evaluation/inference with the trained model, replace
 ``--mode=train_eval`` with ``--mode=eval`` or ``--mode=infer``.

diff --git a/open_seq2seq/utils/utils.py b/open_seq2seq/utils/utils.py
@@ -506,7 +506,13 @@ def get_base_config(args):
                       help='whether to log output, git info, cmd args, etc.')
   parser.add_argument('--use_xla_jit', dest='use_xla_jit', action='store_true',
                       help='whether to use XLA_JIT to compile and run the model.')
+  parser.add_argument('--infer_dataset', dest='infer_dataset',
+                      help='infer_dataset csv file.')
+  parser.add_argument('--train_dataset', dest='train_dataset',
+                      help='train_dataset csv file.')
   args, unknown = parser.parse_known_args(args)
+  infer_params = args.infer_dataset
+  train_params = args.train_dataset
 
   if args.mode not in [
       'train',
@@ -519,7 +525,10 @@ def get_base_config(args):
                      "['train', 'eval', 'train_eval', 'infer', "
                      "'interactive_infer']")
   config_module = runpy.run_path(args.config_file, init_globals={'tf': tf})
-
+  if infer_params:
+    config_module['infer_params']['data_layer_params']['dataset_files'] = infer_params.split(',')
+  if train_params:
+    config_module['train_params']['data_layer_params']['dataset_files'] = train_params.split(',')
   base_config = config_module.get('base_params', None)
   if base_config is None:
     raise ValueError('base_config dictionary has to be '
@@ -541,7 +550,6 @@ def get_base_config(args):
       parser_unk.add_argument('--' + pm, default=value, type=ast.literal_eval)
   config_update = parser_unk.parse_args(unknown)
   nested_update(base_config, nest_dict(vars(config_update)))
-
   return args, base_config, base_model, config_module
 
 def get_calibration_config(arguments):

diff --git a/scripts/decode.py b/scripts/decode.py
@@ -4,7 +4,7 @@
 '''
 
 import argparse
-
+import time
 import pickle
 import numpy as np
 
@@ -89,7 +89,6 @@
 
 num_cpus = multiprocessing.cpu_count()
 
-
 def levenshtein(a, b):
   """Calculates the Levenshtein distance between a and b.
   The code was taken from: http://hetland.org/coding/python/levenshtein.py
@@ -170,6 +169,8 @@ def softmax(x):
 
 
 def evaluate_wer(logits, labels, vocab, decoder):
+  eval_start=time.time()
+  print("evaluation started at   ",eval_start)
   total_dist = 0.0
   total_count = 0.0
   wer_per_sample = np.empty(shape=len(labels))
@@ -187,31 +188,52 @@ def evaluate_wer(logits, labels, vocab, decoder):
     wer_per_sample[idx] = dist / len(label.split())
   print('# empty preds: {}'.format(empty_preds))
   wer = total_dist / total_count
+  eval_end=time.time()
+  print("evaluation took %s time"%(eval_end-eval_start))
   return wer, wer_per_sample
 
+def divide_chunks(l, n):
+  """Yield consecutive slices of `l`, each holding at most `n` items."""
+  for start in range(0, len(l), n):
+    yield l[start:start + n]
 
+data_load_start=time.time()
 data = load_dump(args.logits)
 labels = load_labels(args.labels)
 logits = get_logits(data, labels)
 vocab = load_vocab(args.vocab)
 vocab[-1] = '_'
-
+data_load_end=time.time()
+print("Data loading took %s seconds" %(data_load_end-data_load_start) )
 probs_batch = []
 for line in labels:
   audio_filename = line[0]
   probs_batch.append(softmax(logits[audio_filename]))
+batch_prob_end=time.time()
+print("Batch logit loading took %s seconds" %(batch_prob_end-data_load_end) )
 
 if args.mode == 'eval':
+  eval_start=time.time()
   wer, _ = evaluate_wer(logits, labels, vocab, greedy_decoder)
   print('Greedy WER = {:.4f}'.format(wer))
   best_result = {'wer': 1e6, 'alpha': 0.0, 'beta': 0.0, 'beams': None} 
   for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step):
     for beta in np.arange(args.beta, args.beta_max, args.beta_step):
       scorer = Scorer(alpha, beta, model_path=args.lm, vocabulary=vocab[:-1])
-      res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], 
-                                          beam_size=args.beam_width, 
-                                          num_processes=num_cpus,
-                                          ext_scoring_func=scorer)
+      print("scorer complete")
+      # Decode in chunks of 500 files. The loop variable is deliberately not
+      # named `probs_batch`: rebinding that name would leave only the last
+      # chunk for the next (alpha, beta) combination to decode.
+      res = []
+      for chunk in divide_chunks(probs_batch, 500):
+        chunk_start = time.time()
+        res.extend(ctc_beam_search_decoder_batch(chunk, vocab[:-1],
+                                                 beam_size=args.beam_width,
+                                                 num_processes=num_cpus,
+                                                 ext_scoring_func=scorer))
+        chunk_end = time.time()
+        print("500 files batched took %s time" % (chunk_end - chunk_start))
+
       total_dist = 0.0
       total_count = 0.0
       for idx, line in enumerate(labels):
@@ -230,7 +252,8 @@ def evaluate_wer(logits, labels, vocab, decoder):
       print('alpha={:.2f}, beta={:.2f}: WER={:.4f}'.format(alpha, beta, wer))
   print('BEST: alpha={:.2f}, beta={:.2f}, WER={:.4f}'.format(
         best_result['alpha'], best_result['beta'], best_result['wer']))
-
+  eval_end = time.time()  # end of the greedy pass plus the whole alpha/beta sweep
+  print("evaluation took %s seconds" % (eval_end - eval_start))
   if args.dump_all_beams_to:
    with open(args.dump_all_beams_to, 'w') as f:
      for beam in best_result['beams']:
@@ -239,19 +262,48 @@ def evaluate_wer(logits, labels, vocab, decoder):
          f.write('{} 0.0 0.0 {}\n'.format(pred[0], pred[1]))
        f.write('E=>>>>>>>>\n')
 
+elif args.mode == 'greedy':  # greedy decoding only; no Scorer is built in this branch
+    print("Greedy Mode")
+    greedy_preds = np.empty(shape=(len(labels), 2), dtype=object)  # one (file name, transcript) row per entry in labels
+    for idx, line in enumerate(labels):
+        filename = line[0]  # first field of a label row is used as the key into logits
+        greedy_preds[idx, 0] = filename
+        greedy_preds[idx, 1] = greedy_decoder(logits[filename], vocab)
+
+    np.savetxt(args.infer_output_file, greedy_preds, fmt='%s', delimiter=',',
+              header='wav_filename,greedy')
+
+
 elif args.mode == 'infer':
-  scorer = Scorer(args.alpha, args.beta, model_path=args.lm, vocabulary=vocab[:-1])
-  res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], 
-                                      beam_size=args.beam_width, 
-                                      num_processes=num_cpus,
-                                      ext_scoring_func=scorer)
-  infer_preds = np.empty(shape=(len(labels), 2), dtype=object)
-  for idx, line in enumerate(labels):
-    filename = line[0]
-    score, text = [v for v in zip(*res[idx])]
-    infer_preds[idx, 0] = filename
-    infer_preds[idx, 1] = text[0]
+    print("Inference Mode")
+    infer_start=time.time()
+    scorer = Scorer(args.alpha, args.beta, model_path=args.lm, vocabulary=vocab[:-1])
+
+    # Decode in chunks of 500 files and collect the per-file beam results in
+    # order. The loop variable is not named `probs_batch`, so the module-level
+    # list of all probabilities is left intact.
+    res = []
+    for chunk in divide_chunks(probs_batch, 500):
+      chunk_start = time.time()
+      res.extend(ctc_beam_search_decoder_batch(chunk, vocab[:-1],
+                                               beam_size=args.beam_width,
+                                               num_processes=num_cpus,
+                                               ext_scoring_func=scorer))
+      chunk_end = time.time()
+
+      # Message text kept as-is; the final chunk may hold fewer than 500 items.
+      print("500 files batched took %s time" % (chunk_end - chunk_start))
+
+    infer_preds = np.empty(shape=(len(labels), 3), dtype=object)  # columns: file name, beam-search text, greedy text
+    for idx, line in enumerate(labels):
+      filename = line[0]
+      score, text = [v for v in zip(*res[idx])]  # transpose the pairs in res[idx] into a scores tuple and a texts tuple
+      infer_preds[idx, 0] = filename
+      infer_preds[idx, 1] = text[0]  # first beam's text; presumably the best-scoring one — TODO confirm ordering
+      # Greedy decoding of the same file, stored next to the beam-search result.
+      infer_preds[idx, 2] = greedy_decoder(logits[filename], vocab)
 
-  np.savetxt(args.infer_output_file, infer_preds, fmt='%s', delimiter=',',
-             header='wav_filename,transcript')
+    infer_end = time.time()  # end of decoding; the file write below is not timed
+    print("Inference took %s seconds" % (infer_end - infer_start))
+    np.savetxt(args.infer_output_file, infer_preds, fmt='%s', delimiter=',',header='wav_filename,lm,greedy')
 
diff --git a/setup.py b/setup.py
@@ -0,0 +1,23 @@
+import setuptools
+import numpy as np  # only used for np.get_include() in include_dirs below
+
+with open("README.md", "r") as fh:  # README text is passed to setup() as long_description
+    long_description = fh.read()
+
+setuptools.setup(  # packaging metadata for the open_seq2seq distribution
+    name="open_seq2seq",
+    version="0.0.1",
+    author="voicezen",
+    author_email="[email protected]",
+    description="Python repo for components and analysis",
+    long_description=long_description,
+    long_description_content_type="text/markdown",  # matches the README.md source read above
+    url="https://github.com/voicezen/jivaka",
+    packages=setuptools.find_packages(),
+    include_dirs=[np.get_include()],  # NOTE(review): NumPy header dir, but no ext_modules are declared here — confirm this is needed
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+)