From 971db6619963a1a04168f8721db5290eb7ef33a0 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Tue, 27 Aug 2019 00:58:06 +0000 Subject: [PATCH 1/7] Fixed finetuning (with load_model parameter) Signed-off-by: Vitaly Lavrukhin Signed-off-by: Praveen Puglia --- open_seq2seq/models/speech2text_test.py | 35 +++++++++++++++ open_seq2seq/utils/funcs.py | 59 +++++++++++++++---------- 2 files changed, 70 insertions(+), 24 deletions(-) diff --git a/open_seq2seq/models/speech2text_test.py b/open_seq2seq/models/speech2text_test.py index 270651313..d89d7c543 100644 --- a/open_seq2seq/models/speech2text_test.py +++ b/open_seq2seq/models/speech2text_test.py @@ -102,6 +102,41 @@ def convergence_test(self, train_loss_threshold, self.assertLess(eval_loss, eval_loss_threshold) self.assertLess(eval_dict['Eval WER'], eval_wer_threshold) + def finetuning_test(self, train_loss_threshold, + eval_loss_threshold, eval_wer_threshold): + for dtype in [tf.float32, "mixed"]: + + # pre-training + train_config, eval_config = self.prepare_config() + train_config.update({ + "dtype": dtype, + }) + eval_config.update({ + "dtype": dtype, + }) + loss, eval_loss, eval_dict = self.run_model(train_config, eval_config) + + self.assertLess(loss, train_loss_threshold) + self.assertLess(eval_loss, eval_loss_threshold) + self.assertLess(eval_dict['Eval WER'], eval_wer_threshold) + + # finetuning + restore_dir = train_config['logdir'] + train_config['logdir'] = tempfile.mktemp() + eval_config['logdir'] = train_config['logdir'] + train_config.update({ + "load_model": restore_dir, + "lr_policy_params": { + "learning_rate": 0.0001, + "power": 2, + } + }) + loss_ft, eval_loss_ft, eval_dict_ft = self.run_model(train_config, eval_config) + + self.assertLess(loss_ft, train_loss_threshold) + self.assertLess(eval_loss_ft, eval_loss_threshold) + self.assertLess(eval_dict_ft['Eval WER'], eval_wer_threshold) + def convergence_with_iter_size_test(self): try: import horovod.tensorflow as hvd diff --git a/open_seq2seq/utils/funcs.py b/open_seq2seq/utils/funcs.py index 387fc2867..afea43c97 100644 --- a/open_seq2seq/utils/funcs.py +++ b/open_seq2seq/utils/funcs.py @@ -116,9 +116,33 @@ def train(train_model, eval_model=None, debug_port=None, custom_hooks=None): # checkpoint. 
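  # Note that load_model_dir only matters on the very first run: as soon as
  # this job has written its own checkpoint into checkpoint_dir, subsequent
  # restarts resume from that checkpoint and the transfer-load path below
  # is skipped.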
restoring = load_model_dir and not tf.train.latest_checkpoint(checkpoint_dir) if restoring: - scaffold = TransferScaffold( - local_init_op=tf.group(tf.local_variables_initializer(), init_data_layer) + vars_in_checkpoint = {} + for var_name, var_shape in tf.train.list_variables(load_model_dir): + vars_in_checkpoint[var_name] = var_shape + + print('VARS_IN_CHECKPOINT:') + print(vars_in_checkpoint) + + vars_to_load = [] + for var in tf.global_variables(): + var_name = var.name.split(':')[0] + if var_name in vars_in_checkpoint: + if var.shape == vars_in_checkpoint[var_name] and \ + 'global_step' not in var_name: + vars_to_load.append(var) + + print('VARS_TO_LOAD:') + for var in vars_to_load: + print(var) + + load_model_fn = tf.contrib.framework.assign_from_checkpoint_fn( + tf.train.latest_checkpoint(load_model_dir), vars_to_load ) + scaffold = tf.train.Scaffold( + local_init_op=tf.group(tf.local_variables_initializer(), init_data_layer), + init_fn = lambda scaffold_self, sess: load_model_fn(sess) + ) + else: scaffold = tf.train.Scaffold( local_init_op=tf.group(tf.local_variables_initializer(), init_data_layer) @@ -134,28 +158,15 @@ def train(train_model, eval_model=None, debug_port=None, custom_hooks=None): "train model does not define get_num_objects_per_step method.") # starting training - if restoring: - sess = TransferMonitoredTrainingSession( - scaffold=scaffold, - checkpoint_dir=checkpoint_dir, - save_summaries_steps=train_model.params['save_summaries_steps'], - config=sess_config, - save_checkpoint_secs=None, - log_step_count_steps=train_model.params['save_summaries_steps'], - stop_grace_period_secs=300, - hooks=hooks, - load_model_dir=load_model_dir, - load_fc=train_model.params['load_fc']) - else: - sess = tf.train.MonitoredTrainingSession( - scaffold=scaffold, - checkpoint_dir=checkpoint_dir, - save_summaries_steps=train_model.params['save_summaries_steps'], - config=sess_config, - save_checkpoint_secs=None, - log_step_count_steps=train_model.params['save_summaries_steps'], - stop_grace_period_secs=300, - hooks=hooks) + sess = tf.train.MonitoredTrainingSession( + scaffold=scaffold, + checkpoint_dir=checkpoint_dir, + save_summaries_steps=train_model.params['save_summaries_steps'], + config=sess_config, + save_checkpoint_secs=None, + log_step_count_steps=train_model.params['save_summaries_steps'], + stop_grace_period_secs=300, + hooks=hooks) step = 0 num_bench_updates = 0 while True: From b5aa5fbea337481778c41680ce5bd30bfe804504 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Mon, 9 Sep 2019 22:08:06 +0000 Subject: [PATCH 2/7] Updated ctc_decoder_with_lm to upstream TF Signed-off-by: Vitaly Lavrukhin Signed-off-by: Praveen Puglia --- ctc_decoder_with_lm/README.md | 31 ++-------------------- ctc_decoder_with_lm/beam_search.cc | 42 +++++++++++++++--------------- ctc_decoder_with_lm/beam_search.h | 2 +- 3 files changed, 24 insertions(+), 51 deletions(-) diff --git a/ctc_decoder_with_lm/README.md b/ctc_decoder_with_lm/README.md index 4b864b569..30295bba5 100644 --- a/ctc_decoder_with_lm/README.md +++ b/ctc_decoder_with_lm/README.md @@ -21,34 +21,7 @@ You'll need the following pre-requisites downloaded/installed: * [TensorFlow source and requirements](https://www.tensorflow.org/install/install_sources) * [libsox](https://sourceforge.net/projects/sox/) - -## Preparation - -Create a symbolic link in your TensorFlow checkout to `ctc_decoder_with_lm` directory. 
If your DeepSpeech and TensorFlow checkouts are side by side in the same directory, do:
-
-```
-cd tensorflow
-ln -s ../OpenSeq2Seq/ctc_decoder_with_lm ./
-```
-
 ## Building

-## Step 1 : Build Tensorflow
-You need to re-build TensorFlow.
-Follow the [instructions](https://www.tensorflow.org/install/install_sources) on the TensorFlow site for your platform, up to the end of 'Configure the installation':
-
-```
-./configure
-bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
-bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
-sudo pip install /tmp/tensorflow_pkg/tensorflow*.whl
-sudo pip install --upgrade numpy
-```
-
-## Step 2: Build CTC beam search decoder:
-
-```
-bazel build -c opt --copt=-O3 --config=cuda //tensorflow:libtensorflow_cc.so //tensorflow:libtensorflow_framework.so //ctc_decoder_with_lm:libctc_decoder_with_kenlm.so //ctc_decoder_with_lm:generate_trie
-cp bazel-bin/ctc_decoder_with_lm/*.so OpenSeq2Seq/ctc_decoder_with_lm/
-cp bazel-bin/ctc_decoder_with_lm/generate_trie OpenSeq2Seq/ctc_decoder_with_lm/
-```
+Please see the detailed instructions in [OpenSeq2Seq documentation](https://nvidia.github.io/OpenSeq2Seq/html/installation.html#how-to-build-a-custom-native-tf-op-for-ctc-decoder-with-language-model-optional).
+
diff --git a/ctc_decoder_with_lm/beam_search.cc b/ctc_decoder_with_lm/beam_search.cc
index 0e140a368..2f4219a3c 100644
--- a/ctc_decoder_with_lm/beam_search.cc
+++ b/ctc_decoder_with_lm/beam_search.cc
@@ -37,8 +37,8 @@ namespace ctc {

 template <typename CTCBeamState = ctc_beam_search::EmptyBeamState,
           typename CTCBeamComparer =
-              ctc_beam_search::BeamComparer<CTCBeamState>>
-class CTCBeamSearchNormLogDecoder : public CTCDecoder {
+              ctc_beam_search::BeamComparer<float, CTCBeamState>>
+class CTCBeamSearchNormLogDecoder : public CTCDecoder<float> {
 // Beam Search
 //
 // Example (GravesTh Fig. 7.5):
@@ -70,12 +70,12 @@ class CTCBeamSearchNormLogDecoder : public CTCDecoder {
 //   starts at 0). This special case can be calculated as:
 //   P(l=abc? @ t=3) = P(a @ 0)*P(b @ 1)*P(c @ 2)*P(? @ 3)
 //   but we calculate it recursively for speed purposes.
-  typedef ctc_beam_search::BeamEntry<CTCBeamState> BeamEntry;
-  typedef ctc_beam_search::BeamRoot<CTCBeamState> BeamRoot;
-  typedef ctc_beam_search::BeamProbability BeamProbability;
+  typedef ctc_beam_search::BeamEntry<float, CTCBeamState> BeamEntry;
+  typedef ctc_beam_search::BeamRoot<float, CTCBeamState> BeamRoot;
+  typedef ctc_beam_search::BeamProbability<float> BeamProbability;

  public:
-  typedef BaseBeamScorer<CTCBeamState> DefaultBeamScorer;
+  typedef BaseBeamScorer<float, CTCBeamState> DefaultBeamScorer;

   // The beam search decoder is constructed specifying the beam_width (number of
   // candidates to keep at each decoding timestep) and a beam scorer (used for
@@ -84,9 +84,9 @@ class CTCBeamSearchNormLogDecoder : public CTCDecoder {
   // implementation, CTCBeamSearchDecoder<>::DefaultBeamScorer, generates the
   // standard beam search.
   CTCBeamSearchNormLogDecoder(int num_classes, int beam_width,
-                       BaseBeamScorer<CTCBeamState>* scorer, int batch_size = 1,
+                       BaseBeamScorer<float, CTCBeamState>* scorer, int batch_size = 1,
                        bool merge_repeated = false)
-      : CTCDecoder(num_classes, batch_size, merge_repeated),
+      : CTCDecoder<float>(num_classes, batch_size, merge_repeated),
         beam_width_(beam_width),
         leaves_(beam_width),
         beam_scorer_(CHECK_NOTNULL(scorer)) {
@@ -96,10 +96,10 @@ class CTCBeamSearchNormLogDecoder : public CTCDecoder {
   ~CTCBeamSearchNormLogDecoder() override {}

   // Run the hibernating beam search algorithm on the given input.
-  Status Decode(const CTCDecoder::SequenceLength& seq_len,
-                const std::vector<CTCDecoder::Input>& input,
-                std::vector<CTCDecoder::Output>* output,
-                CTCDecoder::ScoreOutput* scores) override;
+  Status Decode(const CTCDecoder<float>::SequenceLength& seq_len,
+                const std::vector<CTCDecoder<float>::Input>& input,
+                std::vector<CTCDecoder<float>::Output>* output,
+                CTCDecoder<float>::ScoreOutput* scores) override;

   // Calculate the next step of the beam search and update the internal state.
   template <typename Vector>
@@ -111,7 +111,7 @@ class CTCBeamSearchNormLogDecoder : public CTCDecoder {
       std::vector<int>* top_k_indices);

   // Retrieve the beam scorer instance used during decoding.
-  BaseBeamScorer<CTCBeamState>* GetBeamScorer() const { return beam_scorer_; }
+  BaseBeamScorer<float, CTCBeamState>* GetBeamScorer() const { return beam_scorer_; }

   // Set label selection parameters for faster decoding.
   // See comments for label_selection_size_ and label_selection_margin_.
@@ -129,7 +129,7 @@ class CTCBeamSearchNormLogDecoder : public CTCDecoder {
                      std::vector<float>* log_probs, bool merge_repeated) const;

   gtl::TopN<BeamEntry*, CTCBeamComparer> leaves_;
-  BaseBeamScorer<CTCBeamState>* beam_scorer_;
+  BaseBeamScorer<float, CTCBeamState>* beam_scorer_;

  private:
   int beam_width_;
@@ -156,15 +156,15 @@ class CTCBeamSearchNormLogDecoder : public CTCDecoder {

 template <typename CTCBeamState, typename CTCBeamComparer>
 Status CTCBeamSearchNormLogDecoder<CTCBeamState, CTCBeamComparer>::Decode(
-    const CTCDecoder::SequenceLength& seq_len,
-    const std::vector<CTCDecoder::Input>& input,
-    std::vector<CTCDecoder::Output>* output, ScoreOutput* scores) {
+    const CTCDecoder<float>::SequenceLength& seq_len,
+    const std::vector<CTCDecoder<float>::Input>& input,
+    std::vector<CTCDecoder<float>::Output>* output, ScoreOutput* scores) {
   // Storage for top paths.
   std::vector<std::vector<int>> beams;
   std::vector<float> beam_log_probabilities;
   int top_n = output->size();
   if (std::any_of(output->begin(), output->end(),
-                  [this](const CTCDecoder::Output& output) -> bool {
+                  [this](const CTCDecoder<float>::Output& output) -> bool {
                     return output.size() < this->batch_size_;
                   })) {
     return errors::InvalidArgument(
@@ -325,7 +325,7 @@ void CTCBeamSearchNormLogDecoder<CTCBeamState, CTCBeamComparer>::Step(
   // isn't full, or the lowest probability entry in the beam has a
   // lower probability than the leaf.
   auto is_candidate = [this](const BeamProbability& prob) {
-    return (prob.total > kLogZero &&
+    return (prob.total > kLogZero<float>() &&
             (leaves_.size() < beam_width_ ||
              prob.total > leaves_.peek_bottom()->newp.total));
   };
@@ -345,7 +345,7 @@ void CTCBeamSearchNormLogDecoder<CTCBeamState, CTCBeamComparer>::Step(
       BeamEntry& c = b->GetChild(label);
       if (!c.Active()) {
         //   Pblank(l=abcd @ t=6) = 0
-        c.newp.blank = kLogZero;
+        c.newp.blank = kLogZero<float>();
         // If new child label is identical to beam label:
         //   Plabel(l=abcc @ t=6) = Pblank(l=abc @ t=5) * P(c @ 6)
         // Otherwise:
@@ -727,7 +727,7 @@ class CTCBeamSearchDecoderWithLMOp : public tf::OpKernel {
       beam_search.Step(input_bi);
     }

-    typedef tf::ctc::ctc_beam_search::BeamEntry<WordLMBeamState> BeamEntry;
+    typedef tf::ctc::ctc_beam_search::BeamEntry<float, WordLMBeamState> BeamEntry;
     std::unique_ptr<std::vector<BeamEntry*>> branches(beam_search.leaves_.Extract());
     beam_search.leaves_.Reset();
     for (int i = 0; i < branches->size(); ++i) {
diff --git a/ctc_decoder_with_lm/beam_search.h b/ctc_decoder_with_lm/beam_search.h
index bf54ca35a..e5b1c2aa3 100644
--- a/ctc_decoder_with_lm/beam_search.h
+++ b/ctc_decoder_with_lm/beam_search.h
@@ -31,7 +31,7 @@ struct WordLMBeamState {
   bool new_word;
 };

-class WordLMBeamScorer : public tensorflow::ctc::BaseBeamScorer<WordLMBeamState> {
+class WordLMBeamScorer : public tensorflow::ctc::BaseBeamScorer<float, WordLMBeamState> {
  public:
   WordLMBeamScorer(const std::string &kenlm_path, const std::string &trie_path,
                    const std::string &alphabet_path,

From 3832a2c9db2b3989f22c2392016d2256faf8a8e1 Mon Sep 17 00:00:00 2001
From: Goutham Pratapa
Date: Sun, 20 Oct 2019 17:50:21 +0530
Subject: [PATCH 3/7] Add infer and train_params from CLI.
E.g.:

python run.py \
  --config_file=example_configs/speech2text/jasper10x5_LibriSpeech_nvgrad.py \
  --mode=infer \
  --infer_dataset=/Users/gouthampratapa/work/OpenSeq2Seq/infer.csv

python run.py \
  --config_file=example_configs/speech2text/jasper10x5_LibriSpeech_nvgrad.py \
  --mode=train_eval \
  --train_dataset=/Users/gouthampratapa/work/OpenSeq2Seq/train_v1.csv,/Users/gouthampratapa/work/OpenSeq2Seq/train_v2.csv

Both flags override `dataset_files` in the matching section of the config
file (`infer_params` / `train_params`); multiple CSV files are passed as a
comma-separated list.

Signed-off-by: Praveen Puglia
---
 docs/sources/source/getting-started/asr.rst |  1 +
 open_seq2seq/utils/utils.py                 | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/sources/source/getting-started/asr.rst b/docs/sources/source/getting-started/asr.rst
index 848085bb6..f0ecf4624 100644
--- a/docs/sources/source/getting-started/asr.rst
+++ b/docs/sources/source/getting-started/asr.rst
@@ -26,6 +26,7 @@ dataset size will be around 224GB (including archives and original compressed au
 Now, everything should be setup to train the model::

     python run.py --config_file=example_configs/speech2text/ds2_librispeech_larc_config.py --mode=train_eval
+    python run.py --config_file=example_configs/speech2text/ds2_librispeech_larc_config.py --mode=train_eval --infer_dataset=example_configs/datasets/infer.csv

 If you want to run evaluation/inference with the trained model, replace
 ``--mode=train_eval`` with ``--mode=eval`` or ``--mode=infer``.
diff --git a/open_seq2seq/utils/utils.py b/open_seq2seq/utils/utils.py
index 69dd0f45e..112a9f525 100644
--- a/open_seq2seq/utils/utils.py
+++ b/open_seq2seq/utils/utils.py
@@ -506,7 +506,13 @@ def get_base_config(args):
                       help='whether to log output, git info, cmd args, etc.')
   parser.add_argument('--use_xla_jit', dest='use_xla_jit', action='store_true',
                       help='whether to use XLA_JIT to compile and run the model.')
+  parser.add_argument('--infer_dataset', dest='infer_dataset',
+                      help='infer_dataset csv file.')
+  parser.add_argument('--train_dataset', dest='train_dataset',
+                      help='train_dataset csv file.')
   args, unknown = parser.parse_known_args(args)
+  infer_params = args.infer_dataset
+  train_params = args.train_dataset

   if args.mode not in [
       'train',
@@ -519,7 +525,10 @@ def get_base_config(args):
                      "['train', 'eval', 'train_eval', 'infer', "
                      "'interactive_infer']")
   config_module = runpy.run_path(args.config_file, init_globals={'tf': tf})
-
+  if infer_params:
+    config_module['infer_params']['data_layer_params']['dataset_files'] = infer_params.split(',')
+  if train_params:
+    config_module['train_params']['data_layer_params']['dataset_files'] = train_params.split(',')
   base_config = config_module.get('base_params', None)
   if base_config is None:
     raise ValueError('base_config dictionary has to be '
@@ -541,7 +550,6 @@ def get_base_config(args):
     parser_unk.add_argument('--' + pm, default=value, type=ast.literal_eval)
   config_update = parser_unk.parse_args(unknown)
   nested_update(base_config, nest_dict(vars(config_update)))
-
   return args, base_config, base_model, config_module

 def get_calibration_config(arguments):

From f43b7064e56a1db18c18fcda7ae26add6a11b628 Mon Sep 17 00:00:00 2001
From: voicezen
Date: Wed, 6 Nov 2019 16:29:40 +0530
Subject: [PATCH 4/7] chunk processing of logits

Signed-off-by: Praveen Puglia
---
 scripts/decode.py | 70 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 13 deletions(-)

diff --git a/scripts/decode.py b/scripts/decode.py
index 2d5c3babc..b86379e76 100644
--- a/scripts/decode.py
+++ b/scripts/decode.py
@@ -4,7 +4,7 @@
 '''

 import argparse
-
+import time
 import pickle

 import numpy as np
@@ 
-89,7 +89,6 @@ num_cpus = multiprocessing.cpu_count() - def levenshtein(a, b): """Calculates the Levenshtein distance between a and b. The code was taken from: http://hetland.org/coding/python/levenshtein.py @@ -170,6 +169,8 @@ def softmax(x): def evaluate_wer(logits, labels, vocab, decoder): + eval_start=time.time() + print("evaluation started at ",eval_start) total_dist = 0.0 total_count = 0.0 wer_per_sample = np.empty(shape=len(labels)) @@ -187,31 +188,57 @@ def evaluate_wer(logits, labels, vocab, decoder): wer_per_sample[idx] = dist / len(label.split()) print('# empty preds: {}'.format(empty_preds)) wer = total_dist / total_count + eval_end=time.time() + print("evaluation took %s time"%(eval_end-eval_start)) return wer, wer_per_sample +def divide_chunks(l, n): + # looping till length l + for i in range(0, len(l), n): + yield l[i:i + n] +data_load_start=time.time() data = load_dump(args.logits) labels = load_labels(args.labels) logits = get_logits(data, labels) vocab = load_vocab(args.vocab) vocab[-1] = '_' - +data_load_end=time.time() +print("Data loading took %s seconds" %(data_load_end-data_load_start) ) probs_batch = [] for line in labels: audio_filename = line[0] probs_batch.append(softmax(logits[audio_filename])) +batch_prob_end=time.time() +print("Batch logit loading took %s seconds" %(batch_prob_end-data_load_end) ) if args.mode == 'eval': + eval_start=time.time() wer, _ = evaluate_wer(logits, labels, vocab, greedy_decoder) print('Greedy WER = {:.4f}'.format(wer)) best_result = {'wer': 1e6, 'alpha': 0.0, 'beta': 0.0, 'beams': None} for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step): for beta in np.arange(args.beta, args.beta_max, args.beta_step): scorer = Scorer(alpha, beta, model_path=args.lm, vocabulary=vocab[:-1]) - res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], - beam_size=args.beam_width, - num_processes=num_cpus, - ext_scoring_func=scorer) + print("scorer complete") + probs_batch_list = list(divide_chunks(probs_batch, 500)) + res=[] + for probs_batch in probs_batch_list: + f=time.time() + result = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], + beam_size=args.beam_width, + num_processes=num_cpus, + ext_scoring_func=scorer) + e=time.time() + for j in result: + res.append(j) + print("500 files batched took %s time"%(e-f)) + + + # res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], + # beam_size=args.beam_width, + # num_processes=num_cpus, + # ext_scoring_func=scorer) total_dist = 0.0 total_count = 0.0 for idx, line in enumerate(labels): @@ -230,7 +257,8 @@ def evaluate_wer(logits, labels, vocab, decoder): print('alpha={:.2f}, beta={:.2f}: WER={:.4f}'.format(alpha, beta, wer)) print('BEST: alpha={:.2f}, beta={:.2f}, WER={:.4f}'.format( best_result['alpha'], best_result['beta'], best_result['wer'])) - + eval_end=time.time() + print("evaluation took %s seconds",eval_end-eval_start) if args.dump_all_beams_to: with open(args.dump_all_beams_to, 'w') as f: for beam in best_result['beams']: @@ -240,18 +268,34 @@ def evaluate_wer(logits, labels, vocab, decoder): f.write('E=>>>>>>>>\n') elif args.mode == 'infer': + print("Inference Mode") + infer_start=time.time() scorer = Scorer(args.alpha, args.beta, model_path=args.lm, vocabulary=vocab[:-1]) - res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], - beam_size=args.beam_width, - num_processes=num_cpus, - ext_scoring_func=scorer) + + probs_batch_list = list(divide_chunks(probs_batch, 500)) + res=[] + for probs_batch in probs_batch_list: + f=time.time() + result = 
ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], + beam_size=args.beam_width, + num_processes=num_cpus, + ext_scoring_func=scorer) + e=time.time() + + for j in result: + res.append(j) + + print("500 files batched took %s time"%(e-f)) + infer_preds = np.empty(shape=(len(labels), 2), dtype=object) for idx, line in enumerate(labels): filename = line[0] score, text = [v for v in zip(*res[idx])] infer_preds[idx, 0] = filename infer_preds[idx, 1] = text[0] - + + infer_end=time.time() + print("Inference took %s seconds",infer_end-infer_start) np.savetxt(args.infer_output_file, infer_preds, fmt='%s', delimiter=',', header='wav_filename,transcript') From d82b4e7638d1144e2d055ef56f79caeb0bef546b Mon Sep 17 00:00:00 2001 From: voicezen Date: Sun, 1 Dec 2019 00:59:16 +0530 Subject: [PATCH 5/7] add greedy only and greedy plus lm output Signed-off-by: Praveen Puglia --- scripts/decode.py | 80 ++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/scripts/decode.py b/scripts/decode.py index b86379e76..4cf56ab21 100644 --- a/scripts/decode.py +++ b/scripts/decode.py @@ -233,12 +233,7 @@ def divide_chunks(l, n): for j in result: res.append(j) print("500 files batched took %s time"%(e-f)) - - - # res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], - # beam_size=args.beam_width, - # num_processes=num_cpus, - # ext_scoring_func=scorer) + total_dist = 0.0 total_count = 0.0 for idx, line in enumerate(labels): @@ -267,35 +262,48 @@ def divide_chunks(l, n): f.write('{} 0.0 0.0 {}\n'.format(pred[0], pred[1])) f.write('E=>>>>>>>>\n') +elif args.mode == 'greedy': + print("Greedy Mode") + greedy_preds = np.empty(shape=(len(labels), 2), dtype=object) + for idx, line in enumerate(labels): + filename = line[0] + greedy_preds[idx, 0] = filename + greedy_preds[idx, 1] = greedy_decoder(logits[filename], vocab) + + np.savetxt(args.infer_output_file, greedy_preds, fmt='%s', delimiter=',', + header='wav_filename,greedy') + + elif args.mode == 'infer': - print("Inference Mode") - infer_start=time.time() - scorer = Scorer(args.alpha, args.beta, model_path=args.lm, vocabulary=vocab[:-1]) - - probs_batch_list = list(divide_chunks(probs_batch, 500)) - res=[] - for probs_batch in probs_batch_list: - f=time.time() - result = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], - beam_size=args.beam_width, - num_processes=num_cpus, - ext_scoring_func=scorer) - e=time.time() - - for j in result: - res.append(j) - - print("500 files batched took %s time"%(e-f)) - - infer_preds = np.empty(shape=(len(labels), 2), dtype=object) - for idx, line in enumerate(labels): - filename = line[0] - score, text = [v for v in zip(*res[idx])] - infer_preds[idx, 0] = filename - infer_preds[idx, 1] = text[0] - - infer_end=time.time() - print("Inference took %s seconds",infer_end-infer_start) - np.savetxt(args.infer_output_file, infer_preds, fmt='%s', delimiter=',', - header='wav_filename,transcript') + print("Inference Mode") + infer_start=time.time() + scorer = Scorer(args.alpha, args.beta, model_path=args.lm, vocabulary=vocab[:-1]) + + probs_batch_list = list(divide_chunks(probs_batch, 500)) + res=[] + for probs_batch in probs_batch_list: + f=time.time() + result = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1], + beam_size=args.beam_width, + num_processes=num_cpus, + ext_scoring_func=scorer) + e=time.time() + + for j in result: + res.append(j) + + print("500 files batched took %s time"%(e-f)) + + infer_preds = np.empty(shape=(len(labels), 3), dtype=object) + for idx, line 
in enumerate(labels):
+        filename = line[0]
+        score, text = [v for v in zip(*res[idx])]
+        infer_preds[idx, 0] = filename
+        infer_preds[idx, 1] = text[0]
+        # Greedy decode alongside the LM result
+        infer_preds[idx, 2] = greedy_decoder(logits[filename], vocab)
+
+    infer_end = time.time()
+    print("Inference took %s seconds" % (infer_end - infer_start))
+    np.savetxt(args.infer_output_file, infer_preds, fmt='%s', delimiter=',',
+               header='wav_filename,lm,greedy')

From 304a8c9d6eaa653691d2b17e67aaf3c2a37d4bfa Mon Sep 17 00:00:00 2001
From: Aayush Kubba
Date: Thu, 2 Jan 2020 21:18:41 +0530
Subject: [PATCH 6/7] Update README.md

---
 README.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/README.md b/README.md
index ddd522b52..ef167b45e 100644
--- a/README.md
+++ b/README.md
@@ -65,3 +65,71 @@ If you use OpenSeq2Seq, please cite [this paper](https://arxiv.org/abs/1805.1038
     primaryClass={cs.CL}
 }
 ```
+
+## Install Decoders
+
+### Install boost, automake and bison
+```
+sudo apt-get install libboost-all-dev -y
+sudo apt-get install automake -y
+sudo apt-get install bison -y
+```
+
+### Install SWIG
+```
+git clone https://github.com/swig/swig.git
+cd swig
+./autogen.sh
+./configure
+make
+sudo make install
+```
+
+#### Test the installation
+```
+$ swig
+```
+If you encounter the following error, install PCRE as described below:
+```
+$ swig: error while loading shared libraries: libpcre.so.1: cannot open shared object file: No such file or directory
+```
+
+### Install PCRE
+```
+cd /usr/local/src
+sudo curl --remote-name ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.42.tar.gz
+
+tar -xzvf pcre-8.42.tar.gz
+cd pcre-8.42
+sudo ./configure --prefix=/usr/local/mac-dev-env/pcre-8.42
+sudo make
+sudo make install
+sudo ln -s mac-dev-env/pcre-8.42 /usr/local/pcre
+echo 'export PATH=/usr/local/pcre/bin:$PATH' >> ~/.bash_profile
+source ~/.bash_profile
+cd .libs
+sudo mv -v libpcre.so.* /usr/lib/
+```
+If the above doesn't work, use the latest version instead:
+
+```
+sudo curl --remote-name https://ftp.pcre.org/pub/pcre/pcre-8.43.tar.bz2
+tar xjf pcre-8.43.tar.bz2
+cd pcre-8.43/
+sudo ./configure --prefix=/usr/local/mac-dev-env/pcre-8.43
+sudo make
+sudo make install
+sudo ln -s mac-dev-env/pcre-8.43 /usr/local/pcre
+echo 'export PATH=/usr/local/pcre/bin:$PATH' >> ~/.bash_profile
+source ~/.bash_profile
+cd .libs
+sudo mv -v libpcre.so.* /usr/lib/
+```
+
+If the symlink is already in use, either delete it or create another symlink.
+
+### Final Output
+
+```
+$ swig
+Must specify an input file. Use -help for available options.
+```

From 27a1c9551a71939686f9f5c79b3ba4fe8bc3ef49 Mon Sep 17 00:00:00 2001
From: GouthamPratapa
Date: Mon, 27 Apr 2020 10:57:35 +0530
Subject: [PATCH 7/7] Add setup.py to install/develop.
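
With setup.py in place, the package can be installed in the usual
setuptools ways: `pip install -e .` for a development (editable) install,
or `pip install .` for a regular one.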
---
 setup.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 setup.py

diff --git a/setup.py b/setup.py
new file mode 100644
index 000000000..c085e178e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+import setuptools
+import numpy as np
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name="open_seq2seq",
+    version="0.0.1",
+    author="voicezen",
+    author_email="all@voicezen.ai",
+    description="Python repo for components and analysis",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/voicezen/jivaka",
+    packages=setuptools.find_packages(),
+    include_dirs=[np.get_include()],
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+)
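
Note on the restore rule from PATCH 1/7: a variable is loaded from the
`load_model` directory only when the current graph contains a variable with
the same name and shape, and `global_step` is always skipped, so fine-tuning
restarts the step counter and learning-rate schedule from zero. The helper
below isolates that rule as a minimal, self-contained sketch; it assumes the
TF 1.x API used throughout these patches, and `ckpt_dir` is an illustrative
stand-in for the directory passed via `load_model`.

```
import tensorflow as tf  # TF 1.x, as assumed by the patches above

def vars_to_load_from(ckpt_dir):
    """Return the graph variables that funcs.py would restore from ckpt_dir.

    A variable qualifies only if the checkpoint holds a tensor with the
    same name and shape; global_step is excluded so that fine-tuning does
    not inherit the old training schedule.
    """
    ckpt_vars = dict(tf.train.list_variables(ckpt_dir))  # name -> shape
    keep = []
    for var in tf.global_variables():
        name = var.name.split(':')[0]
        if (name in ckpt_vars and 'global_step' not in name
                and var.shape == ckpt_vars[name]):
            keep.append(var)
    return keep
```

In funcs.py the resulting list is handed to
tf.contrib.framework.assign_from_checkpoint_fn() together with
tf.train.latest_checkpoint(load_model_dir), and the returned function is
invoked through the Scaffold's init_fn, which only runs when no checkpoint
exists yet in the current logdir.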