Skip to content
This repository has been archived by the owner on Aug 3, 2021. It is now read-only.

Os2s package #534

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,71 @@ If you use OpenSeq2Seq, please cite [this paper](https://arxiv.org/abs/1805.1038
primaryClass={cs.CL}
}
```

## Install Decoders

### Install boost/automake and bison
```
sudo apt-get install libboost-all-dev -y
sudo apt-get install automake -y
sudo apt-get install bison -y
```

### Install SWIG
```
git clone https://github.com/swig/swig.git
cd swig
./autogen.sh
./configure
make
sudo make install
```
#### Test once
```
$ swig
```
If you encounter the following error:
```
$ swig: error while loading shared libraries: libpcre.so.1: cannot open shared object file: No such file or directory
```
## Install PCRE
```
cd /usr/local/src
sudo curl --remote-name ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.42.tar.gz

tar -xzvf pcre-8.42.tar.gz
cd pcre-8.42
sudo ./configure --prefix=/usr/local/mac-dev-env/pcre-8.42
sudo make
sudo make install
sudo ln -s mac-dev-env/pcre-8.42 /usr/local/pcre
echo 'export PATH=/usr/local/pcre/bin:$PATH' >> ~/.bash_profile
source ~/.bash_profile
cd .libs
sudo mv -v libpcre.so.* /usr/lib/
```
If the above does not work, use the newer PCRE release as follows:

```
sudo curl --remote-name https://ftp.pcre.org/pub/pcre/pcre-8.43.tar.bz2
tar xjf pcre-8.43.tar.bz2
cd pcre-8.43/
sudo ./configure --prefix=/usr/local/mac-dev-env/pcre-8.43
sudo make
sudo make install
sudo ln -s mac-dev-env/pcre-8.43 /usr/local/pcre
echo 'export PATH=/usr/local/pcre/bin:$PATH' >> ~/.bash_profile
source ~/.bash_profile
cd .libs
sudo mv -v libpcre.so.* /usr/lib/
```

If the `/usr/local/pcre` symlink already exists, either delete it first or choose a different symlink name (and adjust the `PATH` entry to match).
## Final Output

```
$ swig
Must specify an input file. Use -help for available options.
```

### Thank you
1 change: 1 addition & 0 deletions docs/sources/source/getting-started/asr.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dataset size will be around 224GB (including archives and original compressed au
Now, everything should be setup to train the model::

python run.py --config_file=example_configs/speech2text/ds2_librispeech_larc_config.py --mode=train_eval
python run.py --config_file=example_configs/speech2text/ds2_librispeech_larc_config.py --mode=train_eval --infer_dataset=example_configs/datasets/infer.csv

If you want to run evaluation/inference with the trained model, replace
``--mode=train_eval`` with ``--mode=eval`` or ``--mode=infer``.
Expand Down
12 changes: 10 additions & 2 deletions open_seq2seq/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,13 @@ def get_base_config(args):
help='whether to log output, git info, cmd args, etc.')
parser.add_argument('--use_xla_jit', dest='use_xla_jit', action='store_true',
help='whether to use XLA_JIT to compile and run the model.')
parser.add_argument('--infer_dataset', dest='infer_dataset',
help='infer_dataset csv file.')
parser.add_argument('--train_dataset', dest='train_dataset',
help='train_dataset csv file.')
args, unknown = parser.parse_known_args(args)
infer_params = args.infer_dataset
train_params = args.train_dataset

if args.mode not in [
'train',
Expand All @@ -519,7 +525,10 @@ def get_base_config(args):
"['train', 'eval', 'train_eval', 'infer', "
"'interactive_infer']")
config_module = runpy.run_path(args.config_file, init_globals={'tf': tf})

if infer_params:
config_module['infer_params']['data_layer_params']['dataset_files'] = infer_params.split(',')
if train_params:
config_module['train_params']['data_layer_params']['dataset_files'] = train_params.split(',')
base_config = config_module.get('base_params', None)
if base_config is None:
raise ValueError('base_config dictionary has to be '
Expand All @@ -541,7 +550,6 @@ def get_base_config(args):
parser_unk.add_argument('--' + pm, default=value, type=ast.literal_eval)
config_update = parser_unk.parse_args(unknown)
nested_update(base_config, nest_dict(vars(config_update)))

return args, base_config, base_model, config_module

def get_calibration_config(arguments):
Expand Down
94 changes: 73 additions & 21 deletions scripts/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
'''

import argparse

import time
import pickle
import numpy as np

Expand Down Expand Up @@ -89,7 +89,6 @@

num_cpus = multiprocessing.cpu_count()


def levenshtein(a, b):
"""Calculates the Levenshtein distance between a and b.
The code was taken from: http://hetland.org/coding/python/levenshtein.py
Expand Down Expand Up @@ -170,6 +169,8 @@ def softmax(x):


def evaluate_wer(logits, labels, vocab, decoder):
eval_start=time.time()
print("evaluation started at ",eval_start)
total_dist = 0.0
total_count = 0.0
wer_per_sample = np.empty(shape=len(labels))
Expand All @@ -187,31 +188,52 @@ def evaluate_wer(logits, labels, vocab, decoder):
wer_per_sample[idx] = dist / len(label.split())
print('# empty preds: {}'.format(empty_preds))
wer = total_dist / total_count
eval_end=time.time()
print("evaluation took %s time"%(eval_end-eval_start))
return wer, wer_per_sample

def divide_chunks(l, n):
    """Yield successive slices of ``l`` that are at most ``n`` items long.

    Args:
        l: A sliceable sequence that supports ``len()`` (list, tuple, str...).
        n: Maximum chunk length; must be a positive integer.

    Yields:
        ``l[i:i + n]`` for ``i = 0, n, 2n, ...``; the final chunk may be
        shorter than ``n``.

    Raises:
        ValueError: If ``n`` is less than 1. Because this is a generator, the
            check runs when the generator is first advanced.
    """
    # A negative step makes range() empty, which would silently drop every
    # item; reject it the same way range() already rejects a zero step.
    if n < 1:
        raise ValueError(
            'chunk size n must be a positive integer, got {!r}'.format(n))
    for start in range(0, len(l), n):
        yield l[start:start + n]

data_load_start=time.time()
data = load_dump(args.logits)
labels = load_labels(args.labels)
logits = get_logits(data, labels)
vocab = load_vocab(args.vocab)
vocab[-1] = '_'

data_load_end=time.time()
print("Data loading took %s seconds" %(data_load_end-data_load_start) )
probs_batch = []
for line in labels:
audio_filename = line[0]
probs_batch.append(softmax(logits[audio_filename]))
batch_prob_end=time.time()
print("Batch logit loading took %s seconds" %(batch_prob_end-data_load_end) )

if args.mode == 'eval':
eval_start=time.time()
wer, _ = evaluate_wer(logits, labels, vocab, greedy_decoder)
print('Greedy WER = {:.4f}'.format(wer))
best_result = {'wer': 1e6, 'alpha': 0.0, 'beta': 0.0, 'beams': None}
for alpha in np.arange(args.alpha, args.alpha_max, args.alpha_step):
for beta in np.arange(args.beta, args.beta_max, args.beta_step):
scorer = Scorer(alpha, beta, model_path=args.lm, vocabulary=vocab[:-1])
res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1],
beam_size=args.beam_width,
num_processes=num_cpus,
ext_scoring_func=scorer)
print("scorer complete")
# Beam-search decode in chunks of 500 utterances and collect the results in
# the original order.
# The loop variable must NOT be called ``probs_batch``: this code sits inside
# the alpha/beta grid search, and rebinding ``probs_batch`` to the last chunk
# would make every later (alpha, beta) pair decode only that chunk, leaving
# ``res`` shorter than ``labels``.
res = []
for chunk in divide_chunks(probs_batch, 500):
    chunk_start = time.time()
    result = ctc_beam_search_decoder_batch(chunk, vocab[:-1],
                                           beam_size=args.beam_width,
                                           num_processes=num_cpus,
                                           ext_scoring_func=scorer)
    chunk_end = time.time()
    res.extend(result)
    # Report the real chunk size; the last chunk can hold fewer than 500.
    print("%d files batched took %s seconds"
          % (len(chunk), chunk_end - chunk_start))

total_dist = 0.0
total_count = 0.0
for idx, line in enumerate(labels):
Expand All @@ -230,7 +252,8 @@ def evaluate_wer(logits, labels, vocab, decoder):
print('alpha={:.2f}, beta={:.2f}: WER={:.4f}'.format(alpha, beta, wer))
print('BEST: alpha={:.2f}, beta={:.2f}, WER={:.4f}'.format(
best_result['alpha'], best_result['beta'], best_result['wer']))

eval_end = time.time()
# Interpolate the duration into the message; passing it as a second print()
# argument left a literal "%s" in the output.
print("evaluation took %s seconds" % (eval_end - eval_start))
if args.dump_all_beams_to:
with open(args.dump_all_beams_to, 'w') as f:
for beam in best_result['beams']:
Expand All @@ -239,19 +262,48 @@ def evaluate_wer(logits, labels, vocab, decoder):
f.write('{} 0.0 0.0 {}\n'.format(pred[0], pred[1]))
f.write('E=>>>>>>>>\n')

elif args.mode == 'greedy':
print("Greedy Mode")
greedy_preds = np.empty(shape=(len(labels), 2), dtype=object)
for idx, line in enumerate(labels):
filename = line[0]
greedy_preds[idx, 0] = filename
greedy_preds[idx, 1] = greedy_decoder(logits[filename], vocab)

np.savetxt(args.infer_output_file, greedy_preds, fmt='%s', delimiter=',',
header='wav_filename,greedy')


elif args.mode == 'infer':
scorer = Scorer(args.alpha, args.beta, model_path=args.lm, vocabulary=vocab[:-1])
res = ctc_beam_search_decoder_batch(probs_batch, vocab[:-1],
beam_size=args.beam_width,
num_processes=num_cpus,
ext_scoring_func=scorer)
infer_preds = np.empty(shape=(len(labels), 2), dtype=object)
for idx, line in enumerate(labels):
filename = line[0]
score, text = [v for v in zip(*res[idx])]
infer_preds[idx, 0] = filename
infer_preds[idx, 1] = text[0]
print("Inference Mode")
infer_start=time.time()
scorer = Scorer(args.alpha, args.beta, model_path=args.lm, vocabulary=vocab[:-1])

# Beam-search decode in chunks of 500 utterances and collect the results in
# the original order. A separate loop variable is used so that
# ``probs_batch`` keeps referring to the full list of probabilities.
res = []
for chunk in divide_chunks(probs_batch, 500):
    chunk_start = time.time()
    result = ctc_beam_search_decoder_batch(chunk, vocab[:-1],
                                           beam_size=args.beam_width,
                                           num_processes=num_cpus,
                                           ext_scoring_func=scorer)
    chunk_end = time.time()
    res.extend(result)
    # Report the real chunk size; the last chunk can hold fewer than 500.
    print("%d files batched took %s seconds"
          % (len(chunk), chunk_end - chunk_start))

infer_preds = np.empty(shape=(len(labels), 3), dtype=object)
for idx, line in enumerate(labels):
filename = line[0]
score, text = [v for v in zip(*res[idx])]
infer_preds[idx, 0] = filename
infer_preds[idx, 1] = text[0]
#Greedy
infer_preds[idx, 2] = greedy_decoder(logits[filename], vocab)

np.savetxt(args.infer_output_file, infer_preds, fmt='%s', delimiter=',',
header='wav_filename,transcript')
infer_end = time.time()
# Interpolate the duration into the message; passing it as a second print()
# argument left a literal "%s" in the output.
print("Inference took %s seconds" % (infer_end - infer_start))
np.savetxt(args.infer_output_file, infer_preds, fmt='%s', delimiter=',',header='wav_filename,lm,greedy')

23 changes: 23 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import setuptools
import numpy as np

# Use the README as the long description shown on the package index.
# Read it as UTF-8 explicitly: the platform default encoding can fail on (or
# mis-decode) non-ASCII characters in the Markdown.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# NOTE(review): description, url and the license classifier do not mention
# OpenSeq2Seq -- confirm they are intended and match the repository's LICENSE.
setuptools.setup(
    name="open_seq2seq",
    version="0.0.1",
    author="voicezen",
    author_email="[email protected]",
    description="Python repo for components and analysis",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/voicezen/jivaka",
    packages=setuptools.find_packages(),
    # NumPy headers are exposed to the build via include_dirs.
    include_dirs=[np.get_include()],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)