From e069b6d61f4e87ddd356fc965470549d3a6f19cf Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 8 Jun 2020 15:26:37 +0200 Subject: [PATCH 01/38] Add read only validation metrics For now this is just CTC loss like a validation set, but without affecting best validation checkpoint tracking logic. Eventually this could compute WER on a smaller set, for example. --- training/deepspeech_training/train.py | 31 ++++++++++++++++++++-- training/deepspeech_training/util/flags.py | 5 ++-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index f1832e62c4..cb07c3864a 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -450,6 +450,16 @@ def train(): buffering=FLAGS.read_buffer) for source in dev_sources] dev_init_ops = [iterator.make_initializer(dev_set) for dev_set in dev_sets] + if FLAGS.metrics_files: + metrics_sources = FLAGS.metrics_files.split(',') + metrics_sets = [create_dataset([source], + batch_size=FLAGS.dev_batch_size, + train_phase=False, + exception_box=exception_box, + process_ahead=len(Config.available_devices) * FLAGS.dev_batch_size * 2, + buffering=FLAGS.read_buffer) for source in metrics_sources] + metrics_init_ops = [iterator.make_initializer(metrics_set) for metrics_set in metrics_sets] + # Dropout dropout_rates = [tfv1.placeholder(tf.float32, name='dropout_{}'.format(i)) for i in range(6)] dropout_feed_dict = { @@ -488,7 +498,14 @@ def train(): step_summaries_op = tfv1.summary.merge_all('step_summaries') step_summary_writers = { 'train': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'train'), max_queue=120), - 'dev': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'dev'), max_queue=120) + 'dev': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'dev'), max_queue=120), + 'metrics': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'metrics'), max_queue=120), + } + + human_readable_set_names = { + 'train': 'Training', + 'dev': 'Validation', + 'metrics': 'Metrics', } # Checkpointing @@ -533,7 +550,7 @@ def __call__(self, progress, data, **kwargs): data['mean_loss'] = total_loss / step_count if step_count else 0.0 return progressbar.widgets.FormatLabel.__call__(self, progress, data, **kwargs) - prefix = 'Epoch {} | {:>10}'.format(epoch, 'Training' if is_train else 'Validation') + prefix = 'Epoch {} | {:>10}'.format(epoch, human_readable_set_names[set_name]) widgets = [' | ', progressbar.widgets.Timer(), ' | Steps: ', progressbar.widgets.Counter(), ' | ', LossWidget()] @@ -635,6 +652,16 @@ def __call__(self, progress, data, **kwargs): log_info('Encountered a plateau, reducing learning rate to {}'.format( current_learning_rate)) + if FLAGS.metrics_files: + # Read only metrics, not affecting best validation loss tracking + for source, init_op in zip(metrics_sources, metrics_init_ops): + log_progress('Metrics for epoch %d on %s...' 
% (epoch, source)) + set_loss, _ = run_set('metrics', epoch, init_op, dataset=source) + log_progress('Metrics for epoch %d on %s - loss: %f' % (epoch, source, set_loss)) + + print('-' * 80) + + except KeyboardInterrupt: pass log_info('FINISHED optimization in {}'.format(datetime.utcnow() - train_start_time)) diff --git a/training/deepspeech_training/util/flags.py b/training/deepspeech_training/util/flags.py index 69774c7824..66b25ebacf 100644 --- a/training/deepspeech_training/util/flags.py +++ b/training/deepspeech_training/util/flags.py @@ -13,8 +13,9 @@ def create_flags(): f = absl.flags f.DEFINE_string('train_files', '', 'comma separated list of files specifying the dataset used for training. Multiple files will get merged. If empty, training will not be run.') - f.DEFINE_string('dev_files', '', 'comma separated list of files specifying the dataset used for validation. Multiple files will get merged. If empty, validation will not be run.') - f.DEFINE_string('test_files', '', 'comma separated list of files specifying the dataset used for testing. Multiple files will get merged. If empty, the model will not be tested.') + f.DEFINE_string('dev_files', '', 'comma separated list of files specifying the datasets used for validation. Multiple files will get reported separately. If empty, validation will not be run.') + f.DEFINE_string('test_files', '', 'comma separated list of files specifying the datasets used for testing. Multiple files will get reported separately. If empty, the model will not be tested.') + f.DEFINE_string('metrics_files', '', 'comma separated list of files specifying the datasets used for tracking of metrics (after validation step). Currently the only metric is the CTC loss but without affecting the tracking of best validation loss. Multiple files will get reported separately. If empty, metrics will not be computed.') f.DEFINE_string('read_buffer', '1MB', 'buffer-size for reading samples from datasets (supports file-size suffixes KB, MB, GB, TB)') f.DEFINE_string('feature_cache', '', 'cache MFCC features to disk to speed up future training runs on the same data. This flag specifies the path where cached features extracted from --train_files will be saved. 
If empty, or if online augmentation flags are enabled, caching will be disabled.')

From 07d2c39138805d8eb15f481c4c5511a2cb89f895 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Mon, 8 Jun 2020 15:37:06 +0200
Subject: [PATCH 02/38] Split SDB tests from basic training tests to speed up
 CI dependents

---
 taskcluster/tc-train-extra-tests.sh           | 60 +++++++++++++++++++
 taskcluster/tc-train-tests.sh                 |  6 --
 ...aining-extra_16k-linux-amd64-py35m-opt.yml | 12 ++++
 ...aining-extra_16k-linux-amd64-py36m-opt.yml | 12 ++++
 ...aining-extra_16k-linux-amd64-py37m-opt.yml | 12 ++++
 ...raining-extra_8k-linux-amd64-py36m-opt.yml | 12 ++++
 ...raining-extra_8k-linux-amd64-py37m-opt.yml | 12 ++++
 ...est-training_16k-linux-amd64-py35m-opt.yml |  2 +-
 ...est-training_16k-linux-amd64-py36m-opt.yml |  2 +-
 ...est-training_16k-linux-amd64-py37m-opt.yml |  2 +-
 ...test-training_8k-linux-amd64-py36m-opt.yml |  2 +-
 ...test-training_8k-linux-amd64-py37m-opt.yml |  2 +-
 12 files changed, 125 insertions(+), 11 deletions(-)
 create mode 100644 taskcluster/tc-train-extra-tests.sh
 create mode 100644 taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml
 create mode 100644 taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml
 create mode 100644 taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml
 create mode 100644 taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml
 create mode 100644 taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml

diff --git a/taskcluster/tc-train-extra-tests.sh b/taskcluster/tc-train-extra-tests.sh
new file mode 100644
index 0000000000..dfdcf9ddd3
--- /dev/null
+++ b/taskcluster/tc-train-extra-tests.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -xe
+
+source $(dirname "$0")/tc-tests-utils.sh
+
+extract_python_versions "$1" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pyalias"
+
+bitrate=$2
+
+decoder_src=$3
+
+if [ "$decoder_src" = "--pypi" ]; then
+    # Disable automatically picking up decoder package built in this CI group
+    export DECODER_ARTIFACTS_ROOT=""
+fi
+
+mkdir -p ${TASKCLUSTER_ARTIFACTS} || true
+mkdir -p /tmp/train || true
+mkdir -p /tmp/train_tflite || true
+
+virtualenv_activate "${pyalias}" "deepspeech"
+
+set -o pipefail
+pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat
+pushd ${HOME}/DeepSpeech/ds
+    pip install --upgrade . | cat
+popd
+set +o pipefail
+
+# Prepare correct arguments for training
+case "${bitrate}" in
+    8k)
+        sample_rate=8000
+        sample_name='LDC93S1_pcms16le_1_8000.wav'
+    ;;
+    16k)
+        sample_rate=16000
+        sample_name='LDC93S1_pcms16le_1_16000.wav'
+    ;;
+esac
+
+# Easier to rename so that we can exercise the LDC93S1 importer code to
+# generate the CSV file.
+echo "Moving ${sample_name} to LDC93S1.wav" +mv "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/${sample_name}" "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/LDC93S1.wav" + +pushd ${HOME}/DeepSpeech/ds/ + # Testing single SDB source + time ./bin/run-tc-ldc93s1_new_sdb.sh 220 "${sample_rate}" + # Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features + time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}" + time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}" +popd + +pushd ${HOME}/DeepSpeech/ds/ + time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh +popd + +virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/tc-train-tests.sh b/taskcluster/tc-train-tests.sh index 3b681dcdf7..f939493590 100644 --- a/taskcluster/tc-train-tests.sh +++ b/taskcluster/tc-train-tests.sh @@ -50,11 +50,6 @@ pushd ${HOME}/DeepSpeech/ds/ time ./bin/run-tc-ldc93s1_new.sh 249 "${sample_rate}" time ./bin/run-tc-ldc93s1_new.sh 1 "${sample_rate}" time ./bin/run-tc-ldc93s1_tflite.sh "${sample_rate}" - # Testing single SDB source - time ./bin/run-tc-ldc93s1_new_sdb.sh 220 "${sample_rate}" - # Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features - time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}" - time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}" popd cp /tmp/train/output_graph.pb ${TASKCLUSTER_ARTIFACTS} @@ -69,7 +64,6 @@ cp /tmp/train/output_graph.pbmm ${TASKCLUSTER_ARTIFACTS} pushd ${HOME}/DeepSpeech/ds/ time ./bin/run-tc-ldc93s1_checkpoint.sh - time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh popd virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml new file mode 100644 index 0000000000..6d19f491c2 --- /dev/null +++ b/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-ctc-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.5.8:m 16k" + metadata: + name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.7" + description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml new file mode 100644 index 0000000000..e292a84190 --- /dev/null +++ b/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-ctc-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 16k" + metadata: + name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.7" + description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml new file mode 100644 index 0000000000..a41f7cd472 --- /dev/null +++ b/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml @@ -0,0 +1,12 @@ 
+build:
+  template_file: test-linux-opt-base.tyml
+  dependencies:
+    - "linux-amd64-ctc-opt"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 16k"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 16kHz all training features Py3.7"
+    description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version"
diff --git a/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml
new file mode 100644
index 0000000000..b79209ae7d
--- /dev/null
+++ b/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml
@@ -0,0 +1,12 @@
+build:
+  template_file: test-linux-opt-base.tyml
+  dependencies:
+    - "linux-amd64-ctc-opt"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 8k"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.6"
+    description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version"
diff --git a/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml
new file mode 100644
index 0000000000..ed5259bb9d
--- /dev/null
+++ b/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml
@@ -0,0 +1,12 @@
+build:
+  template_file: test-linux-opt-base.tyml
+  dependencies:
+    - "linux-amd64-ctc-opt"
+  system_setup:
+    >
+      apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt}
+  args:
+    tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 8k"
+  metadata:
+    name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.7"
+    description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version"
diff --git a/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml
index 3f68fea380..48c2501930 100644
--- a/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml
+++ b/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml
@@ -8,5 +8,5 @@ build:
   args:
     tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k"
   metadata:
-    name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.5"
+    name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.5"
     description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version"
diff --git a/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml
index 9fa9791bb8..1d4eaa9858 100644
--- a/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml
+++ b/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml
@@ -8,5 +8,5 @@ build:
   args:
     tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k"
   metadata:
-    name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.6"
+    name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.6"
     description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version"
diff --git a/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml
b/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml index a63631dbac..25a35efa6e 100644 --- a/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml @@ -8,5 +8,5 @@ build: args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k" metadata: - name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.7" + name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.7" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml index dc2b486f57..b9396fdd77 100644 --- a/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml @@ -8,5 +8,5 @@ build: args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k" metadata: - name: "DeepSpeech Linux AMD64 CPU 8kHz training Py3.6" + name: "DeepSpeech Linux AMD64 CPU 8kHz basic training Py3.6" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml index c863fa11b3..eb7cb1bee0 100644 --- a/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml @@ -8,5 +8,5 @@ build: args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k" metadata: - name: "DeepSpeech Linux AMD64 CPU 8kHz training Py3.7" + name: "DeepSpeech Linux AMD64 CPU 8kHz basic training Py3.7" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version" From ecd79531c8d27f95392a1e3b999e7c8d5f8a7d93 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Mon, 8 Jun 2020 15:50:12 +0200 Subject: [PATCH 03/38] Add training test with --metrics_files --- bin/run-tc-ldc93s1_new_metrics.sh | 29 +++++++++++++++++++++++++++++ taskcluster/tc-train-extra-tests.sh | 3 +++ 2 files changed, 32 insertions(+) create mode 100755 bin/run-tc-ldc93s1_new_metrics.sh diff --git a/bin/run-tc-ldc93s1_new_metrics.sh b/bin/run-tc-ldc93s1_new_metrics.sh new file mode 100755 index 0000000000..01403bf199 --- /dev/null +++ b/bin/run-tc-ldc93s1_new_metrics.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +set -xe + +ldc93s1_dir="./data/smoke_test" +ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv" + +epoch_count=$1 +audio_sample_rate=$2 + +if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then + echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}." 
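+    # (the importer fetches the single LDC93S1 sample and writes the ldc93s1.csv consumed by the flags below)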
+ python -u bin/import_ldc93s1.py ${ldc93s1_dir} +fi; + +# Force only one visible device because we have a single-sample dataset +# and when trying to run on multiple devices (like GPUs), this will break +export CUDA_VISIBLE_DEVICES=0 + +python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ + --train_files ${ldc93s1_csv} --train_batch_size 1 \ + --dev_files ${ldc93s1_csv} --dev_batch_size 1 \ + --test_files ${ldc93s1_csv} --test_batch_size 1 \ + --metrics_files ${ldc93s1_csv} \ + --n_hidden 100 --epochs $epoch_count \ + --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_metrics' \ + --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_metrics' \ + --scorer_path 'data/smoke_test/pruned_lm.scorer' \ + --audio_sample_rate ${audio_sample_rate} diff --git a/taskcluster/tc-train-extra-tests.sh b/taskcluster/tc-train-extra-tests.sh index dfdcf9ddd3..62ec225e09 100644 --- a/taskcluster/tc-train-extra-tests.sh +++ b/taskcluster/tc-train-extra-tests.sh @@ -51,6 +51,9 @@ pushd ${HOME}/DeepSpeech/ds/ # Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}" time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}" + + # Test --metrics_files training argument + time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}" popd pushd ${HOME}/DeepSpeech/ds/ From cfc79799ecae723194e95aeb6a9b677cee322964 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Wed, 10 Jun 2020 13:12:15 +0200 Subject: [PATCH 04/38] Report imported vs total audio time --- bin/import_cv.py | 1 + bin/import_cv2.py | 1 + bin/import_lingua_libre.py | 1 + bin/import_m-ailabs.py | 1 + bin/import_slr57.py | 1 + bin/import_ts.py | 1 + training/deepspeech_training/util/importers.py | 4 ++-- 7 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bin/import_cv.py b/bin/import_cv.py index 3754694ffb..e7dab5644a 100755 --- a/bin/import_cv.py +++ b/bin/import_cv.py @@ -93,6 +93,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames return (counter, rows) diff --git a/bin/import_cv2.py b/bin/import_cv2.py index c2880a0602..9db6365603 100755 --- a/bin/import_cv2.py +++ b/bin/import_cv2.py @@ -78,6 +78,7 @@ def one_sample(args): else: # This one is good - keep it for the target CSV rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2])) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py index ec5047ba8f..2273aae62a 100755 --- a/bin/import_lingua_libre.py +++ b/bin/import_lingua_libre.py @@ -91,6 +91,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/bin/import_m-ailabs.py b/bin/import_m-ailabs.py index 963b2873ff..63bb1f4f21 100755 --- a/bin/import_m-ailabs.py +++ b/bin/import_m-ailabs.py @@ -91,6 +91,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames return (counter, rows) diff --git a/bin/import_slr57.py b/bin/import_slr57.py index 11e30fa4d6..4edb3654ce 100755 --- a/bin/import_slr57.py +++ 
b/bin/import_slr57.py @@ -86,6 +86,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/bin/import_ts.py b/bin/import_ts.py index e6cdc1e8df..e01301309d 100755 --- a/bin/import_ts.py +++ b/bin/import_ts.py @@ -93,6 +93,7 @@ def one_sample(sample): else: # This one is good - keep it for the target CSV rows.append((wav_filename, file_size, label)) + counter["imported_time"] += frames counter["all"] += 1 counter["total_time"] += frames diff --git a/training/deepspeech_training/util/importers.py b/training/deepspeech_training/util/importers.py index a4c3c3268d..61f2342d22 100644 --- a/training/deepspeech_training/util/importers.py +++ b/training/deepspeech_training/util/importers.py @@ -8,7 +8,7 @@ from collections import Counter def get_counter(): - return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'total_time': 0}) + return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'imported_time': 0, 'total_time': 0}) def get_imported_samples(counter): return counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long'] - counter['invalid_label'] @@ -23,7 +23,7 @@ def print_import_report(counter, sample_rate, max_secs): print('Skipped %d samples that were too short to match the transcript.' % counter['too_short']) if counter['too_long'] > 0: print('Skipped %d samples that were longer than %d seconds.' % (counter['too_long'], max_secs)) - print('Final amount of imported audio: %s.' % secs_to_hours(counter['total_time'] / sample_rate)) + print('Final amount of imported audio: %s from %s.' % (secs_to_hours(counter['imported_time'] / sample_rate), secs_to_hours(counter['total_time'] / sample_rate))) def get_importers_parser(description): parser = argparse.ArgumentParser(description=description) From d94db7ca43614854f782305cecb4c07f7d68ccb3 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Wed, 10 Jun 2020 13:42:45 +0200 Subject: [PATCH 05/38] Refactoring of TF based augmentations --- bin/play.py | 16 +- bin/run-tc-graph_augmentations.sh | 28 + ...ions.sh => run-tc-sample_augmentations.sh} | 6 - doc/TRAINING.rst | 184 ++++-- ...tion-tests.sh => tc-augmentation-tests.sh} | 3 +- ...t-augmentations-linux-amd64-py36m-opt.yml} | 2 +- training/deepspeech_training/train.py | 33 +- .../deepspeech_training/util/augmentations.py | 564 ++++++++++++++++++ training/deepspeech_training/util/config.py | 13 +- training/deepspeech_training/util/feeding.py | 123 ++-- training/deepspeech_training/util/flags.py | 27 +- training/deepspeech_training/util/helpers.py | 15 + .../util/sample_collections.py | 91 +-- .../util/signal_augmentations.py | 222 ------- .../util/spectrogram_augmentations.py | 127 ---- 15 files changed, 820 insertions(+), 634 deletions(-) create mode 100755 bin/run-tc-graph_augmentations.sh rename bin/{run-tc-signal_augmentations.sh => run-tc-sample_augmentations.sh} (90%) rename taskcluster/{tc-signal_augmentation-tests.sh => tc-augmentation-tests.sh} (86%) rename taskcluster/{test-signal_augmentations-linux-amd64-py36m-opt.yml => test-augmentations-linux-amd64-py36m-opt.yml} (90%) create mode 100644 training/deepspeech_training/util/augmentations.py delete mode 100644 training/deepspeech_training/util/signal_augmentations.py delete mode 100644 
training/deepspeech_training/util/spectrogram_augmentations.py diff --git a/bin/play.py b/bin/play.py index 7d19a790ca..5c38fab0c3 100755 --- a/bin/play.py +++ b/bin/play.py @@ -10,7 +10,8 @@ import argparse from deepspeech_training.util.audio import LOADABLE_AUDIO_EXTENSIONS, AUDIO_TYPE_PCM, AUDIO_TYPE_WAV -from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source, augment_samples +from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source +from deepspeech_training.util.augmentations import parse_augmentations, apply_sample_augmentations, SampleAugmentation def get_samples_in_play_order(): @@ -38,12 +39,15 @@ def get_samples_in_play_order(): def play_collection(): + augmentations = parse_augmentations(CLI_ARGS.augment) + if any(map(lambda a: not isinstance(a, SampleAugmentation), augmentations)): + print("Warning: Some of the augmentations cannot be simulated by this command.") samples = get_samples_in_play_order() - samples = augment_samples(samples, - audio_type=AUDIO_TYPE_PCM, - augmentation_specs=CLI_ARGS.augment, - process_ahead=0, - fixed_clock=CLI_ARGS.clock) + samples = apply_sample_augmentations(samples, + audio_type=AUDIO_TYPE_PCM, + augmentations=augmentations, + process_ahead=0, + clock=CLI_ARGS.clock) for sample in samples: if not CLI_ARGS.quiet: print('Sample "{}"'.format(sample.sample_id), file=sys.stderr) diff --git a/bin/run-tc-graph_augmentations.sh b/bin/run-tc-graph_augmentations.sh new file mode 100755 index 0000000000..3db1ee14ac --- /dev/null +++ b/bin/run-tc-graph_augmentations.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +set -xe + +ldc93s1_dir="./data/smoke_test" +ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv" + +if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then + echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}." + python -u bin/import_ldc93s1.py ${ldc93s1_dir} +fi; + +# Force only one visible device because we have a single-sample dataset +# and when trying to run on multiple devices (like GPUs), this will break +export CUDA_VISIBLE_DEVICES=0 + +python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ + --train_files ${ldc93s1_csv} --train_batch_size 1 \ + --augment speed \ + --augment dropout \ + --augment pitch_and_tempo \ + --augment time_mask \ + --augment frequency_mask \ + --augment add \ + --augment multiply \ + --augment warp \ + --n_hidden 100 \ + --epochs 1 diff --git a/bin/run-tc-signal_augmentations.sh b/bin/run-tc-sample_augmentations.sh similarity index 90% rename from bin/run-tc-signal_augmentations.sh rename to bin/run-tc-sample_augmentations.sh index 5be8ed1206..266940706d 100755 --- a/bin/run-tc-signal_augmentations.sh +++ b/bin/run-tc-sample_augmentations.sh @@ -41,12 +41,6 @@ if ! $compare --if-differ "${ldc93s1_wav}" /tmp/reverb-test.wav; then exit 1 fi -$play ${ldc93s1_wav} --augment gaps[n=10,size=100.0] --pipe >/tmp/gaps-test.wav -if ! $compare --if-differ "${ldc93s1_wav}" /tmp/gaps-test.wav; then - echo "Gaps augmentation had no effect or changed basic sample properties" - exit 1 -fi - $play ${ldc93s1_wav} --augment resample[rate=4000] --pipe >/tmp/resample-test.wav if ! 
$compare --if-differ "${ldc93s1_wav}" /tmp/resample-test.wav; then
   echo "Resample augmentation had no effect or changed basic sample properties"
   exit 1
 fi
diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst
index 99dd849a03..f57744491c 100644
--- a/doc/TRAINING.rst
+++ b/doc/TRAINING.rst
@@ -270,12 +270,6 @@ Augmentation
 
 Augmentation is a useful technique for better generalization of machine learning models. Thus, a pre-processing pipeline with various augmentation techniques on raw pcm and spectrogram has been implemented and can be used while training the model. Following are the available augmentation techniques that can be enabled at training time by using the corresponding flags in the command line.
 
-
-Audio Augmentation
-------------------
-
-Augmentations that are applied before potential feature caching can be specified through the ``--augment`` flag. Being a multi-flag, it can be specified multiple times (see below for an example).
-
 Each sample of the training data will get treated by every specified augmentation in their given order. However: whether an augmentation will actually get applied to a sample is decided by chance on base of the augmentation's probability value. For example a value of ``p=0.1`` would apply the according augmentation to just 10% of all samples. This also means that augmentations are not mutually exclusive on a per-sample basis.
 
 The ``--augment`` flag uses a common syntax for all augmentation types:
@@ -297,14 +291,31 @@ In the documentation below, whenever a value is specified as ``<float-range>`` or ``<int-range>``
 
   * ``<value>~<r>``: A center value with a randomization radius around it. E.g. ``1.2~0.4`` will result in picking of a uniformly random value between 0.8 and 1.6 on each sample augmentation.
 
-  * ``<start>:<end>``: The value will range from ``<start>`` at the beginning of an epoch to ``<end>`` at the end of an epoch. E.g. ``-0.2:1.2`` (float) or ``2000:4000`` (int)
+  * ``<start>:<end>``: The value will range from ``<start>`` at the beginning of the training to ``<end>`` at the end of the training. E.g. ``-0.2:1.2`` (float) or ``2000:4000`` (int)
 
-  * ``<start>:<end>~<r>``: Combination of the two previous cases with a ranging center value. E.g. ``4-6~2`` would at the beginning of an epoch pick values between 2 and 6 and at the end of an epoch between 4 and 8.
+  * ``<start>:<end>~<r>``: Combination of the two previous cases with a ranging center value. E.g. ``4:6~2`` would at the beginning of the training pick values between 2 and 6 and at the end of the training between 4 and 8.
 
 Ranges specified with integer limits will only assume integer (rounded) values.
 
-If feature caching is enabled, these augmentations will only be performed on the first epoch and the result will be reused for subsequent epochs. The flag ``--augmentations_per_epoch N`` (by default `N` is 1) could be used to get more than one epoch worth of augmentations into the cache. During training, each epoch will do ``N`` passes over the training set, each time performing augmentation independently of previous passes. Be aware: this will also multiply the required size of the feature cache if it's enabled.
+.. warning::
+    If feature caching is enabled and infinite (default), these augmentations will only be performed on the first epoch and the result will be reused for subsequent epochs. This would not only hinder value ranges from reaching their intended final values, but could also lead to unintended over-fitting. In this case the flag ``--cache_for_epochs N`` (with N > 1) should be used to periodically invalidate the cache and thus allow samples to be re-augmented in new ways and with current range-values.
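+
+As a rough sketch of the range semantics described above (illustrative only - the
+actual parsing and picking logic lives in ``util/helpers.py`` as ``float_range``,
+``int_range`` and ``pick_value_from_range``):
+
+.. code-block:: python
+
+    import random
+    import re
+
+    def parse_range(spec):
+        # "<start>:<end>~<r>" - the ":<end>" and "~<r>" parts are optional, e.g. "4:6~2"
+        start, end, radius = re.fullmatch(r'([-.\d]+)(?::([-.\d]+))?(?:~([-.\d]+))?', spec).groups()
+        end = start if end is None else end
+        return float(start), float(end), 0.0 if radius is None else float(radius)
+
+    def pick_value(value_range, clock=0.0):
+        # clock advances from 0.0 (start of training) to 1.0 (end of training)
+        start, end, radius = value_range
+        center = start + clock * (end - start)
+        return random.uniform(center - radius, center + radius)
+
+    pick_value(parse_range('4:6~2'), clock=0.0)  # uniform in [2, 6]
+    pick_value(parse_range('4:6~2'), clock=1.0)  # uniform in [4, 8]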
+
+Every augmentation targets a certain data representation of the sample - in the following referred to as its *domain*.
+Augmentations are applied domain-wise in the following order:
+
+1. **sample** domain: The sample just got loaded and its waveform is represented as a NumPy array. For implementation reasons these augmentations are the only ones that can be "simulated" through ``bin/play.py``.
+
+2. **signal** domain: The sample waveform is represented as a tensor.
+
+3. **spectrogram** domain: The sample spectrogram is represented as a tensor.
+
+4. **features** domain: The sample's MEL spectrogram features are represented as a tensor.
+
+During each phase augmentations are applied in command-line order (the **warp** augmentation being the only exception).
+
+
+Sample domain augmentations
+---------------------------
 
 **Overlay augmentation** ``--augment overlay[p=<float>,source=<str>,snr=<float-range>,layers=<int-range>]``
   Layers another audio source (multiple times) onto augmented samples.
@@ -328,16 +339,6 @@ If feature caching is enabled, these augmentations will only be performed on the
 
   * **decay**: sound decay in dB per reflection - higher values will result in a less reflective perceived "room"
 
-**Gaps augmentation** ``--augment gaps[p=<float>,n=<int-range>,size=<float-range>]``
-  Sets time-intervals within the augmented samples to zero (silence) at random positions.
-
-  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
-
-  * **n**: number of intervals to set to zero
-
-  * **size**: duration of intervals in ms
-
-
 **Resample augmentation** ``--augment resample[p=<float>,rate=<int-range>]``
   Resamples augmented samples to another sample rate and then resamples back to the original sample rate.
@@ -361,6 +362,96 @@ If feature caching is enabled, these augmentations will only be performed on the
 
   * **dbfs** : target volume in dBFS (default value of 3.0103 will normalize min and max amplitudes to -1.0/1.0)
 
+Spectrogram domain augmentations
+--------------------------------
+
+**Pitch and tempo augmentation** ``--augment pitch_and_tempo[p=<float>,pitch=<float-range>,tempo=<float-range>]``
+  Scales spectrogram on time and frequency axis and thus changes pitch and playback tempo.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+  * **pitch**: pitch factor by which the frequency axis is scaled (e.g. a value of 2.0 will raise audio frequency by one octave)
+
+  * **tempo**: tempo factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)
+
+
+**Speed augmentation** ``--augment speed[p=<float>,factor=<float-range>]``
+  Scales spectrogram on time axis and thus changes playback tempo.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+  * **factor**: speed factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)
+
+
+**Warp augmentation** ``--augment warp[p=<float>,shift=<float-range>,order=<int-range>,nbp=<int-range>,ncp=<int-range>,regularization_weight=<float>]``
+  Applies a non-linear image warp to the spectrogram, where the warp is specified by the source and destination locations of a (potentially small) number of control points. Of all specified spectrogram augmentations this one will always be applied first.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+  * **shift**: maximum shift distance of control points on time axis in ms
+
+  * **order**: polynomial order used by the spline interpolation
+
+  * **nbp**: how many zero-flow boundary points to include at each spectrogram edge
+
+  * **ncp**: how many control points to warp inside the spectrogram
+
+  * **regularization_weight**: weight on smoothness regularizer in interpolation
+
+
+**Frequency mask augmentation** ``--augment frequency_mask[p=<float>,n=<int-range>,size=<int-range>]``
+  Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+  * **n**: number of intervals to mask
+
+  * **size**: number of frequency bands to mask per interval
+
+Multi domain augmentations
+--------------------------
+
+**Time mask augmentation** ``--augment time_mask[p=<float>,n=<int-range>,size=<float-range>,domain=<domain>]``
+  Sets time-intervals within the augmented samples to zero (silence) at random positions.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+  * **n**: number of intervals to set to zero
+
+  * **size**: duration of intervals in ms
+
+  * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)
+
+
+**Dropout augmentation** ``--augment dropout[p=<float>,rate=<float-range>,domain=<domain>]``
+  Zeros random data points of the targeted data representation.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+  * **rate**: dropout rate ranging from 0.0 for no dropout to 1.0 for 100% dropout
+
+  * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)
+
+
+**Add augmentation** ``--augment add[p=<float>,stddev=<float-range>,domain=<domain>]``
+  Adds random values picked from a normal distribution (with a mean of 0.0) to all data points of the targeted data representation.
+
+  * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+  * **stddev**: standard deviation of the normal distribution to pick values from
+
+  * **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram"
+
+
+**Multiply augmentation** ``--augment multiply[p=<float>,stddev=<float-range>,domain=<domain>]``
+  Multiplies all data points of the targeted data representation with random values picked from a normal distribution (with a mean of 1.0).
+ + * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method + + * **stddev**: standard deviation of the normal distribution to pick values from + + * **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram" + Example training with all augmentations: @@ -368,18 +459,26 @@ Example training with all augmentations: python -u DeepSpeech.py \ --train_files "train.sdb" \ - --augmentations_per_epoch 10 \ + --feature_cache ./feature.cache \ + --cache_for_epochs 10 \ + --epochs 100 \ --augment overlay[p=0.5,source=noise.sdb,layers=1,snr=50:20~10] \ - --augment overlay[p=0.2,source=voices.sdb,layers=10:6,snr=50:20~10] \ --augment reverb[p=0.1,delay=50.0~30.0,decay=10.0:2.0~1.0] \ - --augment gaps[p=0.05,n=1:3~2,size=10:100] \ --augment resample[p=0.1,rate=12000:8000~4000] \ --augment codec[p=0.1,bitrate=48000:16000] \ --augment volume[p=0.1,dbfs=-10:-40] \ + --augment pitch_and_tempo[p=0.1,pitch=1~0.2,tempo=1~0.2] \ + --augment speed[p=0.1,factor=1~0.5] \ + --augment warp[p=0.1,shift=30:60~20,ncp=4~3] \ + --augment frequency_mask[p=0.1,n=1:3,size=1:5] \ + --augment time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40] \ + --augment dropout[p=0.1,rate=0.05] \ + --augment add[p=0.1,domain=signal,stddev=0~0.5] \ + --augment multiply[p=0.1,domain=features,stddev=0~0.5] \ [...] -The ``bin/play.py`` tool also supports ``--augment`` parameters and can be used for experimenting with different configurations. +The ``bin/play.py`` tool also supports ``--augment`` parameters (for sample domain augmentations) and can be used for experimenting with different configurations. Example of playing all samples with reverberation and maximized volume: @@ -393,42 +492,3 @@ Example simulation of the codec augmentation of a wav-file first at the beginnin bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 0.0 test.wav bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 1.0 test.wav - - -The following augmentations are applied after feature caching, hence the way they are applied will not repeat epoch-wise. -Working on spectrogram and feature level, `bin/play.py` offers no ability to simulate them. - -#. **Standard deviation for Gaussian additive noise:** ``--data_aug_features_additive`` -#. **Standard deviation for Normal distribution around 1 for multiplicative noise:** ``--data_aug_features_multiplicative`` -#. **Standard deviation for speeding-up tempo. If Standard deviation is 0, this augmentation is not performed:** ``--augmentation_speed_up_std`` - -Spectrogram Augmentation ------------------------- - -Inspired by Google Paper on `SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition `_ - - -#. - **Keep rate of dropout augmentation on a spectrogram (if 1, no dropout will be performed on the spectrogram)**\ : - - - * Keep Rate : ``--augmentation_spec_dropout_keeprate value between range [0 - 1]`` - -#. 
- **Whether to use frequency and time masking augmentation:** - - - * Enable / Disable : ``--augmentation_freq_and_time_masking / --noaugmentation_freq_and_time_masking`` - * Max range of masks in the frequency domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_freq_mask_range eg: 5`` - * Number of masks in the frequency domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_number_freq_masks eg: 3`` - * Max range of masks in the time domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_time_mask_range eg: 2`` - * Number of masks in the time domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_number_time_masks eg: 3`` - -#. - **Whether to use spectrogram speed and tempo scaling:** - - - * Enable / Disable : ``--augmentation_pitch_and_tempo_scaling / --noaugmentation_pitch_and_tempo_scaling`` - * Min value of pitch scaling: ``--augmentation_pitch_and_tempo_scaling_min_pitch eg:0.95`` - * Max value of pitch scaling: ``--augmentation_pitch_and_tempo_scaling_max_pitch eg:1.2`` - * Max value of tempo scaling: ``--augmentation_pitch_and_tempo_scaling_max_tempo eg:1.2`` diff --git a/taskcluster/tc-signal_augmentation-tests.sh b/taskcluster/tc-augmentation-tests.sh similarity index 86% rename from taskcluster/tc-signal_augmentation-tests.sh rename to taskcluster/tc-augmentation-tests.sh index edac0c190a..945f0bfb12 100644 --- a/taskcluster/tc-signal_augmentation-tests.sh +++ b/taskcluster/tc-augmentation-tests.sh @@ -22,7 +22,8 @@ popd set +o pipefail pushd ${HOME}/DeepSpeech/ds/ - time ./bin/run-tc-signal_augmentations.sh + time ./bin/run-tc-sample_augmentations.sh + time ./bin/run-tc-graph_augmentations.sh popd virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/test-signal_augmentations-linux-amd64-py36m-opt.yml b/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml similarity index 90% rename from taskcluster/test-signal_augmentations-linux-amd64-py36m-opt.yml rename to taskcluster/test-augmentations-linux-amd64-py36m-opt.yml index 6773f9082b..5dd066c25b 100644 --- a/taskcluster/test-signal_augmentations-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml @@ -6,7 +6,7 @@ build: > apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} args: - tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-signal_augmentation-tests.sh 3.6.10:m" + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-augmentation-tests.sh 3.6.10:m" metadata: name: "DeepSpeech Linux AMD64 CPU signal augmentations Py3.6" description: "Augmenting LDC93S1 sample in different ways for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version" diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index cb07c3864a..81c4ddd1c4 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -10,7 +10,6 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = DESIRED_LOG_LEVEL import absl.app -import json import numpy as np import progressbar import shutil @@ -32,7 +31,7 @@ from .util.config import Config, initialize_globals from .util.checkpoints import load_or_init_graph_for_training, load_graph_for_evaluation from .util.evaluate_tools import save_samples_json -from .util.feeding import create_dataset, samples_to_mfccs, audiofile_to_features +from .util.feeding import create_dataset, audio_to_features, 
audiofile_to_features from .util.flags import create_flags, FLAGS from .util.helpers import check_ctcdecoder_version, ExceptionBox from .util.logging import create_progressbar, log_debug, log_error, log_info, log_progress, log_warn @@ -407,26 +406,13 @@ def log_grads_and_vars(grads_and_vars): def train(): - do_cache_dataset = True - - # pylint: disable=too-many-boolean-expressions - if (FLAGS.data_aug_features_multiplicative > 0 or - FLAGS.data_aug_features_additive > 0 or - FLAGS.augmentation_spec_dropout_keeprate < 1 or - FLAGS.augmentation_freq_and_time_masking or - FLAGS.augmentation_pitch_and_tempo_scaling or - FLAGS.augmentation_speed_up_std > 0 or - FLAGS.augmentation_sparse_warp): - do_cache_dataset = False - exception_box = ExceptionBox() # Create training and validation datasets train_set = create_dataset(FLAGS.train_files.split(','), batch_size=FLAGS.train_batch_size, - repetitions=FLAGS.augmentations_per_epoch, - augmentation_specs=FLAGS.augment, - enable_cache=FLAGS.feature_cache and do_cache_dataset, + epochs=FLAGS.epochs, + augmentations=Config.augmentations, cache_path=FLAGS.feature_cache, train_phase=True, exception_box=exception_box, @@ -541,6 +527,12 @@ def run_set(set_name, epoch, init_op, dataset=None): step_summary_writer = step_summary_writers.get(set_name) checkpoint_time = time.time() + if is_train and FLAGS.cache_for_epochs > 0 and FLAGS.feature_cache is not None: + feature_cache_index = FLAGS.feature_cache + '.index' + if epoch % FLAGS.cache_for_epochs == 0 and os.path.isfile(feature_cache_index): + log_info('Invalidating feature cache') + os.remove(feature_cache_index) # this will let TF also overwrite the related cache data files + # Setup progress bar class LossWidget(progressbar.widgets.FormatLabel): def __init__(self): @@ -567,11 +559,6 @@ def __call__(self, progress, data, **kwargs): session.run([train_op, global_step, loss, non_finite_files, step_summaries_op], feed_dict=feed_dict) exception_box.raise_if_set() - except tf.errors.InvalidArgumentError as err: - if FLAGS.augmentation_sparse_warp: - log_info("Ignoring sparse warp error: {}".format(err)) - continue - raise except tf.errors.OutOfRangeError: exception_box.raise_if_set() break @@ -680,7 +667,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): # Create feature computation graph input_samples = tfv1.placeholder(tf.float32, [Config.audio_window_samples], 'input_samples') samples = tf.expand_dims(input_samples, -1) - mfccs, _ = samples_to_mfccs(samples, FLAGS.audio_sample_rate) + mfccs, _ = audio_to_features(samples, FLAGS.audio_sample_rate) mfccs = tf.identity(mfccs, name='mfccs') # Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input] diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py new file mode 100644 index 0000000000..eff08fbcb8 --- /dev/null +++ b/training/deepspeech_training/util/augmentations.py @@ -0,0 +1,564 @@ + +import os +import re +import math +import random +import numpy as np + +from multiprocessing import Queue, Process +from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS +from .helpers import LimitingPool, int_range, float_range, pick_value_from_range, tf_pick_value_from_range, MEGABYTE + +BUFFER_SIZE = 1 * MEGABYTE +SPEC_PARSER = re.compile(r'^(?P[a-z_]+)(\[(?P.*)\])?$') + + +class Augmentation: + def __init__(self, p=1.0): + self.probability = float(p) + + +class SampleAugmentation(Augmentation): + def start(self, 
buffering=BUFFER_SIZE): + pass + + def apply(self, sample, clock=0.0): + raise NotImplementedError + + def stop(self): + pass + + +class GraphAugmentation(Augmentation): + def __init__(self, p=1.0, domain='spectrogram'): + super(GraphAugmentation, self).__init__(p) + if domain not in ['signal', 'spectrogram', 'features']: + raise ValueError('Unsupported augmentation domain: {}'.format(domain)) + self.domain = domain + + def apply(self, tensor, clock=0.0): + raise NotImplementedError + + def apply_with_probability(self, tensor, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + rv = tf.random.stateless_uniform([], seed=(clock * tf.int32.min, clock * tf.int32.max)) + return tf.cond(tf.less(rv, self.probability), + lambda: self.apply(tensor, clock=clock), + lambda: tensor) + + def maybe_apply(self, domain, tensor, clock=0.0): + if domain == self.domain: + return self.apply_with_probability(tensor, clock=clock) + return tensor + + +def parse_augmentation(augmentation_spec): + """ + Parses an augmentation specification. + + Parameters + ---------- + augmentation_spec : str + Augmentation specification like "reverb[delay=20.0,decay=1.0]". + + Returns + ------- + Instance of an augmentation class from util.augmentations.*. + """ + match = SPEC_PARSER.match(augmentation_spec) + if not match: + raise ValueError('Augmentation specification has wrong format') + cls_name = ''.join(map(lambda p: p[0].upper() + p[1:], match.group('cls').split('_'))) + augmentation_cls = globals()[cls_name] if cls_name in globals() else None + if augmentation_cls is None or not issubclass(augmentation_cls, Augmentation) or augmentation_cls == Augmentation: + raise ValueError('Unknown augmentation: {}'.format(cls_name)) + parameters = match.group('params') + parameters = [] if parameters is None else parameters.split(',') + args = [] + kwargs = {} + for parameter in parameters: + pair = tuple(list(map(str.strip, (parameter.split('='))))) + if len(pair) == 1: + args.append(pair) + elif len(pair) == 2: + kwargs[pair[0]] = pair[1] + else: + raise ValueError('Unable to parse augmentation value assignment') + return augmentation_cls(*args, **kwargs) + + +def parse_augmentations(augmentation_specs): + """ + Parses an augmentation specification. + + Parameters + ---------- + augmentation_specs : list of str + List of augmentation specifications like ["reverb[delay=20.0,decay=1.0]", "volume"]. + + Returns + ------- + List of augmentation class instances from util.augmentations.*. + """ + return [] if augmentation_specs is None else list(map(parse_augmentation, augmentation_specs)) + + +def apply_graph_augmentations(domain, tensor, augmentations, clock=0.0): + """ + Augments training sample tensor of a certain domain with matching augmentations of passed list. + + Parameters + ---------- + domain : str + Domain of the tensor to apply augmentations to. One of "signal", "spectrogram" or "features" + tensor : Tensor of type float32 + Tensor to apply augmentations to. + augmentations : list of augmentation class instances from util.augmentations.*. + List of augmentations of which only the spectrogram ones will get applied to the samples. + clock : Tensor of type float32 + Time indicator for augmentation value-ranges. Running from 0.0 (start of training) to 1.0 (end of training). 
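+        (The clock also seeds the stateless random ops inside the augmentations - see apply_with_probability above.)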
+ + Returns + ------- + Tensor of type float32 + The augmented spectrogram + """ + if augmentations is not None: + # Warp has to come before any spectrogram masking + for augmentation in augmentations: + if isinstance(augmentation, Warp): + tensor = augmentation.maybe_apply(domain, tensor, clock=clock) + for augmentation in augmentations: + if isinstance(augmentation, GraphAugmentation) and not isinstance(augmentation, Warp): + tensor = augmentation.maybe_apply(domain, tensor, clock=clock) + return tensor + + +class AugmentationContext: + def __init__(self, target_audio_type, augmentations): + self.target_audio_type = target_audio_type + self.augmentations = augmentations + + +AUGMENTATION_CONTEXT = None + + +def _init_augmentation_worker(preparation_context): + global AUGMENTATION_CONTEXT # pylint: disable=global-statement + AUGMENTATION_CONTEXT = preparation_context + + +def _augment_sample(timed_sample, context=None): + context = AUGMENTATION_CONTEXT if context is None else context + sample, clock = timed_sample + for augmentation in context.augmentations: + if random.random() < augmentation.probability: + augmentation.apply(sample, clock) + sample.change_audio_type(new_audio_type=context.target_audio_type) + return sample + + +def apply_sample_augmentations(samples, + augmentations, + audio_type=AUDIO_TYPE_NP, + buffering=BUFFER_SIZE, + process_ahead=None, + clock=0.0, + final_clock=None): + """ + Prepares samples for being used during training. + This includes parallel and buffered application of augmentations and a conversion to a specified audio-type. + + Parameters + ---------- + samples : Sample enumeration + Typically produced by util.sample_collections.samples_from_sources. + augmentations : list of augmentation class instances from util.augmentations.*. + List of augmentations of which only the signal ones will get applied to the samples. + audio_type : str + Target audio-type to convert samples to. See util.audio.Sample.__init__ . + buffering : int + Read-buffer size to use while reading files. + process_ahead : int + Number of samples to pre-process ahead of time. + clock : float + Start or fixed clock value between 0.0 and 1.0 for the first or all samples. Has to be <= than clock_to. + final_clock : float + Final clock value between 0.0 and 1.0 for the last sample. Has to be >= than clock. + Requires samples.__len__ attribute. 
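+        When given, each sample's clock is linearly interpolated from clock to final_clock
+        according to its position in the sample sequence.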
+ + Returns + ------- + iterable of util.sample_collections.LabeledSample or util.audio.Sample + """ + def timed_samples(): + if final_clock is None: + for sample in samples: + yield sample, clock + else: + for sample_index, sample in enumerate(samples): + sample_clock = clock + (final_clock - clock) * (sample_index / len(samples)) + yield sample, sample_clock + + assert 0.0 <= clock <= 1.0 + if final_clock is not None: + assert 0.0 <= final_clock <= 1.0 + assert clock <= final_clock + augmentations = list(filter(lambda aug: isinstance(aug, SampleAugmentation), augmentations)) + try: + for augmentation in augmentations: + augmentation.start(buffering=buffering) + context = AugmentationContext(audio_type, augmentations) + if process_ahead == 0: + for timed_sample in timed_samples(): + yield _augment_sample(timed_sample, context=context) + else: + with LimitingPool(process_ahead=process_ahead, + initializer=_init_augmentation_worker, + initargs=(context,)) as pool: + yield from pool.imap(_augment_sample, timed_samples()) + finally: + for augmentation in augmentations: + augmentation.stop() + + +def _enqueue_overlay_samples(sample_source, queue, buffering=BUFFER_SIZE): + """ + As the central distribution point for overlay samples this function is supposed to run in one process only. + This ensures that samples are not used twice if not required. + It loads the (raw and still compressed) data and provides it to the actual augmentation workers. + These are then doing decompression, potential conversion and overlaying in parallel. + """ + # preventing cyclic import problems + from .sample_collections import samples_from_source # pylint: disable=import-outside-toplevel + samples = samples_from_source(sample_source, buffering=buffering, labeled=False) + while True: + for sample in samples: + queue.put(sample) + + +class Overlay(SampleAugmentation): + """See "Overlay augmentation" in TRAINING.rst""" + def __init__(self, source, p=1.0, snr=3.0, layers=1): + super(Overlay, self).__init__(p) + self.source = source + self.snr = float_range(snr) + self.layers = int_range(layers) + self.queue = Queue(max(1, math.floor(self.probability * self.layers[1] * os.cpu_count()))) + self.current_sample = None + self.enqueue_process = None + + def start(self, buffering=BUFFER_SIZE): + self.enqueue_process = Process(target=_enqueue_overlay_samples, + args=(self.source, self.queue), + kwargs={'buffering': buffering}) + self.enqueue_process.start() + + def apply(self, sample, clock=0.0): + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + n_layers = pick_value_from_range(self.layers, clock=clock) + audio = sample.audio + overlay_data = np.zeros_like(audio) + for _ in range(n_layers): + overlay_offset = 0 + while overlay_offset < len(audio): + if self.current_sample is None: + next_overlay_sample = self.queue.get() + next_overlay_sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + self.current_sample = next_overlay_sample.audio + n_required = len(audio) - overlay_offset + n_current = len(self.current_sample) + if n_required >= n_current: # take it completely + overlay_data[overlay_offset:overlay_offset + n_current] += self.current_sample + overlay_offset += n_current + self.current_sample = None + else: # take required slice from head and keep tail for next layer or sample + overlay_data[overlay_offset:overlay_offset + n_required] += self.current_sample[0:n_required] + overlay_offset += n_required + self.current_sample = self.current_sample[n_required:] + snr_db = pick_value_from_range(self.snr, 
clock=clock) + orig_dbfs = max_dbfs(audio) + overlay_gain = orig_dbfs - max_dbfs(overlay_data) - snr_db + audio += overlay_data * gain_db_to_ratio(overlay_gain) + sample.audio = normalize_audio(audio, dbfs=orig_dbfs) + + def stop(self): + if self.enqueue_process is not None: + self.enqueue_process.terminate() + + +class Codec(SampleAugmentation): + """See "Codec augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, bitrate=3200): + super(Codec, self).__init__(p) + self.bitrate = int_range(bitrate) + + def apply(self, sample, clock=0.0): + bitrate = pick_value_from_range(self.bitrate, clock=clock) + sample.change_audio_type(new_audio_type=AUDIO_TYPE_PCM) # decoding to ensure it has to get encoded again + sample.change_audio_type(new_audio_type=AUDIO_TYPE_OPUS, bitrate=bitrate) # will get decoded again downstream + + +class Reverb(SampleAugmentation): + """See "Reverb augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, delay=20.0, decay=10.0): + super(Reverb, self).__init__(p) + self.delay = float_range(delay) + self.decay = float_range(decay) + + def apply(self, sample, clock=0.0): + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + audio = np.array(sample.audio, dtype=np.float64) + orig_dbfs = max_dbfs(audio) + delay = pick_value_from_range(self.delay, clock=clock) + decay = pick_value_from_range(self.decay, clock=clock) + decay = gain_db_to_ratio(-decay) + result = np.copy(audio) + primes = [17, 19, 23, 29, 31] + for delay_prime in primes: # primes to minimize comb filter interference + layer = np.copy(audio) + n_delay = math.floor(delay * (delay_prime / primes[0]) * sample.audio_format.rate / 1000.0) + n_delay = max(16, n_delay) # 16 samples minimum to avoid performance trap and risk of division by zero + for w_index in range(0, math.floor(len(audio) / n_delay)): + w1 = w_index * n_delay + w2 = (w_index + 1) * n_delay + width = min(len(audio) - w2, n_delay) # last window could be smaller + layer[w2:w2 + width] += decay * layer[w1:w1 + width] + result += layer + audio = normalize_audio(result, dbfs=orig_dbfs) + sample.audio = np.array(audio, dtype=np.float32) + + +class Resample(SampleAugmentation): + """See "Resample augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, rate=8000): + super(Resample, self).__init__(p) + self.rate = int_range(rate) + + def apply(self, sample, clock=0.0): + # late binding librosa and its dependencies + from librosa.core import resample # pylint: disable=import-outside-toplevel + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + rate = pick_value_from_range(self.rate, clock=clock) + audio = sample.audio + orig_len = len(audio) + audio = np.swapaxes(audio, 0, 1) + audio = resample(audio, sample.audio_format.rate, rate) + audio = resample(audio, rate, sample.audio_format.rate) + audio = np.swapaxes(audio, 0, 1)[0:orig_len] + sample.audio = audio + + +class Volume(SampleAugmentation): + """See "Volume augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, dbfs=3.0103): + super(Volume, self).__init__(p) + self.target_dbfs = float_range(dbfs) + + def apply(self, sample, clock=0.0): + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + target_dbfs = pick_value_from_range(self.target_dbfs, clock=clock) + sample.audio = normalize_audio(sample.audio, dbfs=target_dbfs) + + +class PitchAndTempo(GraphAugmentation): + """See "Pitch and tempo augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, tempo=1.2, pitch=(1.075, 1.075, 0.125)): + super(PitchAndTempo, self).__init__(p, domain='spectrogram') + self.tempo = 
float_range(tempo) + self.pitch = float_range(pitch) + + def apply(self, tensor, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + original_shape = tf.shape(tensor) + pitch = tf_pick_value_from_range(self.pitch, clock=clock) + tempo = tf.math.maximum(1.0, tf_pick_value_from_range(self.tempo, clock=clock)) + new_freq_size = tf.cast(tf.cast(original_shape[2], tf.float32) * pitch, tf.int32) + new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32) / tempo, tf.int32) + spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(tensor, -1), [new_time_size, new_freq_size]) + spectrogram_aug = tf.image.crop_to_bounding_box(spectrogram_aug, + offset_height=0, + offset_width=0, + target_height=tf.shape(spectrogram_aug)[1], + target_width=tf.math.minimum(original_shape[2], new_freq_size)) + spectrogram_aug = tf.cond(pitch < 1, + lambda: tf.image.pad_to_bounding_box(spectrogram_aug, + offset_height=0, + offset_width=0, + target_height=tf.shape(spectrogram_aug)[1], + target_width=original_shape[2]), + lambda: spectrogram_aug) + return spectrogram_aug[:, :, :, 0] + + +class Speed(GraphAugmentation): + """See "Speed augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, factor=1.1): + super(Speed, self).__init__(p, domain='spectrogram') + self.factor = float_range(factor) + + def apply(self, tensor, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + factor = tf_pick_value_from_range(self.factor, clock=clock) + original_shape = tf.shape(tensor) + new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32) / factor, tf.int32) + spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(tensor, -1), [new_time_size, original_shape[2]]) + return spectrogram_aug[:, :, :, 0] + + +class Warp(GraphAugmentation): + """See "Warp augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, shift=100.0, order=3, nbp=1, ncp=1, regularization_weight=0.0): + super(Warp, self).__init__(p, domain='spectrogram') + self.shift = float_range(shift) + self.order = int_range(order) + self.nbp = int_range(nbp) + self.ncp = int_range(ncp) + # Making this a value-range is impossible, as it would get a tensor which would downstream be used as parameter + # of a comparison inside tensorflow.contrib.image.python.ops.interpolate_spline. This is not supported. 
+ self.regularization_weight = float(regularization_weight) + + def apply(self, tensor, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + from .flags import FLAGS # pylint: disable=import-outside-toplevel + from .sparse_image_warp import sparse_image_warp # pylint: disable=import-outside-toplevel + + # reshape to fit `sparse_image_warp`'s input shape (1, time steps, freq, 1), batch_size must be 1 + expanded_spectrogram = tf.expand_dims(tensor, -1) + original_shape = tf.shape(expanded_spectrogram) + tau, freq_size = original_shape[1], original_shape[2] + seed = (clock * tf.int32.min, clock * tf.int32.max) + + shift = tf_pick_value_from_range(self.shift, clock=clock) + shift *= FLAGS.audio_sample_rate / (FLAGS.feature_win_step * 1000.0) # number of windows + shift = tf.math.minimum(tf.cast(shift, dtype=tf.int32), tf.math.floordiv(tau, 2) - 1) # to protect short audio + nbp = tf_pick_value_from_range(self.nbp, clock=clock) + ncp = tf_pick_value_from_range(self.ncp, clock=clock) + # workaround for missing stateless shuffle support + frequencies = tf.random.stateless_uniform([2 * ncp], seed, minval=1, maxval=freq_size - 2, dtype=tf.int32) + frequencies = tf.unique(tf.concat([frequencies, tf.range(1, limit=freq_size - 3)], axis=0))[0][0:ncp] + source_max = tau - shift + source_min = tf.math.minimum(source_max - ncp, shift) + # workaround for missing stateless shuffle support + src_times = tf.random.stateless_uniform([2 * ncp], seed, minval=source_min, maxval=source_max, dtype=tf.int32) + src_times = tf.unique(tf.concat([src_times, tf.range(1, limit=source_max)], axis=0))[0][0:ncp] + dst_times = src_times + tf.random.stateless_uniform([ncp], seed, minval=-shift, maxval=shift, dtype=tf.int32) + scp_locations = tf.cast([tf.transpose(tf.stack([src_times, frequencies]))], dtype=tf.float32) + dcp_locations = tf.cast([tf.transpose(tf.stack([dst_times, frequencies]))], dtype=tf.float32) + + order = tf_pick_value_from_range(self.order, clock=clock) + order = tf.math.maximum(3, order) # prevents "Input matrix is not invertible." 
exception
+        order = tf.cast(order, tf.float32)
+
+        spectrogram_aug, _ = sparse_image_warp(expanded_spectrogram,
+                                               source_control_point_locations=scp_locations,
+                                               dest_control_point_locations=dcp_locations,
+                                               interpolation_order=order,
+                                               regularization_weight=self.regularization_weight,
+                                               num_boundary_points=nbp)
+        return tf.reshape(spectrogram_aug, shape=(1, -1, freq_size))
+
+
+class FrequencyMask(GraphAugmentation):
+    """See "Frequency mask augmentation" in TRAINING.rst"""
+    def __init__(self, p=1.0, n=3, size=2):
+        super(FrequencyMask, self).__init__(p, domain='spectrogram')
+        self.n = int_range(n)  # pylint: disable=invalid-name
+        self.size = int_range(size)
+
+    def apply(self, tensor, clock=0.0):
+        import tensorflow as tf  # pylint: disable=import-outside-toplevel
+        time_max = tf.shape(tensor)[1]
+        freq_max = tf.shape(tensor)[2]
+        n = tf_pick_value_from_range(self.n, clock=clock)
+
+        def body(i, spectrogram_aug):
+            size = tf_pick_value_from_range(self.size, clock=clock)
+            size = tf.math.maximum(1, tf.math.minimum(freq_max - 1, size))
+            seed = tf.cast(clock * tf.int32.max, tf.int32) - i
+            f0 = tf.random.stateless_uniform((), (-seed, seed), minval=0, maxval=freq_max - size, dtype=tf.dtypes.int32)
+            freq_mask = tf.concat([tf.ones([1, time_max, f0]),
+                                   tf.zeros([1, time_max, size]),
+                                   tf.ones([1, time_max, freq_max - f0 - size])], axis=2)
+            return i + 1, spectrogram_aug * freq_mask
+
+        return tf.while_loop(lambda i, spectrogram_aug: i < n, body, (0, tensor))[1]
+
+
+class TimeMask(GraphAugmentation):
+    """See "Time mask augmentation" in TRAINING.rst"""
+    def __init__(self, p=1.0, domain='spectrogram', n=3, size=10.0):
+        super(TimeMask, self).__init__(p, domain=domain)
+        self.n = int_range(n)  # pylint: disable=invalid-name
+        self.size = float_range(size)
+
+    def apply(self, tensor, clock=0.0):
+        import tensorflow as tf  # pylint: disable=import-outside-toplevel
+        from .flags import FLAGS  # pylint: disable=import-outside-toplevel
+        time_factor = FLAGS.audio_sample_rate / 1000.0  # samples per ms
+        if self.domain != 'signal':
+            time_factor /= FLAGS.feature_win_step  # windows per ms
+        time_max = tf.shape(tensor)[0] if self.domain == 'signal' else tf.shape(tensor)[1]
+        n = tf_pick_value_from_range(self.n, clock=clock)
+
+        def body(i, augmented):
+            size = tf.cast(tf_pick_value_from_range(self.size, clock=clock) * time_factor, dtype=tf.int32)
+            size = tf.math.maximum(1, tf.math.minimum(time_max - 1, size))
+            seed = tf.cast(clock * tf.int32.max, tf.int32) - i
+            t0 = tf.random.stateless_uniform((), (-seed, seed), minval=0, maxval=time_max - size, dtype=tf.dtypes.int32)
+            rest = time_max - t0 - size
+            if self.domain == 'spectrogram':
+                fm = tf.shape(tensor)[2]
+                time_mask = tf.concat([tf.ones([1, t0, fm]), tf.zeros([1, size, fm]), tf.ones([1, rest, fm])], axis=1)
+            elif self.domain == 'signal':
+                time_mask = tf.concat([tf.ones([t0, 1]), tf.zeros([size, 1]), tf.ones([rest, 1])], axis=0)
+            else:
+                time_mask = tf.concat([tf.ones([1, t0]), tf.zeros([1, size]), tf.ones([1, rest])], axis=1)
+            return i + 1, augmented * time_mask
+
+        return tf.while_loop(lambda i, augmented: i < n, body, (0, tensor))[1]
+
+
+class Dropout(GraphAugmentation):
+    """See "Dropout augmentation" in TRAINING.rst"""
+    def __init__(self, p=1.0, domain='spectrogram', rate=0.05):
+        super(Dropout, self).__init__(p, domain=domain)
+        self.rate = float_range(rate)
+
+    def apply(self, tensor, clock=0.0):
+        import tensorflow as tf  # pylint: disable=import-outside-toplevel
+        rate = tf_pick_value_from_range(self.rate, 
clock=clock) + rate = tf.math.maximum(0.0, rate) + factors = tf.random.stateless_uniform(tf.shape(tensor), + (clock * tf.int32.min, clock * tf.int32.max), + minval=0.0, + maxval=1.0, + dtype=tf.float32) + return tensor * tf.math.sign(tf.math.floor(factors + rate)) + + +class Add(GraphAugmentation): + """See "Add augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, domain='features', stddev=5): + super(Add, self).__init__(p, domain=domain) + self.stddev = float_range(stddev) + + def apply(self, tensor, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + stddev = tf_pick_value_from_range(self.stddev, clock=clock) + seed = (clock * tf.int32.min, clock * tf.int32.max) + return tensor + tf.random.stateless_normal(tf.shape(tensor), seed, mean=0.0, stddev=stddev) + + +class Multiply(GraphAugmentation): + """See "Multiply augmentation" in TRAINING.rst""" + def __init__(self, p=1.0, domain='features', stddev=5): + super(Multiply, self).__init__(p, domain=domain) + self.stddev = float_range(stddev) + + def apply(self, tensor, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + stddev = tf_pick_value_from_range(self.stddev, clock=clock) + seed = (clock * tf.int32.min, clock * tf.int32.max) + return tensor * tf.random.stateless_normal(tf.shape(tensor), seed, mean=1.0, stddev=stddev) diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index fdd6353903..0786ae4292 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -2,7 +2,6 @@ import os import sys -import tensorflow as tf import tensorflow.compat.v1 as tfv1 from attrdict import AttrDict @@ -13,6 +12,7 @@ from .logging import log_error, log_warn from .text import Alphabet, UTF8Alphabet from .helpers import parse_file_size +from .augmentations import parse_augmentations class ConfigSingleton: _config = None @@ -30,6 +30,17 @@ def __getattr__(self, name): def initialize_globals(): c = AttrDict() + # Augmentations + c.augmentations = parse_augmentations(FLAGS.augment) + if len(c.augmentations) > 0 and FLAGS.feature_cache is not None and FLAGS.cache_for_epochs == 0: + log_warn('Due to current feature-cache settings the exact same sample augmentations of the first ' + 'epoch will be repeated on all following epochs. This could lead to unintended over-fitting. 
' + 'You could use --cache_for_epochs to invalidate the cache after a given number of epochs.') + + # Caching + if FLAGS.cache_for_epochs == 1: + log_warn('--cache_for_epochs == 1 is (re-)creating the feature cache on every epoch but will never use it.') + # Read-buffer FLAGS.read_buffer = parse_file_size(FLAGS.read_buffer) diff --git a/training/deepspeech_training/util/feeding.py b/training/deepspeech_training/util/feeding.py index 31dafcfb93..ff3a1a7536 100644 --- a/training/deepspeech_training/util/feeding.py +++ b/training/deepspeech_training/util/feeding.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function +from collections import Counter from functools import partial import numpy as np @@ -11,13 +12,13 @@ from .config import Config from .text import text_to_char_array from .flags import FLAGS -from .spectrogram_augmentations import augment_freq_time_mask, augment_dropout, augment_pitch_and_tempo, augment_speed_up, augment_sparse_warp +from .augmentations import apply_sample_augmentations, apply_graph_augmentations from .audio import read_frames_from_file, vad_split, pcm_to_np, DEFAULT_FORMAT -from .sample_collections import samples_from_sources, augment_samples +from .sample_collections import samples_from_sources from .helpers import remember_exception, MEGABYTE -def samples_to_mfccs(samples, sample_rate, train_phase=False, sample_id=None): +def audio_to_features(audio, sample_rate, clock=0.0, train_phase=False, augmentations=None, sample_id=None): if train_phase: # We need the lambdas to make TensorFlow happy. # pylint: disable=unnecessary-lambda @@ -27,73 +28,48 @@ def samples_to_mfccs(samples, sample_rate, train_phase=False, sample_id=None): lambda: tf.no_op(), name='matching_sample_rate') - spectrogram = contrib_audio.audio_spectrogram(samples, + if train_phase and augmentations is not None: + audio = apply_graph_augmentations('signal', audio, augmentations, clock=clock) + + spectrogram = contrib_audio.audio_spectrogram(audio, window_size=Config.audio_window_samples, stride=Config.audio_step_samples, magnitude_squared=True) - # Data Augmentations - if train_phase: - if FLAGS.augmentation_spec_dropout_keeprate < 1: - spectrogram = augment_dropout(spectrogram, - keep_prob=FLAGS.augmentation_spec_dropout_keeprate) - - # sparse warp must before freq/time masking - if FLAGS.augmentation_sparse_warp: - spectrogram = augment_sparse_warp(spectrogram, - time_warping_para=FLAGS.augmentation_sparse_warp_time_warping_para, - interpolation_order=FLAGS.augmentation_sparse_warp_interpolation_order, - regularization_weight=FLAGS.augmentation_sparse_warp_regularization_weight, - num_boundary_points=FLAGS.augmentation_sparse_warp_num_boundary_points, - num_control_points=FLAGS.augmentation_sparse_warp_num_control_points) - - if FLAGS.augmentation_freq_and_time_masking: - spectrogram = augment_freq_time_mask(spectrogram, - frequency_masking_para=FLAGS.augmentation_freq_and_time_masking_freq_mask_range, - time_masking_para=FLAGS.augmentation_freq_and_time_masking_time_mask_range, - frequency_mask_num=FLAGS.augmentation_freq_and_time_masking_number_freq_masks, - time_mask_num=FLAGS.augmentation_freq_and_time_masking_number_time_masks) - - if FLAGS.augmentation_pitch_and_tempo_scaling: - spectrogram = augment_pitch_and_tempo(spectrogram, - max_tempo=FLAGS.augmentation_pitch_and_tempo_scaling_max_tempo, - max_pitch=FLAGS.augmentation_pitch_and_tempo_scaling_max_pitch, - min_pitch=FLAGS.augmentation_pitch_and_tempo_scaling_min_pitch) - - if 
FLAGS.augmentation_speed_up_std > 0: - spectrogram = augment_speed_up(spectrogram, speed_std=FLAGS.augmentation_speed_up_std) - - mfccs = contrib_audio.mfcc(spectrogram=spectrogram, - sample_rate=sample_rate, - dct_coefficient_count=Config.n_input, - upper_frequency_limit=FLAGS.audio_sample_rate/2) - mfccs = tf.reshape(mfccs, [-1, Config.n_input]) - - return mfccs, tf.shape(input=mfccs)[0] - - -def audio_to_features(audio, sample_rate, train_phase=False, sample_id=None): - features, features_len = samples_to_mfccs(audio, sample_rate, train_phase=train_phase, sample_id=sample_id) + if train_phase and augmentations is not None: + spectrogram = apply_graph_augmentations('spectrogram', spectrogram, augmentations, clock=clock) - if train_phase: - if FLAGS.data_aug_features_multiplicative > 0: - features = features*tf.random.normal(mean=1, stddev=FLAGS.data_aug_features_multiplicative, shape=tf.shape(features)) + features = contrib_audio.mfcc(spectrogram=spectrogram, + sample_rate=sample_rate, + dct_coefficient_count=Config.n_input, + upper_frequency_limit=FLAGS.audio_sample_rate / 2) + features = tf.reshape(features, [-1, Config.n_input]) - if FLAGS.data_aug_features_additive > 0: - features = features+tf.random.normal(mean=0.0, stddev=FLAGS.data_aug_features_additive, shape=tf.shape(features)) + if train_phase and augmentations is not None: + features = apply_graph_augmentations('features', features, augmentations, clock=clock) - return features, features_len + return features, tf.shape(input=features)[0] -def audiofile_to_features(wav_filename, train_phase=False): +def audiofile_to_features(wav_filename, clock=0.0, train_phase=False, augmentations=None): samples = tf.io.read_file(wav_filename) decoded = contrib_audio.decode_wav(samples, desired_channels=1) - return audio_to_features(decoded.audio, decoded.sample_rate, train_phase=train_phase, sample_id=wav_filename) + return audio_to_features(decoded.audio, + decoded.sample_rate, + clock=clock, + train_phase=train_phase, + augmentations=augmentations, + sample_id=wav_filename) -def entry_to_features(sample_id, audio, sample_rate, transcript, train_phase=False): +def entry_to_features(sample_id, audio, sample_rate, transcript, clock, train_phase=False, augmentations=None): # https://bugs.python.org/issue32117 - features, features_len = audio_to_features(audio, sample_rate, train_phase=train_phase, sample_id=sample_id) + features, features_len = audio_to_features(audio, + sample_rate, + clock=clock, + train_phase=train_phase, + augmentations=augmentations, + sample_id=sample_id) sparse_transcript = tf.SparseTensor(*transcript) return sample_id, features, features_len, sparse_transcript @@ -109,25 +85,32 @@ def to_sparse_tuple(sequence): def create_dataset(sources, batch_size, - repetitions=1, - augmentation_specs=None, - enable_cache=False, + epochs=1, + augmentations=None, cache_path=None, train_phase=False, exception_box=None, process_ahead=None, buffering=1 * MEGABYTE): + epoch_counter = Counter() # survives restarts of the dataset and its generator + def generate_values(): + epoch = epoch_counter['epoch'] + if train_phase: + epoch_counter['epoch'] += 1 samples = samples_from_sources(sources, buffering=buffering, labeled=True) - samples = augment_samples(samples, - repetitions=repetitions, - augmentation_specs=augmentation_specs, - buffering=buffering, - process_ahead=2 * batch_size if process_ahead is None else process_ahead) - for sample in samples: + num_samples = len(samples) + samples = apply_sample_augmentations(samples, + 
augmentations, + buffering=buffering, + process_ahead=2 * batch_size if process_ahead is None else process_ahead, + clock=epoch / epochs, + final_clock=(epoch + 1) / epochs) + for sample_index, sample in enumerate(samples): + clock = (epoch * num_samples + sample_index) / (epochs * num_samples) if train_phase and epochs > 0 else 0.0 transcript = text_to_char_array(sample.transcript, Config.alphabet, context=sample.sample_id) transcript = to_sparse_tuple(transcript) - yield sample.sample_id, sample.audio, sample.audio_format.rate, transcript + yield sample.sample_id, sample.audio, sample.audio_format.rate, transcript, clock # Batching a dataset of 2D SparseTensors creates 3D batches, which fail # when passed to tf.nn.ctc_loss, so we reshape them to remove the extra @@ -143,13 +126,13 @@ def batch_fn(sample_ids, features, features_len, transcripts): sample_ids = sample_ids.batch(batch_size) return tf.data.Dataset.zip((sample_ids, features, transcripts)) - process_fn = partial(entry_to_features, train_phase=train_phase) + process_fn = partial(entry_to_features, train_phase=train_phase, augmentations=augmentations) dataset = (tf.data.Dataset.from_generator(remember_exception(generate_values, exception_box), output_types=(tf.string, tf.float32, tf.int32, - (tf.int64, tf.int32, tf.int64))) + (tf.int64, tf.int32, tf.int64), tf.float64)) .map(process_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)) - if enable_cache: + if cache_path is not None: dataset = dataset.cache(cache_path) dataset = (dataset.window(batch_size, drop_remainder=train_phase).flat_map(batch_fn) .prefetch(len(Config.available_devices))) @@ -172,7 +155,7 @@ def generate_values(): yield time_start, time_end, samples def to_mfccs(time_start, time_end, samples): - features, features_len = samples_to_mfccs(samples, audio_format.rate) + features, features_len = audio_to_features(samples, audio_format.rate) return time_start, time_end, features, features_len def create_batch_set(bs, criteria): diff --git a/training/deepspeech_training/util/flags.py b/training/deepspeech_training/util/flags.py index 66b25ebacf..c31eb461e7 100644 --- a/training/deepspeech_training/util/flags.py +++ b/training/deepspeech_training/util/flags.py @@ -19,6 +19,7 @@ def create_flags(): f.DEFINE_string('read_buffer', '1MB', 'buffer-size for reading samples from datasets (supports file-size suffixes KB, MB, GB, TB)') f.DEFINE_string('feature_cache', '', 'cache MFCC features to disk to speed up future training runs on the same data. This flag specifies the path where cached features extracted from --train_files will be saved. If empty, or if online augmentation flags are enabled, caching will be disabled.') + f.DEFINE_integer('cache_for_epochs', 0, 'after how many epochs the feature cache is invalidated again - 0 for "never"') f.DEFINE_integer('feature_win_len', 32, 'feature extraction audio window length in milliseconds') f.DEFINE_integer('feature_win_step', 20, 'feature extraction window step length in milliseconds') @@ -28,32 +29,6 @@ def create_flags(): # ================ f.DEFINE_multi_string('augment', None, 'specifies an augmentation of the training samples. 
Format is "--augment operation[param1=value1, ...]"') - f.DEFINE_integer('augmentations_per_epoch', 1, 'how often the train set should be repeated and re-augmented per epoch') - - f.DEFINE_float('data_aug_features_additive', 0, 'std of the Gaussian additive noise') - f.DEFINE_float('data_aug_features_multiplicative', 0, 'std of normal distribution around 1 for multiplicative noise') - - f.DEFINE_float('augmentation_spec_dropout_keeprate', 1, 'keep rate of dropout augmentation on spectrogram (if 1, no dropout will be performed on spectrogram)') - - f.DEFINE_boolean('augmentation_sparse_warp', False, 'whether to use spectrogram sparse warp. USE OF THIS FLAG IS UNSUPPORTED, enable sparse warp will increase training time drastically, and the paper also mentioned that this is not a major factor to improve accuracy.') - f.DEFINE_integer('augmentation_sparse_warp_num_control_points', 1, 'specify number of control points') - f.DEFINE_integer('augmentation_sparse_warp_time_warping_para', 20, 'time_warping_para') - f.DEFINE_integer('augmentation_sparse_warp_interpolation_order', 2, 'sparse_warp_interpolation_order') - f.DEFINE_float('augmentation_sparse_warp_regularization_weight', 0.0, 'sparse_warp_regularization_weight') - f.DEFINE_integer('augmentation_sparse_warp_num_boundary_points', 1, 'sparse_warp_num_boundary_points') - - f.DEFINE_boolean('augmentation_freq_and_time_masking', False, 'whether to use frequency and time masking augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_freq_mask_range', 5, 'max range of masks in the frequency domain when performing freqtime-mask augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_number_freq_masks', 3, 'number of masks in the frequency domain when performing freqtime-mask augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_time_mask_range', 2, 'max range of masks in the time domain when performing freqtime-mask augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_number_time_masks', 3, 'number of masks in the time domain when performing freqtime-mask augmentation') - - f.DEFINE_float('augmentation_speed_up_std', 0, 'std for speeding-up tempo. 
If std is 0, this augmentation is not performed') - - f.DEFINE_boolean('augmentation_pitch_and_tempo_scaling', False, 'whether to use spectrogram speed and tempo scaling') - f.DEFINE_float('augmentation_pitch_and_tempo_scaling_min_pitch', 0.95, 'min value of pitch scaling') - f.DEFINE_float('augmentation_pitch_and_tempo_scaling_max_pitch', 1.2, 'max value of pitch scaling') - f.DEFINE_float('augmentation_pitch_and_tempo_scaling_max_tempo', 1.2, 'max vlaue of tempo scaling') # Global Constants # ================ diff --git a/training/deepspeech_training/util/helpers.py b/training/deepspeech_training/util/helpers.py index cd1748cc47..6da708b90d 100644 --- a/training/deepspeech_training/util/helpers.py +++ b/training/deepspeech_training/util/helpers.py @@ -174,3 +174,18 @@ def pick_value_from_range(value_range, clock=None): value = value_range.start + clock * (value_range.end - value_range.start) value = random.uniform(value - value_range.r, value + value_range.r) return round(value) if isinstance(value_range.start, int) else value + + +def tf_pick_value_from_range(value_range, clock=None, double_precision=False): + import tensorflow as tf # pylint: disable=import-outside-toplevel + clock = (tf.random.stateless_uniform([], seed=(-1, 1), dtype=tf.float64) if clock is None + else tf.maximum(tf.constant(0.0, dtype=tf.float64), tf.minimum(tf.constant(1.0, dtype=tf.float64), clock))) + value = value_range.start + clock * (value_range.end - value_range.start) + value = tf.random.stateless_uniform([], + minval=value - value_range.r, + maxval=value + value_range.r, + seed=(clock * tf.int32.min, clock * tf.int32.max), + dtype=tf.float64) + if isinstance(value_range.start, int): + return tf.cast(tf.math.round(value), tf.int64 if double_precision else tf.int32) + return tf.cast(value, tf.float64 if double_precision else tf.float32) diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index 942888f9f2..37210659a0 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -2,14 +2,12 @@ import os import csv import json -import random from pathlib import Path from functools import partial -from .signal_augmentations import parse_augmentation -from .helpers import MEGABYTE, GIGABYTE, Interleaved, LimitingPool -from .audio import Sample, DEFAULT_FORMAT, AUDIO_TYPE_OPUS, AUDIO_TYPE_NP, SERIALIZABLE_AUDIO_TYPES, get_audio_type_from_extension +from .helpers import MEGABYTE, GIGABYTE, Interleaved +from .audio import Sample, DEFAULT_FORMAT, AUDIO_TYPE_OPUS, SERIALIZABLE_AUDIO_TYPES, get_audio_type_from_extension BIG_ENDIAN = 'big' INT_SIZE = 4 @@ -416,88 +414,3 @@ def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None): return samples_from_source(sample_sources[0], buffering=buffering, labeled=labeled) cols = list(map(partial(samples_from_source, buffering=buffering, labeled=labeled), sample_sources)) return Interleaved(*cols, key=lambda s: s.duration) - - -class PreparationContext: - def __init__(self, target_audio_type, augmentations): - self.target_audio_type = target_audio_type - self.augmentations = augmentations - - -AUGMENTATION_CONTEXT = None - - -def _init_augmentation_worker(preparation_context): - global AUGMENTATION_CONTEXT # pylint: disable=global-statement - AUGMENTATION_CONTEXT = preparation_context - - -def _augment_sample(timed_sample, context=None): - context = AUGMENTATION_CONTEXT if context is None else context - sample, 
clock = timed_sample - for augmentation in context.augmentations: - if random.random() < augmentation.probability: - augmentation.apply(sample, clock) - sample.change_audio_type(new_audio_type=context.target_audio_type) - return sample - - -def augment_samples(samples, - audio_type=AUDIO_TYPE_NP, - augmentation_specs=None, - buffering=BUFFER_SIZE, - process_ahead=None, - repetitions=1, - fixed_clock=None): - """ - Prepares samples for being used during training. - This includes parallel and buffered application of augmentations and a conversion to a specified audio-type. - - Parameters - ---------- - samples : Sample enumeration - Typically produced by samples_from_sources. - audio_type : str - Target audio-type to convert samples to. See util.audio.Sample.__init__ . - augmentation_specs : list of str - Augmentation specifications like ["reverb[delay=20.0,decay=-20]", "volume"]. See TRAINING.rst. - buffering : int - Read-buffer size to use while reading files. - process_ahead : int - Number of samples to pre-process ahead of time. - repetitions : int - How often the input sample enumeration should get repeated for being re-augmented. - fixed_clock : float - Sets the internal clock to a value between 0.0 (beginning of epoch) and 1.0 (end of epoch). - Setting this to a number is used for simulating augmentations at a certain epoch-time. - If kept at None (default), the internal clock will run regularly from 0.0 to 1.0, - hence preparing them for training. - - Returns - ------- - iterable of util.sample_collections.LabeledSample or util.audio.Sample - """ - def timed_samples(): - for repetition in range(repetitions): - for sample_index, sample in enumerate(samples): - if fixed_clock is None: - yield sample, (repetition * len(samples) + sample_index) / (repetitions * len(samples)) - else: - yield sample, fixed_clock - - augmentations = [] if augmentation_specs is None else list(map(parse_augmentation, augmentation_specs)) - try: - for augmentation in augmentations: - augmentation.start(buffering=buffering) - context = PreparationContext(audio_type, augmentations) - if process_ahead == 0: - for timed_sample in timed_samples(): - yield _augment_sample(timed_sample, context=context) - else: - with LimitingPool(process_ahead=process_ahead, - initializer=_init_augmentation_worker, - initargs=(context,)) as pool: - yield from pool.imap(_augment_sample, timed_samples()) - finally: - for augmentation in augmentations: - augmentation.stop() diff --git a/training/deepspeech_training/util/signal_augmentations.py b/training/deepspeech_training/util/signal_augmentations.py deleted file mode 100644 index 6f48dc2bcf..0000000000 --- a/training/deepspeech_training/util/signal_augmentations.py +++ /dev/null @@ -1,222 +0,0 @@ - -import os -import re -import math -import random -import numpy as np - -from multiprocessing import Queue, Process -from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS -from .helpers import int_range, float_range, pick_value_from_range, MEGABYTE - -SPEC_PARSER = re.compile(r'^(?P[a-z]+)(\[(?P.*)\])?$') -BUFFER_SIZE = 1 * MEGABYTE - - -class Augmentation: - def __init__(self, p=1.0): - self.probability = float(p) - - def start(self, buffering=BUFFER_SIZE): - pass - - def apply(self, sample, clock): - raise NotImplementedError - - def stop(self): - pass - - -def _enqueue_overlay_samples(sample_source, queue, buffering=BUFFER_SIZE): - """ - As the central distribution point for overlay samples this function is supposed to run in one 
process only. - This ensures that samples are not used twice if not required. - It loads the (raw and still compressed) data and provides it to the actual augmentation workers. - These are then doing decompression, potential conversion and overlaying in parallel. - """ - # preventing cyclic import problems - from .sample_collections import samples_from_source # pylint: disable=import-outside-toplevel - samples = samples_from_source(sample_source, buffering=buffering, labeled=False) - while True: - for sample in samples: - queue.put(sample) - - -class Overlay(Augmentation): - """See "Overlay augmentation" in TRAINING.rst""" - def __init__(self, source, p=1.0, snr=3.0, layers=1): - super(Overlay, self).__init__(p) - self.source = source - self.snr = float_range(snr) - self.layers = int_range(layers) - self.queue = Queue(max(1, math.floor(self.probability * self.layers[1] * os.cpu_count()))) - self.current_sample = None - self.enqueue_process = None - - def start(self, buffering=BUFFER_SIZE): - self.enqueue_process = Process(target=_enqueue_overlay_samples, - args=(self.source, self.queue), - kwargs={'buffering': buffering}) - self.enqueue_process.start() - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - n_layers = pick_value_from_range(self.layers, clock=clock) - audio = sample.audio - overlay_data = np.zeros_like(audio) - for _ in range(n_layers): - overlay_offset = 0 - while overlay_offset < len(audio): - if self.current_sample is None: - next_overlay_sample = self.queue.get() - next_overlay_sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - self.current_sample = next_overlay_sample.audio - n_required = len(audio) - overlay_offset - n_current = len(self.current_sample) - if n_required >= n_current: # take it completely - overlay_data[overlay_offset:overlay_offset + n_current] += self.current_sample - overlay_offset += n_current - self.current_sample = None - else: # take required slice from head and keep tail for next layer or sample - overlay_data[overlay_offset:overlay_offset + n_required] += self.current_sample[0:n_required] - overlay_offset += n_required - self.current_sample = self.current_sample[n_required:] - snr_db = pick_value_from_range(self.snr, clock=clock) - orig_dbfs = max_dbfs(audio) - overlay_gain = orig_dbfs - max_dbfs(overlay_data) - snr_db - audio += overlay_data * gain_db_to_ratio(overlay_gain) - sample.audio = normalize_audio(audio, dbfs=orig_dbfs) - - def stop(self): - if self.enqueue_process is not None: - self.enqueue_process.terminate() - - -class Reverb(Augmentation): - """See "Reverb augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, delay=20.0, decay=10.0): - super(Reverb, self).__init__(p) - self.delay = float_range(delay) - self.decay = float_range(decay) - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - audio = np.array(sample.audio, dtype=np.float64) - orig_dbfs = max_dbfs(audio) - delay = pick_value_from_range(self.delay, clock=clock) - decay = pick_value_from_range(self.decay, clock=clock) - decay = gain_db_to_ratio(-decay) - result = np.copy(audio) - primes = [17, 19, 23, 29, 31] - for delay_prime in primes: # primes to minimize comb filter interference - layer = np.copy(audio) - n_delay = math.floor(delay * (delay_prime / primes[0]) * sample.audio_format.rate / 1000.0) - n_delay = max(16, n_delay) # 16 samples minimum to avoid performance trap and risk of division by zero - for w_index in range(0, math.floor(len(audio) / n_delay)): - w1 = w_index * 
n_delay - w2 = (w_index + 1) * n_delay - width = min(len(audio) - w2, n_delay) # last window could be smaller - layer[w2:w2 + width] += decay * layer[w1:w1 + width] - result += layer - audio = normalize_audio(result, dbfs=orig_dbfs) - sample.audio = np.array(audio, dtype=np.float32) - - -class Resample(Augmentation): - """See "Resample augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, rate=8000): - super(Resample, self).__init__(p) - self.rate = int_range(rate) - - def apply(self, sample, clock): - # late binding librosa and its dependencies - from librosa.core import resample # pylint: disable=import-outside-toplevel - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - rate = pick_value_from_range(self.rate, clock=clock) - audio = sample.audio - orig_len = len(audio) - audio = np.swapaxes(audio, 0, 1) - audio = resample(audio, sample.audio_format.rate, rate) - audio = resample(audio, rate, sample.audio_format.rate) - audio = np.swapaxes(audio, 0, 1)[0:orig_len] - sample.audio = audio - - -class Codec(Augmentation): - """See "Codec augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, bitrate=3200): - super(Codec, self).__init__(p) - self.bitrate = int_range(bitrate) - - def apply(self, sample, clock): - bitrate = pick_value_from_range(self.bitrate, clock=clock) - sample.change_audio_type(new_audio_type=AUDIO_TYPE_PCM) # decoding to ensure it has to get encoded again - sample.change_audio_type(new_audio_type=AUDIO_TYPE_OPUS, bitrate=bitrate) # will get decoded again downstream - - -class Gaps(Augmentation): - """See "Gaps augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, n=1, size=50.0): - super(Gaps, self).__init__(p) - self.n_gaps = int_range(n) - self.size = float_range(size) - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - audio = sample.audio - n_gaps = pick_value_from_range(self.n_gaps, clock=clock) - for _ in range(n_gaps): - size = pick_value_from_range(self.size, clock=clock) - size = int(size * sample.audio_format.rate / 1000.0) - size = min(size, len(audio) // 10) # a gap should never exceed 10 percent of the audio - offset = random.randint(0, max(0, len(audio) - size - 1)) - audio[offset:offset + size] = 0 - sample.audio = audio - - -class Volume(Augmentation): - """See "Volume augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, dbfs=3.0103): - super(Volume, self).__init__(p) - self.target_dbfs = float_range(dbfs) - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - target_dbfs = pick_value_from_range(self.target_dbfs, clock=clock) - sample.audio = normalize_audio(sample.audio, dbfs=target_dbfs) - - -def parse_augmentation(augmentation_spec): - """ - Parses an augmentation specification. - - Parameters - ---------- - augmentation_spec : str - Augmentation specification like "reverb[delay=20.0,decay=-20]". - - Returns - ------- - Instance of an augmentation class from util.signal_augmentations.*. 
- """ - match = SPEC_PARSER.match(augmentation_spec) - if not match: - raise ValueError('Augmentation specification has wrong format') - cls_name = match.group('cls') - cls_name = cls_name[0].upper() + cls_name[1:] - augmentation_cls = globals()[cls_name] if cls_name in globals() else None - if not issubclass(augmentation_cls, Augmentation) or augmentation_cls == Augmentation: - raise ValueError('Unknown augmentation: {}'.format(cls_name)) - parameters = match.group('params') - parameters = [] if parameters is None else parameters.split(',') - args = [] - kwargs = {} - for parameter in parameters: - pair = tuple(list(map(str.strip, (parameter.split('='))))) - if len(pair) == 1: - args.append(pair) - elif len(pair) == 2: - kwargs[pair[0]] = pair[1] - else: - raise ValueError('Unable to parse augmentation value assignment') - return augmentation_cls(*args, **kwargs) diff --git a/training/deepspeech_training/util/spectrogram_augmentations.py b/training/deepspeech_training/util/spectrogram_augmentations.py deleted file mode 100644 index 9e7d65738d..0000000000 --- a/training/deepspeech_training/util/spectrogram_augmentations.py +++ /dev/null @@ -1,127 +0,0 @@ -import tensorflow as tf -import tensorflow.compat.v1 as tfv1 - -from .sparse_image_warp import sparse_image_warp - -def augment_freq_time_mask(spectrogram, - frequency_masking_para=30, - time_masking_para=10, - frequency_mask_num=3, - time_mask_num=3): - time_max = tf.shape(spectrogram)[1] - freq_max = tf.shape(spectrogram)[2] - # Frequency masking - for _ in range(frequency_mask_num): - f = tf.random.uniform(shape=(), minval=0, maxval=frequency_masking_para, dtype=tf.dtypes.int32) - f0 = tf.random.uniform(shape=(), minval=0, maxval=freq_max - f, dtype=tf.dtypes.int32) - value_ones_freq_prev = tf.ones(shape=[1, time_max, f0]) - value_zeros_freq = tf.zeros(shape=[1, time_max, f]) - value_ones_freq_next = tf.ones(shape=[1, time_max, freq_max-(f0+f)]) - freq_mask = tf.concat([value_ones_freq_prev, value_zeros_freq, value_ones_freq_next], axis=2) - # mel_spectrogram[:, f0:f0 + f, :] = 0 #can't assign to tensor - # mel_spectrogram[:, f0:f0 + f, :] = value_zeros_freq #can't assign to tensor - spectrogram = spectrogram*freq_mask - - # Time masking - for _ in range(time_mask_num): - t = tf.random.uniform(shape=(), minval=0, maxval=time_masking_para, dtype=tf.dtypes.int32) - t0 = tf.random.uniform(shape=(), minval=0, maxval=time_max - t, dtype=tf.dtypes.int32) - value_zeros_time_prev = tf.ones(shape=[1, t0, freq_max]) - value_zeros_time = tf.zeros(shape=[1, t, freq_max]) - value_zeros_time_next = tf.ones(shape=[1, time_max-(t0+t), freq_max]) - time_mask = tf.concat([value_zeros_time_prev, value_zeros_time, value_zeros_time_next], axis=1) - # mel_spectrogram[:, :, t0:t0 + t] = 0 #can't assign to tensor - # mel_spectrogram[:, :, t0:t0 + t] = value_zeros_time #can't assign to tensor - spectrogram = spectrogram*time_mask - - return spectrogram - -def augment_pitch_and_tempo(spectrogram, - max_tempo=1.2, - max_pitch=1.1, - min_pitch=0.95): - original_shape = tf.shape(spectrogram) - choosen_pitch = tf.random.uniform(shape=(), minval=min_pitch, maxval=max_pitch) - choosen_tempo = tf.random.uniform(shape=(), minval=1, maxval=max_tempo) - new_freq_size = tf.cast(tf.cast(original_shape[2], tf.float32)*choosen_pitch, tf.int32) - new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32)/(choosen_tempo), tf.int32) - spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(spectrogram, -1), [new_time_size, new_freq_size]) - spectrogram_aug = 
tf.image.crop_to_bounding_box(spectrogram_aug, offset_height=0, offset_width=0, target_height=tf.shape(spectrogram_aug)[1], target_width=tf.minimum(original_shape[2], new_freq_size)) - spectrogram_aug = tf.cond(choosen_pitch < 1, - lambda: tf.image.pad_to_bounding_box(spectrogram_aug, offset_height=0, offset_width=0, - target_height=tf.shape(spectrogram_aug)[1], target_width=original_shape[2]), - lambda: spectrogram_aug) - return spectrogram_aug[:, :, :, 0] - - -def augment_speed_up(spectrogram, - speed_std=0.1): - original_shape = tf.shape(spectrogram) - choosen_speed = tf.math.abs(tf.random.normal(shape=(), stddev=speed_std)) # abs makes sure the augmention will only speed up - choosen_speed = 1 + choosen_speed - new_freq_size = tf.cast(tf.cast(original_shape[2], tf.float32), tf.int32) - new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32)/(choosen_speed), tf.int32) - spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(spectrogram, -1), [new_time_size, new_freq_size]) - return spectrogram_aug[:, :, :, 0] - -def augment_dropout(spectrogram, - keep_prob=0.95): - return tf.nn.dropout(spectrogram, rate=1-keep_prob) - - -def augment_sparse_warp(spectrogram, time_warping_para=20, interpolation_order=2, regularization_weight=0.0, num_boundary_points=1, num_control_points=1): - """Reference: https://arxiv.org/pdf/1904.08779.pdf - Args: - spectrogram: `[batch, time, frequency]` float `Tensor` - time_warping_para: 'W' parameter in paper - interpolation_order: used to put into `sparse_image_warp` - regularization_weight: used to put into `sparse_image_warp` - num_boundary_points: used to put into `sparse_image_warp`, - default=1 means boundary points on 4 corners of the image - num_control_points: number of control points - Returns: - warped_spectrogram: `[batch, time, frequency]` float `Tensor` with same - type as input image. 
- """ - # reshape to fit `sparse_image_warp`'s input shape - # (1, time steps, freq, 1), batch_size must be 1 - spectrogram = tf.expand_dims(spectrogram, -1) - - original_shape = tf.shape(spectrogram) - tau, freq_size = original_shape[1], original_shape[2] - - # to protect short audio - time_warping_para = tf.math.minimum( - time_warping_para, tf.math.subtract(tf.math.floordiv(tau, 2), 1)) - - # don't choose boundary frequency - choosen_freqs = tf.random.shuffle( - tf.add(tf.range(freq_size - 3), 1))[0: num_control_points] - - source_max = tau - time_warping_para - source_min = tf.math.minimum(source_max - num_control_points, time_warping_para) - - choosen_times = tf.random.shuffle(tf.range(source_min, limit=source_max))[0: num_control_points] - dest_time_widths = tfv1.random_uniform([num_control_points], tf.negative(time_warping_para), time_warping_para, tf.int32) - - sources = [] - dests = [] - for i in range(num_control_points): - # generate source points `t` of time axis between (W, tau-W) - rand_source_time = choosen_times[i] - rand_dest_time = rand_source_time + dest_time_widths[i] - - choosen_freq = choosen_freqs[i] - sources.append([rand_source_time, choosen_freq]) - dests.append([rand_dest_time, choosen_freq]) - - source_control_point_locations = tf.cast([sources], tf.float32) - dest_control_point_locations = tf.cast([dests], tf.float32) - - warped_spectrogram, _ = sparse_image_warp(spectrogram, - source_control_point_locations=source_control_point_locations, - dest_control_point_locations=dest_control_point_locations, - interpolation_order=interpolation_order, - regularization_weight=regularization_weight, - num_boundary_points=num_boundary_points) - return tf.reshape(warped_spectrogram, shape=(1, -1, freq_size)) From c294d80a9312563de55bd77f570743927df04777 Mon Sep 17 00:00:00 2001 From: ObliviousParadigm <47667852+ObliviousParadigm@users.noreply.github.com> Date: Sun, 14 Jun 2020 16:00:32 +0530 Subject: [PATCH 06/38] DOC: Fixed grammatical mistake. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 17b849faa8..9c1b987e93 100644 --- a/README.rst +++ b/README.rst @@ -14,7 +14,7 @@ Project DeepSpeech DeepSpeech is an open source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper `_. Project DeepSpeech uses Google's `TensorFlow `_ to make the implementation easier. -Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io `_. +Documentation for installation, usage, and training models are available on `deepspeech.readthedocs.io `_. For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub `_. 
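
The augmentation commits above all build on one value-range mechanic: a range exposes ``start``, ``end`` and ``r`` (radius) fields, the training clock runs from 0.0 (start of training) to 1.0 (end), and a picked value moves from ``start`` towards ``end`` as the clock advances, jittered by up to ``r``. The following is a minimal, self-contained sketch of ``pick_value_from_range`` as it appears in the ``util/helpers.py`` hunk; the ``ValueRange`` container name and the example numbers are assumptions for illustration, and treating a missing clock as a random one mirrors the TensorFlow variant shown in the same hunk.

.. code-block:: python

   import random
   from collections import namedtuple

   # Assumed container; the patch only shows that ranges expose start/end/r.
   ValueRange = namedtuple('ValueRange', 'start end r')

   def pick_value_from_range(value_range, clock=None):
       # Interpolate from `start` to `end` as the epoch clock runs 0.0 -> 1.0 ...
       clock = random.random() if clock is None else max(0.0, min(1.0, clock))
       value = value_range.start + clock * (value_range.end - value_range.start)
       # ... then jitter the picked value by up to the radius `r`.
       value = random.uniform(value - value_range.r, value + value_range.r)
       return round(value) if isinstance(value_range.start, int) else value

   # Halfway through training, an SNR range from 3 to 30 dB with radius 2
   # yields roughly 16.5 dB, +/- 2 dB:
   print(pick_value_from_range(ValueRange(3.0, 30.0, 2.0), clock=0.5))

``tf_pick_value_from_range`` in the same hunk mirrors this logic with stateless TensorFlow random ops, so graph augmentations can pick values inside the input pipeline.
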
From 4d541394e86079a61781ba50ad61c99d3e6d1ea4 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 2 Jun 2020 21:23:20 +0200 Subject: [PATCH 07/38] Decouple Dockerfile into build and train --- Dockerfile => Dockerfile.build.tmpl | 170 +++++++++++----------------- Dockerfile.train.tmpl | 44 +++++++ Makefile | 8 ++ doc/Scorer.rst | 4 +- doc/TRAINING.rst | 16 +++ doc/USING.rst | 16 +++ taskcluster/docker-build-base.tyml | 3 +- taskcluster/docker-image-build.yml | 4 +- taskcluster/docker-image-train.yml | 6 + 9 files changed, 163 insertions(+), 108 deletions(-) rename Dockerfile => Dockerfile.build.tmpl (54%) create mode 100644 Dockerfile.train.tmpl create mode 100644 Makefile create mode 100644 taskcluster/docker-image-train.yml diff --git a/Dockerfile b/Dockerfile.build.tmpl similarity index 54% rename from Dockerfile rename to Dockerfile.build.tmpl index 8a03b3c492..a4eebb6eba 100644 --- a/Dockerfile +++ b/Dockerfile.build.tmpl @@ -1,68 +1,56 @@ +# Please refer to the USING documentation, "Dockerfile for building from source" + # Need devel version cause we need /usr/include/cudnn.h -# for compiling libctc_decoder_with_kenlm.so FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 +ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# +ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA# # >> START Install base software # Get basic packages RUN apt-get update && apt-get install -y --no-install-recommends \ apt-utils \ + bash-completion \ build-essential \ + ca-certificates \ + cmake \ curl \ - wget \ + g++ \ + gcc \ git \ + git-lfs \ + libbz2-dev \ + libboost-all-dev \ + libgsm1-dev \ + libltdl-dev \ + liblzma-dev \ + libmagic-dev \ + libpng-dev \ + libsox-fmt-mp3 \ + libsox-dev \ + locales \ + openjdk-8-jdk \ + pkg-config \ python3 \ python3-dev \ python3-pip \ python3-wheel \ python3-numpy \ - libcurl3-dev \ - ca-certificates \ - gcc \ sox \ - libsox-fmt-mp3 \ - htop \ - nano \ - cmake \ - libboost-all-dev \ - zlib1g-dev \ - libbz2-dev \ - liblzma-dev \ - locales \ - pkg-config \ - libpng-dev \ - libsox-dev \ - libmagic-dev \ - libgsm1-dev \ - libltdl-dev \ - openjdk-8-jdk \ - bash-completion \ - g++ \ - unzip - -RUN ln -s -f /usr/bin/python3 /usr/bin/python + unzip \ + wget \ + zlib1g-dev -# Install NCCL 2.2 -RUN apt-get --no-install-recommends install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0 +RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 # Install Bazel RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb" RUN dpkg -i bazel_*.deb -# Install CUDA CLI Tools -RUN apt-get --no-install-recommends install -qq -y cuda-command-line-tools-10-0 - -# Install pip -RUN wget https://bootstrap.pypa.io/get-pip.py && \ - python3 get-pip.py && \ - rm get-pip.py - # << END Install base software - - - # >> START Configure Tensorflow Build # Clone TensorFlow from Mozilla repo @@ -70,14 +58,13 @@ RUN git clone https://github.com/mozilla/tensorflow/ WORKDIR /tensorflow RUN git checkout r1.15 - # GPU Environment Setup ENV TF_NEED_CUDA 1 -ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/" +ENV TF_CUDA_PATHS "/usr,/usr/local/cuda,/usr/lib/x86_64-linux-gnu/" ENV TF_CUDA_VERSION 10.0 -ENV TF_CUDNN_VERSION 7 +ENV TF_CUDNN_VERSION 7.6 ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0 -ENV TF_NCCL_VERSION 2.3 +ENV TF_NCCL_VERSION 2.4 # Common Environment Setup ENV TF_BUILD_CONTAINER_TYPE GPU @@ -105,14 +92,12 @@ ENV TF_NEED_TENSORRT 0 
ENV TF_NEED_GDR 0 ENV TF_NEED_VERBS 0 ENV TF_NEED_OPENCL_SYCL 0 + ENV PYTHON_BIN_PATH /usr/bin/python3.6 ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages # << END Configure Tensorflow Build - - - # >> START Configure Bazel # Running bazel inside a `docker build` command causes trouble, cf: @@ -124,39 +109,17 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ >>/etc/bazel.bazelrc -# Put cuda libraries to where they are expected to be -RUN mkdir /usr/local/cuda/lib && \ - ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \ - ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \ - ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ - ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h - - -# Set library paths -ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/ - # << END Configure Bazel +WORKDIR / -# Copy DeepSpeech repo contents to container's /DeepSpeech -COPY . /DeepSpeech/ - -# Alternative clone from GitHub -# RUN apt-get update && apt-get install -y git-lfs -# WORKDIR / -# RUN git lfs install -# RUN git clone https://github.com/mozilla/DeepSpeech.git - +RUN git clone $DEEPSPEECH_REPO WORKDIR /DeepSpeech - -RUN DS_NODECODER=1 pip3 --no-cache-dir install . +RUN git checkout $DEEPSPEECH_SHA # Link DeepSpeech native_client libs to tf folder RUN ln -s /DeepSpeech/native_client /tensorflow - - - # >> START Build and bind WORKDIR /tensorflow @@ -170,59 +133,60 @@ RUN ./configure # passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment - # Build DeepSpeech -RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} - -### -### Using TensorFlow upstream should work -### -# # Build TF pip package -# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} -# -# # Build wheel -# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg -# -# # Install tensorflow from our custom wheel -# RUN pip3 install /tmp/tensorflow_pkg/*.whl +RUN bazel build \ + --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \ + --config=monolithic \ + --config=cuda \ + -c opt \ + --copt=-O3 \ + --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + --copt=-mtune=generic \ + --copt=-march=x86-64 \ + --copt=-msse \ + --copt=-msse2 \ + --copt=-msse3 \ + --copt=-msse4.1 \ + --copt=-msse4.2 \ + --copt=-mavx \ + --copt=-fvisibility=hidden \ + //native_client:libdeepspeech.so \ + --verbose_failures \ + --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH} # Copy built libs to /DeepSpeech/native_client RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/ -# Install TensorFlow -WORKDIR /DeepSpeech/ -RUN pip3 install tensorflow-gpu==1.15.0 - - # 
Build client.cc and install Python client and decoder bindings ENV TFDIR /tensorflow + +RUN nproc + WORKDIR /DeepSpeech/native_client -RUN make deepspeech +RUN make NUM_PROCESSES=$(nproc) deepspeech WORKDIR /DeepSpeech -RUN cd native_client/python && make bindings +RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings RUN pip3 install --upgrade native_client/python/dist/*.whl -RUN cd native_client/ctcdecode && make bindings +RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl - # << END Build and bind - - - # Allow Python printing utf-8 ENV PYTHONIOENCODING UTF-8 # Build KenLM in /DeepSpeech/native_client/kenlm folder WORKDIR /DeepSpeech/native_client -RUN rm -rf kenlm \ - && git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \ - && mkdir -p build \ - && cd build \ - && cmake .. \ - && make -j 4 +RUN rm -rf kenlm && \ + git clone https://github.com/kpu/kenlm && \ + cd kenlm && \ + git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \ + mkdir -p build && \ + cd build && \ + cmake .. && \ + make -j $(nproc) # Done WORKDIR /DeepSpeech diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl new file mode 100644 index 0000000000..25afc455a7 --- /dev/null +++ b/Dockerfile.train.tmpl @@ -0,0 +1,44 @@ +# Please refer to the TRAINING documentation, "Basic Dockerfile for training" + +FROM tensorflow/tensorflow:1.15.2-gpu-py3 + +ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# +ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA# + +RUN apt-get update && apt-get install -y --no-install-recommends \ + apt-utils \ + bash-completion \ + build-essential \ + curl \ + git \ + git-lfs \ + libbz2-dev \ + locales \ + python3-venv \ + unzip \ + wget + +WORKDIR / +RUN git lfs install +RUN git clone $DEEPSPEECH_REPO + +WORKDIR /DeepSpeech +RUN git checkout $DEEPSPEECH_SHA + +# Setup a virtualenv otherwise we mess with the system and this is BAD. +RUN python3 -m venv venv/ + +ENV VIRTUAL_ENV=/DeepSpeech/venv +ENV PATH=$VIRTUAL_ENV/bin:$PATH + +# Build CTC decoder first, to avoid clashes on incompatible versions upgrades +RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings +RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl + +# Prepare deps +RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3 + +# Install DeepSpeech, no need for the decoder since we did it earlier +RUN DS_NODECODER=y pip3 install --upgrade --force-reinstall -e . + +RUN ./bin/run-ldc93s1.sh diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..2d28d24b94 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git +DEEPSPEECH_SHA ?= origin/master + +Dockerfile%: Dockerfile%.tmpl + sed \ + -e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \ + -e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \ + < $< > $@ diff --git a/doc/Scorer.rst b/doc/Scorer.rst index 78f7210104..8df94a74ce 100644 --- a/doc/Scorer.rst +++ b/doc/Scorer.rst @@ -24,7 +24,7 @@ Then use the ``generate_lm.py`` script to generate ``lm.binary`` and ``vocab-500 As input you can use a plain text (e.g. ``file.txt``) or gzipped (e.g. ``file.txt.gz``) text file with one sentence in each line. -If you are using a container created from the Dockerfile, you can use ``--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/``. +If you are using a container created from ``Dockerfile.build``, you can use ``--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/``. 
Else you have to build `KenLM <https://github.com/kpu/kenlm>`_ first and then pass the build directory to the script.
 
 .. code-block:: bash
 
@@ -54,4 +54,4 @@ The LibriSpeech LM training text used by our scorer is around 4GB uncompressed,
 With a text corpus in hand, you can then re-use the ``generate_lm.py`` and ``generate_package.py`` scripts to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit `_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior.
 
 After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_package.py`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script ` which can be used to find good default values for alpha and beta. To use it, you must first
-generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py``. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_package.py`` again, this time with the new values.
\ No newline at end of file
+generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py``. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_package.py`` again, this time with the new values.
diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst
index 99dd849a03..b7e3014b34 100644
--- a/doc/TRAINING.rst
+++ b/doc/TRAINING.rst
@@ -76,6 +76,22 @@ It has been reported for some people failure at training:
 
 Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems to help in such cases. This could also be due to an incorrect version of libcudnn. Double check your versions with the :ref:`TensorFlow 1.15 documentation `.
 
+Basic Dockerfile for training
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We provide ``Dockerfile.train`` to automatically set up a basic training environment in Docker, which should ensure that you re-use the upstream Python 3 TensorFlow GPU-enabled Docker image.
+You need to generate the Dockerfile from the template using:
+
+.. code-block:: bash
+
+   make Dockerfile.train
+
+If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters:
+
+.. code-block:: bash
+
+   make Dockerfile.train DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch
+
 Common Voice training data
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/doc/USING.rst b/doc/USING.rst
index bafbc46fc7..9032fe3408 100644
--- a/doc/USING.rst
+++ b/doc/USING.rst
@@ -186,6 +186,22 @@ Installing bindings from source
 
 If pre-built binaries aren't available for your system, you'll need to install them from scratch. Follow the :github:`native client build and installation instructions `.
 
+Dockerfile for building from source
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We provide ``Dockerfile.build`` to automatically build ``libdeepspeech.so``, the C++ native client, Python bindings, and KenLM.
+You need to generate the Dockerfile from the template using:
+
+.. 
code-block:: bash + + make Dockerfile.build + +If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters: + +.. code-block:: bash + + make Dockerfile.build DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch + Third party bindings ^^^^^^^^^^^^^^^^^^^^ diff --git a/taskcluster/docker-build-base.tyml b/taskcluster/docker-build-base.tyml index a88676aa41..6e4d2471e0 100644 --- a/taskcluster/docker-build-base.tyml +++ b/taskcluster/docker-build-base.tyml @@ -31,11 +31,12 @@ then: in: > apt-get -qq -y remove --purge ubuntu-advantage-tools && ${aptEc2Mirrors} && - apt-get -qq update && apt-get -qq -y install git wget pkg-config apt-transport-https ca-certificates curl software-properties-common && + apt-get -qq update && apt-get -qq -y install git wget pkg-config apt-transport-https ca-certificates curl software-properties-common make && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" && apt-get -qq update && apt-get -qq -y install docker-ce && mkdir -p /opt/deepspeech && git clone --quiet ${event.head.repo.url} /opt/deepspeech && cd /opt/deepspeech && git checkout --quiet ${event.head.sha} && + make ${dockerfile} DEEPSPEECH_REPO=${event.head.repo.url} DEEPSPEECH_SHA=${event.head.sha} && docker build --file ${dockerfile} . artifacts: diff --git a/taskcluster/docker-image-build.yml b/taskcluster/docker-image-build.yml index a8d5386d87..792e14af04 100644 --- a/taskcluster/docker-image-build.yml +++ b/taskcluster/docker-image-build.yml @@ -1,6 +1,6 @@ build: template_file: docker-build-base.tyml - dockerfile: "Dockerfile" + dockerfile: "Dockerfile.build" metadata: name: "DeepSpeech Docker build" - description: "Testing |docker build| of DeepSpeech" + description: "Testing |docker build| of DeepSpeech build image" diff --git a/taskcluster/docker-image-train.yml b/taskcluster/docker-image-train.yml new file mode 100644 index 0000000000..03f9abea3a --- /dev/null +++ b/taskcluster/docker-image-train.yml @@ -0,0 +1,6 @@ +build: + template_file: docker-build-base.tyml + dockerfile: "Dockerfile.train" + metadata: + name: "DeepSpeech Docker train" + description: "Testing |docker build| of DeepSpeech train image" From 0bec67d74cd546ec64c55c3eedb30d266934ef19 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Tue, 16 Jun 2020 10:10:01 +0200 Subject: [PATCH 08/38] Update bin/play.py Co-authored-by: Reuben Morais --- bin/play.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/play.py b/bin/play.py index 5c38fab0c3..e9348c8e01 100755 --- a/bin/play.py +++ b/bin/play.py @@ -40,7 +40,7 @@ def get_samples_in_play_order(): def play_collection(): augmentations = parse_augmentations(CLI_ARGS.augment) - if any(map(lambda a: not isinstance(a, SampleAugmentation), augmentations)): + if any(not isinstance(a, SampleAugmentation) for a in augmentations): print("Warning: Some of the augmentations cannot be simulated by this command.") samples = get_samples_in_play_order() samples = apply_sample_augmentations(samples, From ea21c7d24e7ae4336f3de4d9077f69b63e6f5568 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Tue, 16 Jun 2020 10:22:45 +0200 Subject: [PATCH 09/38] Apply suggestions from code review Co-authored-by: Reuben Morais --- doc/TRAINING.rst | 14 +++++++------- 
training/deepspeech_training/train.py | 2 +- training/deepspeech_training/util/config.py | 2 +- training/deepspeech_training/util/feeding.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst index f57744491c..17230914a1 100644 --- a/doc/TRAINING.rst +++ b/doc/TRAINING.rst @@ -298,10 +298,10 @@ In the documentation below, whenever a value is specified as ```` o Ranges specified with integer limits will only assume integer (rounded) values. .. warning:: - If feature caching is enabled and infinite (default), these augmentations will only be performed on first epoch and the result will be reused for subsequent epochs. This would not only hinder value ranges from reaching their intended final values, but could also lead to unintended over-fitting. In this case flag ``--cache_for_epochs N`` (with N > 1) should be used to periodically invalidate the cache and thus allow samples to be re-augmented in new ways and with current range-values. + When feature caching is enabled, by default the cache has no expiration limit and will be used for the entire training run. This will cause these augmentations to only be performed once during the first epoch and the result will be reused for subsequent epochs. This would not only hinder value ranges from reaching their intended final values, but could also lead to unintended over-fitting. In this case flag ``--cache_for_epochs N`` (with N > 1) should be used to periodically invalidate the cache after every N epochs and thus allow samples to be re-augmented in new ways and with current range-values. -Every augmentation is targeting a certain data representation of the sample - further on called *domain*. -Augmentations are applied domain-wise in the following order: +Every augmentation targets a certain representation of the sample - in this documentation these representations are referred to as *domains*. +Augmentations are applied in the following order: 1. **sample** domain: The sample just got loaded and its waveform is represented as a NumPy array. For implementation reasons these augmentations are the only ones that can be "simulated" through ``bin/play.py``. @@ -309,9 +309,9 @@ Augmentations are applied domain-wise in the following order: 3. **spectrogram** domain: The sample spectrogram is represented as a tensor. -4. **features** domain: The sample's MEL spectrogram features are represented as a tensor. +4. **features** domain: The sample's mel spectrogram features are represented as a tensor. -During each phase augmentations are applied in command-line order (the **warp** augmentation being the only exception). +Within a single domain, augmentations are applied in the same order as they appear in the command-line (the **warp** augmentation being the only exception, as it is always applied first when enabled). Sample domain augmentations @@ -384,7 +384,7 @@ Spectrogram domain augmentations **Warp augmentation** ``--augment warp[p=,shift=,order=,nbp=,ncp=,regularization_weight=]`` - Applies a non-linear image warp to the spectrogram, where the warp is specified by the source and destination locations of a (potentially small) number of control points. Of all specified spectrogram augmentations this one will always be applied first. + Applies a non-linear image warp to the spectrogram, where the warp is specified by the source and destination locations of a (potentially small) number of control points. Of all specified spectrogram augmentations this one will always be applied first. 
See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779 * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method @@ -400,7 +400,7 @@ Spectrogram domain augmentations **Frequency mask augmentation** ``--augment frequency_mask[p=,n=,size=]`` - Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. + Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779 * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index 81c4ddd1c4..175032da52 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -527,7 +527,7 @@ def run_set(set_name, epoch, init_op, dataset=None): step_summary_writer = step_summary_writers.get(set_name) checkpoint_time = time.time() - if is_train and FLAGS.cache_for_epochs > 0 and FLAGS.feature_cache is not None: + if is_train and FLAGS.cache_for_epochs > 0 and FLAGS.feature_cache: feature_cache_index = FLAGS.feature_cache + '.index' if epoch % FLAGS.cache_for_epochs == 0 and os.path.isfile(feature_cache_index): log_info('Invalidating feature cache') diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index 0786ae4292..13a362201d 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -32,7 +32,7 @@ def initialize_globals(): # Augmentations c.augmentations = parse_augmentations(FLAGS.augment) - if len(c.augmentations) > 0 and FLAGS.feature_cache is not None and FLAGS.cache_for_epochs == 0: + if len(c.augmentations) > 0 and FLAGS.feature_cache and FLAGS.cache_for_epochs == 0: log_warn('Due to current feature-cache settings the exact same sample augmentations of the first ' 'epoch will be repeated on all following epochs. This could lead to unintended over-fitting. 
' 'You could use --cache_for_epochs to invalidate the cache after a given number of epochs.')
diff --git a/training/deepspeech_training/util/feeding.py b/training/deepspeech_training/util/feeding.py
index ff3a1a7536..9dbdae2bad 100644
--- a/training/deepspeech_training/util/feeding.py
+++ b/training/deepspeech_training/util/feeding.py
@@ -132,7 +132,7 @@ def batch_fn(sample_ids, features, features_len, transcripts):
output_types=(tf.string, tf.float32, tf.int32, (tf.int64, tf.int32, tf.int64), tf.float64))
.map(process_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE))
- if cache_path is not None:
+ if cache_path:
dataset = dataset.cache(cache_path)
dataset = (dataset.window(batch_size, drop_remainder=train_phase).flat_map(batch_fn)
.prefetch(len(Config.available_devices)))

From 5b6de213d8dc8893647261f38dcfaf454854225c Mon Sep 17 00:00:00 2001
From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com>
Date: Tue, 16 Jun 2020 11:07:57 +0200
Subject: [PATCH 10/38] Follow-up on PR comments; removed warp augmentation; split pitch_and_tempo augmentation

---
 doc/TRAINING.rst | 31 +--
 .../deepspeech_training/util/augmentations.py | 154 ++++--------
 training/deepspeech_training/util/feeding.py | 11 +-
 .../util/sparse_image_warp.py | 220 ------------------
 4 files changed, 61 insertions(+), 355 deletions(-)
 delete mode 100644 training/deepspeech_training/util/sparse_image_warp.py

diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst
index 17230914a1..be904120f3 100644
--- a/doc/TRAINING.rst
+++ b/doc/TRAINING.rst
@@ -311,7 +311,7 @@ Augmentations are applied in the following order:

4. **features** domain: The sample's mel spectrogram features are represented as a tensor.

-Within a single domain, augmentations are applied in the same order as they appear in the command-line (the **warp** augmentation being the only exception, as it is always applied first when enabled).
+Within a single domain, augmentations are applied in the same order as they appear in the command-line.


Sample domain augmentations
@@ -365,17 +365,15 @@ Sample domain augmentations
Spectrogram domain augmentations
--------------------------------

-**Pitch and tempo augmentation** ``--augment pitch_and_tempo[p=<float>,pitch=<float-range>,tempo=<float-range>]``
- Scales spectrogram on time and frequency axis and thus changes pitch and playback tempo.
+**Pitch augmentation** ``--augment pitch[p=<float>,pitch=<float-range>]``
+ Scales spectrogram on frequency axis and thus changes pitch.

 * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

 * **pitch**: pitch factor by which the frequency axis is scaled (e.g. a value of 2.0 will raise audio frequency by one octave)

- * **tempo**: tempo factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)
-
-**Speed augmentation** ``--augment speed[p=<float>,factor=<float-range>]``
+**Tempo augmentation** ``--augment tempo[p=<float>,factor=<float-range>]``
 Scales spectrogram on time axis and thus changes playback tempo.

 * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method

 * **factor**: speed factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)


-**Warp augmentation** ``--augment warp[p=<float>,shift=<float-range>,order=<int-range>,nbp=<int-range>,ncp=<int-range>,regularization_weight=<float-range>]``
- Applies a non-linear image warp to the spectrogram, where the warp is specified by the source and destination locations of a (potentially small) number of control points.
Of all specified spectrogram augmentations this one will always be applied first. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779 - - * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method - - * **shift**: maximum shift distance of control points on time axis in ms - - * **order**: polynomial order used by the spline interpolation - - * **nbp**: how many zero-flow boundary points to include at each spectrogram edge - - * **ncp**: how many control points to warp inside the spectrogram - - * **regularization_weight**: weight on smoothness regularizer in interpolation - - **Frequency mask augmentation** ``--augment frequency_mask[p=,n=,size=]`` Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779 @@ -467,9 +449,8 @@ Example training with all augmentations: --augment resample[p=0.1,rate=12000:8000~4000] \ --augment codec[p=0.1,bitrate=48000:16000] \ --augment volume[p=0.1,dbfs=-10:-40] \ - --augment pitch_and_tempo[p=0.1,pitch=1~0.2,tempo=1~0.2] \ - --augment speed[p=0.1,factor=1~0.5] \ - --augment warp[p=0.1,shift=30:60~20,ncp=4~3] \ + --augment pitch[p=0.1,pitch=1~0.2] \ + --augment tempo[p=0.1,factor=1~0.5] \ --augment frequency_mask[p=0.1,n=1:3,size=1:5] \ --augment time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40] \ --augment dropout[p=0.1,rate=0.05] \ diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index eff08fbcb8..5c17dedd2e 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -8,6 +8,7 @@ from multiprocessing import Queue, Process from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS from .helpers import LimitingPool, int_range, float_range, pick_value_from_range, tf_pick_value_from_range, MEGABYTE +from .sample_collections import samples_from_source BUFFER_SIZE = 1 * MEGABYTE SPEC_PARSER = re.compile(r'^(?P[a-z_]+)(\[(?P.*)\])?$') @@ -36,21 +37,25 @@ def __init__(self, p=1.0, domain='spectrogram'): raise ValueError('Unsupported augmentation domain: {}'.format(domain)) self.domain = domain - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): raise NotImplementedError - def apply_with_probability(self, tensor, clock=0.0): + def apply_with_probability(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel rv = tf.random.stateless_uniform([], seed=(clock * tf.int32.min, clock * tf.int32.max)) return tf.cond(tf.less(rv, self.probability), - lambda: self.apply(tensor, clock=clock), + lambda: self.apply(tensor, transcript=transcript, clock=clock), lambda: tensor) - def maybe_apply(self, domain, tensor, clock=0.0): + def maybe_apply(self, domain, tensor, transcript=None, clock=0.0): if domain == self.domain: - return self.apply_with_probability(tensor, clock=clock) + return self.apply_with_probability(tensor, transcript=transcript, clock=clock) return tensor + def units_per_ms(self): + from .flags import FLAGS # pylint: disable=import-outside-toplevel + return FLAGS.audio_sample_rate / 1000.0 if self.domain == 'signal' else 1.0 / FLAGS.feature_win_step + def parse_augmentation(augmentation_spec): """ @@ -103,7 +108,7 @@ def parse_augmentations(augmentation_specs): return [] if augmentation_specs is 
None else list(map(parse_augmentation, augmentation_specs)) -def apply_graph_augmentations(domain, tensor, augmentations, clock=0.0): +def apply_graph_augmentations(domain, tensor, augmentations, transcript=None, clock=0.0): """ Augments training sample tensor of a certain domain with matching augmentations of passed list. @@ -115,6 +120,7 @@ def apply_graph_augmentations(domain, tensor, augmentations, clock=0.0): Tensor to apply augmentations to. augmentations : list of augmentation class instances from util.augmentations.*. List of augmentations of which only the spectrogram ones will get applied to the samples. + transcript : SparseTensor clock : Tensor of type float32 Time indicator for augmentation value-ranges. Running from 0.0 (start of training) to 1.0 (end of training). @@ -124,13 +130,9 @@ def apply_graph_augmentations(domain, tensor, augmentations, clock=0.0): The augmented spectrogram """ if augmentations is not None: - # Warp has to come before any spectrogram masking - for augmentation in augmentations: - if isinstance(augmentation, Warp): - tensor = augmentation.maybe_apply(domain, tensor, clock=clock) for augmentation in augmentations: - if isinstance(augmentation, GraphAugmentation) and not isinstance(augmentation, Warp): - tensor = augmentation.maybe_apply(domain, tensor, clock=clock) + if isinstance(augmentation, GraphAugmentation): + tensor = augmentation.maybe_apply(domain, tensor, transcript=transcript, clock=clock) return tensor @@ -204,7 +206,7 @@ def timed_samples(): if final_clock is not None: assert 0.0 <= final_clock <= 1.0 assert clock <= final_clock - augmentations = list(filter(lambda aug: isinstance(aug, SampleAugmentation), augmentations)) + augmentations = [aug for aug in augmentations if isinstance(aug, SampleAugmentation)] try: for augmentation in augmentations: augmentation.start(buffering=buffering) @@ -229,8 +231,6 @@ def _enqueue_overlay_samples(sample_source, queue, buffering=BUFFER_SIZE): It loads the (raw and still compressed) data and provides it to the actual augmentation workers. These are then doing decompression, potential conversion and overlaying in parallel. 
""" - # preventing cyclic import problems - from .sample_collections import samples_from_source # pylint: disable=import-outside-toplevel samples = samples_from_source(sample_source, buffering=buffering, labeled=False) while True: for sample in samples: @@ -238,7 +238,7 @@ def _enqueue_overlay_samples(sample_source, queue, buffering=BUFFER_SIZE): class Overlay(SampleAugmentation): - """See "Overlay augmentation" in TRAINING.rst""" + """See "Overlay augmentation" in training documentation""" def __init__(self, source, p=1.0, snr=3.0, layers=1): super(Overlay, self).__init__(p) self.source = source @@ -288,7 +288,7 @@ def stop(self): class Codec(SampleAugmentation): - """See "Codec augmentation" in TRAINING.rst""" + """See "Codec augmentation" in training documentation""" def __init__(self, p=1.0, bitrate=3200): super(Codec, self).__init__(p) self.bitrate = int_range(bitrate) @@ -300,7 +300,7 @@ def apply(self, sample, clock=0.0): class Reverb(SampleAugmentation): - """See "Reverb augmentation" in TRAINING.rst""" + """See "Reverb augmentation" in training documentation""" def __init__(self, p=1.0, delay=20.0, decay=10.0): super(Reverb, self).__init__(p) self.delay = float_range(delay) @@ -330,7 +330,7 @@ def apply(self, sample, clock=0.0): class Resample(SampleAugmentation): - """See "Resample augmentation" in TRAINING.rst""" + """See "Resample augmentation" in training documentation""" def __init__(self, p=1.0, rate=8000): super(Resample, self).__init__(p) self.rate = int_range(rate) @@ -350,7 +350,7 @@ def apply(self, sample, clock=0.0): class Volume(SampleAugmentation): - """See "Volume augmentation" in TRAINING.rst""" + """See "Volume augmentation" in training documentation""" def __init__(self, p=1.0, dbfs=3.0103): super(Volume, self).__init__(p) self.target_dbfs = float_range(dbfs) @@ -361,25 +361,22 @@ def apply(self, sample, clock=0.0): sample.audio = normalize_audio(sample.audio, dbfs=target_dbfs) -class PitchAndTempo(GraphAugmentation): - """See "Pitch and tempo augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, tempo=1.2, pitch=(1.075, 1.075, 0.125)): - super(PitchAndTempo, self).__init__(p, domain='spectrogram') - self.tempo = float_range(tempo) +class Pitch(GraphAugmentation): + """See "Pitch augmentation" in training documentation""" + def __init__(self, p=1.0, pitch=(1.075, 1.075, 0.125)): + super(Pitch, self).__init__(p, domain='spectrogram') self.pitch = float_range(pitch) - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel original_shape = tf.shape(tensor) pitch = tf_pick_value_from_range(self.pitch, clock=clock) - tempo = tf.math.maximum(1.0, tf_pick_value_from_range(self.tempo, clock=clock)) new_freq_size = tf.cast(tf.cast(original_shape[2], tf.float32) * pitch, tf.int32) - new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32) / tempo, tf.int32) - spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(tensor, -1), [new_time_size, new_freq_size]) + spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(tensor, -1), [original_shape[1], new_freq_size]) spectrogram_aug = tf.image.crop_to_bounding_box(spectrogram_aug, offset_height=0, offset_width=0, - target_height=tf.shape(spectrogram_aug)[1], + target_height=original_shape[1], target_width=tf.math.minimum(original_shape[2], new_freq_size)) spectrogram_aug = tf.cond(pitch < 1, lambda: tf.image.pad_to_bounding_box(spectrogram_aug, @@ -391,82 +388,34 @@ def apply(self, tensor, clock=0.0): return 
spectrogram_aug[:, :, :, 0] -class Speed(GraphAugmentation): - """See "Speed augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, factor=1.1): - super(Speed, self).__init__(p, domain='spectrogram') +class Tempo(GraphAugmentation): + """See "Tempo augmentation" in training documentation""" + def __init__(self, p=1.0, factor=1.1, max_time=-1): + super(Tempo, self).__init__(p, domain='spectrogram') self.factor = float_range(factor) + self.max_time = float(max_time) - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel factor = tf_pick_value_from_range(self.factor, clock=clock) original_shape = tf.shape(tensor) new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32) / factor, tf.int32) + if transcript is not None: + new_time_size = tf.math.maximum(new_time_size, tf.shape(transcript)[1]) + if self.max_time > 0: + new_time_size = tf.math.minimum(new_time_size, tf.cast(self.max_time * self.units_per_ms(), tf.int32)) spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(tensor, -1), [new_time_size, original_shape[2]]) return spectrogram_aug[:, :, :, 0] -class Warp(GraphAugmentation): - """See "Warp augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, shift=100.0, order=3, nbp=1, ncp=1, regularization_weight=0.0): - super(Warp, self).__init__(p, domain='spectrogram') - self.shift = float_range(shift) - self.order = int_range(order) - self.nbp = int_range(nbp) - self.ncp = int_range(ncp) - # Making this a value-range is impossible, as it would get a tensor which would downstream be used as parameter - # of a comparison inside tensorflow.contrib.image.python.ops.interpolate_spline. This is not supported. - self.regularization_weight = float(regularization_weight) - - def apply(self, tensor, clock=0.0): - import tensorflow as tf # pylint: disable=import-outside-toplevel - from .flags import FLAGS # pylint: disable=import-outside-toplevel - from .sparse_image_warp import sparse_image_warp # pylint: disable=import-outside-toplevel - - # reshape to fit `sparse_image_warp`'s input shape (1, time steps, freq, 1), batch_size must be 1 - expanded_spectrogram = tf.expand_dims(tensor, -1) - original_shape = tf.shape(expanded_spectrogram) - tau, freq_size = original_shape[1], original_shape[2] - seed = (clock * tf.int32.min, clock * tf.int32.max) - - shift = tf_pick_value_from_range(self.shift, clock=clock) - shift *= FLAGS.audio_sample_rate / (FLAGS.feature_win_step * 1000.0) # number of windows - shift = tf.math.minimum(tf.cast(shift, dtype=tf.int32), tf.math.floordiv(tau, 2) - 1) # to protect short audio - nbp = tf_pick_value_from_range(self.nbp, clock=clock) - ncp = tf_pick_value_from_range(self.ncp, clock=clock) - # workaround for missing stateless shuffle support - frequencies = tf.random.stateless_uniform([2 * ncp], seed, minval=1, maxval=freq_size - 2, dtype=tf.int32) - frequencies = tf.unique(tf.concat([frequencies, tf.range(1, limit=freq_size - 3)], axis=0))[0][0:ncp] - source_max = tau - shift - source_min = tf.math.minimum(source_max - ncp, shift) - # workaround for missing stateless shuffle support - src_times = tf.random.stateless_uniform([2 * ncp], seed, minval=source_min, maxval=source_max, dtype=tf.int32) - src_times = tf.unique(tf.concat([src_times, tf.range(1, limit=source_max)], axis=0))[0][0:ncp] - dst_times = src_times + tf.random.stateless_uniform([ncp], seed, minval=-shift, maxval=shift, dtype=tf.int32) - scp_locations = 
tf.cast([tf.transpose(tf.stack([src_times, frequencies]))], dtype=tf.float32) - dcp_locations = tf.cast([tf.transpose(tf.stack([dst_times, frequencies]))], dtype=tf.float32) - - order = tf_pick_value_from_range(self.order, clock=clock) - order = tf.math.maximum(3, order) # prevents "Input matrix is not invertible." exception - order = tf.cast(order, tf.float32) - - spectrogram_aug, _ = sparse_image_warp(expanded_spectrogram, - source_control_point_locations=scp_locations, - dest_control_point_locations=dcp_locations, - interpolation_order=order, - regularization_weight=self.regularization_weight, - num_boundary_points=nbp) - return tf.reshape(spectrogram_aug, shape=(1, -1, freq_size)) - - class FrequencyMask(GraphAugmentation): - """See "Frequency mask augmentation" in TRAINING.rst""" + """See "Frequency mask augmentation" in training documentation""" def __init__(self, p=1.0, n=3, size=2): super(FrequencyMask, self).__init__(p, domain='spectrogram') self.n = int_range(n) # pylint: disable=invalid-name self.size = int_range(size) - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel time_max = tf.shape(tensor)[1] freq_max = tf.shape(tensor)[2] @@ -486,25 +435,20 @@ def body(i, spectrogram_aug): class TimeMask(GraphAugmentation): - """See "Time mask augmentation" in TRAINING.rst""" + """See "Time mask augmentation" in training documentation""" def __init__(self, p=1.0, domain='spectrogram', n=3, size=10.0): super(TimeMask, self).__init__(p, domain=domain) self.n = int_range(n) # pylint: disable=invalid-name self.size = float_range(size) - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel - from .flags import FLAGS # pylint: disable=import-outside-toplevel - time_factor = FLAGS.audio_sample_rate / 1000.0 # samples per ms - if self.domain != 'signal': - time_factor /= FLAGS.feature_win_step # windows per ms - time_max = tf.shape(tensor)[0] if self.domain == 'signal' else tf.shape(tensor)[1] + time_max = tf.shape(tensor)[0 if self.domain == 'signal' else 1] n = tf_pick_value_from_range(self.n, clock=clock) def body(i, augmented): - size = tf.cast(tf_pick_value_from_range(self.size, clock=clock) * time_factor, dtype=tf.int32) + size = tf.cast(tf_pick_value_from_range(self.size, clock=clock) * self.units_per_ms(), dtype=tf.int32) size = tf.math.maximum(1, tf.math.minimum(time_max - 1, size)) - tf.print(size) seed = tf.cast(clock * tf.int32.max, tf.int32) - i t0 = tf.random.stateless_uniform((), (-seed, seed), minval=0, maxval=time_max - size, dtype=tf.dtypes.int32) rest = time_max - t0 - size @@ -521,12 +465,12 @@ def body(i, augmented): class Dropout(GraphAugmentation): - """See "Dropout augmentation" in TRAINING.rst""" + """See "Dropout augmentation" in training documentation""" def __init__(self, p=1.0, domain='spectrogram', rate=0.05): super(Dropout, self).__init__(p, domain=domain) self.rate = float_range(rate) - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel rate = tf_pick_value_from_range(self.rate, clock=clock) rate = tf.math.maximum(0.0, rate) @@ -539,12 +483,12 @@ def apply(self, tensor, clock=0.0): class Add(GraphAugmentation): - """See "Add augmentation" in TRAINING.rst""" + """See "Add augmentation" in training documentation""" def __init__(self, p=1.0, 
domain='features', stddev=5): super(Add, self).__init__(p, domain=domain) self.stddev = float_range(stddev) - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel stddev = tf_pick_value_from_range(self.stddev, clock=clock) seed = (clock * tf.int32.min, clock * tf.int32.max) @@ -552,12 +496,12 @@ def apply(self, tensor, clock=0.0): class Multiply(GraphAugmentation): - """See "Multiply augmentation" in TRAINING.rst""" + """See "Multiply augmentation" in training documentation""" def __init__(self, p=1.0, domain='features', stddev=5): super(Multiply, self).__init__(p, domain=domain) self.stddev = float_range(stddev) - def apply(self, tensor, clock=0.0): + def apply(self, tensor, transcript=None, clock=0.0): import tensorflow as tf # pylint: disable=import-outside-toplevel stddev = tf_pick_value_from_range(self.stddev, clock=clock) seed = (clock * tf.int32.min, clock * tf.int32.max) diff --git a/training/deepspeech_training/util/feeding.py b/training/deepspeech_training/util/feeding.py index 9dbdae2bad..4c9b681d3b 100644 --- a/training/deepspeech_training/util/feeding.py +++ b/training/deepspeech_training/util/feeding.py @@ -18,7 +18,7 @@ from .helpers import remember_exception, MEGABYTE -def audio_to_features(audio, sample_rate, clock=0.0, train_phase=False, augmentations=None, sample_id=None): +def audio_to_features(audio, sample_rate, transcript=None, clock=0.0, train_phase=False, augmentations=None, sample_id=None): if train_phase: # We need the lambdas to make TensorFlow happy. # pylint: disable=unnecessary-lambda @@ -29,7 +29,7 @@ def audio_to_features(audio, sample_rate, clock=0.0, train_phase=False, augmenta name='matching_sample_rate') if train_phase and augmentations is not None: - audio = apply_graph_augmentations('signal', audio, augmentations, clock=clock) + audio = apply_graph_augmentations('signal', audio, augmentations, transcript=transcript, clock=clock) spectrogram = contrib_audio.audio_spectrogram(audio, window_size=Config.audio_window_samples, @@ -37,7 +37,7 @@ def audio_to_features(audio, sample_rate, clock=0.0, train_phase=False, augmenta magnitude_squared=True) if train_phase and augmentations is not None: - spectrogram = apply_graph_augmentations('spectrogram', spectrogram, augmentations, clock=clock) + spectrogram = apply_graph_augmentations('spectrogram', spectrogram, augmentations, transcript=transcript, clock=clock) features = contrib_audio.mfcc(spectrogram=spectrogram, sample_rate=sample_rate, @@ -46,7 +46,7 @@ def audio_to_features(audio, sample_rate, clock=0.0, train_phase=False, augmenta features = tf.reshape(features, [-1, Config.n_input]) if train_phase and augmentations is not None: - features = apply_graph_augmentations('features', features, augmentations, clock=clock) + features = apply_graph_augmentations('features', features, augmentations, transcript=transcript, clock=clock) return features, tf.shape(input=features)[0] @@ -64,13 +64,14 @@ def audiofile_to_features(wav_filename, clock=0.0, train_phase=False, augmentati def entry_to_features(sample_id, audio, sample_rate, transcript, clock, train_phase=False, augmentations=None): # https://bugs.python.org/issue32117 + sparse_transcript = tf.SparseTensor(*transcript) features, features_len = audio_to_features(audio, sample_rate, + transcript=sparse_transcript, clock=clock, train_phase=train_phase, augmentations=augmentations, sample_id=sample_id) - sparse_transcript = tf.SparseTensor(*transcript) return 
sample_id, features, features_len, sparse_transcript diff --git a/training/deepspeech_training/util/sparse_image_warp.py b/training/deepspeech_training/util/sparse_image_warp.py deleted file mode 100644 index 0fcdba0ad4..0000000000 --- a/training/deepspeech_training/util/sparse_image_warp.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Image warping using sparse flow defined at control points.""" - -# The following code is from: https://github.com/tensorflow/tensorflow/blob/v1.14.0/tensorflow/contrib/image/python/ops/sparse_image_warp.py -# But refactored for dynamic tensor shape compatibility -# The core idea is to replace every numpy implementation with tensorflow implementation - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -import tensorflow.compat.v1 as tfv1 -from tensorflow.compat import dimension_value -from tensorflow.contrib.image.python.ops import dense_image_warp -from tensorflow.contrib.image.python.ops import interpolate_spline - -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops - -def _to_float32(value): - return tf.cast(value, tf.float32) - -def _to_int32(value): - return tf.cast(value, tf.int32) - -def _get_grid_locations(image_height, image_width): - """Wrapper for np.meshgrid.""" - tfv1.assert_type(image_height, tf.int32) - tfv1.assert_type(image_width, tf.int32) - - y_range = tf.range(image_height) - x_range = tf.range(image_width) - y_grid, x_grid = tf.meshgrid(y_range, x_range, indexing='ij') - return tf.stack((y_grid, x_grid), -1) - - -def _expand_to_minibatch(tensor, batch_size): - """Tile arbitrarily-sized np_array to include new batch dimension.""" - ndim = tf.size(tf.shape(tensor)) - ones = tf.ones((ndim,), tf.int32) - - tiles = tf.concat(([batch_size], ones), 0) - return tf.tile(tf.expand_dims(tensor, 0), tiles) - - -def _get_boundary_locations(image_height, image_width, num_points_per_edge): - """Compute evenly-spaced indices along edge of image.""" - image_height_end = _to_float32(tf.math.subtract(image_height, 1)) - image_width_end = _to_float32(tf.math.subtract(image_width, 1)) - y_range = tf.linspace(0.0, image_height_end, num_points_per_edge + 2) - x_range = tf.linspace(0.0, image_height_end, num_points_per_edge + 2) - ys, xs = tf.meshgrid(y_range, x_range, indexing='ij') - is_boundary = tf.logical_or( - tf.logical_or(tf.equal(xs, 0.0), tf.equal(xs, image_width_end)), - tf.logical_or(tf.equal(ys, 0.0), tf.equal(ys, image_height_end))) - return tf.stack([tf.boolean_mask(ys, is_boundary), tf.boolean_mask(xs, is_boundary)], axis=-1) - - -def _add_zero_flow_controls_at_boundary(control_point_locations, - control_point_flows, image_height, - image_width, boundary_points_per_edge): - """Add control points for zero-flow boundary conditions. 
- - Augment the set of control points with extra points on the - boundary of the image that have zero flow. - - Args: - control_point_locations: input control points - control_point_flows: their flows - image_height: image height - image_width: image width - boundary_points_per_edge: number of points to add in the middle of each - edge (not including the corners). - The total number of points added is - 4 + 4*(boundary_points_per_edge). - - Returns: - merged_control_point_locations: augmented set of control point locations - merged_control_point_flows: augmented set of control point flows - """ - - batch_size = dimension_value(tf.shape(control_point_locations)[0]) - - boundary_point_locations = _get_boundary_locations(image_height, image_width, - boundary_points_per_edge) - boundary_point_shape = tf.shape(boundary_point_locations) - boundary_point_flows = tf.zeros([boundary_point_shape[0], 2]) - - minbatch_locations = _expand_to_minibatch(boundary_point_locations, batch_size) - type_to_use = control_point_locations.dtype - boundary_point_locations = tf.cast(minbatch_locations, type_to_use) - - minbatch_flows = _expand_to_minibatch(boundary_point_flows, batch_size) - - boundary_point_flows = tf.cast(minbatch_flows, type_to_use) - - merged_control_point_locations = tf.concat( - [control_point_locations, boundary_point_locations], 1) - - merged_control_point_flows = tf.concat( - [control_point_flows, boundary_point_flows], 1) - - return merged_control_point_locations, merged_control_point_flows - - -def sparse_image_warp(image, - source_control_point_locations, - dest_control_point_locations, - interpolation_order=2, - regularization_weight=0.0, - num_boundary_points=0, - name='sparse_image_warp'): - """Image warping using correspondences between sparse control points. - - Apply a non-linear warp to the image, where the warp is specified by - the source and destination locations of a (potentially small) number of - control points. First, we use a polyharmonic spline - (`tf.contrib.image.interpolate_spline`) to interpolate the displacements - between the corresponding control points to a dense flow field. - Then, we warp the image using this dense flow field - (`tf.contrib.image.dense_image_warp`). - - Let t index our control points. For regularization_weight=0, we have: - warped_image[b, dest_control_point_locations[b, t, 0], - dest_control_point_locations[b, t, 1], :] = - image[b, source_control_point_locations[b, t, 0], - source_control_point_locations[b, t, 1], :]. - - For regularization_weight > 0, this condition is met approximately, since - regularized interpolation trades off smoothness of the interpolant vs. - reconstruction of the interpolant at the control points. - See `tf.contrib.image.interpolate_spline` for further documentation of the - interpolation_order and regularization_weight arguments. 
- - - Args: - image: `[batch, height, width, channels]` float `Tensor` - source_control_point_locations: `[batch, num_control_points, 2]` float - `Tensor` - dest_control_point_locations: `[batch, num_control_points, 2]` float - `Tensor` - interpolation_order: polynomial order used by the spline interpolation - regularization_weight: weight on smoothness regularizer in interpolation - num_boundary_points: How many zero-flow boundary points to include at - each image edge.Usage: - num_boundary_points=0: don't add zero-flow points - num_boundary_points=1: 4 corners of the image - num_boundary_points=2: 4 corners and one in the middle of each edge - (8 points total) - num_boundary_points=n: 4 corners and n-1 along each edge - name: A name for the operation (optional). - - Note that image and offsets can be of type tf.half, tf.float32, or - tf.float64, and do not necessarily have to be the same type. - - Returns: - warped_image: `[batch, height, width, channels]` float `Tensor` with same - type as input image. - flow_field: `[batch, height, width, 2]` float `Tensor` containing the dense - flow field produced by the interpolation. - """ - - image = ops.convert_to_tensor(image) - source_control_point_locations = ops.convert_to_tensor( - source_control_point_locations) - dest_control_point_locations = ops.convert_to_tensor( - dest_control_point_locations) - - control_point_flows = ( - dest_control_point_locations - source_control_point_locations) - - clamp_boundaries = num_boundary_points > 0 - boundary_points_per_edge = num_boundary_points - 1 - - with ops.name_scope(name): - image_shape = tf.shape(image) - batch_size, image_height, image_width = image_shape[0], image_shape[1], image_shape[2] - - # This generates the dense locations where the interpolant - # will be evaluated. - grid_locations = _get_grid_locations(image_height, image_width) - - flattened_grid_locations = tf.reshape(grid_locations, - [tf.multiply(image_height, image_width), 2]) - - # flattened_grid_locations = constant_op.constant( - # _expand_to_minibatch(flattened_grid_locations, batch_size), image.dtype) - flattened_grid_locations = _expand_to_minibatch(flattened_grid_locations, batch_size) - flattened_grid_locations = tf.cast(flattened_grid_locations, dtype=image.dtype) - - if clamp_boundaries: - (dest_control_point_locations, - control_point_flows) = _add_zero_flow_controls_at_boundary( - dest_control_point_locations, control_point_flows, image_height, - image_width, boundary_points_per_edge) - - flattened_flows = interpolate_spline.interpolate_spline( - dest_control_point_locations, control_point_flows, - flattened_grid_locations, interpolation_order, regularization_weight) - - dense_flows = array_ops.reshape(flattened_flows, - [batch_size, image_height, image_width, 2]) - - warped_image = dense_image_warp.dense_image_warp(image, dense_flows) - - return warped_image, dense_flows From aeb4c5b10599acde1631583ec7433f8441f688b3 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 16 Jun 2020 11:22:58 +0200 Subject: [PATCH 11/38] Fix #2942: Document supported platforms --- doc/SUPPORTED_PLATFORMS.rst | 69 +++++++++++++++++++++++++++++++++++++ doc/index.rst | 2 ++ 2 files changed, 71 insertions(+) create mode 100644 doc/SUPPORTED_PLATFORMS.rst diff --git a/doc/SUPPORTED_PLATFORMS.rst b/doc/SUPPORTED_PLATFORMS.rst new file mode 100644 index 0000000000..1ccfb7e3aa --- /dev/null +++ b/doc/SUPPORTED_PLATFORMS.rst @@ -0,0 +1,69 @@ +.. 
_supported-platforms-inference: + +Supported platforms for inference +================================= + +Here we maintain the list of supported platforms for running inference. + +Linux / AMD64 without GPU +^^^^^^^^^^^^^^^^^^^^^^^^^ +* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) +* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8) +* Full TensorFlow runtime (``deepspeech`` packages) +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +Linux / AMD64 with GPU +^^^^^^^^^^^^^^^^^^^^^^ +* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) +* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8) +* CUDA 10.0 (and capable GPU) +* Full TensorFlow runtime (``deepspeech`` packages) +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +Linux / ARMv7 +^^^^^^^^^^^^^ +* Cortex-A53 compatible ARMv7 SoC with Neon support +* Raspbian Buster-compatible distribution +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +Linux / Aarch64 +^^^^^^^^^^^^^^^ +* Cortex-A72 compatible Aarch64 SoC +* ARMbian Buster-compatible distribution +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +Android / ARMv7 +^^^^^^^^^^^^^^^ +* ARMv7 SoC with Neon support +* Android 7.0-10.0 +* NDK API level >= 21 +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +Android / Aarch64 +^^^^^^^^^^^^^^^^^ +* Aarch64 SoC +* Android 7.0-10.0 +* NDK API level >= 21 +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +macOS / AMD64 +^^^^^^^^^^^^^ +* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) +* macOS >= 10.10 +* Full TensorFlow runtime (``deepspeech`` packages) +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +Windows / AMD64 without GPU +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) +* Windows Server >= 2012 R2 ; Windows >= 8.1 +* Full TensorFlow runtime (``deepspeech`` packages) +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) + +Windows / AMD64 with GPU +^^^^^^^^^^^^^^^^^^^^^^^^ +* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference) +* Windows Server >= 2012 R2 ; Windows >= 8.1 +* CUDA 10.0 (and capable GPU) +* Full TensorFlow runtime (``deepspeech`` packages) +* TensorFlow Lite runtime (``deepspeech-tflite`` packages) diff --git a/doc/index.rst b/doc/index.rst index 008cbaa227..9bcfe91236 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -58,6 +58,8 @@ See the output of ``deepspeech -h`` for more information on the use of ``deepspe TRAINING + SUPPORTED_PLATFORMS + .. 
toctree:: :maxdepth: 2 :caption: Decoder and scorer From 2d5dcc359a694d506057fbf048ecf7996e71a120 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Tue, 16 Jun 2020 13:32:32 +0200 Subject: [PATCH 12/38] Tests for TF based value range picking --- tests/test_value_range.py | 61 +++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/tests/test_value_range.py b/tests/test_value_range.py index 7e17b3eccd..c08dcbd5cc 100644 --- a/tests/test_value_range.py +++ b/tests/test_value_range.py @@ -1,6 +1,8 @@ import unittest -from deepspeech_training.util.helpers import ValueRange, get_value_range, pick_value_from_range +import numpy as np +import tensorflow as tf +from deepspeech_training.util.helpers import ValueRange, get_value_range, pick_value_from_range, tf_pick_value_from_range class TestValueRange(unittest.TestCase): @@ -56,12 +58,23 @@ def test_float_int_3tuple(self): class TestPickValueFromFixedRange(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(TestPickValueFromFixedRange, self).__init__(*args, **kwargs) + self.session = tf.Session() + self.clock_ph = tf.placeholder(dtype=tf.float64, name='clock') def _ending_tester(self, value_range, clock, expected): - is_int = isinstance(value_range.start, int) - result = pick_value_from_range(value_range, clock) - self.assertEqual(result, expected) - self.assertTrue(isinstance(result, int if is_int else float)) + with tf.Session() as session: + tf_pick = tf_pick_value_from_range(value_range, clock=self.clock_ph) + + def run_pick(_, c): + return session.run(tf_pick, feed_dict={self.clock_ph: c}) + + is_int = isinstance(value_range.start, int) + for pick, int_type, float_type in [(pick_value_from_range, int, float), (run_pick, np.int32, np.float32)]: + result = pick(value_range, clock) + self.assertEqual(result, expected) + self.assertTrue(isinstance(result, int_type if is_int else float_type)) def test_int_0(self): self._ending_tester(ValueRange(1, 3, 0), 0.0, 1) @@ -83,31 +96,43 @@ def test_float_1(self): class TestPickValueFromRandomizedRange(unittest.TestCase): - - def _ending_tester(self, value_range, clock, expected_min, expected_max): - is_int = isinstance(value_range.start, int) - results = list(map(lambda x: pick_value_from_range(value_range, clock), range(100))) - self.assertGreater(len(set(results)), 80) - self.assertTrue(all(map(lambda x: expected_min <= x <= expected_max, results))) - self.assertTrue(all(map(lambda x: isinstance(x, int if is_int else float), results))) + def __init__(self, *args, **kwargs): + super(TestPickValueFromRandomizedRange, self).__init__(*args, **kwargs) + self.session = tf.Session() + self.clock_ph = tf.placeholder(dtype=tf.float64, name='clock') + + def _ending_tester(self, value_range, clock_min, clock_max, expected_min, expected_max): + with self.session as session: + tf_pick = tf_pick_value_from_range(value_range, clock=self.clock_ph) + + def run_pick(_, c): + return session.run(tf_pick, feed_dict={self.clock_ph: c}) + + is_int = isinstance(value_range.start, int) + clock_range = np.arange(clock_min, clock_max, (clock_max - clock_min) / 100.0) + for pick, int_type, float_type in [(pick_value_from_range, int, float), (run_pick, np.int32, np.float32)]: + results = [pick(value_range, c) for c in clock_range] + self.assertGreater(len(set(results)), 80) + self.assertTrue(all(map(lambda x: expected_min <= x <= expected_max, results))) + self.assertTrue(all(map(lambda x: isinstance(x, int_type if is_int else 
float_type), results))) def test_int_0(self): - self._ending_tester(ValueRange(10000, 30000, 10000), 0.0, 0, 20000) + self._ending_tester(ValueRange(10000, 30000, 10000), 0.0, 0.1, 0, 22000) def test_int_half(self): - self._ending_tester(ValueRange(10000, 30000, 10000), 0.5, 10000, 30000) + self._ending_tester(ValueRange(10000, 30000, 10000), 0.4, 0.6, 8000, 32000) def test_int_1(self): - self._ending_tester(ValueRange(10000, 30000, 10000), 1.0, 20000, 40000) + self._ending_tester(ValueRange(10000, 30000, 10000), 0.8, 1.0, 16000, 40000) def test_float_0(self): - self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.0, 0.0, 20000.0) + self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.0, 0.1, 0.0, 22000.0) def test_float_half(self): - self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.5, 10000.0, 30000.0) + self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.4, 0.6, 8000.0, 32000.0) def test_float_1(self): - self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 1.0, 20000.0, 40000.0) + self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.8, 1.0, 16000.0, 40000.0) if __name__ == '__main__': From 7a835bee5a8676eb173e1dce78f1958bdcc94a94 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Tue, 16 Jun 2020 13:45:05 +0200 Subject: [PATCH 13/38] Updated training tests --- bin/run-tc-graph_augmentations.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/run-tc-graph_augmentations.sh b/bin/run-tc-graph_augmentations.sh index 3db1ee14ac..1d8017e493 100755 --- a/bin/run-tc-graph_augmentations.sh +++ b/bin/run-tc-graph_augmentations.sh @@ -16,13 +16,12 @@ export CUDA_VISIBLE_DEVICES=0 python -u DeepSpeech.py --noshow_progressbar --noearly_stop \ --train_files ${ldc93s1_csv} --train_batch_size 1 \ - --augment speed \ --augment dropout \ - --augment pitch_and_tempo \ + --augment pitch \ + --augment tempo \ --augment time_mask \ --augment frequency_mask \ --augment add \ --augment multiply \ - --augment warp \ --n_hidden 100 \ --epochs 1 From a28df45192e84aaaeebb9c8dad2094a0f9367b1a Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Tue, 16 Jun 2020 15:46:28 +0200 Subject: [PATCH 14/38] Respect None case for augmentations list --- training/deepspeech_training/util/augmentations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index 5c17dedd2e..a86b60a978 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -206,7 +206,7 @@ def timed_samples(): if final_clock is not None: assert 0.0 <= final_clock <= 1.0 assert clock <= final_clock - augmentations = [aug for aug in augmentations if isinstance(aug, SampleAugmentation)] + augmentations = [aug for aug in augmentations if isinstance(aug, SampleAugmentation)] if augmentations else [] try: for augmentation in augmentations: augmentation.start(buffering=buffering) From e10b807e92552bff498feba65d97417572e17a9b Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 16 Jun 2020 16:24:55 +0200 Subject: [PATCH 15/38] Ignore generated dockerfiles. 
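These files are generated from the corresponding .tmpl templates by the Makefile rule introduced earlier in this series, so they are build artifacts rather than sources. A typical sequence that produces and then consumes one of them looks like this sketch (the docker image tag is illustrative, not part of this change):

    make Dockerfile.train DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch
    docker build -f Dockerfile.train -t deepspeech-train .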
---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index a44fa2fbb6..5a9e6d8adb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,5 @@
/doc/.build/
/doc/xml-c/
/doc/xml-java/
+Dockerfile.build
+Dockerfile.train

From 5dd08d2f8eec44705fe90c7ae351d202cf54d4df Mon Sep 17 00:00:00 2001
From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com>
Date: Tue, 16 Jun 2020 16:57:09 +0200
Subject: [PATCH 16/38] Deactivated scorer in graph augmentation test

---
 bin/run-tc-graph_augmentations.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bin/run-tc-graph_augmentations.sh b/bin/run-tc-graph_augmentations.sh
index 1d8017e493..0d949125e1 100755
--- a/bin/run-tc-graph_augmentations.sh
+++ b/bin/run-tc-graph_augmentations.sh
@@ -16,6 +16,7 @@ export CUDA_VISIBLE_DEVICES=0
python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
--train_files ${ldc93s1_csv} --train_batch_size 1 \
+ --scorer "" \
--augment dropout \
--augment pitch \

From 6c2cbbd725928b4fba6e90d9bc7381bf3cc28911 Mon Sep 17 00:00:00 2001
From: Alexandre Lissy
Date: Tue, 16 Jun 2020 12:40:37 +0200
Subject: [PATCH 17/38] Fix #3053: Check output stream when producing scorer

---
 data/lm/generate_package.py | 8 ++++++--
 native_client/ctcdecode/__init__.py | 2 +-
 native_client/ctcdecode/scorer.cpp | 28 ++++++++++++++++++++++++++--
 native_client/ctcdecode/scorer.h | 2 +-
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py
index 27b007422b..30a33fcc7e 100644
--- a/data/lm/generate_package.py
+++ b/data/lm/generate_package.py
@@ -61,8 +61,12 @@ def create_bundle(
 sys.exit(1)
 scorer.fill_dictionary(list(words))
 shutil.copy(lm_path, package_path)
- scorer.save_dictionary(package_path, True) # append, not overwrite
- print("Package created in {}".format(package_path))
+ # append, not overwrite
+ if scorer.save_dictionary(package_path, True):
+ print("Package created in {}".format(package_path))
+ else:
+ print("Error when creating {}".format(package_path))
+ sys.exit(1)

class Tristate(object):
diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py
index c9b917b362..7e3766bebf 100644
--- a/native_client/ctcdecode/__init__.py
+++ b/native_client/ctcdecode/__init__.py
@@ -47,7 +47,7 @@ def load_lm(self, lm_path):
 return super(Scorer, self).load_lm(lm_path.encode('utf-8'))

 def save_dictionary(self, save_path, *args, **kwargs):
- super(Scorer, self).save_dictionary(save_path.encode('utf-8'), *args, **kwargs)
+ return super(Scorer, self).save_dictionary(save_path.encode('utf-8'), *args, **kwargs)

def ctc_beam_search_decoder(probs_seq,
diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp
index 1834c21c70..ebf5522763 100644
--- a/native_client/ctcdecode/scorer.cpp
+++ b/native_client/ctcdecode/scorer.cpp
@@ -146,7 +146,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
 return DS_ERR_OK;
}

-void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite)
+bool Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite)
{
 std::ios::openmode om;
 if (append_instead_of_overwrite) {
@@ -155,15 +155,39 @@ void Scorer::save_dictionary(const std::string& path, bool append_instead_of_ove
 om = std::ios::out|std::ios::binary;
 }
 std::fstream fout(path, om);
+ if (!fout || fout.bad()) {
+ std::cerr << "Error opening '" << path << "'" << std::endl;
+ return false;
+ }
fout.write(reinterpret_cast<const char*>(&MAGIC), sizeof(MAGIC));
+ if (fout.bad()) {
+ std::cerr << "Error writing MAGIC '" << path << "'" << std::endl;
+ return false;
+ }
 fout.write(reinterpret_cast<const char*>(&FILE_VERSION), sizeof(FILE_VERSION));
+ if (fout.bad()) {
+ std::cerr << "Error writing FILE_VERSION '" << path << "'" << std::endl;
+ return false;
+ }
 fout.write(reinterpret_cast<const char*>(&is_utf8_mode_), sizeof(is_utf8_mode_));
+ if (fout.bad()) {
+ std::cerr << "Error writing is_utf8_mode '" << path << "'" << std::endl;
+ return false;
+ }
 fout.write(reinterpret_cast<const char*>(&alpha), sizeof(alpha));
+ if (fout.bad()) {
+ std::cerr << "Error writing alpha '" << path << "'" << std::endl;
+ return false;
+ }
 fout.write(reinterpret_cast<const char*>(&beta), sizeof(beta));
+ if (fout.bad()) {
+ std::cerr << "Error writing beta '" << path << "'" << std::endl;
+ return false;
+ }
 fst::FstWriteOptions opt;
 opt.align = true;
 opt.source = path;
- dictionary->Write(fout, opt);
+ return dictionary->Write(fout, opt);
}

bool Scorer::is_scoring_boundary(PathTrie* prefix, size_t new_label)
diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h
index 55f337eda7..d2a1c8b3be 100644
--- a/native_client/ctcdecode/scorer.h
+++ b/native_client/ctcdecode/scorer.h
@@ -77,7 +77,7 @@ class Scorer {
 void set_alphabet(const Alphabet& alphabet);

 // save dictionary in file
- void save_dictionary(const std::string &path, bool append_instead_of_overwrite=false);
+ bool save_dictionary(const std::string &path, bool append_instead_of_overwrite=false);

 // return whether this step represents a boundary where beam scoring should happen
 bool is_scoring_boundary(PathTrie* prefix, size_t new_label);

From 7768c89e2a8d4dd99f93cb702a3e44bbc415f1f9 Mon Sep 17 00:00:00 2001
From: Alexandre Lissy
Date: Tue, 16 Jun 2020 23:38:12 +0200
Subject: [PATCH 18/38] Fix #3073: Update libssl version

---
 taskcluster/tc-py-utils.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/taskcluster/tc-py-utils.sh b/taskcluster/tc-py-utils.sh
index e4937282f5..2beee417fb 100755
--- a/taskcluster/tc-py-utils.sh
+++ b/taskcluster/tc-py-utils.sh
@@ -212,8 +212,8 @@ maybe_ssl102_py37()
 mkdir -p ${PY37_OPENSSL_DIR}

 ${WGET} -P ${TASKCLUSTER_TMP_DIR} \
- http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.15_amd64.deb \
- http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.15_amd64.deb
+ http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.16_amd64.deb \
+ http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.16_amd64.deb

 for deb in ${TASKCLUSTER_TMP_DIR}/libssl*.deb; do
 dpkg -x ${deb} ${PY37_OPENSSL_DIR}

From 4f7842c96657c24c3bc3f0e7cb6ae9efd848e15a Mon Sep 17 00:00:00 2001
From: Alexandre Lissy
Date: Tue, 16 Jun 2020 17:09:14 +0200
Subject: [PATCH 19/38] Fix #3068: More generic TaskCluster build/caching tasks

---
 taskcluster/.build.yml | 4 ++
 taskcluster/.shared.yml | 10 ++++
 .../android-cache-arm64-v8a-android-24.yml | 6 +-
 .../android-cache-arm64-v8a-android-25.yml | 6 +-
 .../android-cache-armeabi-v7a-android-24.yml | 6 +-
 .../android-cache-armeabi-v7a-android-25.yml | 6 +-
 taskcluster/android-cache-sdk-android-27.yml | 6 +-
 .../android-cache-x86_64-android-24.yml | 6 +-
 .../android-cache-x86_64-android-25.yml | 6 +-
 .../android-cache-x86_64-android-26.yml | 6 +-
 .../android-cache-x86_64-android-28.yml | 6 +-
 .../android-cache-x86_64-android-29.yml | 6 +-
 taskcluster/android_cache-opt-base.tyml | 52 ----------------
taskcluster/android_cache-package.sh | 11 ---- ...> generic_tc_caching-darwin-opt-base.tyml} | 13 ++-- .../generic_tc_caching-linux-opt-base.tyml | 50 ++++++++++++++++ .../generic_tc_caching-win-opt-base.tyml | 57 ++++++++++++++++++ taskcluster/gradle-cache.yml | 8 +-- taskcluster/gradle-package.sh | 11 ---- taskcluster/homebrew-darwin-opt-base.tyml | 59 ------------------- taskcluster/homebrew_builds-darwin-amd64.yml | 8 +-- taskcluster/homebrew_tests-darwin-amd64.yml | 8 +-- taskcluster/node-gyp-cache-base.tyml | 57 ------------------ taskcluster/node-gyp-cache.yml | 9 ++- taskcluster/node-gyp-populate.sh | 4 +- taskcluster/pyenv-darwin-amd64.yml | 5 +- taskcluster/pyenv-linux-amd64.yml | 5 +- taskcluster/pyenv-linux-opt-base.tyml | 52 ---------------- taskcluster/pyenv-package.sh | 11 ---- taskcluster/pyenv-win-amd64.yml | 5 +- taskcluster/pyenv-win-opt-base.tyml | 59 ------------------- taskcluster/swig-darwin-amd64.yml | 11 +++- taskcluster/swig-darwin-opt-base.tyml | 56 ------------------ taskcluster/swig-linux-amd64.yml | 11 +++- taskcluster/swig-linux-opt-base.tyml | 54 ----------------- taskcluster/swig-win-amd64.yml | 11 +++- taskcluster/tc-update-index.sh | 18 ++++++ 37 files changed, 235 insertions(+), 484 deletions(-) delete mode 100644 taskcluster/android_cache-opt-base.tyml rename taskcluster/{pyenv-darwin-opt-base.tyml => generic_tc_caching-darwin-opt-base.tyml} (66%) create mode 100644 taskcluster/generic_tc_caching-linux-opt-base.tyml create mode 100644 taskcluster/generic_tc_caching-win-opt-base.tyml delete mode 100644 taskcluster/homebrew-darwin-opt-base.tyml delete mode 100644 taskcluster/node-gyp-cache-base.tyml delete mode 100644 taskcluster/pyenv-linux-opt-base.tyml delete mode 100644 taskcluster/pyenv-win-opt-base.tyml delete mode 100644 taskcluster/swig-darwin-opt-base.tyml delete mode 100644 taskcluster/swig-linux-opt-base.tyml create mode 100755 taskcluster/tc-update-index.sh diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index 7e9809e5e9..fee2c1d6c9 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -36,3 +36,7 @@ build: gradle_cache: url: '' namespace: '' + build_or_cache: + repo: "${event.head.repo.url}" + sha: "${event.head.sha}" + dir: "DeepSpeech/ds" diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index dadb10944a..b04ff9de08 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -119,6 +119,16 @@ system: swig: repo: "https://github.com/lissyx/swig" sha1: "b5fea54d39832d1d132d7dd921b69c0c2c9d5118" + swig_build: + linux: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.linux.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz" + namespace: "project.deepspeech.swig.linux.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" + osx: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.darwin.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz" + namespace: "project.deepspeech.swig.darwin.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" + win: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz" + namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" username: 'build-user' homedir: linux: '/home/build-user' diff --git a/taskcluster/android-cache-arm64-v8a-android-24.yml 
b/taskcluster/android-cache-arm64-v8a-android-24.yml index a8fef42fe4..8fa8e9275d 100644 --- a/taskcluster/android-cache-arm64-v8a-android-24.yml +++ b/taskcluster/android-cache-arm64-v8a-android-24.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.arm64_v8a.android_24.url} - namespace: ${system.android_cache.arm64_v8a.android_24.namespace} + artifact_url: ${system.android_cache.arm64_v8a.android_24.url} + artifact_namespace: ${system.android_cache.arm64_v8a.android_24.namespace} scripts: build: "taskcluster/android_cache-build.sh arm64-v8a android-24" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-arm64-v8a-android-25.yml b/taskcluster/android-cache-arm64-v8a-android-25.yml index 7dca05ea93..366387269a 100644 --- a/taskcluster/android-cache-arm64-v8a-android-25.yml +++ b/taskcluster/android-cache-arm64-v8a-android-25.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.arm64_v8a.android_25.url} - namespace: ${system.android_cache.arm64_v8a.android_25.namespace} + artifact_url: ${system.android_cache.arm64_v8a.android_25.url} + artifact_namespace: ${system.android_cache.arm64_v8a.android_25.namespace} scripts: build: "taskcluster/android_cache-build.sh arm64-v8a android-25" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-armeabi-v7a-android-24.yml b/taskcluster/android-cache-armeabi-v7a-android-24.yml index 605f0e92a0..3536ab3fa6 100644 --- a/taskcluster/android-cache-armeabi-v7a-android-24.yml +++ b/taskcluster/android-cache-armeabi-v7a-android-24.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.armeabi_v7a.android_24.url} - namespace: ${system.android_cache.armeabi_v7a.android_24.namespace} + artifact_url: ${system.android_cache.armeabi_v7a.android_24.url} + artifact_namespace: ${system.android_cache.armeabi_v7a.android_24.namespace} scripts: build: "taskcluster/android_cache-build.sh armeabi-v7a android-24" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-armeabi-v7a-android-25.yml b/taskcluster/android-cache-armeabi-v7a-android-25.yml index 6089d940a6..e4f78de783 100644 --- a/taskcluster/android-cache-armeabi-v7a-android-25.yml +++ b/taskcluster/android-cache-armeabi-v7a-android-25.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.armeabi_v7a.android_25.url} - namespace: ${system.android_cache.armeabi_v7a.android_25.namespace} + artifact_url: ${system.android_cache.armeabi_v7a.android_25.url} + artifact_namespace: ${system.android_cache.armeabi_v7a.android_25.namespace} scripts: build: "taskcluster/android_cache-build.sh armeabi-v7a android-25" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-sdk-android-27.yml b/taskcluster/android-cache-sdk-android-27.yml index 7632e5b821..668e142e65 100644 --- a/taskcluster/android-cache-sdk-android-27.yml +++ b/taskcluster/android-cache-sdk-android-27.yml @@ -1,11 
+1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.sdk.android_27.url} - namespace: ${system.android_cache.sdk.android_27.namespace} + artifact_url: ${system.android_cache.sdk.android_27.url} + artifact_namespace: ${system.android_cache.sdk.android_27.namespace} scripts: build: "taskcluster/android_cache-build.sh sdk android-27" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-24.yml b/taskcluster/android-cache-x86_64-android-24.yml index 38013d0834..ff06f5516f 100644 --- a/taskcluster/android-cache-x86_64-android-24.yml +++ b/taskcluster/android-cache-x86_64-android-24.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.x86_64.android_24.url} - namespace: ${system.android_cache.x86_64.android_24.namespace} + artifact_url: ${system.android_cache.x86_64.android_24.url} + artifact_namespace: ${system.android_cache.x86_64.android_24.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-24" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-25.yml b/taskcluster/android-cache-x86_64-android-25.yml index 6f57be4287..8ccac4f9db 100644 --- a/taskcluster/android-cache-x86_64-android-25.yml +++ b/taskcluster/android-cache-x86_64-android-25.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.x86_64.android_25.url} - namespace: ${system.android_cache.x86_64.android_25.namespace} + artifact_url: ${system.android_cache.x86_64.android_25.url} + artifact_namespace: ${system.android_cache.x86_64.android_25.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-25" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-26.yml b/taskcluster/android-cache-x86_64-android-26.yml index ec711ede0b..0a74e78e60 100644 --- a/taskcluster/android-cache-x86_64-android-26.yml +++ b/taskcluster/android-cache-x86_64-android-26.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.x86_64.android_26.url} - namespace: ${system.android_cache.x86_64.android_26.namespace} + artifact_url: ${system.android_cache.x86_64.android_26.url} + artifact_namespace: ${system.android_cache.x86_64.android_26.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-26" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-28.yml b/taskcluster/android-cache-x86_64-android-28.yml index 471f33b929..4fa124fdb9 100644 --- a/taskcluster/android-cache-x86_64-android-28.yml +++ b/taskcluster/android-cache-x86_64-android-28.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.x86_64.android_28.url} - namespace: ${system.android_cache.x86_64.android_28.namespace} + artifact_url: ${system.android_cache.x86_64.android_28.url} + 
artifact_namespace: ${system.android_cache.x86_64.android_28.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-28" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-29.yml b/taskcluster/android-cache-x86_64-android-29.yml index 835453f923..57c194f2d8 100644 --- a/taskcluster/android-cache-x86_64-android-29.yml +++ b/taskcluster/android-cache-x86_64-android-29.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > ${java.packages_trusty.apt} cache: - url: ${system.android_cache.x86_64.android_29.url} - namespace: ${system.android_cache.x86_64.android_29.namespace} + artifact_url: ${system.android_cache.x86_64.android_29.url} + artifact_namespace: ${system.android_cache.x86_64.android_29.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-29" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android_cache-opt-base.tyml b/taskcluster/android_cache-opt-base.tyml deleted file mode 100644 index b44778aaac..0000000000 --- a/taskcluster/android_cache-opt-base.tyml +++ /dev/null @@ -1,52 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - image: ${build.docker_image} - - features: - taskclusterProxy: true - - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && - apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; - cache_file=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.url}` && - if [ "$cache_file" != "200" ]; then - ${extraSystemSetup} && - adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && - mkdir -p /tmp/artifacts/ && chmod 777 /tmp/artifacts && - echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && - sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.build} && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} taskcluster ${build.cache.namespace} - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/android_cache-package.sh b/taskcluster/android_cache-package.sh index 22ec767def..8c73070d5c 100755 --- a/taskcluster/android_cache-package.sh +++ b/taskcluster/android_cache-package.sh @@ -2,17 +2,6 @@ set -xe -TC_EXPIRE=$1 -TC_INSTANCE=$2 -TC_INDEX=$3 - source $(dirname 
"$0")/tc-tests-utils.sh cd $HOME/ && tar -czf $TASKCLUSTER_ARTIFACTS/android_cache.tar.gz DeepSpeech/Android/ - -if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! -z "${TC_INDEX}" ]; then - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ - "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" -fi; diff --git a/taskcluster/pyenv-darwin-opt-base.tyml b/taskcluster/generic_tc_caching-darwin-opt-base.tyml similarity index 66% rename from taskcluster/pyenv-darwin-opt-base.tyml rename to taskcluster/generic_tc_caching-darwin-opt-base.tyml index 063c661322..e6777f22f4 100644 --- a/taskcluster/pyenv-darwin-opt-base.tyml +++ b/taskcluster/generic_tc_caching-darwin-opt-base.tyml @@ -35,12 +35,13 @@ payload: export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/ && env && mkdir -p $TASKCLUSTER_ARTIFACTS/ && - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" ${system.pyenv.osx.url}` && - if [ "$swig_bin" != "200" ]; then - git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ && - cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && - $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && - $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} 127.0.0.1:8080 ${system.pyenv.osx.namespace} + cache_artifact=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.artifact_url}` && + if [ "$cache_artifact" != "200" ]; then + git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && + cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && + $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && + $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && + $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} 127.0.0.1:8080 ${build.cache.artifact_namespace} fi; artifacts: diff --git a/taskcluster/generic_tc_caching-linux-opt-base.tyml b/taskcluster/generic_tc_caching-linux-opt-base.tyml new file mode 100644 index 0000000000..36668a0c3b --- /dev/null +++ b/taskcluster/generic_tc_caching-linux-opt-base.tyml @@ -0,0 +1,50 @@ +taskId: ${taskcluster.taskId} +provisionerId: ${taskcluster.docker.provisionerId} +workerType: ${taskcluster.docker.workerType} +taskGroupId: ${taskcluster.taskGroupId} +schedulerId: ${taskcluster.schedulerId} +created: { $fromNow: '0 sec' } +deadline: { $fromNow: '1 day' } +expires: { $fromNow: '6 months' } +scopes: + - "index:insert-task:project.deepspeech.*" + +payload: + maxRunTime: { $eval: to_int(build.maxRunTime) } + image: ${build.docker_image} + + features: + taskclusterProxy: true + + command: + - "/bin/bash" + - "--login" + - "-cxe" + - $let: + extraSystemSetup: { $eval: strip(str(build.system_setup)) } + taskIndexExpire: { $fromNow: '6 months' } + in: > + (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && + apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; + cache_artifact=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.artifact_url}` && + if [ "$cache_artifact" != "200" ]; then + adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && + mkdir -p /tmp/artifacts/ && chmod 777 /tmp/artifacts && + echo -e "#!/bin/bash\nset -xe\n env && id && (git clone --quiet ${build.build_or_cache.repo} 
~/${build.build_or_cache.dir}/ && cd ~/${build.build_or_cache.dir}/ && git checkout --quiet ${build.build_or_cache.sha})" > /tmp/clone.sh && chmod +x /tmp/clone.sh && + sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && + sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/${build.scripts.build} && + sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/${build.scripts.package} && + sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace} + fi; + + artifacts: + "public": + type: "directory" + path: "/tmp/artifacts/" + expires: { $fromNow: '6 months' } + +metadata: + name: ${build.metadata.name} + description: ${build.metadata.description} + owner: ${event.head.user.email} + source: ${event.head.repo.url} diff --git a/taskcluster/generic_tc_caching-win-opt-base.tyml b/taskcluster/generic_tc_caching-win-opt-base.tyml new file mode 100644 index 0000000000..a41103188e --- /dev/null +++ b/taskcluster/generic_tc_caching-win-opt-base.tyml @@ -0,0 +1,57 @@ +taskId: ${taskcluster.taskId} +provisionerId: ${taskcluster.docker.provisionerId} +workerType: ${taskcluster.docker.workerTypeWin} +taskGroupId: ${taskcluster.taskGroupId} +schedulerId: ${taskcluster.schedulerId} +created: { $fromNow: '0 sec' } +deadline: { $fromNow: '1 day' } +expires: { $fromNow: '6 months' } +scopes: + - "index:insert-task:project.deepspeech.*" + +payload: + maxRunTime: { $eval: to_int(build.maxRunTime) } + + features: + taskclusterProxy: true + + mounts: + - file: msys2-base-x86_64.tar.xz + content: + sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 + url: >- + https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + + env: + TC_MSYS_VERSION: 'MSYS_NT-6.3-9600' + MSYS: 'winsymlinks:nativestrict' + + command: + - >- + "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | + "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si + - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" + - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" + - $let: + taskIndexExpire: { $fromNow: '6 months' } + in: > + echo .\msys64\usr\bin\bash.exe --login -cxe "export LC_ALL=C && + export PATH=\"$USERPROFILE/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" && + export TASKCLUSTER_ARTIFACTS=\"$(cygpath -u $USERPROFILE/public)\" && + export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && + echo \"export TASKCLUSTER_TASK_EXIT_CODE=0\" > $USERPROFILE/tc-exit.sh && + env && pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${build.cache.artifact_url}`\" != \"200\" ]; then git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/ && cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && 
$TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k + + - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE" + + artifacts: + - type: "directory" + path: "public/" + expires: { $fromNow: '6 months' } + +metadata: + name: ${build.metadata.name} + description: ${build.metadata.description} + owner: ${event.head.user.email} + source: ${event.head.repo.url} diff --git a/taskcluster/gradle-cache.yml b/taskcluster/gradle-cache.yml index 334545c300..ef589197f6 100644 --- a/taskcluster/gradle-cache.yml +++ b/taskcluster/gradle-cache.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml + cache: + artifact_url: ${system.gradle_cache.url} + artifact_namespace: ${system.gradle_cache.namespace} system_setup: > ${java.packages_trusty.apt} - cache: - url: ${system.gradle_cache.url} - namespace: ${system.gradle_cache.namespace} scripts: build: "taskcluster/gradle-build.sh" package: "taskcluster/gradle-package.sh" diff --git a/taskcluster/gradle-package.sh b/taskcluster/gradle-package.sh index 495c05e8d9..840fffc832 100755 --- a/taskcluster/gradle-package.sh +++ b/taskcluster/gradle-package.sh @@ -2,17 +2,6 @@ set -xe -TC_EXPIRE=$1 -TC_INSTANCE=$2 -TC_INDEX=$3 - source $(dirname "$0")/tc-tests-utils.sh cd ${GRADLE_USER_HOME}/../ && tar -czf $TASKCLUSTER_ARTIFACTS/gradle.tar.gz gradle-cache/ - -if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! -z "${TC_INDEX}" ]; then - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ - "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" -fi; diff --git a/taskcluster/homebrew-darwin-opt-base.tyml b/taskcluster/homebrew-darwin-opt-base.tyml deleted file mode 100644 index ac6c5af356..0000000000 --- a/taskcluster/homebrew-darwin-opt-base.tyml +++ /dev/null @@ -1,59 +0,0 @@ -taskId: ${taskcluster.taskId} -provisionerId: ${taskcluster.generic.provisionerId} -workerType: ${taskcluster.generic.workerType} -taskGroupId: ${taskcluster.taskGroupId} -schedulerId: ${taskcluster.schedulerId} -dependencies: - $map: { $eval: build.dependencies } - each(b): - $eval: as_slugid(b) -created: { $fromNow: '0 sec' } -deadline: { $fromNow: '1 day' } -expires: { $fromNow: '6 months' } -scopes: - - "index:insert-task:project.deepspeech.*" - -payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - - features: - taskclusterProxy: true - - command: - - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - taskIndexExpire: { $fromNow: '6 months' } - in: > - export TASKCLUSTER_ARTIFACTS="$(pwd)/public/" && - export TASKCLUSTER_ORIG_TASKDIR="$(pwd)" && - (mkdir ../tc-workdir/ || rm -fr ../tc-workdir/*) && cd ../tc-workdir/ && - export TASKCLUSTER_TASK_DIR="$(pwd)" && - export LC_ALL=C && - export MACOSX_DEPLOYMENT_TARGET=10.10 && - export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/ && - env && - mkdir -p $TASKCLUSTER_ARTIFACTS/ && - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.homebrew.url}` && - if [ "$swig_bin" != "200" ]; then - git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ && - cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && - 
$TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && - $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://127.0.0.1:8080/index/v1/task/${build.homebrew.namespace}" - fi; - - artifacts: - - type: "directory" - path: "public/" - expires: { $fromNow: '6 months' } - -metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/homebrew_builds-darwin-amd64.yml b/taskcluster/homebrew_builds-darwin-amd64.yml index 4dac3a1332..3377dcac86 100644 --- a/taskcluster/homebrew_builds-darwin-amd64.yml +++ b/taskcluster/homebrew_builds-darwin-amd64.yml @@ -1,8 +1,8 @@ build: - template_file: homebrew-darwin-opt-base.tyml - homebrew: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.5/artifacts/public/homebrew_builds.tar.gz' - namespace: 'project.deepspeech.homebrew_builds.5' + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.homebrew_builds.url} + artifact_namespace: ${system.homebrew_builds.namespace} scripts: build: "taskcluster/homebrew-build.sh --builds" package: "taskcluster/homebrew-package.sh --builds" diff --git a/taskcluster/homebrew_tests-darwin-amd64.yml b/taskcluster/homebrew_tests-darwin-amd64.yml index fc9637f297..dc93c183eb 100644 --- a/taskcluster/homebrew_tests-darwin-amd64.yml +++ b/taskcluster/homebrew_tests-darwin-amd64.yml @@ -1,8 +1,8 @@ build: - template_file: homebrew-darwin-opt-base.tyml - homebrew: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.6/artifacts/public/homebrew_tests.tar.gz' - namespace: 'project.deepspeech.homebrew_tests.6' + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.homebrew_tests.url} + artifact_namespace: ${system.homebrew_tests.namespace} scripts: build: "taskcluster/homebrew-build.sh --tests" package: "taskcluster/homebrew-package.sh --tests" diff --git a/taskcluster/node-gyp-cache-base.tyml b/taskcluster/node-gyp-cache-base.tyml deleted file mode 100644 index 5c0e895d06..0000000000 --- a/taskcluster/node-gyp-cache-base.tyml +++ /dev/null @@ -1,57 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "node:12" - - features: - taskclusterProxy: true - - # This task will inspect system.node_gyp_cache taskcluster index existence: - # - if the artifact does not exists, it will build it - # - if the artifact exists, it will re-mirror it (if we don't do that, new - # index gets published with no artifact and erases existing one) - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; - cache_file=`curl -sSIL -o /dev/null -w "%{http_code}" 
${system.node_gyp_cache.url}` && - if [ "$cache_file" != "200" ]; then - mkdir -p ~/DeepSpeech/ds/ && - git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && - cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && - ~/DeepSpeech/ds/${build.scripts.build} && - ~/DeepSpeech/ds/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://taskcluster/index/v1/task/${system.node_gyp_cache.namespace}" - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/node-gyp-cache.yml b/taskcluster/node-gyp-cache.yml index a9be39de1a..a0c48ba52f 100644 --- a/taskcluster/node-gyp-cache.yml +++ b/taskcluster/node-gyp-cache.yml @@ -1,5 +1,12 @@ build: - template_file: node-gyp-cache-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml + docker_image: "node:12" + cache: + artifact_url: "${system.node_gyp_cache.url}" + artifact_namespace: "${system.node_gyp_cache.namespace}" + system_setup: + > + (apt-get -qq -y install sudo || true) scripts: build: "taskcluster/node-gyp-populate.sh" package: "taskcluster/node-gyp-package.sh" diff --git a/taskcluster/node-gyp-populate.sh b/taskcluster/node-gyp-populate.sh index 7043d33244..333c692a54 100755 --- a/taskcluster/node-gyp-populate.sh +++ b/taskcluster/node-gyp-populate.sh @@ -8,7 +8,9 @@ node --version npm --version -npm install -g node-gyp@6.x +npm install node-gyp@6.x + +export PATH=$HOME/node_modules/.bin/:$PATH devDir=$DS_ROOT_TASK/node-gyp-cache/ diff --git a/taskcluster/pyenv-darwin-amd64.yml b/taskcluster/pyenv-darwin-amd64.yml index 47cceec9c6..3ad055caa1 100644 --- a/taskcluster/pyenv-darwin-amd64.yml +++ b/taskcluster/pyenv-darwin-amd64.yml @@ -1,5 +1,8 @@ build: - template_file: pyenv-darwin-opt-base.tyml + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.pyenv.osx.url} + artifact_namespace: ${system.pyenv.osx.namespace} scripts: build: "taskcluster/pyenv-build.sh" package: "taskcluster/pyenv-package.sh" diff --git a/taskcluster/pyenv-linux-amd64.yml b/taskcluster/pyenv-linux-amd64.yml index 393a8872b2..a8ece0b45f 100644 --- a/taskcluster/pyenv-linux-amd64.yml +++ b/taskcluster/pyenv-linux-amd64.yml @@ -1,5 +1,8 @@ build: - template_file: pyenv-linux-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml + cache: + artifact_url: "${system.pyenv.linux.url}" + artifact_namespace: "${system.pyenv.linux.namespace}" system_setup: > apt-get -qq update && apt-get -qq -y install python-yaml ${python.packages_trusty.apt} wget diff --git a/taskcluster/pyenv-linux-opt-base.tyml b/taskcluster/pyenv-linux-opt-base.tyml deleted file mode 100644 index 15d8f7b0cc..0000000000 --- a/taskcluster/pyenv-linux-opt-base.tyml +++ /dev/null @@ -1,52 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: 
to_int(build.maxRunTime) } - image: ${build.docker_image} - - features: - taskclusterProxy: true - - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && - apt-get -qq update && apt-get -qq -y install curl git; - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" ${system.pyenv.linux.url}` && - if [ "$swig_bin" != "200" ]; then - ${extraSystemSetup} && - adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && - mkdir -p /tmp/artifacts/ && chmod 777 /tmp/artifacts && - echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && - sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.build} && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} taskcluster ${system.pyenv.linux.namespace} - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/pyenv-package.sh b/taskcluster/pyenv-package.sh index bc2c0639a0..73f6a542a2 100755 --- a/taskcluster/pyenv-package.sh +++ b/taskcluster/pyenv-package.sh @@ -2,17 +2,6 @@ set -xe -TC_EXPIRE=$1 -TC_INSTANCE=$2 -TC_INDEX=$3 - source $(dirname "$0")/tc-tests-utils.sh cd ${PYENV_ROOT}/ && $TAR -czf $TASKCLUSTER_ARTIFACTS/pyenv.tar.gz . - -if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! 
-z "${TC_INDEX}" ]; then - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ - "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" -fi; diff --git a/taskcluster/pyenv-win-amd64.yml b/taskcluster/pyenv-win-amd64.yml index d22872b33e..7d05f1412c 100644 --- a/taskcluster/pyenv-win-amd64.yml +++ b/taskcluster/pyenv-win-amd64.yml @@ -1,5 +1,8 @@ build: - template_file: pyenv-win-opt-base.tyml + template_file: generic_tc_caching-win-opt-base.tyml + cache: + artifact_url: "${system.pyenv.win.url}" + artifact_namespace: "${system.pyenv.win.namespace}" scripts: build: "taskcluster/pyenv-build.sh" package: "taskcluster/pyenv-package.sh" diff --git a/taskcluster/pyenv-win-opt-base.tyml b/taskcluster/pyenv-win-opt-base.tyml deleted file mode 100644 index 59c584bae1..0000000000 --- a/taskcluster/pyenv-win-opt-base.tyml +++ /dev/null @@ -1,59 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerTypeWin} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - - features: - taskclusterProxy: true - - mounts: - - file: msys2-base-x86_64.tar.xz - content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz - - env: - TC_MSYS_VERSION: 'MSYS_NT-6.3-9600' - MSYS: 'winsymlinks:nativestrict' - - command: - - >- - "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | - "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" - - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - - $let: - taskIndexExpire: { $fromNow: '6 months' } - in: > - echo .\msys64\usr\bin\bash.exe --login -cxe "export LC_ALL=C && - export PATH=\"$USERPROFILE/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" && - export TASKCLUSTER_ARTIFACTS=\"$(cygpath -u $USERPROFILE/public)\" && - export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && - echo \"export TASKCLUSTER_TASK_EXIT_CODE=0\" > $USERPROFILE/tc-exit.sh && - env && pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${system.pyenv.win.url}`\" != \"200\" ]; then git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ && cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} taskcluster ${system.pyenv.win.namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k - - - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE" - - artifacts: - - type: "directory" - path: "public/" - expires: { $fromNow: '6 months' } - - 
metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/swig-darwin-amd64.yml b/taskcluster/swig-darwin-amd64.yml index cb08cdef19..64ca794b58 100644 --- a/taskcluster/swig-darwin-amd64.yml +++ b/taskcluster/swig-darwin-amd64.yml @@ -1,7 +1,12 @@ build: - template_file: swig-darwin-opt-base.tyml - swig_system: "darwin" - swig_arch: "amd64" + template_file: generic_tc_caching-darwin-opt-base.tyml + build_or_cache: + repo: "${system.swig.repo}" + sha: "${system.swig.sha1}" + dir: "swig" + cache: + artifact_url: "${system.swig_build.osx.url}" + artifact_namespace: "${system.swig_build.osx.namespace}" scripts: build: "taskcluster/build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/swig-darwin-opt-base.tyml b/taskcluster/swig-darwin-opt-base.tyml deleted file mode 100644 index 0f08e596e9..0000000000 --- a/taskcluster/swig-darwin-opt-base.tyml +++ /dev/null @@ -1,56 +0,0 @@ -taskId: ${taskcluster.taskId} -provisionerId: ${taskcluster.generic.provisionerId} -workerType: ${taskcluster.generic.workerType} -taskGroupId: ${taskcluster.taskGroupId} -schedulerId: ${taskcluster.schedulerId} -dependencies: - $map: { $eval: build.dependencies } - each(b): - $eval: as_slugid(b) -created: { $fromNow: '0 sec' } -deadline: { $fromNow: '1 day' } -expires: { $fromNow: '6 months' } -scopes: - - "index:insert-task:project.deepspeech.*" - -payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - - command: - - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - taskIndexExpire: { $fromNow: '6 months' } - in: > - export TASKCLUSTER_ARTIFACTS="$(pwd)/public/" && - export TASKCLUSTER_TASKDIR="$(pwd)" && - export TASKCLUSTER_ORIG_TASKDIR="$(pwd)" && - export LC_ALL=C && - export MACOSX_DEPLOYMENT_TARGET=10.10 && - export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/ && - export HOMEBREW_NO_AUTO_UPDATE=1 && - env && - mkdir -p $TASKCLUSTER_ARTIFACTS/ && - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}/artifacts/public/ds-swig.tar.gz` && - if [ "$swig_bin" != "200" ]; then - git clone --quiet ${system.swig.repo} $TASKCLUSTER_TASKDIR/swig/ && - cd $TASKCLUSTER_TASKDIR/swig/ && git checkout --quiet ${system.swig.sha1} && - $TASKCLUSTER_TASKDIR/swig/${build.scripts.build} && - $TASKCLUSTER_TASKDIR/swig/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://taskcluster/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}" - fi; - - artifacts: - - type: "directory" - path: "public/" - expires: { $fromNow: '6 months' } - -metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/swig-linux-amd64.yml b/taskcluster/swig-linux-amd64.yml index 27bd058c1d..3b199c1afb 100644 --- a/taskcluster/swig-linux-amd64.yml +++ b/taskcluster/swig-linux-amd64.yml @@ -1,8 +1,13 @@ build: - template_file: swig-linux-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml docker_image: "ubuntu:14.04" - swig_system: "linux" - swig_arch: "amd64" + build_or_cache: + repo: "${system.swig.repo}" + sha: "${system.swig.sha1}" + 
dir: "swig" + cache: + artifact_url: "${system.swig_build.linux.url}" + artifact_namespace: "${system.swig_build.linux.namespace}" system_setup: > apt-get -qq -y install autoconf automake bison build-essential diff --git a/taskcluster/swig-linux-opt-base.tyml b/taskcluster/swig-linux-opt-base.tyml deleted file mode 100644 index fb3662e414..0000000000 --- a/taskcluster/swig-linux-opt-base.tyml +++ /dev/null @@ -1,54 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - image: ${build.docker_image} - - features: - taskclusterProxy: true - - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && - apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}/artifacts/public/ds-swig.tar.gz` && - if [ "$swig_bin" != "200" ]; then - adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && - echo -e "#!/bin/bash\nset -xe\n env && id && (git clone --quiet ${system.swig.repo} ~/swig/ && cd ~/swig/ && git checkout --quiet ${system.swig.sha1})" > /tmp/clone.sh && chmod +x /tmp/clone.sh && - sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/swig/${build.scripts.build} && - sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/swig/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://taskcluster/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}" - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/swig-win-amd64.yml b/taskcluster/swig-win-amd64.yml index 576bdf73a1..310a81122c 100644 --- a/taskcluster/swig-win-amd64.yml +++ b/taskcluster/swig-win-amd64.yml @@ -1,8 +1,13 @@ build: - template_file: swig-linux-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml docker_image: "ubuntu:18.04" - swig_system: "win" - swig_arch: "amd64" + build_or_cache: + repo: "${system.swig.repo}" + sha: "${system.swig.sha1}" + dir: "swig" + cache: + artifact_url: "${system.swig_build.win.url}" + artifact_namespace: "${system.swig_build.win.namespace}" system_setup: > apt-get -qq -y install autoconf automake bison build-essential mingw-w64 && diff --git a/taskcluster/tc-update-index.sh b/taskcluster/tc-update-index.sh new file mode 100755 index 0000000000..fbc7231609 --- /dev/null +++ 
b/taskcluster/tc-update-index.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Helper script because it is way too painful to deal with Windows' CMD.exe +# ways of escaping things when pushing JSON + +set -xe + +TC_EXPIRE=$1 +TC_INSTANCE=$2 +TC_INDEX=$3 + +source $(dirname "$0")/tc-tests-utils.sh + +if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! -z "${TC_INDEX}" ]; then + curl -sSL --fail -X PUT \ + -H "Content-Type: application/json" \ + -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ + "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" +fi; From b52139ceb6ef54036b06a7140cf565178f21f654 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 16 Jun 2020 23:50:04 +0200 Subject: [PATCH 20/38] Fix #3075: Add Android 11 to CI --- taskcluster/.shared.yml | 3 +++ .../android-cache-x86_64-android-30.yml | 14 +++++++++++ .../test-apk-android-30-x86_64-opt.yml | 23 +++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 taskcluster/android-cache-x86_64-android-30.yml create mode 100644 taskcluster/test-apk-android-30-x86_64-opt.yml diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index b04ff9de08..5b174b31e0 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -99,6 +99,9 @@ system: android_29: url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-29.0/artifacts/public/android_cache.tar.gz' namespace: 'project.deepspeech.android_cache.x86_64.android-29.0' + android_30: + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-30.0/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-30.0' sdk: android_27: url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.sdk.android-27.4/artifacts/public/android_cache.tar.gz' diff --git a/taskcluster/android-cache-x86_64-android-30.yml b/taskcluster/android-cache-x86_64-android-30.yml new file mode 100644 index 0000000000..217459aa87 --- /dev/null +++ b/taskcluster/android-cache-x86_64-android-30.yml @@ -0,0 +1,14 @@ +build: + template_file: generic_tc_caching-linux-opt-base.tyml + system_setup: + > + ${java.packages_trusty.apt} + cache: + artifact_url: ${system.android_cache.x86_64.android_30.url} + artifact_namespace: ${system.android_cache.x86_64.android_30.namespace} + scripts: + build: "taskcluster/android_cache-build.sh x86_64 android-30" + package: "taskcluster/android_cache-package.sh" + metadata: + name: "Builds Android cache x86_64 / android-30" + description: "Setup an Android SDK / emulator cache for Android / x86_64 android-30" diff --git a/taskcluster/test-apk-android-30-x86_64-opt.yml b/taskcluster/test-apk-android-30-x86_64-opt.yml new file mode 100644 index 0000000000..c3ee8265db --- /dev/null +++ b/taskcluster/test-apk-android-30-x86_64-opt.yml @@ -0,0 +1,23 @@ +build: + template_file: test-android-opt-base.tyml + dependencies: + - "android-x86_64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + - "swig-linux-amd64" + - "gradle-cache" + - "android-cache-x86_64-android-30" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + apt-get -qq -y install curl make python + cache: + url: ${system.android_cache.x86_64.android_30.url} + namespace: ${system.android_cache.x86_64.android_30.namespace} + gradle_cache: + url: ${system.gradle_cache.url} + namespace: ${system.gradle_cache.namespace} + args: + tests_cmdline: 
"${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-android-apk-tests.sh x86_64 android-30" + metadata: + name: "DeepSpeech Android 11.0 x86_64 Google Pixel APK/Java tests" + description: "Testing DeepSpeech APK/Java for Android 11.0 x86_64 Google Pixel, optimized version" From 0fd28cfbdf0bd2add0a4467ade81efef5a2812ce Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 16 Jun 2020 23:10:51 +0200 Subject: [PATCH 21/38] Updating caches --- taskcluster/.shared.yml | 76 ++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 5b174b31e0..9367974101 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -57,68 +57,68 @@ nodejs: prep_14: '/usr/bin/wget.exe https://nodejs.org/dist/v14.3.0/node-v14.3.0-win-x64.zip && ""C:\Program Files\7-zip\7z.exe"" x -o$TASKCLUSTER_NODE_DIR -tzip -aoa node-v14.3.0-win-x64.zip && rm node-*.zip && export PATH=$TASKCLUSTER_TASK_DIR/bin/node-v14.3.0-win-x64/:$PATH' system: node_gyp_cache: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.node-gyp-cache.4/artifacts/public/node-gyp-cache.tar.gz' - namespace: 'project.deepspeech.node-gyp-cache.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.node-gyp-cache.6/artifacts/public/node-gyp-cache.tar.gz' + namespace: 'project.deepspeech.node-gyp-cache.6' homebrew_builds: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.5/artifacts/public/homebrew_builds.tar.gz' - namespace: 'project.deepspeech.homebrew_builds.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.7/artifacts/public/homebrew_builds.tar.gz' + namespace: 'project.deepspeech.homebrew_builds.7' homebrew_tests: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.6/artifacts/public/homebrew_tests.tar.gz' - namespace: 'project.deepspeech.homebrew_tests.6' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.8/artifacts/public/homebrew_tests.tar.gz' + namespace: 'project.deepspeech.homebrew_tests.8' android_cache: arm64_v8a: android_24: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-24.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.arm64-v8a.android-24.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-24.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.arm64-v8a.android-24.6' android_25: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-25.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.arm64-v8a.android-25.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-25.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.arm64-v8a.android-25.6' armeabi_v7a: android_24: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-24.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-24.4' + url: 
'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-24.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-24.6' android_25: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-25.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-25.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-25.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-25.6' x86_64: android_24: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-24.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-24.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-24.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-24.6' android_25: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-25.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-25.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-25.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-25.6' android_26: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-26.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-26.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-26.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-26.2' android_27: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-27.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-27.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-27.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-27.2' android_28: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-28.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-28.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-28.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-28.2' android_29: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-29.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-29.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-29.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-29.2' android_30: - url: 
'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-30.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-30.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-30.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-30.2' sdk: android_27: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.sdk.android-27.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.sdk.android-27.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.sdk.android-27.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.sdk.android-27.6' gradle_cache: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.gradle.4/artifacts/public/gradle.tar.gz' - namespace: 'project.deepspeech.gradle.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.gradle.6/artifacts/public/gradle.tar.gz' + namespace: 'project.deepspeech.gradle.6' pyenv: linux: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.5/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.linux.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.7/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.linux.7' osx: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.5/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.osx.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.7/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.osx.7' win: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.5/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.win.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.7/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.win.7' swig: repo: "https://github.com/lissyx/swig" sha1: "b5fea54d39832d1d132d7dd921b69c0c2c9d5118" From a47c9a2b8c847a621abbb46ed892b518f118f20e Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 16 Jun 2020 23:44:00 +0200 Subject: [PATCH 22/38] Request android default instead of google_apis It seems some armv7a image disappeared --- taskcluster/android-cache-armeabi-v7a-android-24.yml | 2 +- taskcluster/android_cache-build.sh | 3 ++- taskcluster/tc-android-utils.sh | 9 +++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/taskcluster/android-cache-armeabi-v7a-android-24.yml b/taskcluster/android-cache-armeabi-v7a-android-24.yml index 3536ab3fa6..4c686739db 100644 --- a/taskcluster/android-cache-armeabi-v7a-android-24.yml +++ b/taskcluster/android-cache-armeabi-v7a-android-24.yml @@ -7,7 +7,7 @@ build: artifact_url: ${system.android_cache.armeabi_v7a.android_24.url} artifact_namespace: ${system.android_cache.armeabi_v7a.android_24.namespace} scripts: - build: "taskcluster/android_cache-build.sh armeabi-v7a android-24" + build: "taskcluster/android_cache-build.sh armeabi-v7a android-24 default" package: "taskcluster/android_cache-package.sh" metadata: name: 
"Builds Android cache armeabi-v7a / android-24" diff --git a/taskcluster/android_cache-build.sh b/taskcluster/android_cache-build.sh index c22ece4720..b1e1b8436f 100755 --- a/taskcluster/android_cache-build.sh +++ b/taskcluster/android_cache-build.sh @@ -6,6 +6,7 @@ source $(dirname "$0")/tc-tests-utils.sh arm_flavor=$1 api_level=$2 +api_kind=$3 export ANDROID_HOME=${ANDROID_SDK_HOME} @@ -17,5 +18,5 @@ android_install_sdk android_install_sdk_platform "android-27" if [ "${arm_flavor}" != "sdk" ]; then - android_setup_emulator "${arm_flavor}" "${api_level}" + android_setup_emulator "${arm_flavor}" "${api_level}" "${api_kind}" fi; diff --git a/taskcluster/tc-android-utils.sh b/taskcluster/tc-android-utils.sh index a71edfb3d6..3bf66927f5 100755 --- a/taskcluster/tc-android-utils.sh +++ b/taskcluster/tc-android-utils.sh @@ -112,6 +112,11 @@ android_setup_emulator() local _flavor=$1 local _api_level=${2:-android-25} + local _api_kind=${3:-google_apis} + + if [ -z "${_api_kind}" ]; then + _api_kind="google_apis" + fi export PATH=${ANDROID_SDK_HOME}/tools/bin/:${ANDROID_SDK_HOME}/platform-tools/:$PATH export DS_BINARY_PREFIX="adb shell LD_LIBRARY_PATH=${ANDROID_TMP_DIR}/ds/ ${ANDROID_TMP_DIR}/ds/" @@ -123,11 +128,11 @@ android_setup_emulator() android_install_sdk_platform "${_api_level}" # Same, yes in case of license - yes | sdkmanager --install "system-images;${_api_level};google_apis;${_flavor}" + yes | sdkmanager --install "system-images;${_api_level};${_api_kind};${_flavor}" android_sdk_accept_licenses - avdmanager create avd --name "${_flavor}-ds-pixel-${_api_level}" --device 17 --package "system-images;${_api_level};google_apis;${_flavor}" + avdmanager create avd --name "${_flavor}-ds-pixel-${_api_level}" --device 17 --package "system-images;${_api_level};${_api_kind};${_flavor}" } android_start_emulator() From 4e3b4bb3a6a608472382e5a45b3243aee14a3c2a Mon Sep 17 00:00:00 2001 From: Anubhav <42858179+eagledot@users.noreply.github.com> Date: Wed, 17 Jun 2020 10:07:57 +0530 Subject: [PATCH 23/38] Added third-party bindings for NIM-lang. --- doc/USING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/USING.rst b/doc/USING.rst index 9032fe3408..6085e3cf90 100644 --- a/doc/USING.rst +++ b/doc/USING.rst @@ -213,4 +213,4 @@ In addition to the bindings above, third party developers have started to provid * `stes `_ provides preliminary `PKGBUILDs `_ to install the client and python bindings on `Arch Linux `_ in the `arch-deepspeech `_ repo. * `gst-deepspeech `_ provides a `GStreamer `_ plugin which can be used from any language with GStreamer bindings. * `thecodrr `_ provides `Vlang `_ bindings. The installation and use of which is described in their `vspeech `_ repo. - +* `eagledot `_ provides `NIM-lang `_ bindings. The installation and use of which is described in their `nim-deepspeech `_ repo. 
From f4f4903b2bf0da15163d65e01e5cc3c71bfba81a Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 16 Jun 2020 22:14:24 +0200 Subject: [PATCH 24/38] Fix #3071: Don't reinstall TensorFlow on top of TensorFlow --- Dockerfile.train.tmpl | 17 +++++++++-------- setup.py | 10 +++++++++- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl index 25afc455a7..6243c575c2 100644 --- a/Dockerfile.train.tmpl +++ b/Dockerfile.train.tmpl @@ -18,6 +18,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ unzip \ wget +# We need to remove it because it's breaking deepspeech install later with +# weird errors about setuptools +RUN apt-get purge -y python3-xdg + WORKDIR / RUN git lfs install RUN git clone $DEEPSPEECH_REPO @@ -25,12 +29,6 @@ RUN git clone $DEEPSPEECH_REPO WORKDIR /DeepSpeech RUN git checkout $DEEPSPEECH_SHA -# Setup a virtualenv otherwise we mess with the system and this is BAD. -RUN python3 -m venv venv/ - -ENV VIRTUAL_ENV=/DeepSpeech/venv -ENV PATH=$VIRTUAL_ENV/bin:$PATH - # Build CTC decoder first, to avoid clashes on incompatible versions upgrades RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl @@ -38,7 +36,10 @@ RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl # Prepare deps RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3 -# Install DeepSpeech, no need for the decoder since we did it earlier -RUN DS_NODECODER=y pip3 install --upgrade --force-reinstall -e . +# Install DeepSpeech +# - No need for the decoder since we did it earlier +# - There is already correct TensorFlow GPU installed on the base image, +# we don't want to break that +RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade --force-reinstall -e . RUN ./bin/run-ldc93s1.sh diff --git a/setup.py b/setup.py index 6811b7170b..7fd995c86a 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,6 @@ def main(): version = fin.read().strip() install_requires_base = [ - 'tensorflow == 1.15.2', 'numpy', 'progressbar2', 'six', @@ -74,6 +73,10 @@ def main(): 'ds_ctcdecoder == {}'.format(version) ] + tensorflow_pypi_dep = [ + 'tensorflow == 1.15.2' + ] + # Due to pip craziness environment variables are the only consistent way to # get options into this script when doing `pip install`. 
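    # Illustration only (hypothetical invocations, not part of this diff);
    # these are the switches used by Dockerfile.train.tmpl and checked below:
    #
    #   DS_NODECODER=y pip3 install -e .      # skip the ds_ctcdecoder requirement
    #   DS_NOTENSORFLOW=y pip3 install -e .   # keep the TensorFlow already installed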
tc_decoder_artifacts_root = os.environ.get('DECODER_ARTIFACTS_ROOT', '')
@@ -87,6 +90,11 @@ def main():
     else:
         install_requires = install_requires_base + decoder_pypi_dep
 
+    if len(os.environ.get('DS_NOTENSORFLOW', '')) > 0:
+        install_requires = install_requires
+    else:
+        install_requires = install_requires + tensorflow_pypi_dep
+
     setup(
         name='deepspeech_training',
         version=version,

From 07c8daef43f94b04dfe978eda81388388406ad6c Mon Sep 17 00:00:00 2001
From: lissyx <1645737+lissyx@users.noreply.github.com>
Date: Wed, 17 Jun 2020 12:50:17 +0200
Subject: [PATCH 25/38] Update setup.py

Co-authored-by: Reuben Morais
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7fd995c86a..91a9af5bcc 100644
--- a/setup.py
+++ b/setup.py
@@ -90,7 +90,7 @@ def main():
     else:
         install_requires = install_requires_base + decoder_pypi_dep
 
-    if len(os.environ.get('DS_NOTENSORFLOW', '')) > 0:
+    if os.environ.get('DS_NOTENSORFLOW', ''):
         install_requires = install_requires
     else:
         install_requires = install_requires + tensorflow_pypi_dep

From 6ccbbede090df1afa96861ae06ff0eaba86e3488 Mon Sep 17 00:00:00 2001
From: Reuben Morais
Date: Wed, 17 Jun 2020 15:26:31 +0200
Subject: [PATCH 26/38] Remove --force-reinstall from training code install

No longer needed since we started publishing ds_ctcdecoder on PyPI.
---
 Dockerfile.train.tmpl | 2 +-
 doc/TRAINING.rst      | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl
index 6243c575c2..9e9c9b642c 100644
--- a/Dockerfile.train.tmpl
+++ b/Dockerfile.train.tmpl
@@ -40,6 +40,6 @@ RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
 # - No need for the decoder since we did it earlier
 # - There is already correct TensorFlow GPU installed on the base image,
 #   we don't want to break that
-RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade --force-reinstall -e .
+RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
 
 RUN ./bin/run-ldc93s1.sh
diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst
index b7e3014b34..978caef1a8 100644
--- a/doc/TRAINING.rst
+++ b/doc/TRAINING.rst
@@ -47,7 +47,9 @@ Install the required dependencies using ``pip3``\ :
    cd DeepSpeech
    pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
-   pip3 install --upgrade --force-reinstall -e .
+   pip3 install --upgrade -e .
+
+Remember to re-run the last ``pip3 install`` command above when you update the training code (for example by pulling new changes), in order to update any dependencies.
 
 The ``webrtcvad`` Python package might require you to ensure you have proper tooling to build Python modules:

From 3f8033e1f1aa84558d50bfd992d38ef5582dd500 Mon Sep 17 00:00:00 2001
From: Daniel
Date: Thu, 18 Jun 2020 12:24:56 +0200
Subject: [PATCH 27/38] Add dependencies for new audio augmentation flags.
 Fixes #3082.
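A quick smoke test for the result (an illustrative command, not part of
this change): libopus0 and libsndfile1 ship the shared objects
libopus.so.0 and libsndfile.so.1, so inside the built image they should
load cleanly:

    python3 -c "import ctypes; ctypes.CDLL('libopus.so.0'); ctypes.CDLL('libsndfile.so.1')"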
--- Dockerfile.train.tmpl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl index 6243c575c2..426a80f85a 100644 --- a/Dockerfile.train.tmpl +++ b/Dockerfile.train.tmpl @@ -1,6 +1,7 @@ # Please refer to the TRAINING documentation, "Basic Dockerfile for training" FROM tensorflow/tensorflow:1.15.2-gpu-py3 +ARG DEBIAN_FRONTEND=noninteractive ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA# @@ -22,6 +23,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # weird errors about setuptools RUN apt-get purge -y python3-xdg +# Install dependencies for audio augmentation +RUN apt-get install -y --no-install-recommends libopus0 libsndfile1 + WORKDIR / RUN git lfs install RUN git clone $DEEPSPEECH_REPO From eda5f69f2d902d7f5d357519e7f20aae38f656b2 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 18 Jun 2020 15:20:28 +0200 Subject: [PATCH 28/38] Install checkpoint converting tool. --- Dockerfile.train.tmpl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl index 426a80f85a..c9a9211750 100644 --- a/Dockerfile.train.tmpl +++ b/Dockerfile.train.tmpl @@ -46,4 +46,8 @@ RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3 # we don't want to break that RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade --force-reinstall -e . +# Tool to convert output graph for inference +RUN python3 /DeepSpeech/util/taskcluster.py --source tensorflow --branch r1.15 \ + --artifact convert_graphdef_memmapped_format --target /DeepSpeech/ + RUN ./bin/run-ldc93s1.sh From bc31eb4b9ef5d311ff6cee70c2c23a4cd947973c Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 18 Jun 2020 15:20:46 +0200 Subject: [PATCH 29/38] Fix usage of ARG instead of ENV in Dockerfile.train --- Dockerfile.train.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl index 63198c2352..1cc3dffc02 100644 --- a/Dockerfile.train.tmpl +++ b/Dockerfile.train.tmpl @@ -1,7 +1,7 @@ # Please refer to the TRAINING documentation, "Basic Dockerfile for training" FROM tensorflow/tensorflow:1.15.2-gpu-py3 -ARG DEBIAN_FRONTEND=noninteractive +ENV DEBIAN_FRONTEND=noninteractive ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA# From 5edc1cf5033ecec4d1bcaa9792508d589c617575 Mon Sep 17 00:00:00 2001 From: Reuben Morais Date: Thu, 18 Jun 2020 14:55:53 +0200 Subject: [PATCH 30/38] Bump VERSION to 0.7.4 --- doc/USING.rst | 8 ++++---- doc/index.rst | 12 ++++++------ training/deepspeech_training/VERSION | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/USING.rst b/doc/USING.rst index 6085e3cf90..6496b164ee 100644 --- a/doc/USING.rst +++ b/doc/USING.rst @@ -37,8 +37,8 @@ If you want to use the pre-trained English model for performing speech-to-text, .. code-block:: bash - wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm - wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer + wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.pbmm + wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.scorer Model compatibility ^^^^^^^^^^^^^^^^^^^ @@ -113,7 +113,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett .. 
code-block:: bash - deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio my_audio_file.wav + deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio my_audio_file.wav The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio. @@ -177,7 +177,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett .. code-block:: bash - ./deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio_input.wav + ./deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio_input.wav See the help output with ``./deepspeech -h`` for more details. diff --git a/doc/index.rst b/doc/index.rst index 9bcfe91236..659439ee0f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -20,15 +20,15 @@ To install and use DeepSpeech all you have to do is: pip3 install deepspeech # Download pre-trained English model files - curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm - curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer + curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.pbmm + curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.scorer # Download example audio files - curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/audio-0.7.3.tar.gz - tar xvf audio-0.7.3.tar.gz + curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/audio-0.7.4.tar.gz + tar xvf audio-0.7.4.tar.gz # Transcribe an audio file - deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav + deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio/2830-3980-0043.wav A pre-trained English model is available for use and can be downloaded following the instructions in :ref:`the usage docs `. For the latest release, including pre-trained models and checkpoints, `see the GitHub releases page `_. @@ -44,7 +44,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th pip3 install deepspeech-gpu # Transcribe an audio file. - deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav + deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio/2830-3980-0043.wav Please ensure you have the required :ref:`CUDA dependencies `. diff --git a/training/deepspeech_training/VERSION b/training/deepspeech_training/VERSION index f38fc5393f..0a1ffad4b4 100644 --- a/training/deepspeech_training/VERSION +++ b/training/deepspeech_training/VERSION @@ -1 +1 @@ -0.7.3 +0.7.4 From e17619bec8a1fa00b191e3e1a0bb9d93165da4e0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 18 Jun 2020 17:26:38 +0200 Subject: [PATCH 31/38] Make paths relative. --- Dockerfile.train.tmpl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl index c9a9211750..da23acabcc 100644 --- a/Dockerfile.train.tmpl +++ b/Dockerfile.train.tmpl @@ -47,7 +47,7 @@ RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3 RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade --force-reinstall -e . 
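# Illustrative note (assuming the WORKDIR /DeepSpeech set earlier in this
# file): the relative paths below are resolved against that directory, so
# the fetch is equivalent to, e.g.:
#   cd /DeepSpeech && python3 util/taskcluster.py --source tensorflow \
#     --branch r1.15 --artifact convert_graphdef_memmapped_format --target .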
# Tool to convert output graph for inference -RUN python3 /DeepSpeech/util/taskcluster.py --source tensorflow --branch r1.15 \ - --artifact convert_graphdef_memmapped_format --target /DeepSpeech/ +RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \ + --artifact convert_graphdef_memmapped_format --target . RUN ./bin/run-ldc93s1.sh From da96d14eaaecbb252b3dc922b1e27f86feac9e62 Mon Sep 17 00:00:00 2001 From: Tilman Kamp <5991088+tilmankamp@users.noreply.github.com> Date: Fri, 19 Jun 2020 11:10:09 +0200 Subject: [PATCH 32/38] Fix #3089 - Recreate overlay queue on augmentation restart --- training/deepspeech_training/util/augmentations.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py index a86b60a978..b7033c51ee 100644 --- a/training/deepspeech_training/util/augmentations.py +++ b/training/deepspeech_training/util/augmentations.py @@ -244,11 +244,12 @@ def __init__(self, source, p=1.0, snr=3.0, layers=1): self.source = source self.snr = float_range(snr) self.layers = int_range(layers) - self.queue = Queue(max(1, math.floor(self.probability * self.layers[1] * os.cpu_count()))) self.current_sample = None + self.queue = None self.enqueue_process = None def start(self, buffering=BUFFER_SIZE): + self.queue = Queue(max(1, math.floor(self.probability * self.layers[1] * os.cpu_count()))) self.enqueue_process = Process(target=_enqueue_overlay_samples, args=(self.source, self.queue), kwargs={'buffering': buffering}) @@ -285,6 +286,9 @@ def apply(self, sample, clock=0.0): def stop(self): if self.enqueue_process is not None: self.enqueue_process.terminate() + self.enqueue_process = None + self.current_sample = None + self.queue = None class Codec(SampleAugmentation): From 41d7b4e6f053306c13e94e6d306a964bce6da16e Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 2 Jun 2020 14:03:15 +0200 Subject: [PATCH 33/38] Use TensorFlow r2.2 artifacts --- taskcluster/.build.yml | 2 +- taskcluster/android-arm64-cpu-opt.yml | 2 +- taskcluster/android-armv7-cpu-opt.yml | 2 +- taskcluster/android-java-opt.yml | 2 +- taskcluster/android-x86_64-cpu-opt.yml | 2 +- taskcluster/darwin-amd64-cpu-opt.yml | 2 +- taskcluster/darwin-amd64-ctc-opt.yml | 2 +- taskcluster/darwin-amd64-tflite-opt.yml | 2 +- taskcluster/linux-amd64-cpu-opt.yml | 2 +- taskcluster/linux-amd64-ctc-opt.yml | 2 +- taskcluster/linux-amd64-gpu-opt.yml | 2 +- taskcluster/linux-amd64-tflite-opt.yml | 2 +- taskcluster/linux-arm64-cpu-opt.yml | 2 +- taskcluster/linux-rpi3-cpu-opt.yml | 2 +- taskcluster/win-amd64-cpu-opt.yml | 2 +- taskcluster/win-amd64-ctc-opt.yml | 2 +- taskcluster/win-amd64-gpu-opt.yml | 2 +- taskcluster/win-amd64-tflite-opt.yml | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index fee2c1d6c9..4e9ab63d81 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -22,7 +22,7 @@ build: nc_asset_name: 'native_client.tar.xz' args: tests_cmdline: '' - tensorflow_git_desc: 'TensorFlow: v1.15.0-24-gceb46aa' + tensorflow_git_desc: 'TensorFlow: v2.2.0-12-gc29895f' test_model_task: '' homebrew: url: '' diff --git a/taskcluster/android-arm64-cpu-opt.yml b/taskcluster/android-arm64-cpu-opt.yml index fcd1b2bedb..43b756cc23 100644 --- a/taskcluster/android-arm64-cpu-opt.yml +++ b/taskcluster/android-arm64-cpu-opt.yml @@ -8,7 +8,7 @@ build: - 
"index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-arm64" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-arm64" - "index.project.deepspeech.deepspeech.native_client.android-arm64.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-arm64/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-arm64/artifacts/public/home.tar.xz" scripts: build: "taskcluster/android-build.sh arm64-v8a" package: "taskcluster/android-package.sh arm64-v8a" diff --git a/taskcluster/android-armv7-cpu-opt.yml b/taskcluster/android-armv7-cpu-opt.yml index 2578758b64..168b542beb 100644 --- a/taskcluster/android-armv7-cpu-opt.yml +++ b/taskcluster/android-armv7-cpu-opt.yml @@ -8,7 +8,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-armv7" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-armv7" - "index.project.deepspeech.deepspeech.native_client.android-armv7.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-armv7/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-armv7/artifacts/public/home.tar.xz" scripts: build: "taskcluster/android-build.sh armeabi-v7a" package: "taskcluster/android-package.sh armeabi-v7a" diff --git a/taskcluster/android-java-opt.yml b/taskcluster/android-java-opt.yml index 7ca7d0f272..268e6a5245 100644 --- a/taskcluster/android-java-opt.yml +++ b/taskcluster/android-java-opt.yml @@ -14,7 +14,7 @@ build: system_setup: > ${java.packages_trusty.apt} - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-armv7/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-armv7/artifacts/public/home.tar.xz" gradle_cache: url: ${system.gradle_cache.url} namespace: ${system.gradle_cache.namespace} diff --git a/taskcluster/android-x86_64-cpu-opt.yml b/taskcluster/android-x86_64-cpu-opt.yml index 447822ab40..a304fe9f65 100644 --- a/taskcluster/android-x86_64-cpu-opt.yml +++ b/taskcluster/android-x86_64-cpu-opt.yml @@ -8,7 +8,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-x86_64" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-x86_64" - "index.project.deepspeech.deepspeech.native_client.android-x86_64.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-arm64/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-arm64/artifacts/public/home.tar.xz" scripts: build: 
"taskcluster/android-build.sh x86_64" package: "taskcluster/android-package.sh x86_64" diff --git a/taskcluster/darwin-amd64-cpu-opt.yml b/taskcluster/darwin-amd64-cpu-opt.yml index 8589436093..12f41b2ff5 100644 --- a/taskcluster/darwin-amd64-cpu-opt.yml +++ b/taskcluster/darwin-amd64-cpu-opt.yml @@ -9,7 +9,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx" - "index.project.deepspeech.deepspeech.native_client.osx.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.osx/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/darwin-amd64-ctc-opt.yml b/taskcluster/darwin-amd64-ctc-opt.yml index 0e3e51e6b2..0f80f31e2c 100644 --- a/taskcluster/darwin-amd64-ctc-opt.yml +++ b/taskcluster/darwin-amd64-ctc-opt.yml @@ -9,7 +9,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx-ctc" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx-ctc" - "index.project.deepspeech.deepspeech.native_client.osx-ctc.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.osx/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz" maxRunTime: 14400 scripts: build: 'taskcluster/decoder-build.sh' diff --git a/taskcluster/darwin-amd64-tflite-opt.yml b/taskcluster/darwin-amd64-tflite-opt.yml index 4ab6c7c7d7..4a22e0dcaf 100644 --- a/taskcluster/darwin-amd64-tflite-opt.yml +++ b/taskcluster/darwin-amd64-tflite-opt.yml @@ -9,7 +9,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx-tflite" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx-tflite" - "index.project.deepspeech.deepspeech.native_client.osx-tflite.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.osx/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh tflite" package: "taskcluster/package.sh" diff --git a/taskcluster/linux-amd64-cpu-opt.yml b/taskcluster/linux-amd64-cpu-opt.yml index e2af482b30..db67d75560 100644 --- a/taskcluster/linux-amd64-cpu-opt.yml +++ b/taskcluster/linux-amd64-cpu-opt.yml @@ -12,7 +12,7 @@ build: > ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.cpu/artifacts/public/home.tar.xz" + 
tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/linux-amd64-ctc-opt.yml b/taskcluster/linux-amd64-ctc-opt.yml index 00af6149d1..5bedbb8997 100644 --- a/taskcluster/linux-amd64-ctc-opt.yml +++ b/taskcluster/linux-amd64-ctc-opt.yml @@ -12,7 +12,7 @@ build: > ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.cpu/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: build: 'taskcluster/decoder-build.sh' package: 'taskcluster/decoder-package.sh' diff --git a/taskcluster/linux-amd64-gpu-opt.yml b/taskcluster/linux-amd64-gpu-opt.yml index 532b8bb75f..96b0096783 100644 --- a/taskcluster/linux-amd64-gpu-opt.yml +++ b/taskcluster/linux-amd64-gpu-opt.yml @@ -12,7 +12,7 @@ build: > ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.gpu/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.gpu/artifacts/public/home.tar.xz" maxRunTime: 14400 scripts: build: "taskcluster/cuda-build.sh" diff --git a/taskcluster/linux-amd64-tflite-opt.yml b/taskcluster/linux-amd64-tflite-opt.yml index 3d4847f91b..6d0d78638f 100644 --- a/taskcluster/linux-amd64-tflite-opt.yml +++ b/taskcluster/linux-amd64-tflite-opt.yml @@ -12,7 +12,7 @@ build: > ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.cpu/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh tflite" package: "taskcluster/package.sh" diff --git a/taskcluster/linux-arm64-cpu-opt.yml b/taskcluster/linux-arm64-cpu-opt.yml index 1865784ec8..776d8d30ca 100644 --- a/taskcluster/linux-arm64-cpu-opt.yml +++ b/taskcluster/linux-arm64-cpu-opt.yml @@ -19,7 +19,7 @@ build: system_config: > multistrap -d /tmp/multistrap-armbian64-buster/ -f ${system.homedir.linux}/DeepSpeech/ds/native_client/multistrap_armbian64_buster.conf - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.arm64/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.arm64/artifacts/public/home.tar.xz" scripts: 
build: "taskcluster/arm64-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/linux-rpi3-cpu-opt.yml b/taskcluster/linux-rpi3-cpu-opt.yml index 0cabb87022..cdce0bc0f7 100644 --- a/taskcluster/linux-rpi3-cpu-opt.yml +++ b/taskcluster/linux-rpi3-cpu-opt.yml @@ -19,7 +19,7 @@ build: system_config: > multistrap -d /tmp/multistrap-raspbian-buster/ -f ${system.homedir.linux}/DeepSpeech/ds/native_client/multistrap_raspbian_buster.conf - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.arm/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.arm/artifacts/public/home.tar.xz" scripts: build: "taskcluster/rpi3-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/win-amd64-cpu-opt.yml b/taskcluster/win-amd64-cpu-opt.yml index 3251209467..b7dbba5bc8 100644 --- a/taskcluster/win-amd64-cpu-opt.yml +++ b/taskcluster/win-amd64-cpu-opt.yml @@ -8,7 +8,7 @@ build: - "node-gyp-cache" - "swig-win-amd64" - "pyenv-win-amd64" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win/artifacts/public/home.tar.xz" scripts: build: "taskcluster/win-build.sh" package: "taskcluster/win-package.sh" diff --git a/taskcluster/win-amd64-ctc-opt.yml b/taskcluster/win-amd64-ctc-opt.yml index ebd37445a5..b17bc53c33 100644 --- a/taskcluster/win-amd64-ctc-opt.yml +++ b/taskcluster/win-amd64-ctc-opt.yml @@ -8,7 +8,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.win-ctc" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.win-ctc" - "index.project.deepspeech.deepspeech.native_client.win-ctc.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win/artifacts/public/home.tar.xz" scripts: build: 'taskcluster/decoder-build.sh' package: 'taskcluster/decoder-package.sh' diff --git a/taskcluster/win-amd64-gpu-opt.yml b/taskcluster/win-amd64-gpu-opt.yml index 077e79db46..b9c99395c7 100644 --- a/taskcluster/win-amd64-gpu-opt.yml +++ b/taskcluster/win-amd64-gpu-opt.yml @@ -8,7 +8,7 @@ build: - "node-gyp-cache" - "swig-win-amd64" - "pyenv-win-amd64" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win-cuda/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win-cuda/artifacts/public/home.tar.xz" scripts: build: "taskcluster/win-build.sh --cuda" package: "taskcluster/win-package.sh" diff --git a/taskcluster/win-amd64-tflite-opt.yml b/taskcluster/win-amd64-tflite-opt.yml index 993f04f38b..629380b7fe 100644 --- a/taskcluster/win-amd64-tflite-opt.yml +++ 
b/taskcluster/win-amd64-tflite-opt.yml @@ -8,7 +8,7 @@ build: - "node-gyp-cache" - "swig-win-amd64" - "pyenv-win-amd64" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win/artifacts/public/home.tar.xz" scripts: build: "taskcluster/win-build.sh --tflite" package: "taskcluster/win-package.sh" From bc086ec998d34adcb7d480b3bc9a0919312e3916 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 2 Jun 2020 14:06:38 +0200 Subject: [PATCH 34/38] Build DeepSpeech using TensorFlow r2.2 --- Dockerfile.build.tmpl | 15 +++++++++------ doc/USING.rst | 2 +- native_client/BUILD | 12 +++++------- native_client/README.rst | 8 ++++---- native_client/ctcdecode/Makefile | 8 ++++---- native_client/definitions.mk | 2 +- native_client/dotnet/README.rst | 8 ++++---- native_client/javascript/Makefile | 4 ---- native_client/javascript/package.json.in | 2 +- native_client/python/Makefile | 4 ++-- native_client/tfmodelstate.cc | 2 +- taskcluster/.build.yml | 2 +- taskcluster/linux-opt-base.tyml | 6 ++---- taskcluster/tc-build-utils.sh | 4 ++-- taskcluster/test-win-cuda-opt-base.tyml | 2 +- taskcluster/win-opt-base.tyml | 6 +++--- taskcluster/worker.cyml | 4 ++-- 17 files changed, 43 insertions(+), 48 deletions(-) diff --git a/Dockerfile.build.tmpl b/Dockerfile.build.tmpl index a4eebb6eba..73c8bdd2a8 100644 --- a/Dockerfile.build.tmpl +++ b/Dockerfile.build.tmpl @@ -1,7 +1,7 @@ # Please refer to the USING documentation, "Dockerfile for building from source" # Need devel version cause we need /usr/include/cudnn.h -FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 +FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO# ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA# @@ -46,7 +46,7 @@ RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1 RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 # Install Bazel -RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb" +RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/2.0.0/bazel_2.0.0-linux-x86_64.deb" RUN dpkg -i bazel_*.deb # << END Install base software @@ -56,12 +56,15 @@ RUN dpkg -i bazel_*.deb # Clone TensorFlow from Mozilla repo RUN git clone https://github.com/mozilla/tensorflow/ WORKDIR /tensorflow -RUN git checkout r1.15 +RUN git checkout r2.2 # GPU Environment Setup +ENV TF_NEED_ROCM 0 +ENV TF_NEED_OPENCL_SYCL 0 +ENV TF_NEED_OPENCL 0 ENV TF_NEED_CUDA 1 -ENV TF_CUDA_PATHS "/usr,/usr/local/cuda,/usr/lib/x86_64-linux-gnu/" -ENV TF_CUDA_VERSION 10.0 +ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/" +ENV TF_CUDA_VERSION 10.1 ENV TF_CUDNN_VERSION 7.6 ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0 ENV TF_NCCL_VERSION 2.4 @@ -94,7 +97,7 @@ ENV TF_NEED_VERBS 0 ENV TF_NEED_OPENCL_SYCL 0 ENV PYTHON_BIN_PATH /usr/bin/python3.6 -ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages +ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages # << END Configure Tensorflow Build diff --git a/doc/USING.rst b/doc/USING.rst index 6496b164ee..db3013d825 100644 --- a/doc/USING.rst +++ b/doc/USING.rst @@ -28,7 +28,7 @@ Please refer to your system's documentation on how to install these dependencies CUDA dependency ^^^^^^^^^^^^^^^ -The GPU capable builds (Python, 
NodeJS, C++, etc) depend on the same CUDA runtime as upstream TensorFlow. Currently with TensorFlow 1.15 it depends on CUDA 10.0 and CuDNN v7.6. `See the TensorFlow documentation `_. +The GPU capable builds (Python, NodeJS, C++, etc) depend on the same CUDA runtime as upstream TensorFlow. Currently with TensorFlow 2.2 it depends on CUDA 10.1 and CuDNN v7.6. `See the TensorFlow documentation `_. Getting the pre-trained model ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/native_client/BUILD b/native_client/BUILD index 1e4a66ebb1..0798b82dfe 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -1,10 +1,8 @@ # Description: Deepspeech native client library. -load( - "@org_tensorflow//tensorflow:tensorflow.bzl", - "if_cuda", - "tf_cc_shared_object", -) +load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") + load( "@org_tensorflow//tensorflow/lite:build_def.bzl", "tflite_copts", @@ -143,7 +141,6 @@ tf_cc_shared_object( ### CPU only build, libdeepspeech.so file size reduced by ~50% "//tensorflow/core/kernels:spectrogram_op", # AudioSpectrogram "//tensorflow/core/kernels:bias_op", # BiasAdd - "//tensorflow/contrib/rnn:lstm_ops_kernels", # BlockLSTM "//tensorflow/core/kernels:cast_op", # Cast "//tensorflow/core/kernels:concat_op", # ConcatV2 "//tensorflow/core/kernels:constant_op", # Const, Placeholder @@ -163,9 +160,10 @@ tf_cc_shared_object( "//tensorflow/core/kernels:softmax_op", # Softmax "//tensorflow/core/kernels:tile_ops", # Tile "//tensorflow/core/kernels:transpose_op", # Transpose + "//tensorflow/core/kernels:rnn_ops", # BlockLSTM # And we also need the op libs for these ops used in the model: "//tensorflow/core:audio_ops_op_lib", # AudioSpectrogram, Mfcc - "//tensorflow/contrib/rnn:lstm_ops_op_lib", # BlockLSTM + "//tensorflow/core:rnn_ops_op_lib", # BlockLSTM "//tensorflow/core:math_ops_op_lib", # Cast, Less, Max, MatMul, Minimum, Range "//tensorflow/core:array_ops_op_lib", # ConcatV2, Const, ExpandDims, Fill, GatherNd, Identity, Pack, Placeholder, Reshape, Tile, Transpose "//tensorflow/core:no_op_op_lib", # NoOp diff --git a/native_client/README.rst b/native_client/README.rst index 841a4546a6..d77df35dfa 100644 --- a/native_client/README.rst +++ b/native_client/README.rst @@ -5,8 +5,8 @@ Building DeepSpeech Binaries If you'd like to build the DeepSpeech binaries yourself, you'll need the following pre-requisites downloaded and installed: -* `Mozilla's TensorFlow r1.15 branch `_ -* `Bazel 0.24.1 `_ +* `Mozilla's TensorFlow r2.2 branch `_ +* `Bazel 2.0.0 `_ * `General TensorFlow requirements `_ * `libsox `_ @@ -36,12 +36,12 @@ Clone our fork of TensorFlow and checkout the correct version: .. code-block:: git clone https://github.com/mozilla/tensorflow.git - git checkout origin/r1.15 + git checkout origin/r2.2 Bazel: Download & Install ^^^^^^^^^^^^^^^^^^^^^^^^^ -First, install Bazel 0.24.1 following the `Bazel installation documentation `_. +First, install Bazel 2.0.0 following the `Bazel installation documentation `_. 
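As a quick optional check (an illustrative addition), confirm which Bazel ends up on your ``PATH``:

.. code-block:: bash

   bazel version   # should report: Build label: 2.0.0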
TensorFlow: Configure with Bazel ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile index 3cf7647833..8bff277b32 100644 --- a/native_client/ctcdecode/Makefile +++ b/native_client/ctcdecode/Makefile @@ -45,15 +45,15 @@ workspace_status.cc: # variables over several runs bindings: clean-keep-third-party workspace_status.cc ds-swig pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 - PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) + DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) find temp_build -type f -name "*.o" -delete - AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) + DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) rm -rf temp_build bindings-debug: clean-keep-third-party workspace_status.cc ds-swig pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 - PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) + DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) $(GENERATE_DEBUG_SYMS) find temp_build -type f -name "*.o" -delete - AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) + DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) rm -rf temp_build diff --git a/native_client/definitions.mk b/native_client/definitions.mk index 41acf5f39e..5ab84e7d67 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -48,7 +48,7 @@ endif endif ifeq ($(TARGET),host-win) -TOOLCHAIN := '$(VCINSTALLDIR)\bin\amd64\' +TOOLCHAIN := '$(VCToolsInstallDir)\bin\Hostx64\x64\' TOOL_CC := cl.exe TOOL_CXX := cl.exe TOOL_LD := link.exe diff --git a/native_client/dotnet/README.rst b/native_client/dotnet/README.rst index 70fdbf3d28..97ac04753e 100644 --- a/native_client/dotnet/README.rst 
+++ b/native_client/dotnet/README.rst @@ -30,11 +30,11 @@ Prerequisites * Windows 10 * `Windows 10 SDK `_ -* `Visual Studio 2017 Community `_ +* `Visual Studio 2019 Community `_ * `Git Large File Storage `_ * `TensorFlow Windows pre-requisites `_ -Inside the Visual Studio Installer enable ``MS Build Tools`` and ``VC++ 2015.3 v14.00 (v140) toolset for desktop``. +Inside the Visual Studio Installer enable ``MS Build Tools`` and ``VC++ 2019 v16.00 (v160) toolset for desktop``. If you want to enable CUDA support you need to follow the steps in `the TensorFlow docs for building on Windows with CUDA `_. @@ -51,7 +51,7 @@ We need to clone ``mozilla/DeepSpeech`` and ``mozilla/tensorflow``. .. code-block:: bash - git clone --branch r1.15 https://github.com/mozilla/tensorflow + git clone --branch r2.2 https://github.com/mozilla/tensorflow Configuring the paths --------------------- @@ -113,7 +113,7 @@ If you run CUDA enabled ``native_client`` we need to add the following to the `` .. code-block:: - C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\bin + C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\bin Building the native_client ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/native_client/javascript/Makefile b/native_client/javascript/Makefile index ac5ee8ed39..454bdc4221 100644 --- a/native_client/javascript/Makefile +++ b/native_client/javascript/Makefile @@ -41,10 +41,6 @@ package.json: package.json.in package.json.in > package.json && cat package.json npm-dev: package.json -ifeq ($(findstring _NT,$(OS)),_NT) - # node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection - $(NPM_TOOL) install node-gyp@4.x -endif $(NPM_TOOL) install --prefix=$(NPM_ROOT)/../ --ignore-scripts --force --verbose --production=false . configure: deepspeech_wrap.cxx package.json npm-dev diff --git a/native_client/javascript/package.json.in b/native_client/javascript/package.json.in index 09dad3a583..6616250878 100644 --- a/native_client/javascript/package.json.in +++ b/native_client/javascript/package.json.in @@ -40,7 +40,7 @@ }, "devDependencies": { "electron": "^1.7.9", - "node-gyp": "4.x - 5.x", + "node-gyp": "5.x", "typescript": "3.8.x", "typedoc": "0.17.x", "@types/argparse": "1.0.x", diff --git a/native_client/python/Makefile b/native_client/python/Makefile index cf17c9dbbf..7f948649af 100644 --- a/native_client/python/Makefile +++ b/native_client/python/Makefile @@ -10,7 +10,7 @@ bindings-clean: # variables over several runs bindings-build: ds-swig pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0 - PATH=$(TOOLCHAIN):$(DS_SWIG_BIN_PATH):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME) + DISTUTILS_USE_SDK=1 PATH=$(TOOLCHAIN):$(DS_SWIG_BIN_PATH):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME) MANIFEST.in: bindings-build > $@ @@ -21,6 +21,6 @@ MANIFEST.in: bindings-build bindings-package: MANIFEST.in cat MANIFEST.in rm -f temp_build/*_wrap.o temp_build/Release/*_wrap.obj - AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" 
MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) + DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS) bindings: bindings-build bindings-package diff --git a/native_client/tfmodelstate.cc b/native_client/tfmodelstate.cc index 5b1e16751d..440c44e602 100644 --- a/native_client/tfmodelstate.cc +++ b/native_client/tfmodelstate.cc @@ -118,7 +118,7 @@ TFModelState::init(const char* model_path) int beam_width = metadata_outputs[3].scalar()(); beam_width_ = (unsigned int)(beam_width); - string serialized_alphabet = metadata_outputs[4].scalar()(); + string serialized_alphabet = metadata_outputs[4].scalar()(); err = alphabet_.deserialize(serialized_alphabet.data(), serialized_alphabet.size()); if (err != 0) { return DS_ERR_INVALID_ALPHABET; diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index 4e9ab63d81..1b94c52d3e 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -9,7 +9,7 @@ build: dependencies: [] routes: [] maxRunTime: 3600 - docker_image: "ubuntu:14.04" + docker_image: "ubuntu:16.04" system_setup: > true diff --git a/taskcluster/linux-opt-base.tyml b/taskcluster/linux-opt-base.tyml index 0298954cf8..9cc0ccfefb 100644 --- a/taskcluster/linux-opt-base.tyml +++ b/taskcluster/linux-opt-base.tyml @@ -30,7 +30,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "ubuntu:14.04" + image: "ubuntu:16.04" env: TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow} @@ -43,10 +43,8 @@ then: extraSystemSetup: { $eval: strip(str(build.system_setup)) } extraSystemConfig: { $eval: strip(str(build.system_config)) } in: > - apt-get -qq -y remove --purge ubuntu-advantage-tools && - ${aptEc2Mirrors} && adduser --system --home ${system.homedir.linux} ${system.username} && - apt-get -qq update && apt-get -qq -y install ${tensorflow.packages_trusty.apt} pixz pkg-config realpath unzip wget zip && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install ${tensorflow.packages_trusty.apt} pixz pkg-config realpath sudo unzip wget zip && ${extraSystemSetup} && cd ${system.homedir.linux}/ && echo -e "#!/bin/bash\nset -xe\n env && id && (wget -O - $TENSORFLOW_BUILD_ARTIFACT | pixz -d | tar -C ${system.homedir.linux}/ -xf - ) && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && ln -s ~/DeepSpeech/ds/native_client/ ~/DeepSpeech/tf/native_client && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf - && mkdir -p ${system.homedir.linux}/pyenv-root/ && wget -O - ${system.pyenv.linux.url} | tar -C ${system.homedir.linux}/pyenv-root/ -xzf - && if [ ! -z "${build.gradle_cache.url}" ]; then wget -O - ${build.gradle_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi && if [ ! 
-z "${build.android_cache.url}" ]; then wget -O - ${build.android_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi;" > /tmp/clone.sh && chmod +x /tmp/clone.sh && sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} && diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index 5530517ef2..6a41a88a9c 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -231,7 +231,7 @@ do_deepspeech_netframework_build() # Setup dependencies nuget install DeepSpeechConsole/packages.config -OutputDirectory packages/ - MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe')" + MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe')" # We need MSYS2_ARG_CONV_EXCL='/' otherwise the '/' of CLI parameters gets mangled and disappears # We build the .NET Client for .NET Framework v4.5,v4.6,v4.7 @@ -270,7 +270,7 @@ do_deepspeech_netframework_wpf_example_build() # Setup dependencies nuget install DeepSpeechWPF/packages.config -OutputDirectory DeepSpeechWPF/packages/ - MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe')" + MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe')" # We need MSYS2_ARG_CONV_EXCL='/' otherwise the '/' of CLI parameters gets mangled and disappears # Build WPF example diff --git a/taskcluster/test-win-cuda-opt-base.tyml b/taskcluster/test-win-cuda-opt-base.tyml index eb2fb1d1fe..e5dc5ee62d 100644 --- a/taskcluster/test-win-cuda-opt-base.tyml +++ b/taskcluster/test-win-cuda-opt-base.tyml @@ -44,7 +44,7 @@ then: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > .\msys64\usr\bin\bash.exe --login -cxe "export LC_ALL=C && - export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/:$PATH\" && + export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/:$PATH\" && export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" && export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && export TASKCLUSTER_NODE_DIR=\"$(cygpath -w $TASKCLUSTER_TASK_DIR/bin)\" && diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index 8d77b64740..ff1aa4a57c 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -44,7 +44,7 @@ payload: - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - echo .\msys64\usr\bin\bash.exe --login -cxe " export LC_ALL=C && - export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" && + export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files (x86)/Windows Kits/10/bin/x64/:$PATH\" && export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" && export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && (mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) && cd $TASKCLUSTER_TASK_DIR && @@ -65,7 +65,7 @@ payload: $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh && cd $TASKCLUSTER_TASK_DIR/../ && rm -fr tc-workdir/ && exit $TASKCLUSTER_TASK_EXIT_CODE" | cmd - /k 
""C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat"" x64 + /k ""C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat"" x64 - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE" @@ -82,7 +82,7 @@ payload: - file: pyenv.tar.gz content: url: ${system.pyenv.win.url} - - directory: .node-gyp + - directory: AppData/Local/node-gyp/Cache format: tar.gz content: url: ${system.node_gyp_cache.url} diff --git a/taskcluster/worker.cyml b/taskcluster/worker.cyml index 9ef5a85e90..65c5c8954a 100644 --- a/taskcluster/worker.cyml +++ b/taskcluster/worker.cyml @@ -4,8 +4,8 @@ taskcluster: provisionerId: proj-deepspeech workerType: ci workerTypeKvm: kvm - workerTypeWin: win-b - workerTypeCuda: win-gpu + workerTypeWin: win + workerTypeCuda: win-gpu-b dockerrpi3: provisionerId: proj-deepspeech workerType: ds-rpi3 From 4a174f6adc5bcd621b13ec709cfb2fd8672f0dca Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Sat, 2 May 2020 13:09:52 +0200 Subject: [PATCH 35/38] Remove libssl 1.0.2 hack --- taskcluster/.shared.yml | 12 +++++------ taskcluster/pyenv-build.sh | 6 +----- taskcluster/tc-all-vars.sh | 4 ---- taskcluster/tc-py-utils.sh | 41 -------------------------------------- 4 files changed, 7 insertions(+), 56 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 9367974101..8b4385aca0 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -111,14 +111,14 @@ system: namespace: 'project.deepspeech.gradle.6' pyenv: linux: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.7/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.linux.7' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.8/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.linux.8' osx: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.7/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.osx.7' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.8/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.osx.8' win: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.7/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.win.7' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.8/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.win.8' swig: repo: "https://github.com/lissyx/swig" sha1: "b5fea54d39832d1d132d7dd921b69c0c2c9d5118" diff --git a/taskcluster/pyenv-build.sh b/taskcluster/pyenv-build.sh index b3faf8fb47..54d11c7a04 100755 --- a/taskcluster/pyenv-build.sh +++ b/taskcluster/pyenv-build.sh @@ -18,11 +18,7 @@ for pyver_conf in ${SUPPORTED_PYTHON_VERSIONS}; do pyalias="${pyver}_${pyconf}" - maybe_ssl102_py37 ${pyver} - - LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH \ - PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf} ${PY37_OPENSSL}" \ - pyenv_install ${pyver} ${pyalias} + PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf}" pyenv_install ${pyver} ${pyalias} setup_pyenv_virtualenv "${pyalias}" "deepspeech" diff --git a/taskcluster/tc-all-vars.sh b/taskcluster/tc-all-vars.sh index 1a38e18b76..2efc1dbc21 100755 --- a/taskcluster/tc-all-vars.sh +++ b/taskcluster/tc-all-vars.sh @@ -42,10 +42,6 @@ if [ "${OS}" = 
"Darwin" ]; then fi; fi; -PY37_OPENSSL_DIR="${PYENV_ROOT}/ssl-xenial" -export PY37_LDPATH="${PY37_OPENSSL_DIR}/usr/lib/" -export LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH - export TASKCLUSTER_ARTIFACTS=${TASKCLUSTER_ARTIFACTS:-/tmp/artifacts} export TASKCLUSTER_TMP_DIR=${TASKCLUSTER_TMP_DIR:-/tmp} diff --git a/taskcluster/tc-py-utils.sh b/taskcluster/tc-py-utils.sh index 2beee417fb..36f8073157 100755 --- a/taskcluster/tc-py-utils.sh +++ b/taskcluster/tc-py-utils.sh @@ -191,47 +191,6 @@ pyenv_install() fi } -# Hack to extract Ubuntu's 16.04 libssl 1.0.2 packages and use them during the -# local build of Python. -# -# Avoid (risky) upgrade of base system, allowing to keep one task build that -# builds all the python packages -maybe_ssl102_py37() -{ - pyver=$1 - - unset PY37_OPENSSL - - ARCH=$(uname -m) - case "${pyver}" in - 3.7*|3.8*) - if [ "${OS}" = "Linux" -a "${ARCH}" = "x86_64" ]; then - if [ -d "${PY37_OPENSSL_DIR}" ]; then - rm -rf "${PY37_OPENSSL_DIR}" - fi - - mkdir -p ${PY37_OPENSSL_DIR} - ${WGET} -P ${TASKCLUSTER_TMP_DIR} \ - http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.16_amd64.deb \ - http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.16_amd64.deb - - for deb in ${TASKCLUSTER_TMP_DIR}/libssl*.deb; do - dpkg -x ${deb} ${PY37_OPENSSL_DIR} - done; - - # Python configure expects things to be under lib/ - mv ${PY37_OPENSSL_DIR}/usr/include/x86_64-linux-gnu/openssl/opensslconf.h ${PY37_OPENSSL_DIR}/usr/include/openssl/ - mv ${PY37_OPENSSL_DIR}/lib/x86_64-linux-gnu/lib* ${PY37_OPENSSL_DIR}/usr/lib/ - mv ${PY37_OPENSSL_DIR}/usr/lib/x86_64-linux-gnu/* ${PY37_OPENSSL_DIR}/usr/lib/ - ln -sfn libcrypto.so.1.0.0 ${PY37_OPENSSL_DIR}/usr/lib/libcrypto.so - ln -sfn libssl.so.1.0.0 ${PY37_OPENSSL_DIR}/usr/lib/libssl.so - - export PY37_OPENSSL="--with-openssl=${PY37_OPENSSL_DIR}/usr" - fi; - ;; - esac -} - maybe_numpy_min_version() { local pyver=$1 From f169e8f92197ce3dee4c5c6b92b58f840ee87ea3 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Sat, 2 May 2020 13:11:42 +0200 Subject: [PATCH 36/38] Linux cleanup --- taskcluster/.shared.yml | 1 - taskcluster/docker-build-base.tyml | 1 - taskcluster/node-package-opt-base.tyml | 6 ++---- taskcluster/test-linux-opt-base.tyml | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 8b4385aca0..d8c94a3ea7 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -138,4 +138,3 @@ system: osx: '/Users/build-user' win: '/c/builds/tc-workdir' sox_win: '/usr/bin/wget.exe https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2-win32.zip/download -O sox-14.4.2-win32.zip && ""C:\Program Files\7-zip\7z.exe"" x -o$TASKCLUSTER_TASK_DIR/bin/ -tzip -aoa sox-14.4.2-win32.zip && rm sox-*zip && export PATH=$TASKCLUSTER_TASK_DIR/bin/sox-14.4.2/:$PATH' -aptEc2Mirrors: 'echo "deb http://archive.ubuntu.com/ubuntu/ trusty-updates main" > /etc/apt/sources.list.d/trusty-updates.list && apt-get -qq update && apt-get -qq -y upgrade' diff --git a/taskcluster/docker-build-base.tyml b/taskcluster/docker-build-base.tyml index 6e4d2471e0..6d46a4bdcd 100644 --- a/taskcluster/docker-build-base.tyml +++ b/taskcluster/docker-build-base.tyml @@ -30,7 +30,6 @@ then: dockerfile: { $eval: strip(str(build.dockerfile)) } in: > apt-get -qq -y remove --purge ubuntu-advantage-tools && - ${aptEc2Mirrors} && apt-get -qq update && apt-get -qq -y install git wget pkg-config apt-transport-https ca-certificates curl software-properties-common 
make && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" && diff --git a/taskcluster/node-package-opt-base.tyml b/taskcluster/node-package-opt-base.tyml index b17daaace0..b14cdd4c39 100644 --- a/taskcluster/node-package-opt-base.tyml +++ b/taskcluster/node-package-opt-base.tyml @@ -30,7 +30,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "ubuntu:14.04" + image: "ubuntu:16.04" command: - "/bin/bash" @@ -40,10 +40,8 @@ then: extraSystemSetup: { $eval: strip(str(build.system_setup)) } extraSystemConfig: { $eval: strip(str(build.system_config)) } in: > - apt-get -qq -y remove --purge ubuntu-advantage-tools && - ${aptEc2Mirrors} && adduser --system --home ${system.homedir.linux} ${system.username} && - apt-get -qq update && apt-get -qq -y install realpath git wget curl make && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install realpath git wget curl make sudo && ${extraSystemSetup} && cd ${system.homedir.linux}/ && echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && mkdir -p ~/DeepSpeech/tf/ && touch ~/DeepSpeech/tf/tc-vars.sh && chmod +x ~/DeepSpeech/tf/tc-vars.sh && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf -" > /tmp/clone.sh && chmod +x /tmp/clone.sh && sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} && diff --git a/taskcluster/test-linux-opt-base.tyml b/taskcluster/test-linux-opt-base.tyml index fcb8b46685..67f8220be2 100644 --- a/taskcluster/test-linux-opt-base.tyml +++ b/taskcluster/test-linux-opt-base.tyml @@ -44,7 +44,6 @@ then: - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - ${aptEc2Mirrors} && apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox sudo wget && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && From eca69d1c84194c83aef36368d11937d570796f46 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Sat, 2 May 2020 13:13:07 +0200 Subject: [PATCH 37/38] Trusty -> Xenial --- .taskcluster.yml | 2 +- native_client/BUILD | 4 ++-- native_client/definitions.mk | 2 +- native_client/javascript/package.json.in | 2 +- taskcluster/.shared.yml | 12 ++++++------ taskcluster/android-cache-arm64-v8a-android-24.yml | 2 +- taskcluster/android-cache-arm64-v8a-android-25.yml | 2 +- taskcluster/android-cache-armeabi-v7a-android-24.yml | 2 +- taskcluster/android-cache-armeabi-v7a-android-25.yml | 2 +- taskcluster/android-cache-sdk-android-27.yml | 2 +- taskcluster/android-cache-x86_64-android-24.yml | 2 +- taskcluster/android-cache-x86_64-android-25.yml | 2 +- taskcluster/android-cache-x86_64-android-26.yml | 2 +- taskcluster/android-cache-x86_64-android-28.yml | 2 +- taskcluster/android-cache-x86_64-android-29.yml | 2 +- taskcluster/android-cache-x86_64-android-30.yml | 2 +- taskcluster/android-java-opt.yml | 2 +- taskcluster/generic_tc_caching-linux-opt-base.tyml | 2 +- taskcluster/gradle-cache.yml | 2 +- taskcluster/linux-amd64-cpu-opt.yml | 2 +- taskcluster/linux-amd64-ctc-opt.yml | 2 +- taskcluster/linux-amd64-gpu-opt.yml | 2 +- taskcluster/linux-amd64-tflite-opt.yml | 2 +- taskcluster/linux-arm64-cpu-opt.yml | 2 +- 
taskcluster/linux-opt-base.tyml | 2 +- taskcluster/linux-rpi3-cpu-opt.yml | 2 +- taskcluster/node-package-cpu.yml | 2 +- taskcluster/node-package-gpu.yml | 2 +- taskcluster/node-package-tflite.yml | 2 +- taskcluster/pyenv-linux-amd64.yml | 2 +- taskcluster/swig-linux-amd64.yml | 2 +- taskcluster/test-armbian-opt-base.tyml | 4 ++-- .../test-augmentations-linux-amd64-py36m-opt.yml | 2 +- taskcluster/test-linux-opt-tag-base.tyml | 1 - taskcluster/test-raspbian-opt-base.tyml | 4 ++-- ...test-training-extra_16k-linux-amd64-py35m-opt.yml | 2 +- ...test-training-extra_16k-linux-amd64-py36m-opt.yml | 2 +- ...test-training-extra_16k-linux-amd64-py37m-opt.yml | 2 +- .../test-training-extra_8k-linux-amd64-py36m-opt.yml | 2 +- .../test-training-extra_8k-linux-amd64-py37m-opt.yml | 2 +- .../test-training-pypi_16k-linux-amd64-py35m-opt.yml | 2 +- .../test-training-pypi_16k-linux-amd64-py36m-opt.yml | 2 +- .../test-training-pypi_16k-linux-amd64-py37m-opt.yml | 2 +- .../test-training-pypi_8k-linux-amd64-py36m-opt.yml | 2 +- .../test-training-pypi_8k-linux-amd64-py37m-opt.yml | 2 +- ...t-training-unittests_8k-linux-amd64-py35m-opt.yml | 2 +- ...t-training-unittests_8k-linux-amd64-py36m-opt.yml | 4 ++-- ...t-training-unittests_8k-linux-amd64-py37m-opt.yml | 2 +- .../test-training_16k-linux-amd64-py35m-opt.yml | 2 +- .../test-training_16k-linux-amd64-py36m-opt.yml | 2 +- .../test-training_16k-linux-amd64-py37m-opt.yml | 2 +- .../test-training_8k-linux-amd64-py36m-opt.yml | 2 +- .../test-training_8k-linux-amd64-py37m-opt.yml | 2 +- 53 files changed, 61 insertions(+), 62 deletions(-) diff --git a/.taskcluster.yml b/.taskcluster.yml index 89e4ebbebe..9ed80334cf 100644 --- a/.taskcluster.yml +++ b/.taskcluster.yml @@ -45,7 +45,7 @@ tasks: - "--login" - "-cxe" - > - echo "deb http://archive.ubuntu.com/ubuntu/ trusty-updates main" > /etc/apt/sources.list.d/trusty-updates.list && + echo "deb http://archive.ubuntu.com/ubuntu/ xenial-updates main" > /etc/apt/sources.list.d/xenial-updates.list && apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo && adduser --system --home /home/build-user build-user && cd /home/build-user/ && diff --git a/native_client/BUILD b/native_client/BUILD index 0798b82dfe..53711dc2a6 100644 --- a/native_client/BUILD +++ b/native_client/BUILD @@ -122,8 +122,8 @@ tf_cc_shared_object( linkopts = select({ "//tensorflow:macos": [], "//tensorflow:linux_x86_64": LINUX_LINKOPTS, - "//tensorflow:rpi3": LINUX_LINKOPTS + ["-l:libstdc++.a"], - "//tensorflow:rpi3-armv8": LINUX_LINKOPTS + ["-l:libstdc++.a"], + "//tensorflow:rpi3": LINUX_LINKOPTS, + "//tensorflow:rpi3-armv8": LINUX_LINKOPTS, "//tensorflow:windows": [], "//conditions:default": [], }) + tflite_linkopts(), diff --git a/native_client/definitions.mk b/native_client/definitions.mk index 5ab84e7d67..8d43ce0a26 100644 --- a/native_client/definitions.mk +++ b/native_client/definitions.mk @@ -65,7 +65,7 @@ ifeq ($(TARGET),rpi3) TOOLCHAIN ?= ${TFDIR}/bazel-$(shell basename "${TFDIR}")/external/LinaroArmGcc72/bin/arm-linux-gnueabihf- RASPBIAN ?= $(abspath $(NC_DIR)/../multistrap-raspbian-buster) CFLAGS := -march=armv7-a -mtune=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard -D_GLIBCXX_USE_CXX11_ABI=0 --sysroot $(RASPBIAN) -CXXFLAGS := $(CXXFLAGS) +CXXFLAGS := $(CFLAGS) LDFLAGS := -Wl,-rpath-link,$(RASPBIAN)/lib/arm-linux-gnueabihf/ -Wl,-rpath-link,$(RASPBIAN)/usr/lib/arm-linux-gnueabihf/ SOX_CFLAGS := -I$(RASPBIAN)/usr/include diff --git a/native_client/javascript/package.json.in b/native_client/javascript/package.json.in index 
6616250878..42edc3c161 100644 --- a/native_client/javascript/package.json.in +++ b/native_client/javascript/package.json.in @@ -35,7 +35,7 @@ "node-pre-gyp": "0.15.x", "argparse": "1.0.x", "sox-stream": "2.0.x", - "memory-stream": "0.0.3", + "memory-stream": "1.0.x", "node-wav": "0.0.2" }, "devDependencies": { diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index d8c94a3ea7..6a21254770 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -1,27 +1,27 @@ python: - packages_trusty: + packages_xenial: apt: 'make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev liblzma-dev curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libpng-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev' packages_buster: apt: 'python3-virtualenv python3-setuptools python3-pip python3-wheel python3-pkg-resources' packages_docs_bionic: apt: 'python3 python3-pip zip doxygen' training: - packages_trusty: + packages_xenial: apt: 'libopus0' tensorflow: - packages_trusty: + packages_xenial: apt: 'make build-essential gfortran git libblas-dev liblapack-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev libpng-dev python zlib1g-dev' java: - packages_trusty: + packages_xenial: apt: 'apt-get -qq -y install curl software-properties-common wget unzip && add-apt-repository --yes ppa:openjdk-r/ppa && apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y --force-yes install openjdk-8-jdk && java -version && update-ca-certificates -f' electronjs: packages_xenial: apt: 'libatk1.0-0 libatk-bridge2.0-0 libcairo2 libcups2 libdbus-1-3 libgdk-pixbuf2.0-0 libgtk-3-0 libnspr4 libnss3 libpango-1.0-0 libpangocairo-1.0-0 libx11-xcb1 libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 xvfb' nodejs: - packages_trusty: + packages_xenial: apt: 'nodejs sox' apt_pinning: '(echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > /etc/apt/preferences' - prep_12: 'echo "deb http://deb.nodesource.com/node_12.x trusty main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -' + prep_12: 'echo "deb http://deb.nodesource.com/node_12.x xenial main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -' packages_xenial: apt: 'nodejs sox' apt_pinning: '(echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > /etc/apt/preferences' diff --git a/taskcluster/android-cache-arm64-v8a-android-24.yml b/taskcluster/android-cache-arm64-v8a-android-24.yml index 8fa8e9275d..260ea54667 100644 --- a/taskcluster/android-cache-arm64-v8a-android-24.yml +++ b/taskcluster/android-cache-arm64-v8a-android-24.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.arm64_v8a.android_24.url} artifact_namespace: ${system.android_cache.arm64_v8a.android_24.namespace} diff --git a/taskcluster/android-cache-arm64-v8a-android-25.yml b/taskcluster/android-cache-arm64-v8a-android-25.yml index 366387269a..34b8158b44 100644 --- a/taskcluster/android-cache-arm64-v8a-android-25.yml +++ b/taskcluster/android-cache-arm64-v8a-android-25.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: 
${system.android_cache.arm64_v8a.android_25.url} artifact_namespace: ${system.android_cache.arm64_v8a.android_25.namespace} diff --git a/taskcluster/android-cache-armeabi-v7a-android-24.yml b/taskcluster/android-cache-armeabi-v7a-android-24.yml index 4c686739db..441b925504 100644 --- a/taskcluster/android-cache-armeabi-v7a-android-24.yml +++ b/taskcluster/android-cache-armeabi-v7a-android-24.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.armeabi_v7a.android_24.url} artifact_namespace: ${system.android_cache.armeabi_v7a.android_24.namespace} diff --git a/taskcluster/android-cache-armeabi-v7a-android-25.yml b/taskcluster/android-cache-armeabi-v7a-android-25.yml index e4f78de783..620745a05d 100644 --- a/taskcluster/android-cache-armeabi-v7a-android-25.yml +++ b/taskcluster/android-cache-armeabi-v7a-android-25.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.armeabi_v7a.android_25.url} artifact_namespace: ${system.android_cache.armeabi_v7a.android_25.namespace} diff --git a/taskcluster/android-cache-sdk-android-27.yml b/taskcluster/android-cache-sdk-android-27.yml index 668e142e65..0b047dc115 100644 --- a/taskcluster/android-cache-sdk-android-27.yml +++ b/taskcluster/android-cache-sdk-android-27.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.sdk.android_27.url} artifact_namespace: ${system.android_cache.sdk.android_27.namespace} diff --git a/taskcluster/android-cache-x86_64-android-24.yml b/taskcluster/android-cache-x86_64-android-24.yml index ff06f5516f..c3b3ba1dc5 100644 --- a/taskcluster/android-cache-x86_64-android-24.yml +++ b/taskcluster/android-cache-x86_64-android-24.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.x86_64.android_24.url} artifact_namespace: ${system.android_cache.x86_64.android_24.namespace} diff --git a/taskcluster/android-cache-x86_64-android-25.yml b/taskcluster/android-cache-x86_64-android-25.yml index 8ccac4f9db..a2e642e6ed 100644 --- a/taskcluster/android-cache-x86_64-android-25.yml +++ b/taskcluster/android-cache-x86_64-android-25.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.x86_64.android_25.url} artifact_namespace: ${system.android_cache.x86_64.android_25.namespace} diff --git a/taskcluster/android-cache-x86_64-android-26.yml b/taskcluster/android-cache-x86_64-android-26.yml index 0a74e78e60..a8b9b15ace 100644 --- a/taskcluster/android-cache-x86_64-android-26.yml +++ b/taskcluster/android-cache-x86_64-android-26.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.x86_64.android_26.url} artifact_namespace: ${system.android_cache.x86_64.android_26.namespace} diff --git a/taskcluster/android-cache-x86_64-android-28.yml 
b/taskcluster/android-cache-x86_64-android-28.yml index 4fa124fdb9..6193dd000d 100644 --- a/taskcluster/android-cache-x86_64-android-28.yml +++ b/taskcluster/android-cache-x86_64-android-28.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.x86_64.android_28.url} artifact_namespace: ${system.android_cache.x86_64.android_28.namespace} diff --git a/taskcluster/android-cache-x86_64-android-29.yml b/taskcluster/android-cache-x86_64-android-29.yml index 57c194f2d8..63e1b3c659 100644 --- a/taskcluster/android-cache-x86_64-android-29.yml +++ b/taskcluster/android-cache-x86_64-android-29.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.x86_64.android_29.url} artifact_namespace: ${system.android_cache.x86_64.android_29.namespace} diff --git a/taskcluster/android-cache-x86_64-android-30.yml b/taskcluster/android-cache-x86_64-android-30.yml index 217459aa87..51cf50b93e 100644 --- a/taskcluster/android-cache-x86_64-android-30.yml +++ b/taskcluster/android-cache-x86_64-android-30.yml @@ -2,7 +2,7 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: artifact_url: ${system.android_cache.x86_64.android_30.url} artifact_namespace: ${system.android_cache.x86_64.android_30.namespace} diff --git a/taskcluster/android-java-opt.yml b/taskcluster/android-java-opt.yml index 268e6a5245..5d4ec60130 100644 --- a/taskcluster/android-java-opt.yml +++ b/taskcluster/android-java-opt.yml @@ -13,7 +13,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.android-apk.${event.head.sha}" system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-armv7/artifacts/public/home.tar.xz" gradle_cache: url: ${system.gradle_cache.url} diff --git a/taskcluster/generic_tc_caching-linux-opt-base.tyml b/taskcluster/generic_tc_caching-linux-opt-base.tyml index 36668a0c3b..da91f11544 100644 --- a/taskcluster/generic_tc_caching-linux-opt-base.tyml +++ b/taskcluster/generic_tc_caching-linux-opt-base.tyml @@ -25,7 +25,7 @@ payload: taskIndexExpire: { $fromNow: '6 months' } in: > (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && - apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; + apt-get -qq update && apt-get -qq -y install curl git sudo && ${extraSystemSetup}; cache_artifact=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.artifact_url}` && if [ "$cache_artifact" != "200" ]; then adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && diff --git a/taskcluster/gradle-cache.yml b/taskcluster/gradle-cache.yml index ef589197f6..a77b2e190e 100644 --- a/taskcluster/gradle-cache.yml +++ b/taskcluster/gradle-cache.yml @@ -5,7 +5,7 @@ build: artifact_namespace: ${system.gradle_cache.namespace} system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} scripts: build: "taskcluster/gradle-build.sh" package: "taskcluster/gradle-package.sh" diff --git a/taskcluster/linux-amd64-cpu-opt.yml b/taskcluster/linux-amd64-cpu-opt.yml index db67d75560..50f78a2db8 100644 --- 
a/taskcluster/linux-amd64-cpu-opt.yml +++ b/taskcluster/linux-amd64-cpu-opt.yml @@ -10,7 +10,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.cpu.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: diff --git a/taskcluster/linux-amd64-ctc-opt.yml b/taskcluster/linux-amd64-ctc-opt.yml index 5bedbb8997..f0979261f6 100644 --- a/taskcluster/linux-amd64-ctc-opt.yml +++ b/taskcluster/linux-amd64-ctc-opt.yml @@ -10,7 +10,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.cpu-ctc.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: diff --git a/taskcluster/linux-amd64-gpu-opt.yml b/taskcluster/linux-amd64-gpu-opt.yml index 96b0096783..f34f2de0a0 100644 --- a/taskcluster/linux-amd64-gpu-opt.yml +++ b/taskcluster/linux-amd64-gpu-opt.yml @@ -10,7 +10,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.gpu.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.gpu/artifacts/public/home.tar.xz" maxRunTime: 14400 diff --git a/taskcluster/linux-amd64-tflite-opt.yml b/taskcluster/linux-amd64-tflite-opt.yml index 6d0d78638f..ffdb749922 100644 --- a/taskcluster/linux-amd64-tflite-opt.yml +++ b/taskcluster/linux-amd64-tflite-opt.yml @@ -10,7 +10,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.tflite.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: diff --git a/taskcluster/linux-arm64-cpu-opt.yml b/taskcluster/linux-arm64-cpu-opt.yml index 776d8d30ca..aa4e2a3403 100644 --- a/taskcluster/linux-arm64-cpu-opt.yml +++ b/taskcluster/linux-arm64-cpu-opt.yml @@ -14,7 +14,7 @@ build: apt-get -qq -y install gdebi git pixz && wget http://mirrors.kernel.org/ubuntu/pool/universe/m/multistrap/multistrap_2.2.0ubuntu2_all.deb -O /tmp/multistrap_2.2.0ubuntu2_all.deb && echo "y" | gdebi /tmp/multistrap_2.2.0ubuntu2_all.deb && - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs 
python-yaml system_config: > diff --git a/taskcluster/linux-opt-base.tyml b/taskcluster/linux-opt-base.tyml index 9cc0ccfefb..f98d84a656 100644 --- a/taskcluster/linux-opt-base.tyml +++ b/taskcluster/linux-opt-base.tyml @@ -44,7 +44,7 @@ then: extraSystemConfig: { $eval: strip(str(build.system_config)) } in: > adduser --system --home ${system.homedir.linux} ${system.username} && - apt-get -qq update && apt-get -qq -y install ${tensorflow.packages_trusty.apt} pixz pkg-config realpath sudo unzip wget zip && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install ${tensorflow.packages_xenial.apt} pixz pkg-config realpath sudo unzip wget zip && ${extraSystemSetup} && cd ${system.homedir.linux}/ && echo -e "#!/bin/bash\nset -xe\n env && id && (wget -O - $TENSORFLOW_BUILD_ARTIFACT | pixz -d | tar -C ${system.homedir.linux}/ -xf - ) && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && ln -s ~/DeepSpeech/ds/native_client/ ~/DeepSpeech/tf/native_client && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf - && mkdir -p ${system.homedir.linux}/pyenv-root/ && wget -O - ${system.pyenv.linux.url} | tar -C ${system.homedir.linux}/pyenv-root/ -xzf - && if [ ! -z "${build.gradle_cache.url}" ]; then wget -O - ${build.gradle_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi && if [ ! -z "${build.android_cache.url}" ]; then wget -O - ${build.android_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi;" > /tmp/clone.sh && chmod +x /tmp/clone.sh && sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} && diff --git a/taskcluster/linux-rpi3-cpu-opt.yml b/taskcluster/linux-rpi3-cpu-opt.yml index cdce0bc0f7..82959259b8 100644 --- a/taskcluster/linux-rpi3-cpu-opt.yml +++ b/taskcluster/linux-rpi3-cpu-opt.yml @@ -14,7 +14,7 @@ build: apt-get -qq -y install gdebi git pixz && wget http://mirrors.kernel.org/ubuntu/pool/universe/m/multistrap/multistrap_2.2.0ubuntu2_all.deb -O /tmp/multistrap_2.2.0ubuntu2_all.deb && echo "y" | gdebi /tmp/multistrap_2.2.0ubuntu2_all.deb && - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml system_config: > diff --git a/taskcluster/node-package-cpu.yml b/taskcluster/node-package-cpu.yml index 72870cd51d..6769543b82 100644 --- a/taskcluster/node-package-cpu.yml +++ b/taskcluster/node-package-cpu.yml @@ -8,7 +8,7 @@ build: - "win-amd64-cpu-opt" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml scripts: build: "taskcluster/node-build.sh" diff --git a/taskcluster/node-package-gpu.yml b/taskcluster/node-package-gpu.yml index db48ae3248..a430b4b1ac 100644 --- a/taskcluster/node-package-gpu.yml +++ b/taskcluster/node-package-gpu.yml @@ -5,7 +5,7 @@ build: - "win-amd64-gpu-opt" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml scripts: build: "taskcluster/node-build.sh --cuda" diff --git a/taskcluster/node-package-tflite.yml 
b/taskcluster/node-package-tflite.yml index d49404d02b..003384461e 100644 --- a/taskcluster/node-package-tflite.yml +++ b/taskcluster/node-package-tflite.yml @@ -6,7 +6,7 @@ build: - "win-amd64-tflite-opt" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml scripts: build: "taskcluster/node-build.sh --tflite" diff --git a/taskcluster/pyenv-linux-amd64.yml b/taskcluster/pyenv-linux-amd64.yml index a8ece0b45f..4946e15b40 100644 --- a/taskcluster/pyenv-linux-amd64.yml +++ b/taskcluster/pyenv-linux-amd64.yml @@ -5,7 +5,7 @@ build: artifact_namespace: "${system.pyenv.linux.namespace}" system_setup: > - apt-get -qq update && apt-get -qq -y install python-yaml ${python.packages_trusty.apt} wget + apt-get -qq update && apt-get -qq -y install python-yaml ${python.packages_xenial.apt} wget scripts: build: "taskcluster/pyenv-build.sh" package: "taskcluster/pyenv-package.sh" diff --git a/taskcluster/swig-linux-amd64.yml b/taskcluster/swig-linux-amd64.yml index 3b199c1afb..2a127bbc4d 100644 --- a/taskcluster/swig-linux-amd64.yml +++ b/taskcluster/swig-linux-amd64.yml @@ -1,6 +1,6 @@ build: template_file: generic_tc_caching-linux-opt-base.tyml - docker_image: "ubuntu:14.04" + docker_image: "ubuntu:16.04" build_or_cache: repo: "${system.swig.repo}" sha: "${system.swig.sha1}" diff --git a/taskcluster/test-armbian-opt-base.tyml b/taskcluster/test-armbian-opt-base.tyml index 7218e6848a..afe7b64b4e 100644 --- a/taskcluster/test-armbian-opt-base.tyml +++ b/taskcluster/test-armbian-opt-base.tyml @@ -19,7 +19,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "arm64v8/debian:buster-20190812" + image: "arm64v8/debian:buster-20200422" env: $let: @@ -42,7 +42,7 @@ then: - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - apt-get -qq update && apt-get -qq -y install curl python-simplejson git gnupg pixz sox sudo wget libatlas3-base libopenblas-base xxd && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install curl python-simplejson git gnupg pixz sox sudo wget libatlas3-base libopenblas-base xxd && update-ca-certificates --fresh && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && echo -e "#!/bin/bash\nset -xe\n env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && diff --git a/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml b/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml index 5dd066c25b..8007cd6c79 100644 --- a/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-augmentation-tests.sh 3.6.10:m" metadata: diff --git a/taskcluster/test-linux-opt-tag-base.tyml b/taskcluster/test-linux-opt-tag-base.tyml index 1e6686b5d3..78c87625b4 100644 --- a/taskcluster/test-linux-opt-tag-base.tyml +++ b/taskcluster/test-linux-opt-tag-base.tyml @@ -44,7 +44,6 @@ then: - $let: 
extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - ${aptEc2Mirrors} && apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox sudo wget && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && diff --git a/taskcluster/test-raspbian-opt-base.tyml b/taskcluster/test-raspbian-opt-base.tyml index f3d2a5e5b2..2938b6b6d9 100644 --- a/taskcluster/test-raspbian-opt-base.tyml +++ b/taskcluster/test-raspbian-opt-base.tyml @@ -19,7 +19,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "resin/rpi-raspbian:buster-20190121" + image: "balenalib/rpi-raspbian:buster-20200429" env: $let: @@ -42,7 +42,7 @@ then: - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox wget libatlas3-base xxd && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox wget libatlas3-base xxd && update-ca-certificates --fresh && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && echo -e "#!/bin/bash\nset -xe\n env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml index 6d19f491c2..3b1e137818 100644 --- a/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml +++ b/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.5.8:m 16k" metadata: diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml index e292a84190..c75bc7a220 100644 --- a/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 16k" metadata: diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml index a41f7cd472..79f9419d02 100644 --- a/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 16k" metadata: diff --git a/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml index 
b79209ae7d..a5335c50ac 100644 --- a/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 8k" metadata: diff --git a/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml index ed5259bb9d..32edde28d4 100644 --- a/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 8k" metadata: diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml index dc7f83d8fc..67a1419136 100644 --- a/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml +++ b/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml index ffac9c895d..f2909aa108 100644 --- a/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml index a7624e73b5..c3b8282a90 100644 --- a/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml index 48c52ea187..78029d2369 100644 --- a/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq 
update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml index 18f4d51fed..c044a55093 100644 --- a/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k --pypi" metadata: diff --git a/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml index d33ad21da3..3bd4c39dc1 100644 --- a/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml +++ b/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-unittests.sh 3.5.8:m" metadata: diff --git a/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml index 195ddfe990..a4949830cd 100644 --- a/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml @@ -4,10 +4,10 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-unittests.sh 3.6.10:m" metadata: name: "DeepSpeech on Linux AMD64 CPU training unittests using Python 3.6" description: "Training unittests DeepSpeech LDC93S1 model for Linux/AMD64 using Python 3.6, for CPU only, and optimized version" - \ No newline at end of file + diff --git a/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml index 1f9aabc3e1..5b9ff7a58a 100644 --- a/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-unittests.sh 3.7.6:m" metadata: diff --git a/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml index 48c2501930..5e5e2f0ac1 100644 --- a/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml +++ b/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: 
tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k" metadata: diff --git a/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml index 1d4eaa9858..7f19771dd5 100644 --- a/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k" metadata: diff --git a/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml index 25a35efa6e..4146dfca08 100644 --- a/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k" metadata: diff --git a/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml index b9396fdd77..560ffcfb05 100644 --- a/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k" metadata: diff --git a/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml index eb7cb1bee0..09bfd1291e 100644 --- a/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k" metadata: From da471ecbab84f662e3a59965eb9b3da23cb31ef8 Mon Sep 17 00:00:00 2001 From: Alexandre Lissy Date: Tue, 23 Jun 2020 15:50:52 +0200 Subject: [PATCH 38/38] Fix #3095: Update msys2 to 2020-06-20 release --- taskcluster/.shared.yml | 7 +++++++ taskcluster/generic_tc_caching-win-opt-base.tyml | 12 ++++++++---- taskcluster/test-win-cuda-opt-base.tyml | 11 +++++++---- taskcluster/test-win-opt-base.tyml | 11 +++++++---- taskcluster/win-opt-base.tyml | 11 +++++++---- 5 files changed, 36 insertions(+), 16 deletions(-) diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index 6a21254770..f6c1084811 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -138,3 +138,10 @@ system: osx: '/Users/build-user' win: '/c/builds/tc-workdir' sox_win: '/usr/bin/wget.exe https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2-win32.zip/download -O sox-14.4.2-win32.zip && ""C:\Program Files\7-zip\7z.exe"" x -o$TASKCLUSTER_TASK_DIR/bin/ -tzip -aoa 
sox-14.4.2-win32.zip && rm sox-*zip && export PATH=$TASKCLUSTER_TASK_DIR/bin/sox-14.4.2/:$PATH' + msys2: + url: 'https://github.com/msys2/msys2-installer/releases/download/2020-06-02/msys2-base-x86_64-20200602.tar.xz' + sha: '598ceeaa3e2ccf86a25a2e3c449d00a9fd35300e36011bee610036dfa59d670a' + msys2_filesystem_pkg: + url: 'http://repo.msys2.org/msys/x86_64/filesystem-2020.02-3-x86_64.pkg.tar.xz' + sha: '927b020a67a05139ee1b2c45bff491c1d42335e64350cc7758ee20d7c3099477' + install: 'pacman -Udd --noconfirm $USERPROFILE/filesystem-2020.02-3-x86_64.pkg.tar.xz' diff --git a/taskcluster/generic_tc_caching-win-opt-base.tyml b/taskcluster/generic_tc_caching-win-opt-base.tyml index a41103188e..f96d4c6cae 100644 --- a/taskcluster/generic_tc_caching-win-opt-base.tyml +++ b/taskcluster/generic_tc_caching-win-opt-base.tyml @@ -18,9 +18,12 @@ payload: mounts: - file: msys2-base-x86_64.tar.xz content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} env: TC_MSYS_VERSION: 'MSYS_NT-6.3-9600' @@ -31,6 +34,7 @@ payload: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - $let: @@ -41,7 +45,7 @@ payload: export TASKCLUSTER_ARTIFACTS=\"$(cygpath -u $USERPROFILE/public)\" && export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && echo \"export TASKCLUSTER_TASK_EXIT_CODE=0\" > $USERPROFILE/tc-exit.sh && - env && pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${build.cache.artifact_url}`\" != \"200\" ]; then git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/ && cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k + env && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${build.cache.artifact_url}`\" != \"200\" ]; then git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/ && cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster 
${build.cache.artifact_namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE" diff --git a/taskcluster/test-win-cuda-opt-base.tyml b/taskcluster/test-win-cuda-opt-base.tyml index e5dc5ee62d..9fa3740420 100644 --- a/taskcluster/test-win-cuda-opt-base.tyml +++ b/taskcluster/test-win-cuda-opt-base.tyml @@ -38,6 +38,7 @@ then: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - $let: @@ -57,7 +58,6 @@ then: cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && cd $TASKCLUSTER_TASK_DIR && (mkdir pyenv-root/ && 7z x -so $USERPROFILE/pyenv.tar.gz | 7z x -opyenv-root/ -aoa -ttar -si ) && - pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar vim && ${extraSystemSetup} && /bin/bash ${build.args.tests_cmdline} ; @@ -67,9 +67,12 @@ then: mounts: - file: msys2-base-x86_64.tar.xz content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} - file: pyenv.tar.gz content: url: ${system.pyenv.win.url} diff --git a/taskcluster/test-win-opt-base.tyml b/taskcluster/test-win-opt-base.tyml index 4ddfa59562..f8925d6deb 100644 --- a/taskcluster/test-win-opt-base.tyml +++ b/taskcluster/test-win-opt-base.tyml @@ -40,6 +40,7 @@ then: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - $let: @@ -59,7 +60,6 @@ then: cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && cd $TASKCLUSTER_TASK_DIR && (mkdir pyenv-root/ && 7z x -so $USERPROFILE/pyenv.tar.gz | 7z x -opyenv-root/ -aoa -ttar -si ) && - pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar vim && ${extraSystemSetup} && /bin/bash ${build.args.tests_cmdline} ; @@ -69,9 +69,12 @@ then: mounts: - file: msys2-base-x86_64.tar.xz content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} - file: pyenv.tar.gz content: url: ${system.pyenv.win.url} diff --git 
a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index ff1aa4a57c..e7db49a41e 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -40,6 +40,7 @@ payload: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - echo .\msys64\usr\bin\bash.exe --login -cxe " @@ -58,7 +59,6 @@ payload: ln -s $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/native_client/ $TASKCLUSTER_TASK_DIR/DeepSpeech/tf/native_client && cd $TASKCLUSTER_TASK_DIR && (mkdir pyenv-root/ && 7z x -so $USERPROFILE/pyenv.tar.gz | 7z x -opyenv-root/ -aoa -ttar -si ) && - pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar make && pacman --noconfirm -S zip && $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && @@ -73,9 +73,12 @@ payload: mounts: - file: msys2-base-x86_64.tar.xz content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} - file: home.tar.xz content: url: ${build.tensorflow}
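Note on the msys2 pinning above: the Windows .tyml files now all consume the same two artifacts defined once in taskcluster/.shared.yml, and TaskCluster's mount mechanism fetches them and checks the pinned sha256 digests before the task body runs. A rough stand-alone sketch of that fetch-and-verify step (assuming only a POSIX shell with wget and sha256sum on PATH; this is an illustration, not part of the CI itself):

#!/bin/bash
set -xe

# URLs and digests copied from taskcluster/.shared.yml above.
MSYS2_URL='https://github.com/msys2/msys2-installer/releases/download/2020-06-02/msys2-base-x86_64-20200602.tar.xz'
MSYS2_SHA='598ceeaa3e2ccf86a25a2e3c449d00a9fd35300e36011bee610036dfa59d670a'
FS_URL='http://repo.msys2.org/msys/x86_64/filesystem-2020.02-3-x86_64.pkg.tar.xz'
FS_SHA='927b020a67a05139ee1b2c45bff491c1d42335e64350cc7758ee20d7c3099477'

wget -q "${MSYS2_URL}" -O msys2-base-x86_64.tar.xz
wget -q "${FS_URL}" -O filesystem-2020.02-3-x86_64.pkg.tar.xz

# Abort if either artifact does not match its pinned digest.
sha256sum -c - <<EOF
${MSYS2_SHA}  msys2-base-x86_64.tar.xz
${FS_SHA}  filesystem-2020.02-3-x86_64.pkg.tar.xz
EOF

Inside the extracted MSYS2 environment the tasks then run 'pacman -Udd --noconfirm $USERPROFILE/filesystem-2020.02-3-x86_64.pkg.tar.xz' before the two 'pacman -Syu --noconfirm' passes; -Udd installs the named package file while skipping dependency checks, forcing the filesystem package back to 2020.02-3 ahead of the full upgrade, presumably to work around the breakage tracked in #3095.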