diff --git a/.gitignore b/.gitignore
index a44fa2fbb6..5a9e6d8adb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,5 @@
 /doc/.build/
 /doc/xml-c/
 /doc/xml-java/
+Dockerfile.build
+Dockerfile.train
diff --git a/.taskcluster.yml b/.taskcluster.yml
index b42d47ab1b..2df06939e7 100644
--- a/.taskcluster.yml
+++ b/.taskcluster.yml
@@ -45,7 +45,7 @@ tasks:
       - "--login"
       - "-cxe"
       - >
-        echo "deb http://archive.ubuntu.com/ubuntu/ trusty-updates main" > /etc/apt/sources.list.d/trusty-updates.list &&
+        echo "deb http://archive.ubuntu.com/ubuntu/ xenial-updates main" > /etc/apt/sources.list.d/xenial-updates.list &&
        apt-get -qq update && apt-get -qq -y install git python3-pip curl sudo &&
        adduser --system --home /home/build-user build-user &&
        cd /home/build-user/ &&
diff --git a/Dockerfile b/Dockerfile.build.tmpl
similarity index 52%
rename from Dockerfile
rename to Dockerfile.build.tmpl
index 8a03b3c492..73c8bdd2a8 100644
--- a/Dockerfile
+++ b/Dockerfile.build.tmpl
@@ -1,83 +1,73 @@
+# Please refer to the USING documentation, "Dockerfile for building from source"
+
 # Need devel version cause we need /usr/include/cudnn.h
-# for compiling libctc_decoder_with_kenlm.so
-FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
+FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
+ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
 
 # >> START Install base software
 
 # Get basic packages
 RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
+       bash-completion \
        build-essential \
+       ca-certificates \
+       cmake \
        curl \
-       wget \
+       g++ \
+       gcc \
        git \
+       git-lfs \
+       libbz2-dev \
+       libboost-all-dev \
+       libgsm1-dev \
+       libltdl-dev \
+       liblzma-dev \
+       libmagic-dev \
+       libpng-dev \
+       libsox-fmt-mp3 \
+       libsox-dev \
+       locales \
+       openjdk-8-jdk \
+       pkg-config \
        python3 \
        python3-dev \
        python3-pip \
        python3-wheel \
        python3-numpy \
-       libcurl3-dev \
-       ca-certificates \
-       gcc \
        sox \
-       libsox-fmt-mp3 \
-       htop \
-       nano \
-       cmake \
-       libboost-all-dev \
-       zlib1g-dev \
-       libbz2-dev \
-       liblzma-dev \
-       locales \
-       pkg-config \
-       libpng-dev \
-       libsox-dev \
-       libmagic-dev \
-       libgsm1-dev \
-       libltdl-dev \
-       openjdk-8-jdk \
-       bash-completion \
-       g++ \
-       unzip
-
-RUN ln -s -f /usr/bin/python3 /usr/bin/python
+       unzip \
+       wget \
+       zlib1g-dev
 
-# Install NCCL 2.2
-RUN apt-get --no-install-recommends install -qq -y --allow-downgrades --allow-change-held-packages libnccl2=2.3.7-1+cuda10.0 libnccl-dev=2.3.7-1+cuda10.0
+RUN update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
 
 # Install Bazel
-RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/0.24.1/bazel_0.24.1-linux-x86_64.deb"
+RUN curl -LO "https://github.com/bazelbuild/bazel/releases/download/2.0.0/bazel_2.0.0-linux-x86_64.deb"
 RUN dpkg -i bazel_*.deb
 
-# Install CUDA CLI Tools
-RUN apt-get --no-install-recommends install -qq -y cuda-command-line-tools-10-0
-
-# Install pip
-RUN wget https://bootstrap.pypa.io/get-pip.py && \
-    python3 get-pip.py && \
-    rm get-pip.py
-
 # << END Install base software
-
-
-
 # >> START Configure Tensorflow Build
 
 # Clone TensorFlow from Mozilla repo
 RUN git clone https://github.com/mozilla/tensorflow/
 WORKDIR /tensorflow
-RUN git checkout r1.15
-
+RUN git checkout r2.2
 
 # GPU Environment Setup
+ENV TF_NEED_ROCM 0
+ENV TF_NEED_OPENCL_SYCL 0
+ENV TF_NEED_OPENCL 0
 ENV TF_NEED_CUDA 1
-ENV TF_CUDA_PATHS "/usr/local/cuda,/usr/lib/x86_64-linux-gnu/"
-ENV TF_CUDA_VERSION 10.0
-ENV TF_CUDNN_VERSION 7
+ENV TF_CUDA_PATHS "/usr,/usr/local/cuda-10.1,/usr/lib/x86_64-linux-gnu/"
+ENV TF_CUDA_VERSION 10.1
+ENV TF_CUDNN_VERSION 7.6
 ENV TF_CUDA_COMPUTE_CAPABILITIES 6.0
-ENV TF_NCCL_VERSION 2.3
+ENV TF_NCCL_VERSION 2.4
 
 # Common Environment Setup
 ENV TF_BUILD_CONTAINER_TYPE GPU
@@ -105,14 +95,12 @@ ENV TF_NEED_TENSORRT 0
 ENV TF_NEED_GDR 0
 ENV TF_NEED_VERBS 0
 ENV TF_NEED_OPENCL_SYCL 0
+
 ENV PYTHON_BIN_PATH /usr/bin/python3.6
-ENV PYTHON_LIB_PATH /usr/lib/python3.6/dist-packages
+ENV PYTHON_LIB_PATH /usr/local/lib/python3.6/dist-packages
 
 # << END Configure Tensorflow Build
-
-
-
 # >> START Configure Bazel
 
 # Running bazel inside a `docker build` command causes trouble, cf:
@@ -124,39 +112,17 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
    >>/etc/bazel.bazelrc
 
-# Put cuda libraries to where they are expected to be
-RUN mkdir /usr/local/cuda/lib && \
-    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
-    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h && \
-    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-    ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h
-
-
-# Set library paths
-ENV LD_LIBRARY_PATH $LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/stubs/
-
 # << END Configure Bazel
 
+WORKDIR /
 
-# Copy DeepSpeech repo contents to container's /DeepSpeech
-COPY . /DeepSpeech/
-
-# Alternative clone from GitHub
-# RUN apt-get update && apt-get install -y git-lfs
-# WORKDIR /
-# RUN git lfs install
-# RUN git clone https://github.com/mozilla/DeepSpeech.git
-
+RUN git clone $DEEPSPEECH_REPO
 WORKDIR /DeepSpeech
-
-RUN DS_NODECODER=1 pip3 --no-cache-dir install .
+RUN git checkout $DEEPSPEECH_SHA
 
 # Link DeepSpeech native_client libs to tf folder
 RUN ln -s /DeepSpeech/native_client /tensorflow
-
-
-
 # >> START Build and bind
 
 WORKDIR /tensorflow
@@ -170,59 +136,60 @@ RUN ./configure
 # passing LD_LIBRARY_PATH is required cause Bazel doesn't pickup it from environment
-
 # Build DeepSpeech
-RUN bazel build --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" --config=monolithic --config=cuda -c opt --copt=-O3 --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx --copt=-fvisibility=hidden //native_client:libdeepspeech.so --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
-
-###
-### Using TensorFlow upstream should work
-###
-# # Build TF pip package
-# RUN bazel build --config=opt --config=cuda --copt="-D_GLIBCXX_USE_CXX11_ABI=0" --copt=-mtune=generic --copt=-march=x86-64 --copt=-msse --copt=-msse2 --copt=-msse3 --copt=-msse4.1 --copt=-msse4.2 --copt=-mavx //tensorflow/tools/pip_package:build_pip_package --verbose_failures --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
-#
-# # Build wheel
-# RUN bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
-#
-# # Install tensorflow from our custom wheel
-# RUN pip3 install /tmp/tensorflow_pkg/*.whl
+RUN bazel build \
+    --workspace_status_command="bash native_client/bazel_workspace_status_cmd.sh" \
+    --config=monolithic \
+    --config=cuda \
+    -c opt \
+    --copt=-O3 \
+    --copt="-D_GLIBCXX_USE_CXX11_ABI=0" \
+    --copt=-mtune=generic \
+    --copt=-march=x86-64 \
+    --copt=-msse \
+    --copt=-msse2 \
+    --copt=-msse3 \
+    --copt=-msse4.1 \
+    --copt=-msse4.2 \
+    --copt=-mavx \
+    --copt=-fvisibility=hidden \
+    //native_client:libdeepspeech.so \
+    --verbose_failures \
+    --action_env=LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
 
 # Copy built libs to /DeepSpeech/native_client
 RUN cp /tensorflow/bazel-bin/native_client/libdeepspeech.so /DeepSpeech/native_client/
 
-# Install TensorFlow
-WORKDIR /DeepSpeech/
-RUN pip3 install tensorflow-gpu==1.15.0
-
-
 # Build client.cc and install Python client and decoder bindings
 ENV TFDIR /tensorflow
+
+RUN nproc
+
 WORKDIR /DeepSpeech/native_client
-RUN make deepspeech
+RUN make NUM_PROCESSES=$(nproc) deepspeech
 
 WORKDIR /DeepSpeech
-RUN cd native_client/python && make bindings
+RUN cd native_client/python && make NUM_PROCESSES=$(nproc) bindings
 RUN pip3 install --upgrade native_client/python/dist/*.whl
-RUN cd native_client/ctcdecode && make bindings
+RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
 RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
-
 # << END Build and bind
-
-
-
 # Allow Python printing utf-8
 ENV PYTHONIOENCODING UTF-8
 
 # Build KenLM in /DeepSpeech/native_client/kenlm folder
 WORKDIR /DeepSpeech/native_client
-RUN rm -rf kenlm \
-    && git clone --depth 1 https://github.com/kpu/kenlm && cd kenlm \
-    && mkdir -p build \
-    && cd build \
-    && cmake .. \
-    && make -j 4
+RUN rm -rf kenlm && \
+    git clone https://github.com/kpu/kenlm && \
+    cd kenlm && \
+    git checkout 87e85e66c99ceff1fab2500a7c60c01da7315eec && \
+    mkdir -p build && \
+    cd build && \
+    cmake .. && \
+    make -j $(nproc)
 
 # Done
 WORKDIR /DeepSpeech
diff --git a/Dockerfile.train.tmpl b/Dockerfile.train.tmpl
new file mode 100644
index 0000000000..7feab79e7e
--- /dev/null
+++ b/Dockerfile.train.tmpl
@@ -0,0 +1,53 @@
+# Please refer to the TRAINING documentation, "Basic Dockerfile for training"
+
+FROM tensorflow/tensorflow:1.15.2-gpu-py3
+ENV DEBIAN_FRONTEND=noninteractive
+
+ENV DEEPSPEECH_REPO=#DEEPSPEECH_REPO#
+ENV DEEPSPEECH_SHA=#DEEPSPEECH_SHA#
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+       apt-utils \
+       bash-completion \
+       build-essential \
+       curl \
+       git \
+       git-lfs \
+       libbz2-dev \
+       locales \
+       python3-venv \
+       unzip \
+       wget
+
+# We need to remove it because it's breaking deepspeech install later with
+# weird errors about setuptools
+RUN apt-get purge -y python3-xdg
+
+# Install dependencies for audio augmentation
+RUN apt-get install -y --no-install-recommends libopus0 libsndfile1
+
+WORKDIR /
+RUN git lfs install
+RUN git clone $DEEPSPEECH_REPO
+
+WORKDIR /DeepSpeech
+RUN git checkout $DEEPSPEECH_SHA
+
+# Build CTC decoder first, to avoid clashes on incompatible versions upgrades
+RUN cd native_client/ctcdecode && make NUM_PROCESSES=$(nproc) bindings
+RUN pip3 install --upgrade native_client/ctcdecode/dist/*.whl
+
+# Prepare deps
+RUN pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
+
+# Install DeepSpeech
+#  - No need for the decoder since we did it earlier
+#  - There is already correct TensorFlow GPU installed on the base image,
+#    we don't want to break that
+RUN DS_NODECODER=y DS_NOTENSORFLOW=y pip3 install --upgrade -e .
+
+# Tool to convert output graph for inference
+RUN python3 util/taskcluster.py --source tensorflow --branch r1.15 \
+       --artifact convert_graphdef_memmapped_format --target .
+
+RUN ./bin/run-ldc93s1.sh
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000..2d28d24b94
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+DEEPSPEECH_REPO ?= https://github.com/mozilla/DeepSpeech.git
+DEEPSPEECH_SHA ?= origin/master
+
+Dockerfile%: Dockerfile%.tmpl
+	sed \
+		-e "s|#DEEPSPEECH_REPO#|$(DEEPSPEECH_REPO)|g" \
+		-e "s|#DEEPSPEECH_SHA#|$(DEEPSPEECH_SHA)|g" \
+		< $< > $@
diff --git a/README.rst b/README.rst
index 17b849faa8..9c1b987e93 100644
--- a/README.rst
+++ b/README.rst
@@ -14,7 +14,7 @@ Project DeepSpeech
 
 DeepSpeech is an open source Speech-To-Text engine, using a model trained by machine learning techniques based on `Baidu's Deep Speech research paper `_. Project DeepSpeech uses Google's `TensorFlow `_ to make the implementation easier.
 
-Documentation for installation, usage, and training models is available on `deepspeech.readthedocs.io `_.
+Documentation for installation, usage, and training models are available on `deepspeech.readthedocs.io `_.
 
 For the latest release, including pre-trained models and checkpoints, `see the latest release on GitHub `_.
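Note: the ``Dockerfile%`` pattern rule in the new ``Makefile`` is the only place the ``#DEEPSPEECH_REPO#`` / ``#DEEPSPEECH_SHA#`` placeholders get expanded. For readers who want to generate the Dockerfiles without ``make``, here is a minimal Python sketch of the same substitution (the ``render_dockerfile`` helper is illustrative and not part of this patch):

.. code-block:: python

   # Mirror of the sed rule in the new Makefile: expand the repo/SHA
   # placeholders of a Dockerfile template into a concrete Dockerfile.
   def render_dockerfile(template_path, output_path,
                         repo="https://github.com/mozilla/DeepSpeech.git",  # Makefile default
                         sha="origin/master"):                              # Makefile default
       with open(template_path) as fin:
           text = fin.read()
       text = text.replace("#DEEPSPEECH_REPO#", repo)
       text = text.replace("#DEEPSPEECH_SHA#", sha)
       with open(output_path, "w") as fout:
           fout.write(text)

   render_dockerfile("Dockerfile.train.tmpl", "Dockerfile.train")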
diff --git a/bin/import_cv.py b/bin/import_cv.py
index 3754694ffb..e7dab5644a 100755
--- a/bin/import_cv.py
+++ b/bin/import_cv.py
@@ -93,6 +93,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
     return (counter, rows)
diff --git a/bin/import_cv2.py b/bin/import_cv2.py
index c2880a0602..9db6365603 100755
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@@ -78,6 +78,7 @@ def one_sample(args):
     else:
         # This one is good - keep it for the target CSV
         rows.append((os.path.split(wav_filename)[-1], file_size, label, sample[2]))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
diff --git a/bin/import_lingua_libre.py b/bin/import_lingua_libre.py
index ec5047ba8f..2273aae62a 100755
--- a/bin/import_lingua_libre.py
+++ b/bin/import_lingua_libre.py
@@ -91,6 +91,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
diff --git a/bin/import_m-ailabs.py b/bin/import_m-ailabs.py
index 963b2873ff..63bb1f4f21 100755
--- a/bin/import_m-ailabs.py
+++ b/bin/import_m-ailabs.py
@@ -91,6 +91,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
     return (counter, rows)
diff --git a/bin/import_slr57.py b/bin/import_slr57.py
index 11e30fa4d6..4edb3654ce 100755
--- a/bin/import_slr57.py
+++ b/bin/import_slr57.py
@@ -86,6 +86,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
diff --git a/bin/import_ts.py b/bin/import_ts.py
index e6cdc1e8df..e01301309d 100755
--- a/bin/import_ts.py
+++ b/bin/import_ts.py
@@ -93,6 +93,7 @@ def one_sample(sample):
     else:
         # This one is good - keep it for the target CSV
         rows.append((wav_filename, file_size, label))
+        counter["imported_time"] += frames
     counter["all"] += 1
     counter["total_time"] += frames
 
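Note: each importer above now tracks ``imported_time`` (duration of the samples actually kept for the target CSV) alongside the existing ``total_time`` (duration of everything scanned), so import summaries can report how much audio was filtered out. A condensed sketch of the shared bookkeeping pattern; the ``tally`` helper is illustrative, the real scripts update the dict inline in ``one_sample``:

.. code-block:: python

   from collections import Counter

   counter = Counter()  # the importers also track keys such as failed/too-short/too-long samples

   def tally(frames, keep):
       if keep:
           counter["imported_time"] += frames  # only samples written to the CSV
       counter["all"] += 1
       counter["total_time"] += frames         # every sample seen, kept or not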
diff --git a/bin/play.py b/bin/play.py
index 7d19a790ca..e9348c8e01 100755
--- a/bin/play.py
+++ b/bin/play.py
@@ -10,7 +10,8 @@ import argparse
 
 from deepspeech_training.util.audio import LOADABLE_AUDIO_EXTENSIONS, AUDIO_TYPE_PCM, AUDIO_TYPE_WAV
-from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source, augment_samples
+from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source
+from deepspeech_training.util.augmentations import parse_augmentations, apply_sample_augmentations, SampleAugmentation
 
 
 def get_samples_in_play_order():
@@ -38,12 +39,15 @@ def get_samples_in_play_order():
 
 
 def play_collection():
+    augmentations = parse_augmentations(CLI_ARGS.augment)
+    if any(not isinstance(a, SampleAugmentation) for a in augmentations):
+        print("Warning: Some of the augmentations cannot be simulated by this command.")
     samples = get_samples_in_play_order()
-    samples = augment_samples(samples,
-                              audio_type=AUDIO_TYPE_PCM,
-                              augmentation_specs=CLI_ARGS.augment,
-                              process_ahead=0,
-                              fixed_clock=CLI_ARGS.clock)
+    samples = apply_sample_augmentations(samples,
+                                         audio_type=AUDIO_TYPE_PCM,
+                                         augmentations=augmentations,
+                                         process_ahead=0,
+                                         clock=CLI_ARGS.clock)
     for sample in samples:
         if not CLI_ARGS.quiet:
             print('Sample "{}"'.format(sample.sample_id), file=sys.stderr)
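Note: ``bin/play.py`` now parses ``--augment`` specs itself and can only simulate augmentations that operate on loaded samples (the *sample* domain); anything else is announced with a warning. A condensed sketch of the new call sequence, assuming the same ``deepspeech_training`` helpers the script imports (the spec strings and ``test.csv`` source are illustrative):

.. code-block:: python

   from deepspeech_training.util.audio import AUDIO_TYPE_PCM
   from deepspeech_training.util.augmentations import (
       parse_augmentations, apply_sample_augmentations, SampleAugmentation)
   from deepspeech_training.util.sample_collections import samples_from_source

   augmentations = parse_augmentations(["volume[p=0.1,dbfs=-10:-40]", "reverb[p=0.1]"])
   if any(not isinstance(a, SampleAugmentation) for a in augmentations):
       print("Warning: Some of the augmentations cannot be simulated by this command.")
   samples = apply_sample_augmentations(samples_from_source("test.csv"),  # any CSV/SDB collection
                                        audio_type=AUDIO_TYPE_PCM,
                                        augmentations=augmentations,
                                        process_ahead=0,
                                        clock=0.5)  # fixed point in the 0.0..1.0 training clock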
diff --git a/bin/run-tc-graph_augmentations.sh b/bin/run-tc-graph_augmentations.sh
new file mode 100755
index 0000000000..0d949125e1
--- /dev/null
+++ b/bin/run-tc-graph_augmentations.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+
+set -xe
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+
+if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py ${ldc93s1_dir}
+fi;
+
+# Force only one visible device because we have a single-sample dataset
+# and when trying to run on multiple devices (like GPUs), this will break
+export CUDA_VISIBLE_DEVICES=0
+
+python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
+  --train_files ${ldc93s1_csv} --train_batch_size 1 \
+  --scorer "" \
+  --augment dropout \
+  --augment pitch \
+  --augment tempo \
+  --augment time_mask \
+  --augment frequency_mask \
+  --augment add \
+  --augment multiply \
+  --n_hidden 100 \
+  --epochs 1
diff --git a/bin/run-tc-ldc93s1_new_metrics.sh b/bin/run-tc-ldc93s1_new_metrics.sh
new file mode 100755
index 0000000000..01403bf199
--- /dev/null
+++ b/bin/run-tc-ldc93s1_new_metrics.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+set -xe
+
+ldc93s1_dir="./data/smoke_test"
+ldc93s1_csv="${ldc93s1_dir}/ldc93s1.csv"
+
+epoch_count=$1
+audio_sample_rate=$2
+
+if [ ! -f "${ldc93s1_dir}/ldc93s1.csv" ]; then
+    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
+    python -u bin/import_ldc93s1.py ${ldc93s1_dir}
+fi;
+
+# Force only one visible device because we have a single-sample dataset
+# and when trying to run on multiple devices (like GPUs), this will break
+export CUDA_VISIBLE_DEVICES=0
+
+python -u DeepSpeech.py --noshow_progressbar --noearly_stop \
+  --train_files ${ldc93s1_csv} --train_batch_size 1 \
+  --dev_files ${ldc93s1_csv} --dev_batch_size 1 \
+  --test_files ${ldc93s1_csv} --test_batch_size 1 \
+  --metrics_files ${ldc93s1_csv} \
+  --n_hidden 100 --epochs $epoch_count \
+  --max_to_keep 1 --checkpoint_dir '/tmp/ckpt_metrics' \
+  --learning_rate 0.001 --dropout_rate 0.05 --export_dir '/tmp/train_metrics' \
+  --scorer_path 'data/smoke_test/pruned_lm.scorer' \
+  --audio_sample_rate ${audio_sample_rate}
diff --git a/bin/run-tc-signal_augmentations.sh b/bin/run-tc-sample_augmentations.sh
similarity index 90%
rename from bin/run-tc-signal_augmentations.sh
rename to bin/run-tc-sample_augmentations.sh
index 5be8ed1206..266940706d 100755
--- a/bin/run-tc-signal_augmentations.sh
+++ b/bin/run-tc-sample_augmentations.sh
@@ -41,12 +41,6 @@ if ! $compare --if-differ "${ldc93s1_wav}" /tmp/reverb-test.wav; then
     exit 1
 fi
 
-$play ${ldc93s1_wav} --augment gaps[n=10,size=100.0] --pipe >/tmp/gaps-test.wav
-if ! $compare --if-differ "${ldc93s1_wav}" /tmp/gaps-test.wav; then
-    echo "Gaps augmentation had no effect or changed basic sample properties"
-    exit 1
-fi
-
 $play ${ldc93s1_wav} --augment resample[rate=4000] --pipe >/tmp/resample-test.wav
 if ! $compare --if-differ "${ldc93s1_wav}" /tmp/resample-test.wav; then
     echo "Resample augmentation had no effect or changed basic sample properties"
diff --git a/data/lm/generate_package.py b/data/lm/generate_package.py
index 27b007422b..30a33fcc7e 100644
--- a/data/lm/generate_package.py
+++ b/data/lm/generate_package.py
@@ -61,8 +61,12 @@ def create_bundle(
         sys.exit(1)
     scorer.fill_dictionary(list(words))
     shutil.copy(lm_path, package_path)
-    scorer.save_dictionary(package_path, True)  # append, not overwrite
-    print("Package created in {}".format(package_path))
+    # append, not overwrite
+    if scorer.save_dictionary(package_path, True):
+        print("Package created in {}".format(package_path))
+    else:
+        print("Error when creating {}".format(package_path))
+        sys.exit(1)
 
 
 class Tristate(object):
diff --git a/doc/SUPPORTED_PLATFORMS.rst b/doc/SUPPORTED_PLATFORMS.rst
new file mode 100644
index 0000000000..1ccfb7e3aa
--- /dev/null
+++ b/doc/SUPPORTED_PLATFORMS.rst
@@ -0,0 +1,69 @@
+.. _supported-platforms-inference:
+
+Supported platforms for inference
+=================================
+
+Here we maintain the list of supported platforms for running inference.
+
+Linux / AMD64 without GPU
+^^^^^^^^^^^^^^^^^^^^^^^^^
+* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
+* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8)
+* Full TensorFlow runtime (``deepspeech`` packages)
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+Linux / AMD64 with GPU
+^^^^^^^^^^^^^^^^^^^^^^
+* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
+* Ubuntu 14.04+ (glibc >= 2.19, libstdc++6 >= 4.8)
+* CUDA 10.0 (and capable GPU)
+* Full TensorFlow runtime (``deepspeech`` packages)
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+Linux / ARMv7
+^^^^^^^^^^^^^
+* Cortex-A53 compatible ARMv7 SoC with Neon support
+* Raspbian Buster-compatible distribution
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+Linux / Aarch64
+^^^^^^^^^^^^^^^
+* Cortex-A72 compatible Aarch64 SoC
+* ARMbian Buster-compatible distribution
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+Android / ARMv7
+^^^^^^^^^^^^^^^
+* ARMv7 SoC with Neon support
+* Android 7.0-10.0
+* NDK API level >= 21
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+Android / Aarch64
+^^^^^^^^^^^^^^^^^
+* Aarch64 SoC
+* Android 7.0-10.0
+* NDK API level >= 21
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+macOS / AMD64
+^^^^^^^^^^^^^
+* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
+* macOS >= 10.10
+* Full TensorFlow runtime (``deepspeech`` packages)
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+Windows / AMD64 without GPU
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
+* Windows Server >= 2012 R2 ; Windows >= 8.1
+* Full TensorFlow runtime (``deepspeech`` packages)
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
+
+Windows / AMD64 with GPU
+^^^^^^^^^^^^^^^^^^^^^^^^
+* x86-64 CPU with AVX/FMA (one can rebuild without AVX/FMA, but it might slow down inference)
+* Windows Server >= 2012 R2 ; Windows >= 8.1
+* CUDA 10.0 (and capable GPU)
+* Full TensorFlow runtime (``deepspeech`` packages)
+* TensorFlow Lite runtime (``deepspeech-tflite`` packages)
diff --git a/doc/Scorer.rst b/doc/Scorer.rst
index 78f7210104..8df94a74ce 100644
--- a/doc/Scorer.rst
+++ b/doc/Scorer.rst
@@ -24,7 +24,7 @@ Then use the ``generate_lm.py`` script to generate ``lm.binary`` and ``vocab-500
 
 As input you can use a plain text (e.g. ``file.txt``) or gzipped (e.g. ``file.txt.gz``) text file with one sentence in each line.
 
-If you are using a container created from the Dockerfile, you can use ``--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/``.
+If you are using a container created from ``Dockerfile.build``, you can use ``--kenlm_bins /DeepSpeech/native_client/kenlm/build/bin/``.
 Else you have to build `KenLM `_ first and then pass the build directory to the script.
 
 .. code-block:: bash
@@ -54,4 +54,4 @@ The LibriSpeech LM training text used by our scorer is around 4GB uncompressed,
 
 With a text corpus in hand, you can then re-use the ``generate_lm.py`` and ``generate_package.py`` scripts to create your own scorer that is compatible with DeepSpeech clients and language bindings. Before building the language model, you must first familiarize yourself with the `KenLM toolkit `_. Most of the options exposed by the ``generate_lm.py`` script are simply forwarded to KenLM options of the same name, so you must read the KenLM documentation in order to fully understand their behavior.
 
 After using ``generate_lm.py`` to create a KenLM language model binary file, you can use ``generate_package.py`` to create a scorer package as described in the previous section. Note that we have a :github:`lm_optimizer.py script ` which can be used to find good default values for alpha and beta. To use it, you must first
-generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py``. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_package.py`` again, this time with the new values.
\ No newline at end of file
+generate a package with any value set for default alpha and beta flags. For this step, it doesn't matter what values you use, as they'll be overridden by ``lm_optimizer.py``. Then, use ``lm_optimizer.py`` with this scorer file to find good alpha and beta values. Finally, use ``generate_package.py`` again, this time with the new values.
diff --git a/doc/TRAINING.rst b/doc/TRAINING.rst
index 99dd849a03..479d93ddf9 100644
--- a/doc/TRAINING.rst
+++ b/doc/TRAINING.rst
@@ -47,7 +47,9 @@ Install the required dependencies using ``pip3``\ :
 
    cd DeepSpeech
    pip3 install --upgrade pip==20.0.2 wheel==0.34.2 setuptools==46.1.3
-   pip3 install --upgrade --force-reinstall -e .
+   pip3 install --upgrade -e .
+
+Remember to re-run the last ``pip3 install`` command above when you update the training code (for example by pulling new changes), in order to update any dependencies.
 
 The ``webrtcvad`` Python package might require you to ensure you have proper tooling to build Python modules:
@@ -76,6 +78,22 @@ It has been reported for some people failure at training:
 
 Setting the ``TF_FORCE_GPU_ALLOW_GROWTH`` environment variable to ``true`` seems to help in such cases. This could also be due to an incorrect version of libcudnn. Double check your versions with the :ref:`TensorFlow 1.15 documentation `.
 
+Basic Dockerfile for training
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We provide ``Dockerfile.train`` to automatically set up a basic training environment in Docker. This should ensure that you'll re-use the upstream Python 3 TensorFlow GPU-enabled Docker image. You need to generate the Dockerfile from the template using:
+
+.. code-block:: bash
+
+   make Dockerfile.train
+
+If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters:
+
+.. code-block:: bash
+
+   make Dockerfile.train DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch
+
 Common Voice training data
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -270,12 +288,6 @@ Augmentation
 
 Augmentation is a useful technique for better generalization of machine learning models. Thus, a pre-processing pipeline with various augmentation techniques on raw pcm and spectrogram has been implemented and can be used while training the model. Following are the available augmentation techniques that can be enabled at training time by using the corresponding flags in the command line.
 
-
-Audio Augmentation
-------------------
-
-Augmentations that are applied before potential feature caching can be specified through the ``--augment`` flag. Being a multi-flag, it can be specified multiple times (see below for an example).
-
 Each sample of the training data will get treated by every specified augmentation in their given order. However: whether an augmentation will actually get applied to a sample is decided by chance on base of the augmentation's probability value. For example a value of ``p=0.1`` would apply the according augmentation to just 10% of all samples. This also means that augmentations are not mutually exclusive on a per-sample basis.
 
 The ``--augment`` flag uses a common syntax for all augmentation types:
@@ -297,14 +309,31 @@ In the documentation below, whenever a value is specified as ```` o
 
   * ``~``: A center value with a randomization radius around it. E.g. ``1.2~0.4`` will result in picking of a uniformly random value between 0.8 and 1.6 on each sample augmentation.
 
-  * ``:``: The value will range from `` at the beginning of an epoch to `` at the end of an epoch. E.g. ``-0.2:1.2`` (float) or ``2000:4000`` (int)
+  * ``:``: The value will range from `` at the beginning of the training to `` at the end of the training. E.g. ``-0.2:1.2`` (float) or ``2000:4000`` (int)
 
-  * ``:~``: Combination of the two previous cases with a ranging center value. E.g. ``4:6~2`` would at the beginning of an epoch pick values between 2 and 6 and at the end of an epoch between 4 and 8.
+  * ``:~``: Combination of the two previous cases with a ranging center value. E.g. ``4:6~2`` would at the beginning of the training pick values between 2 and 6 and at the end of the training between 4 and 8.
 
 Ranges specified with integer limits will only assume integer (rounded) values.
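Note: to make the range notation above concrete, here is a small illustrative interpreter for a value spec at a given training clock. This is a sketch of the documented semantics, not the trainer's actual parser:

.. code-block:: python

   import random

   def pick_value(spec, clock):
       """Interpret 'start:end~radius' at a training clock in [0.0, 1.0]."""
       center, _, radius = spec.partition("~")
       start, _, end = center.partition(":")
       start, end = float(start), float(end or start)
       value = start + (end - start) * clock  # ranges span the whole training run
       if radius:
           r = float(radius)
           value = random.uniform(value - r, value + r)  # per-sample randomization
       return value

   pick_value("-0.2:1.2", clock=0.5)  # -> 0.5
   pick_value("1.2~0.4", clock=0.0)   # -> uniformly random in [0.8, 1.6]
   pick_value("4:6~2", clock=1.0)     # -> uniformly random in [4.0, 8.0]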
 
-If feature caching is enabled, these augmentations will only be performed on the first epoch and the result will be reused for subsequent epochs. The flag ``--augmentations_per_epoch N`` (by default `N` is 1) could be used to get more than one epoch worth of augmentations into the cache. During training, each epoch will do ``N`` passes over the training set, each time performing augmentation independently of previous passes. Be aware: this will also multiply the required size of the feature cache if it's enabled.
+.. warning::
+    When feature caching is enabled, by default the cache has no expiration limit and will be used for the entire training run. This will cause these augmentations to only be performed once during the first epoch and the result will be reused for subsequent epochs. This would not only hinder value ranges from reaching their intended final values, but could also lead to unintended over-fitting. In this case flag ``--cache_for_epochs N`` (with N > 1) should be used to periodically invalidate the cache after every N epochs and thus allow samples to be re-augmented in new ways and with current range-values.
+
+Every augmentation targets a certain representation of the sample - in this documentation these representations are referred to as *domains*.
+Augmentations are applied in the following order:
+
+1. **sample** domain: The sample just got loaded and its waveform is represented as a NumPy array. For implementation reasons these augmentations are the only ones that can be "simulated" through ``bin/play.py``.
+
+2. **signal** domain: The sample waveform is represented as a tensor.
+
+3. **spectrogram** domain: The sample spectrogram is represented as a tensor.
+
+4. **features** domain: The sample's mel spectrogram features are represented as a tensor.
+
+Within a single domain, augmentations are applied in the same order as they appear in the command-line.
+
+
+Sample domain augmentations
+---------------------------
 
 **Overlay augmentation** ``--augment overlay[p=,source=,snr=,layers=]``
     Layers another audio source (multiple times) onto augmented samples.
@@ -328,16 +357,6 @@ If feature caching is enabled, these augmentations will only be performed on the
 
     * **decay**: sound decay in dB per reflection - higher values will result in a less reflective perceived "room"
 
-**Gaps augmentation** ``--augment gaps[p=,n=,size=]``
-    Sets time-intervals within the augmented samples to zero (silence) at random positions.
-
-    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
-
-    * **n**: number of intervals to set to zero
-
-    * **size**: duration of intervals in ms
-
-
 **Resample augmentation** ``--augment resample[p=,rate=]``
     Resamples augmented samples to another sample rate and then resamples back to the original sample rate.
@@ -361,6 +380,78 @@ If feature caching is enabled, these augmentations will only be performed on the
 
     * **dbfs** : target volume in dBFS (default value of 3.0103 will normalize min and max amplitudes to -1.0/1.0)
 
+Spectrogram domain augmentations
+--------------------------------
+
+**Pitch augmentation** ``--augment pitch[p=,pitch=]``
+    Scales spectrogram on frequency axis and thus changes pitch.
+
+    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+    * **pitch**: pitch factor by which the frequency axis is scaled (e.g. a value of 2.0 will raise audio frequency by one octave)
+
+
+**Tempo augmentation** ``--augment tempo[p=,factor=]``
+    Scales spectrogram on time axis and thus changes playback tempo.
+
+    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+    * **factor**: speed factor by which the time axis is stretched or shrunken (e.g. a value of 2.0 will double playback tempo)
+
+
+**Frequency mask augmentation** ``--augment frequency_mask[p=,n=,size=]``
+    Sets frequency-intervals within the augmented samples to zero (silence) at random frequencies. See the SpecAugment paper for more details - https://arxiv.org/abs/1904.08779
+
+    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+    * **n**: number of intervals to mask
+
+    * **size**: number of frequency bands to mask per interval
+
+Multi domain augmentations
+--------------------------
+
+**Time mask augmentation** ``--augment time_mask[p=,n=,size=,domain=]``
+    Sets time-intervals within the augmented samples to zero (silence) at random positions.
+
+    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+    * **n**: number of intervals to set to zero
+
+    * **size**: duration of intervals in ms
+
+    * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)
+
+
+**Dropout augmentation** ``--augment dropout[p=,rate=,domain=]``
+    Zeros random data points of the targeted data representation.
+
+    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+    * **rate**: dropout rate ranging from 0.0 for no dropout to 1.0 for 100% dropout
+
+    * **domain**: data representation to apply augmentation to - "signal", "features" or "spectrogram" (default)
+
+
+**Add augmentation** ``--augment add[p=,stddev=,domain=]``
+    Adds random values picked from a normal distribution (with a mean of 0.0) to all data points of the targeted data representation.
+
+    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+    * **stddev**: standard deviation of the normal distribution to pick values from
+
+    * **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram"
+
+
+**Multiply augmentation** ``--augment multiply[p=,stddev=,domain=]``
+    Multiplies all data points of the targeted data representation with random values picked from a normal distribution (with a mean of 1.0).
+
+    * **p**: probability value between 0.0 (never) and 1.0 (always) if a given sample gets augmented by this method
+
+    * **stddev**: standard deviation of the normal distribution to pick values from
+
+    * **domain**: data representation to apply augmentation to - "signal", "features" (default) or "spectrogram"
+
 
 Example training with all augmentations:
 
@@ -368,18 +459,25 @@ Example training with all augmentations:
 
        python -u DeepSpeech.py \
          --train_files "train.sdb" \
-         --augmentations_per_epoch 10 \
+         --feature_cache ./feature.cache \
+         --cache_for_epochs 10 \
+         --epochs 100 \
          --augment overlay[p=0.5,source=noise.sdb,layers=1,snr=50:20~10] \
-         --augment overlay[p=0.2,source=voices.sdb,layers=10:6,snr=50:20~10] \
          --augment reverb[p=0.1,delay=50.0~30.0,decay=10.0:2.0~1.0] \
-         --augment gaps[p=0.05,n=1:3~2,size=10:100] \
         --augment resample[p=0.1,rate=12000:8000~4000] \
         --augment codec[p=0.1,bitrate=48000:16000] \
         --augment volume[p=0.1,dbfs=-10:-40] \
+         --augment pitch[p=0.1,pitch=1~0.2] \
+         --augment tempo[p=0.1,factor=1~0.5] \
+         --augment frequency_mask[p=0.1,n=1:3,size=1:5] \
+         --augment time_mask[p=0.1,domain=signal,n=3:10~2,size=50:100~40] \
+         --augment dropout[p=0.1,rate=0.05] \
+         --augment add[p=0.1,domain=signal,stddev=0~0.5] \
+         --augment multiply[p=0.1,domain=features,stddev=0~0.5] \
         [...]
 
-The ``bin/play.py`` tool also supports ``--augment`` parameters and can be used for experimenting with different configurations.
+The ``bin/play.py`` tool also supports ``--augment`` parameters (for sample domain augmentations) and can be used for experimenting with different configurations.
 
 Example of playing all samples with reverberation and maximized volume:
@@ -393,42 +491,3 @@ Example simulation of the codec augmentation of a wav-file first at the beginnin
 
    bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 0.0 test.wav
   bin/play.py --augment codec[p=0.1,bitrate=48000:16000] --clock 1.0 test.wav
-
-
-The following augmentations are applied after feature caching, hence the way they are applied will not repeat epoch-wise.
-Working on spectrogram and feature level, `bin/play.py` offers no ability to simulate them.
-
-#. **Standard deviation for Gaussian additive noise:** ``--data_aug_features_additive``
-#. **Standard deviation for Normal distribution around 1 for multiplicative noise:** ``--data_aug_features_multiplicative``
-#. **Standard deviation for speeding-up tempo. If Standard deviation is 0, this augmentation is not performed:** ``--augmentation_speed_up_std``
-
-Spectrogram Augmentation
-------------------------
-
-Inspired by Google Paper on `SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition `_
-
-
-#.
-   **Keep rate of dropout augmentation on a spectrogram (if 1, no dropout will be performed on the spectrogram)**\ :
-
-
-   * Keep Rate : ``--augmentation_spec_dropout_keeprate value between range [0 - 1]``
-
-#.
-   **Whether to use frequency and time masking augmentation:**
-
-
-   * Enable / Disable : ``--augmentation_freq_and_time_masking / --noaugmentation_freq_and_time_masking``
-   * Max range of masks in the frequency domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_freq_mask_range eg: 5``
-   * Number of masks in the frequency domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_number_freq_masks eg: 3``
-   * Max range of masks in the time domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_time_mask_range eg: 2``
-   * Number of masks in the time domain when performing freqtime-mask augmentation: ``--augmentation_freq_and_time_masking_number_time_masks eg: 3``
-
-#.
-   **Whether to use spectrogram speed and tempo scaling:**
-
-
-   * Enable / Disable : ``--augmentation_pitch_and_tempo_scaling / --noaugmentation_pitch_and_tempo_scaling``
-   * Min value of pitch scaling: ``--augmentation_pitch_and_tempo_scaling_min_pitch eg:0.95``
-   * Max value of pitch scaling: ``--augmentation_pitch_and_tempo_scaling_max_pitch eg:1.2``
-   * Max value of tempo scaling: ``--augmentation_pitch_and_tempo_scaling_max_tempo eg:1.2``
diff --git a/doc/USING.rst b/doc/USING.rst
index bafbc46fc7..db3013d825 100644
--- a/doc/USING.rst
+++ b/doc/USING.rst
@@ -28,7 +28,7 @@ Please refer to your system's documentation on how to install these dependencies
 CUDA dependency
 ^^^^^^^^^^^^^^^
 
-The GPU capable builds (Python, NodeJS, C++, etc) depend on the same CUDA runtime as upstream TensorFlow. Currently with TensorFlow 1.15 it depends on CUDA 10.0 and CuDNN v7.6. `See the TensorFlow documentation `_.
+The GPU capable builds (Python, NodeJS, C++, etc) depend on the same CUDA runtime as upstream TensorFlow. Currently with TensorFlow 2.2 it depends on CUDA 10.1 and CuDNN v7.6. `See the TensorFlow documentation `_.
 
 Getting the pre-trained model
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -37,8 +37,8 @@ If you want to use the pre-trained English model for performing speech-to-text,
 
 .. code-block:: bash
 
-   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm
-   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer
+   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.pbmm
+   wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.scorer
 
 Model compatibility
 ^^^^^^^^^^^^^^^^^^^
@@ -113,7 +113,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio my_audio_file.wav
+   deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio my_audio_file.wav
 
 The ``--scorer`` argument is optional, and represents an external language model to be used when transcribing the audio.
@@ -177,7 +177,7 @@ Note: the following command assumes you `downloaded the pre-trained model <#gett
 
 .. code-block:: bash
 
-   ./deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio_input.wav
+   ./deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio_input.wav
 
 See the help output with ``./deepspeech -h`` for more details.
 
@@ -186,6 +186,22 @@ Installing bindings from source
 
 If pre-built binaries aren't available for your system, you'll need to install them from scratch. Follow the :github:`native client build and installation instructions `.
 
+Dockerfile for building from source
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We provide ``Dockerfile.build`` to automatically build ``libdeepspeech.so``, the C++ native client, Python bindings, and KenLM.
+You need to generate the Dockerfile from the template using:
+
+.. code-block:: bash
+
+   make Dockerfile.build
+
+If you want to specify a different DeepSpeech repository / branch, you can pass ``DEEPSPEECH_REPO`` or ``DEEPSPEECH_SHA`` parameters:
+
+.. code-block:: bash
+
+   make Dockerfile.build DEEPSPEECH_REPO=git://your/fork DEEPSPEECH_SHA=origin/your-branch
+
 Third party bindings
 ^^^^^^^^^^^^^^^^^^^^
 
@@ -197,4 +213,4 @@ In addition to the bindings above, third party developers have started to provid
 * `stes `_ provides preliminary `PKGBUILDs `_ to install the client and python bindings on `Arch Linux `_ in the `arch-deepspeech `_ repo.
 * `gst-deepspeech `_ provides a `GStreamer `_ plugin which can be used from any language with GStreamer bindings.
 * `thecodrr `_ provides `Vlang `_ bindings. The installation and use of which is described in their `vspeech `_ repo.
-
+* `eagledot `_ provides `NIM-lang `_ bindings. The installation and use of which is described in their `nim-deepspeech `_ repo.
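Note: besides the ``deepspeech`` CLI shown above, the same pre-trained model can be driven from the Python package. A minimal sketch, assuming the v0.7.4 model files downloaded above and a 16 kHz, 16-bit mono WAV:

.. code-block:: python

   import wave
   import numpy as np
   from deepspeech import Model

   ds = Model("deepspeech-0.7.4-models.pbmm")
   ds.enableExternalScorer("deepspeech-0.7.4-models.scorer")  # optional external language model

   with wave.open("my_audio_file.wav") as wav:  # must match the model's sample rate
       audio = np.frombuffer(wav.readframes(wav.getnframes()), np.int16)

   print(ds.stt(audio))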
diff --git a/doc/index.rst b/doc/index.rst
index 008cbaa227..659439ee0f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -20,15 +20,15 @@ To install and use DeepSpeech all you have to do is:
    pip3 install deepspeech
 
    # Download pre-trained English model files
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.pbmm
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.pbmm
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/deepspeech-0.7.4-models.scorer
 
    # Download example audio files
-   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/audio-0.7.3.tar.gz
-   tar xvf audio-0.7.3.tar.gz
+   curl -LO https://github.com/mozilla/DeepSpeech/releases/download/v0.7.4/audio-0.7.4.tar.gz
+   tar xvf audio-0.7.4.tar.gz
 
    # Transcribe an audio file
-   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav
+   deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio/2830-3980-0043.wav
 
 A pre-trained English model is available for use and can be downloaded following the instructions in :ref:`the usage docs `. For the latest release, including pre-trained models and checkpoints, `see the GitHub releases page `_.
@@ -44,7 +44,7 @@ Quicker inference can be performed using a supported NVIDIA GPU on Linux. See th
    pip3 install deepspeech-gpu
 
    # Transcribe an audio file.
-   deepspeech --model deepspeech-0.7.3-models.pbmm --scorer deepspeech-0.7.3-models.scorer --audio audio/2830-3980-0043.wav
+   deepspeech --model deepspeech-0.7.4-models.pbmm --scorer deepspeech-0.7.4-models.scorer --audio audio/2830-3980-0043.wav
 
 Please ensure you have the required :ref:`CUDA dependencies `.
@@ -58,6 +58,8 @@ See the output of ``deepspeech -h`` for more information on the use of ``deepspe
 
    TRAINING
 
+   SUPPORTED_PLATFORMS
+
 .. toctree::
    :maxdepth: 2
    :caption: Decoder and scorer
diff --git a/native_client/BUILD b/native_client/BUILD
index 1e4a66ebb1..53711dc2a6 100644
--- a/native_client/BUILD
+++ b/native_client/BUILD
@@ -1,10 +1,8 @@
 # Description: Deepspeech native client library.
 
-load(
-    "@org_tensorflow//tensorflow:tensorflow.bzl",
-    "if_cuda",
-    "tf_cc_shared_object",
-)
+load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cc_shared_object")
+load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
+
 load(
     "@org_tensorflow//tensorflow/lite:build_def.bzl",
     "tflite_copts",
@@ -124,8 +122,8 @@ tf_cc_shared_object(
     linkopts = select({
         "//tensorflow:macos": [],
         "//tensorflow:linux_x86_64": LINUX_LINKOPTS,
-        "//tensorflow:rpi3": LINUX_LINKOPTS + ["-l:libstdc++.a"],
-        "//tensorflow:rpi3-armv8": LINUX_LINKOPTS + ["-l:libstdc++.a"],
+        "//tensorflow:rpi3": LINUX_LINKOPTS,
+        "//tensorflow:rpi3-armv8": LINUX_LINKOPTS,
         "//tensorflow:windows": [],
         "//conditions:default": [],
     }) + tflite_linkopts(),
@@ -143,7 +141,6 @@ tf_cc_shared_object(
             ### CPU only build, libdeepspeech.so file size reduced by ~50%
             "//tensorflow/core/kernels:spectrogram_op",  # AudioSpectrogram
             "//tensorflow/core/kernels:bias_op",  # BiasAdd
-            "//tensorflow/contrib/rnn:lstm_ops_kernels",  # BlockLSTM
             "//tensorflow/core/kernels:cast_op",  # Cast
             "//tensorflow/core/kernels:concat_op",  # ConcatV2
             "//tensorflow/core/kernels:constant_op",  # Const, Placeholder
@@ -163,9 +160,10 @@ tf_cc_shared_object(
             "//tensorflow/core/kernels:softmax_op",  # Softmax
             "//tensorflow/core/kernels:tile_ops",  # Tile
             "//tensorflow/core/kernels:transpose_op",  # Transpose
+            "//tensorflow/core/kernels:rnn_ops",  # BlockLSTM
             # And we also need the op libs for these ops used in the model:
             "//tensorflow/core:audio_ops_op_lib",  # AudioSpectrogram, Mfcc
-            "//tensorflow/contrib/rnn:lstm_ops_op_lib",  # BlockLSTM
+            "//tensorflow/core:rnn_ops_op_lib",  # BlockLSTM
             "//tensorflow/core:math_ops_op_lib",  # Cast, Less, Max, MatMul, Minimum, Range
             "//tensorflow/core:array_ops_op_lib",  # ConcatV2, Const, ExpandDims, Fill, GatherNd, Identity, Pack, Placeholder, Reshape, Tile, Transpose
             "//tensorflow/core:no_op_op_lib",  # NoOp
diff --git a/native_client/README.rst b/native_client/README.rst
index 841a4546a6..d77df35dfa 100644
--- a/native_client/README.rst
+++ b/native_client/README.rst
@@ -5,8 +5,8 @@ Building DeepSpeech Binaries
 
 If you'd like to build the DeepSpeech binaries yourself, you'll need the following pre-requisites downloaded and installed:
 
-* `Mozilla's TensorFlow r1.15 branch `_
-* `Bazel 0.24.1 `_
+* `Mozilla's TensorFlow r2.2 branch `_
+* `Bazel 2.0.0 `_
 * `General TensorFlow requirements `_
 * `libsox `_
 
@@ -36,12 +36,12 @@ Clone our fork of TensorFlow and checkout the correct version:
 .. code-block::
 
    git clone https://github.com/mozilla/tensorflow.git
-   git checkout origin/r1.15
+   git checkout origin/r2.2
 
 Bazel: Download & Install
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
-First, install Bazel 0.24.1 following the `Bazel installation documentation `_.
+First, install Bazel 2.0.0 following the `Bazel installation documentation `_.
 
 TensorFlow: Configure with Bazel
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/native_client/ctcdecode/Makefile b/native_client/ctcdecode/Makefile
index 3cf7647833..8bff277b32 100644
--- a/native_client/ctcdecode/Makefile
+++ b/native_client/ctcdecode/Makefile
@@ -45,15 +45,15 @@ workspace_status.cc:
 # variables over several runs
 bindings: clean-keep-third-party workspace_status.cc ds-swig
 	pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0
-	PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	find temp_build -type f -name "*.o" -delete
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	rm -rf temp_build
 
 bindings-debug: clean-keep-third-party workspace_status.cc ds-swig
 	pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0
-	PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	DISTUTILS_USE_SDK=1 PATH=$(DS_SWIG_BIN_PATH):$(TOOLCHAIN):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py build_ext --debug --num_processes $(NUM_PROCESSES) $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	$(GENERATE_DEBUG_SYMS)
 	find temp_build -type f -name "*.o" -delete
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) LIBEXE=$(LIBEXE) CFLAGS="$(CFLAGS) $(CXXFLAGS) -DDEBUG" LDFLAGS="$(LDFLAGS_NEEDED)" $(PYTHON_PATH) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 	rm -rf temp_build
diff --git a/native_client/ctcdecode/__init__.py b/native_client/ctcdecode/__init__.py
index c9b917b362..7e3766bebf 100644
--- a/native_client/ctcdecode/__init__.py
+++ b/native_client/ctcdecode/__init__.py
@@ -47,7 +47,7 @@ def load_lm(self, lm_path):
         return super(Scorer, self).load_lm(lm_path.encode('utf-8'))
 
     def save_dictionary(self, save_path, *args, **kwargs):
-        super(Scorer, self).save_dictionary(save_path.encode('utf-8'), *args, **kwargs)
+        return super(Scorer, self).save_dictionary(save_path.encode('utf-8'), *args, **kwargs)
 
 
 def ctc_beam_search_decoder(probs_seq,
diff --git a/native_client/ctcdecode/scorer.cpp b/native_client/ctcdecode/scorer.cpp
index 1834c21c70..ebf5522763 100644
--- a/native_client/ctcdecode/scorer.cpp
+++ b/native_client/ctcdecode/scorer.cpp
@@ -146,7 +146,7 @@ int Scorer::load_trie(std::ifstream& fin, const std::string& file_path)
   return DS_ERR_OK;
 }
 
-void Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite)
+bool Scorer::save_dictionary(const std::string& path, bool append_instead_of_overwrite)
 {
   std::ios::openmode om;
   if (append_instead_of_overwrite) {
@@ -155,15 +155,39 @@ void Scorer::save_dictionary(const std::string& path, bool append_instead_of_ove
     om = std::ios::out|std::ios::binary;
   }
   std::fstream fout(path, om);
+  if (!fout || fout.bad()) {
+    std::cerr << "Error opening '" << path << "'" << std::endl;
+    return false;
+  }
   fout.write(reinterpret_cast(&MAGIC), sizeof(MAGIC));
+  if (fout.bad()) {
+    std::cerr << "Error writing MAGIC '" << path << "'" << std::endl;
+    return false;
+  }
   fout.write(reinterpret_cast(&FILE_VERSION), sizeof(FILE_VERSION));
+  if (fout.bad()) {
+    std::cerr << "Error writing FILE_VERSION '" << path << "'" << std::endl;
+    return false;
+  }
   fout.write(reinterpret_cast(&is_utf8_mode_), sizeof(is_utf8_mode_));
+  if (fout.bad()) {
+    std::cerr << "Error writing is_utf8_mode '" << path << "'" << std::endl;
+    return false;
+  }
   fout.write(reinterpret_cast(&alpha), sizeof(alpha));
+  if (fout.bad()) {
+    std::cerr << "Error writing alpha '" << path << "'" << std::endl;
+    return false;
+  }
   fout.write(reinterpret_cast(&beta), sizeof(beta));
+  if (fout.bad()) {
+    std::cerr << "Error writing beta '" << path << "'" << std::endl;
+    return false;
+  }
   fst::FstWriteOptions opt;
   opt.align = true;
   opt.source = path;
-  dictionary->Write(fout, opt);
+  return dictionary->Write(fout, opt);
 }
 
 bool Scorer::is_scoring_boundary(PathTrie* prefix, size_t new_label)
diff --git a/native_client/ctcdecode/scorer.h b/native_client/ctcdecode/scorer.h
index 55f337eda7..d2a1c8b3be 100644
--- a/native_client/ctcdecode/scorer.h
+++ b/native_client/ctcdecode/scorer.h
@@ -77,7 +77,7 @@ class Scorer {
   void set_alphabet(const Alphabet& alphabet);
 
   // save dictionary in file
-  void save_dictionary(const std::string &path, bool append_instead_of_overwrite=false);
+  bool save_dictionary(const std::string &path, bool append_instead_of_overwrite=false);
 
   // return weather this step represents a boundary where beam scoring should happen
   bool is_scoring_boundary(PathTrie* prefix, size_t new_label);
diff --git a/native_client/definitions.mk b/native_client/definitions.mk
index 41acf5f39e..8d43ce0a26 100644
--- a/native_client/definitions.mk
+++ b/native_client/definitions.mk
@@ -48,7 +48,7 @@ endif
 endif
 
 ifeq ($(TARGET),host-win)
-TOOLCHAIN := '$(VCINSTALLDIR)\bin\amd64\'
+TOOLCHAIN := '$(VCToolsInstallDir)\bin\Hostx64\x64\'
 TOOL_CC := cl.exe
 TOOL_CXX := cl.exe
 TOOL_LD := link.exe
@@ -65,7 +65,7 @@ ifeq ($(TARGET),rpi3)
 TOOLCHAIN ?= ${TFDIR}/bazel-$(shell basename "${TFDIR}")/external/LinaroArmGcc72/bin/arm-linux-gnueabihf-
 RASPBIAN ?= $(abspath $(NC_DIR)/../multistrap-raspbian-buster)
 CFLAGS := -march=armv7-a -mtune=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard -D_GLIBCXX_USE_CXX11_ABI=0 --sysroot $(RASPBIAN)
-CXXFLAGS := $(CXXFLAGS)
+CXXFLAGS := $(CFLAGS)
 LDFLAGS := -Wl,-rpath-link,$(RASPBIAN)/lib/arm-linux-gnueabihf/ -Wl,-rpath-link,$(RASPBIAN)/usr/lib/arm-linux-gnueabihf/
 
 SOX_CFLAGS := -I$(RASPBIAN)/usr/include
diff --git a/native_client/dotnet/README.rst b/native_client/dotnet/README.rst
index 70fdbf3d28..97ac04753e 100644
--- a/native_client/dotnet/README.rst
+++ b/native_client/dotnet/README.rst
@@ -30,11 +30,11 @@ Prerequisites
 
 * Windows 10
 * `Windows 10 SDK `_
-* `Visual Studio 2017 Community `_
+* `Visual Studio 2019 Community `_
 * `Git Large File Storage `_
 * `TensorFlow Windows pre-requisites `_
 
-Inside the Visual Studio Installer enable ``MS Build Tools`` and ``VC++ 2015.3 v14.00 (v140) toolset for desktop``.
+Inside the Visual Studio Installer enable ``MS Build Tools`` and ``VC++ 2019 v16.00 (v160) toolset for desktop``.
 
 If you want to enable CUDA support you need to follow the steps in `the TensorFlow docs for building on Windows with CUDA `_.
 
@@ -51,7 +51,7 @@ We need to clone ``mozilla/DeepSpeech`` and ``mozilla/tensorflow``.
 
 .. code-block:: bash
 
-   git clone --branch r1.15 https://github.com/mozilla/tensorflow
+   git clone --branch r2.2 https://github.com/mozilla/tensorflow
 
 Configuring the paths
 ---------------------
@@ -113,7 +113,7 @@ If you run CUDA enabled ``native_client`` we need to add the following to the ``
 
 .. code-block::
 
-   C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\bin
+   C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\bin
 
 Building the native_client
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/native_client/javascript/Makefile b/native_client/javascript/Makefile
index ac5ee8ed39..454bdc4221 100644
--- a/native_client/javascript/Makefile
+++ b/native_client/javascript/Makefile
@@ -41,10 +41,6 @@ package.json: package.json.in
 		package.json.in > package.json && cat package.json
 
 npm-dev: package.json
-ifeq ($(findstring _NT,$(OS)),_NT)
-	# node-gyp@5.x behaves erratically with VS2015 and MSBuild.exe detection
-	$(NPM_TOOL) install node-gyp@4.x
-endif
 	$(NPM_TOOL) install --prefix=$(NPM_ROOT)/../ --ignore-scripts --force --verbose --production=false .
 
 configure: deepspeech_wrap.cxx package.json npm-dev
diff --git a/native_client/javascript/package.json.in b/native_client/javascript/package.json.in
index 09dad3a583..42edc3c161 100644
--- a/native_client/javascript/package.json.in
+++ b/native_client/javascript/package.json.in
@@ -35,12 +35,12 @@
     "node-pre-gyp": "0.15.x",
     "argparse": "1.0.x",
     "sox-stream": "2.0.x",
-    "memory-stream": "0.0.3",
+    "memory-stream": "1.0.x",
     "node-wav": "0.0.2"
   },
   "devDependencies": {
     "electron": "^1.7.9",
-    "node-gyp": "4.x - 5.x",
+    "node-gyp": "5.x",
     "typescript": "3.8.x",
     "typedoc": "0.17.x",
     "@types/argparse": "1.0.x",
diff --git a/native_client/python/Makefile b/native_client/python/Makefile
index cf17c9dbbf..7f948649af 100644
--- a/native_client/python/Makefile
+++ b/native_client/python/Makefile
@@ -10,7 +10,7 @@ bindings-clean:
 # variables over several runs
 bindings-build: ds-swig
 	pip install --quiet $(PYTHON_PACKAGES) wheel==0.33.6 setuptools==39.1.0
-	PATH=$(TOOLCHAIN):$(DS_SWIG_BIN_PATH):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME)
+	DISTUTILS_USE_SDK=1 PATH=$(TOOLCHAIN):$(DS_SWIG_BIN_PATH):$$PATH SWIG_LIB="$(SWIG_LIB)" AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py build_ext $(PYTHON_PLATFORM_NAME)
 
 MANIFEST.in: bindings-build
 	> $@
@@ -21,6 +21,6 @@ MANIFEST.in: bindings-build
 bindings-package: MANIFEST.in
 	cat MANIFEST.in
 	rm -f temp_build/*_wrap.o temp_build/Release/*_wrap.obj
-	AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
+	DISTUTILS_USE_SDK=1 AS=$(AS) CC=$(CC) CXX=$(CXX) LD=$(LD) CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS_NEEDED) $(RPATH_PYTHON)" MODEL_LDFLAGS="$(LDFLAGS_DIRS)" MODEL_LIBS="$(LIBS)" $(PYTHON_PATH) $(PYTHON_SYSCONFIGDATA) $(NUMPY_INCLUDE) python ./setup.py bdist_wheel $(PYTHON_PLATFORM_NAME) $(SETUP_FLAGS)
 
 bindings: bindings-build bindings-package
diff --git a/native_client/tfmodelstate.cc b/native_client/tfmodelstate.cc
index 5b1e16751d..440c44e602 100644
--- a/native_client/tfmodelstate.cc
+++ b/native_client/tfmodelstate.cc
@@ -118,7 +118,7 @@ TFModelState::init(const char* model_path)
   int beam_width = metadata_outputs[3].scalar()();
   beam_width_ = (unsigned int)(beam_width);
 
-  string serialized_alphabet = metadata_outputs[4].scalar()();
+  string serialized_alphabet = metadata_outputs[4].scalar()();
   err = alphabet_.deserialize(serialized_alphabet.data(), serialized_alphabet.size());
   if (err != 0) {
     return DS_ERR_INVALID_ALPHABET;
tc_decoder_artifacts_root = os.environ.get('DECODER_ARTIFACTS_ROOT', '') @@ -87,6 +90,11 @@ def main(): else: install_requires = install_requires_base + decoder_pypi_dep + if os.environ.get('DS_NOTENSORFLOW', ''): + install_requires = install_requires + else: + install_requires = install_requires + tensorflow_pypi_dep + setup( name='deepspeech_training', version=version, diff --git a/taskcluster/.build.yml b/taskcluster/.build.yml index 7e9809e5e9..1b94c52d3e 100644 --- a/taskcluster/.build.yml +++ b/taskcluster/.build.yml @@ -9,7 +9,7 @@ build: dependencies: [] routes: [] maxRunTime: 3600 - docker_image: "ubuntu:14.04" + docker_image: "ubuntu:16.04" system_setup: > true @@ -22,7 +22,7 @@ build: nc_asset_name: 'native_client.tar.xz' args: tests_cmdline: '' - tensorflow_git_desc: 'TensorFlow: v1.15.0-24-gceb46aa' + tensorflow_git_desc: 'TensorFlow: v2.2.0-12-gc29895f' test_model_task: '' homebrew: url: '' @@ -36,3 +36,7 @@ build: gradle_cache: url: '' namespace: '' + build_or_cache: + repo: "${event.head.repo.url}" + sha: "${event.head.sha}" + dir: "DeepSpeech/ds" diff --git a/taskcluster/.shared.yml b/taskcluster/.shared.yml index dadb10944a..f6c1084811 100644 --- a/taskcluster/.shared.yml +++ b/taskcluster/.shared.yml @@ -1,27 +1,27 @@ python: - packages_trusty: + packages_xenial: apt: 'make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev liblzma-dev curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libpng-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev' packages_buster: apt: 'python3-virtualenv python3-setuptools python3-pip python3-wheel python3-pkg-resources' packages_docs_bionic: apt: 'python3 python3-pip zip doxygen' training: - packages_trusty: + packages_xenial: apt: 'libopus0' tensorflow: - packages_trusty: + packages_xenial: apt: 'make build-essential gfortran git libblas-dev liblapack-dev libsox-dev libmagic-dev libgsm1-dev libltdl-dev libpng-dev python zlib1g-dev' java: - packages_trusty: + packages_xenial: apt: 'apt-get -qq -y install curl software-properties-common wget unzip && add-apt-repository --yes ppa:openjdk-r/ppa && apt-get -qq update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y --force-yes install openjdk-8-jdk && java -version && update-ca-certificates -f' electronjs: packages_xenial: apt: 'libatk1.0-0 libatk-bridge2.0-0 libcairo2 libcups2 libdbus-1-3 libgdk-pixbuf2.0-0 libgtk-3-0 libnspr4 libnss3 libpango-1.0-0 libpangocairo-1.0-0 libx11-xcb1 libxcomposite1 libxcursor1 libxdamage1 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 xvfb' nodejs: - packages_trusty: + packages_xenial: apt: 'nodejs sox' apt_pinning: '(echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > /etc/apt/preferences' - prep_12: 'echo "deb http://deb.nodesource.com/node_12.x trusty main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -' + prep_12: 'echo "deb http://deb.nodesource.com/node_12.x xenial main" > /etc/apt/sources.list.d/nodesource.list && wget -qO- https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -' packages_xenial: apt: 'nodejs sox' apt_pinning: '(echo "Package: nodejs" && echo "Pin: origin deb.nodesource.com" && echo "Pin-Priority: 999") > /etc/apt/preferences' @@ -57,72 +57,91 @@ nodejs: prep_14: '/usr/bin/wget.exe https://nodejs.org/dist/v14.3.0/node-v14.3.0-win-x64.zip && ""C:\Program Files\7-zip\7z.exe"" x -o$TASKCLUSTER_NODE_DIR -tzip -aoa node-v14.3.0-win-x64.zip && rm node-*.zip && 
export PATH=$TASKCLUSTER_TASK_DIR/bin/node-v14.3.0-win-x64/:$PATH' system: node_gyp_cache: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.node-gyp-cache.4/artifacts/public/node-gyp-cache.tar.gz' - namespace: 'project.deepspeech.node-gyp-cache.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.node-gyp-cache.6/artifacts/public/node-gyp-cache.tar.gz' + namespace: 'project.deepspeech.node-gyp-cache.6' homebrew_builds: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.5/artifacts/public/homebrew_builds.tar.gz' - namespace: 'project.deepspeech.homebrew_builds.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.7/artifacts/public/homebrew_builds.tar.gz' + namespace: 'project.deepspeech.homebrew_builds.7' homebrew_tests: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.6/artifacts/public/homebrew_tests.tar.gz' - namespace: 'project.deepspeech.homebrew_tests.6' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.8/artifacts/public/homebrew_tests.tar.gz' + namespace: 'project.deepspeech.homebrew_tests.8' android_cache: arm64_v8a: android_24: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-24.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.arm64-v8a.android-24.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-24.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.arm64-v8a.android-24.6' android_25: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-25.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.arm64-v8a.android-25.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.arm64-v8a.android-25.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.arm64-v8a.android-25.6' armeabi_v7a: android_24: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-24.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-24.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-24.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-24.6' android_25: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-25.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-25.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.armeabi-v7a.android-25.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.armeabi-v7a.android-25.6' x86_64: android_24: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-24.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-24.4' + url: 
'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-24.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-24.6' android_25: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-25.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-25.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-25.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-25.6' android_26: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-26.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-26.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-26.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-26.2' android_27: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-27.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-27.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-27.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-27.2' android_28: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-28.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-28.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-28.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-28.2' android_29: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-29.0/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.x86_64.android-29.0' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-29.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-29.2' + android_30: + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.x86_64.android-30.2/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.x86_64.android-30.2' sdk: android_27: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.sdk.android-27.4/artifacts/public/android_cache.tar.gz' - namespace: 'project.deepspeech.android_cache.sdk.android-27.4' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.android_cache.sdk.android-27.6/artifacts/public/android_cache.tar.gz' + namespace: 'project.deepspeech.android_cache.sdk.android-27.6' gradle_cache: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.gradle.4/artifacts/public/gradle.tar.gz' - namespace: 'project.deepspeech.gradle.4' + url: 
'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.gradle.6/artifacts/public/gradle.tar.gz' + namespace: 'project.deepspeech.gradle.6' pyenv: linux: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.5/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.linux.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.linux.8/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.linux.8' osx: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.5/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.osx.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.osx.8/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.osx.8' win: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.5/artifacts/public/pyenv.tar.gz' - namespace: 'project.deepspeech.pyenv.win.5' + url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.pyenv.win.8/artifacts/public/pyenv.tar.gz' + namespace: 'project.deepspeech.pyenv.win.8' swig: repo: "https://github.com/lissyx/swig" sha1: "b5fea54d39832d1d132d7dd921b69c0c2c9d5118" + swig_build: + linux: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.linux.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz" + namespace: "project.deepspeech.swig.linux.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" + osx: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.darwin.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz" + namespace: "project.deepspeech.swig.darwin.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" + win: + url: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118/artifacts/public/ds-swig.tar.gz" + namespace: "project.deepspeech.swig.win.amd64.b5fea54d39832d1d132d7dd921b69c0c2c9d5118" username: 'build-user' homedir: linux: '/home/build-user' osx: '/Users/build-user' win: '/c/builds/tc-workdir' sox_win: '/usr/bin/wget.exe https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2-win32.zip/download -O sox-14.4.2-win32.zip && ""C:\Program Files\7-zip\7z.exe"" x -o$TASKCLUSTER_TASK_DIR/bin/ -tzip -aoa sox-14.4.2-win32.zip && rm sox-*zip && export PATH=$TASKCLUSTER_TASK_DIR/bin/sox-14.4.2/:$PATH' -aptEc2Mirrors: 'echo "deb http://archive.ubuntu.com/ubuntu/ trusty-updates main" > /etc/apt/sources.list.d/trusty-updates.list && apt-get -qq update && apt-get -qq -y upgrade' + msys2: + url: 'https://github.com/msys2/msys2-installer/releases/download/2020-06-02/msys2-base-x86_64-20200602.tar.xz' + sha: '598ceeaa3e2ccf86a25a2e3c449d00a9fd35300e36011bee610036dfa59d670a' + msys2_filesystem_pkg: + url: 'http://repo.msys2.org/msys/x86_64/filesystem-2020.02-3-x86_64.pkg.tar.xz' + sha: '927b020a67a05139ee1b2c45bff491c1d42335e64350cc7758ee20d7c3099477' + install: 'pacman -Udd --noconfirm $USERPROFILE/filesystem-2020.02-3-x86_64.pkg.tar.xz' diff --git a/taskcluster/android-arm64-cpu-opt.yml b/taskcluster/android-arm64-cpu-opt.yml index fcd1b2bedb..43b756cc23 100644 --- a/taskcluster/android-arm64-cpu-opt.yml +++ b/taskcluster/android-arm64-cpu-opt.yml @@ -8,7 +8,7 @@ build: - 
"index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-arm64" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-arm64" - "index.project.deepspeech.deepspeech.native_client.android-arm64.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-arm64/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-arm64/artifacts/public/home.tar.xz" scripts: build: "taskcluster/android-build.sh arm64-v8a" package: "taskcluster/android-package.sh arm64-v8a" diff --git a/taskcluster/android-armv7-cpu-opt.yml b/taskcluster/android-armv7-cpu-opt.yml index 2578758b64..168b542beb 100644 --- a/taskcluster/android-armv7-cpu-opt.yml +++ b/taskcluster/android-armv7-cpu-opt.yml @@ -8,7 +8,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-armv7" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-armv7" - "index.project.deepspeech.deepspeech.native_client.android-armv7.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-armv7/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-armv7/artifacts/public/home.tar.xz" scripts: build: "taskcluster/android-build.sh armeabi-v7a" package: "taskcluster/android-package.sh armeabi-v7a" diff --git a/taskcluster/android-cache-arm64-v8a-android-24.yml b/taskcluster/android-cache-arm64-v8a-android-24.yml index a8fef42fe4..260ea54667 100644 --- a/taskcluster/android-cache-arm64-v8a-android-24.yml +++ b/taskcluster/android-cache-arm64-v8a-android-24.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.arm64_v8a.android_24.url} - namespace: ${system.android_cache.arm64_v8a.android_24.namespace} + artifact_url: ${system.android_cache.arm64_v8a.android_24.url} + artifact_namespace: ${system.android_cache.arm64_v8a.android_24.namespace} scripts: build: "taskcluster/android_cache-build.sh arm64-v8a android-24" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-arm64-v8a-android-25.yml b/taskcluster/android-cache-arm64-v8a-android-25.yml index 7dca05ea93..34b8158b44 100644 --- a/taskcluster/android-cache-arm64-v8a-android-25.yml +++ b/taskcluster/android-cache-arm64-v8a-android-25.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.arm64_v8a.android_25.url} - namespace: ${system.android_cache.arm64_v8a.android_25.namespace} + artifact_url: ${system.android_cache.arm64_v8a.android_25.url} + artifact_namespace: ${system.android_cache.arm64_v8a.android_25.namespace} scripts: build: "taskcluster/android_cache-build.sh arm64-v8a android-25" package: 
"taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-armeabi-v7a-android-24.yml b/taskcluster/android-cache-armeabi-v7a-android-24.yml index 605f0e92a0..441b925504 100644 --- a/taskcluster/android-cache-armeabi-v7a-android-24.yml +++ b/taskcluster/android-cache-armeabi-v7a-android-24.yml @@ -1,13 +1,13 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.armeabi_v7a.android_24.url} - namespace: ${system.android_cache.armeabi_v7a.android_24.namespace} + artifact_url: ${system.android_cache.armeabi_v7a.android_24.url} + artifact_namespace: ${system.android_cache.armeabi_v7a.android_24.namespace} scripts: - build: "taskcluster/android_cache-build.sh armeabi-v7a android-24" + build: "taskcluster/android_cache-build.sh armeabi-v7a android-24 default" package: "taskcluster/android_cache-package.sh" metadata: name: "Builds Android cache armeabi-v7a / android-24" diff --git a/taskcluster/android-cache-armeabi-v7a-android-25.yml b/taskcluster/android-cache-armeabi-v7a-android-25.yml index 6089d940a6..620745a05d 100644 --- a/taskcluster/android-cache-armeabi-v7a-android-25.yml +++ b/taskcluster/android-cache-armeabi-v7a-android-25.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.armeabi_v7a.android_25.url} - namespace: ${system.android_cache.armeabi_v7a.android_25.namespace} + artifact_url: ${system.android_cache.armeabi_v7a.android_25.url} + artifact_namespace: ${system.android_cache.armeabi_v7a.android_25.namespace} scripts: build: "taskcluster/android_cache-build.sh armeabi-v7a android-25" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-sdk-android-27.yml b/taskcluster/android-cache-sdk-android-27.yml index 7632e5b821..0b047dc115 100644 --- a/taskcluster/android-cache-sdk-android-27.yml +++ b/taskcluster/android-cache-sdk-android-27.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.sdk.android_27.url} - namespace: ${system.android_cache.sdk.android_27.namespace} + artifact_url: ${system.android_cache.sdk.android_27.url} + artifact_namespace: ${system.android_cache.sdk.android_27.namespace} scripts: build: "taskcluster/android_cache-build.sh sdk android-27" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-24.yml b/taskcluster/android-cache-x86_64-android-24.yml index 38013d0834..c3b3ba1dc5 100644 --- a/taskcluster/android-cache-x86_64-android-24.yml +++ b/taskcluster/android-cache-x86_64-android-24.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.x86_64.android_24.url} - namespace: ${system.android_cache.x86_64.android_24.namespace} + artifact_url: ${system.android_cache.x86_64.android_24.url} + artifact_namespace: ${system.android_cache.x86_64.android_24.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-24" package: 
"taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-25.yml b/taskcluster/android-cache-x86_64-android-25.yml index 6f57be4287..a2e642e6ed 100644 --- a/taskcluster/android-cache-x86_64-android-25.yml +++ b/taskcluster/android-cache-x86_64-android-25.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.x86_64.android_25.url} - namespace: ${system.android_cache.x86_64.android_25.namespace} + artifact_url: ${system.android_cache.x86_64.android_25.url} + artifact_namespace: ${system.android_cache.x86_64.android_25.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-25" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-26.yml b/taskcluster/android-cache-x86_64-android-26.yml index ec711ede0b..a8b9b15ace 100644 --- a/taskcluster/android-cache-x86_64-android-26.yml +++ b/taskcluster/android-cache-x86_64-android-26.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.x86_64.android_26.url} - namespace: ${system.android_cache.x86_64.android_26.namespace} + artifact_url: ${system.android_cache.x86_64.android_26.url} + artifact_namespace: ${system.android_cache.x86_64.android_26.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-26" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-28.yml b/taskcluster/android-cache-x86_64-android-28.yml index 471f33b929..6193dd000d 100644 --- a/taskcluster/android-cache-x86_64-android-28.yml +++ b/taskcluster/android-cache-x86_64-android-28.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.x86_64.android_28.url} - namespace: ${system.android_cache.x86_64.android_28.namespace} + artifact_url: ${system.android_cache.x86_64.android_28.url} + artifact_namespace: ${system.android_cache.x86_64.android_28.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-28" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-29.yml b/taskcluster/android-cache-x86_64-android-29.yml index 835453f923..63e1b3c659 100644 --- a/taskcluster/android-cache-x86_64-android-29.yml +++ b/taskcluster/android-cache-x86_64-android-29.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml system_setup: > - ${java.packages_trusty.apt} + ${java.packages_xenial.apt} cache: - url: ${system.android_cache.x86_64.android_29.url} - namespace: ${system.android_cache.x86_64.android_29.namespace} + artifact_url: ${system.android_cache.x86_64.android_29.url} + artifact_namespace: ${system.android_cache.x86_64.android_29.namespace} scripts: build: "taskcluster/android_cache-build.sh x86_64 android-29" package: "taskcluster/android_cache-package.sh" diff --git a/taskcluster/android-cache-x86_64-android-30.yml b/taskcluster/android-cache-x86_64-android-30.yml new file mode 100644 index 0000000000..51cf50b93e --- 
/dev/null +++ b/taskcluster/android-cache-x86_64-android-30.yml @@ -0,0 +1,14 @@ +build: + template_file: generic_tc_caching-linux-opt-base.tyml + system_setup: + > + ${java.packages_xenial.apt} + cache: + artifact_url: ${system.android_cache.x86_64.android_30.url} + artifact_namespace: ${system.android_cache.x86_64.android_30.namespace} + scripts: + build: "taskcluster/android_cache-build.sh x86_64 android-30" + package: "taskcluster/android_cache-package.sh" + metadata: + name: "Builds Android cache x86_64 / android-30" + description: "Setup an Android SDK / emulator cache for Android / x86_64 android-30" diff --git a/taskcluster/android-java-opt.yml b/taskcluster/android-java-opt.yml index 7ca7d0f272..5d4ec60130 100644 --- a/taskcluster/android-java-opt.yml +++ b/taskcluster/android-java-opt.yml @@ -13,8 +13,8 @@ build: - "index.project.deepspeech.deepspeech.native_client.android-apk.${event.head.sha}" system_setup: > - ${java.packages_trusty.apt} - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-armv7/artifacts/public/home.tar.xz" + ${java.packages_xenial.apt} + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-armv7/artifacts/public/home.tar.xz" gradle_cache: url: ${system.gradle_cache.url} namespace: ${system.gradle_cache.namespace} diff --git a/taskcluster/android-x86_64-cpu-opt.yml b/taskcluster/android-x86_64-cpu-opt.yml index 447822ab40..a304fe9f65 100644 --- a/taskcluster/android-x86_64-cpu-opt.yml +++ b/taskcluster/android-x86_64-cpu-opt.yml @@ -8,7 +8,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.android-x86_64" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.android-x86_64" - "index.project.deepspeech.deepspeech.native_client.android-x86_64.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.android-arm64/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.android-arm64/artifacts/public/home.tar.xz" scripts: build: "taskcluster/android-build.sh x86_64" package: "taskcluster/android-package.sh x86_64" diff --git a/taskcluster/android_cache-build.sh b/taskcluster/android_cache-build.sh index c22ece4720..b1e1b8436f 100755 --- a/taskcluster/android_cache-build.sh +++ b/taskcluster/android_cache-build.sh @@ -6,6 +6,7 @@ source $(dirname "$0")/tc-tests-utils.sh arm_flavor=$1 api_level=$2 +api_kind=$3 export ANDROID_HOME=${ANDROID_SDK_HOME} @@ -17,5 +18,5 @@ android_install_sdk android_install_sdk_platform "android-27" if [ "${arm_flavor}" != "sdk" ]; then - android_setup_emulator "${arm_flavor}" "${api_level}" + android_setup_emulator "${arm_flavor}" "${api_level}" "${api_kind}" fi; diff --git a/taskcluster/android_cache-opt-base.tyml b/taskcluster/android_cache-opt-base.tyml deleted file mode 100644 index b44778aaac..0000000000 --- a/taskcluster/android_cache-opt-base.tyml +++ /dev/null @@ -1,52 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} 
- schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - image: ${build.docker_image} - - features: - taskclusterProxy: true - - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && - apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; - cache_file=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.url}` && - if [ "$cache_file" != "200" ]; then - ${extraSystemSetup} && - adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && - mkdir -p /tmp/artifacts/ && chmod 777 /tmp/artifacts && - echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && - sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.build} && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} taskcluster ${build.cache.namespace} - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/android_cache-package.sh b/taskcluster/android_cache-package.sh index 22ec767def..8c73070d5c 100755 --- a/taskcluster/android_cache-package.sh +++ b/taskcluster/android_cache-package.sh @@ -2,17 +2,6 @@ set -xe -TC_EXPIRE=$1 -TC_INSTANCE=$2 -TC_INDEX=$3 - source $(dirname "$0")/tc-tests-utils.sh cd $HOME/ && tar -czf $TASKCLUSTER_ARTIFACTS/android_cache.tar.gz DeepSpeech/Android/ - -if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! 
-z "${TC_INDEX}" ]; then - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ - "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" -fi; diff --git a/taskcluster/darwin-amd64-cpu-opt.yml b/taskcluster/darwin-amd64-cpu-opt.yml index 8589436093..12f41b2ff5 100644 --- a/taskcluster/darwin-amd64-cpu-opt.yml +++ b/taskcluster/darwin-amd64-cpu-opt.yml @@ -9,7 +9,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx" - "index.project.deepspeech.deepspeech.native_client.osx.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.osx/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/darwin-amd64-ctc-opt.yml b/taskcluster/darwin-amd64-ctc-opt.yml index 0e3e51e6b2..0f80f31e2c 100644 --- a/taskcluster/darwin-amd64-ctc-opt.yml +++ b/taskcluster/darwin-amd64-ctc-opt.yml @@ -9,7 +9,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx-ctc" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx-ctc" - "index.project.deepspeech.deepspeech.native_client.osx-ctc.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.osx/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz" maxRunTime: 14400 scripts: build: 'taskcluster/decoder-build.sh' diff --git a/taskcluster/darwin-amd64-tflite-opt.yml b/taskcluster/darwin-amd64-tflite-opt.yml index 4ab6c7c7d7..4a22e0dcaf 100644 --- a/taskcluster/darwin-amd64-tflite-opt.yml +++ b/taskcluster/darwin-amd64-tflite-opt.yml @@ -9,7 +9,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.osx-tflite" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.osx-tflite" - "index.project.deepspeech.deepspeech.native_client.osx-tflite.${event.head.sha}" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.osx/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.osx/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh tflite" package: "taskcluster/package.sh" diff --git a/taskcluster/docker-build-base.tyml b/taskcluster/docker-build-base.tyml index a88676aa41..6d46a4bdcd 100644 --- a/taskcluster/docker-build-base.tyml +++ b/taskcluster/docker-build-base.tyml @@ -30,12 +30,12 @@ then: dockerfile: { $eval: strip(str(build.dockerfile)) } in: > apt-get -qq -y remove --purge ubuntu-advantage-tools && - ${aptEc2Mirrors} && - apt-get -qq update && apt-get -qq -y install git 
wget pkg-config apt-transport-https ca-certificates curl software-properties-common && + apt-get -qq update && apt-get -qq -y install git wget pkg-config apt-transport-https ca-certificates curl software-properties-common make && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" && apt-get -qq update && apt-get -qq -y install docker-ce && mkdir -p /opt/deepspeech && git clone --quiet ${event.head.repo.url} /opt/deepspeech && cd /opt/deepspeech && git checkout --quiet ${event.head.sha} && + make ${dockerfile} DEEPSPEECH_REPO=${event.head.repo.url} DEEPSPEECH_SHA=${event.head.sha} && docker build --file ${dockerfile} . artifacts: diff --git a/taskcluster/docker-image-build.yml b/taskcluster/docker-image-build.yml index a8d5386d87..792e14af04 100644 --- a/taskcluster/docker-image-build.yml +++ b/taskcluster/docker-image-build.yml @@ -1,6 +1,6 @@ build: template_file: docker-build-base.tyml - dockerfile: "Dockerfile" + dockerfile: "Dockerfile.build" metadata: name: "DeepSpeech Docker build" - description: "Testing |docker build| of DeepSpeech" + description: "Testing |docker build| of DeepSpeech build image" diff --git a/taskcluster/docker-image-train.yml b/taskcluster/docker-image-train.yml new file mode 100644 index 0000000000..03f9abea3a --- /dev/null +++ b/taskcluster/docker-image-train.yml @@ -0,0 +1,6 @@ +build: + template_file: docker-build-base.tyml + dockerfile: "Dockerfile.train" + metadata: + name: "DeepSpeech Docker train" + description: "Testing |docker build| of DeepSpeech train image" diff --git a/taskcluster/pyenv-darwin-opt-base.tyml b/taskcluster/generic_tc_caching-darwin-opt-base.tyml similarity index 66% rename from taskcluster/pyenv-darwin-opt-base.tyml rename to taskcluster/generic_tc_caching-darwin-opt-base.tyml index 063c661322..e6777f22f4 100644 --- a/taskcluster/pyenv-darwin-opt-base.tyml +++ b/taskcluster/generic_tc_caching-darwin-opt-base.tyml @@ -35,12 +35,13 @@ payload: export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/ && env && mkdir -p $TASKCLUSTER_ARTIFACTS/ && - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" ${system.pyenv.osx.url}` && - if [ "$swig_bin" != "200" ]; then - git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ && - cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && - $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && - $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} 127.0.0.1:8080 ${system.pyenv.osx.namespace} + cache_artifact=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.artifact_url}` && + if [ "$cache_artifact" != "200" ]; then + git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && + cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && + $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && + $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && + $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} 127.0.0.1:8080 ${build.cache.artifact_namespace} fi; artifacts: diff --git a/taskcluster/generic_tc_caching-linux-opt-base.tyml b/taskcluster/generic_tc_caching-linux-opt-base.tyml new file mode 100644 index 0000000000..da91f11544 --- /dev/null +++ 
b/taskcluster/generic_tc_caching-linux-opt-base.tyml @@ -0,0 +1,50 @@ +taskId: ${taskcluster.taskId} +provisionerId: ${taskcluster.docker.provisionerId} +workerType: ${taskcluster.docker.workerType} +taskGroupId: ${taskcluster.taskGroupId} +schedulerId: ${taskcluster.schedulerId} +created: { $fromNow: '0 sec' } +deadline: { $fromNow: '1 day' } +expires: { $fromNow: '6 months' } +scopes: + - "index:insert-task:project.deepspeech.*" + +payload: + maxRunTime: { $eval: to_int(build.maxRunTime) } + image: ${build.docker_image} + + features: + taskclusterProxy: true + + command: + - "/bin/bash" + - "--login" + - "-cxe" + - $let: + extraSystemSetup: { $eval: strip(str(build.system_setup)) } + taskIndexExpire: { $fromNow: '6 months' } + in: > + (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && + apt-get -qq update && apt-get -qq -y install curl git sudo && ${extraSystemSetup}; + cache_artifact=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.cache.artifact_url}` && + if [ "$cache_artifact" != "200" ]; then + adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && + mkdir -p /tmp/artifacts/ && chmod 777 /tmp/artifacts && + echo -e "#!/bin/bash\nset -xe\n env && id && (git clone --quiet ${build.build_or_cache.repo} ~/${build.build_or_cache.dir}/ && cd ~/${build.build_or_cache.dir}/ && git checkout --quiet ${build.build_or_cache.sha})" > /tmp/clone.sh && chmod +x /tmp/clone.sh && + sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && + sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/${build.scripts.build} && + sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/${build.scripts.package} && + sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace} + fi; + + artifacts: + "public": + type: "directory" + path: "/tmp/artifacts/" + expires: { $fromNow: '6 months' } + +metadata: + name: ${build.metadata.name} + description: ${build.metadata.description} + owner: ${event.head.user.email} + source: ${event.head.repo.url} diff --git a/taskcluster/generic_tc_caching-win-opt-base.tyml b/taskcluster/generic_tc_caching-win-opt-base.tyml new file mode 100644 index 0000000000..f96d4c6cae --- /dev/null +++ b/taskcluster/generic_tc_caching-win-opt-base.tyml @@ -0,0 +1,61 @@ +taskId: ${taskcluster.taskId} +provisionerId: ${taskcluster.docker.provisionerId} +workerType: ${taskcluster.docker.workerTypeWin} +taskGroupId: ${taskcluster.taskGroupId} +schedulerId: ${taskcluster.schedulerId} +created: { $fromNow: '0 sec' } +deadline: { $fromNow: '1 day' } +expires: { $fromNow: '6 months' } +scopes: + - "index:insert-task:project.deepspeech.*" + +payload: + maxRunTime: { $eval: to_int(build.maxRunTime) } + + features: + taskclusterProxy: true + + mounts: + - file: msys2-base-x86_64.tar.xz + content: + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} + + env: + TC_MSYS_VERSION: 'MSYS_NT-6.3-9600' + MSYS: 'winsymlinks:nativestrict' + + command: + - >- + "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | + "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si + - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep 
'[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" + - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" + - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" + - $let: + taskIndexExpire: { $fromNow: '6 months' } + in: > + echo .\msys64\usr\bin\bash.exe --login -cxe "export LC_ALL=C && + export PATH=\"$USERPROFILE/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" && + export TASKCLUSTER_ARTIFACTS=\"$(cygpath -u $USERPROFILE/public)\" && + export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && + echo \"export TASKCLUSTER_TASK_EXIT_CODE=0\" > $USERPROFILE/tc-exit.sh && + env && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${build.cache.artifact_url}`\" != \"200\" ]; then git clone --quiet ${build.build_or_cache.repo} $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/ && cd $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir} && git checkout --quiet ${build.build_or_cache.sha} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/${build.scripts.package} && $TASKCLUSTER_TASK_DIR/${build.build_or_cache.dir}/taskcluster/tc-update-index.sh ${taskIndexExpire} taskcluster ${build.cache.artifact_namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k + + - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE" + + artifacts: + - type: "directory" + path: "public/" + expires: { $fromNow: '6 months' } + +metadata: + name: ${build.metadata.name} + description: ${build.metadata.description} + owner: ${event.head.user.email} + source: ${event.head.repo.url} diff --git a/taskcluster/gradle-cache.yml b/taskcluster/gradle-cache.yml index 334545c300..a77b2e190e 100644 --- a/taskcluster/gradle-cache.yml +++ b/taskcluster/gradle-cache.yml @@ -1,11 +1,11 @@ build: - template_file: android_cache-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml + cache: + artifact_url: ${system.gradle_cache.url} + artifact_namespace: ${system.gradle_cache.namespace} system_setup: > - ${java.packages_trusty.apt} - cache: - url: ${system.gradle_cache.url} - namespace: ${system.gradle_cache.namespace} + ${java.packages_xenial.apt} scripts: build: "taskcluster/gradle-build.sh" package: "taskcluster/gradle-package.sh" diff --git a/taskcluster/gradle-package.sh b/taskcluster/gradle-package.sh index 495c05e8d9..840fffc832 100755 --- a/taskcluster/gradle-package.sh +++ b/taskcluster/gradle-package.sh @@ -2,17 +2,6 @@ set -xe -TC_EXPIRE=$1 -TC_INSTANCE=$2 -TC_INDEX=$3 - source $(dirname "$0")/tc-tests-utils.sh cd ${GRADLE_USER_HOME}/../ && tar -czf $TASKCLUSTER_ARTIFACTS/gradle.tar.gz gradle-cache/ - -if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! 
-z "${TC_INDEX}" ]; then - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ - "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" -fi; diff --git a/taskcluster/homebrew-darwin-opt-base.tyml b/taskcluster/homebrew-darwin-opt-base.tyml deleted file mode 100644 index ac6c5af356..0000000000 --- a/taskcluster/homebrew-darwin-opt-base.tyml +++ /dev/null @@ -1,59 +0,0 @@ -taskId: ${taskcluster.taskId} -provisionerId: ${taskcluster.generic.provisionerId} -workerType: ${taskcluster.generic.workerType} -taskGroupId: ${taskcluster.taskGroupId} -schedulerId: ${taskcluster.schedulerId} -dependencies: - $map: { $eval: build.dependencies } - each(b): - $eval: as_slugid(b) -created: { $fromNow: '0 sec' } -deadline: { $fromNow: '1 day' } -expires: { $fromNow: '6 months' } -scopes: - - "index:insert-task:project.deepspeech.*" - -payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - - features: - taskclusterProxy: true - - command: - - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - taskIndexExpire: { $fromNow: '6 months' } - in: > - export TASKCLUSTER_ARTIFACTS="$(pwd)/public/" && - export TASKCLUSTER_ORIG_TASKDIR="$(pwd)" && - (mkdir ../tc-workdir/ || rm -fr ../tc-workdir/*) && cd ../tc-workdir/ && - export TASKCLUSTER_TASK_DIR="$(pwd)" && - export LC_ALL=C && - export MACOSX_DEPLOYMENT_TARGET=10.10 && - export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/ && - env && - mkdir -p $TASKCLUSTER_ARTIFACTS/ && - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" ${build.homebrew.url}` && - if [ "$swig_bin" != "200" ]; then - git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ && - cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && - $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && - $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://127.0.0.1:8080/index/v1/task/${build.homebrew.namespace}" - fi; - - artifacts: - - type: "directory" - path: "public/" - expires: { $fromNow: '6 months' } - -metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/homebrew_builds-darwin-amd64.yml b/taskcluster/homebrew_builds-darwin-amd64.yml index 4dac3a1332..3377dcac86 100644 --- a/taskcluster/homebrew_builds-darwin-amd64.yml +++ b/taskcluster/homebrew_builds-darwin-amd64.yml @@ -1,8 +1,8 @@ build: - template_file: homebrew-darwin-opt-base.tyml - homebrew: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_builds.5/artifacts/public/homebrew_builds.tar.gz' - namespace: 'project.deepspeech.homebrew_builds.5' + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.homebrew_builds.url} + artifact_namespace: ${system.homebrew_builds.namespace} scripts: build: "taskcluster/homebrew-build.sh --builds" package: "taskcluster/homebrew-package.sh --builds" diff --git a/taskcluster/homebrew_tests-darwin-amd64.yml b/taskcluster/homebrew_tests-darwin-amd64.yml index fc9637f297..dc93c183eb 100644 --- a/taskcluster/homebrew_tests-darwin-amd64.yml +++ b/taskcluster/homebrew_tests-darwin-amd64.yml @@ -1,8 +1,8 @@ build: - template_file: homebrew-darwin-opt-base.tyml - 
homebrew: - url: 'https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.homebrew_tests.6/artifacts/public/homebrew_tests.tar.gz' - namespace: 'project.deepspeech.homebrew_tests.6' + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.homebrew_tests.url} + artifact_namespace: ${system.homebrew_tests.namespace} scripts: build: "taskcluster/homebrew-build.sh --tests" package: "taskcluster/homebrew-package.sh --tests" diff --git a/taskcluster/linux-amd64-cpu-opt.yml b/taskcluster/linux-amd64-cpu-opt.yml index e2af482b30..50f78a2db8 100644 --- a/taskcluster/linux-amd64-cpu-opt.yml +++ b/taskcluster/linux-amd64-cpu-opt.yml @@ -10,9 +10,9 @@ build: - "index.project.deepspeech.deepspeech.native_client.cpu.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.cpu/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/linux-amd64-ctc-opt.yml b/taskcluster/linux-amd64-ctc-opt.yml index 00af6149d1..f0979261f6 100644 --- a/taskcluster/linux-amd64-ctc-opt.yml +++ b/taskcluster/linux-amd64-ctc-opt.yml @@ -10,9 +10,9 @@ build: - "index.project.deepspeech.deepspeech.native_client.cpu-ctc.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.cpu/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: build: 'taskcluster/decoder-build.sh' package: 'taskcluster/decoder-package.sh' diff --git a/taskcluster/linux-amd64-gpu-opt.yml b/taskcluster/linux-amd64-gpu-opt.yml index 532b8bb75f..f34f2de0a0 100644 --- a/taskcluster/linux-amd64-gpu-opt.yml +++ b/taskcluster/linux-amd64-gpu-opt.yml @@ -10,9 +10,9 @@ build: - "index.project.deepspeech.deepspeech.native_client.gpu.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.gpu/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.gpu/artifacts/public/home.tar.xz" maxRunTime: 14400 scripts: build: "taskcluster/cuda-build.sh" diff --git a/taskcluster/linux-amd64-tflite-opt.yml 
b/taskcluster/linux-amd64-tflite-opt.yml index 3d4847f91b..ffdb749922 100644 --- a/taskcluster/linux-amd64-tflite-opt.yml +++ b/taskcluster/linux-amd64-tflite-opt.yml @@ -10,9 +10,9 @@ build: - "index.project.deepspeech.deepspeech.native_client.tflite.${event.head.sha}" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.cpu/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.cpu/artifacts/public/home.tar.xz" scripts: build: "taskcluster/host-build.sh tflite" package: "taskcluster/package.sh" diff --git a/taskcluster/linux-arm64-cpu-opt.yml b/taskcluster/linux-arm64-cpu-opt.yml index 1865784ec8..aa4e2a3403 100644 --- a/taskcluster/linux-arm64-cpu-opt.yml +++ b/taskcluster/linux-arm64-cpu-opt.yml @@ -14,12 +14,12 @@ build: apt-get -qq -y install gdebi git pixz && wget http://mirrors.kernel.org/ubuntu/pool/universe/m/multistrap/multistrap_2.2.0ubuntu2_all.deb -O /tmp/multistrap_2.2.0ubuntu2_all.deb && echo "y" | gdebi /tmp/multistrap_2.2.0ubuntu2_all.deb && - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml system_config: > multistrap -d /tmp/multistrap-armbian64-buster/ -f ${system.homedir.linux}/DeepSpeech/ds/native_client/multistrap_armbian64_buster.conf - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.arm64/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.arm64/artifacts/public/home.tar.xz" scripts: build: "taskcluster/arm64-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/linux-opt-base.tyml b/taskcluster/linux-opt-base.tyml index 0298954cf8..f98d84a656 100644 --- a/taskcluster/linux-opt-base.tyml +++ b/taskcluster/linux-opt-base.tyml @@ -30,7 +30,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "ubuntu:14.04" + image: "ubuntu:16.04" env: TENSORFLOW_BUILD_ARTIFACT: ${build.tensorflow} @@ -43,10 +43,8 @@ then: extraSystemSetup: { $eval: strip(str(build.system_setup)) } extraSystemConfig: { $eval: strip(str(build.system_config)) } in: > - apt-get -qq -y remove --purge ubuntu-advantage-tools && - ${aptEc2Mirrors} && adduser --system --home ${system.homedir.linux} ${system.username} && - apt-get -qq update && apt-get -qq -y install ${tensorflow.packages_trusty.apt} pixz pkg-config realpath unzip wget zip && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install ${tensorflow.packages_xenial.apt} pixz pkg-config realpath sudo unzip wget zip && ${extraSystemSetup} && cd ${system.homedir.linux}/ && echo -e "#!/bin/bash\nset -xe\n env && id && (wget -O - $TENSORFLOW_BUILD_ARTIFACT | pixz -d | tar -C ${system.homedir.linux}/ -xf - ) && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && ln -s 
~/DeepSpeech/ds/native_client/ ~/DeepSpeech/tf/native_client && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf - && mkdir -p ${system.homedir.linux}/pyenv-root/ && wget -O - ${system.pyenv.linux.url} | tar -C ${system.homedir.linux}/pyenv-root/ -xzf - && if [ ! -z "${build.gradle_cache.url}" ]; then wget -O - ${build.gradle_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi && if [ ! -z "${build.android_cache.url}" ]; then wget -O - ${build.android_cache.url} | tar -C ${system.homedir.linux}/ -xzf - ; fi;" > /tmp/clone.sh && chmod +x /tmp/clone.sh && sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} && diff --git a/taskcluster/linux-rpi3-cpu-opt.yml b/taskcluster/linux-rpi3-cpu-opt.yml index 0cabb87022..82959259b8 100644 --- a/taskcluster/linux-rpi3-cpu-opt.yml +++ b/taskcluster/linux-rpi3-cpu-opt.yml @@ -14,12 +14,12 @@ build: apt-get -qq -y install gdebi git pixz && wget http://mirrors.kernel.org/ubuntu/pool/universe/m/multistrap/multistrap_2.2.0ubuntu2_all.deb -O /tmp/multistrap_2.2.0ubuntu2_all.deb && echo "y" | gdebi /tmp/multistrap_2.2.0ubuntu2_all.deb && - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml system_config: > multistrap -d /tmp/multistrap-raspbian-buster/ -f ${system.homedir.linux}/DeepSpeech/ds/native_client/multistrap_raspbian_buster.conf - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.arm/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.arm/artifacts/public/home.tar.xz" scripts: build: "taskcluster/rpi3-build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/node-gyp-cache-base.tyml b/taskcluster/node-gyp-cache-base.tyml deleted file mode 100644 index 5c0e895d06..0000000000 --- a/taskcluster/node-gyp-cache-base.tyml +++ /dev/null @@ -1,57 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "node:12" - - features: - taskclusterProxy: true - - # This task will inspect system.node_gyp_cache taskcluster index existence: - # - if the artifact does not exists, it will build it - # - if the artifact exists, it will re-mirror it (if we don't do that, new - # index gets published with no artifact and erases existing one) - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; - cache_file=`curl -sSIL -o /dev/null -w "%{http_code}" ${system.node_gyp_cache.url}` && - if [ "$cache_file" != "200" ]; then - mkdir -p ~/DeepSpeech/ds/ && - git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && 
- cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && - ~/DeepSpeech/ds/${build.scripts.build} && - ~/DeepSpeech/ds/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://taskcluster/index/v1/task/${system.node_gyp_cache.namespace}" - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/node-gyp-cache.yml b/taskcluster/node-gyp-cache.yml index a9be39de1a..a0c48ba52f 100644 --- a/taskcluster/node-gyp-cache.yml +++ b/taskcluster/node-gyp-cache.yml @@ -1,5 +1,12 @@ build: - template_file: node-gyp-cache-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml + docker_image: "node:12" + cache: + artifact_url: "${system.node_gyp_cache.url}" + artifact_namespace: "${system.node_gyp_cache.namespace}" + system_setup: + > + (apt-get -qq -y install sudo || true) scripts: build: "taskcluster/node-gyp-populate.sh" package: "taskcluster/node-gyp-package.sh" diff --git a/taskcluster/node-gyp-populate.sh b/taskcluster/node-gyp-populate.sh index 7043d33244..333c692a54 100755 --- a/taskcluster/node-gyp-populate.sh +++ b/taskcluster/node-gyp-populate.sh @@ -8,7 +8,9 @@ node --version npm --version -npm install -g node-gyp@6.x +npm install node-gyp@6.x + +export PATH=$HOME/node_modules/.bin/:$PATH devDir=$DS_ROOT_TASK/node-gyp-cache/ diff --git a/taskcluster/node-package-cpu.yml b/taskcluster/node-package-cpu.yml index 72870cd51d..6769543b82 100644 --- a/taskcluster/node-package-cpu.yml +++ b/taskcluster/node-package-cpu.yml @@ -8,7 +8,7 @@ build: - "win-amd64-cpu-opt" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml scripts: build: "taskcluster/node-build.sh" diff --git a/taskcluster/node-package-gpu.yml b/taskcluster/node-package-gpu.yml index db48ae3248..a430b4b1ac 100644 --- a/taskcluster/node-package-gpu.yml +++ b/taskcluster/node-package-gpu.yml @@ -5,7 +5,7 @@ build: - "win-amd64-gpu-opt" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml scripts: build: "taskcluster/node-build.sh --cuda" diff --git a/taskcluster/node-package-opt-base.tyml b/taskcluster/node-package-opt-base.tyml index b17daaace0..b14cdd4c39 100644 --- a/taskcluster/node-package-opt-base.tyml +++ b/taskcluster/node-package-opt-base.tyml @@ -30,7 +30,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "ubuntu:14.04" + image: "ubuntu:16.04" command: - "/bin/bash" @@ -40,10 +40,8 @@ then: extraSystemSetup: { $eval: strip(str(build.system_setup)) } extraSystemConfig: { $eval: strip(str(build.system_config)) } in: > - apt-get -qq -y remove --purge ubuntu-advantage-tools && - ${aptEc2Mirrors} && adduser --system --home ${system.homedir.linux} ${system.username} && - apt-get -qq update && apt-get -qq -y install realpath git wget curl make && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install realpath git wget curl make sudo && 
${extraSystemSetup} && cd ${system.homedir.linux}/ && echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && mkdir -p ~/DeepSpeech/tf/ && touch ~/DeepSpeech/tf/tc-vars.sh && chmod +x ~/DeepSpeech/tf/tc-vars.sh && mkdir -p ${system.homedir.linux}/.cache/node-gyp/ && wget -O - ${system.node_gyp_cache.url} | tar -C ${system.homedir.linux}/.cache/node-gyp/ -xzf -" > /tmp/clone.sh && chmod +x /tmp/clone.sh && sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && ${extraSystemConfig} && diff --git a/taskcluster/node-package-tflite.yml b/taskcluster/node-package-tflite.yml index d49404d02b..003384461e 100644 --- a/taskcluster/node-package-tflite.yml +++ b/taskcluster/node-package-tflite.yml @@ -6,7 +6,7 @@ build: - "win-amd64-tflite-opt" system_setup: > - ${nodejs.packages_trusty.prep_12} && ${nodejs.packages_trusty.apt_pinning} + ${nodejs.packages_xenial.prep_12} && ${nodejs.packages_xenial.apt_pinning} && apt-get -qq update && apt-get -qq -y install nodejs python-yaml scripts: build: "taskcluster/node-build.sh --tflite" diff --git a/taskcluster/pyenv-build.sh b/taskcluster/pyenv-build.sh index b3faf8fb47..54d11c7a04 100755 --- a/taskcluster/pyenv-build.sh +++ b/taskcluster/pyenv-build.sh @@ -18,11 +18,7 @@ for pyver_conf in ${SUPPORTED_PYTHON_VERSIONS}; do pyalias="${pyver}_${pyconf}" - maybe_ssl102_py37 ${pyver} - - LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH \ - PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf} ${PY37_OPENSSL}" \ - pyenv_install ${pyver} ${pyalias} + PYTHON_CONFIGURE_OPTS="--enable-unicode=${pyconf}" pyenv_install ${pyver} ${pyalias} setup_pyenv_virtualenv "${pyalias}" "deepspeech" diff --git a/taskcluster/pyenv-darwin-amd64.yml b/taskcluster/pyenv-darwin-amd64.yml index 47cceec9c6..3ad055caa1 100644 --- a/taskcluster/pyenv-darwin-amd64.yml +++ b/taskcluster/pyenv-darwin-amd64.yml @@ -1,5 +1,8 @@ build: - template_file: pyenv-darwin-opt-base.tyml + template_file: generic_tc_caching-darwin-opt-base.tyml + cache: + artifact_url: ${system.pyenv.osx.url} + artifact_namespace: ${system.pyenv.osx.namespace} scripts: build: "taskcluster/pyenv-build.sh" package: "taskcluster/pyenv-package.sh" diff --git a/taskcluster/pyenv-linux-amd64.yml b/taskcluster/pyenv-linux-amd64.yml index 393a8872b2..4946e15b40 100644 --- a/taskcluster/pyenv-linux-amd64.yml +++ b/taskcluster/pyenv-linux-amd64.yml @@ -1,8 +1,11 @@ build: - template_file: pyenv-linux-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml + cache: + artifact_url: "${system.pyenv.linux.url}" + artifact_namespace: "${system.pyenv.linux.namespace}" system_setup: > - apt-get -qq update && apt-get -qq -y install python-yaml ${python.packages_trusty.apt} wget + apt-get -qq update && apt-get -qq -y install python-yaml ${python.packages_xenial.apt} wget scripts: build: "taskcluster/pyenv-build.sh" package: "taskcluster/pyenv-package.sh" diff --git a/taskcluster/pyenv-linux-opt-base.tyml b/taskcluster/pyenv-linux-opt-base.tyml deleted file mode 100644 index 15d8f7b0cc..0000000000 --- a/taskcluster/pyenv-linux-opt-base.tyml +++ /dev/null @@ -1,52 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 
months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - image: ${build.docker_image} - - features: - taskclusterProxy: true - - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && - apt-get -qq update && apt-get -qq -y install curl git; - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" ${system.pyenv.linux.url}` && - if [ "$swig_bin" != "200" ]; then - ${extraSystemSetup} && - adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && - mkdir -p /tmp/artifacts/ && chmod 777 /tmp/artifacts && - echo -e "#!/bin/bash\nset -xe\n env && id && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && - sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.build} && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} taskcluster ${system.pyenv.linux.namespace} - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/pyenv-package.sh b/taskcluster/pyenv-package.sh index bc2c0639a0..73f6a542a2 100755 --- a/taskcluster/pyenv-package.sh +++ b/taskcluster/pyenv-package.sh @@ -2,17 +2,6 @@ set -xe -TC_EXPIRE=$1 -TC_INSTANCE=$2 -TC_INDEX=$3 - source $(dirname "$0")/tc-tests-utils.sh cd ${PYENV_ROOT}/ && $TAR -czf $TASKCLUSTER_ARTIFACTS/pyenv.tar.gz . - -if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! 
-z "${TC_INDEX}" ]; then - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ - "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" -fi; diff --git a/taskcluster/pyenv-win-amd64.yml b/taskcluster/pyenv-win-amd64.yml index d22872b33e..7d05f1412c 100644 --- a/taskcluster/pyenv-win-amd64.yml +++ b/taskcluster/pyenv-win-amd64.yml @@ -1,5 +1,8 @@ build: - template_file: pyenv-win-opt-base.tyml + template_file: generic_tc_caching-win-opt-base.tyml + cache: + artifact_url: "${system.pyenv.win.url}" + artifact_namespace: "${system.pyenv.win.namespace}" scripts: build: "taskcluster/pyenv-build.sh" package: "taskcluster/pyenv-package.sh" diff --git a/taskcluster/pyenv-win-opt-base.tyml b/taskcluster/pyenv-win-opt-base.tyml deleted file mode 100644 index 59c584bae1..0000000000 --- a/taskcluster/pyenv-win-opt-base.tyml +++ /dev/null @@ -1,59 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerTypeWin} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - - features: - taskclusterProxy: true - - mounts: - - file: msys2-base-x86_64.tar.xz - content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz - - env: - TC_MSYS_VERSION: 'MSYS_NT-6.3-9600' - MSYS: 'winsymlinks:nativestrict' - - command: - - >- - "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | - "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" - - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - - $let: - taskIndexExpire: { $fromNow: '6 months' } - in: > - echo .\msys64\usr\bin\bash.exe --login -cxe "export LC_ALL=C && - export PATH=\"$USERPROFILE/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" && - export TASKCLUSTER_ARTIFACTS=\"$(cygpath -u $USERPROFILE/public)\" && - export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && - echo \"export TASKCLUSTER_TASK_EXIT_CODE=0\" > $USERPROFILE/tc-exit.sh && - env && pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar && mkdir -p $TASKCLUSTER_ARTIFACTS/ && if [ \"`curl -sSIL -o /dev/null -w %%{http_code} ${system.pyenv.win.url}`\" != \"200\" ]; then git clone --quiet ${event.head.repo.url} $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/ && cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ${taskIndexExpire} taskcluster ${system.pyenv.win.namespace}; fi; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh" | cmd /k - - - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE" - - artifacts: - - type: "directory" - path: "public/" - expires: { $fromNow: '6 months' } - - 
metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/swig-darwin-amd64.yml b/taskcluster/swig-darwin-amd64.yml index cb08cdef19..64ca794b58 100644 --- a/taskcluster/swig-darwin-amd64.yml +++ b/taskcluster/swig-darwin-amd64.yml @@ -1,7 +1,12 @@ build: - template_file: swig-darwin-opt-base.tyml - swig_system: "darwin" - swig_arch: "amd64" + template_file: generic_tc_caching-darwin-opt-base.tyml + build_or_cache: + repo: "${system.swig.repo}" + sha: "${system.swig.sha1}" + dir: "swig" + cache: + artifact_url: "${system.swig_build.osx.url}" + artifact_namespace: "${system.swig_build.osx.namespace}" scripts: build: "taskcluster/build.sh" package: "taskcluster/package.sh" diff --git a/taskcluster/swig-darwin-opt-base.tyml b/taskcluster/swig-darwin-opt-base.tyml deleted file mode 100644 index 0f08e596e9..0000000000 --- a/taskcluster/swig-darwin-opt-base.tyml +++ /dev/null @@ -1,56 +0,0 @@ -taskId: ${taskcluster.taskId} -provisionerId: ${taskcluster.generic.provisionerId} -workerType: ${taskcluster.generic.workerType} -taskGroupId: ${taskcluster.taskGroupId} -schedulerId: ${taskcluster.schedulerId} -dependencies: - $map: { $eval: build.dependencies } - each(b): - $eval: as_slugid(b) -created: { $fromNow: '0 sec' } -deadline: { $fromNow: '1 day' } -expires: { $fromNow: '6 months' } -scopes: - - "index:insert-task:project.deepspeech.*" - -payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - - command: - - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - taskIndexExpire: { $fromNow: '6 months' } - in: > - export TASKCLUSTER_ARTIFACTS="$(pwd)/public/" && - export TASKCLUSTER_TASKDIR="$(pwd)" && - export TASKCLUSTER_ORIG_TASKDIR="$(pwd)" && - export LC_ALL=C && - export MACOSX_DEPLOYMENT_TARGET=10.10 && - export SDKROOT=/Library/Developer/CommandLineTools/SDKs/MacOSX10.14.sdk/ && - export HOMEBREW_NO_AUTO_UPDATE=1 && - env && - mkdir -p $TASKCLUSTER_ARTIFACTS/ && - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}/artifacts/public/ds-swig.tar.gz` && - if [ "$swig_bin" != "200" ]; then - git clone --quiet ${system.swig.repo} $TASKCLUSTER_TASKDIR/swig/ && - cd $TASKCLUSTER_TASKDIR/swig/ && git checkout --quiet ${system.swig.sha1} && - $TASKCLUSTER_TASKDIR/swig/${build.scripts.build} && - $TASKCLUSTER_TASKDIR/swig/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://taskcluster/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}" - fi; - - artifacts: - - type: "directory" - path: "public/" - expires: { $fromNow: '6 months' } - -metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/swig-linux-amd64.yml b/taskcluster/swig-linux-amd64.yml index 27bd058c1d..2a127bbc4d 100644 --- a/taskcluster/swig-linux-amd64.yml +++ b/taskcluster/swig-linux-amd64.yml @@ -1,8 +1,13 @@ build: - template_file: swig-linux-opt-base.tyml - docker_image: "ubuntu:14.04" - swig_system: "linux" - swig_arch: "amd64" + template_file: generic_tc_caching-linux-opt-base.tyml + docker_image: "ubuntu:16.04" + build_or_cache: + repo: "${system.swig.repo}" + 
sha: "${system.swig.sha1}" + dir: "swig" + cache: + artifact_url: "${system.swig_build.linux.url}" + artifact_namespace: "${system.swig_build.linux.namespace}" system_setup: > apt-get -qq -y install autoconf automake bison build-essential diff --git a/taskcluster/swig-linux-opt-base.tyml b/taskcluster/swig-linux-opt-base.tyml deleted file mode 100644 index fb3662e414..0000000000 --- a/taskcluster/swig-linux-opt-base.tyml +++ /dev/null @@ -1,54 +0,0 @@ -$if: 'event.event in build.allowed' -then: - taskId: ${taskcluster.taskId} - provisionerId: ${taskcluster.docker.provisionerId} - workerType: ${taskcluster.docker.workerType} - taskGroupId: ${taskcluster.taskGroupId} - schedulerId: ${taskcluster.schedulerId} - created: { $fromNow: '0 sec' } - deadline: { $fromNow: '1 day' } - expires: { $fromNow: '6 months' } - scopes: - - "index:insert-task:project.deepspeech.*" - - payload: - maxRunTime: { $eval: to_int(build.maxRunTime) } - image: ${build.docker_image} - - features: - taskclusterProxy: true - - command: - - "/bin/bash" - - "--login" - - "-cxe" - - $let: - extraSystemSetup: { $eval: strip(str(build.system_setup)) } - taskIndexExpire: { $fromNow: '6 months' } - in: > - (apt-get -qq -y remove --purge ubuntu-advantage-tools || true) && - apt-get -qq update && apt-get -qq -y install curl git && ${extraSystemSetup}; - swig_bin=`curl -sSIL -o /dev/null -w "%{http_code}" https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}/artifacts/public/ds-swig.tar.gz` && - if [ "$swig_bin" != "200" ]; then - adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux}/ && - echo -e "#!/bin/bash\nset -xe\n env && id && (git clone --quiet ${system.swig.repo} ~/swig/ && cd ~/swig/ && git checkout --quiet ${system.swig.sha1})" > /tmp/clone.sh && chmod +x /tmp/clone.sh && - sudo -H -u ${system.username} /bin/bash /tmp/clone.sh && - sudo -H -u ${system.username} --preserve-env /bin/bash ${system.homedir.linux}/swig/${build.scripts.build} && - sudo -H -u ${system.username} /bin/bash ${system.homedir.linux}/swig/${build.scripts.package} && - curl -sSL --fail -X PUT \ - -H "Content-Type: application/json" \ - -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${taskIndexExpire}\",\"data\":{}}" \ - "http://taskcluster/index/v1/task/project.deepspeech.swig.${build.swig_system}.${build.swig_arch}.${system.swig.sha1}" - fi; - - artifacts: - "public": - type: "directory" - path: "/tmp/artifacts/" - expires: { $fromNow: '6 months' } - - metadata: - name: ${build.metadata.name} - description: ${build.metadata.description} - owner: ${event.head.user.email} - source: ${event.head.repo.url} diff --git a/taskcluster/swig-win-amd64.yml b/taskcluster/swig-win-amd64.yml index 576bdf73a1..310a81122c 100644 --- a/taskcluster/swig-win-amd64.yml +++ b/taskcluster/swig-win-amd64.yml @@ -1,8 +1,13 @@ build: - template_file: swig-linux-opt-base.tyml + template_file: generic_tc_caching-linux-opt-base.tyml docker_image: "ubuntu:18.04" - swig_system: "win" - swig_arch: "amd64" + build_or_cache: + repo: "${system.swig.repo}" + sha: "${system.swig.sha1}" + dir: "swig" + cache: + artifact_url: "${system.swig_build.win.url}" + artifact_namespace: "${system.swig_build.win.namespace}" system_setup: > apt-get -qq -y install autoconf automake bison build-essential mingw-w64 && diff --git a/taskcluster/tc-all-vars.sh b/taskcluster/tc-all-vars.sh index 1a38e18b76..2efc1dbc21 100755 --- a/taskcluster/tc-all-vars.sh +++ 
b/taskcluster/tc-all-vars.sh @@ -42,10 +42,6 @@ if [ "${OS}" = "Darwin" ]; then fi; fi; -PY37_OPENSSL_DIR="${PYENV_ROOT}/ssl-xenial" -export PY37_LDPATH="${PY37_OPENSSL_DIR}/usr/lib/" -export LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH - export TASKCLUSTER_ARTIFACTS=${TASKCLUSTER_ARTIFACTS:-/tmp/artifacts} export TASKCLUSTER_TMP_DIR=${TASKCLUSTER_TMP_DIR:-/tmp} diff --git a/taskcluster/tc-android-utils.sh b/taskcluster/tc-android-utils.sh index a71edfb3d6..3bf66927f5 100755 --- a/taskcluster/tc-android-utils.sh +++ b/taskcluster/tc-android-utils.sh @@ -112,6 +112,11 @@ android_setup_emulator() local _flavor=$1 local _api_level=${2:-android-25} + local _api_kind=${3:-google_apis} + + if [ -z "${_api_kind}" ]; then + _api_kind="google_apis" + fi export PATH=${ANDROID_SDK_HOME}/tools/bin/:${ANDROID_SDK_HOME}/platform-tools/:$PATH export DS_BINARY_PREFIX="adb shell LD_LIBRARY_PATH=${ANDROID_TMP_DIR}/ds/ ${ANDROID_TMP_DIR}/ds/" @@ -123,11 +128,11 @@ android_setup_emulator() android_install_sdk_platform "${_api_level}" # Same, yes in case of license - yes | sdkmanager --install "system-images;${_api_level};google_apis;${_flavor}" + yes | sdkmanager --install "system-images;${_api_level};${_api_kind};${_flavor}" android_sdk_accept_licenses - avdmanager create avd --name "${_flavor}-ds-pixel-${_api_level}" --device 17 --package "system-images;${_api_level};google_apis;${_flavor}" + avdmanager create avd --name "${_flavor}-ds-pixel-${_api_level}" --device 17 --package "system-images;${_api_level};${_api_kind};${_flavor}" } android_start_emulator() diff --git a/taskcluster/tc-signal_augmentation-tests.sh b/taskcluster/tc-augmentation-tests.sh similarity index 86% rename from taskcluster/tc-signal_augmentation-tests.sh rename to taskcluster/tc-augmentation-tests.sh index edac0c190a..945f0bfb12 100644 --- a/taskcluster/tc-signal_augmentation-tests.sh +++ b/taskcluster/tc-augmentation-tests.sh @@ -22,7 +22,8 @@ popd set +o pipefail pushd ${HOME}/DeepSpeech/ds/ - time ./bin/run-tc-signal_augmentations.sh + time ./bin/run-tc-sample_augmentations.sh + time ./bin/run-tc-graph_augmentations.sh popd virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/tc-build-utils.sh b/taskcluster/tc-build-utils.sh index 5530517ef2..6a41a88a9c 100755 --- a/taskcluster/tc-build-utils.sh +++ b/taskcluster/tc-build-utils.sh @@ -231,7 +231,7 @@ do_deepspeech_netframework_build() # Setup dependencies nuget install DeepSpeechConsole/packages.config -OutputDirectory packages/ - MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe')" + MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe')" # We need MSYS2_ARG_CONV_EXCL='/' otherwise the '/' of CLI parameters gets mangled and disappears # We build the .NET Client for .NET Framework v4.5,v4.6,v4.7 @@ -270,7 +270,7 @@ do_deepspeech_netframework_wpf_example_build() # Setup dependencies nuget install DeepSpeechWPF/packages.config -OutputDirectory DeepSpeechWPF/packages/ - MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe')" + MSBUILD="$(cygpath 'C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\MSBuild\Current\Bin\MSBuild.exe')" # We need MSYS2_ARG_CONV_EXCL='/' otherwise the '/' of CLI parameters gets mangled and disappears # Build WPF example diff --git a/taskcluster/tc-py-utils.sh b/taskcluster/tc-py-utils.sh index e4937282f5..36f8073157 100755 --- 
a/taskcluster/tc-py-utils.sh +++ b/taskcluster/tc-py-utils.sh @@ -191,47 +191,6 @@ pyenv_install() fi } -# Hack to extract Ubuntu's 16.04 libssl 1.0.2 packages and use them during the -# local build of Python. -# -# Avoid (risky) upgrade of base system, allowing to keep one task build that -# builds all the python packages -maybe_ssl102_py37() -{ - pyver=$1 - - unset PY37_OPENSSL - - ARCH=$(uname -m) - case "${pyver}" in - 3.7*|3.8*) - if [ "${OS}" = "Linux" -a "${ARCH}" = "x86_64" ]; then - if [ -d "${PY37_OPENSSL_DIR}" ]; then - rm -rf "${PY37_OPENSSL_DIR}" - fi - - mkdir -p ${PY37_OPENSSL_DIR} - ${WGET} -P ${TASKCLUSTER_TMP_DIR} \ - http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl-dev_1.0.2g-1ubuntu4.15_amd64.deb \ - http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.0.0_1.0.2g-1ubuntu4.15_amd64.deb - - for deb in ${TASKCLUSTER_TMP_DIR}/libssl*.deb; do - dpkg -x ${deb} ${PY37_OPENSSL_DIR} - done; - - # Python configure expects things to be under lib/ - mv ${PY37_OPENSSL_DIR}/usr/include/x86_64-linux-gnu/openssl/opensslconf.h ${PY37_OPENSSL_DIR}/usr/include/openssl/ - mv ${PY37_OPENSSL_DIR}/lib/x86_64-linux-gnu/lib* ${PY37_OPENSSL_DIR}/usr/lib/ - mv ${PY37_OPENSSL_DIR}/usr/lib/x86_64-linux-gnu/* ${PY37_OPENSSL_DIR}/usr/lib/ - ln -sfn libcrypto.so.1.0.0 ${PY37_OPENSSL_DIR}/usr/lib/libcrypto.so - ln -sfn libssl.so.1.0.0 ${PY37_OPENSSL_DIR}/usr/lib/libssl.so - - export PY37_OPENSSL="--with-openssl=${PY37_OPENSSL_DIR}/usr" - fi; - ;; - esac -} - maybe_numpy_min_version() { local pyver=$1 diff --git a/taskcluster/tc-train-extra-tests.sh b/taskcluster/tc-train-extra-tests.sh new file mode 100644 index 0000000000..62ec225e09 --- /dev/null +++ b/taskcluster/tc-train-extra-tests.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +set -xe + +source $(dirname "$0")/tc-tests-utils.sh + +extract_python_versions "$1" "pyver" "pyver_pkg" "py_unicode_type" "pyconf" "pyalias" + +bitrate=$2 + +decoder_src=$3 + +if [ "$decoder_src" = "--pypi" ]; then + # Disable automatically picking up decoder package built in this CI group + export DECODER_ARTIFACTS_ROOT="" +fi + +mkdir -p ${TASKCLUSTER_ARTIFACTS} || true +mkdir -p /tmp/train || true +mkdir -p /tmp/train_tflite || true +virtualenv_activate "${pyalias}" "deepspeech" + +set -o pipefail +pip install --upgrade pip==19.3.1 setuptools==45.0.0 wheel==0.33.6 | cat +pushd ${HOME}/DeepSpeech/ds + pip install --upgrade . | cat +popd +set +o pipefail + +# Prepare correct arguments for training +case "${bitrate}" in + 8k) + sample_rate=8000 + sample_name='LDC93S1_pcms16le_1_8000.wav' + ;; + 16k) + sample_rate=16000 + sample_name='LDC93S1_pcms16le_1_16000.wav' + ;; +esac + +# Easier to rename so that we can exercise the LDC93S1 importer code to +# generate the CSV file.
+echo "Moving ${sample_name} to LDC93S1.wav" +mv "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/${sample_name}" "${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/LDC93S1.wav" + +pushd ${HOME}/DeepSpeech/ds/ + # Testing single SDB source + time ./bin/run-tc-ldc93s1_new_sdb.sh 220 "${sample_rate}" + # Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features + time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}" + time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}" + + # Test --metrics_files training argument + time ./bin/run-tc-ldc93s1_new_metrics.sh 2 "${sample_rate}" +popd + +pushd ${HOME}/DeepSpeech/ds/ + time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh +popd + +virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/tc-train-tests.sh b/taskcluster/tc-train-tests.sh index 3b681dcdf7..f939493590 100644 --- a/taskcluster/tc-train-tests.sh +++ b/taskcluster/tc-train-tests.sh @@ -50,11 +50,6 @@ pushd ${HOME}/DeepSpeech/ds/ time ./bin/run-tc-ldc93s1_new.sh 249 "${sample_rate}" time ./bin/run-tc-ldc93s1_new.sh 1 "${sample_rate}" time ./bin/run-tc-ldc93s1_tflite.sh "${sample_rate}" - # Testing single SDB source - time ./bin/run-tc-ldc93s1_new_sdb.sh 220 "${sample_rate}" - # Testing interleaved source (SDB+CSV combination) - run twice to test preprocessed features - time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 109 "${sample_rate}" - time ./bin/run-tc-ldc93s1_new_sdb_csv.sh 1 "${sample_rate}" popd cp /tmp/train/output_graph.pb ${TASKCLUSTER_ARTIFACTS} @@ -69,7 +64,6 @@ cp /tmp/train/output_graph.pbmm ${TASKCLUSTER_ARTIFACTS} pushd ${HOME}/DeepSpeech/ds/ time ./bin/run-tc-ldc93s1_checkpoint.sh - time ./bin/run-tc-ldc93s1_checkpoint_sdb.sh popd virtualenv_deactivate "${pyalias}" "deepspeech" diff --git a/taskcluster/tc-update-index.sh b/taskcluster/tc-update-index.sh new file mode 100755 index 0000000000..fbc7231609 --- /dev/null +++ b/taskcluster/tc-update-index.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Helper script because it is way too painful to deal with Windows' CMD.exe +# ways of escaping things when pushing JSON + +set -xe + +TC_EXPIRE=$1 +TC_INSTANCE=$2 +TC_INDEX=$3 + +source $(dirname "$0")/tc-tests-utils.sh + +if [ ! -z "${TC_EXPIRE}" -a ! -z "${TC_INSTANCE}" -a ! 
-z "${TC_INDEX}" ]; then + curl -sSL --fail -X PUT \ + -H "Content-Type: application/json" \ + -d "{\"taskId\":\"$TASK_ID\",\"rank\":0,\"expires\":\"${TC_EXPIRE}\",\"data\":{}}" \ + "http://${TC_INSTANCE}/index/v1/task/${TC_INDEX}" +fi; diff --git a/taskcluster/test-apk-android-30-x86_64-opt.yml b/taskcluster/test-apk-android-30-x86_64-opt.yml new file mode 100644 index 0000000000..c3ee8265db --- /dev/null +++ b/taskcluster/test-apk-android-30-x86_64-opt.yml @@ -0,0 +1,23 @@ +build: + template_file: test-android-opt-base.tyml + dependencies: + - "android-x86_64-cpu-opt" + - "test-training_16k-linux-amd64-py36m-opt" + - "swig-linux-amd64" + - "gradle-cache" + - "android-cache-x86_64-android-30" + test_model_task: "test-training_16k-linux-amd64-py36m-opt" + system_setup: + > + apt-get -qq -y install curl make python + cache: + url: ${system.android_cache.x86_64.android_30.url} + namespace: ${system.android_cache.x86_64.android_30.namespace} + gradle_cache: + url: ${system.gradle_cache.url} + namespace: ${system.gradle_cache.namespace} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-android-apk-tests.sh x86_64 android-30" + metadata: + name: "DeepSpeech Android 11.0 x86_64 Google Pixel APK/Java tests" + description: "Testing DeepSpeech APK/Java for Android 11.0 x86_64 Google Pixel, optimized version" diff --git a/taskcluster/test-armbian-opt-base.tyml b/taskcluster/test-armbian-opt-base.tyml index 7218e6848a..afe7b64b4e 100644 --- a/taskcluster/test-armbian-opt-base.tyml +++ b/taskcluster/test-armbian-opt-base.tyml @@ -19,7 +19,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "arm64v8/debian:buster-20190812" + image: "arm64v8/debian:buster-20200422" env: $let: @@ -42,7 +42,7 @@ then: - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - apt-get -qq update && apt-get -qq -y install curl python-simplejson git gnupg pixz sox sudo wget libatlas3-base libopenblas-base xxd && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install curl python-simplejson git gnupg pixz sox sudo wget libatlas3-base libopenblas-base xxd && update-ca-certificates --fresh && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && echo -e "#!/bin/bash\nset -xe\n env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && diff --git a/taskcluster/test-signal_augmentations-linux-amd64-py36m-opt.yml b/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml similarity index 87% rename from taskcluster/test-signal_augmentations-linux-amd64-py36m-opt.yml rename to taskcluster/test-augmentations-linux-amd64-py36m-opt.yml index 6773f9082b..8007cd6c79 100644 --- a/taskcluster/test-signal_augmentations-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-augmentations-linux-amd64-py36m-opt.yml @@ -4,9 +4,9 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: - tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-signal_augmentation-tests.sh 3.6.10:m" + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-augmentation-tests.sh 3.6.10:m" metadata: name: "DeepSpeech Linux AMD64 CPU signal augmentations Py3.6" description: "Augmenting LDC93S1 
sample in different ways for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-linux-opt-base.tyml b/taskcluster/test-linux-opt-base.tyml index fcb8b46685..67f8220be2 100644 --- a/taskcluster/test-linux-opt-base.tyml +++ b/taskcluster/test-linux-opt-base.tyml @@ -44,7 +44,6 @@ then: - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - ${aptEc2Mirrors} && apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox sudo wget && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && diff --git a/taskcluster/test-linux-opt-tag-base.tyml b/taskcluster/test-linux-opt-tag-base.tyml index 1e6686b5d3..78c87625b4 100644 --- a/taskcluster/test-linux-opt-tag-base.tyml +++ b/taskcluster/test-linux-opt-tag-base.tyml @@ -44,7 +44,6 @@ then: - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - ${aptEc2Mirrors} && apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox sudo wget && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && diff --git a/taskcluster/test-raspbian-opt-base.tyml b/taskcluster/test-raspbian-opt-base.tyml index f3d2a5e5b2..2938b6b6d9 100644 --- a/taskcluster/test-raspbian-opt-base.tyml +++ b/taskcluster/test-raspbian-opt-base.tyml @@ -19,7 +19,7 @@ then: payload: maxRunTime: { $eval: to_int(build.maxRunTime) } - image: "resin/rpi-raspbian:buster-20190121" + image: "balenalib/rpi-raspbian:buster-20200429" env: $let: @@ -42,7 +42,7 @@ then: - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > - apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox wget libatlas3-base xxd && ${extraSystemSetup} && + apt-get -qq update && apt-get -qq -y install curl python-simplejson git pixz sox wget libatlas3-base xxd && update-ca-certificates --fresh && ${extraSystemSetup} && adduser --system --home ${system.homedir.linux} ${system.username} && cd ${system.homedir.linux} && echo -e "#!/bin/bash\nset -xe\n env && id && mkdir ~/DeepSpeech/ && git clone --quiet ${event.head.repo.url} ~/DeepSpeech/ds/ && cd ~/DeepSpeech/ds && git checkout --quiet ${event.head.sha}" > /tmp/clone.sh && chmod +x /tmp/clone.sh && diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml new file mode 100644 index 0000000000..3b1e137818 --- /dev/null +++ b/taskcluster/test-training-extra_16k-linux-amd64-py35m-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-ctc-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.5.8:m 16k" + metadata: + name: "DeepSpeech Linux AMD64 CPU 16kHz all training features Py3.5" + description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version" diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml new file mode 100644 index 0000000000..c75bc7a220 --- /dev/null +++ b/taskcluster/test-training-extra_16k-linux-amd64-py36m-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-ctc-opt" + system_setup: + > +
apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 16k" + metadata: + name: "DeepSpeech Linux AMD64 CPU 16kHz all training features Py3.6" + description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml new file mode 100644 index 0000000000..79f9419d02 --- /dev/null +++ b/taskcluster/test-training-extra_16k-linux-amd64-py37m-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-ctc-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 16k" + metadata: + name: "DeepSpeech Linux AMD64 CPU 16kHz all training features Py3.7" + description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml new file mode 100644 index 0000000000..a5335c50ac --- /dev/null +++ b/taskcluster/test-training-extra_8k-linux-amd64-py36m-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-ctc-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.6.10:m 8k" + metadata: + name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.6" + description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml new file mode 100644 index 0000000000..32edde28d4 --- /dev/null +++ b/taskcluster/test-training-extra_8k-linux-amd64-py37m-opt.yml @@ -0,0 +1,12 @@ +build: + template_file: test-linux-opt-base.tyml + dependencies: + - "linux-amd64-ctc-opt" + system_setup: + > + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} + args: + tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-extra-tests.sh 3.7.6:m 8k" + metadata: + name: "DeepSpeech Linux AMD64 CPU 8kHz all training features Py3.7" + description: "Training (all features) a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml index dc7f83d8fc..67a1419136 100644 --- a/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml +++ b/taskcluster/test-training-pypi_16k-linux-amd64-py35m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml
b/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml index ffac9c895d..f2909aa108 100644 --- a/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-pypi_16k-linux-amd64-py36m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml index a7624e73b5..c3b8282a90 100644 --- a/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-pypi_16k-linux-amd64-py37m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml index 48c52ea187..78029d2369 100644 --- a/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-pypi_8k-linux-amd64-py36m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k --pypi" metadata: diff --git a/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml index 18f4d51fed..c044a55093 100644 --- a/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-pypi_8k-linux-amd64-py37m-opt.yml @@ -7,7 +7,7 @@ build: ref_match: "refs/tags/" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k --pypi" metadata: diff --git a/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml b/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml index d33ad21da3..3bd4c39dc1 100644 --- a/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml +++ b/taskcluster/test-training-unittests_8k-linux-amd64-py35m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-unittests.sh 3.5.8:m" metadata: diff --git a/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml index 195ddfe990..a4949830cd 100644 --- a/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training-unittests_8k-linux-amd64-py36m-opt.yml @@ -4,10 +4,10 @@ build: - "linux-amd64-ctc-opt" system_setup: > 
- apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-unittests.sh 3.6.10:m" metadata: name: "DeepSpeech on Linux AMD64 CPU training unittests using Python 3.6" description: "Training unittests DeepSpeech LDC93S1 model for Linux/AMD64 using Python 3.6, for CPU only, and optimized version" - \ No newline at end of file + diff --git a/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml index 1f9aabc3e1..5b9ff7a58a 100644 --- a/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training-unittests_8k-linux-amd64-py37m-opt.yml @@ -4,7 +4,7 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-unittests.sh 3.7.6:m" metadata: diff --git a/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml index 3f68fea380..5e5e2f0ac1 100644 --- a/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml +++ b/taskcluster/test-training_16k-linux-amd64-py35m-opt.yml @@ -4,9 +4,9 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.5.8:m 16k" metadata: - name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.5" + name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.5" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.5, CPU only, optimized version" diff --git a/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml index 9fa9791bb8..7f19771dd5 100644 --- a/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training_16k-linux-amd64-py36m-opt.yml @@ -4,9 +4,9 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 16k" metadata: - name: "DeepSpeech Linux AMD64 CPU 16kHz training Py3.6" + name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.6" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml b/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml index a63631dbac..4146dfca08 100644 --- a/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training_16k-linux-amd64-py37m-opt.yml @@ -4,9 +4,9 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 16k" metadata: - name: "DeepSpeech Linux AMD64 CPU 16kHz training 
Py3.7" + name: "DeepSpeech Linux AMD64 CPU 16kHz basic training Py3.7" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 16kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml b/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml index dc2b486f57..560ffcfb05 100644 --- a/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml +++ b/taskcluster/test-training_8k-linux-amd64-py36m-opt.yml @@ -4,9 +4,9 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.6.10:m 8k" metadata: - name: "DeepSpeech Linux AMD64 CPU 8kHz training Py3.6" + name: "DeepSpeech Linux AMD64 CPU 8kHz basic training Py3.6" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.6, CPU only, optimized version" diff --git a/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml b/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml index c863fa11b3..09bfd1291e 100644 --- a/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml +++ b/taskcluster/test-training_8k-linux-amd64-py37m-opt.yml @@ -4,9 +4,9 @@ build: - "linux-amd64-ctc-opt" system_setup: > - apt-get -qq update && apt-get -qq -y install ${training.packages_trusty.apt} + apt-get -qq update && apt-get -qq -y install ${training.packages_xenial.apt} args: tests_cmdline: "${system.homedir.linux}/DeepSpeech/ds/taskcluster/tc-train-tests.sh 3.7.6:m 8k" metadata: - name: "DeepSpeech Linux AMD64 CPU 8kHz training Py3.7" + name: "DeepSpeech Linux AMD64 CPU 8kHz basic training Py3.7" description: "Training a DeepSpeech LDC93S1 model for Linux/AMD64 8kHz Python 3.7, CPU only, optimized version" diff --git a/taskcluster/test-win-cuda-opt-base.tyml b/taskcluster/test-win-cuda-opt-base.tyml index eb2fb1d1fe..9fa3740420 100644 --- a/taskcluster/test-win-cuda-opt-base.tyml +++ b/taskcluster/test-win-cuda-opt-base.tyml @@ -38,13 +38,14 @@ then: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - $let: extraSystemSetup: { $eval: strip(str(build.system_setup)) } in: > .\msys64\usr\bin\bash.exe --login -cxe "export LC_ALL=C && - export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0/bin/:$PATH\" && + export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1/bin/:$PATH\" && export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" && export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && export TASKCLUSTER_NODE_DIR=\"$(cygpath -w $TASKCLUSTER_TASK_DIR/bin)\" && @@ -57,7 +58,6 @@ then: cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && cd $TASKCLUSTER_TASK_DIR && (mkdir pyenv-root/ && 7z x -so $USERPROFILE/pyenv.tar.gz | 7z x -opyenv-root/ -aoa -ttar 
-si ) && - pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar vim && ${extraSystemSetup} && /bin/bash ${build.args.tests_cmdline} ; @@ -67,9 +67,12 @@ then: mounts: - file: msys2-base-x86_64.tar.xz content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} - file: pyenv.tar.gz content: url: ${system.pyenv.win.url} diff --git a/taskcluster/test-win-opt-base.tyml b/taskcluster/test-win-opt-base.tyml index 4ddfa59562..f8925d6deb 100644 --- a/taskcluster/test-win-opt-base.tyml +++ b/taskcluster/test-win-opt-base.tyml @@ -40,6 +40,7 @@ then: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - $let: @@ -59,7 +60,6 @@ then: cd $TASKCLUSTER_TASK_DIR/DeepSpeech/ds && git checkout --quiet ${event.head.sha} && cd $TASKCLUSTER_TASK_DIR && (mkdir pyenv-root/ && 7z x -so $USERPROFILE/pyenv.tar.gz | 7z x -opyenv-root/ -aoa -ttar -si ) && - pacman --noconfirm -R bsdtar && pacman --noconfirm -S tar vim && ${extraSystemSetup} && /bin/bash ${build.args.tests_cmdline} ; @@ -69,9 +69,12 @@ then: mounts: - file: msys2-base-x86_64.tar.xz content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} - file: pyenv.tar.gz content: url: ${system.pyenv.win.url} diff --git a/taskcluster/win-amd64-cpu-opt.yml b/taskcluster/win-amd64-cpu-opt.yml index 3251209467..b7dbba5bc8 100644 --- a/taskcluster/win-amd64-cpu-opt.yml +++ b/taskcluster/win-amd64-cpu-opt.yml @@ -8,7 +8,7 @@ build: - "node-gyp-cache" - "swig-win-amd64" - "pyenv-win-amd64" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win/artifacts/public/home.tar.xz" scripts: build: "taskcluster/win-build.sh" package: "taskcluster/win-package.sh" diff --git a/taskcluster/win-amd64-ctc-opt.yml b/taskcluster/win-amd64-ctc-opt.yml index ebd37445a5..b17bc53c33 100644 --- a/taskcluster/win-amd64-ctc-opt.yml +++ b/taskcluster/win-amd64-ctc-opt.yml @@ -8,7 +8,7 @@ build: - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.win-ctc" - "index.project.deepspeech.deepspeech.native_client.${event.head.branchortag}.${event.head.sha}.win-ctc" - "index.project.deepspeech.deepspeech.native_client.win-ctc.${event.head.sha}" - tensorflow: 
"https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win/artifacts/public/home.tar.xz" scripts: build: 'taskcluster/decoder-build.sh' package: 'taskcluster/decoder-package.sh' diff --git a/taskcluster/win-amd64-gpu-opt.yml b/taskcluster/win-amd64-gpu-opt.yml index 077e79db46..b9c99395c7 100644 --- a/taskcluster/win-amd64-gpu-opt.yml +++ b/taskcluster/win-amd64-gpu-opt.yml @@ -8,7 +8,7 @@ build: - "node-gyp-cache" - "swig-win-amd64" - "pyenv-win-amd64" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win-cuda/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win-cuda/artifacts/public/home.tar.xz" scripts: build: "taskcluster/win-build.sh --cuda" package: "taskcluster/win-package.sh" diff --git a/taskcluster/win-amd64-tflite-opt.yml b/taskcluster/win-amd64-tflite-opt.yml index 993f04f38b..629380b7fe 100644 --- a/taskcluster/win-amd64-tflite-opt.yml +++ b/taskcluster/win-amd64-tflite-opt.yml @@ -8,7 +8,7 @@ build: - "node-gyp-cache" - "swig-win-amd64" - "pyenv-win-amd64" - tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r1.15.ceb46aae5836a0f648a2c3da5942af2b7d1b98bf.win/artifacts/public/home.tar.xz" + tensorflow: "https://community-tc.services.mozilla.com/api/index/v1/task/project.deepspeech.tensorflow.pip.r2.2.c29895fba1b9f9f48e2e54eefb024c69aa333473.win/artifacts/public/home.tar.xz" scripts: build: "taskcluster/win-build.sh --tflite" package: "taskcluster/win-package.sh" diff --git a/taskcluster/win-opt-base.tyml b/taskcluster/win-opt-base.tyml index 8d77b64740..e7db49a41e 100644 --- a/taskcluster/win-opt-base.tyml +++ b/taskcluster/win-opt-base.tyml @@ -40,11 +40,12 @@ payload: "C:\Program Files\7-zip\7z.exe" x -txz -so msys2-base-x86_64.tar.xz | "C:\Program Files\7-zip\7z.exe" x -o%USERPROFILE% -ttar -aoa -si - .\msys64\usr\bin\bash.exe --login -cx "export THIS_BASH_PID=$$; ps -ef | grep '[?]' | awk '{print $2}' | grep -v $THIS_BASH_PID | xargs -r kill; exit 0" + - .\msys64\usr\bin\bash.exe --login -cx "${system.msys2_filesystem_pkg.install}" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - .\msys64\usr\bin\bash.exe --login -cx "pacman -Syu --noconfirm" - echo .\msys64\usr\bin\bash.exe --login -cxe " export LC_ALL=C && - export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:$PATH\" && + export PATH=\"/c/builds/tc-workdir/msys64/usr/bin:/c/Python36:/c/Program Files/Git/bin:/c/Program Files/7-Zip/:/c/Program Files (x86)/Windows Kits/10/bin/x64/:$PATH\" && export TASKCLUSTER_ARTIFACTS=\"$USERPROFILE/public\" && export TASKCLUSTER_TASK_DIR=\"/c/builds/tc-workdir/\" && (mkdir $TASKCLUSTER_TASK_DIR || rm -fr $TASKCLUSTER_TASK_DIR/*) && cd $TASKCLUSTER_TASK_DIR && @@ -58,14 +59,13 @@ payload: ln -s $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/native_client/ $TASKCLUSTER_TASK_DIR/DeepSpeech/tf/native_client && cd $TASKCLUSTER_TASK_DIR && (mkdir pyenv-root/ && 7z x -so $USERPROFILE/pyenv.tar.gz | 7z x -opyenv-root/ -aoa -ttar -si ) && - pacman --noconfirm -R 
bsdtar && pacman --noconfirm -S tar make && pacman --noconfirm -S zip && $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.build} && $TASKCLUSTER_TASK_DIR/DeepSpeech/ds/${build.scripts.package} ; echo \"export TASKCLUSTER_TASK_EXIT_CODE=$?\" > $USERPROFILE/tc-exit.sh && cd $TASKCLUSTER_TASK_DIR/../ && rm -fr tc-workdir/ && exit $TASKCLUSTER_TASK_EXIT_CODE" | cmd - /k ""C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat"" x64 + /k ""C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat"" x64 - .\msys64\usr\bin\bash.exe --login -cxe "source $USERPROFILE/tc-exit.sh && exit $TASKCLUSTER_TASK_EXIT_CODE" @@ -73,16 +73,19 @@ payload: mounts: - file: msys2-base-x86_64.tar.xz content: - sha256: c4443113497acb2d2e285d40b929fc55f33f8f669902595ecdf66a655b63dc60 - url: >- - https://github.com/msys2/msys2-installer/releases/download/2020-05-17/msys2-base-x86_64-20200517.tar.xz + sha256: ${system.msys2.sha} + url: ${system.msys2.url} + - file: filesystem-2020.02-3-x86_64.pkg.tar.xz + content: + sha256: ${system.msys2_filesystem_pkg.sha} + url: ${system.msys2_filesystem_pkg.url} - file: home.tar.xz content: url: ${build.tensorflow} - file: pyenv.tar.gz content: url: ${system.pyenv.win.url} - - directory: .node-gyp + - directory: AppData/Local/node-gyp/Cache format: tar.gz content: url: ${system.node_gyp_cache.url} diff --git a/taskcluster/worker.cyml b/taskcluster/worker.cyml index 9ef5a85e90..65c5c8954a 100644 --- a/taskcluster/worker.cyml +++ b/taskcluster/worker.cyml @@ -4,8 +4,8 @@ taskcluster: provisionerId: proj-deepspeech workerType: ci workerTypeKvm: kvm - workerTypeWin: win-b - workerTypeCuda: win-gpu + workerTypeWin: win + workerTypeCuda: win-gpu-b dockerrpi3: provisionerId: proj-deepspeech workerType: ds-rpi3 diff --git a/tests/test_value_range.py b/tests/test_value_range.py index 7e17b3eccd..c08dcbd5cc 100644 --- a/tests/test_value_range.py +++ b/tests/test_value_range.py @@ -1,6 +1,8 @@ import unittest -from deepspeech_training.util.helpers import ValueRange, get_value_range, pick_value_from_range +import numpy as np +import tensorflow as tf +from deepspeech_training.util.helpers import ValueRange, get_value_range, pick_value_from_range, tf_pick_value_from_range class TestValueRange(unittest.TestCase): @@ -56,12 +58,23 @@ def test_float_int_3tuple(self): class TestPickValueFromFixedRange(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(TestPickValueFromFixedRange, self).__init__(*args, **kwargs) + self.session = tf.Session() + self.clock_ph = tf.placeholder(dtype=tf.float64, name='clock') def _ending_tester(self, value_range, clock, expected): - is_int = isinstance(value_range.start, int) - result = pick_value_from_range(value_range, clock) - self.assertEqual(result, expected) - self.assertTrue(isinstance(result, int if is_int else float)) + with tf.Session() as session: + tf_pick = tf_pick_value_from_range(value_range, clock=self.clock_ph) + + def run_pick(_, c): + return session.run(tf_pick, feed_dict={self.clock_ph: c}) + + is_int = isinstance(value_range.start, int) + for pick, int_type, float_type in [(pick_value_from_range, int, float), (run_pick, np.int32, np.float32)]: + result = pick(value_range, clock) + self.assertEqual(result, expected) + self.assertTrue(isinstance(result, int_type if is_int else float_type)) def test_int_0(self): self._ending_tester(ValueRange(1, 3, 0), 0.0, 1) @@ -83,31 +96,43 @@ def test_float_1(self): class TestPickValueFromRandomizedRange(unittest.TestCase): - - def 
_ending_tester(self, value_range, clock, expected_min, expected_max): - is_int = isinstance(value_range.start, int) - results = list(map(lambda x: pick_value_from_range(value_range, clock), range(100))) - self.assertGreater(len(set(results)), 80) - self.assertTrue(all(map(lambda x: expected_min <= x <= expected_max, results))) - self.assertTrue(all(map(lambda x: isinstance(x, int if is_int else float), results))) + def __init__(self, *args, **kwargs): + super(TestPickValueFromRandomizedRange, self).__init__(*args, **kwargs) + self.session = tf.Session() + self.clock_ph = tf.placeholder(dtype=tf.float64, name='clock') + + def _ending_tester(self, value_range, clock_min, clock_max, expected_min, expected_max): + with self.session as session: + tf_pick = tf_pick_value_from_range(value_range, clock=self.clock_ph) + + def run_pick(_, c): + return session.run(tf_pick, feed_dict={self.clock_ph: c}) + + is_int = isinstance(value_range.start, int) + clock_range = np.arange(clock_min, clock_max, (clock_max - clock_min) / 100.0) + for pick, int_type, float_type in [(pick_value_from_range, int, float), (run_pick, np.int32, np.float32)]: + results = [pick(value_range, c) for c in clock_range] + self.assertGreater(len(set(results)), 80) + self.assertTrue(all(map(lambda x: expected_min <= x <= expected_max, results))) + self.assertTrue(all(map(lambda x: isinstance(x, int_type if is_int else float_type), results))) def test_int_0(self): - self._ending_tester(ValueRange(10000, 30000, 10000), 0.0, 0, 20000) + self._ending_tester(ValueRange(10000, 30000, 10000), 0.0, 0.1, 0, 22000) def test_int_half(self): - self._ending_tester(ValueRange(10000, 30000, 10000), 0.5, 10000, 30000) + self._ending_tester(ValueRange(10000, 30000, 10000), 0.4, 0.6, 8000, 32000) def test_int_1(self): - self._ending_tester(ValueRange(10000, 30000, 10000), 1.0, 20000, 40000) + self._ending_tester(ValueRange(10000, 30000, 10000), 0.8, 1.0, 16000, 40000) def test_float_0(self): - self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.0, 0.0, 20000.0) + self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.0, 0.1, 0.0, 22000.0) def test_float_half(self): - self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.5, 10000.0, 30000.0) + self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.4, 0.6, 8000.0, 32000.0) def test_float_1(self): - self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 1.0, 20000.0, 40000.0) + self._ending_tester(ValueRange(10000.0, 30000.0, 10000.0), 0.8, 1.0, 16000.0, 40000.0) if __name__ == '__main__': diff --git a/training/deepspeech_training/VERSION b/training/deepspeech_training/VERSION index b8dd50a7eb..ed1ae43686 100644 --- a/training/deepspeech_training/VERSION +++ b/training/deepspeech_training/VERSION @@ -1 +1 @@ -0.8.0-alpha.3 +0.8.0-alpha.4 diff --git a/training/deepspeech_training/train.py b/training/deepspeech_training/train.py index f1832e62c4..175032da52 100644 --- a/training/deepspeech_training/train.py +++ b/training/deepspeech_training/train.py @@ -10,7 +10,6 @@ os.environ['TF_CPP_MIN_LOG_LEVEL'] = DESIRED_LOG_LEVEL import absl.app -import json import numpy as np import progressbar import shutil @@ -32,7 +31,7 @@ from .util.config import Config, initialize_globals from .util.checkpoints import load_or_init_graph_for_training, load_graph_for_evaluation from .util.evaluate_tools import save_samples_json -from .util.feeding import create_dataset, samples_to_mfccs, audiofile_to_features +from .util.feeding import create_dataset, audio_to_features, audiofile_to_features 
from .util.flags import create_flags, FLAGS from .util.helpers import check_ctcdecoder_version, ExceptionBox from .util.logging import create_progressbar, log_debug, log_error, log_info, log_progress, log_warn @@ -407,26 +406,13 @@ def log_grads_and_vars(grads_and_vars): def train(): - do_cache_dataset = True - - # pylint: disable=too-many-boolean-expressions - if (FLAGS.data_aug_features_multiplicative > 0 or - FLAGS.data_aug_features_additive > 0 or - FLAGS.augmentation_spec_dropout_keeprate < 1 or - FLAGS.augmentation_freq_and_time_masking or - FLAGS.augmentation_pitch_and_tempo_scaling or - FLAGS.augmentation_speed_up_std > 0 or - FLAGS.augmentation_sparse_warp): - do_cache_dataset = False - exception_box = ExceptionBox() # Create training and validation datasets train_set = create_dataset(FLAGS.train_files.split(','), batch_size=FLAGS.train_batch_size, - repetitions=FLAGS.augmentations_per_epoch, - augmentation_specs=FLAGS.augment, - enable_cache=FLAGS.feature_cache and do_cache_dataset, + epochs=FLAGS.epochs, + augmentations=Config.augmentations, cache_path=FLAGS.feature_cache, train_phase=True, exception_box=exception_box, @@ -450,6 +436,16 @@ def train(): buffering=FLAGS.read_buffer) for source in dev_sources] dev_init_ops = [iterator.make_initializer(dev_set) for dev_set in dev_sets] + if FLAGS.metrics_files: + metrics_sources = FLAGS.metrics_files.split(',') + metrics_sets = [create_dataset([source], + batch_size=FLAGS.dev_batch_size, + train_phase=False, + exception_box=exception_box, + process_ahead=len(Config.available_devices) * FLAGS.dev_batch_size * 2, + buffering=FLAGS.read_buffer) for source in metrics_sources] + metrics_init_ops = [iterator.make_initializer(metrics_set) for metrics_set in metrics_sets] + # Dropout dropout_rates = [tfv1.placeholder(tf.float32, name='dropout_{}'.format(i)) for i in range(6)] dropout_feed_dict = { @@ -488,7 +484,14 @@ def train(): step_summaries_op = tfv1.summary.merge_all('step_summaries') step_summary_writers = { 'train': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'train'), max_queue=120), - 'dev': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'dev'), max_queue=120) + 'dev': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'dev'), max_queue=120), + 'metrics': tfv1.summary.FileWriter(os.path.join(FLAGS.summary_dir, 'metrics'), max_queue=120), + } + + human_readable_set_names = { + 'train': 'Training', + 'dev': 'Validation', + 'metrics': 'Metrics', } # Checkpointing @@ -524,6 +527,12 @@ def run_set(set_name, epoch, init_op, dataset=None): step_summary_writer = step_summary_writers.get(set_name) checkpoint_time = time.time() + if is_train and FLAGS.cache_for_epochs > 0 and FLAGS.feature_cache: + feature_cache_index = FLAGS.feature_cache + '.index' + if epoch % FLAGS.cache_for_epochs == 0 and os.path.isfile(feature_cache_index): + log_info('Invalidating feature cache') + os.remove(feature_cache_index) # this will let TF also overwrite the related cache data files + # Setup progress bar class LossWidget(progressbar.widgets.FormatLabel): def __init__(self): @@ -533,7 +542,7 @@ def __call__(self, progress, data, **kwargs): data['mean_loss'] = total_loss / step_count if step_count else 0.0 return progressbar.widgets.FormatLabel.__call__(self, progress, data, **kwargs) - prefix = 'Epoch {} | {:>10}'.format(epoch, 'Training' if is_train else 'Validation') + prefix = 'Epoch {} | {:>10}'.format(epoch, human_readable_set_names[set_name]) widgets = [' | ', progressbar.widgets.Timer(), ' | Steps: ', 
progressbar.widgets.Counter(), ' | ', LossWidget()] @@ -550,11 +559,6 @@ def __call__(self, progress, data, **kwargs): session.run([train_op, global_step, loss, non_finite_files, step_summaries_op], feed_dict=feed_dict) exception_box.raise_if_set() - except tf.errors.InvalidArgumentError as err: - if FLAGS.augmentation_sparse_warp: - log_info("Ignoring sparse warp error: {}".format(err)) - continue - raise except tf.errors.OutOfRangeError: exception_box.raise_if_set() break @@ -635,6 +639,16 @@ def __call__(self, progress, data, **kwargs): log_info('Encountered a plateau, reducing learning rate to {}'.format( current_learning_rate)) + if FLAGS.metrics_files: + # Read only metrics, not affecting best validation loss tracking + for source, init_op in zip(metrics_sources, metrics_init_ops): + log_progress('Metrics for epoch %d on %s...' % (epoch, source)) + set_loss, _ = run_set('metrics', epoch, init_op, dataset=source) + log_progress('Metrics for epoch %d on %s - loss: %f' % (epoch, source, set_loss)) + + print('-' * 80) + + except KeyboardInterrupt: pass log_info('FINISHED optimization in {}'.format(datetime.utcnow() - train_start_time)) @@ -653,7 +667,7 @@ def create_inference_graph(batch_size=1, n_steps=16, tflite=False): # Create feature computation graph input_samples = tfv1.placeholder(tf.float32, [Config.audio_window_samples], 'input_samples') samples = tf.expand_dims(input_samples, -1) - mfccs, _ = samples_to_mfccs(samples, FLAGS.audio_sample_rate) + mfccs, _ = audio_to_features(samples, FLAGS.audio_sample_rate) mfccs = tf.identity(mfccs, name='mfccs') # Input tensor will be of shape [batch_size, n_steps, 2*n_context+1, n_input] diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py new file mode 100644 index 0000000000..b7033c51ee --- /dev/null +++ b/training/deepspeech_training/util/augmentations.py @@ -0,0 +1,512 @@ + +import os +import re +import math +import random +import numpy as np + +from multiprocessing import Queue, Process +from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS +from .helpers import LimitingPool, int_range, float_range, pick_value_from_range, tf_pick_value_from_range, MEGABYTE +from .sample_collections import samples_from_source + +BUFFER_SIZE = 1 * MEGABYTE +SPEC_PARSER = re.compile(r'^(?P<cls>[a-z_]+)(\[(?P<params>.*)\])?$') + + +class Augmentation: + def __init__(self, p=1.0): + self.probability = float(p) + + +class SampleAugmentation(Augmentation): + def start(self, buffering=BUFFER_SIZE): + pass + + def apply(self, sample, clock=0.0): + raise NotImplementedError + + def stop(self): + pass + + +class GraphAugmentation(Augmentation): + def __init__(self, p=1.0, domain='spectrogram'): + super(GraphAugmentation, self).__init__(p) + if domain not in ['signal', 'spectrogram', 'features']: + raise ValueError('Unsupported augmentation domain: {}'.format(domain)) + self.domain = domain + + def apply(self, tensor, transcript=None, clock=0.0): + raise NotImplementedError + + def apply_with_probability(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + rv = tf.random.stateless_uniform([], seed=(clock * tf.int32.min, clock * tf.int32.max)) + return tf.cond(tf.less(rv, self.probability), + lambda: self.apply(tensor, transcript=transcript, clock=clock), + lambda: tensor) + + def maybe_apply(self, domain, tensor, transcript=None, clock=0.0): + if domain == self.domain: + return
self.apply_with_probability(tensor, transcript=transcript, clock=clock) + return tensor + + def units_per_ms(self): + from .flags import FLAGS # pylint: disable=import-outside-toplevel + return FLAGS.audio_sample_rate / 1000.0 if self.domain == 'signal' else 1.0 / FLAGS.feature_win_step + + +def parse_augmentation(augmentation_spec): + """ + Parses an augmentation specification. + + Parameters + ---------- + augmentation_spec : str + Augmentation specification like "reverb[delay=20.0,decay=1.0]". + + Returns + ------- + Instance of an augmentation class from util.augmentations.*. + """ + match = SPEC_PARSER.match(augmentation_spec) + if not match: + raise ValueError('Augmentation specification has wrong format') + cls_name = ''.join(map(lambda p: p[0].upper() + p[1:], match.group('cls').split('_'))) + augmentation_cls = globals()[cls_name] if cls_name in globals() else None + if augmentation_cls is None or not issubclass(augmentation_cls, Augmentation) or augmentation_cls == Augmentation: + raise ValueError('Unknown augmentation: {}'.format(cls_name)) + parameters = match.group('params') + parameters = [] if parameters is None else parameters.split(',') + args = [] + kwargs = {} + for parameter in parameters: + pair = tuple(list(map(str.strip, (parameter.split('='))))) + if len(pair) == 1: + args.append(pair[0]) # append the bare value, not the 1-tuple + elif len(pair) == 2: + kwargs[pair[0]] = pair[1] + else: + raise ValueError('Unable to parse augmentation value assignment') + return augmentation_cls(*args, **kwargs) + + +def parse_augmentations(augmentation_specs): + """ + Parses a list of augmentation specifications. + + Parameters + ---------- + augmentation_specs : list of str + List of augmentation specifications like ["reverb[delay=20.0,decay=1.0]", "volume"]. + + Returns + ------- + List of augmentation class instances from util.augmentations.*. + """ + return [] if augmentation_specs is None else list(map(parse_augmentation, augmentation_specs)) + + +def apply_graph_augmentations(domain, tensor, augmentations, transcript=None, clock=0.0): + """ + Augments training sample tensor of a certain domain with matching augmentations of passed list. + + Parameters + ---------- + domain : str + Domain of the tensor to apply augmentations to. One of "signal", "spectrogram" or "features". + tensor : Tensor of type float32 + Tensor to apply augmentations to. + augmentations : list of augmentation class instances from util.augmentations.*. + List of augmentations of which only the ones matching the given domain will get applied to the tensor. + transcript : SparseTensor + clock : Tensor of type float32 + Time indicator for augmentation value-ranges. Running from 0.0 (start of training) to 1.0 (end of training).
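For orientation, a minimal sketch of how the spec parser above resolves an --augment value into an augmentation instance; class and parameter names are as defined in this new file, and the snake_case spec name is mapped to the CamelCase class name:

    # Sketch: 'frequency_mask[p=0.8,n=2]' -> FrequencyMask(p='0.8', n='2');
    # bracket parameters arrive as strings and are coerced by the constructors.
    from deepspeech_training.util.augmentations import parse_augmentations, FrequencyMask, Volume

    augs = parse_augmentations(['frequency_mask[p=0.8,n=2]', 'volume'])
    assert isinstance(augs[0], FrequencyMask) and augs[0].probability == 0.8
    assert isinstance(augs[1], Volume)  # all defaults: p=1.0, dbfs=3.0103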
+ + Returns + ------- + Tensor of type float32 + The augmented tensor + """ + if augmentations is not None: + for augmentation in augmentations: + if isinstance(augmentation, GraphAugmentation): + tensor = augmentation.maybe_apply(domain, tensor, transcript=transcript, clock=clock) + return tensor + + +class AugmentationContext: + def __init__(self, target_audio_type, augmentations): + self.target_audio_type = target_audio_type + self.augmentations = augmentations + + +AUGMENTATION_CONTEXT = None + + +def _init_augmentation_worker(preparation_context): + global AUGMENTATION_CONTEXT # pylint: disable=global-statement + AUGMENTATION_CONTEXT = preparation_context + + +def _augment_sample(timed_sample, context=None): + context = AUGMENTATION_CONTEXT if context is None else context + sample, clock = timed_sample + for augmentation in context.augmentations: + if random.random() < augmentation.probability: + augmentation.apply(sample, clock) + sample.change_audio_type(new_audio_type=context.target_audio_type) + return sample + + +def apply_sample_augmentations(samples, + augmentations, + audio_type=AUDIO_TYPE_NP, + buffering=BUFFER_SIZE, + process_ahead=None, + clock=0.0, + final_clock=None): + """ + Prepares samples for being used during training. + This includes parallel and buffered application of augmentations and a conversion to a specified audio-type. + + Parameters + ---------- + samples : Sample enumeration + Typically produced by util.sample_collections.samples_from_sources. + augmentations : list of augmentation class instances from util.augmentations.*. + List of augmentations of which only the sample augmentations (SampleAugmentation instances) will get applied to the samples. + audio_type : str + Target audio-type to convert samples to. See util.audio.Sample.__init__. + buffering : int + Read-buffer size to use while reading files. + process_ahead : int + Number of samples to pre-process ahead of time. + clock : float + Start or fixed clock value between 0.0 and 1.0 for the first or all samples. Has to be less than or equal to final_clock. + final_clock : float + Final clock value between 0.0 and 1.0 for the last sample. Has to be greater than or equal to clock. + Requires samples.__len__ attribute. + + Returns + ------- + iterable of util.sample_collections.LabeledSample or util.audio.Sample + """ + def timed_samples(): + if final_clock is None: + for sample in samples: + yield sample, clock + else: + for sample_index, sample in enumerate(samples): + sample_clock = clock + (final_clock - clock) * (sample_index / len(samples)) + yield sample, sample_clock + + assert 0.0 <= clock <= 1.0 + if final_clock is not None: + assert 0.0 <= final_clock <= 1.0 + assert clock <= final_clock + augmentations = [aug for aug in augmentations if isinstance(aug, SampleAugmentation)] if augmentations else [] + try: + for augmentation in augmentations: + augmentation.start(buffering=buffering) + context = AugmentationContext(audio_type, augmentations) + if process_ahead == 0: + for timed_sample in timed_samples(): + yield _augment_sample(timed_sample, context=context) + else: + with LimitingPool(process_ahead=process_ahead, + initializer=_init_augmentation_worker, + initargs=(context,)) as pool: + yield from pool.imap(_augment_sample, timed_samples()) + finally: + for augmentation in augmentations: + augmentation.stop() + + +def _enqueue_overlay_samples(sample_source, queue, buffering=BUFFER_SIZE): + """ + As the central distribution point for overlay samples, this function is supposed to run in one process only.
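Downstream, the feeding code drives apply_sample_augmentations with a rising clock; a minimal sketch of standalone use, assuming a placeholder SDB path and a length-aware sample source (final_clock needs len(samples)):

    # Sketch: decode and augment samples outside of a training run.
    from deepspeech_training.util.audio import AUDIO_TYPE_NP
    from deepspeech_training.util.augmentations import apply_sample_augmentations, parse_augmentations
    from deepspeech_training.util.sample_collections import samples_from_source

    samples = samples_from_source('data/train.sdb', labeled=True)  # placeholder path
    augs = parse_augmentations(['volume[dbfs=-3.0]', 'codec[bitrate=8000]'])
    for sample in apply_sample_augmentations(samples, augs,
                                             audio_type=AUDIO_TYPE_NP,
                                             process_ahead=0,  # stay in-process, no worker pool
                                             clock=0.0, final_clock=1.0):
        pass  # sample.audio now holds the augmented NumPy audio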
+ This ensures that samples are not used twice if not required. + It loads the (raw and still compressed) data and provides it to the actual augmentation workers. + These are then doing decompression, potential conversion and overlaying in parallel. + """ + samples = samples_from_source(sample_source, buffering=buffering, labeled=False) + while True: + for sample in samples: + queue.put(sample) + + +class Overlay(SampleAugmentation): + """See "Overlay augmentation" in training documentation""" + def __init__(self, source, p=1.0, snr=3.0, layers=1): + super(Overlay, self).__init__(p) + self.source = source + self.snr = float_range(snr) + self.layers = int_range(layers) + self.current_sample = None + self.queue = None + self.enqueue_process = None + + def start(self, buffering=BUFFER_SIZE): + self.queue = Queue(max(1, math.floor(self.probability * self.layers[1] * os.cpu_count()))) + self.enqueue_process = Process(target=_enqueue_overlay_samples, + args=(self.source, self.queue), + kwargs={'buffering': buffering}) + self.enqueue_process.start() + + def apply(self, sample, clock=0.0): + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + n_layers = pick_value_from_range(self.layers, clock=clock) + audio = sample.audio + overlay_data = np.zeros_like(audio) + for _ in range(n_layers): + overlay_offset = 0 + while overlay_offset < len(audio): + if self.current_sample is None: + next_overlay_sample = self.queue.get() + next_overlay_sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + self.current_sample = next_overlay_sample.audio + n_required = len(audio) - overlay_offset + n_current = len(self.current_sample) + if n_required >= n_current: # take it completely + overlay_data[overlay_offset:overlay_offset + n_current] += self.current_sample + overlay_offset += n_current + self.current_sample = None + else: # take required slice from head and keep tail for next layer or sample + overlay_data[overlay_offset:overlay_offset + n_required] += self.current_sample[0:n_required] + overlay_offset += n_required + self.current_sample = self.current_sample[n_required:] + snr_db = pick_value_from_range(self.snr, clock=clock) + orig_dbfs = max_dbfs(audio) + overlay_gain = orig_dbfs - max_dbfs(overlay_data) - snr_db + audio += overlay_data * gain_db_to_ratio(overlay_gain) + sample.audio = normalize_audio(audio, dbfs=orig_dbfs) + + def stop(self): + if self.enqueue_process is not None: + self.enqueue_process.terminate() + self.enqueue_process = None + self.current_sample = None + self.queue = None + + +class Codec(SampleAugmentation): + """See "Codec augmentation" in training documentation""" + def __init__(self, p=1.0, bitrate=3200): + super(Codec, self).__init__(p) + self.bitrate = int_range(bitrate) + + def apply(self, sample, clock=0.0): + bitrate = pick_value_from_range(self.bitrate, clock=clock) + sample.change_audio_type(new_audio_type=AUDIO_TYPE_PCM) # decoding to ensure it has to get encoded again + sample.change_audio_type(new_audio_type=AUDIO_TYPE_OPUS, bitrate=bitrate) # will get decoded again downstream + + +class Reverb(SampleAugmentation): + """See "Reverb augmentation" in training documentation""" + def __init__(self, p=1.0, delay=20.0, decay=10.0): + super(Reverb, self).__init__(p) + self.delay = float_range(delay) + self.decay = float_range(decay) + + def apply(self, sample, clock=0.0): + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + audio = np.array(sample.audio, dtype=np.float64) + orig_dbfs = max_dbfs(audio) + delay = pick_value_from_range(self.delay, clock=clock) + decay 
= pick_value_from_range(self.decay, clock=clock) + decay = gain_db_to_ratio(-decay) + result = np.copy(audio) + primes = [17, 19, 23, 29, 31] + for delay_prime in primes: # primes to minimize comb filter interference + layer = np.copy(audio) + n_delay = math.floor(delay * (delay_prime / primes[0]) * sample.audio_format.rate / 1000.0) + n_delay = max(16, n_delay) # 16 samples minimum to avoid performance trap and risk of division by zero + for w_index in range(0, math.floor(len(audio) / n_delay)): + w1 = w_index * n_delay + w2 = (w_index + 1) * n_delay + width = min(len(audio) - w2, n_delay) # last window could be smaller + layer[w2:w2 + width] += decay * layer[w1:w1 + width] + result += layer + audio = normalize_audio(result, dbfs=orig_dbfs) + sample.audio = np.array(audio, dtype=np.float32) + + +class Resample(SampleAugmentation): + """See "Resample augmentation" in training documentation""" + def __init__(self, p=1.0, rate=8000): + super(Resample, self).__init__(p) + self.rate = int_range(rate) + + def apply(self, sample, clock=0.0): + # late binding librosa and its dependencies + from librosa.core import resample # pylint: disable=import-outside-toplevel + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + rate = pick_value_from_range(self.rate, clock=clock) + audio = sample.audio + orig_len = len(audio) + audio = np.swapaxes(audio, 0, 1) + audio = resample(audio, sample.audio_format.rate, rate) + audio = resample(audio, rate, sample.audio_format.rate) + audio = np.swapaxes(audio, 0, 1)[0:orig_len] + sample.audio = audio + + +class Volume(SampleAugmentation): + """See "Volume augmentation" in training documentation""" + def __init__(self, p=1.0, dbfs=3.0103): + super(Volume, self).__init__(p) + self.target_dbfs = float_range(dbfs) + + def apply(self, sample, clock=0.0): + sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) + target_dbfs = pick_value_from_range(self.target_dbfs, clock=clock) + sample.audio = normalize_audio(sample.audio, dbfs=target_dbfs) + + +class Pitch(GraphAugmentation): + """See "Pitch augmentation" in training documentation""" + def __init__(self, p=1.0, pitch=(1.075, 1.075, 0.125)): + super(Pitch, self).__init__(p, domain='spectrogram') + self.pitch = float_range(pitch) + + def apply(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + original_shape = tf.shape(tensor) + pitch = tf_pick_value_from_range(self.pitch, clock=clock) + new_freq_size = tf.cast(tf.cast(original_shape[2], tf.float32) * pitch, tf.int32) + spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(tensor, -1), [original_shape[1], new_freq_size]) + spectrogram_aug = tf.image.crop_to_bounding_box(spectrogram_aug, + offset_height=0, + offset_width=0, + target_height=original_shape[1], + target_width=tf.math.minimum(original_shape[2], new_freq_size)) + spectrogram_aug = tf.cond(pitch < 1, + lambda: tf.image.pad_to_bounding_box(spectrogram_aug, + offset_height=0, + offset_width=0, + target_height=tf.shape(spectrogram_aug)[1], + target_width=original_shape[2]), + lambda: spectrogram_aug) + return spectrogram_aug[:, :, :, 0] + + +class Tempo(GraphAugmentation): + """See "Tempo augmentation" in training documentation""" + def __init__(self, p=1.0, factor=1.1, max_time=-1): + super(Tempo, self).__init__(p, domain='spectrogram') + self.factor = float_range(factor) + self.max_time = float(max_time) + + def apply(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + factor = 
tf_pick_value_from_range(self.factor, clock=clock) + original_shape = tf.shape(tensor) + new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32) / factor, tf.int32) + if transcript is not None: + new_time_size = tf.math.maximum(new_time_size, tf.shape(transcript)[1]) + if self.max_time > 0: + new_time_size = tf.math.minimum(new_time_size, tf.cast(self.max_time * self.units_per_ms(), tf.int32)) + spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(tensor, -1), [new_time_size, original_shape[2]]) + return spectrogram_aug[:, :, :, 0] + + +class FrequencyMask(GraphAugmentation): + """See "Frequency mask augmentation" in training documentation""" + def __init__(self, p=1.0, n=3, size=2): + super(FrequencyMask, self).__init__(p, domain='spectrogram') + self.n = int_range(n) # pylint: disable=invalid-name + self.size = int_range(size) + + def apply(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + time_max = tf.shape(tensor)[1] + freq_max = tf.shape(tensor)[2] + n = tf_pick_value_from_range(self.n, clock=clock) + + def body(i, spectrogram_aug): + size = tf_pick_value_from_range(self.size, clock=clock) + size = tf.math.maximum(1, tf.math.minimum(freq_max - 1, size)) + seed = tf.cast(clock * tf.int32.max, tf.int32) - i + f0 = tf.random.stateless_uniform((), (-seed, seed), minval=0, maxval=freq_max - size, dtype=tf.dtypes.int32) + freq_mask = tf.concat([tf.ones([1, time_max, f0]), + tf.zeros([1, time_max, size]), + tf.ones([1, time_max, freq_max - f0 - size])], axis=2) + return i + 1, spectrogram_aug * freq_mask + + return tf.while_loop(lambda i, spectrogram_aug: i < n, body, (0, tensor))[1] + + +class TimeMask(GraphAugmentation): + """See "Time mask augmentation" in training documentation""" + def __init__(self, p=1.0, domain='spectrogram', n=3, size=10.0): + super(TimeMask, self).__init__(p, domain=domain) + self.n = int_range(n) # pylint: disable=invalid-name + self.size = float_range(size) + + def apply(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + time_max = tf.shape(tensor)[0 if self.domain == 'signal' else 1] + n = tf_pick_value_from_range(self.n, clock=clock) + + def body(i, augmented): + size = tf.cast(tf_pick_value_from_range(self.size, clock=clock) * self.units_per_ms(), dtype=tf.int32) + size = tf.math.maximum(1, tf.math.minimum(time_max - 1, size)) + seed = tf.cast(clock * tf.int32.max, tf.int32) - i + t0 = tf.random.stateless_uniform((), (-seed, seed), minval=0, maxval=time_max - size, dtype=tf.dtypes.int32) + rest = time_max - t0 - size + if self.domain == 'spectrogram': + fm = tf.shape(tensor)[2] + time_mask = tf.concat([tf.ones([1, t0, fm]), tf.zeros([1, size, fm]), tf.ones([1, rest, fm])], axis=1) + elif self.domain == 'signal': + time_mask = tf.concat([tf.ones([t0, 1]), tf.zeros([size, 1]), tf.ones([rest, 1])], axis=0) + else: + time_mask = tf.concat([tf.ones([1, t0]), tf.zeros([1, size]), tf.ones([1, rest])], axis=1) + return i + 1, augmented * time_mask + + return tf.while_loop(lambda i, augmented: i < n, body, (0, tensor))[1] + + +class Dropout(GraphAugmentation): + """See "Dropout augmentation" in training documentation""" + def __init__(self, p=1.0, domain='spectrogram', rate=0.05): + super(Dropout, self).__init__(p, domain=domain) + self.rate = float_range(rate) + + def apply(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + rate = tf_pick_value_from_range(self.rate, 
clock=clock) + rate = tf.math.maximum(0.0, rate) + factors = tf.random.stateless_uniform(tf.shape(tensor), + (clock * tf.int32.min, clock * tf.int32.max), + minval=0.0, + maxval=1.0, + dtype=tf.float32) + return tensor * tf.math.sign(tf.math.floor(factors + (1.0 - rate))) # mask is 0 with probability rate, so a fraction of about rate gets dropped + + +class Add(GraphAugmentation): + """See "Add augmentation" in training documentation""" + def __init__(self, p=1.0, domain='features', stddev=5): + super(Add, self).__init__(p, domain=domain) + self.stddev = float_range(stddev) + + def apply(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + stddev = tf_pick_value_from_range(self.stddev, clock=clock) + seed = (clock * tf.int32.min, clock * tf.int32.max) + return tensor + tf.random.stateless_normal(tf.shape(tensor), seed, mean=0.0, stddev=stddev) + + +class Multiply(GraphAugmentation): + """See "Multiply augmentation" in training documentation""" + def __init__(self, p=1.0, domain='features', stddev=5): + super(Multiply, self).__init__(p, domain=domain) + self.stddev = float_range(stddev) + + def apply(self, tensor, transcript=None, clock=0.0): + import tensorflow as tf # pylint: disable=import-outside-toplevel + stddev = tf_pick_value_from_range(self.stddev, clock=clock) + seed = (clock * tf.int32.min, clock * tf.int32.max) + return tensor * tf.random.stateless_normal(tf.shape(tensor), seed, mean=1.0, stddev=stddev) diff --git a/training/deepspeech_training/util/config.py b/training/deepspeech_training/util/config.py index fdd6353903..13a362201d 100755 --- a/training/deepspeech_training/util/config.py +++ b/training/deepspeech_training/util/config.py @@ -2,7 +2,6 @@ import os import sys -import tensorflow as tf import tensorflow.compat.v1 as tfv1 from attrdict import AttrDict @@ -13,6 +12,7 @@ from .logging import log_error, log_warn from .text import Alphabet, UTF8Alphabet from .helpers import parse_file_size +from .augmentations import parse_augmentations class ConfigSingleton: _config = None @@ -30,6 +30,17 @@ def __getattr__(self, name): def initialize_globals(): c = AttrDict() + # Augmentations + c.augmentations = parse_augmentations(FLAGS.augment) + if len(c.augmentations) > 0 and FLAGS.feature_cache and FLAGS.cache_for_epochs == 0: + log_warn('Due to current feature-cache settings the exact same sample augmentations of the first ' + 'epoch will be repeated on all following epochs. This could lead to unintended over-fitting.
' + 'You could use --cache_for_epochs to invalidate the cache after a given number of epochs.') + + # Caching + if FLAGS.cache_for_epochs == 1: + log_warn('--cache_for_epochs == 1 is (re-)creating the feature cache on every epoch but will never use it.') + # Read-buffer FLAGS.read_buffer = parse_file_size(FLAGS.read_buffer) diff --git a/training/deepspeech_training/util/feeding.py b/training/deepspeech_training/util/feeding.py index 31dafcfb93..4c9b681d3b 100644 --- a/training/deepspeech_training/util/feeding.py +++ b/training/deepspeech_training/util/feeding.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function +from collections import Counter from functools import partial import numpy as np @@ -11,13 +12,13 @@ from .config import Config from .text import text_to_char_array from .flags import FLAGS -from .spectrogram_augmentations import augment_freq_time_mask, augment_dropout, augment_pitch_and_tempo, augment_speed_up, augment_sparse_warp +from .augmentations import apply_sample_augmentations, apply_graph_augmentations from .audio import read_frames_from_file, vad_split, pcm_to_np, DEFAULT_FORMAT -from .sample_collections import samples_from_sources, augment_samples +from .sample_collections import samples_from_sources from .helpers import remember_exception, MEGABYTE -def samples_to_mfccs(samples, sample_rate, train_phase=False, sample_id=None): +def audio_to_features(audio, sample_rate, transcript=None, clock=0.0, train_phase=False, augmentations=None, sample_id=None): if train_phase: # We need the lambdas to make TensorFlow happy. # pylint: disable=unnecessary-lambda @@ -27,74 +28,50 @@ def samples_to_mfccs(samples, sample_rate, train_phase=False, sample_id=None): lambda: tf.no_op(), name='matching_sample_rate') - spectrogram = contrib_audio.audio_spectrogram(samples, + if train_phase and augmentations is not None: + audio = apply_graph_augmentations('signal', audio, augmentations, transcript=transcript, clock=clock) + + spectrogram = contrib_audio.audio_spectrogram(audio, window_size=Config.audio_window_samples, stride=Config.audio_step_samples, magnitude_squared=True) - # Data Augmentations - if train_phase: - if FLAGS.augmentation_spec_dropout_keeprate < 1: - spectrogram = augment_dropout(spectrogram, - keep_prob=FLAGS.augmentation_spec_dropout_keeprate) - - # sparse warp must before freq/time masking - if FLAGS.augmentation_sparse_warp: - spectrogram = augment_sparse_warp(spectrogram, - time_warping_para=FLAGS.augmentation_sparse_warp_time_warping_para, - interpolation_order=FLAGS.augmentation_sparse_warp_interpolation_order, - regularization_weight=FLAGS.augmentation_sparse_warp_regularization_weight, - num_boundary_points=FLAGS.augmentation_sparse_warp_num_boundary_points, - num_control_points=FLAGS.augmentation_sparse_warp_num_control_points) - - if FLAGS.augmentation_freq_and_time_masking: - spectrogram = augment_freq_time_mask(spectrogram, - frequency_masking_para=FLAGS.augmentation_freq_and_time_masking_freq_mask_range, - time_masking_para=FLAGS.augmentation_freq_and_time_masking_time_mask_range, - frequency_mask_num=FLAGS.augmentation_freq_and_time_masking_number_freq_masks, - time_mask_num=FLAGS.augmentation_freq_and_time_masking_number_time_masks) - - if FLAGS.augmentation_pitch_and_tempo_scaling: - spectrogram = augment_pitch_and_tempo(spectrogram, - max_tempo=FLAGS.augmentation_pitch_and_tempo_scaling_max_tempo, - max_pitch=FLAGS.augmentation_pitch_and_tempo_scaling_max_pitch, - 
min_pitch=FLAGS.augmentation_pitch_and_tempo_scaling_min_pitch) - - if FLAGS.augmentation_speed_up_std > 0: - spectrogram = augment_speed_up(spectrogram, speed_std=FLAGS.augmentation_speed_up_std) - - mfccs = contrib_audio.mfcc(spectrogram=spectrogram, - sample_rate=sample_rate, - dct_coefficient_count=Config.n_input, - upper_frequency_limit=FLAGS.audio_sample_rate/2) - mfccs = tf.reshape(mfccs, [-1, Config.n_input]) - - return mfccs, tf.shape(input=mfccs)[0] - - -def audio_to_features(audio, sample_rate, train_phase=False, sample_id=None): - features, features_len = samples_to_mfccs(audio, sample_rate, train_phase=train_phase, sample_id=sample_id) + if train_phase and augmentations is not None: + spectrogram = apply_graph_augmentations('spectrogram', spectrogram, augmentations, transcript=transcript, clock=clock) - if train_phase: - if FLAGS.data_aug_features_multiplicative > 0: - features = features*tf.random.normal(mean=1, stddev=FLAGS.data_aug_features_multiplicative, shape=tf.shape(features)) + features = contrib_audio.mfcc(spectrogram=spectrogram, + sample_rate=sample_rate, + dct_coefficient_count=Config.n_input, + upper_frequency_limit=FLAGS.audio_sample_rate / 2) + features = tf.reshape(features, [-1, Config.n_input]) - if FLAGS.data_aug_features_additive > 0: - features = features+tf.random.normal(mean=0.0, stddev=FLAGS.data_aug_features_additive, shape=tf.shape(features)) + if train_phase and augmentations is not None: + features = apply_graph_augmentations('features', features, augmentations, transcript=transcript, clock=clock) - return features, features_len + return features, tf.shape(input=features)[0] -def audiofile_to_features(wav_filename, train_phase=False): +def audiofile_to_features(wav_filename, clock=0.0, train_phase=False, augmentations=None): samples = tf.io.read_file(wav_filename) decoded = contrib_audio.decode_wav(samples, desired_channels=1) - return audio_to_features(decoded.audio, decoded.sample_rate, train_phase=train_phase, sample_id=wav_filename) + return audio_to_features(decoded.audio, + decoded.sample_rate, + clock=clock, + train_phase=train_phase, + augmentations=augmentations, + sample_id=wav_filename) -def entry_to_features(sample_id, audio, sample_rate, transcript, train_phase=False): +def entry_to_features(sample_id, audio, sample_rate, transcript, clock, train_phase=False, augmentations=None): # https://bugs.python.org/issue32117 - features, features_len = audio_to_features(audio, sample_rate, train_phase=train_phase, sample_id=sample_id) sparse_transcript = tf.SparseTensor(*transcript) + features, features_len = audio_to_features(audio, + sample_rate, + transcript=sparse_transcript, + clock=clock, + train_phase=train_phase, + augmentations=augmentations, + sample_id=sample_id) return sample_id, features, features_len, sparse_transcript @@ -109,25 +86,32 @@ def to_sparse_tuple(sequence): def create_dataset(sources, batch_size, - repetitions=1, - augmentation_specs=None, - enable_cache=False, + epochs=1, + augmentations=None, cache_path=None, train_phase=False, exception_box=None, process_ahead=None, buffering=1 * MEGABYTE): + epoch_counter = Counter() # survives restarts of the dataset and its generator + def generate_values(): + epoch = epoch_counter['epoch'] + if train_phase: + epoch_counter['epoch'] += 1 samples = samples_from_sources(sources, buffering=buffering, labeled=True) - samples = augment_samples(samples, - repetitions=repetitions, - augmentation_specs=augmentation_specs, - buffering=buffering, - process_ahead=2 * batch_size if 
process_ahead is None else process_ahead) - for sample in samples: + num_samples = len(samples) + samples = apply_sample_augmentations(samples, + augmentations, + buffering=buffering, + process_ahead=2 * batch_size if process_ahead is None else process_ahead, + clock=epoch / epochs, + final_clock=(epoch + 1) / epochs) + for sample_index, sample in enumerate(samples): + clock = (epoch * num_samples + sample_index) / (epochs * num_samples) if train_phase and epochs > 0 else 0.0 transcript = text_to_char_array(sample.transcript, Config.alphabet, context=sample.sample_id) transcript = to_sparse_tuple(transcript) - yield sample.sample_id, sample.audio, sample.audio_format.rate, transcript + yield sample.sample_id, sample.audio, sample.audio_format.rate, transcript, clock # Batching a dataset of 2D SparseTensors creates 3D batches, which fail # when passed to tf.nn.ctc_loss, so we reshape them to remove the extra @@ -143,13 +127,13 @@ def batch_fn(sample_ids, features, features_len, transcripts): sample_ids = sample_ids.batch(batch_size) return tf.data.Dataset.zip((sample_ids, features, transcripts)) - process_fn = partial(entry_to_features, train_phase=train_phase) + process_fn = partial(entry_to_features, train_phase=train_phase, augmentations=augmentations) dataset = (tf.data.Dataset.from_generator(remember_exception(generate_values, exception_box), output_types=(tf.string, tf.float32, tf.int32, - (tf.int64, tf.int32, tf.int64))) + (tf.int64, tf.int32, tf.int64), tf.float64)) .map(process_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)) - if enable_cache: + if cache_path: dataset = dataset.cache(cache_path) dataset = (dataset.window(batch_size, drop_remainder=train_phase).flat_map(batch_fn) .prefetch(len(Config.available_devices))) @@ -172,7 +156,7 @@ def generate_values(): yield time_start, time_end, samples def to_mfccs(time_start, time_end, samples): - features, features_len = samples_to_mfccs(samples, audio_format.rate) + features, features_len = audio_to_features(samples, audio_format.rate) return time_start, time_end, features, features_len def create_batch_set(bs, criteria): diff --git a/training/deepspeech_training/util/flags.py b/training/deepspeech_training/util/flags.py index 69774c7824..c31eb461e7 100644 --- a/training/deepspeech_training/util/flags.py +++ b/training/deepspeech_training/util/flags.py @@ -13,11 +13,13 @@ def create_flags(): f = absl.flags f.DEFINE_string('train_files', '', 'comma separated list of files specifying the dataset used for training. Multiple files will get merged. If empty, training will not be run.') - f.DEFINE_string('dev_files', '', 'comma separated list of files specifying the dataset used for validation. Multiple files will get merged. If empty, validation will not be run.') - f.DEFINE_string('test_files', '', 'comma separated list of files specifying the dataset used for testing. Multiple files will get merged. If empty, the model will not be tested.') + f.DEFINE_string('dev_files', '', 'comma separated list of files specifying the datasets used for validation. Multiple files will get reported separately. If empty, validation will not be run.') + f.DEFINE_string('test_files', '', 'comma separated list of files specifying the datasets used for testing. Multiple files will get reported separately. If empty, the model will not be tested.') + f.DEFINE_string('metrics_files', '', 'comma separated list of files specifying the datasets used for tracking of metrics (after validation step). 
Currently the only metric is the CTC loss; it does not affect the tracking of the best validation loss. Multiple files will get reported separately. If empty, metrics will not be computed.') f.DEFINE_string('read_buffer', '1MB', 'buffer-size for reading samples from datasets (supports file-size suffixes KB, MB, GB, TB)') f.DEFINE_string('feature_cache', '', 'cache MFCC features to disk to speed up future training runs on the same data. This flag specifies the path where cached features extracted from --train_files will be saved. If empty, or if online augmentation flags are enabled, caching will be disabled.') + f.DEFINE_integer('cache_for_epochs', 0, 'after how many epochs the feature cache is invalidated again - 0 for "never"') f.DEFINE_integer('feature_win_len', 32, 'feature extraction audio window length in milliseconds') f.DEFINE_integer('feature_win_step', 20, 'feature extraction window step length in milliseconds') @@ -27,32 +29,6 @@ def create_flags(): # ================ f.DEFINE_multi_string('augment', None, 'specifies an augmentation of the training samples. Format is "--augment operation[param1=value1, ...]"') - f.DEFINE_integer('augmentations_per_epoch', 1, 'how often the train set should be repeated and re-augmented per epoch') - - f.DEFINE_float('data_aug_features_additive', 0, 'std of the Gaussian additive noise') - f.DEFINE_float('data_aug_features_multiplicative', 0, 'std of normal distribution around 1 for multiplicative noise') - - f.DEFINE_float('augmentation_spec_dropout_keeprate', 1, 'keep rate of dropout augmentation on spectrogram (if 1, no dropout will be performed on spectrogram)') - - f.DEFINE_boolean('augmentation_sparse_warp', False, 'whether to use spectrogram sparse warp. USE OF THIS FLAG IS UNSUPPORTED, enable sparse warp will increase training time drastically, and the paper also mentioned that this is not a major factor to improve accuracy.') - f.DEFINE_integer('augmentation_sparse_warp_num_control_points', 1, 'specify number of control points') - f.DEFINE_integer('augmentation_sparse_warp_time_warping_para', 20, 'time_warping_para') - f.DEFINE_integer('augmentation_sparse_warp_interpolation_order', 2, 'sparse_warp_interpolation_order') - f.DEFINE_float('augmentation_sparse_warp_regularization_weight', 0.0, 'sparse_warp_regularization_weight') - f.DEFINE_integer('augmentation_sparse_warp_num_boundary_points', 1, 'sparse_warp_num_boundary_points') - - f.DEFINE_boolean('augmentation_freq_and_time_masking', False, 'whether to use frequency and time masking augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_freq_mask_range', 5, 'max range of masks in the frequency domain when performing freqtime-mask augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_number_freq_masks', 3, 'number of masks in the frequency domain when performing freqtime-mask augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_time_mask_range', 2, 'max range of masks in the time domain when performing freqtime-mask augmentation') - f.DEFINE_integer('augmentation_freq_and_time_masking_number_time_masks', 3, 'number of masks in the time domain when performing freqtime-mask augmentation') - - f.DEFINE_float('augmentation_speed_up_std', 0, 'std for speeding-up tempo.
If std is 0, this augmentation is not performed') - - f.DEFINE_boolean('augmentation_pitch_and_tempo_scaling', False, 'whether to use spectrogram speed and tempo scaling') - f.DEFINE_float('augmentation_pitch_and_tempo_scaling_min_pitch', 0.95, 'min value of pitch scaling') - f.DEFINE_float('augmentation_pitch_and_tempo_scaling_max_pitch', 1.2, 'max value of pitch scaling') - f.DEFINE_float('augmentation_pitch_and_tempo_scaling_max_tempo', 1.2, 'max vlaue of tempo scaling') # Global Constants # ================ diff --git a/training/deepspeech_training/util/helpers.py b/training/deepspeech_training/util/helpers.py index cd1748cc47..6da708b90d 100644 --- a/training/deepspeech_training/util/helpers.py +++ b/training/deepspeech_training/util/helpers.py @@ -174,3 +174,18 @@ def pick_value_from_range(value_range, clock=None): value = value_range.start + clock * (value_range.end - value_range.start) value = random.uniform(value - value_range.r, value + value_range.r) return round(value) if isinstance(value_range.start, int) else value + + +def tf_pick_value_from_range(value_range, clock=None, double_precision=False): + import tensorflow as tf # pylint: disable=import-outside-toplevel + clock = (tf.random.stateless_uniform([], seed=(-1, 1), dtype=tf.float64) if clock is None + else tf.maximum(tf.constant(0.0, dtype=tf.float64), tf.minimum(tf.constant(1.0, dtype=tf.float64), clock))) + value = value_range.start + clock * (value_range.end - value_range.start) + value = tf.random.stateless_uniform([], + minval=value - value_range.r, + maxval=value + value_range.r, + seed=(clock * tf.int32.min, clock * tf.int32.max), + dtype=tf.float64) + if isinstance(value_range.start, int): + return tf.cast(tf.math.round(value), tf.int64 if double_precision else tf.int32) + return tf.cast(value, tf.float64 if double_precision else tf.float32) diff --git a/training/deepspeech_training/util/importers.py b/training/deepspeech_training/util/importers.py index a4c3c3268d..61f2342d22 100644 --- a/training/deepspeech_training/util/importers.py +++ b/training/deepspeech_training/util/importers.py @@ -8,7 +8,7 @@ from collections import Counter def get_counter(): - return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'total_time': 0}) + return Counter({'all': 0, 'failed': 0, 'invalid_label': 0, 'too_short': 0, 'too_long': 0, 'imported_time': 0, 'total_time': 0}) def get_imported_samples(counter): return counter['all'] - counter['failed'] - counter['too_short'] - counter['too_long'] - counter['invalid_label'] @@ -23,7 +23,7 @@ def print_import_report(counter, sample_rate, max_secs): print('Skipped %d samples that were too short to match the transcript.' % counter['too_short']) if counter['too_long'] > 0: print('Skipped %d samples that were longer than %d seconds.' % (counter['too_long'], max_secs)) - print('Final amount of imported audio: %s.' % secs_to_hours(counter['total_time'] / sample_rate)) + print('Final amount of imported audio: %s from %s.' 
% (secs_to_hours(counter['imported_time'] / sample_rate), secs_to_hours(counter['total_time'] / sample_rate))) def get_importers_parser(description): parser = argparse.ArgumentParser(description=description) diff --git a/training/deepspeech_training/util/sample_collections.py b/training/deepspeech_training/util/sample_collections.py index 942888f9f2..37210659a0 100644 --- a/training/deepspeech_training/util/sample_collections.py +++ b/training/deepspeech_training/util/sample_collections.py @@ -2,14 +2,12 @@ import os import csv import json -import random from pathlib import Path from functools import partial -from .signal_augmentations import parse_augmentation -from .helpers import MEGABYTE, GIGABYTE, Interleaved, LimitingPool -from .audio import Sample, DEFAULT_FORMAT, AUDIO_TYPE_OPUS, AUDIO_TYPE_NP, SERIALIZABLE_AUDIO_TYPES, get_audio_type_from_extension +from .helpers import MEGABYTE, GIGABYTE, Interleaved +from .audio import Sample, DEFAULT_FORMAT, AUDIO_TYPE_OPUS, SERIALIZABLE_AUDIO_TYPES, get_audio_type_from_extension BIG_ENDIAN = 'big' INT_SIZE = 4 @@ -416,88 +414,3 @@ def samples_from_sources(sample_sources, buffering=BUFFER_SIZE, labeled=None): return samples_from_source(sample_sources[0], buffering=buffering, labeled=labeled) cols = list(map(partial(samples_from_source, buffering=buffering, labeled=labeled), sample_sources)) return Interleaved(*cols, key=lambda s: s.duration) - - -class PreparationContext: - def __init__(self, target_audio_type, augmentations): - self.target_audio_type = target_audio_type - self.augmentations = augmentations - - -AUGMENTATION_CONTEXT = None - - -def _init_augmentation_worker(preparation_context): - global AUGMENTATION_CONTEXT # pylint: disable=global-statement - AUGMENTATION_CONTEXT = preparation_context - - -def _augment_sample(timed_sample, context=None): - context = AUGMENTATION_CONTEXT if context is None else context - sample, clock = timed_sample - for augmentation in context.augmentations: - if random.random() < augmentation.probability: - augmentation.apply(sample, clock) - sample.change_audio_type(new_audio_type=context.target_audio_type) - return sample - - -def augment_samples(samples, - audio_type=AUDIO_TYPE_NP, - augmentation_specs=None, - buffering=BUFFER_SIZE, - process_ahead=None, - repetitions=1, - fixed_clock=None): - """ - Prepares samples for being used during training. - This includes parallel and buffered application of augmentations and a conversion to a specified audio-type. - - Parameters - ---------- - samples : Sample enumeration - Typically produced by samples_from_sources. - audio_type : str - Target audio-type to convert samples to. See util.audio.Sample.__init__ . - augmentation_specs : list of str - Augmentation specifications like ["reverb[delay=20.0,decay=-20]", "volume"]. See TRAINING.rst. - buffering : int - Read-buffer size to use while reading files. - process_ahead : int - Number of samples to pre-process ahead of time. - repetitions : int - How often the input sample enumeration should get repeated for being re-augmented. - fixed_clock : float - Sets the internal clock to a value between 0.0 (beginning of epoch) and 1.0 (end of epoch). - Setting this to a number is used for simulating augmentations at a certain epoch-time. - If kept at None (default), the internal clock will run regularly from 0.0 to 1.0, - hence preparing them for training. 
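The clock semantics described here carry over to the new helpers in util/helpers.py: a ValueRange drifts its centre from start to end as the clock runs from 0.0 to 1.0, and r is a sampling radius around that centre. A small worked example, consistent with the updated unit tests above:

    # Worked example: at clock=0.5 the centre of ValueRange(10000, 30000, 10000)
    # is 10000 + 0.5 * (30000 - 10000) = 20000, so pick_value_from_range samples
    # uniformly from [10000, 30000] and rounds, because start is an int.
    from deepspeech_training.util.helpers import ValueRange, pick_value_from_range

    value = pick_value_from_range(ValueRange(10000, 30000, 10000), clock=0.5)
    assert 10000 <= value <= 30000 and isinstance(value, int)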
- - Returns - ------- - iterable of util.sample_collections.LabeledSample or util.audio.Sample - """ - def timed_samples(): - for repetition in range(repetitions): - for sample_index, sample in enumerate(samples): - if fixed_clock is None: - yield sample, (repetition * len(samples) + sample_index) / (repetitions * len(samples)) - else: - yield sample, fixed_clock - - augmentations = [] if augmentation_specs is None else list(map(parse_augmentation, augmentation_specs)) - try: - for augmentation in augmentations: - augmentation.start(buffering=buffering) - context = PreparationContext(audio_type, augmentations) - if process_ahead == 0: - for timed_sample in timed_samples(): - yield _augment_sample(timed_sample, context=context) - else: - with LimitingPool(process_ahead=process_ahead, - initializer=_init_augmentation_worker, - initargs=(context,)) as pool: - yield from pool.imap(_augment_sample, timed_samples()) - finally: - for augmentation in augmentations: - augmentation.stop() diff --git a/training/deepspeech_training/util/signal_augmentations.py b/training/deepspeech_training/util/signal_augmentations.py deleted file mode 100644 index 6f48dc2bcf..0000000000 --- a/training/deepspeech_training/util/signal_augmentations.py +++ /dev/null @@ -1,222 +0,0 @@ - -import os -import re -import math -import random -import numpy as np - -from multiprocessing import Queue, Process -from .audio import gain_db_to_ratio, max_dbfs, normalize_audio, AUDIO_TYPE_NP, AUDIO_TYPE_PCM, AUDIO_TYPE_OPUS -from .helpers import int_range, float_range, pick_value_from_range, MEGABYTE - -SPEC_PARSER = re.compile(r'^(?P<cls>[a-z]+)(\[(?P<params>.*)\])?$') -BUFFER_SIZE = 1 * MEGABYTE - - -class Augmentation: - def __init__(self, p=1.0): - self.probability = float(p) - - def start(self, buffering=BUFFER_SIZE): - pass - - def apply(self, sample, clock): - raise NotImplementedError - - def stop(self): - pass - - -def _enqueue_overlay_samples(sample_source, queue, buffering=BUFFER_SIZE): - """ - As the central distribution point for overlay samples this function is supposed to run in one process only. - This ensures that samples are not used twice if not required. - It loads the (raw and still compressed) data and provides it to the actual augmentation workers. - These are then doing decompression, potential conversion and overlaying in parallel.
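The queue-based distribution described in this removed docstring survives unchanged in the new Overlay class of util/augmentations.py; a sketch of standalone use, with placeholder source paths:

    # Sketch: mix noise from a central queue into one sample (paths are placeholders).
    from deepspeech_training.util.augmentations import Overlay
    from deepspeech_training.util.sample_collections import samples_from_source

    overlay = Overlay('data/noise.sdb', snr=3.0, layers=2)
    overlay.start()                   # spawns the single enqueue process
    sample = next(iter(samples_from_source('data/train.sdb', labeled=True)))
    overlay.apply(sample, clock=0.5)  # mixes queued noise into sample.audio in place
    overlay.stop()                    # terminates the enqueue process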
- """ - # preventing cyclic import problems - from .sample_collections import samples_from_source # pylint: disable=import-outside-toplevel - samples = samples_from_source(sample_source, buffering=buffering, labeled=False) - while True: - for sample in samples: - queue.put(sample) - - -class Overlay(Augmentation): - """See "Overlay augmentation" in TRAINING.rst""" - def __init__(self, source, p=1.0, snr=3.0, layers=1): - super(Overlay, self).__init__(p) - self.source = source - self.snr = float_range(snr) - self.layers = int_range(layers) - self.queue = Queue(max(1, math.floor(self.probability * self.layers[1] * os.cpu_count()))) - self.current_sample = None - self.enqueue_process = None - - def start(self, buffering=BUFFER_SIZE): - self.enqueue_process = Process(target=_enqueue_overlay_samples, - args=(self.source, self.queue), - kwargs={'buffering': buffering}) - self.enqueue_process.start() - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - n_layers = pick_value_from_range(self.layers, clock=clock) - audio = sample.audio - overlay_data = np.zeros_like(audio) - for _ in range(n_layers): - overlay_offset = 0 - while overlay_offset < len(audio): - if self.current_sample is None: - next_overlay_sample = self.queue.get() - next_overlay_sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - self.current_sample = next_overlay_sample.audio - n_required = len(audio) - overlay_offset - n_current = len(self.current_sample) - if n_required >= n_current: # take it completely - overlay_data[overlay_offset:overlay_offset + n_current] += self.current_sample - overlay_offset += n_current - self.current_sample = None - else: # take required slice from head and keep tail for next layer or sample - overlay_data[overlay_offset:overlay_offset + n_required] += self.current_sample[0:n_required] - overlay_offset += n_required - self.current_sample = self.current_sample[n_required:] - snr_db = pick_value_from_range(self.snr, clock=clock) - orig_dbfs = max_dbfs(audio) - overlay_gain = orig_dbfs - max_dbfs(overlay_data) - snr_db - audio += overlay_data * gain_db_to_ratio(overlay_gain) - sample.audio = normalize_audio(audio, dbfs=orig_dbfs) - - def stop(self): - if self.enqueue_process is not None: - self.enqueue_process.terminate() - - -class Reverb(Augmentation): - """See "Reverb augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, delay=20.0, decay=10.0): - super(Reverb, self).__init__(p) - self.delay = float_range(delay) - self.decay = float_range(decay) - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - audio = np.array(sample.audio, dtype=np.float64) - orig_dbfs = max_dbfs(audio) - delay = pick_value_from_range(self.delay, clock=clock) - decay = pick_value_from_range(self.decay, clock=clock) - decay = gain_db_to_ratio(-decay) - result = np.copy(audio) - primes = [17, 19, 23, 29, 31] - for delay_prime in primes: # primes to minimize comb filter interference - layer = np.copy(audio) - n_delay = math.floor(delay * (delay_prime / primes[0]) * sample.audio_format.rate / 1000.0) - n_delay = max(16, n_delay) # 16 samples minimum to avoid performance trap and risk of division by zero - for w_index in range(0, math.floor(len(audio) / n_delay)): - w1 = w_index * n_delay - w2 = (w_index + 1) * n_delay - width = min(len(audio) - w2, n_delay) # last window could be smaller - layer[w2:w2 + width] += decay * layer[w1:w1 + width] - result += layer - audio = normalize_audio(result, dbfs=orig_dbfs) - sample.audio = np.array(audio, 
dtype=np.float32) - - -class Resample(Augmentation): - """See "Resample augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, rate=8000): - super(Resample, self).__init__(p) - self.rate = int_range(rate) - - def apply(self, sample, clock): - # late binding librosa and its dependencies - from librosa.core import resample # pylint: disable=import-outside-toplevel - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - rate = pick_value_from_range(self.rate, clock=clock) - audio = sample.audio - orig_len = len(audio) - audio = np.swapaxes(audio, 0, 1) - audio = resample(audio, sample.audio_format.rate, rate) - audio = resample(audio, rate, sample.audio_format.rate) - audio = np.swapaxes(audio, 0, 1)[0:orig_len] - sample.audio = audio - - -class Codec(Augmentation): - """See "Codec augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, bitrate=3200): - super(Codec, self).__init__(p) - self.bitrate = int_range(bitrate) - - def apply(self, sample, clock): - bitrate = pick_value_from_range(self.bitrate, clock=clock) - sample.change_audio_type(new_audio_type=AUDIO_TYPE_PCM) # decoding to ensure it has to get encoded again - sample.change_audio_type(new_audio_type=AUDIO_TYPE_OPUS, bitrate=bitrate) # will get decoded again downstream - - -class Gaps(Augmentation): - """See "Gaps augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, n=1, size=50.0): - super(Gaps, self).__init__(p) - self.n_gaps = int_range(n) - self.size = float_range(size) - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - audio = sample.audio - n_gaps = pick_value_from_range(self.n_gaps, clock=clock) - for _ in range(n_gaps): - size = pick_value_from_range(self.size, clock=clock) - size = int(size * sample.audio_format.rate / 1000.0) - size = min(size, len(audio) // 10) # a gap should never exceed 10 percent of the audio - offset = random.randint(0, max(0, len(audio) - size - 1)) - audio[offset:offset + size] = 0 - sample.audio = audio - - -class Volume(Augmentation): - """See "Volume augmentation" in TRAINING.rst""" - def __init__(self, p=1.0, dbfs=3.0103): - super(Volume, self).__init__(p) - self.target_dbfs = float_range(dbfs) - - def apply(self, sample, clock): - sample.change_audio_type(new_audio_type=AUDIO_TYPE_NP) - target_dbfs = pick_value_from_range(self.target_dbfs, clock=clock) - sample.audio = normalize_audio(sample.audio, dbfs=target_dbfs) - - -def parse_augmentation(augmentation_spec): - """ - Parses an augmentation specification. - - Parameters - ---------- - augmentation_spec : str - Augmentation specification like "reverb[delay=20.0,decay=-20]". - - Returns - ------- - Instance of an augmentation class from util.signal_augmentations.*. 
- """ - match = SPEC_PARSER.match(augmentation_spec) - if not match: - raise ValueError('Augmentation specification has wrong format') - cls_name = match.group('cls') - cls_name = cls_name[0].upper() + cls_name[1:] - augmentation_cls = globals()[cls_name] if cls_name in globals() else None - if not issubclass(augmentation_cls, Augmentation) or augmentation_cls == Augmentation: - raise ValueError('Unknown augmentation: {}'.format(cls_name)) - parameters = match.group('params') - parameters = [] if parameters is None else parameters.split(',') - args = [] - kwargs = {} - for parameter in parameters: - pair = tuple(list(map(str.strip, (parameter.split('='))))) - if len(pair) == 1: - args.append(pair) - elif len(pair) == 2: - kwargs[pair[0]] = pair[1] - else: - raise ValueError('Unable to parse augmentation value assignment') - return augmentation_cls(*args, **kwargs) diff --git a/training/deepspeech_training/util/sparse_image_warp.py b/training/deepspeech_training/util/sparse_image_warp.py deleted file mode 100644 index 0fcdba0ad4..0000000000 --- a/training/deepspeech_training/util/sparse_image_warp.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Image warping using sparse flow defined at control points.""" - -# The following code is from: https://github.com/tensorflow/tensorflow/blob/v1.14.0/tensorflow/contrib/image/python/ops/sparse_image_warp.py -# But refactored for dynamic tensor shape compatibility -# The core idea is to replace every numpy implementation with tensorflow implementation - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -import tensorflow.compat.v1 as tfv1 -from tensorflow.compat import dimension_value -from tensorflow.contrib.image.python.ops import dense_image_warp -from tensorflow.contrib.image.python.ops import interpolate_spline - -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops - -def _to_float32(value): - return tf.cast(value, tf.float32) - -def _to_int32(value): - return tf.cast(value, tf.int32) - -def _get_grid_locations(image_height, image_width): - """Wrapper for np.meshgrid.""" - tfv1.assert_type(image_height, tf.int32) - tfv1.assert_type(image_width, tf.int32) - - y_range = tf.range(image_height) - x_range = tf.range(image_width) - y_grid, x_grid = tf.meshgrid(y_range, x_range, indexing='ij') - return tf.stack((y_grid, x_grid), -1) - - -def _expand_to_minibatch(tensor, batch_size): - """Tile arbitrarily-sized np_array to include new batch dimension.""" - ndim = tf.size(tf.shape(tensor)) - ones = tf.ones((ndim,), tf.int32) - - tiles = tf.concat(([batch_size], ones), 0) - return tf.tile(tf.expand_dims(tensor, 0), tiles) - - -def _get_boundary_locations(image_height, image_width, num_points_per_edge): - """Compute evenly-spaced indices along edge of 
image.""" - image_height_end = _to_float32(tf.math.subtract(image_height, 1)) - image_width_end = _to_float32(tf.math.subtract(image_width, 1)) - y_range = tf.linspace(0.0, image_height_end, num_points_per_edge + 2) - x_range = tf.linspace(0.0, image_height_end, num_points_per_edge + 2) - ys, xs = tf.meshgrid(y_range, x_range, indexing='ij') - is_boundary = tf.logical_or( - tf.logical_or(tf.equal(xs, 0.0), tf.equal(xs, image_width_end)), - tf.logical_or(tf.equal(ys, 0.0), tf.equal(ys, image_height_end))) - return tf.stack([tf.boolean_mask(ys, is_boundary), tf.boolean_mask(xs, is_boundary)], axis=-1) - - -def _add_zero_flow_controls_at_boundary(control_point_locations, - control_point_flows, image_height, - image_width, boundary_points_per_edge): - """Add control points for zero-flow boundary conditions. - - Augment the set of control points with extra points on the - boundary of the image that have zero flow. - - Args: - control_point_locations: input control points - control_point_flows: their flows - image_height: image height - image_width: image width - boundary_points_per_edge: number of points to add in the middle of each - edge (not including the corners). - The total number of points added is - 4 + 4*(boundary_points_per_edge). - - Returns: - merged_control_point_locations: augmented set of control point locations - merged_control_point_flows: augmented set of control point flows - """ - - batch_size = dimension_value(tf.shape(control_point_locations)[0]) - - boundary_point_locations = _get_boundary_locations(image_height, image_width, - boundary_points_per_edge) - boundary_point_shape = tf.shape(boundary_point_locations) - boundary_point_flows = tf.zeros([boundary_point_shape[0], 2]) - - minbatch_locations = _expand_to_minibatch(boundary_point_locations, batch_size) - type_to_use = control_point_locations.dtype - boundary_point_locations = tf.cast(minbatch_locations, type_to_use) - - minbatch_flows = _expand_to_minibatch(boundary_point_flows, batch_size) - - boundary_point_flows = tf.cast(minbatch_flows, type_to_use) - - merged_control_point_locations = tf.concat( - [control_point_locations, boundary_point_locations], 1) - - merged_control_point_flows = tf.concat( - [control_point_flows, boundary_point_flows], 1) - - return merged_control_point_locations, merged_control_point_flows - - -def sparse_image_warp(image, - source_control_point_locations, - dest_control_point_locations, - interpolation_order=2, - regularization_weight=0.0, - num_boundary_points=0, - name='sparse_image_warp'): - """Image warping using correspondences between sparse control points. - - Apply a non-linear warp to the image, where the warp is specified by - the source and destination locations of a (potentially small) number of - control points. First, we use a polyharmonic spline - (`tf.contrib.image.interpolate_spline`) to interpolate the displacements - between the corresponding control points to a dense flow field. - Then, we warp the image using this dense flow field - (`tf.contrib.image.dense_image_warp`). - - Let t index our control points. For regularization_weight=0, we have: - warped_image[b, dest_control_point_locations[b, t, 0], - dest_control_point_locations[b, t, 1], :] = - image[b, source_control_point_locations[b, t, 0], - source_control_point_locations[b, t, 1], :]. - - For regularization_weight > 0, this condition is met approximately, since - regularized interpolation trades off smoothness of the interpolant vs. - reconstruction of the interpolant at the control points. 
-  See `tf.contrib.image.interpolate_spline` for further documentation of the
-  interpolation_order and regularization_weight arguments.
-
-  Args:
-    image: `[batch, height, width, channels]` float `Tensor`
-    source_control_point_locations: `[batch, num_control_points, 2]` float
-      `Tensor`
-    dest_control_point_locations: `[batch, num_control_points, 2]` float
-      `Tensor`
-    interpolation_order: polynomial order used by the spline interpolation
-    regularization_weight: weight on smoothness regularizer in interpolation
-    num_boundary_points: How many zero-flow boundary points to include at
-      each image edge. Usage:
-        num_boundary_points=0: don't add zero-flow points
-        num_boundary_points=1: 4 corners of the image
-        num_boundary_points=2: 4 corners and one in the middle of each edge
-          (8 points total)
-        num_boundary_points=n: 4 corners and n-1 along each edge
-    name: A name for the operation (optional).
-
-  Note that image and offsets can be of type tf.half, tf.float32, or
-  tf.float64, and do not necessarily have to be the same type.
-
-  Returns:
-    warped_image: `[batch, height, width, channels]` float `Tensor` with the
-      same type as the input image.
-    flow_field: `[batch, height, width, 2]` float `Tensor` containing the dense
-      flow field produced by the interpolation.
-  """
-
-  image = ops.convert_to_tensor(image)
-  source_control_point_locations = ops.convert_to_tensor(
-      source_control_point_locations)
-  dest_control_point_locations = ops.convert_to_tensor(
-      dest_control_point_locations)
-
-  control_point_flows = (
-      dest_control_point_locations - source_control_point_locations)
-
-  clamp_boundaries = num_boundary_points > 0
-  boundary_points_per_edge = num_boundary_points - 1
-
-  with ops.name_scope(name):
-    image_shape = tf.shape(image)
-    batch_size, image_height, image_width = image_shape[0], image_shape[1], image_shape[2]
-
-    # This generates the dense locations where the interpolant
-    # will be evaluated.
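-    # The grid has shape [image_height, image_width, 2]; it is flattened to
-    # [height * width, 2] and tiled to [batch_size, height * width, 2], the
-    # query-point shape that interpolate_spline expects.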
-    grid_locations = _get_grid_locations(image_height, image_width)
-
-    flattened_grid_locations = tf.reshape(grid_locations,
-                                          [tf.multiply(image_height, image_width), 2])
-
-    flattened_grid_locations = _expand_to_minibatch(flattened_grid_locations, batch_size)
-    flattened_grid_locations = tf.cast(flattened_grid_locations, dtype=image.dtype)
-
-    if clamp_boundaries:
-      (dest_control_point_locations,
-       control_point_flows) = _add_zero_flow_controls_at_boundary(
-           dest_control_point_locations, control_point_flows, image_height,
-           image_width, boundary_points_per_edge)
-
-    flattened_flows = interpolate_spline.interpolate_spline(
-        dest_control_point_locations, control_point_flows,
-        flattened_grid_locations, interpolation_order, regularization_weight)
-
-    dense_flows = array_ops.reshape(flattened_flows,
-                                    [batch_size, image_height, image_width, 2])
-
-    warped_image = dense_image_warp.dense_image_warp(image, dense_flows)
-
-    return warped_image, dense_flows
diff --git a/training/deepspeech_training/util/spectrogram_augmentations.py b/training/deepspeech_training/util/spectrogram_augmentations.py
deleted file mode 100644
index 9e7d65738d..0000000000
--- a/training/deepspeech_training/util/spectrogram_augmentations.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import tensorflow as tf
-import tensorflow.compat.v1 as tfv1
-
-from .sparse_image_warp import sparse_image_warp
-
-def augment_freq_time_mask(spectrogram,
-                           frequency_masking_para=30,
-                           time_masking_para=10,
-                           frequency_mask_num=3,
-                           time_mask_num=3):
-    time_max = tf.shape(spectrogram)[1]
-    freq_max = tf.shape(spectrogram)[2]
-    # Frequency masking
-    for _ in range(frequency_mask_num):
-        f = tf.random.uniform(shape=(), minval=0, maxval=frequency_masking_para, dtype=tf.dtypes.int32)
-        f0 = tf.random.uniform(shape=(), minval=0, maxval=freq_max - f, dtype=tf.dtypes.int32)
-        value_ones_freq_prev = tf.ones(shape=[1, time_max, f0])
-        value_zeros_freq = tf.zeros(shape=[1, time_max, f])
-        value_ones_freq_next = tf.ones(shape=[1, time_max, freq_max - (f0 + f)])
-        freq_mask = tf.concat([value_ones_freq_prev, value_zeros_freq, value_ones_freq_next], axis=2)
-        # Tensor slices can't be assigned to, so the mask is applied by
-        # element-wise multiplication instead.
-        spectrogram = spectrogram * freq_mask
-
-    # Time masking
-    for _ in range(time_mask_num):
-        t = tf.random.uniform(shape=(), minval=0, maxval=time_masking_para, dtype=tf.dtypes.int32)
-        t0 = tf.random.uniform(shape=(), minval=0, maxval=time_max - t, dtype=tf.dtypes.int32)
-        value_ones_time_prev = tf.ones(shape=[1, t0, freq_max])
-        value_zeros_time = tf.zeros(shape=[1, t, freq_max])
-        value_ones_time_next = tf.ones(shape=[1, time_max - (t0 + t), freq_max])
-        time_mask = tf.concat([value_ones_time_prev, value_zeros_time, value_ones_time_next], axis=1)
-        spectrogram = spectrogram * time_mask
-
-    return spectrogram
-
-def augment_pitch_and_tempo(spectrogram,
-                            max_tempo=1.2,
-                            max_pitch=1.1,
-                            min_pitch=0.95):
-    original_shape = tf.shape(spectrogram)
-    choosen_pitch = tf.random.uniform(shape=(), minval=min_pitch, maxval=max_pitch)
-    choosen_tempo = tf.random.uniform(shape=(), minval=1, maxval=max_tempo)
-    new_freq_size = tf.cast(tf.cast(original_shape[2], tf.float32) * choosen_pitch, tf.int32)
-    new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32) / choosen_tempo, tf.int32)
-    spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(spectrogram, -1), [new_time_size, new_freq_size])
-    spectrogram_aug = tf.image.crop_to_bounding_box(spectrogram_aug, offset_height=0, offset_width=0,
-                                                    target_height=tf.shape(spectrogram_aug)[1],
-                                                    target_width=tf.minimum(original_shape[2], new_freq_size))
-    spectrogram_aug = tf.cond(choosen_pitch < 1,
-                              lambda: tf.image.pad_to_bounding_box(spectrogram_aug, offset_height=0, offset_width=0,
                                                                   target_height=tf.shape(spectrogram_aug)[1], target_width=original_shape[2]),
-                              lambda: spectrogram_aug)
-    return spectrogram_aug[:, :, :, 0]
-
-
-def augment_speed_up(spectrogram,
-                     speed_std=0.1):
-    original_shape = tf.shape(spectrogram)
-    choosen_speed = tf.math.abs(tf.random.normal(shape=(), stddev=speed_std))  # abs() ensures the augmentation can only speed up
-    choosen_speed = 1 + choosen_speed
-    new_freq_size = original_shape[2]  # the frequency axis is left unchanged
-    new_time_size = tf.cast(tf.cast(original_shape[1], tf.float32) / choosen_speed, tf.int32)
-    spectrogram_aug = tf.image.resize_bilinear(tf.expand_dims(spectrogram, -1), [new_time_size, new_freq_size])
-    return spectrogram_aug[:, :, :, 0]
-
-def augment_dropout(spectrogram,
-                    keep_prob=0.95):
-    return tf.nn.dropout(spectrogram, rate=1 - keep_prob)
-
-
-def augment_sparse_warp(spectrogram, time_warping_para=20, interpolation_order=2, regularization_weight=0.0, num_boundary_points=1, num_control_points=1):
-    """Reference: https://arxiv.org/pdf/1904.08779.pdf
-    Args:
-        spectrogram: `[batch, time, frequency]` float `Tensor`
-        time_warping_para: the 'W' parameter from the paper
-        interpolation_order: passed through to `sparse_image_warp`
-        regularization_weight: passed through to `sparse_image_warp`
-        num_boundary_points: passed through to `sparse_image_warp`;
-            the default of 1 puts boundary points on the 4 corners of the image
-        num_control_points: number of control points
-    Returns:
-        warped_spectrogram: `[batch, time, frequency]` float `Tensor` with the
-            same type as the input spectrogram.
- """ - # reshape to fit `sparse_image_warp`'s input shape - # (1, time steps, freq, 1), batch_size must be 1 - spectrogram = tf.expand_dims(spectrogram, -1) - - original_shape = tf.shape(spectrogram) - tau, freq_size = original_shape[1], original_shape[2] - - # to protect short audio - time_warping_para = tf.math.minimum( - time_warping_para, tf.math.subtract(tf.math.floordiv(tau, 2), 1)) - - # don't choose boundary frequency - choosen_freqs = tf.random.shuffle( - tf.add(tf.range(freq_size - 3), 1))[0: num_control_points] - - source_max = tau - time_warping_para - source_min = tf.math.minimum(source_max - num_control_points, time_warping_para) - - choosen_times = tf.random.shuffle(tf.range(source_min, limit=source_max))[0: num_control_points] - dest_time_widths = tfv1.random_uniform([num_control_points], tf.negative(time_warping_para), time_warping_para, tf.int32) - - sources = [] - dests = [] - for i in range(num_control_points): - # generate source points `t` of time axis between (W, tau-W) - rand_source_time = choosen_times[i] - rand_dest_time = rand_source_time + dest_time_widths[i] - - choosen_freq = choosen_freqs[i] - sources.append([rand_source_time, choosen_freq]) - dests.append([rand_dest_time, choosen_freq]) - - source_control_point_locations = tf.cast([sources], tf.float32) - dest_control_point_locations = tf.cast([dests], tf.float32) - - warped_spectrogram, _ = sparse_image_warp(spectrogram, - source_control_point_locations=source_control_point_locations, - dest_control_point_locations=dest_control_point_locations, - interpolation_order=interpolation_order, - regularization_weight=regularization_weight, - num_boundary_points=num_boundary_points) - return tf.reshape(warped_spectrogram, shape=(1, -1, freq_size))