open-mmlab · hyq-hub · Oct 25, 2024 · Oct 28, 2024 · Oct 28, 2024 · Oct 28, 2024
diff --git a/models/tts/debatts/README.md b/models/tts/debatts/README.md
@@ -0,0 +1,13 @@
+# Debatts - Mandarin Debate TTS Model
+
+## Introduction
+Debatts is an advanced text-to-speech (TTS) model specifically designed for Mandarin debate contexts. This innovative model leverages short audio prompts to learn and replicate speaker characteristics while dynamically adjusting speaking style by analyzing the audio of debate opponents. This capability allows Debatts to integrate seamlessly into debate scenarios, offering not just speech synthesis but a responsive adaptation to the changing dynamics of debate interactions.
+
+## Environment Setup
+To set up the necessary environment to run Debatts, please use the provided `environment.yml` file. This file contains all the required dependencies and can be easily set up with the following Conda command:
+
+```bash
+conda env create -f environment.yml
+```
+## Continuous Updates
+The Debatts project is actively being developed, with continuous updates aimed at enhancing model performance and expanding features. We encourage users to regularly check our repository for the latest updates and improvements to ensure optimal functionality and to take advantage of new capabilities as they become available.
diff --git a/models/tts/debatts/environment.yml b/models/tts/debatts/environment.yml
@@ -0,0 +1,335 @@
+name: debatts
+# Channels in priority order (highest first); regional mirror URLs belong in each user's .condarc, not here.
+channels:
+  - pytorch
+  - nvidia
+  - https://repo.anaconda.com/pkgs/main
+  - conda-forge
+  - defaults
+dependencies:  # conda packages, each pinned as name=version=build
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - asttokens=2.4.1=pyhd8ed1ab_0
+  - backcall=0.2.0=pyh9f0ad1d_0
+  - blas=1.0=mkl
+  - brotli-python=1.0.9=py39h6a678d5_8
+  - bzip2=1.0.8=h7f98852_4
+  - ca-certificates=2024.7.4=hbcca054_0
+  - certifi=2024.7.4=pyhd8ed1ab_0
+  - charset-normalizer=3.3.2=pyhd8ed1ab_0
+  - comm=0.2.2=pyhd8ed1ab_0
+  - cuda-cudart=12.1.105=0
+  - cuda-cupti=12.1.105=0
+  - cuda-libraries=12.1.0=0
+  - cuda-nvrtc=12.1.105=0
+  - cuda-nvtx=12.1.105=0
+  - cuda-opencl=12.5.39=0  # NOTE(review): 12.5.x here while the other cuda-* pins are 12.1.x - confirm this is intended
+  - cuda-runtime=12.1.0=0
+  - cuda-version=12.5=3  # NOTE(review): 12.5, whereas pytorch-cuda below is pinned to 12.1 - confirm
+  - debugpy=1.6.7=py39h6a678d5_0
+  - decorator=5.1.1=pyhd8ed1ab_0
+  - entrypoints=0.4=pyhd8ed1ab_0
+  - executing=2.0.1=pyhd8ed1ab_0
+  - ffmpeg=4.2.2=h20bf706_0
+  - filelock=3.15.4=pyhd8ed1ab_0
+  - freetype=2.10.4=h0708190_1
+  - gmp=6.1.2=hf484d3e_1000
+  - gmpy2=2.1.2=py39heeb90bb_0
+  - gnutls=3.6.15=he1e5248_0
+  - idna=3.7=py39h06a4308_0
+  - intel-openmp=2023.1.0=hdb19cb5_46306
+  - ipykernel=6.29.4=pyh3099207_0
+  - ipython=8.12.0=pyh41d4057_0
+  - jedi=0.19.1=pyhd8ed1ab_0
+  - jinja2=3.1.4=py39h06a4308_0
+  - jpeg=9e=h5eee18b_1
+  - jupyter_client=7.3.4=pyhd8ed1ab_0
+  - jupyter_core=5.7.2=py39hf3d152e_0
+  - lame=3.100=h7f98852_1001
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.38=h1181459_1  # linux-64 specific package; presumably the whole export targets Linux x86_64 - TODO confirm
+  - lerc=3.0=h295c915_0
+  - libcublas=12.1.0.26=0
+  - libcufft=11.0.2.4=0
+  - libcufile=1.10.0.4=0
+  - libcurand=10.3.6.39=0
+  - libcusolver=11.4.4.55=0
+  - libcusparse=12.0.2.55=0
+  - libdeflate=1.17=h5eee18b_1
+  - libffi=3.4.4=h6a678d5_1
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libidn2=2.3.4=h5eee18b_0
+  - libjpeg-turbo=2.0.0=h9bf148f_0
+  - libnpp=12.0.2.50=0
+  - libnvjitlink=12.1.105=0
+  - libnvjpeg=12.1.1.14=0
+  - libopus=1.3.1=h7f98852_1
+  - libpng=1.6.39=h5eee18b_0
+  - libsodium=1.0.18=h36c2ea0_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libtasn1=4.19.0=h5eee18b_0
+  - libtiff=4.5.1=h6a678d5_0
+  - libunistring=0.9.10=h7f98852_0
+  - libvpx=1.7.0=h439df22_0
+  - libwebp-base=1.3.2=h5eee18b_0
+  - llvm-openmp=14.0.6=h9e868ea_0
+  - lz4-c=1.9.4=h6a678d5_1
+  - markupsafe=2.1.1=py39hb9d737c_1
+  - matplotlib-inline=0.1.7=pyhd8ed1ab_0
+  - mkl=2023.1.0=h213fc3f_46344
+  - mkl-service=2.4.0=py39h5eee18b_1
+  - mkl_fft=1.3.8=py39h5eee18b_0
+  - mkl_random=1.2.4=py39hdb19cb5_0
+  - mpc=1.1.0=h10f8cd9_1
+  - mpfr=4.0.2=hb69a4c5_1
+  - mpmath=1.3.0=py39h06a4308_0
+  - ncurses=6.4=h6a678d5_0
+  - nest-asyncio=1.6.0=pyhd8ed1ab_0
+  - nettle=3.7.3=hbbd107a_1
+  - networkx=3.2.1=py39h06a4308_0
+  - numpy-base=1.26.4=py39hb5e798b_0  # NOTE(review): the pip section pins numpy==1.23.4 - two NumPy versions are listed; confirm which is intended
+  - openh264=2.1.1=h4ff587b_0
+  - openjpeg=2.4.0=h3ad879b_0
+  - openssl=1.1.1w=h7f8727e_0
+  - packaging=24.1=pyhd8ed1ab_0
+  - parso=0.8.4=pyhd8ed1ab_0
+  - pexpect=4.9.0=pyhd8ed1ab_0
+  - pickleshare=0.7.5=py_1003
+  - pillow=10.3.0=py39h5eee18b_0
+  - platformdirs=4.2.2=pyhd8ed1ab_0
+  - prompt-toolkit=3.0.47=pyha770c72_0
+  - prompt_toolkit=3.0.47=hd8ed1ab_0
+  - psutil=5.9.1=py39hb9d737c_0
+  - ptyprocess=0.7.0=pyhd3deb0d_0
+  - pure_eval=0.2.2=pyhd8ed1ab_0
+  - pygments=2.18.0=pyhd8ed1ab_0
+  - pysocks=1.7.1=py39h06a4308_0
+  - python=3.9.15=h7a1cb2a_2  # interpreter pin; the py39 build strings in this list correspond to it
+  - python-dateutil=2.9.0=pyhd8ed1ab_0
+  - python_abi=3.9=2_cp39
+  - pytorch=2.3.1=py3.9_cuda12.1_cudnn8.9.2_0  # build string names CUDA 12.1 and cuDNN 8.9.2
+  - pytorch-cuda=12.1=ha16c6d3_5
+  - pytorch-mutex=1.0=cuda
+  - pyyaml=6.0.1=py39h5eee18b_0
+  - pyzmq=25.1.2=py39h6a678d5_0
+  - readline=8.2=h5eee18b_0
+  - requests=2.32.3=pyhd8ed1ab_0
+  - six=1.16.0=pyh6c4a22f_0
+  - sqlite=3.45.3=h5eee18b_0
+  - stack_data=0.6.2=pyhd8ed1ab_0
+  - sympy=1.12.1=pyh04b8f61_3
+  - tbb=2021.8.0=hdb19cb5_0
+  - tk=8.6.14=h39e8969_0
+  - torchtriton=2.3.1=py39
+  - torchvision=0.18.1=py39_cu121
+  - tornado=6.1=py39hb9d737c_3
+  - traitlets=5.14.3=pyhd8ed1ab_0
+  - typing_extensions=4.11.0=py39h06a4308_0
+  - urllib3=2.2.2=py39h06a4308_0
+  - wcwidth=0.2.13=pyhd8ed1ab_0
+  - wheel=0.43.0=py39h06a4308_0
+  - x264=1!157.20191217=h7b6447c_0
+  - xz=5.4.6=h5eee18b_1
+  - yaml=0.2.5=h7b6447c_0
+  - zeromq=4.3.5=h6a678d5_0
+  - zlib=1.2.13=h5eee18b_1
+  - zstd=1.5.5=hc292b87_2
+  - pip:  # PyPI packages, installed by pip once the conda packages above are in place
+    - absl-py==2.1.0
+    - accelerate==0.24.1
+    - aiohttp==3.9.5
+    - aiosignal==1.3.1
+    - aliyun-python-sdk-core==2.15.1
+    - aliyun-python-sdk-kms==2.16.3
+    - antlr4-python3-runtime==4.9.3
+    - argparse==1.4.0  # NOTE(review): argparse is part of the Python 3.9 standard library - presumably a leftover pin; confirm
+    - asteroid==0.7.0
+    - asteroid-filterbanks==0.4.0
+    - async-timeout==4.0.3
+    - attrs==23.2.0
+    - audiomentations==0.36.0
+    - babel==2.15.0
+    - bitarray==2.9.2
+    - black==24.1.1
+    - braceexpand==0.1.7
+    - cached-property==1.5.2
+    - cffi==1.16.0
+    - click==8.1.7
+    - cn2an==0.5.22
+    - colorama==0.4.6
+    - coloredlogs==15.0.1
+    - contourpy==1.2.1
+    - crcmod==1.7
+    - cryptography==43.0.0
+    - cycler==0.12.1
+    - cython==3.0.10
+    - cytoolz==0.12.3
+    - datasets==2.20.0
+    - diffsptk==2.1.0
+    - diffusers==0.29.2
+    - dill==0.3.8
+    - distance==0.1.3
+    - docker-pycreds==0.4.0
+    - easydict==1.13
+    - editdistance==0.8.1
+    - einops==0.8.0
+    - encodec==0.1.1
+    - evaluate==0.4.2
+    - fairseq==0.12.2  # NOTE(review): fairseq 0.12.2 is understood to require hydra-core<1.1 and omegaconf<2.1, unlike the hydra-core/omegaconf pins in this list - verify pip can resolve this set
+    - fastdtw==0.3.4
+    - ffmpeg-python==0.2.0
+    - flatbuffers==24.3.25
+    - fonttools==4.53.1
+    - frechet-audio-distance==0.3.1
+    - frozenlist==1.4.1
+    - fsspec==2024.5.0
+    - ftfy==6.2.0
+    - funasr==1.1.4
+    - future==1.0.0
+    - g2p-en==2.1.0
+    - gitdb==4.0.11
+    - gitpython==3.1.43
+    - grpcio==1.64.1
+    - h5py==3.11.0
+    - huggingface-hub==0.23.4
+    - humanfriendly==10.0
+    - hydra-core==1.3.2
+    - importlib-metadata==8.0.0
+    - importlib-resources==6.4.0
+    - inflect==7.3.1
+    - intervaltree==3.1.0
+    - jaconv==0.4.0
+    - jamo==0.4.1
+    - jieba==0.42.1
+    - jiwer==3.0.4
+    - jmespath==0.10.0
+    - joblib==1.4.2
+    - json5==0.9.25
+    - jsonschema==4.22.0
+    - jsonschema-specifications==2023.12.1
+    - julius==0.2.7
+    - kaldiio==2.18.0
+    - kiwisolver==1.4.5
+    - laion-clap==1.1.2
+    - lazy-loader==0.4
+    - lhotse==1.25.0  # was 1.25.0.dev0+git.da4d70d.clean, a local source build that pip cannot download from PyPI
+    - librosa==0.10.2.post1
+    - lightning-utilities==0.11.3.post0
+    - lilcom==1.8.0
+    - llvmlite==0.43.0
+    - loguru==0.7.2
+    - lxml==5.2.2
+    - markdown==3.6
+    - matplotlib==3.9.1
+    - mir-eval==0.7
+    - modelscope==1.17.1
+    - modules==1.0.0
+    - more-itertools==10.3.0
+    - msgpack==1.0.8
+    - multidict==6.0.5
+    - multiprocess==0.70.16
+    - mypy-extensions==1.0.0
+    - nltk==3.8.1
+    - nnaudio==0.3.3
+    - noisereduce==3.0.2
+    - npy-append-array==0.9.16
+    - numba==0.60.0
+    - numpy==1.23.4  # NOTE(review): the conda section pins numpy-base=1.26.4 - confirm which NumPy version is intended
+    - omegaconf==2.3.0
+    - onnxruntime==1.19.0
+    - openai-whisper==20231117
+    - oss2==2.18.6
+    - pandas==2.2.2
+    - pathspec==0.12.1
+    - pb-bss-eval==0.0.2
+    - pedalboard==0.9.9
+    - pesq==0.0.4
+    - pip==24.2
+    - pooch==1.8.2
+    - portalocker==2.10.0
+    - praat-parselmouth==0.4.3
+    - proces==0.1.7
+    - progressbar==2.5
+    - protobuf==4.25.3
+    - ptwt==0.1.9
+    - pyarrow==16.1.0
+    - pyarrow-hotfix==0.6
+    - pycparser==2.22
+    - pycryptodome==3.20.0
+    - pydub==0.25.1
+    - pymcd==0.2.1
+    - pynndescent==0.5.13
+    - pyparsing==3.1.2
+    - pypesq==1.2.4
+    - pypinyin==0.48.0
+    - pysptk==1.0.1
+    - pystoi==0.4.1
+    - pytorch-lightning==2.3.2
+    - pytorch-ranger==0.1.1
+    - pytorch-wpe==0.0.1
+    - pytz==2024.1
+    - pywavelets==1.6.0
+    - pyworld==0.3.4
+    - rapidfuzz==3.9.6
+    - referencing==0.35.1
+    - regex==2024.5.15
+    - resampy==0.4.3
+    - resemblyzer==0.1.4
+    - rir-generator==0.2.0
+    - rpds-py==0.18.1
+    - ruamel-yaml==0.18.6
+    - ruamel-yaml-clib==0.2.8
+    - sacrebleu==2.4.2
+    - safetensors==0.4.3
+    - scikit-learn==1.5.1
+    - scipy==1.10.1
+    - semantic-version==2.10.0
+    - sentencepiece==0.2.0
+    - sentry-sdk==2.8.0
+    - setproctitle==1.3.3
+    - setuptools==70.3.0
+    - setuptools-rust==1.9.0
+    - smmap==5.0.1
+    - sortedcontainers==2.4.0
+    - soundfile==0.12.1
+    - soxr==0.3.7
+    - tabulate==0.9.0
+    - tensorboard==2.17.0
+    - tensorboard-data-server==0.7.2
+    - tensorboardx==2.6.2.2
+    - tgt==1.5
+    - threadpoolctl==3.5.0
+    - tiktoken==0.7.0
+    - timm==1.0.8
+    - tokenizers==0.19.1
+    - tomli==2.0.1
+    - toolz==0.12.1
+    - torch-complex==0.4.4
+    - torch-optimizer==0.1.0
+    - torch-stoi==0.2.1
+    - torchaudio==2.3.1  # same 2.3.1 release number as the conda pytorch pin
+    - torchcomp==0.1.1
+    - torchcrepe==0.0.23
+    - torchlibrosa==0.1.0
+    - torchlpc==0.4
+    - torchmetrics==0.11.4
+    - tqdm==4.66.4
+    - transformers==4.44.0
+    - trash-cli==0.24.5.26
+    - typeguard==4.3.0
+    - typing==3.7.4.3  # NOTE(review): typing is part of the Python 3.9 standard library - presumably a leftover pin; confirm
+    - tzdata==2024.1
+    - umap-learn==0.5.6
+    - unidecode==1.3.8
+    - vector-quantize-pytorch==1.12.5
+    - wandb==0.17.4
+    - webdataset==0.2.86
+    - webrtcvad==2.0.10
+    - werkzeug==3.0.3
+    - wget==3.2
+    - xxhash==3.4.1
+    - yarl==1.9.4
+    - zhconv==1.4.3
+    - zhon==2.0.2
+    - zipp==3.19.2