-
Notifications
You must be signed in to change notification settings - Fork 593
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
hehaorui
committed
Oct 25, 2024
1 parent
6a9adb3
commit 760a65b
Showing
56 changed files
with
12,964 additions
and
0 deletions.
There are no files selected for viewing
117 changes: 117 additions & 0 deletions
117
...s/tts/debatts/exp_config_16k_emilia_new_semantic_repcodec_8192_1q_large_101k_fix_new.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
{ | ||
"model_type": "T2S", | ||
"dataset": ["not"], | ||
"preprocess": { | ||
"hop_size": 320, | ||
"sample_rate": 16000, | ||
"processed_dir": "TODO", | ||
"valid_file": "valid.json", | ||
"train_file": "train.json", | ||
"use_phone_cond": false, | ||
"min_dur": 3, | ||
"max_dur": 40, | ||
"use_emilia_101k": true | ||
}, | ||
"model": { | ||
"t2sllama": { | ||
"phone_vocab_size": 1024, | ||
"target_vocab_size": 8192, | ||
"hidden_size": 2048, | ||
"intermediate_size": 8192, | ||
"num_hidden_layer": 8, | ||
"num_attention_head": 16, | ||
"pad_token_id": 9216, | ||
"bos_target_id": 9217, | ||
"eos_target_id": 9218, | ||
"bos_phone_id": 9219, | ||
"eos_phone_id": 9220, | ||
"bos_prompt0_id": 9221, | ||
"eos_prompt0_id": 9222, | ||
"use_lang_emb": false | ||
}, | ||
"kmeans": { | ||
"type": "repcodec", | ||
"stat_mean_var_path":"./ckpt/emilia_wav2vec2bert_stats_10k.pt", | ||
"repcodec": { | ||
"codebook_size": 8192, | ||
"hidden_size": 1024, | ||
"codebook_dim": 8, | ||
"vocos_dim": 384, | ||
"vocos_intermediate_dim": 2048, | ||
"vocos_num_layers": 12 | ||
}, | ||
"pretrained_path":"./ckpt/repcodec/emilia_50k_8192_norm_8d_86k_steps_model.safetensors" | ||
}, | ||
"codec": { | ||
"encoder": { | ||
"d_model": 96, | ||
"up_ratios": [4, 4, 4, 5], | ||
"out_channels": 256, | ||
"use_tanh": false, | ||
"pretrained_path":"./ckpt/codec_16K_320_8/pytorch_model.bin" | ||
}, | ||
"decoder": { | ||
"in_channel": 256, | ||
"upsample_initial_channel": 1536, | ||
"up_ratios": [5, 4, 4, 4], | ||
"num_quantizers": 8, | ||
"codebook_size": 1024, | ||
"codebook_dim": 8, | ||
"quantizer_type": "fvq", | ||
"quantizer_dropout": 0.5, | ||
"commitment": 0.25, | ||
"codebook_loss_weight": 1.0, | ||
"use_l2_normlize": true, | ||
"codebook_type": "euclidean", | ||
"kmeans_init": false, | ||
"kmeans_iters": 10, | ||
"decay": 0.8, | ||
"eps": 0.5, | ||
"threshold_ema_dead_code": 2, | ||
"weight_init": false, | ||
"use_vocos": true, | ||
"vocos_dim": 512, | ||
"vocos_intermediate_dim": 4096, | ||
"vocos_num_layers": 24, | ||
"n_fft": 1280, | ||
"hop_size": 320, | ||
"padding": "same", | ||
"pretrained_path": "./ckpt/codec_16K_320_8/pytorch_model_1.bin" | ||
} | ||
} | ||
}, | ||
"log_dir": "TODO", | ||
"train": { | ||
"max_epoch": 0, | ||
"use_dynamic_batchsize": true, | ||
"max_tokens": 3000, | ||
"max_sentences": 20, | ||
"lr_warmup_steps": 3200, | ||
"lr_scheduler": "inverse_sqrt", | ||
"num_train_steps": 800, | ||
"adam": { | ||
"lr": 1e-5 | ||
}, | ||
"ddp": false, | ||
"random_seed": 114, | ||
"batch_size": 1, | ||
"epochs": 500, | ||
"max_steps": 10000, | ||
"total_training_steps": 8000, | ||
"save_summary_steps": 500, | ||
"save_checkpoints_steps": 300, | ||
"valid_interval": 2000, | ||
"keep_checkpoint_max": 100, | ||
"gradient_accumulation_step": 10, | ||
"tracker": ["tensorboard"], | ||
"save_checkpoint_stride": [1], | ||
"keep_last": [15], | ||
"run_eval": [true], | ||
"dataloader": { | ||
"num_worker": 4, | ||
"pin_memory": true | ||
}, | ||
"use_emilia_dataset": false | ||
} | ||
} | ||
|
9 changes: 9 additions & 0 deletions
9
models/tts/debatts/speech_examples/87_SPEAKER01_2_part03.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"key": "87_SPEAKER01_2_part03", | ||
"text": "你方要论证的是, 他在技术上完全突破了壁垒, 在现实里真的可以落地, 而不是举出一些跟越宇宙可能没有关, 没有关系, 实际关系不大的东西告诉我, 这叫做越宇宙。我方告诉你, 不只是算力和芯片的问题, 还有包括VR的问题, 硬件上算力有问题, 题片有问题, 软软。", | ||
"duration": 15.34, | ||
"language": "zh", | ||
"wav_path": "./87_SPEAKER01_2_part03.wav", | ||
"chenci_prompt_wav_path": "./87_SPEAKER01_2_part03_213_chenci_prompt_6s.wav", | ||
"prompt0_wav_path": "./87_SPEAKER00_1_part01.wav" | ||
} |
Oops, something went wrong.