Skip to content

Commit

Permalink
💥 breaking improve #91
Browse files Browse the repository at this point in the history
- add tts pipeline
- add tn pipeline
- refactor ssml gen
- add 增加 stream_chunk_size 参数
- add 简单的多线程
- add tn_config
- refactor batch generate
- add 增加 wetext tn (处理 en)

- TODO 适配 api
- TODO 适配 webui
- TODO 支持其他model
  • Loading branch information
zhzLuke96 committed Jul 11, 2024
1 parent 7e54bb7 commit 3d62765
Show file tree
Hide file tree
Showing 85 changed files with 1,891 additions and 462 deletions.
2 changes: 1 addition & 1 deletion data/load_json_spk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from modules.speaker import speaker_mgr
from modules.core.speaker import speaker_mgr

# Source: https://github.com/2noise/ChatTTS/issues/238
# Open via a context manager so the file handle is closed deterministically,
# and decode explicitly as UTF-8 instead of relying on the platform default.
with open("./data/slct_voice_240605.json", "r", encoding="utf-8") as json_file:
    data = json.load(json_file)
Expand Down
12 changes: 6 additions & 6 deletions modules/api/impl/google_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

from modules.api import utils as api_utils
from modules.api.Api import APIManager
from modules.api.impl.handler.SSMLHandler import SSMLHandler
from modules.api.impl.handler.TTSHandler import TTSHandler
from modules.api.impl.model.audio_model import AdjustConfig, AudioFormat
from modules.api.impl.model.chattts_model import ChatTTSConfig, InferConfig
from modules.api.impl.model.enhancer_model import EnhancerConfig
from modules.speaker import Speaker, speaker_mgr
from modules.core.handler.datacls.audio_model import AdjustConfig, AudioFormat
from modules.core.handler.datacls.chattts_model import ChatTTSConfig, InferConfig
from modules.core.handler.datacls.enhancer_model import EnhancerConfig
from modules.core.handler.SSMLHandler import SSMLHandler
from modules.core.handler.TTSHandler import TTSHandler
from modules.core.speaker import Speaker, speaker_mgr


class SynthesisInput(BaseModel):
Expand Down
105 changes: 0 additions & 105 deletions modules/api/impl/handler/SSMLHandler.py

This file was deleted.

10 changes: 5 additions & 5 deletions modules/api/impl/openai_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

from modules.api import utils as api_utils
from modules.api.Api import APIManager
from modules.api.impl.handler.TTSHandler import TTSHandler
from modules.api.impl.model.audio_model import AdjustConfig, AudioFormat
from modules.api.impl.model.chattts_model import ChatTTSConfig, InferConfig
from modules.api.impl.model.enhancer_model import EnhancerConfig
from modules.core.handler.datacls.audio_model import AdjustConfig, AudioFormat
from modules.core.handler.datacls.chattts_model import ChatTTSConfig, InferConfig
from modules.core.handler.datacls.enhancer_model import EnhancerConfig
from modules.core.handler.TTSHandler import TTSHandler
from modules.core.speaker import Speaker, speaker_mgr
from modules.data import styles_mgr
from modules.speaker import Speaker, speaker_mgr


class AudioSpeechRequest(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion modules/api/impl/speaker_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from modules.api import utils as api_utils
from modules.api.Api import APIManager
from modules.speaker import speaker_mgr
from modules.core.speaker import speaker_mgr


class CreateSpeaker(BaseModel):
Expand Down
8 changes: 4 additions & 4 deletions modules/api/impl/ssml_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
from pydantic import BaseModel

from modules.api.Api import APIManager
from modules.api.impl.handler.SSMLHandler import SSMLHandler
from modules.api.impl.model.audio_model import AdjustConfig, AudioFormat
from modules.api.impl.model.chattts_model import InferConfig
from modules.api.impl.model.enhancer_model import EnhancerConfig
from modules.core.handler.datacls.audio_model import AdjustConfig, AudioFormat
from modules.core.handler.datacls.chattts_model import InferConfig
from modules.core.handler.datacls.enhancer_model import EnhancerConfig
from modules.core.handler.SSMLHandler import SSMLHandler


class SSMLRequest(BaseModel):
Expand Down
10 changes: 5 additions & 5 deletions modules/api/impl/tts_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

from modules.api import utils as api_utils
from modules.api.Api import APIManager
from modules.api.impl.handler.TTSHandler import TTSHandler
from modules.api.impl.model.audio_model import AdjustConfig, AudioFormat
from modules.api.impl.model.chattts_model import ChatTTSConfig, InferConfig
from modules.api.impl.model.enhancer_model import EnhancerConfig
from modules.speaker import Speaker
from modules.core.handler.datacls.audio_model import AdjustConfig, AudioFormat
from modules.core.handler.datacls.chattts_model import ChatTTSConfig, InferConfig
from modules.core.handler.datacls.enhancer_model import EnhancerConfig
from modules.core.handler.TTSHandler import TTSHandler
from modules.core.speaker import Speaker

logger = logging.getLogger(__name__)

Expand Down
10 changes: 5 additions & 5 deletions modules/api/impl/xtts_v2_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from pydantic import BaseModel

from modules.api.Api import APIManager
from modules.api.impl.handler.TTSHandler import TTSHandler
from modules.api.impl.model.audio_model import AdjustConfig, AudioFormat
from modules.api.impl.model.chattts_model import ChatTTSConfig, InferConfig
from modules.api.impl.model.enhancer_model import EnhancerConfig
from modules.speaker import speaker_mgr
from modules.core.handler.datacls.audio_model import AdjustConfig, AudioFormat
from modules.core.handler.datacls.chattts_model import ChatTTSConfig, InferConfig
from modules.core.handler.datacls.enhancer_model import EnhancerConfig
from modules.core.handler.TTSHandler import TTSHandler
from modules.core.speaker import speaker_mgr

logger = logging.getLogger(__name__)

Expand Down
26 changes: 23 additions & 3 deletions modules/api/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from typing import Any, Union
from typing import Any, Dict, Union

from pydantic import BaseModel
from pydub import AudioSegment

from modules.core.speaker import speaker_mgr
from modules.data import styles_mgr
from modules.speaker import speaker_mgr
from modules.ssml import merge_prompt


class ParamsTypeError(Exception):
Expand Down Expand Up @@ -36,6 +35,27 @@ def to_number(value, t, default=0):
return default


def merge_prompt(attrs: dict, elem: Dict[str, Any]):
    """Append ChatTTS control tokens for an element to ``attrs["prefix"]``.

    For each of the control keys ``oral``, ``speed``, ``laugh`` and ``break``
    the value is looked up on ``elem`` first and falls back to ``attrs``.
    Values may be integers, numeric strings, or the keywords ``"max"`` /
    ``"min"``; they are clamped to the key's allowed range and appended to
    ``attrs["prefix"]`` as `` [<key>_<value>]``.

    Args:
        attrs: Mutable attribute mapping; updated in place.
        elem: Object exposing ``.get(key, default)`` that carries the
            per-element overrides.

    Returns:
        None. ``attrs`` is modified in place.

    Raises:
        ValueError: If a value is neither an integer-like value nor one of
            the ``"max"`` / ``"min"`` keywords.
    """

    def attr_num(attrs: Dict[str, Any], k: str, min_value: int, max_value: int):
        val = elem.get(k, attrs.get(k, ""))
        # Unset values (empty string or None) contribute no token.
        if val is None or val == "":
            return
        if val == "max":
            val = max_value
        elif val == "min":
            val = min_value
        # Plain-Python clamp: avoids depending on numpy being imported here.
        val = max(min_value, min(int(val), max_value))
        if attrs.get("prefix") is None:
            attrs["prefix"] = ""
        attrs["prefix"] += f" [{k}_{val}]"

    attr_num(attrs, "oral", 0, 9)
    attr_num(attrs, "speed", 0, 9)
    attr_num(attrs, "laugh", 0, 2)
    attr_num(attrs, "break", 0, 7)


def calc_spk_style(spk: Union[str, int], style: Union[str, int]):
voice_attrs = {
"spk": None,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from fastapi import Request
from pydub import AudioSegment

from modules.api.impl.model.audio_model import AudioFormat
from modules.ChatTTSInfer import ChatTTSInfer
from modules.utils.audio import ndarray_to_segment
from modules.core.handler.datacls.audio_model import AudioFormat
from modules.core.models.zoo.ChatTTSInfer import ChatTTSInfer
from modules.utils.audio_utils import ndarray_to_segment


def wave_header_chunk(frame_input=b"", channels=1, sample_width=2, sample_rate=24000):
Expand Down
77 changes: 77 additions & 0 deletions modules/core/handler/SSMLHandler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from typing import Generator

import numpy as np
from fastapi import HTTPException

from modules.core.handler.AudioHandler import AudioHandler
from modules.core.handler.datacls.audio_model import AdjustConfig
from modules.core.handler.datacls.chattts_model import ChatTTSConfig, InferConfig
from modules.core.handler.datacls.enhancer_model import EnhancerConfig
from modules.core.pipeline.factory import PipelineFactory
from modules.core.pipeline.pipeline import TTSPipeline
from modules.core.pipeline.processor import TTSPipelineContext
from modules.core.ssml.SSMLParser import create_ssml_v01_parser
from modules.core.ssml.SynthesizeSSML import SynthesizeSSML
from modules.Enhancer.ResembleEnhance import apply_audio_enhance_full
from modules.normalization import text_normalize
from modules.utils import audio_utils


class SSMLHandler(AudioHandler):
    """Audio handler that renders an SSML document through the TTS pipeline."""

    def __init__(
        self,
        ssml_content: str,
        infer_config: InferConfig,
        adjust_config: AdjustConfig,
        enhancer_config: EnhancerConfig,
    ) -> None:
        """Store the SSML text and the configs used to build the pipeline.

        Args:
            ssml_content: SSML document as a string.
            infer_config: Inference settings passed to the pipeline context.
            adjust_config: Audio adjustment settings passed to the context.
            enhancer_config: Enhancer settings passed to the context.

        Raises:
            AssertionError: If any argument has an unexpected type.
        """
        assert isinstance(ssml_content, str), "ssml_content must be a string."
        assert isinstance(
            infer_config, InferConfig
        ), "infer_config must be an InferConfig object."
        assert isinstance(
            adjust_config, AdjustConfig
        ), "adjust_config must be an AdjustConfig object."
        assert isinstance(
            enhancer_config, EnhancerConfig
        ), "enhancer_config must be an EnhancerConfig object."

        self.ssml_content = ssml_content
        self.infer_config = infer_config
        # NOTE(review): attribute name is misspelled ("adjest"); kept as-is
        # because code outside this class may read it — confirm before renaming.
        self.adjest_config = adjust_config
        self.enhancer_config = enhancer_config

        self.validate()

    def validate(self):
        """Hook for parameter validation; currently performs no checks."""
        # TODO params checker
        pass

    def create_pipeline(self):
        """Build a pipeline from a context holding the SSML and stored configs.

        The context always uses a default-constructed ``ChatTTSConfig``.
        """
        ctx = TTSPipelineContext(
            ssml=self.ssml_content,
            tts_config=ChatTTSConfig(),
            infer_config=self.infer_config,
            adjust_config=self.adjest_config,
            enhancer_config=self.enhancer_config,
        )
        return PipelineFactory.create(ctx)

    def enqueue(self) -> tuple[np.ndarray, int]:
        """Generate all audio segments and return them joined into one array.

        Returns:
            ``(audio_data, sample_rate)`` where ``audio_data`` is the
            concatenation (axis 0) of every generated segment and
            ``sample_rate`` is taken from the first segment.
        """
        pipeline = self.create_pipeline()
        results = pipeline.generate()

        # Each result is indexed as (sample_rate, audio_array).
        sample_rate = results[0][0]
        audio_data = np.concatenate([r[1] for r in results], axis=0)

        return audio_data, sample_rate

    def enqueue_stream(self) -> Generator[tuple[np.ndarray, int], None, None]:
        """Stream audio chunks for the SSML content (not implemented yet).

        Raises:
            NotImplementedError: Always. Failing loudly here is preferable to
                returning ``None``, which callers iterating over the result
                would only discover as an unrelated ``TypeError``.
        """
        # TODO: implement streaming on top of the pipeline.
        raise NotImplementedError(
            "SSMLHandler.enqueue_stream is not implemented yet."
        )
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@

import numpy as np

from modules.api.impl.handler.AudioHandler import AudioHandler
from modules.api.impl.model.audio_model import AdjustConfig
from modules.api.impl.model.chattts_model import ChatTTSConfig, InferConfig
from modules.api.impl.model.enhancer_model import EnhancerConfig
from modules.core.handler.AudioHandler import AudioHandler
from modules.core.handler.datacls.audio_model import AdjustConfig
from modules.core.handler.datacls.chattts_model import ChatTTSConfig, InferConfig
from modules.core.handler.datacls.enhancer_model import EnhancerConfig
from modules.core.speaker import Speaker
from modules.Enhancer.ResembleEnhance import apply_audio_enhance_full
from modules.normalization import text_normalize
from modules.speaker import Speaker
from modules.synthesize_audio import synthesize_audio
from modules.synthesize_stream import synthesize_stream
from modules.utils.audio import apply_normalize, apply_prosody_to_audio_data
from modules.utils.audio_utils import apply_normalize, apply_prosody_to_audio_data

logger = logging.getLogger(__name__)

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@


class ChatTTSConfig(BaseModel):
# model id
mid: str = "chat-tts"

style: str = ""
temperature: float = 0.3
top_p: float = 0.7
top_k: int = 20
prompt: str = ""
prompt1: str = ""
prompt2: str = ""
prefix: str = ""
Expand All @@ -17,3 +21,6 @@ class InferConfig(BaseModel):
# end_of_sentence
eos: str = "[uv_break]"
seed: int = 42

stream: bool = False
stream_chunk_size: int = 96
File renamed without changes.
8 changes: 8 additions & 0 deletions modules/core/handler/datacls/tn_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import Optional

from pydantic import BaseModel


# Pydantic model with two optional string lists, both defaulting to None.
# Presumably configures text normalization (TN) — TODO confirm against the
# TN pipeline that consumes it.
class TNConfig(BaseModel):
    # Optional list of names; None when unset. Presumably the TN rules to
    # enable — verify against the consumer.
    enabled: Optional[list[str]] = None
    # Optional list of names; None when unset. Presumably the TN rules to
    # disable — verify against the consumer.
    disabled: Optional[list[str]] = None
Empty file.
Loading

0 comments on commit 3d62765

Please sign in to comment.