Skip to content

Commit

Permalink
chunk styletts2 response and use bertie
Browse files — browse the repository at this point in the history
  • Loading branch information
matthewkennedy5 committed Feb 19, 2024
1 parent 81434ae commit 2531b1f
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
13 changes: 8 additions & 5 deletions openduck-py/openduck_py/routers/voice.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import io
import re
from tempfile import NamedTemporaryFile
from uuid import uuid4
from fastapi import APIRouter, Depends, UploadFile, File, Form
Expand Down Expand Up @@ -107,11 +108,13 @@ async def audio_response(
chat.history_json["messages"] = messages
await db.commit()

# TODO: Process styletts2 in chunks of text, and return one chunk at a time in a streaming fashion
audio = styletts2.styletts2_inference(
# TODO: better way to deal with long responses. chunk them
text=response_message.content[:500],
)
audio_chunks = []
sentences = re.split(r"(?<=[.!?]) +", response_message.content)
for i in range(0, len(sentences), 2):
chunk_text = " ".join(sentences[i : i + 2])
audio_chunk = styletts2.styletts2_inference(text=chunk_text)
audio_chunks.append(audio_chunk)
audio = np.concatenate(audio_chunks)
audio = np.int16(audio * 32767) # Scale to 16-bit integer values
output = StreamingResponse(io.BytesIO(audio), media_type="application/octet-stream")
return output
9 changes: 3 additions & 6 deletions openduck-py/openduck_py/voices/styletts2.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def resize_array(input_array, new_size):
loader=lambda x: load_plbert(plbert_config, x),
)

model_path = "styletts2/rap_v1.pt"
model_path = "styletts2/prototype_voice.pth"
model_bucket = "uberduck-models-us-west-2"
model, sampler = load_model(
cache=cache,
Expand All @@ -306,7 +306,7 @@ def resize_array(input_array, new_size):
model_params=model_params,
)

style_prompt_path = "511f17d1-8a30-4be8-86aa-4cdd8b0aed70.wav"
style_prompt_path = "bertie-chipper.wav"
style_prompt_bucket = "uberduck-audio-files"

ref_s = load_object_from_s3(
Expand All @@ -316,10 +316,7 @@ def resize_array(input_array, new_size):
)


def styletts2_inference(
text: str,
language: str = "english",
):
def styletts2_inference(text: str, language: str = "english"):
print("styletts2.run started")

# NOTE (Sam): to deal with short inference issue https://github.com/yl4579/StyleTTS2/issues/46.
Expand Down

0 comments on commit 2531b1f

Please sign in to comment.