Skip to content

Commit

Permalink
NeMo text normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewkennedy5 committed Feb 22, 2024
1 parent 3cd9ea7 commit 32ca684
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
21 changes: 15 additions & 6 deletions openduck-py/openduck_py/routers/voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
from asgiref.sync import sync_to_async
import torch
from torchaudio.functional import resample
from nemo_text_processing.text_normalization.normalize import Normalizer

from openduck_py.models import DBChatHistory
from openduck_py.db import get_db_async, AsyncSession
from openduck_py.voices import styletts2
from openduck_py.routers.templates import generate

model = whisper.load_model("base.en") # Fastest possible whisper model
model = whisper.load_model("base.en")
normalizer = Normalizer(input_case="cased", lang="en")

audio_router = APIRouter(prefix="/audio")

Expand Down Expand Up @@ -46,16 +48,16 @@ async def audio_response(
started_talking = False
while True:
message = await websocket.receive_bytes()
print("Received audio!")
# print("Received audio!")
audio_chunk = np.frombuffer(message, dtype=np.float32)
audio_data.append(audio_chunk)
volume = np.linalg.norm(audio_chunk)
print("Norm:", volume)
# print("Norm:", volume)
if volume < SILENCE_THRESHOLD and started_talking:
print("[INFO] Silence! My turn.")
# print("[INFO] Silence! My turn.")
break
elif volume > SILENCE_THRESHOLD:
print("I'm hearing you load and clear...")
# print("I'm hearing you load and clear...")
started_talking = True

audio_data = np.concatenate(audio_data)
Expand Down Expand Up @@ -101,7 +103,14 @@ async def audio_response(
chat.history_json["messages"] = messages
await db.commit()

sentences = re.split(r"(?<=[.!?]) +", response_message.content)
def normalize_text(text):
normalized_text = normalizer.normalize(text)
print("Original response:", text)
print("Normalized response:", normalized_text)
return normalized_text

normalized = normalize_text(response_message.content)
sentences = re.split(r"(?<=[.!?]) +", normalized)
for sentence in sentences:
# TODO: deal with asyncio
audio_chunk = styletts2.styletts2_inference(text=sentence)
Expand Down
3 changes: 2 additions & 1 deletion openduck-py/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ aioboto3
asgiref
asyncpg
azure-cognitiveservices-speech
Cython
databases
daily-python
google-cloud-texttospeech
Expand All @@ -17,9 +18,9 @@ pytest-mock
einops
einops_exts
fastapi
fugashi
librosa
munch
nemo_toolkit[tts]
nltk
numpy
openai
Expand Down

0 comments on commit 32ca684

Please sign in to comment.