Skip to content

Commit

Permalink
check for any segment with speech prob (#121)
Browse files: browse the repository at this point in the history
  • Loading branch information
matthewkennedy5 committed Apr 3, 2024
1 parent 57cdd2f commit 9e78ec7
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions openduck-py/openduck_py/routers/ml.py
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
from pprint import pprint
import io

import numpy as np
from fastapi import APIRouter, UploadFile, File, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import io

from whisper import load_model
import numpy as np
from nemo_text_processing.text_normalization.normalize import Normalizer

from openduck_py.voices.styletts2 import styletts2_inference
Expand Down Expand Up @@ -44,13 +45,17 @@ async def transcribe_audio(
audio_bytes = await audio.read()
audio_data = np.frombuffer(audio_bytes, dtype=np.float32)
response = whisper_model.transcribe(audio_data)
pprint(response)
if len(response["segments"]) == 0:
return {"text": ""}
no_speech_prob = response["segments"][0]["no_speech_prob"]
print("No speech prob:", no_speech_prob)
transcription = response["text"]
if no_speech_prob > NO_SPEECH_PROB_THRESHOLD:
transcription = ""

transcription = " ".join(
[
segment["text"]
for segment in response["segments"]
if segment["no_speech_prob"] <= NO_SPEECH_PROB_THRESHOLD
]
)
return {"text": transcription}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
Expand Down

0 comments on commit 9e78ec7

Please sign in to comment.