Skip to content

Commit

Permalink
misc
Browse files Browse the repository at this point in the history
  • Loading branch information
vivekuppal committed Jun 17, 2024
1 parent bdcf85c commit d022c40
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 16 deletions.
21 changes: 18 additions & 3 deletions app/transcribe/app_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
sys.path.append('../..')
import interactions # noqa: E402 pylint: disable=C0413
from sdk import transcriber_models as tm # noqa: E402 pylint: disable=C0413
from tsutils import utilities
from tsutils import utilities, language


def create_responder(provider_name: str, config, convo, save_to_file: bool,
Expand Down Expand Up @@ -134,7 +134,8 @@ def create_transcriber(

if name.lower() == 'deepgram':
stt_model_config: dict = {
'api_key': config['Deepgram']['api_key']
'api_key': config['Deepgram']['api_key'],
'audio_lang': get_language_code(config['OpenAI']['audio_lang'])
}
model = model_factory.get_stt_model_instance(
stt_model=tm.STTEnum.DEEPGRAM_API,
Expand All @@ -149,6 +150,7 @@ def create_transcriber(
elif name.lower() == 'whisper.cpp':
stt_model_config: dict = {
'local_transcripton_model_file': 'ggml-' + config['WhisperCpp']['local_transcripton_model_file'],
'audio_lang': get_language_code(config['OpenAI']['audio_lang'])
}
model = model_factory.get_stt_model_instance(
stt_model=tm.STTEnum.WHISPER_CPP,
Expand All @@ -163,6 +165,7 @@ def create_transcriber(
stt_model_config: dict = {
'api_key': config['OpenAI']['api_key'],
'local_transcripton_model_file': config['OpenAI']['local_transcripton_model_file'],
'audio_lang': get_language_code(config['OpenAI']['audio_lang'])
}
model = model_factory.get_stt_model_instance(
stt_model=tm.STTEnum.WHISPER_LOCAL,
Expand All @@ -176,7 +179,8 @@ def create_transcriber(
elif name.lower() == 'whisper' and api:
stt_model_config: dict = {
'api_key': config['OpenAI']['api_key'],
'timeout': config['OpenAI']['response_request_timeout_seconds']
'timeout': config['OpenAI']['response_request_timeout_seconds'],
'audio_lang': get_language_code(config['OpenAI']['audio_lang'])
}
model = model_factory.get_stt_model_instance(
stt_model=tm.STTEnum.WHISPER_API,
Expand All @@ -192,6 +196,17 @@ def create_transcriber(
global_vars.set_transcriber(t)


def get_language_code(lang: str) -> str:
    """Translate a configured language name into its language code.

    The lookup is case-insensitive and ignores surrounding whitespace:
    ``lang`` is normalised and compared against the values of
    ``language.LANGUAGES_DICT``; the key of the first matching entry is
    returned.

    Args:
        lang: Language name as read from the configuration.

    Returns:
        The matching key of ``language.LANGUAGES_DICT``, or ``'en'`` when
        ``lang`` is not a string or matches no entry.
    """
    default_code = 'en'
    if not isinstance(lang, str):
        # A missing or empty config value would otherwise crash on .lower().
        return default_code

    wanted = lang.strip().lower()
    # next() with a default replaces the try/except StopIteration dance.
    return next(
        (code for code, name in language.LANGUAGES_DICT.items() if name == wanted),
        default_code,
    )


def shutdown(global_vars: TranscriptionGlobals):
"""Activities to be performed right before application shutdown.
"""
Expand Down
6 changes: 5 additions & 1 deletion app/transcribe/audio_player.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ def play_audio_loop(self, config: dict):
def _get_language_code(self, lang: str) -> str:
"""Get the language code from the configuration.
"""
return next(key for key, value in LANGUAGES_DICT.items() if value == lang)
try:
return next(key for key, value in LANGUAGES_DICT.items() if value == lang)
except StopIteration:
# Return default lang if nothing else is found
return 'en'

def _get_speech_text(self) -> str:
"""Get the speech text from the conversation.
Expand Down
5 changes: 3 additions & 2 deletions app/transcribe/audio_transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
sys.path.append('../..')
import custom_speech_recognition as sr # noqa: E402 pylint: disable=C0413
from tsutils import app_logging as al # noqa: E402 pylint: disable=C0413
from tsutils import duration # noqa: E402 pylint: disable=C0413
from tsutils import duration, utilities # noqa: E402 pylint: disable=C0413
from sdk.transcriber_models import WhisperCPPSTTModel


Expand Down Expand Up @@ -228,9 +228,10 @@ def convert_wav_to_16khz_format(self, file_path: str) -> str:
file_descritor, mod_file_path = tempfile.mkstemp(suffix=".wav")
os.close(file_descritor)
# print(f'Convert file {file_path} to 16khz file {mod_file_path}')
log_file = f"{utilities.get_data_path(app_name='Transcribe')}/logs/ffmpeg.txt"
subprocess.call(["ffmpeg", '-i', file_path, '-ar', '16000', '-ac', # nosec
'1', '-c:a', 'pcm_s16le', '-y', mod_file_path],
stdout=open(file='logs/ffmpeg.txt', mode='a', encoding='utf-8'),
stdout=open(file=log_file, mode='a', encoding='utf-8'),
stderr=subprocess.STDOUT)
return mod_file_path
except Exception as ex:
Expand Down
30 changes: 21 additions & 9 deletions sdk/transcriber_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class WhisperSTTModel(STTModelInterface):
"""
def __init__(self, stt_model_config: dict):
self.model = stt_model_config['local_transcripton_model_file']
self.lang = 'en'
self.lang = stt_model_config['audio_lang']
model_filename = MODELS_DIR + self.model + ".pt"
self.model_name = self.model + ".pt"
self.model_filename = os.path.join(MODELS_DIR, model_filename)
Expand Down Expand Up @@ -206,7 +206,7 @@ def __init__(self, stt_model_config: dict):
# A better solution is to create a base class for APIWhisperSTTModel,
# WhisperSTTModel and create set_lang method there and remove it from
# this class
self.lang = 'en'
self.lang = stt_model_config['audio_lang']

def set_lang(self, lang: str):
"""Set STT Language"""
Expand Down Expand Up @@ -249,7 +249,8 @@ def get_sentences(self, wav_file_path) -> dict:
"""
try:
with open(wav_file_path, "rb") as audio_file:
result = self.stt_client.audio.transcriptions.create(model='whisper-1', file=audio_file)
result = self.stt_client.audio.transcriptions.create(model='whisper-1', file=audio_file,
language=self.lang)
except Exception as exception:
print('Exception in transcribing audio using whisper API.')
print(exception)
Expand All @@ -264,7 +265,7 @@ class WhisperCPPSTTModel(STTModelInterface):
This model works best when used with GPU
"""
def __init__(self, stt_model_config: dict):
self.lang = 'en-US'
self.lang = stt_model_config['audio_lang']
model = stt_model_config['local_transcripton_model_file']
self.model_filename = MODELS_DIR + model + ".bin"
self.model = model
Expand All @@ -287,15 +288,18 @@ def get_transcription(self, wav_file_path: str):
"""
mod_file_path = wav_file_path
try:
log_file = f"{utilities.get_data_path(app_name='Transcribe')}/logs/whisper.cpp.txt"
# main.exe <filename> -oj
if os.path.isfile("../../bin/main.exe"):
subprocess.call(["../../bin/main.exe", mod_file_path, '-oj', '-m', self.model_filename],
stdout=open(file='logs/whisper.cpp.txt', mode='a', encoding='utf-8'),
subprocess.call(["../../bin/main.exe", mod_file_path, '-oj', '-m',
self.model_filename, '-l', self.lang],
stdout=open(file=log_file, mode='a', encoding='utf-8'),
stderr=subprocess.STDOUT)
else:
# This path is used in case of binary.
subprocess.call(["./bin/main.exe", mod_file_path, '-oj', '-m', self.model_filename],
stdout=open(file='logs/whisper.cpp.txt', mode='a', encoding='utf-8'),
subprocess.call(["./bin/main.exe", mod_file_path, '-oj', '-m', self.model_filename,
'-l', self.lang],
stdout=open(file=log_file, mode='a', encoding='utf-8'),
stderr=subprocess.STDOUT)
except Exception as ex:
print(f'ERROR: converting wav file {wav_file_path} to text using whisper.cpp.')
Expand Down Expand Up @@ -331,7 +335,15 @@ def process_response(self, response) -> str:
def get_sentences(self, wav_file_path: str):
"""Not Implemented
"""
raise Exception('Method not implemnted') # pylint: disable=W0719
transcript = ''
response = self.get_transcription(wav_file_path=wav_file_path)
for segment in response["transcription"]:
if segment["text"].strip() == '[BLANK_AUDIO]':
continue
transcript += segment["text"]
return transcript

# raise Exception('Method not implemnted') # pylint: disable=W0719


class DeepgramSTTModel(STTModelInterface):
Expand Down
2 changes: 1 addition & 1 deletion tsutils/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
'tt': 'tatar',
'te': 'telugu',
'th': 'thai',
'bo': 'tibetan',
'bo': 'tibetan',
'tr': 'turkish',
'tk': 'turkmen',
'uk': 'ukrainian',
Expand Down

0 comments on commit d022c40

Please sign in to comment.