From 7c95bd15b84eefe77ba4211d961306f5e49b38f4 Mon Sep 17 00:00:00 2001 From: vivek Date: Tue, 18 Jun 2024 11:34:59 -0400 Subject: [PATCH] add multilingual support for deepgram --- sdk/transcriber_models.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/sdk/transcriber_models.py b/sdk/transcriber_models.py index 2eaef04..6e7e2ff 100644 --- a/sdk/transcriber_models.py +++ b/sdk/transcriber_models.py @@ -357,7 +357,8 @@ def __init__(self, stt_model_config: dict): # This parameter exists primarily to adhere to the interface. # Deepgram does auto language detection. - self.lang = 'en-US' + # self.lang = 'en-US' + self.lang = stt_model_config['audio_lang'] print('[INFO] Using Deepgram API for transcription.') self.audio_model = DeepgramClient(stt_model_config["api_key"]) @@ -406,18 +407,29 @@ def get_sentences(self, wav_file_path: str): payload: FileSource = { "buffer": buffer_data } - - options = PrerecordedOptions( - model="nova", - smart_format=True, - utterances=True, - punctuate=True, - paragraphs=True, - detect_language=True) + if self.lang.startswith('en'): + options = PrerecordedOptions( + model="nova", + smart_format=True, + utterances=True, + punctuate=True, + paragraphs=True, + detect_language=True, + language=self.lang) + else: + options = PrerecordedOptions( + model="general", + smart_format=True, + utterances=True, + punctuate=True, + paragraphs=True, + detect_language=True, + language=self.lang) response = self.audio_model.listen.prerecorded.v("1").transcribe_file(payload, options) # This is not necessary and just a debugging aid - with open('logs/deep.json', mode='a', encoding='utf-8') as deep_log: + log_file = f"{utilities.get_data_path(app_name='Transcribe')}/logs/deep.json" + with open(log_file, mode='a', encoding='utf-8') as deep_log: deep_log.write(response.to_json(indent=4)) results = [] for utterance in response.results.utterances: