Skip to content

Commit

Permalink
deepgram
Browse files: browse the repository at this point in the history
  • Loading branch information
vivekuppal committed Mar 29, 2024
1 parent f25f087 commit 5e298d5
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 10 deletions.
5 changes: 4 additions & 1 deletion app/transcribe/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def create_args() -> argparse.Namespace:
\nThis option requires an API KEY and will consume Open AI credits.')
cmd_args.add_argument('-e', '--experimental', action='store_true',
help='Experimental command line argument. Behavior is undefined.')
cmd_args.add_argument('-stt', '--speech_to_text', action='store', default='whisper',
cmd_args.add_argument('-stt', '--speech_to_text', action='store', default=None,
choices=['whisper', 'whisper.cpp', 'deepgram'],
help='Specify the Speech to text Engine.'
'\nLocal STT models tend to perform best for response times.'
Expand Down Expand Up @@ -164,6 +164,9 @@ def update_args_config(args: argparse.Namespace, config: dict):
if args.speaker_device_index is not None:
config['General']['speaker_device_index'] = int(args.speaker_device_index)

if args.speech_to_text is not None:
config['General']['stt'] = args.speech_to_text


def update_audio_devices(global_vars: TranscriptionGlobals, config: dict):
"""Handle all application configuration using the command line args"""
Expand Down
2 changes: 1 addition & 1 deletion app/transcribe/audio_transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def transcribe_audio_queue(self, audio_queue: queue.Queue):
except Exception as exception:
print(exception)
finally:
# print(f'transcribe_audio_queue: filesize: {os.path.getsize(path)}')
# print(f'transcribe_audio_queue: file: {path} filesize: {os.path.getsize(path)}')
os.unlink(path)

if text != '' and text.lower() != 'you':
Expand Down
3 changes: 2 additions & 1 deletion app/transcribe/gpt_responder.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,8 @@ def __init__(self,
base_url = self.config['OpenAI']['base_url']
self.llm_client = openai.OpenAI(api_key=api_key, base_url=base_url)
self.model = self.config['OpenAI']['ai_model']
print(f'[INFO] Using OpenAI for inference. Model: {self.model}')
stt = self.config['General']['stt']
print(f'[INFO] Using {stt} for inference. Model: {self.model}')
super().__init__(config=self.config,
convo=convo,
save_to_file=save_to_file,
Expand Down
3 changes: 1 addition & 2 deletions app/transcribe/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def main():
args = create_args()

config = configuration.Config().data

au.start_ffmpeg()

# Initiate global variables
Expand All @@ -29,7 +28,7 @@ def main():

update_args_config(args, config)
global_vars.initiate_audio_devices(config)
au.create_transcriber(name=args.speech_to_text,
au.create_transcriber(name=config['General']['stt'],
config=config,
api=bool(config['General']['use_api']),
global_vars=global_vars)
Expand Down
3 changes: 0 additions & 3 deletions app/transcribe/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,4 @@ deepgram-sdk==3.1.0
#--extra-index-url https://download.pytorch.org/whl/cu118
--extra-index-url https://download.pytorch.org/whl/cu121
torch
transformers>=4.36.0 # not directly required, pinned by Snyk to avoid a vulnerability
bandit==1.7.8
setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
3 changes: 2 additions & 1 deletion examples/deepgram/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def main():
smart_format=True,
utterances=True,
punctuate=True,
diarize=True)
diarize=True,
detect_language=True)

response = deepgram.listen.prerecorded.v("1").transcribe_file(payload, options)
print(response.to_json(indent=4))
Expand Down
6 changes: 5 additions & 1 deletion sdk/transcriber_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,9 @@ def __init__(self, stt_model_config: dict):
# Check for api_key
if stt_model_config["api_key"] is None:
raise Exception("Attempt to create Deepgram STT Model without an api key.") # pylint: disable=W0719

# This parameter exists primarily to adhere to the interface.
# Deepgram does auto language detection.
self.lang = 'en-US'

print('[INFO] Using Deepgram API for transcription.')
Expand All @@ -308,7 +311,8 @@ def get_transcription(self, wav_file_path: str):
smart_format=True,
utterances=True,
punctuate=True,
paragraphs=True)
paragraphs=True,
detect_language=True)

response = self.audio_model.listen.prerecorded.v("1").transcribe_file(payload, options)
# This is not necessary and just a debugging aid
Expand Down

0 comments on commit 5e298d5

Please sign in to comment.