From 82b531fa45dfa6509232a62ac16856247823fe34 Mon Sep 17 00:00:00 2001 From: pluja Date: Sat, 25 Nov 2023 19:58:35 +0100 Subject: [PATCH] Add support for large-v3 and bump faster-whisper --- .gitignore | 1 + frontend/src/lib/components/ModalTranscriptionForm.svelte | 3 ++- transcription-api/backends/backend.py | 2 +- transcription-api/models.py | 1 + transcription-api/requirements.txt | 2 +- 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index a31b381..e54532f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ whisper_models/ .env .DS_Store *.beta.yml +venv diff --git a/frontend/src/lib/components/ModalTranscriptionForm.svelte b/frontend/src/lib/components/ModalTranscriptionForm.svelte index 5cc1ce8..251d50f 100644 --- a/frontend/src/lib/components/ModalTranscriptionForm.svelte +++ b/frontend/src/lib/components/ModalTranscriptionForm.svelte @@ -50,7 +50,8 @@ 'small.en', 'medium', 'medium.en', - 'large-v2' + 'large-v2', + 'large-v3' ]; // Sort the languages languages.sort((a, b) => { diff --git a/transcription-api/backends/backend.py b/transcription-api/backends/backend.py index d859171..9153ce3 100644 --- a/transcription-api/backends/backend.py +++ b/transcription-api/backends/backend.py @@ -3,7 +3,7 @@ import numpy as np from faster_whisper.audio import decode_audio # type: ignore -SUPPORTED_MODELS = ["tiny", "tiny.en", "small", "small.en", "base", "base.en", "medium", "medium.en", "large-v2"] +SUPPORTED_MODELS = ["tiny", "tiny.en", "small", "small.en", "base", "base.en", "medium", "medium.en", "large-v2", "large-v3"] WordData = TypedDict( "WordData", {"word": str, "start": float | str, "end": float | str, "score": float} diff --git a/transcription-api/models.py b/transcription-api/models.py index 341f0d2..cbfeceb 100644 --- a/transcription-api/models.py +++ b/transcription-api/models.py @@ -15,6 +15,7 @@ class ModelSize(str, Enum): medium_en = "medium.en" medium = "medium" large_v2 = "large-v2" + large_v3 = "large-v3" class Languages(str, Enum): auto = "auto" diff --git a/transcription-api/requirements.txt b/transcription-api/requirements.txt index 551050f..f50cbc0 100644 --- a/transcription-api/requirements.txt +++ b/transcription-api/requirements.txt @@ -1,5 +1,5 @@ fastapi==0.100.1 -faster_whisper==0.9.0 +faster-whisper @ https://github.com/guillaumekln/faster-whisper/archive/e1a218fab1ab02d637b79565995bf1a9c4c83a09.tar.gz pydantic==2.1.1 python-dotenv==1.0.0 uvicorn==0.23.2