stable-ts support #99

abdeladim-s · Jan 13, 2024 · 2273074 · 2273074
1 parent 1790a52
commit 2273074
Show file tree

Hide file tree

Showing 5 changed files with 530 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -34,13 +34,15 @@
     * > faster-whisper is a reimplementation of OpenAI's Whisper model using [CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for Transformer models.
       >
       > This implementation is up to 4 times faster than [openai/whisper](https://github.com/openai/whisper) for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.
-  * [x] :new: [m-bain /whisperX](https://github.com/m-bain/whisperX)
+  * [x] [m-bain/whisperX](https://github.com/m-bain/whisperX)
     * >fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and speaker diarization.
       >- ⚡️ Batched inference for 70x realtime transcription using whisper large-v2
       >- 🪶 [faster-whisper](https://github.com/guillaumekln/faster-whisper) backend, requires <8GB gpu memory for large-v2 with beam_size=5
       >- 🎯 Accurate word-level timestamps using wav2vec2 alignment
       >- 👯‍♂️ Multispeaker ASR using speaker diarization from [pyannote-audio](https://github.com/pyannote/pyannote-audio) (speaker ID labels) 
       >- 🗣️ VAD preprocessing, reduces hallucination & batching with no WER degradation.
+  * [x] :new: [jianfch/stable-ts](https://github.com/jianfch/stable-ts)
+    * >**Stabilizing Timestamps for Whisper**: This library modifies [Whisper](https://github.com/openai/whisper) to produce more reliable timestamps and extends its functionality.
 
 * Web UI
   * Fully offline, no third-party services

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "subsai"
-version = "1.2.4"
+version = "1.2.5"
 authors = [
     {name = "abdeladim-s"},
 ]

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,4 @@
-torch==2.1.2
-openai-whisper==20231117
+openai-whisper==20231106
 streamlit~=1.20.0
 streamlit_player~=0.1.5
 streamlit-aggrid~=0.3.4
@@ -11,3 +10,4 @@ pywhispercpp==1.1.1
 dl_translate==0.3.0
 faster_whisper==0.10.0
 whisperx @ git+https://github.com/m-bain/whisperx@0ae0d49d1dd789322181f579c71719f4cac83ae1
+stable-ts
diff --git a/src/subsai/configs.py b/src/subsai/configs.py
@@ -14,6 +14,7 @@
 from subsai.models.whisper_timestamped_model import WhisperTimeStamped
 from subsai.models.whispercpp_model import WhisperCppModel
 from subsai.utils import get_available_devices, available_translation_models
+from subsai.models.stable_ts_model import StableTsModel
 
 AVAILABLE_MODELS = {
     'openai/whisper': {
@@ -54,7 +55,13 @@
         'description': """**whisperX** is a fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and speaker diarization.""",
         'url': 'https://github.com/m-bain/whisperX',
         'config_schema': WhisperXModel.config_schema,
-    }
+    },
+    'jianfch/stable-ts': {
+        'class': StableTsModel,
+        'description': '**Stabilizing Timestamps for Whisper** This library modifies [Whisper](https://github.com/openai/whisper) to produce more reliable timestamps and extends its functionality.',
+        'url': 'https://github.com/jianfch/stable-ts',
+        'config_schema': StableTsModel.config_schema,
+    },
 }
 
 BASIC_TOOLS_CONFIGS = {