-
Notifications
You must be signed in to change notification settings - Fork 1
/
ct2_logic.py
86 lines (76 loc) · 3.53 KB
/
ct2_logic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import sounddevice as sd
import numpy as np
import wave
import os
import tempfile
import threading
from faster_whisper import WhisperModel
import yaml
from PySide6.QtWidgets import QApplication
from PySide6.QtGui import QClipboard
class VoiceRecorder:
    """Record microphone audio, transcribe it with Whisper, copy text to the clipboard.

    The recorder captures audio on a background thread via ``sounddevice``,
    writes the captured samples to a temporary WAV file, runs a
    ``faster_whisper.WhisperModel`` over it and places the resulting text on
    the Qt clipboard. Progress is reported through ``window.update_status``.
    """

    # Path of the YAML file used to persist the model settings.
    CONFIG_PATH = "config.yaml"
    # Number of CPU threads handed to WhisperModel.
    CPU_THREADS = 26

    def __init__(self, window, samplerate=44100, channels=1, dtype='int16'):
        """Store the audio parameters and load the configured model.

        Args:
            window: Object exposing ``update_status(str)``; used for all
                user-visible status messages.
            samplerate: Sampling rate passed to the input stream and written
                to the WAV header.
            channels: Number of input channels.
            dtype: Sample dtype passed to the input stream.
        """
        self.samplerate, self.channels, self.dtype = samplerate, channels, dtype
        self.window = window
        self.is_recording, self.frames = False, []
        self._record_thread = None
        self.load_settings()

    def load_settings(self):
        """Read the settings file and load the model it describes.

        Missing keys, a missing file, or an empty/non-mapping file all fall
        back to ``base.en`` / ``int8`` / ``cpu``.
        """
        try:
            with open(self.CONFIG_PATH, "r") as f:
                # safe_load returns None for an empty document.
                config = yaml.safe_load(f) or {}
        except FileNotFoundError:
            config = {}
        if not isinstance(config, dict):
            config = {}
        self.update_model(
            config.get("model_name", "base.en"),
            config.get("quantization_type", "int8"),
            config.get("device_type", "cpu"),
        )

    def save_settings(self, model_name, quantization_type, device_type):
        """Persist the three model settings, preserving any other keys in the file."""
        try:
            with open(self.CONFIG_PATH, "r") as f:
                config = yaml.safe_load(f) or {}
        except FileNotFoundError:
            config = {}
        if not isinstance(config, dict):
            config = {}
        config["model_name"] = model_name
        config["quantization_type"] = quantization_type
        config["device_type"] = device_type
        with open(self.CONFIG_PATH, "w") as f:
            yaml.safe_dump(config, f)

    def update_model(self, model_name, quantization_type, device_type):
        """Load a new Whisper model, report it, and persist the choice.

        Args:
            model_name: Whisper model size/name, e.g. ``"base.en"``.
            quantization_type: Used both in the model identifier and as the
                ``compute_type`` given to ``WhisperModel``.
            device_type: Device string given to ``WhisperModel``.
        """
        # NOTE(review): presumably a Hugging Face repository id - confirm.
        model_str = f"ctranslate2-4you/whisper-{model_name}-ct2-{quantization_type}"
        self.model = WhisperModel(
            model_str,
            device=device_type,
            compute_type=quantization_type,
            cpu_threads=self.CPU_THREADS,
        )
        self.window.update_status(f"Model updated to {model_name} with {quantization_type} quantization on {device_type} device")
        self.save_settings(model_name, quantization_type, device_type)

    def transcribe_audio(self, audio_file):
        """Transcribe ``audio_file`` and put the text on the application clipboard.

        Segment texts are joined with newlines.
        """
        segments, _ = self.model.transcribe(audio_file)
        clipboard_text = "\n".join(segment.text for segment in segments)
        clipboard = QApplication.clipboard()
        clipboard.setText(clipboard_text)
        self.window.update_status("Audio saved and transcribed")

    def record_audio(self):
        """Capture audio into ``self.frames`` until ``is_recording`` becomes False.

        Intended to run on the thread started by ``start_recording``.
        """
        self.window.update_status("Recording...")

        def callback(indata, frames, time_info, status):
            if status:
                print(status)
            # Copy: the buffer handed to the callback is only read here and
            # must be preserved past the callback's return.
            self.frames.append(indata.copy())

        try:
            with sd.InputStream(samplerate=self.samplerate, channels=self.channels,
                                dtype=self.dtype, callback=callback):
                while self.is_recording:
                    sd.sleep(100)
        except sd.PortAudioError as exc:
            # Reset the flag so a later start_recording() is not blocked.
            self.is_recording = False
            self.window.update_status(f"Recording failed: {exc}")

    def save_audio(self):
        """Stop recording, write the captured audio to a temp WAV and transcribe it.

        Does nothing beyond a status update when no audio was captured. The
        temporary file is always removed, even if transcription fails.
        """
        self.is_recording = False

        # Wait for the recorder thread so the stream is closed and no more
        # frames are appended while the data is being assembled.
        thread = self._record_thread
        if thread is not None and thread is not threading.current_thread():
            thread.join(timeout=2.0)
        self._record_thread = None

        if not self.frames:
            self.window.update_status("No audio recorded")
            return

        data = np.concatenate(self.frames, axis=0)
        self.frames.clear()

        # mkstemp creates the file atomically (mktemp is deprecated and racy).
        fd, temp_filename = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            with wave.open(temp_filename, "wb") as wf:
                wf.setnchannels(self.channels)
                # 2 for the default int16; follows the dtype for other
                # integer sample formats.
                wf.setsampwidth(np.dtype(self.dtype).itemsize)
                wf.setframerate(self.samplerate)
                wf.writeframes(data.tobytes())
            self.transcribe_audio(temp_filename)
        finally:
            os.remove(temp_filename)

    def start_recording(self):
        """Start a background recording thread unless one is already active."""
        if self.is_recording:
            return
        # Drop anything left over from a previous, unsaved recording.
        self.frames.clear()
        self.is_recording = True
        # Daemon thread: an in-progress recording must not keep the process
        # alive after the application exits.
        self._record_thread = threading.Thread(target=self.record_audio, daemon=True)
        self._record_thread.start()