-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
79 lines (61 loc) · 2.12 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import torch
import os
import platform
from dotenv import load_dotenv
from rich.progress import Progress, TimeElapsedColumn, BarColumn, TextColumn
from transformers import pipeline
from utils.result import build_result
from utils.diarization_pipeline import diarize
from utils.convert_output import convert
# Load variables from a local .env file into the process environment before
# anything below reads them with os.getenv().
load_dotenv()

is_cuda_available = torch.cuda.is_available()

# Pick the inference device.
# - macOS (Darwin): use Metal Performance Shaders, but only when this PyTorch
#   build / machine actually exposes the MPS backend; otherwise fall through
#   instead of failing when the model is moved to a missing device.
# - elsewhere: first CUDA GPU if present, else CPU.
if platform.system() == "Darwin" and torch.backends.mps.is_available():
    device = "mps"
elif is_cuda_available:
    device = "cuda:0"
else:
    device = "cpu"

# Half precision only on an accelerator; on CPU stay in float32.
# NOTE(review): float16 on CPU is slow or unsupported for some ops depending on
# the PyTorch version, which is why the CPU path must not use it.
torch_dtype = torch.float32 if device == "cpu" else torch.float16

model = "openai/whisper-large-v3"

# Build the speech-recognition pipeline once; it is reused for every file.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    torch_dtype=torch_dtype,  # previously hard-coded to float16, ignoring the value above
    device=device,
    model_kwargs={"attn_implementation": "sdpa"},
)

if device == "mps":
    # Release cached MPS memory left over from loading the model.
    torch.mps.empty_cache()
# Input/output locations, relative to the current working directory.
audio_dir = "audio"
output_dir = "transcriptions"

# Token used for speaker diarization; when it is missing or empty the
# diarization step is skipped and only the plain transcript is produced.
hf_auth_token = os.getenv("HF_AUTH_TOKEN")

# convert() is defined elsewhere and may not create its parent directory, so
# make sure the output folder exists before the first file is written.
os.makedirs(output_dir, exist_ok=True)

# Sorted so files are processed in a stable, predictable order
# (os.listdir returns entries in arbitrary order).
for file_name in sorted(os.listdir(audio_dir)):
    audio_path = os.path.join(audio_dir, file_name)

    # Skip hidden entries (dotfiles) and anything that is not a regular file,
    # e.g. sub-directories, which the pipeline cannot read as audio.
    if file_name.startswith(".") or not os.path.isfile(audio_path):
        continue

    # Indeterminate progress bar shown only while the model is transcribing.
    # It is closed before diarization because rich permits a single live
    # display at a time and diarize() (not visible here) may open its own.
    with Progress(
        TextColumn("🖋️ [progress.description]{task.description}"),
        BarColumn(style="yellow1", pulse_style="white"),
        TimeElapsedColumn(),
    ) as progress:
        progress.add_task(
            f"[yellow]Transcribing {file_name} [Device: {device}]...", total=None)

        outputs = pipe(
            audio_path,
            chunk_length_s=30,
            batch_size=24,
            generate_kwargs={"task": "transcribe"},
            return_timestamps=True,
        )

    # Determine the speakers_transcript based on the hf_auth_token:
    # no usable token -> no speaker information.
    if not hf_auth_token:
        speakers_transcript = []
    else:
        speakers_transcript = diarize(
            outputs, audio_path, device, hf_auth_token)

    # Build the result and write it next to the other transcripts, using the
    # audio file's base name with a .txt extension.
    result = build_result(speakers_transcript, outputs)
    output_path = f"{output_dir}/{os.path.splitext(file_name)[0]}.txt"
    convert(result, output_path)

    print("✨ Your file has been successfully transcribed!")