-
Notifications
You must be signed in to change notification settings - Fork 3
/
whispersubs.lua
294 lines (210 loc) · 7.69 KB
/
whispersubs.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
-- Scratch files used while generating subtitles; cleanup() deletes them.
local TMP_WAV_PATH = "/tmp/mpv_whisper_tmp_wav.wav"
local TMP_SUB_PATH = "/tmp/mpv_whisper_tmp_sub" -- base name only: callers append ".srt", "_append.srt", ...
local TMP_STREAM_PATH = "/tmp/mpv_whisper_tmp_stream"
-- This is the main setting you will want to modify.
-- It is the command line of the whisper.cpp "main" example program.
-- Set the model location and any other options here, but do not set input or
-- output options: appendSubs() adds --output-srt, -d, -f and -of itself.
local WHISPER_CMD = "/models/whisper.cpp/main -m /models/whisper-ggml-medium.bin --threads 6 --language en"
local CHUNK_SIZE = 15 * 1000 -- length of audio transcribed per pass, in ms (start/runStream/runLocal shrink it near the end of a file)
local WAV_CHUNK_SIZE = CHUNK_SIZE + 1000 -- length of each extracted WAV in ms: one chunk plus 1000 ms of padding
local INIT_POS = 0 -- position, in ms, at which subtitle generation starts
local STREAM_TIMEOUT = 15 -- roughly how many seconds start() waits for the stream download before giving up
local SHOW_PROGRESS = false -- when true, appendSubs() shows the processed position on screen after every chunk
local running = false -- flipped by toggle(); runStream()/runLocal() stop rescheduling themselves when false
local stream_cmd -- yt-dlp command line built by startStream()
local stream_process -- io.popen handle of the yt-dlp download, or nil
local stream_downloaded = false -- set to true by checkStreamStatus() once the download process has exited
-- Format a position given in milliseconds as "HH:MM:SS.mmm".
--
-- ms may be fractional (positions are derived from a duration in seconds
-- multiplied by 1000), so it is floored once up front: string.format's %d
-- rejects numbers without an integer representation on Lua 5.3+.
-- The hour field wraps at 24.
--
-- ms:      position in milliseconds
-- returns: the formatted string
local function formatProgress(ms)
    local total_ms = math.floor(ms)
    local total_seconds = math.floor(total_ms / 1000)
    local total_minutes = math.floor(total_seconds / 60)
    local total_hours = math.floor(total_minutes / 60)
    return string.format(
        "%02d:%02d:%02d.%03d",
        total_hours % 24,
        total_minutes % 60,
        total_seconds % 60,
        total_ms % 1000
    )
end
-- Delete the scratch WAV, subtitle and stream files.
--
-- `rm -f` is used so that calling this when some (or all) of the files are
-- already gone stays silent; cleanup() can run more than once per file, e.g.
-- from runLocal() when processing finishes and again from stop().
-- os.execute takes a single command argument, so no mode string is passed.
local function cleanup()
    os.execute('rm -f '..TMP_WAV_PATH)
    os.execute('rm -f '..TMP_SUB_PATH..'*')   -- also removes the "_append*" intermediates
    os.execute('rm -f '..TMP_STREAM_PATH..'*')
end
-- Stop any running stream download and remove the scratch files.
--
-- The handle is cleared after closing it: stop() can be reached more than
-- once for the same download (end-file handler, then toggle()), and calling
-- :close() on an already closed file handle raises an error.
local function stop()
    if stream_process then
        stream_process:close()
        stream_process = nil
    end
    cleanup()
end
-- Copy the finished temporary subtitle file next to the media file, using
-- the media file's name with its extension replaced by ".srt".
--
-- media_path: path of the media file. start() passes it wrapped in double
--             quotes for the shell; an unquoted path is accepted as well.
local function saveSubs(media_path)
    -- Peel off the surrounding quotes so the extension can be located, and
    -- remember to put them back around the result.
    local inner = media_path:match('^"(.*)"$')
    local quote = '"'
    if not inner then
        inner = media_path
        quote = ''
    end
    -- Strip only a real file extension: the dot must follow a non-slash
    -- character and nothing after it may contain a dot or a slash. A path
    -- without an extension (or with dots only in directory names) is kept
    -- whole instead of failing the match.
    local base = inner:match('^(.*[^/])%.[^./]+$') or inner
    local sub_path = quote..base..'.srt'..quote
    mp.commandv('show-text', 'Subtitles finished processing: saving to '..sub_path, 5000)
    os.execute('cp '..TMP_SUB_PATH..'.srt '..sub_path)
end
-- Transcribe the current WAV chunk and append the result to the temporary
-- subtitle file, then ask mpv to reload its subtitles.
--
-- current_pos: start of the chunk in ms; used to shift the chunk's timings
-- returns:     the position of the next chunk (current_pos + CHUNK_SIZE)
local function appendSubs(current_pos)
    local append_base = TMP_SUB_PATH..'_append'
    local shifted_srt = append_base..'_offset.srt'

    -- 1. whisper writes <append_base>.srt with timings relative to the chunk
    local transcribe = WHISPER_CMD..' --output-srt -d '..CHUNK_SIZE..' -f '..TMP_WAV_PATH..' -of '..append_base
    os.execute(transcribe, 'r')

    -- 2. shift those timings by current_pos so they line up with the media
    local shift = 'ffmpeg -hide_banner -loglevel error -itsoffset '..current_pos..'ms -i '..append_base..'.srt'..' -c copy -y '..shifted_srt
    os.execute(shift, 'r')

    -- 3. whisper cannot append to an existing file, so concatenate by hand
    os.execute('cat '..shifted_srt..' >> '..TMP_SUB_PATH..'.srt', 'r')

    local next_pos = current_pos + CHUNK_SIZE
    if SHOW_PROGRESS then
        mp.commandv('show-text','Whisper Subtitles: '..formatProgress(next_pos))
    end
    mp.command('sub-reload')
    return next_pos
end
-- Generate the first chunk of subtitles and attach the temporary subtitle
-- file to the player.
--
-- current_pos: start of the first chunk in ms
-- returns:     the position of the next chunk, as reported by appendSubs()
local function createSubs(current_pos)
    mp.commandv('show-text','Whisper Subtitles: Generating initial subtitles')
    local next_pos = appendSubs(current_pos)
    -- the .srt file only exists once the first chunk has been written
    local srt_file = TMP_SUB_PATH..'.srt'
    mp.commandv('sub-add', srt_file)
    return next_pos
end
-- Extract WAV_CHUNK_SIZE ms of audio starting at current_pos into
-- TMP_WAV_PATH as 16 kHz mono 16-bit PCM, using ffmpeg.
--
-- media_path:  input handed to ffmpeg's -i verbatim (callers quote it)
-- current_pos: start of the extraction in ms
-- returns:     false when ffmpeg could not be started or its output mentions
--              "No such file" or "Invalid"; true otherwise
local function createWAV(media_path, current_pos)
    local cmd = 'ffmpeg -hide_banner -loglevel error -ss '..current_pos..'ms -t '..WAV_CHUNK_SIZE..'ms '..'-i '..media_path..' -ar 16000 -ac 1 -c:a pcm_s16le -y '..TMP_WAV_PATH..' 2>&1'
    local pipe = io.popen(cmd, 'r')
    if not pipe then
        return false
    end

    -- stderr is folded into stdout above, so this is ffmpeg's error log
    local log = pipe:read('*all')
    print(log)
    pipe:close()

    local failed = log:find 'No such file' or log:find 'Invalid'
    return not failed
end
-- Check whether the extracted WAV file holds at least CHUNK_SIZE ms of audio.
-- This only matters for streams that are still being downloaded. When the
-- file is too short (or its duration cannot be determined) a "waiting"
-- message is shown on screen.
--
-- current_pos: unused; kept so existing callers keep working
-- returns:     true when the WAV is long enough, false otherwise
local function isWavLongEnough(current_pos)
    local duration
    local handle = io.popen("ffprobe -i "..TMP_WAV_PATH.." -show_format -v quiet | sed -n 's/duration=//p'", 'r')
    -- io.popen can fail; treat that the same as "duration unknown"
    if handle then
        local output = handle:read('*all')
        handle:close()
        duration = tonumber(output) -- seconds, or nil when ffprobe printed nothing usable
    end
    if duration and duration * 1000 >= CHUNK_SIZE then
        return true
    end
    mp.commandv('show-text','Whisper Subtitles: Waiting for more stream to download', 3000)
    return false
end
-- Check whether the stream download process has exited (ps reports it in
-- state "Z"). If so, mark the stream as downloaded and release the handle.
--
-- Does nothing when no download was started or ps cannot be run.
local function checkStreamStatus()
    if not stream_cmd then
        return
    end
    local handle = io.popen('ps --no-headers -o state -C "'..stream_cmd..'"', 'r')
    if not handle then
        return
    end
    local states = handle:read("*all")
    handle:close()
    if states:find('Z', 1, true) then
        stream_downloaded = true
        -- stop() may already have closed and cleared the handle
        if stream_process then
            stream_process:read('*all') -- drain remaining output before closing
            stream_process:close()
            stream_process = nil
        end
        mp.commandv('show-text','Whisper Subtitles: Finished downloading stream')
    end
end
-- Launch yt-dlp to download the stream's audio into TMP_STREAM_PATH.
-- The command line and the process handle are kept in stream_cmd and
-- stream_process for checkStreamStatus() and stop().
--
-- stream_path: stream location, appended to the command line verbatim
local function startStream(stream_path)
    local downloader = 'yt-dlp --no-part -r 10M -x -o '
    stream_cmd = downloader..TMP_STREAM_PATH..' '..stream_path
    stream_process = io.popen(stream_cmd)
    mp.commandv('show-text','Whisper Subtitles: Stream download started')
end
-- Process one chunk of a stream that is being downloaded, then reschedule
-- itself until the whole stream has been handled or `running` is cleared.
--
-- file_length: total length of the media in ms
-- current_pos: start of the chunk to process in ms
local function runStream(file_length, current_pos)
    if not running then
        return
    end

    -- Towards the end of the file, process only what is left
    local remaining = file_length - current_pos
    if remaining < CHUNK_SIZE then
        CHUNK_SIZE = remaining
    end

    if not (remaining > 0) then
        mp.commandv('show-text', 'Whisper Subtitles: Subtitles finished processing', 3000)
        return
    end

    local next_pos = current_pos
    if createWAV(TMP_STREAM_PATH..'*', current_pos) then
        -- While the download is still running the WAV may be too short
        local ready = true
        if not stream_downloaded then
            checkStreamStatus()
            ready = isWavLongEnough(current_pos)
        end
        if ready then
            next_pos = appendSubs(current_pos)
        else
            -- give the download a moment before trying again
            os.execute('sleep 1')
        end
    end

    -- Callback: continue with the next (or the same) chunk
    mp.add_timeout(0.1, function() runStream(file_length, next_pos) end)
end
-- Process one chunk of a local media file, then reschedule itself until the
-- whole file has been handled or `running` is cleared. When nothing is left,
-- the subtitles are saved next to the media file and the scratch files are
-- removed.
--
-- media_path:  path of the media file, already quoted for the shell
-- file_length: total length of the media in ms
-- current_pos: start of the chunk to process in ms
local function runLocal(media_path, file_length, current_pos)
    if running then
        -- Towards the end of the file, process only what is left
        local time_left = file_length - current_pos
        if (time_left < CHUNK_SIZE) then
            CHUNK_SIZE = time_left
        end
        if (time_left > 0) then
            -- Pass the path as-is. Appending '*' (as the stream variant does
            -- for its temp file) would turn the quoted path into a shell glob
            -- that can also match sibling files such as "movie.mkv.bak" and
            -- hand ffmpeg extra positional arguments.
            if (createWAV(media_path, current_pos)) then
                current_pos = appendSubs(current_pos)
            end
            -- Callback
            mp.add_timeout(0.1, function() runLocal(media_path, file_length, current_pos) end)
        else
            saveSubs(media_path)
            cleanup()
        end
    end
end
-- Chunk length as configured at load time. start(), runStream() and
-- runLocal() shrink CHUNK_SIZE near the end of a file, so start() restores
-- it from this copy for every new file.
local DEFAULT_CHUNK_SIZE = CHUNK_SIZE

-- Begin generating subtitles for the media that is currently loaded.
-- Network media is downloaded with yt-dlp and processed by runStream();
-- local files are processed by runLocal().
local function start()
    -- Both properties are nil when nothing is loaded (or not loaded yet)
    local raw_path = mp.get_property('path')
    local duration = mp.get_property_number('duration/full')
    if not raw_path or not duration then
        mp.commandv('show-text', 'Whisper Subtitles: media path or duration not available', 3000)
        return
    end

    -- Wrap in double quotes so spaces survive the shell, and escape the
    -- characters that stay special inside double quotes (\ " $ `).
    local media_path = '"'..(raw_path:gsub('[\\"$`]', '\\%0'))..'"'
    local file_length = duration * 1000
    local current_pos = INIT_POS
    stream_process = nil

    -- Undo any shrinking left over from the previous file
    CHUNK_SIZE = DEFAULT_CHUNK_SIZE
    -- In the rare case that the media is shorter than CHUNK_SIZE
    local time_left = file_length - current_pos
    if (time_left < CHUNK_SIZE) then
        CHUNK_SIZE = time_left
    end

    -- Determine if media is a stream
    if mp.get_property('demuxer-via-network') == 'yes' then
        stream_downloaded = false
        startStream(media_path)

        -- Wait for enough of the stream to be downloaded
        local ready = false
        for _ = 0, STREAM_TIMEOUT do
            if createWAV(TMP_STREAM_PATH..'*', current_pos) and isWavLongEnough(current_pos) then
                ready = true
                break
            end
            os.execute('sleep 1')
        end

        if not ready then
            mp.commandv('show-text', 'Whisper Subtitles: Timed out waiting for stream for '..STREAM_TIMEOUT..' seconds', 5000)
            stop()
            return
        end

        current_pos = createSubs(current_pos)
        mp.add_timeout(0.1, function() runStream(file_length, current_pos) end)
    else
        createWAV(media_path, current_pos)
        current_pos = createSubs(current_pos)
        mp.add_timeout(0.1, function() runLocal(media_path, file_length, current_pos) end)
    end
end
-- Switch subtitle generation on or off.
--
-- Turning it on processes the current file right away and hooks start()/stop()
-- to mpv's file events so that following files are handled too; turning it off
-- removes those hooks and stops any work in progress.
local function toggle()
    if running then
        running = false
        mp.commandv('show-text', 'Whisper subtitles: no')
        -- mp.unregister_event takes only the handler function and removes it
        -- from every event it was registered for.
        mp.unregister_event(start)
        mp.unregister_event(stop)
        stop()
    else
        running = true
        mp.commandv('show-text', 'Whisper subtitles: yes')
        -- 'file-loaded' rather than 'start-file': start() reads the media
        -- duration, which is not known before the file has been loaded.
        mp.register_event('file-loaded', start)
        mp.register_event('end-file', stop)
        start()
    end
end
-- Bind ctrl+. to toggle(), registered under the binding name "whisper_subs".
mp.add_key_binding('ctrl+.', 'whisper_subs', toggle)