-
Notifications
You must be signed in to change notification settings - Fork 13
/
fast_video.py
142 lines (112 loc) · 4.33 KB
/
fast_video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
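# For now, speed up silence by 2.0x, therefore dropping every other frame.
# NOTE: the speed factor is actually read from sys.argv[2], and a frame counts as
# "silence" when its peak sample value is below 500 (the main-loop threshold), not 700.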
import math
import os
import subprocess
import sys

import cv2
import numpy as np
from audiotsm import phasevocoder
from scipy.io import wavfile

from arrayWav import ArrReader, ArrWriter
# Command-line arguments: path of the input video and the speed factor that is
# applied to silent stretches.
videoFile = sys.argv[1]
silentSpeed = float(sys.argv[2])

# Open the input video and read the properties needed to create the writer.
cap = cv2.VideoCapture(videoFile)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
fps = round(cap.get(cv2.CAP_PROP_FPS))

# Extract the audio track to output.wav (2 channels, 44100 Hz).
# The command is passed as an argument list without a shell so that a video
# path containing spaces or shell metacharacters reaches ffmpeg as one
# argument instead of being split or interpreted.
extractAudio = [
    "ffmpeg", "-i", videoFile,
    "-ab", "160k", "-ac", "2", "-ar", "44100", "-vn",
    "output.wav",
]
subprocess.call(extractAudio)

out = cv2.VideoWriter("spedup.mp4", fourcc, fps, (width, height))
sampleRate, audioData = wavfile.read("output.wav")
skipped = 0
# Number of audio channels, taken from the second axis of the sample array.
channels = int(audioData.shape[1])
def getMaxVolume(s):
    """Return the peak absolute amplitude found in ``s``.

    Args:
        s: Array-like of audio samples (any shape).

    Returns:
        The larger of ``max(s)`` and ``-min(s)`` as a native Python number,
        or ``0`` when ``s`` contains no samples.
    """
    samples = np.asarray(s)
    # An empty chunk (e.g. a slice past the end of the audio) has no peak;
    # np.max/np.min would raise ValueError on it, so report it as silent.
    if samples.size == 0:
        return 0
    # Convert to native Python numbers before negating: negating the int16
    # minimum (-32768) in int16 arithmetic wraps back to -32768.
    highest = samples.max().item()
    lowest = samples.min().item()
    return max(highest, -lowest)
# Running count of frames written to the output video so far.
nFrames = 0


def writeFrames(frames, nAudio, speed, samplePerSecond, writer):
    """Write sped-up frames until the video catches up with the audio.

    The target frame count is ``round(nAudio / samplePerSecond * fps)``; the
    difference between that target and the global ``nFrames`` is the number
    of frames written. Frame ``i`` of the output is taken from
    ``frames[round(i * speed)]``, clamped to the last buffered frame.

    Args:
        frames: Sequence of buffered video frames to sample from.
        nAudio: Number of audio samples written to the output so far.
        speed: Speed-up factor used as the stride through ``frames``.
        samplePerSecond: Audio sample rate.
        writer: Object with a ``write(frame)`` method.

    Side effects:
        Increments the global ``nFrames`` once per frame written. Reads the
        global ``fps``.
    """
    global nFrames
    numAudioChunks = round(nAudio / samplePerSecond * fps)
    numWrites = numAudioChunks - nFrames
    # Nothing buffered means there is nothing to sample from; indexing
    # frames[-1] would raise IndexError, so leave nFrames untouched.
    if not frames:
        return
    # len(frames) is one past the last valid index.
    limit = len(frames) - 1
    for i in range(numWrites):
        frameIndex = min(round(i * speed), limit)
        writer.write(frames[frameIndex])
        nFrames += 1
# --- State used by the main processing loop ---

# 0 for silent, 1 for normal
normal = 0
# Sample index at which the current not-yet-emitted audio stretch begins.
switchStart = 0
# Write position inside the output audio buffer.
yPointer = 0
# Video frames collected while the audio is silent.
frameBuffer = []

# Peak amplitude of the whole track.
maxVolume = getMaxVolume(audioData)

# Linear 0..1 ramp over fadeInSamples samples, duplicated into two columns.
fadeInSamples = 400
preMask = np.arange(fadeInSamples) / fadeInSamples
mask = np.tile(preMask[:, np.newaxis], (1, 2))

# Output audio buffer: same shape as the input, 16-bit samples, zero-filled.
y = np.zeros(audioData.shape, dtype=np.int16)
# Main loop: read the video frame by frame, classify each frame as silent or
# loud from the matching slice of audio, copy loud frames/audio through
# unchanged, and emit each silent stretch sped up by silentSpeed.
# NOTE(review): the indentation of this block was lost in this copy of the
# file; the nesting below is reconstructed from the comments and data flow.
# Confirm it against the original before relying on it.
while (cap.isOpened()):
    ret, frame = cap.read()
    # 1000 milliseconds == 1 second; the sample rate is per second, so the
    # position is converted to seconds as well.
    currentTime = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000
    if ret == False:
        break
    audioSampleStart = math.floor(currentTime * sampleRate)
    # import pdb; pdb.set_trace()
    # audioSampleStart + one frame worth of samples
    audioSampleEnd = audioSampleStart + (sampleRate // fps)
    switchEnd = audioSampleEnd
    audioChunk = audioData[audioSampleStart:audioSampleEnd]
    # if it's quiet
    if getMaxVolume(audioChunk) < 500:
        skipped += 1
        # Buffer the frame; it is written later, when the silent stretch ends.
        frameBuffer.append(frame)
        normal = 0
    else: # if it's 'loud'
        # and the last frame is 'loud'
        if normal:
            # Loud stretch continues: copy the frame and its audio through as is.
            out.write(frame)
            nFrames += 1
            switchStart = switchEnd
            yPointerEnd = yPointer + audioChunk.shape[0]
            y[yPointer : yPointerEnd] = audioChunk
            yPointer = yPointerEnd
        else:
            # Silent -> loud transition: time-stretch all audio from
            # switchStart up to the end of this frame's chunk by silentSpeed.
            spedChunk = audioData[switchStart:switchEnd]
            spedupAudio = np.zeros((0,2), dtype=np.int16)
            # ArrReader (array, channels, samplerate, samplewidth)
            with ArrReader(spedChunk, channels, sampleRate, 2) as reader:
                # 2 as sampleWidth for now
                with ArrWriter(spedupAudio, channels, sampleRate, 2) as writer:
                    tsm = phasevocoder(reader.channels, speed=silentSpeed)
                    tsm.run(reader, writer)
                    spedupAudio = writer.output
            yPointerEnd = yPointer + spedupAudio.shape[0]
            y[yPointer : yPointerEnd] = spedupAudio
            yPointer = yPointerEnd
            # Write buffered frames until the video length matches the audio
            # written so far, then start a fresh buffer.
            writeFrames(frameBuffer, yPointerEnd, silentSpeed, sampleRate, out)
            frameBuffer = []
            switchStart = switchEnd
        normal = 1
    # Progress report.
    # NOTE(review): whether the increment below belongs inside this `if` or at
    # loop level cannot be determined from this copy; it only affects the
    # printed counter, not the produced audio/video. Confirm against the original.
    if skipped % 1000 == 0:
        print("{} frames inspected".format(skipped))
        skipped += 1
# Keep only the part of the audio buffer that was actually filled and save it.
y = y[:yPointer]
wavfile.write("spedupAudio.wav", sampleRate, y)

# Release the OpenCV reader/writer so spedup.mp4 is complete before merging.
cap.release()
out.release()
cv2.destroyAllWindows()

# Output name: "<input without extension>_faster<extension>". splitext only
# treats a dot in the final path component as the extension separator, so
# names without an extension or directories containing dots are not mangled.
baseName, extension = os.path.splitext(videoFile)
outFile = "{}_faster{}".format(baseName, extension)

# Merge the video and the processed audio. The command is an argument list
# run without a shell so that an output path with spaces or shell
# metacharacters is passed to ffmpeg as a single argument.
mergeCommand = [
    "ffmpeg", "-i", "spedup.mp4", "-i", "spedupAudio.wav",
    "-c:v", "copy", "-c:a", "aac",
    outFile,
]
error = subprocess.call(mergeCommand)
if error == 0:
    # Remove the intermediate files only after a successful merge. os.remove
    # works on every platform, unlike shelling out to `rm` or `del`.
    for tempFile in ("output.wav", "spedup.mp4", "spedupAudio.wav"):
        try:
            os.remove(tempFile)
        except OSError as removeError:
            # Cleanup is best-effort: report the problem and carry on.
            print("could not remove {}: {}".format(tempFile, removeError))