We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a903e57, commit b9c8c50 (Copy full SHA for b9c8c50)
whisperx/asr.py
@@ -251,7 +251,10 @@ def data(audio, segments):
251
252
253
def detect_language(self, audio: np.ndarray):
254
- segment = log_mel_spectrogram(audio[: N_SAMPLES], padding=0)
+ if audio.shape[0] < N_SAMPLES:
255
+ print("Warning: audio is shorter than 30s, language detection may be inaccurate.")
256
+ segment = log_mel_spectrogram(audio[: N_SAMPLES],
257
+ padding=0 if audio.shape[0] >= N_SAMPLES else N_SAMPLES - audio.shape[0])
258
encoder_output = self.model.encode(segment)
259
results = self.model.model.detect_language(encoder_output)
260
language_token, language_probability = results[0][0]
0 commit comments