Skip to content

Commit

Permalink
catch word without start time #53
Browse files: browse the repository at this point in the history
  • Loading branch information
abdeladim-s committed Jul 19, 2023
1 parent 3b355f2 commit 7297b24
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/subsai/models/whisperX_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
See [m-bain/whisperX](https://github.com/m-bain/whisperX)
"""

import logging
from typing import Tuple
import pysubs2
import torch
Expand Down Expand Up @@ -143,9 +143,14 @@ def transcribe(self, media_file) -> str:
if self.segment_type == 'word': # word level timestamps
for segment in result['segments']:
for word in segment['words']:
event = SSAEvent(start=pysubs2.make_time(s=word["start"]), end=pysubs2.make_time(s=word["end"]))
event.plaintext = word["word"].strip()
subs.append(event)
try:
event = SSAEvent(start=pysubs2.make_time(s=word["start"]), end=pysubs2.make_time(s=word["end"]))
event.plaintext = word["word"].strip()
subs.append(event)
except Exception as e:
logging.warning(f"Something wrong with {word}")
logging.warning(e)

elif self.segment_type == 'sentence':
for segment in result['segments']:
event = SSAEvent(start=pysubs2.make_time(s=segment["start"]), end=pysubs2.make_time(s=segment["end"]))
Expand Down

0 comments on commit 7297b24

Please sign in to comment.