Skip to content

Commit

Permalink
feat: add SegmentData type for temporary processing during alignment
Browse files (browse the repository at this point in the history)
  • Loading branch information
Barabazs committed Jan 13, 2025
1 parent 024bc84 commit 2f93e02
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
11 changes: 9 additions & 2 deletions whisperx/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Forced Alignment with Whisper
C. Max Bain
"""

from dataclasses import dataclass
from typing import Iterable, Optional, Union, List

Expand All @@ -13,7 +14,13 @@

from .audio import SAMPLE_RATE, load_audio
from .utils import interpolate_nans
from .types import AlignedTranscriptionResult, SingleSegment, SingleAlignedSegment, SingleWordSegment
from .types import (
AlignedTranscriptionResult,
SingleSegment,
SingleAlignedSegment,
SingleWordSegment,
SegmentData,
)
from nltk.tokenize.punkt import PunktSentenceTokenizer, PunktParameters

PUNKT_ABBREVIATIONS = ['dr', 'vs', 'mr', 'mrs', 'prof']
Expand Down Expand Up @@ -131,7 +138,7 @@ def align(
# 1. Preprocess to keep only characters in dictionary
total_segments = len(transcript)
# Store temporary processing values
segment_data = {}
segment_data: dict[int, SegmentData] = {}
for sdx, segment in enumerate(transcript):
# strip spaces at beginning / end, but keep track of the amount.
if print_progress:
Expand Down
13 changes: 12 additions & 1 deletion whisperx/types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TypedDict, Optional, List
from typing import TypedDict, Optional, List, Tuple


class SingleWordSegment(TypedDict):
Expand Down Expand Up @@ -30,6 +30,17 @@ class SingleSegment(TypedDict):
text: str


class SegmentData(TypedDict):
    """
    Temporary per-segment processing data used during alignment.

    Holds the cleaned and preprocessed values computed for one transcript
    segment. This is intermediate working state for the alignment step,
    not part of the returned transcription result.
    """

    # Characters of the segment text that exist in the model dictionary.
    clean_char: List[str]
    # Index of each cleaned character in the original segment text
    # (presumably parallel to ``clean_char`` -- confirm against ``align``).
    clean_cdx: List[int]
    # Indices of the words that contain at least one valid character.
    clean_wdx: List[int]
    # (start, end) index pairs, one per sentence in the segment.
    sentence_spans: List[Tuple[int, int]]


class SingleAlignedSegment(TypedDict):
"""
A single segment (up to multiple sentences) of a speech with word alignment.
Expand Down

0 comments on commit 2f93e02

Please sign in to comment.