From 2b16587daaf40ba5204040eefb731cea4e9fdf35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sat, 21 Mar 2026 22:10:14 +0100 Subject: [PATCH 1/9] Use PGS as a reference for subtitles --- ffsubsync/ffsubsync.py | 35 ++++++- ffsubsync/speech_transformers.py | 157 ++++++++++++++++++++++++++++++- 2 files changed, 189 insertions(+), 3 deletions(-) diff --git a/ffsubsync/ffsubsync.py b/ffsubsync/ffsubsync.py index b0114cc..a0940de 100755 --- a/ffsubsync/ffsubsync.py +++ b/ffsubsync/ffsubsync.py @@ -30,6 +30,7 @@ from ffsubsync.speech_transformers import ( VideoSpeechTransformer, DeserializeSpeechTransformer, + PGSSpeechTransformer, make_subtitle_speech_pipeline, ) from ffsubsync.subtitle_parser import make_subtitle_parser @@ -220,6 +221,24 @@ def try_sync( def make_reference_pipe(args: argparse.Namespace) -> Pipeline: + pgs_stream = getattr(args, "pgs_ref_stream", None) + if pgs_stream is not None: + if not pgs_stream.startswith("0:"): + pgs_stream = "0:" + pgs_stream + return Pipeline( + [ + ( + "speech_extract", + PGSSpeechTransformer( + sample_rate=SAMPLE_RATE, + start_seconds=args.start_seconds, + ffmpeg_path=args.ffmpeg_path, + ref_stream=pgs_stream, + gui_mode=args.gui_mode, + ), + ), + ] + ) ref_format = _ref_format(args.reference) if ref_format in SUBTITLE_EXTENSIONS: if args.vad is not None: @@ -451,7 +470,7 @@ def _run_impl(args: argparse.Namespace, result: Dict[str, Any]) -> bool: def validate_and_transform_args( - parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace] + parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace], ) -> Optional[argparse.Namespace]: if isinstance(parser_or_args, argparse.Namespace): parser = None @@ -484,7 +503,7 @@ def validate_and_transform_args( def run( - parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace] + parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace], ) -> Dict[str, Any]: sync_was_successful = False result = { @@ -556,6 +575,18 @@ def add_main_args_for_cli(parser: argparse.ArgumentParser) -> None: "Example: `ffs ref.mkv -i in.srt -o out.srt --reference-stream s:2`" ), ) + parser.add_argument( + "--pgs-ref-stream", + "--pgsstream", + default=None, + help=( + "Extract PGS (Presentation Graphic Stream) image-based subtitles from " + "the specified stream in the reference MKV and use their on-screen " + "timings as sync reference instead of audio voice-activity detection. " + "Formatted like ffmpeg stream specifiers (leading `0:` is optional). " + "Example: `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream s:0`" + ), + ) def add_cli_only_args(parser: argparse.ArgumentParser) -> None: diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index 38883d1..6350f75 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -6,7 +6,7 @@ import subprocess import sys from datetime import timedelta -from typing import cast, Callable, Dict, List, Optional, Union +from typing import cast, Callable, Dict, List, Optional, Tuple, Union import ffmpeg import numpy as np @@ -531,3 +531,158 @@ def fit(self, fname, *_) -> "DeserializeSpeechTransformer": def transform(self, *_) -> np.ndarray: assert self.deserialized_speech_results_ is not None return self.deserialized_speech_results_ + + +def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]: + """Parse raw PGS (Presentation Graphic Stream / SUP) binary data. + + Each PGS display set is introduced by a Presentation Composition Segment + (PCS, type 0x16). The PCS carries the number of composition objects: > 0 + means a subtitle image is now on-screen, 0 means the screen is being + cleared. We walk through all PCS segments and pair "show" and "clear" + events to produce (start_seconds, end_seconds) intervals. + """ + import struct + + PGS_MAGIC = b"\x50\x47" # "PG" – PGS packet sync word + SEG_PCS = 0x16 + # PCS layout after the 13-byte header: + # video_w(2) + video_h(2) + frame_rate(1) + comp_number(2) + + # comp_state(1) + palette_update_flag(1) + palette_id(1) + num_objects(1) + PCS_PALETTE_UPDATE_OFFSET = 8 # offset of palette_update_flag inside PCS data + PCS_NUM_OBJECTS_OFFSET = 10 # offset of num_comp_objects inside PCS data + PCS_MIN_LENGTH = 11 # minimum PCS data length to read the above + + HEADER_SIZE = 13 # 2 magic + 4 PTS + 4 DTS + 1 type + 2 length + + results: List[Tuple[float, float]] = [] + current_start: Optional[float] = None + pos = 0 + + while pos + HEADER_SIZE <= len(data): + if data[pos : pos + 2] != PGS_MAGIC: + # lost sync – try to find the next magic bytes + next_magic = data.find(PGS_MAGIC, pos + 1) + if next_magic == -1: + break + pos = next_magic + continue + + # PTS is in 90 kHz ticks + pts: float = struct.unpack_from(">I", data, pos + 2)[0] / 90000.0 + seg_type: int = data[pos + 10] + seg_length: int = struct.unpack_from(">H", data, pos + 11)[0] + + if seg_type == SEG_PCS and seg_length >= PCS_MIN_LENGTH: + pcs_start = pos + HEADER_SIZE + palette_update_flag: int = data[pcs_start + PCS_PALETTE_UPDATE_OFFSET] + num_objects: int = data[pcs_start + PCS_NUM_OBJECTS_OFFSET] + + if palette_update_flag == 0x80: + # palette-only update – displayed subtitle is unchanged, skip + pass + elif num_objects > 0: + current_start = pts + elif current_start is not None: + results.append((current_start, pts)) + current_start = None + + pos += HEADER_SIZE + seg_length + + return results + + +class PGSSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin): + """Use PGS (Presentation Graphic Stream) subtitle timings as a sync reference. + + PGS subtitles are bitmap-based (e.g. Blu-ray) and cannot be converted to + text by ffmpeg. This transformer extracts the raw SUP stream from the + video file, parses the on-screen / off-screen timestamps from the binary + Presentation Composition Segments, and builds the same kind of sparse + binary signal that :class:`SubtitleSpeechTransformer` produces for text + subtitles. The resulting signal can then be aligned against the input + subtitle file in the normal ffsubsync pipeline. + """ + + def __init__( + self, + sample_rate: int, + start_seconds: int = 0, + ffmpeg_path: Optional[str] = None, + ref_stream: Optional[str] = None, + gui_mode: bool = False, + ) -> None: + super(PGSSpeechTransformer, self).__init__() + self.sample_rate: int = sample_rate + self.start_seconds: int = start_seconds + self.ffmpeg_path: Optional[str] = ffmpeg_path + self.ref_stream: Optional[str] = ref_stream + self.gui_mode: bool = gui_mode + self.pgs_speech_results_: Optional[np.ndarray] = None + + def fit(self, fname: str, *_) -> "PGSSpeechTransformer": + stream = self.ref_stream if self.ref_stream is not None else "0:s:0" + if not stream.startswith("0:"): + stream = "0:" + stream + + ffmpeg_args = [ + ffmpeg_bin_path( + "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path + ), + "-loglevel", + "fatal", + "-nostdin", + "-i", + fname, + "-map", + stream, + "-c:s", + "copy", + "-f", + "sup", + "-", + ] + + logger.info("extracting PGS subtitle stream %s from %s...", stream, fname) + process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True)) + pgs_data, _ = process.communicate() + + if process.returncode != 0 or not pgs_data: + raise ValueError( + "Failed to extract PGS stream {} from {}. " + "Make sure the stream exists and is an hdmv_pgs_subtitle track " + "(check with: ffprobe -show_streams {}).".format(stream, fname, fname) + ) + + logger.info("...done; parsing PGS timings...") + timings = _parse_pgs_timings(pgs_data) + + if not timings: + raise ValueError( + "No subtitle timings found in PGS stream {}.".format(stream) + ) + + logger.info("found %d PGS subtitle segments", len(timings)) + + max_time = max(end for _, end in timings) + num_samples = int(max_time * self.sample_rate) + 2 + samples = np.zeros(num_samples, dtype=float) + + for start, end in timings: + start_sample = int(round((start - self.start_seconds) * self.sample_rate)) + end_sample = int(round((end - self.start_seconds) * self.sample_rate)) + start_sample = max(start_sample, 0) + end_sample = min(end_sample, num_samples) + if start_sample < end_sample: + samples[start_sample:end_sample] = 1.0 + + self.pgs_speech_results_ = samples + self.fit_boundaries(self.pgs_speech_results_) + logger.info( + "total PGS subtitle frames: %d", int(np.sum(self.pgs_speech_results_)) + ) + return self + + def transform(self, *_) -> np.ndarray: + assert self.pgs_speech_results_ is not None + return self.pgs_speech_results_ From a0c262534ec872fe23a4b778f3c1e023d768ee52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sat, 21 Mar 2026 22:17:04 +0100 Subject: [PATCH 2/9] Automatically detect PGS stream --- ffsubsync/ffsubsync.py | 21 ++++++++------ ffsubsync/speech_transformers.py | 49 ++++++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/ffsubsync/ffsubsync.py b/ffsubsync/ffsubsync.py index a0940de..9f54151 100755 --- a/ffsubsync/ffsubsync.py +++ b/ffsubsync/ffsubsync.py @@ -223,8 +223,10 @@ def try_sync( def make_reference_pipe(args: argparse.Namespace) -> Pipeline: pgs_stream = getattr(args, "pgs_ref_stream", None) if pgs_stream is not None: - if not pgs_stream.startswith("0:"): - pgs_stream = "0:" + pgs_stream + # "auto" (bare --pgs-ref-stream flag) → let PGSSpeechTransformer auto-detect + resolved_stream: Optional[str] = None if pgs_stream == "auto" else pgs_stream + if resolved_stream is not None and not resolved_stream.startswith("0:"): + resolved_stream = "0:" + resolved_stream return Pipeline( [ ( @@ -233,7 +235,7 @@ def make_reference_pipe(args: argparse.Namespace) -> Pipeline: sample_rate=SAMPLE_RATE, start_seconds=args.start_seconds, ffmpeg_path=args.ffmpeg_path, - ref_stream=pgs_stream, + ref_stream=resolved_stream, gui_mode=args.gui_mode, ), ), @@ -578,13 +580,16 @@ def add_main_args_for_cli(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--pgs-ref-stream", "--pgsstream", + nargs="?", + const="auto", default=None, help=( - "Extract PGS (Presentation Graphic Stream) image-based subtitles from " - "the specified stream in the reference MKV and use their on-screen " - "timings as sync reference instead of audio voice-activity detection. " - "Formatted like ffmpeg stream specifiers (leading `0:` is optional). " - "Example: `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream s:0`" + "Use a PGS (Presentation Graphic Stream) image-based subtitle track from " + "the reference MKV as the sync reference instead of audio VAD. " + "Optionally specify the stream (leading `0:` is optional, e.g. `s:0` or `3`). " + "Omit the value to auto-detect the first hdmv_pgs_subtitle track. " + "Example: `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream` (auto) " + "or `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream s:2` (explicit)." ), ) diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index 6350f75..b2638d5 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -533,6 +533,41 @@ def transform(self, *_) -> np.ndarray: return self.deserialized_speech_results_ +def find_pgs_stream( + fname: str, + ffmpeg_path: Optional[str] = None, + gui_mode: bool = False, +) -> Optional[str]: + """Return the ffmpeg stream specifier for the first PGS subtitle track in *fname*. + + Uses ``ffprobe`` to inspect the file. Returns a string like ``"0:s:0"`` on + success, or ``None`` if the file has no ``hdmv_pgs_subtitle`` streams. + """ + try: + probe = ffmpeg.probe( + fname, + cmd=ffmpeg_bin_path("ffprobe", gui_mode, ffmpeg_resources_path=ffmpeg_path), + ) + except Exception as e: + logger.warning("ffprobe failed while searching for PGS streams: %s", e) + return None + + sub_index = 0 + for stream in probe.get("streams", []): + if stream.get("codec_type") == "subtitle": + if stream.get("codec_name") == "hdmv_pgs_subtitle": + specifier = "0:s:{}".format(sub_index) + logger.info( + "auto-detected PGS stream: %s (ffmpeg stream index %s)", + specifier, + stream.get("index"), + ) + return specifier + sub_index += 1 + + return None + + def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]: """Parse raw PGS (Presentation Graphic Stream / SUP) binary data. @@ -621,9 +656,17 @@ def __init__( self.pgs_speech_results_: Optional[np.ndarray] = None def fit(self, fname: str, *_) -> "PGSSpeechTransformer": - stream = self.ref_stream if self.ref_stream is not None else "0:s:0" - if not stream.startswith("0:"): - stream = "0:" + stream + if self.ref_stream is None: + stream = find_pgs_stream(fname, self.ffmpeg_path, self.gui_mode) + if stream is None: + raise ValueError( + "No hdmv_pgs_subtitle stream found in {}. " + "Specify one explicitly with --pgs-ref-stream.".format(fname) + ) + else: + stream = self.ref_stream + if not stream.startswith("0:"): + stream = "0:" + stream ffmpeg_args = [ ffmpeg_bin_path( From dc350e0e77308167687904d0cfc5eb834efbf9df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sat, 21 Mar 2026 22:49:37 +0100 Subject: [PATCH 3/9] Fix PGS timing --- ffsubsync/speech_transformers.py | 76 +++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 22 deletions(-) diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index b2638d5..aeb3443 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -571,24 +571,37 @@ def find_pgs_stream( def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]: """Parse raw PGS (Presentation Graphic Stream / SUP) binary data. - Each PGS display set is introduced by a Presentation Composition Segment - (PCS, type 0x16). The PCS carries the number of composition objects: > 0 - means a subtitle image is now on-screen, 0 means the screen is being - cleared. We walk through all PCS segments and pair "show" and "clear" - events to produce (start_seconds, end_seconds) intervals. + PGS packet header (13 bytes): + 0-1 : "PG" magic (0x50 0x47) + 2-5 : PTS – unsigned 32-bit big-endian, 90 kHz ticks + 6-9 : DTS – unsigned 32-bit big-endian (ignored here) + 10 : segment type (0x16 = PCS, 0x14 = PDS, 0x15 = ODS, 0x17 = WDS, 0x80 = END) + 11-12: segment data length, big-endian + + PCS data layout (offsets from byte 13): + 0-1 : video width + 2-3 : video height + 4 : frame rate byte + 5-6 : composition number + 7 : composition state (0x00=Normal, 0x40=Acquisition, 0x80=Epoch Start) + 8 : palette update flag (0x80 = palette-only, subtitle unchanged) + 9 : palette ID + 10 : number of composition objects + + Logic: + - Palette-update-only PCS → skip (existing subtitle is unchanged). + - Any other PCS → close any currently-open subtitle first (handles + both explicit clears and back-to-back / epoch-start transitions), + then open a new one if num_objects > 0. """ import struct - PGS_MAGIC = b"\x50\x47" # "PG" – PGS packet sync word + PGS_MAGIC = b"\x50\x47" # "PG" SEG_PCS = 0x16 - # PCS layout after the 13-byte header: - # video_w(2) + video_h(2) + frame_rate(1) + comp_number(2) + - # comp_state(1) + palette_update_flag(1) + palette_id(1) + num_objects(1) - PCS_PALETTE_UPDATE_OFFSET = 8 # offset of palette_update_flag inside PCS data - PCS_NUM_OBJECTS_OFFSET = 10 # offset of num_comp_objects inside PCS data - PCS_MIN_LENGTH = 11 # minimum PCS data length to read the above - - HEADER_SIZE = 13 # 2 magic + 4 PTS + 4 DTS + 1 type + 2 length + PCS_PALETTE_UPDATE_OFFSET = 8 + PCS_NUM_OBJECTS_OFFSET = 10 + PCS_MIN_LENGTH = 11 + HEADER_SIZE = 13 results: List[Tuple[float, float]] = [] current_start: Optional[float] = None @@ -596,14 +609,12 @@ def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]: while pos + HEADER_SIZE <= len(data): if data[pos : pos + 2] != PGS_MAGIC: - # lost sync – try to find the next magic bytes next_magic = data.find(PGS_MAGIC, pos + 1) if next_magic == -1: break pos = next_magic continue - # PTS is in 90 kHz ticks pts: float = struct.unpack_from(">I", data, pos + 2)[0] / 90000.0 seg_type: int = data[pos + 10] seg_length: int = struct.unpack_from(">H", data, pos + 11)[0] @@ -614,13 +625,19 @@ def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]: num_objects: int = data[pcs_start + PCS_NUM_OBJECTS_OFFSET] if palette_update_flag == 0x80: - # palette-only update – displayed subtitle is unchanged, skip + # Palette-only update: subtitle image unchanged, skip entirely. pass - elif num_objects > 0: - current_start = pts - elif current_start is not None: - results.append((current_start, pts)) - current_start = None + else: + # Any other PCS closes the currently-displayed subtitle (if any). + # This correctly handles: + # - explicit clear (num_objects=0 after a subtitle) + # - back-to-back subtitles (num_objects>0 replaces previous) + # - epoch start (composition_state=0x80, implicit clear) + if current_start is not None: + results.append((current_start, pts)) + current_start = None + if num_objects > 0: + current_start = pts pos += HEADER_SIZE + seg_length @@ -639,6 +656,13 @@ class PGSSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin): subtitle file in the normal ffsubsync pipeline. """ + # PGS is already in the MKV timebase so its duration cannot be compared + # against the SRT to infer a framerate ratio. Returning None here prevents + # the duration-based framerate inference in try_sync from running. + @property + def num_frames(self) -> None: + return None + def __init__( self, sample_rate: int, @@ -706,6 +730,14 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer": ) logger.info("found %d PGS subtitle segments", len(timings)) + for i, (s, e) in enumerate(timings[:8]): + logger.debug( + " PGS[%d]: %s --> %s (%.3fs)", + i, + str(timedelta(seconds=s)), + str(timedelta(seconds=e)), + e - s, + ) max_time = max(end for _, end in timings) num_samples = int(max_time * self.sample_rate) + 2 From 362f9b4d950311d67aae4cc0f6d59df569019cf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sat, 21 Mar 2026 23:04:04 +0100 Subject: [PATCH 4/9] Fix syncing bug --- ffsubsync/ffsubsync.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ffsubsync/ffsubsync.py b/ffsubsync/ffsubsync.py index 9f54151..2d10c8d 100755 --- a/ffsubsync/ffsubsync.py +++ b/ffsubsync/ffsubsync.py @@ -150,8 +150,10 @@ def try_sync( continue else: srt_pipe.fit(srtin) - if not skip_infer_framerate_ratio and hasattr( - reference_pipe[-1], "num_frames" + if ( + not skip_infer_framerate_ratio + and hasattr(reference_pipe[-1], "num_frames") + and reference_pipe[-1].num_frames is not None ): inferred_framerate_ratio_from_length = ( float(reference_pipe[-1].num_frames) From a4b63a876bb45cc4ca405b32f53f27e4c331fb12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sat, 21 Mar 2026 23:31:03 +0100 Subject: [PATCH 5/9] Improve performance --- ffsubsync/speech_transformers.py | 144 ++++++++++++++++++++++++------- 1 file changed, 114 insertions(+), 30 deletions(-) diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index aeb3443..f0feee2 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -568,6 +568,84 @@ def find_pgs_stream( return None +def _get_pgs_timings_via_ffprobe( + fname: str, + stream: str, + ffmpeg_path: Optional[str] = None, + gui_mode: bool = False, +) -> Optional[List[Tuple[float, float]]]: + """Fast path: read PGS timings from MKV container metadata using ffprobe. + + MKV stores per-packet PTS and duration for subtitle streams, so we can + get start/end timestamps without extracting or parsing the raw SUP binary. + Show events are large packets with a numeric ``duration_time``; clear events + are tiny (~30-byte) packets with ``duration_time=N/A``. + + Returns a list of ``(start_seconds, end_seconds)`` tuples, or ``None`` if + ffprobe fails or the durations are not usable (fall back to SUP parsing). + """ + ffprobe_cmd = ffmpeg_bin_path( + "ffprobe", gui_mode, ffmpeg_resources_path=ffmpeg_path + ) + # ffprobe -select_streams does not accept the "0:" input-index prefix; + # strip it so "0:s:0" → "s:0" and "0:3" → "3". + probe_stream = stream[2:] if stream.startswith("0:") else stream + args = [ + ffprobe_cmd, + "-v", + "quiet", + "-show_packets", + "-select_streams", + probe_stream, + "-show_entries", + "packet=pts_time,duration_time,size", + fname, + ] + process = subprocess.Popen(args, **subprocess_args(include_stdout=True)) + stdout, _ = process.communicate() + if process.returncode != 0 or not stdout: + return None + + results: List[Tuple[float, float]] = [] + pts_time: Optional[float] = None + duration_time: Optional[float] = None + size: Optional[int] = None + + for raw_line in stdout.decode("utf-8", errors="replace").splitlines(): + line = raw_line.strip() + if line == "[PACKET]": + pts_time = duration_time = size = None + elif line == "[/PACKET]": + if ( + pts_time is not None + and duration_time is not None + and size is not None + and size > 50 # skip clear events (~30 bytes) + ): + results.append((pts_time, pts_time + duration_time)) + elif line.startswith("pts_time="): + try: + pts_time = float(line.split("=", 1)[1]) + except ValueError: + pass + elif line.startswith("duration_time="): + val = line.split("=", 1)[1] + if val != "N/A": + try: + duration_time = float(val) + except ValueError: + pass + elif line.startswith("size="): + try: + size = int(line.split("=", 1)[1]) + except ValueError: + pass + + if not results: + return None + return results + + def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]: """Parse raw PGS (Presentation Graphic Stream / SUP) binary data. @@ -692,37 +770,43 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer": if not stream.startswith("0:"): stream = "0:" + stream - ffmpeg_args = [ - ffmpeg_bin_path( - "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path - ), - "-loglevel", - "fatal", - "-nostdin", - "-i", - fname, - "-map", - stream, - "-c:s", - "copy", - "-f", - "sup", - "-", - ] - - logger.info("extracting PGS subtitle stream %s from %s...", stream, fname) - process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True)) - pgs_data, _ = process.communicate() - - if process.returncode != 0 or not pgs_data: - raise ValueError( - "Failed to extract PGS stream {} from {}. " - "Make sure the stream exists and is an hdmv_pgs_subtitle track " - "(check with: ffprobe -show_streams {}).".format(stream, fname, fname) + logger.info("reading PGS timings for stream %s from %s...", stream, fname) + timings = _get_pgs_timings_via_ffprobe( + fname, stream, self.ffmpeg_path, self.gui_mode + ) + if timings is None: + # Fallback: extract raw SUP stream and parse binary + logger.info("ffprobe fast path unavailable, extracting raw PGS stream...") + ffmpeg_args = [ + ffmpeg_bin_path( + "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path + ), + "-loglevel", + "fatal", + "-nostdin", + "-i", + fname, + "-map", + stream, + "-c:s", + "copy", + "-f", + "sup", + "-", + ] + process = subprocess.Popen( + ffmpeg_args, **subprocess_args(include_stdout=True) ) - - logger.info("...done; parsing PGS timings...") - timings = _parse_pgs_timings(pgs_data) + pgs_data, _ = process.communicate() + if process.returncode != 0 or not pgs_data: + raise ValueError( + "Failed to extract PGS stream {} from {}. " + "Make sure the stream exists and is an hdmv_pgs_subtitle track " + "(check with: ffprobe -show_streams {}).".format( + stream, fname, fname + ) + ) + timings = _parse_pgs_timings(pgs_data) if not timings: raise ValueError( From c2d84ec8b4e6938d2b6b57957a95d2f082c29d55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sun, 22 Mar 2026 00:02:38 +0100 Subject: [PATCH 6/9] Add mkvextract method --- ffsubsync/speech_transformers.py | 123 ++++++++++++++++++++++++++++++- 1 file changed, 120 insertions(+), 3 deletions(-) diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index f0feee2..3d36c1f 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -568,6 +568,118 @@ def find_pgs_stream( return None +def _get_pgs_timings_via_mkvextract( + fname: str, + stream: str, + gui_mode: bool = False, +) -> Optional[List[Tuple[float, float]]]: + """Fastest path: extract PGS SUP data using mkvextract (mkvtoolnix). + + mkvextract uses the MKV Cues (seek index) to jump directly to subtitle + clusters, skipping all video/audio data. Roughly 3x faster than + ``ffprobe -show_packets`` for files on slow (network) mounts. + + Returns ``None`` if mkvtoolnix is not installed or extraction fails. + """ + import json + import os + import shutil + import tempfile + + if not shutil.which("mkvextract") or not shutil.which("mkvmerge"): + logger.debug("mkvextract/mkvmerge not found in PATH") + return None + + # mkvextract only makes sense for MKV containers. + if not fname.lower().endswith((".mkv", ".mka", ".mks")): + logger.debug("mkvextract skipped: not an MKV container") + return None + + # --- Resolve mkvmerge track ID from stream specifier --- + try: + proc = subprocess.Popen( + ["mkvmerge", "--identify", "--identification-format", "json", fname], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ) + stdout, _ = proc.communicate(timeout=15) + if proc.returncode not in (0, 1): # mkvmerge exits 1 for warnings + logger.debug("mkvmerge --identify failed (returncode %d)", proc.returncode) + return None + info = json.loads(stdout.decode("utf-8", errors="replace")) + except Exception as exc: + logger.debug("mkvmerge --identify exception: %s", exc) + return None + + tracks = info.get("tracks", []) + pgs_tracks = [ + t + for t in tracks + if t.get("type") == "subtitles" and "PGS" in t.get("codec", "").upper() + ] + if not pgs_tracks: + logger.debug("mkvmerge found no PGS subtitle tracks") + return None + + # Map "0:s:N" → N-th PGS track; "0:N" or "N" → track with that ID. + track_id: Optional[int] = None + if "s:" in stream: + try: + sub_idx = int(stream.rsplit(":", 1)[-1]) + if sub_idx < len(pgs_tracks): + track_id = pgs_tracks[sub_idx]["id"] + except (ValueError, KeyError, IndexError): + pass + else: + try: + abs_idx = int(stream.rsplit(":", 1)[-1]) + for t in pgs_tracks: + if t.get("id") == abs_idx: + track_id = abs_idx + break + except (ValueError, KeyError): + pass + if track_id is None: + track_id = pgs_tracks[0]["id"] # fallback: first PGS track + + # --- Extract SUP stream to a local temp file --- + # mkvextract writes track data to a named file, not to stdout, so we use + # a temp file on local disk (fast SSD write) then read it back. + tmp_fd, tmp_path = tempfile.mkstemp(suffix=".sup") + os.close(tmp_fd) + try: + proc = subprocess.Popen( + ["mkvextract", "tracks", fname, "{}:{}".format(track_id, tmp_path)], + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + try: + proc.wait(timeout=300) + except subprocess.TimeoutExpired: + proc.kill() + logger.debug("mkvextract timed out") + return None + if proc.returncode != 0: + logger.debug("mkvextract failed (returncode %d)", proc.returncode) + return None + with open(tmp_path, "rb") as fh: + pgs_data = fh.read() + except Exception as exc: + logger.debug("mkvextract exception: %s", exc) + return None + finally: + try: + os.unlink(tmp_path) + except OSError: + pass + + if not pgs_data: + return None + return _parse_pgs_timings(pgs_data) + + def _get_pgs_timings_via_ffprobe( fname: str, stream: str, @@ -771,9 +883,14 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer": stream = "0:" + stream logger.info("reading PGS timings for stream %s from %s...", stream, fname) - timings = _get_pgs_timings_via_ffprobe( - fname, stream, self.ffmpeg_path, self.gui_mode - ) + timings = _get_pgs_timings_via_mkvextract(fname, stream, self.gui_mode) + if timings is not None: + logger.info("used mkvextract fast path (%d segments)", len(timings)) + else: + logger.info("mkvextract unavailable or failed, trying ffprobe...") + timings = _get_pgs_timings_via_ffprobe( + fname, stream, self.ffmpeg_path, self.gui_mode + ) if timings is None: # Fallback: extract raw SUP stream and parse binary logger.info("ffprobe fast path unavailable, extracting raw PGS stream...") From abab44a33c3c296c56a47b99f2d6cddf6305322c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sun, 22 Mar 2026 14:47:02 +0100 Subject: [PATCH 7/9] Cleanup --- ffsubsync/speech_transformers.py | 236 ++----------------------------- 1 file changed, 9 insertions(+), 227 deletions(-) diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index 3d36c1f..4b43c21 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -568,118 +568,6 @@ def find_pgs_stream( return None -def _get_pgs_timings_via_mkvextract( - fname: str, - stream: str, - gui_mode: bool = False, -) -> Optional[List[Tuple[float, float]]]: - """Fastest path: extract PGS SUP data using mkvextract (mkvtoolnix). - - mkvextract uses the MKV Cues (seek index) to jump directly to subtitle - clusters, skipping all video/audio data. Roughly 3x faster than - ``ffprobe -show_packets`` for files on slow (network) mounts. - - Returns ``None`` if mkvtoolnix is not installed or extraction fails. - """ - import json - import os - import shutil - import tempfile - - if not shutil.which("mkvextract") or not shutil.which("mkvmerge"): - logger.debug("mkvextract/mkvmerge not found in PATH") - return None - - # mkvextract only makes sense for MKV containers. - if not fname.lower().endswith((".mkv", ".mka", ".mks")): - logger.debug("mkvextract skipped: not an MKV container") - return None - - # --- Resolve mkvmerge track ID from stream specifier --- - try: - proc = subprocess.Popen( - ["mkvmerge", "--identify", "--identification-format", "json", fname], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - ) - stdout, _ = proc.communicate(timeout=15) - if proc.returncode not in (0, 1): # mkvmerge exits 1 for warnings - logger.debug("mkvmerge --identify failed (returncode %d)", proc.returncode) - return None - info = json.loads(stdout.decode("utf-8", errors="replace")) - except Exception as exc: - logger.debug("mkvmerge --identify exception: %s", exc) - return None - - tracks = info.get("tracks", []) - pgs_tracks = [ - t - for t in tracks - if t.get("type") == "subtitles" and "PGS" in t.get("codec", "").upper() - ] - if not pgs_tracks: - logger.debug("mkvmerge found no PGS subtitle tracks") - return None - - # Map "0:s:N" → N-th PGS track; "0:N" or "N" → track with that ID. - track_id: Optional[int] = None - if "s:" in stream: - try: - sub_idx = int(stream.rsplit(":", 1)[-1]) - if sub_idx < len(pgs_tracks): - track_id = pgs_tracks[sub_idx]["id"] - except (ValueError, KeyError, IndexError): - pass - else: - try: - abs_idx = int(stream.rsplit(":", 1)[-1]) - for t in pgs_tracks: - if t.get("id") == abs_idx: - track_id = abs_idx - break - except (ValueError, KeyError): - pass - if track_id is None: - track_id = pgs_tracks[0]["id"] # fallback: first PGS track - - # --- Extract SUP stream to a local temp file --- - # mkvextract writes track data to a named file, not to stdout, so we use - # a temp file on local disk (fast SSD write) then read it back. - tmp_fd, tmp_path = tempfile.mkstemp(suffix=".sup") - os.close(tmp_fd) - try: - proc = subprocess.Popen( - ["mkvextract", "tracks", fname, "{}:{}".format(track_id, tmp_path)], - stdin=subprocess.PIPE, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) - try: - proc.wait(timeout=300) - except subprocess.TimeoutExpired: - proc.kill() - logger.debug("mkvextract timed out") - return None - if proc.returncode != 0: - logger.debug("mkvextract failed (returncode %d)", proc.returncode) - return None - with open(tmp_path, "rb") as fh: - pgs_data = fh.read() - except Exception as exc: - logger.debug("mkvextract exception: %s", exc) - return None - finally: - try: - os.unlink(tmp_path) - except OSError: - pass - - if not pgs_data: - return None - return _parse_pgs_timings(pgs_data) - - def _get_pgs_timings_via_ffprobe( fname: str, stream: str, @@ -758,82 +646,6 @@ def _get_pgs_timings_via_ffprobe( return results -def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]: - """Parse raw PGS (Presentation Graphic Stream / SUP) binary data. - - PGS packet header (13 bytes): - 0-1 : "PG" magic (0x50 0x47) - 2-5 : PTS – unsigned 32-bit big-endian, 90 kHz ticks - 6-9 : DTS – unsigned 32-bit big-endian (ignored here) - 10 : segment type (0x16 = PCS, 0x14 = PDS, 0x15 = ODS, 0x17 = WDS, 0x80 = END) - 11-12: segment data length, big-endian - - PCS data layout (offsets from byte 13): - 0-1 : video width - 2-3 : video height - 4 : frame rate byte - 5-6 : composition number - 7 : composition state (0x00=Normal, 0x40=Acquisition, 0x80=Epoch Start) - 8 : palette update flag (0x80 = palette-only, subtitle unchanged) - 9 : palette ID - 10 : number of composition objects - - Logic: - - Palette-update-only PCS → skip (existing subtitle is unchanged). - - Any other PCS → close any currently-open subtitle first (handles - both explicit clears and back-to-back / epoch-start transitions), - then open a new one if num_objects > 0. - """ - import struct - - PGS_MAGIC = b"\x50\x47" # "PG" - SEG_PCS = 0x16 - PCS_PALETTE_UPDATE_OFFSET = 8 - PCS_NUM_OBJECTS_OFFSET = 10 - PCS_MIN_LENGTH = 11 - HEADER_SIZE = 13 - - results: List[Tuple[float, float]] = [] - current_start: Optional[float] = None - pos = 0 - - while pos + HEADER_SIZE <= len(data): - if data[pos : pos + 2] != PGS_MAGIC: - next_magic = data.find(PGS_MAGIC, pos + 1) - if next_magic == -1: - break - pos = next_magic - continue - - pts: float = struct.unpack_from(">I", data, pos + 2)[0] / 90000.0 - seg_type: int = data[pos + 10] - seg_length: int = struct.unpack_from(">H", data, pos + 11)[0] - - if seg_type == SEG_PCS and seg_length >= PCS_MIN_LENGTH: - pcs_start = pos + HEADER_SIZE - palette_update_flag: int = data[pcs_start + PCS_PALETTE_UPDATE_OFFSET] - num_objects: int = data[pcs_start + PCS_NUM_OBJECTS_OFFSET] - - if palette_update_flag == 0x80: - # Palette-only update: subtitle image unchanged, skip entirely. - pass - else: - # Any other PCS closes the currently-displayed subtitle (if any). - # This correctly handles: - # - explicit clear (num_objects=0 after a subtitle) - # - back-to-back subtitles (num_objects>0 replaces previous) - # - epoch start (composition_state=0x80, implicit clear) - if current_start is not None: - results.append((current_start, pts)) - current_start = None - if num_objects > 0: - current_start = pts - - pos += HEADER_SIZE + seg_length - - return results - - class PGSSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin): """Use PGS (Presentation Graphic Stream) subtitle timings as a sync reference. @@ -883,47 +695,17 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer": stream = "0:" + stream logger.info("reading PGS timings for stream %s from %s...", stream, fname) - timings = _get_pgs_timings_via_mkvextract(fname, stream, self.gui_mode) - if timings is not None: - logger.info("used mkvextract fast path (%d segments)", len(timings)) - else: - logger.info("mkvextract unavailable or failed, trying ffprobe...") - timings = _get_pgs_timings_via_ffprobe( - fname, stream, self.ffmpeg_path, self.gui_mode - ) + timings = _get_pgs_timings_via_ffprobe( + fname, stream, self.ffmpeg_path, self.gui_mode + ) if timings is None: - # Fallback: extract raw SUP stream and parse binary - logger.info("ffprobe fast path unavailable, extracting raw PGS stream...") - ffmpeg_args = [ - ffmpeg_bin_path( - "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path - ), - "-loglevel", - "fatal", - "-nostdin", - "-i", - fname, - "-map", - stream, - "-c:s", - "copy", - "-f", - "sup", - "-", - ] - process = subprocess.Popen( - ffmpeg_args, **subprocess_args(include_stdout=True) - ) - pgs_data, _ = process.communicate() - if process.returncode != 0 or not pgs_data: - raise ValueError( - "Failed to extract PGS stream {} from {}. " - "Make sure the stream exists and is an hdmv_pgs_subtitle track " - "(check with: ffprobe -show_streams {}).".format( - stream, fname, fname - ) + raise ValueError( + "Failed to get PGS timings via ffprobe for stream {} from {}. " + "Make sure the stream exists and is an hdmv_pgs_subtitle track " + "(check with: ffprobe -show_streams {}).".format( + stream, fname, fname ) - timings = _parse_pgs_timings(pgs_data) + ) if not timings: raise ValueError( From 385403f6d145430b1b2e29d7c3bac5538e7ef686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sun, 22 Mar 2026 15:03:17 +0100 Subject: [PATCH 8/9] Switch to ffmpeg python module --- ffsubsync/speech_transformers.py | 80 +++++++++++--------------------- 1 file changed, 28 insertions(+), 52 deletions(-) diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index 4b43c21..c6236c5 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -574,7 +574,7 @@ def _get_pgs_timings_via_ffprobe( ffmpeg_path: Optional[str] = None, gui_mode: bool = False, ) -> Optional[List[Tuple[float, float]]]: - """Fast path: read PGS timings from MKV container metadata using ffprobe. + """Read PGS timings from container metadata using ffprobe. MKV stores per-packet PTS and duration for subtitle streams, so we can get start/end timestamps without extracting or parsing the raw SUP binary. @@ -582,7 +582,7 @@ def _get_pgs_timings_via_ffprobe( are tiny (~30-byte) packets with ``duration_time=N/A``. Returns a list of ``(start_seconds, end_seconds)`` tuples, or ``None`` if - ffprobe fails or the durations are not usable (fall back to SUP parsing). + ffprobe fails or returns no usable durations. """ ffprobe_cmd = ffmpeg_bin_path( "ffprobe", gui_mode, ffmpeg_resources_path=ffmpeg_path @@ -590,56 +590,34 @@ def _get_pgs_timings_via_ffprobe( # ffprobe -select_streams does not accept the "0:" input-index prefix; # strip it so "0:s:0" → "s:0" and "0:3" → "3". probe_stream = stream[2:] if stream.startswith("0:") else stream - args = [ - ffprobe_cmd, - "-v", - "quiet", - "-show_packets", - "-select_streams", - probe_stream, - "-show_entries", - "packet=pts_time,duration_time,size", - fname, - ] - process = subprocess.Popen(args, **subprocess_args(include_stdout=True)) - stdout, _ = process.communicate() - if process.returncode != 0 or not stdout: + try: + probe_data = ffmpeg.probe( + fname, + cmd=ffprobe_cmd, + show_packets=None, + select_streams=probe_stream, + show_entries="packet=pts_time,duration_time,size", + ) + except Exception: return None results: List[Tuple[float, float]] = [] - pts_time: Optional[float] = None - duration_time: Optional[float] = None - size: Optional[int] = None - - for raw_line in stdout.decode("utf-8", errors="replace").splitlines(): - line = raw_line.strip() - if line == "[PACKET]": - pts_time = duration_time = size = None - elif line == "[/PACKET]": - if ( - pts_time is not None - and duration_time is not None - and size is not None - and size > 50 # skip clear events (~30 bytes) - ): - results.append((pts_time, pts_time + duration_time)) - elif line.startswith("pts_time="): - try: - pts_time = float(line.split("=", 1)[1]) - except ValueError: - pass - elif line.startswith("duration_time="): - val = line.split("=", 1)[1] - if val != "N/A": - try: - duration_time = float(val) - except ValueError: - pass - elif line.startswith("size="): - try: - size = int(line.split("=", 1)[1]) - except ValueError: - pass + for packet in probe_data.get("packets", []): + pts_time_str = packet.get("pts_time") + duration_time_str = packet.get("duration_time") + size_str = packet.get("size") + if pts_time_str is None or duration_time_str is None or size_str is None: + continue + if duration_time_str == "N/A": + continue + try: + pts_time = float(pts_time_str) + duration_time = float(duration_time_str) + size = int(size_str) + except ValueError: + continue + if size > 50: # skip clear events (~30 bytes) + results.append((pts_time, pts_time + duration_time)) if not results: return None @@ -702,9 +680,7 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer": raise ValueError( "Failed to get PGS timings via ffprobe for stream {} from {}. " "Make sure the stream exists and is an hdmv_pgs_subtitle track " - "(check with: ffprobe -show_streams {}).".format( - stream, fname, fname - ) + "(check with: ffprobe -show_streams {}).".format(stream, fname, fname) ) if not timings: From b4b41d323aa18b352b115bf4d186e8245032628b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= Date: Sun, 22 Mar 2026 19:43:11 +0100 Subject: [PATCH 9/9] Add tests --- tests/test_pgs.py | 117 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 tests/test_pgs.py diff --git a/tests/test_pgs.py b/tests/test_pgs.py new file mode 100644 index 0000000..ea48f2e --- /dev/null +++ b/tests/test_pgs.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +from unittest.mock import patch + +import pytest + +from ffsubsync.speech_transformers import _get_pgs_timings_via_ffprobe + + +def _make_packet(pts_time, duration_time, size): + return { + "pts_time": str(pts_time), + "duration_time": "N/A" if duration_time is None else str(duration_time), + "size": str(size), + } + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_basic(mock_probe, mock_bin): + mock_probe.return_value = { + "packets": [ + _make_packet(1.0, 2.5, 1000), + _make_packet(5.0, 1.0, 800), + ] + } + result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") + assert result == [(1.0, 3.5), (5.0, 6.0)] + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_strips_0_prefix_from_stream(mock_probe, mock_bin): + """'0:s:0' should be passed to ffprobe as 's:0'.""" + mock_probe.return_value = {"packets": [_make_packet(0.0, 1.0, 100)]} + _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") + _, kwargs = mock_probe.call_args + assert kwargs["select_streams"] == "s:0" + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_stream_without_prefix_unchanged(mock_probe, mock_bin): + mock_probe.return_value = {"packets": [_make_packet(0.0, 1.0, 100)]} + _get_pgs_timings_via_ffprobe("test.mkv", "s:1") + _, kwargs = mock_probe.call_args + assert kwargs["select_streams"] == "s:1" + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_skips_clear_events_small_size(mock_probe, mock_bin): + """Packets with size <= 50 are clear events and must be skipped.""" + mock_probe.return_value = { + "packets": [ + _make_packet(1.0, 2.0, 1000), # show event + _make_packet(3.0, 0.001, 30), # clear event, size <= 50 + ] + } + result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") + assert result == [(1.0, 3.0)] + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_skips_na_duration(mock_probe, mock_bin): + """Packets with duration_time=N/A must be skipped.""" + mock_probe.return_value = { + "packets": [ + _make_packet(1.0, None, 1000), # N/A duration + _make_packet(5.0, 2.0, 900), + ] + } + result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") + assert result == [(5.0, 7.0)] + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_returns_none_when_no_usable_packets(mock_probe, mock_bin): + """Returns None if all packets are filtered out.""" + mock_probe.return_value = { + "packets": [ + _make_packet(1.0, None, 1000), # N/A duration + _make_packet(2.0, 1.0, 20), # too small + ] + } + assert _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") is None + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_returns_none_on_empty_packets(mock_probe, mock_bin): + mock_probe.return_value = {"packets": []} + assert _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") is None + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_returns_none_when_ffprobe_raises(mock_probe, mock_bin): + mock_probe.side_effect = Exception("ffprobe not found") + assert _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") is None + + +@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe") +@patch("ffsubsync.speech_transformers.ffmpeg.probe") +def test_skips_packets_with_missing_fields(mock_probe, mock_bin): + """Packets missing any required field are silently skipped.""" + mock_probe.return_value = { + "packets": [ + {"pts_time": "1.0", "duration_time": "2.0"}, # missing size + {"pts_time": "3.0", "size": "500"}, # missing duration_time + {"duration_time": "1.0", "size": "500"}, # missing pts_time + _make_packet(10.0, 1.0, 200), # valid + ] + } + result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") + assert result == [(10.0, 11.0)]