From 2b16587daaf40ba5204040eefb731cea4e9fdf35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sat, 21 Mar 2026 22:10:14 +0100
Subject: [PATCH 1/9] Use PGS as a reference for subtitles

---
 ffsubsync/ffsubsync.py           |  35 ++++++-
 ffsubsync/speech_transformers.py | 157 ++++++++++++++++++++++++++++++-
 2 files changed, 189 insertions(+), 3 deletions(-)

diff --git a/ffsubsync/ffsubsync.py b/ffsubsync/ffsubsync.py
index b0114cc..a0940de 100755
--- a/ffsubsync/ffsubsync.py
+++ b/ffsubsync/ffsubsync.py
@@ -30,6 +30,7 @@
 from ffsubsync.speech_transformers import (
     VideoSpeechTransformer,
     DeserializeSpeechTransformer,
+    PGSSpeechTransformer,
     make_subtitle_speech_pipeline,
 )
 from ffsubsync.subtitle_parser import make_subtitle_parser
@@ -220,6 +221,24 @@ def try_sync(
 
 
 def make_reference_pipe(args: argparse.Namespace) -> Pipeline:
+    pgs_stream = getattr(args, "pgs_ref_stream", None)
+    if pgs_stream is not None:
+        if not pgs_stream.startswith("0:"):
+            pgs_stream = "0:" + pgs_stream
+        return Pipeline(
+            [
+                (
+                    "speech_extract",
+                    PGSSpeechTransformer(
+                        sample_rate=SAMPLE_RATE,
+                        start_seconds=args.start_seconds,
+                        ffmpeg_path=args.ffmpeg_path,
+                        ref_stream=pgs_stream,
+                        gui_mode=args.gui_mode,
+                    ),
+                ),
+            ]
+        )
     ref_format = _ref_format(args.reference)
     if ref_format in SUBTITLE_EXTENSIONS:
         if args.vad is not None:
@@ -451,7 +470,7 @@ def _run_impl(args: argparse.Namespace, result: Dict[str, Any]) -> bool:
 
 
 def validate_and_transform_args(
-    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]
+    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace],
 ) -> Optional[argparse.Namespace]:
     if isinstance(parser_or_args, argparse.Namespace):
         parser = None
@@ -484,7 +503,7 @@ def validate_and_transform_args(
 
 
 def run(
-    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace]
+    parser_or_args: Union[argparse.ArgumentParser, argparse.Namespace],
 ) -> Dict[str, Any]:
     sync_was_successful = False
     result = {
@@ -556,6 +575,18 @@ def add_main_args_for_cli(parser: argparse.ArgumentParser) -> None:
             "Example: `ffs ref.mkv -i in.srt -o out.srt --reference-stream s:2`"
         ),
     )
+    parser.add_argument(
+        "--pgs-ref-stream",
+        "--pgsstream",
+        default=None,
+        help=(
+            "Extract PGS (Presentation Graphic Stream) image-based subtitles from "
+            "the specified stream in the reference MKV and use their on-screen "
+            "timings as sync reference instead of audio voice-activity detection. "
+            "Formatted like ffmpeg stream specifiers (leading `0:` is optional). "
+            "Example: `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream s:0`"
+        ),
+    )
 
 
 def add_cli_only_args(parser: argparse.ArgumentParser) -> None:
diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py
index 38883d1..6350f75 100644
--- a/ffsubsync/speech_transformers.py
+++ b/ffsubsync/speech_transformers.py
@@ -6,7 +6,7 @@
 import subprocess
 import sys
 from datetime import timedelta
-from typing import cast, Callable, Dict, List, Optional, Union
+from typing import cast, Callable, Dict, List, Optional, Tuple, Union
 
 import ffmpeg
 import numpy as np
@@ -531,3 +531,158 @@ def fit(self, fname, *_) -> "DeserializeSpeechTransformer":
     def transform(self, *_) -> np.ndarray:
         assert self.deserialized_speech_results_ is not None
         return self.deserialized_speech_results_
+
+
+def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]:
+    """Parse raw PGS (Presentation Graphic Stream / SUP) binary data.
+
+    Each PGS display set is introduced by a Presentation Composition Segment
+    (PCS, type 0x16).  The PCS carries the number of composition objects: > 0
+    means a subtitle image is now on-screen, 0 means the screen is being
+    cleared.  We walk through all PCS segments and pair "show" and "clear"
+    events to produce (start_seconds, end_seconds) intervals.
+    """
+    import struct
+
+    PGS_MAGIC = b"\x50\x47"  # "PG" – PGS packet sync word
+    SEG_PCS = 0x16
+    # PCS layout after the 13-byte header:
+    #   video_w(2) + video_h(2) + frame_rate(1) + comp_number(2) +
+    #   comp_state(1) + palette_update_flag(1) + palette_id(1) + num_objects(1)
+    PCS_PALETTE_UPDATE_OFFSET = 8  # offset of palette_update_flag inside PCS data
+    PCS_NUM_OBJECTS_OFFSET = 10  # offset of num_comp_objects inside PCS data
+    PCS_MIN_LENGTH = 11  # minimum PCS data length to read the above
+
+    HEADER_SIZE = 13  # 2 magic + 4 PTS + 4 DTS + 1 type + 2 length
+
+    results: List[Tuple[float, float]] = []
+    current_start: Optional[float] = None
+    pos = 0
+
+    while pos + HEADER_SIZE <= len(data):
+        if data[pos : pos + 2] != PGS_MAGIC:
+            # lost sync – try to find the next magic bytes
+            next_magic = data.find(PGS_MAGIC, pos + 1)
+            if next_magic == -1:
+                break
+            pos = next_magic
+            continue
+
+        # PTS is in 90 kHz ticks
+        pts: float = struct.unpack_from(">I", data, pos + 2)[0] / 90000.0
+        seg_type: int = data[pos + 10]
+        seg_length: int = struct.unpack_from(">H", data, pos + 11)[0]
+
+        if seg_type == SEG_PCS and seg_length >= PCS_MIN_LENGTH:
+            pcs_start = pos + HEADER_SIZE
+            palette_update_flag: int = data[pcs_start + PCS_PALETTE_UPDATE_OFFSET]
+            num_objects: int = data[pcs_start + PCS_NUM_OBJECTS_OFFSET]
+
+            if palette_update_flag == 0x80:
+                # palette-only update – displayed subtitle is unchanged, skip
+                pass
+            elif num_objects > 0:
+                current_start = pts
+            elif current_start is not None:
+                results.append((current_start, pts))
+                current_start = None
+
+        pos += HEADER_SIZE + seg_length
+
+    return results
+
+
+class PGSSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin):
+    """Use PGS (Presentation Graphic Stream) subtitle timings as a sync reference.
+
+    PGS subtitles are bitmap-based (e.g. Blu-ray) and cannot be converted to
+    text by ffmpeg.  This transformer extracts the raw SUP stream from the
+    video file, parses the on-screen / off-screen timestamps from the binary
+    Presentation Composition Segments, and builds the same kind of sparse
+    binary signal that :class:`SubtitleSpeechTransformer` produces for text
+    subtitles.  The resulting signal can then be aligned against the input
+    subtitle file in the normal ffsubsync pipeline.
+    """
+
+    def __init__(
+        self,
+        sample_rate: int,
+        start_seconds: int = 0,
+        ffmpeg_path: Optional[str] = None,
+        ref_stream: Optional[str] = None,
+        gui_mode: bool = False,
+    ) -> None:
+        super(PGSSpeechTransformer, self).__init__()
+        self.sample_rate: int = sample_rate
+        self.start_seconds: int = start_seconds
+        self.ffmpeg_path: Optional[str] = ffmpeg_path
+        self.ref_stream: Optional[str] = ref_stream
+        self.gui_mode: bool = gui_mode
+        self.pgs_speech_results_: Optional[np.ndarray] = None
+
+    def fit(self, fname: str, *_) -> "PGSSpeechTransformer":
+        stream = self.ref_stream if self.ref_stream is not None else "0:s:0"
+        if not stream.startswith("0:"):
+            stream = "0:" + stream
+
+        ffmpeg_args = [
+            ffmpeg_bin_path(
+                "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path
+            ),
+            "-loglevel",
+            "fatal",
+            "-nostdin",
+            "-i",
+            fname,
+            "-map",
+            stream,
+            "-c:s",
+            "copy",
+            "-f",
+            "sup",
+            "-",
+        ]
+
+        logger.info("extracting PGS subtitle stream %s from %s...", stream, fname)
+        process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True))
+        pgs_data, _ = process.communicate()
+
+        if process.returncode != 0 or not pgs_data:
+            raise ValueError(
+                "Failed to extract PGS stream {} from {}. "
+                "Make sure the stream exists and is an hdmv_pgs_subtitle track "
+                "(check with: ffprobe -show_streams {}).".format(stream, fname, fname)
+            )
+
+        logger.info("...done; parsing PGS timings...")
+        timings = _parse_pgs_timings(pgs_data)
+
+        if not timings:
+            raise ValueError(
+                "No subtitle timings found in PGS stream {}.".format(stream)
+            )
+
+        logger.info("found %d PGS subtitle segments", len(timings))
+
+        max_time = max(end for _, end in timings)
+        num_samples = int(max_time * self.sample_rate) + 2
+        samples = np.zeros(num_samples, dtype=float)
+
+        for start, end in timings:
+            start_sample = int(round((start - self.start_seconds) * self.sample_rate))
+            end_sample = int(round((end - self.start_seconds) * self.sample_rate))
+            start_sample = max(start_sample, 0)
+            end_sample = min(end_sample, num_samples)
+            if start_sample < end_sample:
+                samples[start_sample:end_sample] = 1.0
+
+        self.pgs_speech_results_ = samples
+        self.fit_boundaries(self.pgs_speech_results_)
+        logger.info(
+            "total PGS subtitle frames: %d", int(np.sum(self.pgs_speech_results_))
+        )
+        return self
+
+    def transform(self, *_) -> np.ndarray:
+        assert self.pgs_speech_results_ is not None
+        return self.pgs_speech_results_

From a0c262534ec872fe23a4b778f3c1e023d768ee52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sat, 21 Mar 2026 22:17:04 +0100
Subject: [PATCH 2/9] Automatically detect PGS stream

---
 ffsubsync/ffsubsync.py           | 21 ++++++++------
 ffsubsync/speech_transformers.py | 49 ++++++++++++++++++++++++++++++--
 2 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/ffsubsync/ffsubsync.py b/ffsubsync/ffsubsync.py
index a0940de..9f54151 100755
--- a/ffsubsync/ffsubsync.py
+++ b/ffsubsync/ffsubsync.py
@@ -223,8 +223,10 @@ def try_sync(
 def make_reference_pipe(args: argparse.Namespace) -> Pipeline:
     pgs_stream = getattr(args, "pgs_ref_stream", None)
     if pgs_stream is not None:
-        if not pgs_stream.startswith("0:"):
-            pgs_stream = "0:" + pgs_stream
+        # "auto" (bare --pgs-ref-stream flag) → let PGSSpeechTransformer auto-detect
+        resolved_stream: Optional[str] = None if pgs_stream == "auto" else pgs_stream
+        if resolved_stream is not None and not resolved_stream.startswith("0:"):
+            resolved_stream = "0:" + resolved_stream
         return Pipeline(
             [
                 (
@@ -233,7 +235,7 @@ def make_reference_pipe(args: argparse.Namespace) -> Pipeline:
                         sample_rate=SAMPLE_RATE,
                         start_seconds=args.start_seconds,
                         ffmpeg_path=args.ffmpeg_path,
-                        ref_stream=pgs_stream,
+                        ref_stream=resolved_stream,
                         gui_mode=args.gui_mode,
                     ),
                 ),
@@ -578,13 +580,16 @@ def add_main_args_for_cli(parser: argparse.ArgumentParser) -> None:
     parser.add_argument(
         "--pgs-ref-stream",
         "--pgsstream",
+        nargs="?",
+        const="auto",
         default=None,
         help=(
-            "Extract PGS (Presentation Graphic Stream) image-based subtitles from "
-            "the specified stream in the reference MKV and use their on-screen "
-            "timings as sync reference instead of audio voice-activity detection. "
-            "Formatted like ffmpeg stream specifiers (leading `0:` is optional). "
-            "Example: `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream s:0`"
+            "Use a PGS (Presentation Graphic Stream) image-based subtitle track from "
+            "the reference MKV as the sync reference instead of audio VAD. "
+            "Optionally specify the stream (leading `0:` is optional, e.g. `s:0` or `3`). "
+            "Omit the value to auto-detect the first hdmv_pgs_subtitle track. "
+            "Example: `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream` (auto) "
+            "or `ffs ref.mkv -i in.srt -o out.srt --pgs-ref-stream s:2` (explicit)."
         ),
     )
 
diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py
index 6350f75..b2638d5 100644
--- a/ffsubsync/speech_transformers.py
+++ b/ffsubsync/speech_transformers.py
@@ -533,6 +533,41 @@ def transform(self, *_) -> np.ndarray:
         return self.deserialized_speech_results_
 
 
+def find_pgs_stream(
+    fname: str,
+    ffmpeg_path: Optional[str] = None,
+    gui_mode: bool = False,
+) -> Optional[str]:
+    """Return the ffmpeg stream specifier for the first PGS subtitle track in *fname*.
+
+    Uses ``ffprobe`` to inspect the file.  Returns a string like ``"0:s:0"`` on
+    success, or ``None`` if the file has no ``hdmv_pgs_subtitle`` streams.
+    """
+    try:
+        probe = ffmpeg.probe(
+            fname,
+            cmd=ffmpeg_bin_path("ffprobe", gui_mode, ffmpeg_resources_path=ffmpeg_path),
+        )
+    except Exception as e:
+        logger.warning("ffprobe failed while searching for PGS streams: %s", e)
+        return None
+
+    sub_index = 0
+    for stream in probe.get("streams", []):
+        if stream.get("codec_type") == "subtitle":
+            if stream.get("codec_name") == "hdmv_pgs_subtitle":
+                specifier = "0:s:{}".format(sub_index)
+                logger.info(
+                    "auto-detected PGS stream: %s (ffmpeg stream index %s)",
+                    specifier,
+                    stream.get("index"),
+                )
+                return specifier
+            sub_index += 1
+
+    return None
+
+
 def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]:
     """Parse raw PGS (Presentation Graphic Stream / SUP) binary data.
 
@@ -621,9 +656,17 @@ def __init__(
         self.pgs_speech_results_: Optional[np.ndarray] = None
 
     def fit(self, fname: str, *_) -> "PGSSpeechTransformer":
-        stream = self.ref_stream if self.ref_stream is not None else "0:s:0"
-        if not stream.startswith("0:"):
-            stream = "0:" + stream
+        if self.ref_stream is None:
+            stream = find_pgs_stream(fname, self.ffmpeg_path, self.gui_mode)
+            if stream is None:
+                raise ValueError(
+                    "No hdmv_pgs_subtitle stream found in {}. "
+                    "Specify one explicitly with --pgs-ref-stream.".format(fname)
+                )
+        else:
+            stream = self.ref_stream
+            if not stream.startswith("0:"):
+                stream = "0:" + stream
 
         ffmpeg_args = [
             ffmpeg_bin_path(

From dc350e0e77308167687904d0cfc5eb834efbf9df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sat, 21 Mar 2026 22:49:37 +0100
Subject: [PATCH 3/9] Fix PGS timing: close the open subtitle on any non-palette PCS

---
 ffsubsync/speech_transformers.py | 76 +++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 22 deletions(-)

diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py
index b2638d5..aeb3443 100644
--- a/ffsubsync/speech_transformers.py
+++ b/ffsubsync/speech_transformers.py
@@ -571,24 +571,37 @@ def find_pgs_stream(
 def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]:
     """Parse raw PGS (Presentation Graphic Stream / SUP) binary data.
 
-    Each PGS display set is introduced by a Presentation Composition Segment
-    (PCS, type 0x16).  The PCS carries the number of composition objects: > 0
-    means a subtitle image is now on-screen, 0 means the screen is being
-    cleared.  We walk through all PCS segments and pair "show" and "clear"
-    events to produce (start_seconds, end_seconds) intervals.
+    PGS packet header (13 bytes):
+      0-1  : "PG" magic (0x50 0x47)
+      2-5  : PTS  – unsigned 32-bit big-endian, 90 kHz ticks
+      6-9  : DTS  – unsigned 32-bit big-endian (ignored here)
+      10   : segment type (0x16 = PCS, 0x14 = PDS, 0x15 = ODS, 0x17 = WDS, 0x80 = END)
+      11-12: segment data length, big-endian
+
+    PCS data layout (offsets from byte 13):
+      0-1  : video width
+      2-3  : video height
+      4    : frame rate byte
+      5-6  : composition number
+      7    : composition state  (0x00=Normal, 0x40=Acquisition, 0x80=Epoch Start)
+      8    : palette update flag  (0x80 = palette-only, subtitle unchanged)
+      9    : palette ID
+      10   : number of composition objects
+
+    Logic:
+      - Palette-update-only PCS → skip (existing subtitle is unchanged).
+      - Any other PCS → close any currently-open subtitle first (handles
+        both explicit clears and back-to-back / epoch-start transitions),
+        then open a new one if num_objects > 0.
     """
     import struct
 
-    PGS_MAGIC = b"\x50\x47"  # "PG" – PGS packet sync word
+    PGS_MAGIC = b"\x50\x47"  # "PG"
     SEG_PCS = 0x16
-    # PCS layout after the 13-byte header:
-    #   video_w(2) + video_h(2) + frame_rate(1) + comp_number(2) +
-    #   comp_state(1) + palette_update_flag(1) + palette_id(1) + num_objects(1)
-    PCS_PALETTE_UPDATE_OFFSET = 8  # offset of palette_update_flag inside PCS data
-    PCS_NUM_OBJECTS_OFFSET = 10  # offset of num_comp_objects inside PCS data
-    PCS_MIN_LENGTH = 11  # minimum PCS data length to read the above
-
-    HEADER_SIZE = 13  # 2 magic + 4 PTS + 4 DTS + 1 type + 2 length
+    PCS_PALETTE_UPDATE_OFFSET = 8
+    PCS_NUM_OBJECTS_OFFSET = 10
+    PCS_MIN_LENGTH = 11
+    HEADER_SIZE = 13
 
     results: List[Tuple[float, float]] = []
     current_start: Optional[float] = None
@@ -596,14 +609,12 @@ def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]:
 
     while pos + HEADER_SIZE <= len(data):
         if data[pos : pos + 2] != PGS_MAGIC:
-            # lost sync – try to find the next magic bytes
             next_magic = data.find(PGS_MAGIC, pos + 1)
             if next_magic == -1:
                 break
             pos = next_magic
             continue
 
-        # PTS is in 90 kHz ticks
         pts: float = struct.unpack_from(">I", data, pos + 2)[0] / 90000.0
         seg_type: int = data[pos + 10]
         seg_length: int = struct.unpack_from(">H", data, pos + 11)[0]
@@ -614,13 +625,19 @@ def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]:
             num_objects: int = data[pcs_start + PCS_NUM_OBJECTS_OFFSET]
 
             if palette_update_flag == 0x80:
-                # palette-only update – displayed subtitle is unchanged, skip
+                # Palette-only update: subtitle image unchanged, skip entirely.
                 pass
-            elif num_objects > 0:
-                current_start = pts
-            elif current_start is not None:
-                results.append((current_start, pts))
-                current_start = None
+            else:
+                # Any other PCS closes the currently-displayed subtitle (if any).
+                # This correctly handles:
+                #   - explicit clear (num_objects=0 after a subtitle)
+                #   - back-to-back subtitles (num_objects>0 replaces previous)
+                #   - epoch start (composition_state=0x80, implicit clear)
+                if current_start is not None:
+                    results.append((current_start, pts))
+                    current_start = None
+                if num_objects > 0:
+                    current_start = pts
 
         pos += HEADER_SIZE + seg_length
 
@@ -639,6 +656,13 @@ class PGSSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin):
     subtitle file in the normal ffsubsync pipeline.
     """
 
+    # PGS is already in the MKV timebase so its duration cannot be compared
+    # against the SRT to infer a framerate ratio.  Returning None here prevents
+    # the duration-based framerate inference in try_sync from running.
+    @property
+    def num_frames(self) -> None:
+        return None
+
     def __init__(
         self,
         sample_rate: int,
@@ -706,6 +730,14 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer":
             )
 
         logger.info("found %d PGS subtitle segments", len(timings))
+        for i, (s, e) in enumerate(timings[:8]):
+            logger.debug(
+                "  PGS[%d]: %s --> %s (%.3fs)",
+                i,
+                str(timedelta(seconds=s)),
+                str(timedelta(seconds=e)),
+                e - s,
+            )
 
         max_time = max(end for _, end in timings)
         num_samples = int(max_time * self.sample_rate) + 2

From 362f9b4d950311d67aae4cc0f6d59df569019cf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sat, 21 Mar 2026 23:04:04 +0100
Subject: [PATCH 4/9] Skip framerate inference when num_frames is None

---
 ffsubsync/ffsubsync.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ffsubsync/ffsubsync.py b/ffsubsync/ffsubsync.py
index 9f54151..2d10c8d 100755
--- a/ffsubsync/ffsubsync.py
+++ b/ffsubsync/ffsubsync.py
@@ -150,8 +150,10 @@ def try_sync(
                     continue
                 else:
                     srt_pipe.fit(srtin)
-            if not skip_infer_framerate_ratio and hasattr(
-                reference_pipe[-1], "num_frames"
+            if (
+                not skip_infer_framerate_ratio
+                and hasattr(reference_pipe[-1], "num_frames")
+                and reference_pipe[-1].num_frames is not None
             ):
                 inferred_framerate_ratio_from_length = (
                     float(reference_pipe[-1].num_frames)

From a4b63a876bb45cc4ca405b32f53f27e4c331fb12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sat, 21 Mar 2026 23:31:03 +0100
Subject: [PATCH 5/9] Read PGS timings via ffprobe before extracting raw SUP

---
 ffsubsync/speech_transformers.py | 144 ++++++++++++++++++++++++-------
 1 file changed, 114 insertions(+), 30 deletions(-)

diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py
index aeb3443..f0feee2 100644
--- a/ffsubsync/speech_transformers.py
+++ b/ffsubsync/speech_transformers.py
@@ -568,6 +568,84 @@ def find_pgs_stream(
     return None
 
 
+def _get_pgs_timings_via_ffprobe(
+    fname: str,
+    stream: str,
+    ffmpeg_path: Optional[str] = None,
+    gui_mode: bool = False,
+) -> Optional[List[Tuple[float, float]]]:
+    """Fast path: read PGS timings from MKV container metadata using ffprobe.
+
+    MKV stores per-packet PTS and duration for subtitle streams, so we can
+    get start/end timestamps without extracting or parsing the raw SUP binary.
+    Show events are large packets with a numeric ``duration_time``; clear events
+    are tiny (~30-byte) packets with ``duration_time=N/A``.
+
+    Returns a list of ``(start_seconds, end_seconds)`` tuples, or ``None`` if
+    ffprobe fails or the durations are not usable (fall back to SUP parsing).
+    """
+    ffprobe_cmd = ffmpeg_bin_path(
+        "ffprobe", gui_mode, ffmpeg_resources_path=ffmpeg_path
+    )
+    # ffprobe -select_streams does not accept the "0:" input-index prefix;
+    # strip it so "0:s:0" → "s:0" and "0:3" → "3".
+    probe_stream = stream[2:] if stream.startswith("0:") else stream
+    args = [
+        ffprobe_cmd,
+        "-v",
+        "quiet",
+        "-show_packets",
+        "-select_streams",
+        probe_stream,
+        "-show_entries",
+        "packet=pts_time,duration_time,size",
+        fname,
+    ]
+    process = subprocess.Popen(args, **subprocess_args(include_stdout=True))
+    stdout, _ = process.communicate()
+    if process.returncode != 0 or not stdout:
+        return None
+
+    results: List[Tuple[float, float]] = []
+    pts_time: Optional[float] = None
+    duration_time: Optional[float] = None
+    size: Optional[int] = None
+
+    for raw_line in stdout.decode("utf-8", errors="replace").splitlines():
+        line = raw_line.strip()
+        if line == "[PACKET]":
+            pts_time = duration_time = size = None
+        elif line == "[/PACKET]":
+            if (
+                pts_time is not None
+                and duration_time is not None
+                and size is not None
+                and size > 50  # skip clear events (~30 bytes)
+            ):
+                results.append((pts_time, pts_time + duration_time))
+        elif line.startswith("pts_time="):
+            try:
+                pts_time = float(line.split("=", 1)[1])
+            except ValueError:
+                pass
+        elif line.startswith("duration_time="):
+            val = line.split("=", 1)[1]
+            if val != "N/A":
+                try:
+                    duration_time = float(val)
+                except ValueError:
+                    pass
+        elif line.startswith("size="):
+            try:
+                size = int(line.split("=", 1)[1])
+            except ValueError:
+                pass
+
+    if not results:
+        return None
+    return results
+
+
 def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]:
     """Parse raw PGS (Presentation Graphic Stream / SUP) binary data.
 
@@ -692,37 +770,43 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer":
             if not stream.startswith("0:"):
                 stream = "0:" + stream
 
-        ffmpeg_args = [
-            ffmpeg_bin_path(
-                "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path
-            ),
-            "-loglevel",
-            "fatal",
-            "-nostdin",
-            "-i",
-            fname,
-            "-map",
-            stream,
-            "-c:s",
-            "copy",
-            "-f",
-            "sup",
-            "-",
-        ]
-
-        logger.info("extracting PGS subtitle stream %s from %s...", stream, fname)
-        process = subprocess.Popen(ffmpeg_args, **subprocess_args(include_stdout=True))
-        pgs_data, _ = process.communicate()
-
-        if process.returncode != 0 or not pgs_data:
-            raise ValueError(
-                "Failed to extract PGS stream {} from {}. "
-                "Make sure the stream exists and is an hdmv_pgs_subtitle track "
-                "(check with: ffprobe -show_streams {}).".format(stream, fname, fname)
+        logger.info("reading PGS timings for stream %s from %s...", stream, fname)
+        timings = _get_pgs_timings_via_ffprobe(
+            fname, stream, self.ffmpeg_path, self.gui_mode
+        )
+        if timings is None:
+            # Fallback: extract raw SUP stream and parse binary
+            logger.info("ffprobe fast path unavailable, extracting raw PGS stream...")
+            ffmpeg_args = [
+                ffmpeg_bin_path(
+                    "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path
+                ),
+                "-loglevel",
+                "fatal",
+                "-nostdin",
+                "-i",
+                fname,
+                "-map",
+                stream,
+                "-c:s",
+                "copy",
+                "-f",
+                "sup",
+                "-",
+            ]
+            process = subprocess.Popen(
+                ffmpeg_args, **subprocess_args(include_stdout=True)
             )
-
-        logger.info("...done; parsing PGS timings...")
-        timings = _parse_pgs_timings(pgs_data)
+            pgs_data, _ = process.communicate()
+            if process.returncode != 0 or not pgs_data:
+                raise ValueError(
+                    "Failed to extract PGS stream {} from {}. "
+                    "Make sure the stream exists and is an hdmv_pgs_subtitle track "
+                    "(check with: ffprobe -show_streams {}).".format(
+                        stream, fname, fname
+                    )
+                )
+            timings = _parse_pgs_timings(pgs_data)
 
         if not timings:
             raise ValueError(

From c2d84ec8b4e6938d2b6b57957a95d2f082c29d55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sun, 22 Mar 2026 00:02:38 +0100
Subject: [PATCH 6/9] Add mkvextract method

---
 ffsubsync/speech_transformers.py | 123 ++++++++++++++++++++++++++++++-
 1 file changed, 120 insertions(+), 3 deletions(-)

diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py
index f0feee2..3d36c1f 100644
--- a/ffsubsync/speech_transformers.py
+++ b/ffsubsync/speech_transformers.py
@@ -568,6 +568,118 @@ def find_pgs_stream(
     return None
 
 
+def _get_pgs_timings_via_mkvextract(
+    fname: str,
+    stream: str,
+    gui_mode: bool = False,
+) -> Optional[List[Tuple[float, float]]]:
+    """Fastest path: extract PGS SUP data using mkvextract (mkvtoolnix).
+
+    mkvextract uses the MKV Cues (seek index) to jump directly to subtitle
+    clusters, skipping all video/audio data.  Roughly 3x faster than
+    ``ffprobe -show_packets`` for files on slow (network) mounts.
+
+    Returns ``None`` if mkvtoolnix is not installed or extraction fails.
+    """
+    import json
+    import os
+    import shutil
+    import tempfile
+
+    if not shutil.which("mkvextract") or not shutil.which("mkvmerge"):
+        logger.debug("mkvextract/mkvmerge not found in PATH")
+        return None
+
+    # mkvextract only makes sense for MKV containers.
+    if not fname.lower().endswith((".mkv", ".mka", ".mks")):
+        logger.debug("mkvextract skipped: not an MKV container")
+        return None
+
+    # --- Resolve mkvmerge track ID from stream specifier ---
+    try:
+        proc = subprocess.Popen(
+            ["mkvmerge", "--identify", "--identification-format", "json", fname],
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+        )
+        stdout, _ = proc.communicate(timeout=15)
+        if proc.returncode not in (0, 1):  # mkvmerge exits 1 for warnings
+            logger.debug("mkvmerge --identify failed (returncode %d)", proc.returncode)
+            return None
+        info = json.loads(stdout.decode("utf-8", errors="replace"))
+    except Exception as exc:
+        logger.debug("mkvmerge --identify exception: %s", exc)
+        return None
+
+    tracks = info.get("tracks", [])
+    pgs_tracks = [
+        t
+        for t in tracks
+        if t.get("type") == "subtitles" and "PGS" in t.get("codec", "").upper()
+    ]
+    if not pgs_tracks:
+        logger.debug("mkvmerge found no PGS subtitle tracks")
+        return None
+
+    # Map "0:s:N" → N-th PGS track; "0:N" or "N" → track with that ID.
+    track_id: Optional[int] = None
+    if "s:" in stream:
+        try:
+            sub_idx = int(stream.rsplit(":", 1)[-1])
+            if sub_idx < len(pgs_tracks):
+                track_id = pgs_tracks[sub_idx]["id"]
+        except (ValueError, KeyError, IndexError):
+            pass
+    else:
+        try:
+            abs_idx = int(stream.rsplit(":", 1)[-1])
+            for t in pgs_tracks:
+                if t.get("id") == abs_idx:
+                    track_id = abs_idx
+                    break
+        except (ValueError, KeyError):
+            pass
+    if track_id is None:
+        track_id = pgs_tracks[0]["id"]  # fallback: first PGS track
+
+    # --- Extract SUP stream to a local temp file ---
+    # mkvextract writes track data to a named file, not to stdout, so we use
+    # a temp file on local disk (fast SSD write) then read it back.
+    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".sup")
+    os.close(tmp_fd)
+    try:
+        proc = subprocess.Popen(
+            ["mkvextract", "tracks", fname, "{}:{}".format(track_id, tmp_path)],
+            stdin=subprocess.PIPE,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+        )
+        try:
+            proc.wait(timeout=300)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            logger.debug("mkvextract timed out")
+            return None
+        if proc.returncode != 0:
+            logger.debug("mkvextract failed (returncode %d)", proc.returncode)
+            return None
+        with open(tmp_path, "rb") as fh:
+            pgs_data = fh.read()
+    except Exception as exc:
+        logger.debug("mkvextract exception: %s", exc)
+        return None
+    finally:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+
+    if not pgs_data:
+        return None
+    return _parse_pgs_timings(pgs_data)
+
+
 def _get_pgs_timings_via_ffprobe(
     fname: str,
     stream: str,
@@ -771,9 +883,14 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer":
                 stream = "0:" + stream
 
         logger.info("reading PGS timings for stream %s from %s...", stream, fname)
-        timings = _get_pgs_timings_via_ffprobe(
-            fname, stream, self.ffmpeg_path, self.gui_mode
-        )
+        timings = _get_pgs_timings_via_mkvextract(fname, stream, self.gui_mode)
+        if timings is not None:
+            logger.info("used mkvextract fast path (%d segments)", len(timings))
+        else:
+            logger.info("mkvextract unavailable or failed, trying ffprobe...")
+            timings = _get_pgs_timings_via_ffprobe(
+                fname, stream, self.ffmpeg_path, self.gui_mode
+            )
         if timings is None:
             # Fallback: extract raw SUP stream and parse binary
             logger.info("ffprobe fast path unavailable, extracting raw PGS stream...")

From abab44a33c3c296c56a47b99f2d6cddf6305322c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sun, 22 Mar 2026 14:47:02 +0100
Subject: [PATCH 7/9] Cleanup

---
 ffsubsync/speech_transformers.py | 236 ++-----------------------------
 1 file changed, 9 insertions(+), 227 deletions(-)

diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py
index 3d36c1f..4b43c21 100644
--- a/ffsubsync/speech_transformers.py
+++ b/ffsubsync/speech_transformers.py
@@ -568,118 +568,6 @@ def find_pgs_stream(
     return None
 
 
-def _get_pgs_timings_via_mkvextract(
-    fname: str,
-    stream: str,
-    gui_mode: bool = False,
-) -> Optional[List[Tuple[float, float]]]:
-    """Fastest path: extract PGS SUP data using mkvextract (mkvtoolnix).
-
-    mkvextract uses the MKV Cues (seek index) to jump directly to subtitle
-    clusters, skipping all video/audio data.  Roughly 3x faster than
-    ``ffprobe -show_packets`` for files on slow (network) mounts.
-
-    Returns ``None`` if mkvtoolnix is not installed or extraction fails.
-    """
-    import json
-    import os
-    import shutil
-    import tempfile
-
-    if not shutil.which("mkvextract") or not shutil.which("mkvmerge"):
-        logger.debug("mkvextract/mkvmerge not found in PATH")
-        return None
-
-    # mkvextract only makes sense for MKV containers.
-    if not fname.lower().endswith((".mkv", ".mka", ".mks")):
-        logger.debug("mkvextract skipped: not an MKV container")
-        return None
-
-    # --- Resolve mkvmerge track ID from stream specifier ---
-    try:
-        proc = subprocess.Popen(
-            ["mkvmerge", "--identify", "--identification-format", "json", fname],
-            stdin=subprocess.PIPE,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.DEVNULL,
-        )
-        stdout, _ = proc.communicate(timeout=15)
-        if proc.returncode not in (0, 1):  # mkvmerge exits 1 for warnings
-            logger.debug("mkvmerge --identify failed (returncode %d)", proc.returncode)
-            return None
-        info = json.loads(stdout.decode("utf-8", errors="replace"))
-    except Exception as exc:
-        logger.debug("mkvmerge --identify exception: %s", exc)
-        return None
-
-    tracks = info.get("tracks", [])
-    pgs_tracks = [
-        t
-        for t in tracks
-        if t.get("type") == "subtitles" and "PGS" in t.get("codec", "").upper()
-    ]
-    if not pgs_tracks:
-        logger.debug("mkvmerge found no PGS subtitle tracks")
-        return None
-
-    # Map "0:s:N" → N-th PGS track; "0:N" or "N" → track with that ID.
-    track_id: Optional[int] = None
-    if "s:" in stream:
-        try:
-            sub_idx = int(stream.rsplit(":", 1)[-1])
-            if sub_idx < len(pgs_tracks):
-                track_id = pgs_tracks[sub_idx]["id"]
-        except (ValueError, KeyError, IndexError):
-            pass
-    else:
-        try:
-            abs_idx = int(stream.rsplit(":", 1)[-1])
-            for t in pgs_tracks:
-                if t.get("id") == abs_idx:
-                    track_id = abs_idx
-                    break
-        except (ValueError, KeyError):
-            pass
-    if track_id is None:
-        track_id = pgs_tracks[0]["id"]  # fallback: first PGS track
-
-    # --- Extract SUP stream to a local temp file ---
-    # mkvextract writes track data to a named file, not to stdout, so we use
-    # a temp file on local disk (fast SSD write) then read it back.
-    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".sup")
-    os.close(tmp_fd)
-    try:
-        proc = subprocess.Popen(
-            ["mkvextract", "tracks", fname, "{}:{}".format(track_id, tmp_path)],
-            stdin=subprocess.PIPE,
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-        )
-        try:
-            proc.wait(timeout=300)
-        except subprocess.TimeoutExpired:
-            proc.kill()
-            logger.debug("mkvextract timed out")
-            return None
-        if proc.returncode != 0:
-            logger.debug("mkvextract failed (returncode %d)", proc.returncode)
-            return None
-        with open(tmp_path, "rb") as fh:
-            pgs_data = fh.read()
-    except Exception as exc:
-        logger.debug("mkvextract exception: %s", exc)
-        return None
-    finally:
-        try:
-            os.unlink(tmp_path)
-        except OSError:
-            pass
-
-    if not pgs_data:
-        return None
-    return _parse_pgs_timings(pgs_data)
-
-
 def _get_pgs_timings_via_ffprobe(
     fname: str,
     stream: str,
@@ -758,82 +646,6 @@ def _get_pgs_timings_via_ffprobe(
     return results
 
 
-def _parse_pgs_timings(data: bytes) -> List[Tuple[float, float]]:
-    """Parse raw PGS (Presentation Graphic Stream / SUP) binary data.
-
-    PGS packet header (13 bytes):
-      0-1  : "PG" magic (0x50 0x47)
-      2-5  : PTS  – unsigned 32-bit big-endian, 90 kHz ticks
-      6-9  : DTS  – unsigned 32-bit big-endian (ignored here)
-      10   : segment type (0x16 = PCS, 0x14 = PDS, 0x15 = ODS, 0x17 = WDS, 0x80 = END)
-      11-12: segment data length, big-endian
-
-    PCS data layout (offsets from byte 13):
-      0-1  : video width
-      2-3  : video height
-      4    : frame rate byte
-      5-6  : composition number
-      7    : composition state  (0x00=Normal, 0x40=Acquisition, 0x80=Epoch Start)
-      8    : palette update flag  (0x80 = palette-only, subtitle unchanged)
-      9    : palette ID
-      10   : number of composition objects
-
-    Logic:
-      - Palette-update-only PCS → skip (existing subtitle is unchanged).
-      - Any other PCS → close any currently-open subtitle first (handles
-        both explicit clears and back-to-back / epoch-start transitions),
-        then open a new one if num_objects > 0.
-    """
-    import struct
-
-    PGS_MAGIC = b"\x50\x47"  # "PG"
-    SEG_PCS = 0x16
-    PCS_PALETTE_UPDATE_OFFSET = 8
-    PCS_NUM_OBJECTS_OFFSET = 10
-    PCS_MIN_LENGTH = 11
-    HEADER_SIZE = 13
-
-    results: List[Tuple[float, float]] = []
-    current_start: Optional[float] = None
-    pos = 0
-
-    while pos + HEADER_SIZE <= len(data):
-        if data[pos : pos + 2] != PGS_MAGIC:
-            next_magic = data.find(PGS_MAGIC, pos + 1)
-            if next_magic == -1:
-                break
-            pos = next_magic
-            continue
-
-        pts: float = struct.unpack_from(">I", data, pos + 2)[0] / 90000.0
-        seg_type: int = data[pos + 10]
-        seg_length: int = struct.unpack_from(">H", data, pos + 11)[0]
-
-        if seg_type == SEG_PCS and seg_length >= PCS_MIN_LENGTH:
-            pcs_start = pos + HEADER_SIZE
-            palette_update_flag: int = data[pcs_start + PCS_PALETTE_UPDATE_OFFSET]
-            num_objects: int = data[pcs_start + PCS_NUM_OBJECTS_OFFSET]
-
-            if palette_update_flag == 0x80:
-                # Palette-only update: subtitle image unchanged, skip entirely.
-                pass
-            else:
-                # Any other PCS closes the currently-displayed subtitle (if any).
-                # This correctly handles:
-                #   - explicit clear (num_objects=0 after a subtitle)
-                #   - back-to-back subtitles (num_objects>0 replaces previous)
-                #   - epoch start (composition_state=0x80, implicit clear)
-                if current_start is not None:
-                    results.append((current_start, pts))
-                    current_start = None
-                if num_objects > 0:
-                    current_start = pts
-
-        pos += HEADER_SIZE + seg_length
-
-    return results
-
-
 class PGSSpeechTransformer(TransformerMixin, ComputeSpeechFrameBoundariesMixin):
     """Use PGS (Presentation Graphic Stream) subtitle timings as a sync reference.
 
@@ -883,47 +695,17 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer":
                 stream = "0:" + stream
 
         logger.info("reading PGS timings for stream %s from %s...", stream, fname)
-        timings = _get_pgs_timings_via_mkvextract(fname, stream, self.gui_mode)
-        if timings is not None:
-            logger.info("used mkvextract fast path (%d segments)", len(timings))
-        else:
-            logger.info("mkvextract unavailable or failed, trying ffprobe...")
-            timings = _get_pgs_timings_via_ffprobe(
-                fname, stream, self.ffmpeg_path, self.gui_mode
-            )
+        timings = _get_pgs_timings_via_ffprobe(
+            fname, stream, self.ffmpeg_path, self.gui_mode
+        )
         if timings is None:
-            # Fallback: extract raw SUP stream and parse binary
-            logger.info("ffprobe fast path unavailable, extracting raw PGS stream...")
-            ffmpeg_args = [
-                ffmpeg_bin_path(
-                    "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path
-                ),
-                "-loglevel",
-                "fatal",
-                "-nostdin",
-                "-i",
-                fname,
-                "-map",
-                stream,
-                "-c:s",
-                "copy",
-                "-f",
-                "sup",
-                "-",
-            ]
-            process = subprocess.Popen(
-                ffmpeg_args, **subprocess_args(include_stdout=True)
-            )
-            pgs_data, _ = process.communicate()
-            if process.returncode != 0 or not pgs_data:
-                raise ValueError(
-                    "Failed to extract PGS stream {} from {}. "
-                    "Make sure the stream exists and is an hdmv_pgs_subtitle track "
-                    "(check with: ffprobe -show_streams {}).".format(
-                        stream, fname, fname
-                    )
+            raise ValueError(
+                "Failed to get PGS timings via ffprobe for stream {} from {}. "
+                "Make sure the stream exists and is an hdmv_pgs_subtitle track "
+                "(check with: ffprobe -show_streams {}).".format(
+                    stream, fname, fname
                 )
-            timings = _parse_pgs_timings(pgs_data)
+            )
 
         if not timings:
             raise ValueError(

From 385403f6d145430b1b2e29d7c3bac5538e7ef686 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sun, 22 Mar 2026 15:03:17 +0100
Subject: [PATCH 8/9] Switch to ffmpeg python module

---
 ffsubsync/speech_transformers.py | 80 +++++++++++---------------------
 1 file changed, 28 insertions(+), 52 deletions(-)

diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py
index 4b43c21..c6236c5 100644
--- a/ffsubsync/speech_transformers.py
+++ b/ffsubsync/speech_transformers.py
@@ -574,7 +574,7 @@ def _get_pgs_timings_via_ffprobe(
     ffmpeg_path: Optional[str] = None,
     gui_mode: bool = False,
 ) -> Optional[List[Tuple[float, float]]]:
-    """Fast path: read PGS timings from MKV container metadata using ffprobe.
+    """Read PGS timings from container metadata using ffprobe.
 
     MKV stores per-packet PTS and duration for subtitle streams, so we can
     get start/end timestamps without extracting or parsing the raw SUP binary.
@@ -582,7 +582,7 @@ def _get_pgs_timings_via_ffprobe(
     are tiny (~30-byte) packets with ``duration_time=N/A``.
 
     Returns a list of ``(start_seconds, end_seconds)`` tuples, or ``None`` if
-    ffprobe fails or the durations are not usable (fall back to SUP parsing).
+    ffprobe fails or returns no usable durations.
     """
     ffprobe_cmd = ffmpeg_bin_path(
         "ffprobe", gui_mode, ffmpeg_resources_path=ffmpeg_path
@@ -590,56 +590,34 @@ def _get_pgs_timings_via_ffprobe(
     # ffprobe -select_streams does not accept the "0:" input-index prefix;
     # strip it so "0:s:0" → "s:0" and "0:3" → "3".
     probe_stream = stream[2:] if stream.startswith("0:") else stream
-    args = [
-        ffprobe_cmd,
-        "-v",
-        "quiet",
-        "-show_packets",
-        "-select_streams",
-        probe_stream,
-        "-show_entries",
-        "packet=pts_time,duration_time,size",
-        fname,
-    ]
-    process = subprocess.Popen(args, **subprocess_args(include_stdout=True))
-    stdout, _ = process.communicate()
-    if process.returncode != 0 or not stdout:
+    try:
+        probe_data = ffmpeg.probe(
+            fname,
+            cmd=ffprobe_cmd,
+            show_packets=None,
+            select_streams=probe_stream,
+            show_entries="packet=pts_time,duration_time,size",
+        )
+    except Exception:
         return None
 
     results: List[Tuple[float, float]] = []
-    pts_time: Optional[float] = None
-    duration_time: Optional[float] = None
-    size: Optional[int] = None
-
-    for raw_line in stdout.decode("utf-8", errors="replace").splitlines():
-        line = raw_line.strip()
-        if line == "[PACKET]":
-            pts_time = duration_time = size = None
-        elif line == "[/PACKET]":
-            if (
-                pts_time is not None
-                and duration_time is not None
-                and size is not None
-                and size > 50  # skip clear events (~30 bytes)
-            ):
-                results.append((pts_time, pts_time + duration_time))
-        elif line.startswith("pts_time="):
-            try:
-                pts_time = float(line.split("=", 1)[1])
-            except ValueError:
-                pass
-        elif line.startswith("duration_time="):
-            val = line.split("=", 1)[1]
-            if val != "N/A":
-                try:
-                    duration_time = float(val)
-                except ValueError:
-                    pass
-        elif line.startswith("size="):
-            try:
-                size = int(line.split("=", 1)[1])
-            except ValueError:
-                pass
+    for packet in probe_data.get("packets", []):
+        pts_time_str = packet.get("pts_time")
+        duration_time_str = packet.get("duration_time")
+        size_str = packet.get("size")
+        if pts_time_str is None or duration_time_str is None or size_str is None:
+            continue
+        if duration_time_str == "N/A":
+            continue
+        try:
+            pts_time = float(pts_time_str)
+            duration_time = float(duration_time_str)
+            size = int(size_str)
+        except ValueError:
+            continue
+        if size > 50:  # skip clear events (~30 bytes)
+            results.append((pts_time, pts_time + duration_time))
 
     if not results:
         return None
@@ -702,9 +680,7 @@ def fit(self, fname: str, *_) -> "PGSSpeechTransformer":
             raise ValueError(
                 "Failed to get PGS timings via ffprobe for stream {} from {}. "
                 "Make sure the stream exists and is an hdmv_pgs_subtitle track "
-                "(check with: ffprobe -show_streams {}).".format(
-                    stream, fname, fname
-                )
+                "(check with: ffprobe -show_streams {}).".format(stream, fname, fname)
             )
 
         if not timings:

From b4b41d323aa18b352b115bf4d186e8245032628b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciek=20W=C3=B3jcikowski?= <maciek@wojcikowski.pl>
Date: Sun, 22 Mar 2026 19:43:11 +0100
Subject: [PATCH 9/9] Add tests

---
 tests/test_pgs.py | 117 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 tests/test_pgs.py

diff --git a/tests/test_pgs.py b/tests/test_pgs.py
new file mode 100644
index 0000000..ea48f2e
--- /dev/null
+++ b/tests/test_pgs.py
@@ -0,0 +1,117 @@
+# -*- coding: utf-8 -*-
+from unittest.mock import patch
+
+import pytest
+
+from ffsubsync.speech_transformers import _get_pgs_timings_via_ffprobe
+
+
+def _make_packet(pts_time, duration_time, size):  # duration_time=None -> "N/A"
+    return {  # one fake entry for the "packets" list; every value is a string
+        "pts_time": str(pts_time),
+        "duration_time": "N/A" if duration_time is None else str(duration_time),
+        "size": str(size),
+    }
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_basic(mock_probe, mock_bin):  # innermost @patch supplies the first argument
+    mock_probe.return_value = {
+        "packets": [
+            _make_packet(1.0, 2.5, 1000),
+            _make_packet(5.0, 1.0, 800),
+        ]
+    }
+    result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0")
+    assert result == [(1.0, 3.5), (5.0, 6.0)]  # (pts_time, pts_time + duration_time)
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_strips_0_prefix_from_stream(mock_probe, mock_bin):
+    """'0:s:0' should be passed to ffprobe as 's:0'."""
+    mock_probe.return_value = {"packets": [_make_packet(0.0, 1.0, 100)]}
+    _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0")
+    _, kwargs = mock_probe.call_args  # select_streams is passed to probe() by keyword
+    assert kwargs["select_streams"] == "s:0"
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_stream_without_prefix_unchanged(mock_probe, mock_bin):
+    mock_probe.return_value = {"packets": [_make_packet(0.0, 1.0, 100)]}
+    _get_pgs_timings_via_ffprobe("test.mkv", "s:1")  # no leading "0:" to strip
+    _, kwargs = mock_probe.call_args
+    assert kwargs["select_streams"] == "s:1"  # specifier forwarded as given
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_skips_clear_events_small_size(mock_probe, mock_bin):
+    """Packets with size <= 50 are clear events and must be skipped."""
+    mock_probe.return_value = {
+        "packets": [
+            _make_packet(1.0, 2.0, 1000),  # show event
+            _make_packet(3.0, 0.001, 30),  # clear event, size <= 50
+        ]
+    }
+    result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0")
+    assert result == [(1.0, 3.0)]  # only the 1000-byte packet survives the size filter
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_skips_na_duration(mock_probe, mock_bin):
+    """Packets with duration_time=N/A must be skipped."""
+    mock_probe.return_value = {
+        "packets": [
+            _make_packet(1.0, None, 1000),  # N/A duration
+            _make_packet(5.0, 2.0, 900),
+        ]
+    }
+    result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0")
+    assert result == [(5.0, 7.0)]  # only the packet with a numeric duration is kept
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_returns_none_when_no_usable_packets(mock_probe, mock_bin):
+    """Returns None (not an empty list) if every packet is filtered out."""
+    mock_probe.return_value = {
+        "packets": [
+            _make_packet(1.0, None, 1000),  # N/A duration
+            _make_packet(2.0, 1.0, 20),  # too small
+        ]
+    }
+    assert _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") is None
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_returns_none_on_empty_packets(mock_probe, mock_bin):
+    mock_probe.return_value = {"packets": []}  # probe succeeds but reports no packets
+    assert _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") is None
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_returns_none_when_ffprobe_raises(mock_probe, mock_bin):
+    mock_probe.side_effect = Exception("ffprobe not found")  # caught by the function
+    assert _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0") is None
+
+
+@patch("ffsubsync.speech_transformers.ffmpeg_bin_path", return_value="ffprobe")
+@patch("ffsubsync.speech_transformers.ffmpeg.probe")
+def test_skips_packets_with_missing_fields(mock_probe, mock_bin):
+    """Packets missing any required field are silently skipped."""
+    mock_probe.return_value = {
+        "packets": [
+            {"pts_time": "1.0", "duration_time": "2.0"},  # missing size
+            {"pts_time": "3.0", "size": "500"},  # missing duration_time
+            {"duration_time": "1.0", "size": "500"},  # missing pts_time
+            _make_packet(10.0, 1.0, 200),  # valid
+        ]
+    }
+    result = _get_pgs_timings_via_ffprobe("test.mkv", "0:s:0")
+    assert result == [(10.0, 11.0)]  # only the packet with all three fields is kept