diff --git a/web-ui/src/mpegts/audio/pcm-audio-player.ts b/web-ui/src/mpegts/audio/pcm-audio-player.ts index 97df8c43..ea493977 100644 --- a/web-ui/src/mpegts/audio/pcm-audio-player.ts +++ b/web-ui/src/mpegts/audio/pcm-audio-player.ts @@ -23,7 +23,7 @@ */ import { isIOS } from "../../lib/platform"; -import { maxBufferHoleSec, type PlayerConfig } from "../config"; +import type { PlayerConfig } from "../config"; import Log from "../utils/logger"; import { type Stretcher, WasmStretcher } from "./wasm-stretcher"; @@ -52,8 +52,6 @@ const CHAIN_RESTART_DELAY = 0.04; const HARD_RESYNC_THRESHOLD = 1.5; /** Input gaps/overlaps within this are absorbed silently (PTS jitter). */ const GAP_SNAP = 0.005; -/** Input gaps up to this long are filled with silence; larger ones re-anchor. */ -const MAX_SILENCE_GAP = 2.0; /** Fade-in length applied when the chain (re)starts, to avoid clicks. */ const FADE_SEC = 0.005; /** Proportional gain for drift correction via stretch ratio. */ @@ -72,6 +70,8 @@ const DRIFT_EMA_ALPHA = 0.4; const CONTROL_INTERVAL_MS = 250; /** Upper bound for pending (not yet scheduled) chunks. */ const MAX_PENDING_CHUNKS = 600; +/** Seconds of decoded PCM to keep in the pending scheduling window after a resync. */ +const PENDING_REFILL_WINDOW_SEC = 2.0; /** Control ticks between verbose drift diagnostics (~10s). */ const DRIFT_LOG_TICKS = 40; @@ -133,11 +133,8 @@ export class PCMAudioPlayer { private driftLogCounter = 0; private controlTimer: ReturnType<typeof setInterval> | null = null; + private isBuffering: boolean = false; private isSeeking: boolean = false; - /** Last `timeupdate` playback position, used to measure seek delta (seeking may already show the target time). */ - private lastKnownVideoTime: number = 0; - /** Set when a large seek already cancelled the scheduling chain in `onVideoSeeking`. 
*/ - private largeSeekCancelled: boolean = false; // Bound event handlers for cleanup private boundOnVideoSeeking: (() => void) | null = null; @@ -147,6 +144,10 @@ export class PCMAudioPlayer { private boundOnVolumeChange: (() => void) | null = null; private boundOnTimeUpdate: (() => void) | null = null; private boundOnRateChange: (() => void) | null = null; + private boundOnVideoWaiting: (() => void) | null = null; + private boundOnVideoStalled: (() => void) | null = null; + private boundOnVideoPlaying: (() => void) | null = null; + private boundOnVideoCanPlay: (() => void) | null = null; /** Called when AudioContext is blocked by autoplay policy (needs user interaction). */ onSuspended: (() => void) | null = null; @@ -187,7 +188,7 @@ export class PCMAudioPlayer { this.context.onstatechange = () => { Log.v(TAG, `AudioContext state changed to: ${this.context?.state}`); - if (this.context?.state === "running") { + if (this.context?.state === "running" && this.canScheduleAudio()) { this.resyncFromBuffer(this.videoElement?.currentTime ?? 0); } }; @@ -211,9 +212,6 @@ export class PCMAudioPlayer { this.setMuted(video.muted); }; this.boundOnTimeUpdate = () => { - if (!video.seeking) { - this.lastKnownVideoTime = video.currentTime; - } this.controlTick(); this.pump(); }; @@ -225,6 +223,14 @@ export class PCMAudioPlayer { // no audible interruption. 
this.controlTick(); }; + this.boundOnVideoWaiting = () => this.enterBuffering("waiting"); + this.boundOnVideoStalled = () => { + if (video.readyState < HTMLMediaElement.HAVE_FUTURE_DATA) { + this.enterBuffering("stalled"); + } + }; + this.boundOnVideoPlaying = () => this.maybeExitBuffering(); + this.boundOnVideoCanPlay = () => this.maybeExitBuffering(); video.addEventListener("seeking", this.boundOnVideoSeeking); video.addEventListener("seeked", this.boundOnVideoSeeked); @@ -233,6 +239,10 @@ export class PCMAudioPlayer { video.addEventListener("volumechange", this.boundOnVolumeChange); video.addEventListener("timeupdate", this.boundOnTimeUpdate); video.addEventListener("ratechange", this.boundOnRateChange); + video.addEventListener("waiting", this.boundOnVideoWaiting); + video.addEventListener("stalled", this.boundOnVideoStalled); + video.addEventListener("playing", this.boundOnVideoPlaying); + video.addEventListener("canplay", this.boundOnVideoCanPlay); this.controlTimer = setInterval(() => { this.controlTick(); @@ -253,6 +263,10 @@ export class PCMAudioPlayer { if (this.boundOnVolumeChange) this.videoElement.removeEventListener("volumechange", this.boundOnVolumeChange); if (this.boundOnTimeUpdate) this.videoElement.removeEventListener("timeupdate", this.boundOnTimeUpdate); if (this.boundOnRateChange) this.videoElement.removeEventListener("ratechange", this.boundOnRateChange); + if (this.boundOnVideoWaiting) this.videoElement.removeEventListener("waiting", this.boundOnVideoWaiting); + if (this.boundOnVideoStalled) this.videoElement.removeEventListener("stalled", this.boundOnVideoStalled); + if (this.boundOnVideoPlaying) this.videoElement.removeEventListener("playing", this.boundOnVideoPlaying); + if (this.boundOnVideoCanPlay) this.videoElement.removeEventListener("canplay", this.boundOnVideoCanPlay); } this.boundOnVideoSeeking = null; this.boundOnVideoSeeked = null; @@ -261,6 +275,10 @@ export class PCMAudioPlayer { this.boundOnVolumeChange = null; 
this.boundOnTimeUpdate = null; this.boundOnRateChange = null; + this.boundOnVideoWaiting = null; + this.boundOnVideoStalled = null; + this.boundOnVideoPlaying = null; + this.boundOnVideoCanPlay = null; this.videoElement = null; } @@ -281,11 +299,7 @@ export class PCMAudioPlayer { this.insertToBuffer(chunk); this.cleanupBuffer(); - if (!this.isSeeking && !this.videoElement?.paused) { - this.pendingChunks.push(chunk); - if (this.pendingChunks.length > MAX_PENDING_CHUNKS) { - this.pendingChunks.shift(); - } + if (this.canScheduleAudio()) { this.pump(); } } @@ -348,7 +362,7 @@ export class PCMAudioPlayer { */ private pump(): void { const ctx = this.context; - if (!ctx || !this.gainNode || this.isSeeking || this.pendingChunks.length === 0 || this.videoElement?.paused) { + if (!ctx || !this.gainNode || !this.canScheduleAudio()) { return; } @@ -371,7 +385,14 @@ export class PCMAudioPlayer { return; } - while (this.pendingChunks.length > 0) { + while (true) { + if (this.pendingChunks.length === 0) { + this.refillPendingFromBuffer(this.inputCursor ?? this.videoElement?.currentTime ?? 
0); + } + if (this.pendingChunks.length === 0) { + break; + } + // Throttle: keep at most SCHEDULE_AHEAD seconds scheduled ahead if (this.nextStartTime - ctx.currentTime >= SCHEDULE_AHEAD) { break; @@ -390,21 +411,10 @@ export class PCMAudioPlayer { const cursor = this.inputCursor as number; const delta = chunk.time - cursor; - if (delta > MAX_SILENCE_GAP) { - // Forward discontinuity: re-anchor at the new position - Log.v(TAG, `Audio stream jump +${delta.toFixed(3)}s, re-anchoring`); - this.cancelChain(true); - this.anchor(chunk.time); - continue; - } - + let chunkEndTime = chunk.endTime; if (delta > GAP_SNAP) { - // Small gap: fill with silence to keep the timeline correct - const gapFrames = Math.round(delta * chunk.sampleRate); - if (gapFrames > 0) { - this.feedStretcher(stretcher, new Float32Array(gapFrames * chunk.channels), chunk.sampleRate); - this.inputCursor = cursor + gapFrames / chunk.sampleRate; - } + Log.w(TAG, `Unexpected PCM pending gap ${delta.toFixed(3)}s; snapping to cursor`); + chunkEndTime = cursor + chunk.duration; } let samples = chunk.samples; @@ -421,7 +431,7 @@ export class PCMAudioPlayer { this.pendingChunks.shift(); this.feedStretcher(stretcher, samples, chunk.sampleRate); - this.inputCursor = chunk.endTime; + this.inputCursor = chunkEndTime; } } @@ -433,6 +443,59 @@ export class PCMAudioPlayer { this.videoElement?.pause(); } + // ==================== Media readiness ==================== + + private hasPlayableVideoData(): boolean { + const video = this.videoElement; + return ( + !!video && + !video.paused && + !video.seeking && + !this.isSeeking && + video.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA + ); + } + + private canScheduleAudio(): boolean { + return !this.isBuffering && this.hasPlayableVideoData(); + } + + private resetDriftState(): void { + this.driftEma = 0; + this.hasDriftEma = false; + } + + private enterBuffering(reason: "waiting" | "stalled"): void { + const video = this.videoElement; + if (!video || video.paused || 
video.seeking || this.isSeeking) { + return; + } + + if (!this.isBuffering) { + Log.v(TAG, `Video ${reason}; pausing PCM audio scheduling`); + } + this.isBuffering = true; + this.cancelChain(true); + this.pendingChunks = []; + this.inputCursor = null; + this.resetDriftState(); + } + + private maybeExitBuffering(): void { + const video = this.videoElement; + if (!video || !this.hasPlayableVideoData()) { + return; + } + + if (!this.isBuffering) { + return; + } + + this.isBuffering = false; + Log.v(TAG, "Video playback resumed; resyncing PCM audio"); + this.resyncFromBuffer(video.currentTime); + } + private anchor(time: number): void { this.stretcher?.reset(); // Feedforward the current playback rate immediately: waiting for the next @@ -575,7 +638,7 @@ export class PCMAudioPlayer { private controlTick(): void { const ctx = this.context; const video = this.videoElement; - if (!ctx || !video || ctx.state !== "running" || video.paused || this.isSeeking || !this.stretcher) { + if (!ctx || !video || ctx.state !== "running" || !this.canScheduleAudio() || !this.stretcher) { return; } @@ -617,6 +680,7 @@ export class PCMAudioPlayer { const correction = Math.min(correctionMax, Math.max(-correctionMax, correctionDrift * correctionGain)); const ratio = Math.min(2, Math.max(0.5, rate * (1 - correction))); this.stretcher.setRatio(ratio); + this.pump(); if (++this.driftLogCounter >= DRIFT_LOG_TICKS) { this.driftLogCounter = 0; @@ -629,6 +693,53 @@ export class PCMAudioPlayer { // ==================== Buffer Management ==================== + private trimChunkStart(chunk: AudioChunk, targetTime: number): AudioChunk | null { + if (targetTime <= chunk.time + GAP_SNAP) { + return chunk; + } + + const cutFrames = Math.round((targetTime - chunk.time) * chunk.sampleRate); + const totalFrames = Math.floor(chunk.samples.length / chunk.channels); + if (cutFrames >= totalFrames) { + return null; + } + + const time = chunk.time + cutFrames / chunk.sampleRate; + return { + samples: 
chunk.samples.subarray(cutFrames * chunk.channels), + channels: chunk.channels, + sampleRate: chunk.sampleRate, + time, + duration: chunk.endTime - time, + endTime: chunk.endTime, + }; + } + + private refillPendingFromBuffer(startTime: number): void { + if (this.pendingChunks.length >= MAX_PENDING_CHUNKS) return; + + const startIndex = this.findChunkIndexByTime(startTime); + if (startIndex < 0) return; + + const endTime = startTime + PENDING_REFILL_WINDOW_SEC; + for (let i = startIndex; i < this.audioBuffer.length && this.pendingChunks.length < MAX_PENDING_CHUNKS; i++) { + const source = this.audioBuffer[i]; + if (source.endTime <= startTime + GAP_SNAP) { + continue; + } + if (source.time >= endTime) { + break; + } + + const chunk = this.trimChunkStart(source, startTime); + if (!chunk) { + continue; + } + this.pendingChunks.push(chunk); + startTime = chunk.endTime; + } + } + private insertToBuffer(chunk: AudioChunk): void { let low = 0; let high = this.audioBuffer.length; @@ -641,10 +752,41 @@ export class PCMAudioPlayer { } } - if (low < this.audioBuffer.length && Math.abs(this.audioBuffer[low].time - chunk.time) < 0.001) { - this.audioBuffer[low] = chunk; + let normalized = chunk; + if (low > 0) { + const prev = this.audioBuffer[low - 1]; + if (normalized.time > prev.endTime) { + normalized = { ...normalized, time: prev.endTime, endTime: prev.endTime + normalized.duration }; + } else if (normalized.time < prev.endTime) { + const trimmed = this.trimChunkStart(normalized, prev.endTime); + if (!trimmed) { + return; + } + normalized = trimmed; + } + } + + if (low < this.audioBuffer.length && Math.abs(this.audioBuffer[low].time - normalized.time) < 0.001) { + this.audioBuffer[low] = normalized; + } else if (low < this.audioBuffer.length && normalized.endTime > this.audioBuffer[low].time) { + const next = this.audioBuffer[low]; + const keepFrames = Math.max(0, Math.round((next.time - normalized.time) * normalized.sampleRate)); + if (keepFrames === 0) { + return; + } + 
const totalFrames = Math.floor(normalized.samples.length / normalized.channels); + const frames = Math.min(keepFrames, totalFrames); + const endTime = normalized.time + frames / normalized.sampleRate; + this.audioBuffer.splice(low, 0, { + samples: normalized.samples.subarray(0, frames * normalized.channels), + channels: normalized.channels, + sampleRate: normalized.sampleRate, + time: normalized.time, + duration: endTime - normalized.time, + endTime, + }); } else { - this.audioBuffer.splice(low, 0, chunk); + this.audioBuffer.splice(low, 0, normalized); } } @@ -702,8 +844,7 @@ export class PCMAudioPlayer { this.cancelChain(true); this.pendingChunks = []; this.inputCursor = null; - this.driftEma = 0; - this.hasDriftEma = false; + this.resetDriftState(); const startIndex = this.findChunkIndexByTime(targetTime); if (startIndex < 0) { @@ -711,67 +852,24 @@ export class PCMAudioPlayer { return; } - for (let i = startIndex; i < this.audioBuffer.length; i++) { - let chunk = this.audioBuffer[i]; - if (i === startIndex && targetTime > chunk.time + GAP_SNAP) { - // Trim the head so the chain starts exactly at the target position - const cutFrames = Math.round((targetTime - chunk.time) * chunk.sampleRate); - const totalFrames = Math.floor(chunk.samples.length / chunk.channels); - if (cutFrames >= totalFrames) { - continue; - } - const time = chunk.time + cutFrames / chunk.sampleRate; - chunk = { - samples: chunk.samples.subarray(cutFrames * chunk.channels), - channels: chunk.channels, - sampleRate: chunk.sampleRate, - time, - duration: chunk.endTime - time, - endTime: chunk.endTime, - }; - } - this.pendingChunks.push(chunk); - } + this.refillPendingFromBuffer(targetTime); Log.v(TAG, `Resync at ${targetTime.toFixed(3)}s, refilled ${this.pendingChunks.length} chunks`); this.pump(); } private onVideoSeeking(): void { - this.largeSeekCancelled = false; - const video = this.videoElement; - if (video) { - const delta = Math.abs(video.currentTime - this.lastKnownVideoTime); - if 
(delta > maxBufferHoleSec(this.config)) { - this.cancelChain(); - this.pendingChunks = []; - this.largeSeekCancelled = true; - } - } + this.isBuffering = false; + this.cancelChain(); + this.pendingChunks = []; this.isSeeking = true; } private onVideoSeeked(): void { if (!this.videoElement) return; const targetTime = this.videoElement.currentTime; - const prevTime = this.lastKnownVideoTime; - const delta = targetTime - prevTime; - - if (delta > 0 && delta <= maxBufferHoleSec(this.config)) { - Log.v(TAG, `Small forward seek (${(delta * 1000).toFixed(0)}ms), skipping audio resync`); - this.isSeeking = false; - this.lastKnownVideoTime = targetTime; - this.pump(); - return; - } Log.v(TAG, `Video seeked to ${targetTime.toFixed(3)}, resyncing audio`); - if (!this.largeSeekCancelled) { - this.cancelChain(); - this.pendingChunks = []; - } - this.largeSeekCancelled = false; this.isSeeking = false; - this.lastKnownVideoTime = targetTime; this.resyncFromBuffer(targetTime); } @@ -786,8 +884,11 @@ export class PCMAudioPlayer { } catch (_e) { Log.w(TAG, "Failed to resume AudioContext on play()"); } - } else if (this.videoElement) { - this.resyncFromBuffer(this.videoElement.currentTime); + } else { + const video = this.videoElement; + if (video && this.canScheduleAudio()) { + this.resyncFromBuffer(video.currentTime); + } } if (this.audioElement) { @@ -800,6 +901,7 @@ export class PCMAudioPlayer { } pause(): void { + this.isBuffering = false; this.cancelChain(); this.pendingChunks = []; this.inputCursor = null; @@ -819,13 +921,13 @@ export class PCMAudioPlayer { this.pendingChunks = []; this.audioBuffer = []; + this.isBuffering = false; this.isSeeking = false; this.inputCursor = null; this.stretcher?.reset(); this.stretcherFailed = false; this.softSyncUntil = 0; - this.driftEma = 0; - this.hasDriftEma = false; + this.resetDriftState(); } setVolume(volume: number): void { diff --git a/web-ui/src/mpegts/config.ts b/web-ui/src/mpegts/config.ts index de600e74..f95ecaea 100644 --- 
a/web-ui/src/mpegts/config.ts +++ b/web-ui/src/mpegts/config.ts @@ -9,14 +9,6 @@ export interface PlayerConfig { /** PlaybackRate (clamped to [1, 2]) used for latency chasing. Requires `liveSync: true`. @default 1.2 */ liveSyncPlaybackRate: number; - /** - * Maximum media timestamp hole (milliseconds) treated as continuous at remux time; - * gaps at or below this size are bridged onto the output timeline. Also used by the - * PCM audio player to skip resync on small forward seeks. - * @default 300 - */ - maxBufferHoleMs: number; - /** URLs to WASM decoder files, keyed by codec. Omit to disable software decoding for that codec. * e.g. `{ mp2: "/assets/mp2_decoder.wasm" }` */ wasmDecoders: { mp2?: string }; @@ -38,8 +30,6 @@ export const defaultConfig: PlayerConfig = { liveSyncTargetLatency: 1.5, liveSyncPlaybackRate: 1.2, - maxBufferHoleMs: 300, - wasmDecoders: {}, bufferCleanupMaxBackward: 180, @@ -52,8 +42,3 @@ export const defaultConfig: PlayerConfig = { export function createDefaultConfig(): PlayerConfig { return { ...defaultConfig }; } - -/** `maxBufferHoleMs` as seconds for MSE / Web Audio timeline comparisons. 
*/ -export function maxBufferHoleSec(config: Pick<PlayerConfig, "maxBufferHoleMs">): number { - return config.maxBufferHoleMs / 1000; -} diff --git a/web-ui/src/mpegts/demux/ts-demuxer.ts b/web-ui/src/mpegts/demux/ts-demuxer.ts index 84e0760a..016619ca 100644 --- a/web-ui/src/mpegts/demux/ts-demuxer.ts +++ b/web-ui/src/mpegts/demux/ts-demuxer.ts @@ -53,6 +53,10 @@ type AdaptationFieldInfo = { random_access_indicator?: number; elementary_stream_priority_indicator?: number; }; +type CommonPidKey = keyof PMT["common_pids"]; +type TSDemuxerOptions = { + waitForInitialVideoKeyframe?: boolean; +}; type AACAudioMetadata = { codec: "aac"; audio_object_type: MPEG4AudioObjectTypes; @@ -100,9 +104,13 @@ type AudioData = data: MP3Data; }; +const VIDEO_PID_KEYS: readonly CommonPidKey[] = ["h264", "h265"]; +const AUDIO_PID_KEYS: readonly CommonPidKey[] = ["adts_aac", "loas_aac", "ac3", "eac3", "mp3"]; + export type OnErrorCallback = (type: string, info: string) => void; export type OnTrackMetadataCallback = (type: string, metadata: unknown) => void; -export type OnDataAvailableCallback = (audioTrack: unknown, videoTrack: unknown) => void; +export type OnDataAvailableCallback = (audioTrack: unknown, videoTrack: unknown, force?: boolean) => void; +export type OnTrackDiscontinuityCallback = (track: "audio" | "video") => void; class TSDemuxer { private readonly TAG: string = "TSDemuxer"; @@ -110,6 +118,7 @@ class TSDemuxer { public onError: OnErrorCallback | null = null; public onTrackMetadata: OnTrackMetadataCallback | null = null; public onDataAvailable: OnDataAvailableCallback | null = null; + public onTrackDiscontinuity: OnTrackDiscontinuityCallback | null = null; /** Software audio decode support (MP2) */ public onRawAudioData: ((frame: { codec: "mp2"; data: Uint8Array; pts: number }) => void) | null = null; @@ -127,6 +136,7 @@ class TSDemuxer { private pes_slice_queues_: PIDToSliceQueues = {}; private section_slice_queues_: PIDToSliceQueues = {}; + private continuity_counters_: Record<number, number> = {}; private 
video_metadata_: { vps: H265NaluHVC1 | undefined; @@ -159,9 +169,13 @@ class TSDemuxer { private video_init_segment_dispatched_ = false; private audio_init_segment_dispatched_ = false; private video_metadata_changed_ = false; + private video_output_started_ = false; + private video_discontinuity_pending_ = false; private loas_previous_frame: LOASAACFrame | null = null; private soft_decode_audio_codec_: "mp2" | null = null; + private audio_drop_until_sync_ = false; + private drop_video_until_keyframe_ = true; private video_track_ = { type: "video", @@ -182,9 +196,13 @@ class TSDemuxer { this.timestamp_offset_ = value; } - public constructor(probe_data: TSProbeResult) { + public constructor(probe_data: TSProbeResult, options: TSDemuxerOptions = {}) { this.ts_packet_size_ = probe_data.ts_packet_size as number; this.sync_offset_ = probe_data.sync_offset as number; + if (options.waitForInitialVideoKeyframe === false) { + this.drop_video_until_keyframe_ = false; + this.video_output_started_ = true; + } } public destroy() { @@ -201,10 +219,22 @@ class TSDemuxer { this.onError = null; this.onTrackMetadata = null; this.onDataAvailable = null; + this.onTrackDiscontinuity = null; this.onRawAudioData = null; this.soft_decode_audio_codec_ = null; } + public resetSegmentBoundary(probe_data?: TSProbeResult): void { + if (probe_data) { + this.ts_packet_size_ = probe_data.ts_packet_size as number; + this.sync_offset_ = probe_data.sync_offset as number; + } + this.first_parse_ = true; + this.pes_slice_queues_ = {}; + this.section_slice_queues_ = {}; + this.continuity_counters_ = {}; + } + public static probe(data: Uint8Array): TSProbeResult { let sync_offset = -1; let ts_packet_size = 188; @@ -261,6 +291,128 @@ class TSDemuxer { }; } + private isCommonPid(pid: number, keys: readonly CommonPidKey[]): boolean { + const commonPids = this.pmt_?.common_pids; + return !!commonPids && keys.some((key) => commonPids[key] === pid); + } + + private isVideoPid(pid: number): boolean { + return 
this.isCommonPid(pid, VIDEO_PID_KEYS); + } + + private isAudioPid(pid: number): boolean { + return this.isCommonPid(pid, AUDIO_PID_KEYS); + } + + private isMediaPid(pid: number): boolean { + return this.isVideoPid(pid) || this.isAudioPid(pid); + } + + private resetAudioParserState(): void { + this.audio_last_sample_pts_ = undefined; + this.aac_last_incomplete_data_ = null; + this.loas_previous_frame = null; + this.audio_drop_until_sync_ = true; + } + + private clearAudioTrack(): void { + this.audio_track_.samples = []; + this.audio_track_.length = 0; + } + + private clearVideoTrack(): void { + this.video_track_.samples = []; + this.video_track_.length = 0; + } + + private clearAudioPESQueues(): void { + const commonPids = this.pmt_?.common_pids; + if (!commonPids) { + return; + } + + for (const key of AUDIO_PID_KEYS) { + const pid = commonPids[key]; + if (pid !== undefined) { + delete this.pes_slice_queues_[pid]; + } + } + } + + private shouldWaitForVideoKeyframe(): boolean { + return this.has_video_ && !this.video_output_started_; + } + + private flushMediaBeforeTrackDiscontinuity(): void { + if (this.shouldWaitForVideoKeyframe() || !this.isInitSegmentDispatched()) { + return; + } + if (this.audio_track_.length || this.video_track_.length) { + this.onDataAvailable?.(this.audio_track_, this.video_track_, true); + } + } + + private resumeVideoOutputFromKeyframe(): void { + const reason = this.video_discontinuity_pending_ ? 
"after TS discontinuity; resuming" : "at stream start; starting"; + this.drop_video_until_keyframe_ = false; + this.video_output_started_ = true; + this.video_discontinuity_pending_ = false; + this.clearAudioTrack(); + this.resetAudioParserState(); + Log.v(this.TAG, `Video keyframe found ${reason} video output timeline`); + } + + private handleTrackDiscontinuity(pid: number, reason: string): void { + delete this.pes_slice_queues_[pid]; + + if (this.isVideoPid(pid)) { + this.flushMediaBeforeTrackDiscontinuity(); + this.clearVideoTrack(); + this.drop_video_until_keyframe_ = true; + this.video_output_started_ = false; + this.video_discontinuity_pending_ = true; + this.clearAudioTrack(); + this.clearAudioPESQueues(); + this.resetAudioParserState(); + Log.w(this.TAG, `Video TS discontinuity on pid ${pid}: ${reason}; dropping until keyframe`); + this.onTrackDiscontinuity?.("video"); + return; + } + + if (this.isAudioPid(pid)) { + this.resetAudioParserState(); + Log.w(this.TAG, `Audio TS discontinuity on pid ${pid}: ${reason}; resetting audio parser state`); + this.onTrackDiscontinuity?.("audio"); + } + } + + private shouldProcessPayload(pid: number, continuityCounter: number, discontinuityIndicator?: number): boolean { + if (discontinuityIndicator === 1) { + this.continuity_counters_[pid] = continuityCounter; + this.handleTrackDiscontinuity(pid, "discontinuity indicator"); + return true; + } + + const lastCounter = this.continuity_counters_[pid]; + if (lastCounter === undefined) { + this.continuity_counters_[pid] = continuityCounter; + return true; + } + + if (continuityCounter === lastCounter) { + Log.w(this.TAG, `Duplicate TS packet on pid ${pid} with continuity counter ${continuityCounter}; skipping`); + return false; + } + + const expected = (lastCounter + 1) & 0x0f; + if (continuityCounter !== expected) { + this.handleTrackDiscontinuity(pid, `expected continuity counter ${expected}, got ${continuityCounter}`); + } + + this.continuity_counters_[pid] = 
continuityCounter; + return true; + } + public parseChunks(chunk: Uint8Array, byte_start: number): number { if (!this.onError || !this.onTrackMetadata || !this.onDataAvailable) { throw new IllegalStateException("onError & onTrackMetadata & onDataAvailable callback must be specified"); @@ -347,15 +499,14 @@ class TSDemuxer { const stream_type = this.pmt_.pid_stream_type[pid]; // process PES only for known common_pids - if ( - pid === this.pmt_.common_pids.h264 || - pid === this.pmt_.common_pids.h265 || - pid === this.pmt_.common_pids.adts_aac || - pid === this.pmt_.common_pids.loas_aac || - pid === this.pmt_.common_pids.ac3 || - pid === this.pmt_.common_pids.eac3 || - pid === this.pmt_.common_pids.mp3 - ) { + if (this.isMediaPid(pid)) { + if (!this.shouldProcessPayload(pid, continuity_conunter, adaptation_field_info.discontinuity_indicator)) { + offset += 188; + if (this.ts_packet_size_ === 204) { + offset += 16; + } + continue; + } this.handlePESSlice(chunk, offset + ts_payload_start_index, ts_payload_length, { pid, stream_type, @@ -630,7 +781,7 @@ class TSDemuxer { this.parseH264Payload(payload, pts, dts, pes_data.file_position, pes_data.random_access_indicator); break; case StreamType.kH265: - this.parseH265Payload(payload, pts, dts, pes_data.file_position); + this.parseH265Payload(payload, pts, dts, pes_data.file_position, pes_data.random_access_indicator); break; default: break; @@ -877,6 +1028,13 @@ class TSDemuxer { const pts_ms = Math.floor(pts / this.timescale_); const dts_ms = Math.floor(dts / this.timescale_); + if (this.drop_video_until_keyframe_ || !this.video_output_started_) { + if (!keyframe || units.length === 0) { + return; + } + this.resumeVideoOutputFromKeyframe(); + } + if (units.length) { const track = this.video_track_; const avc_sample = { @@ -893,7 +1051,13 @@ class TSDemuxer { } } - private parseH265Payload(data: Uint8Array, pts: number | undefined, dts: number | undefined, file_position: number) { + private parseH265Payload( + data: 
Uint8Array, + pts: number | undefined, + dts: number | undefined, + file_position: number, + random_access_indicator?: number, + ) { const annexb_parser = new H265AnnexBParser(data); let nalu_payload: H265NaluPayload | null = null; const units: { type: H265NaluType; data: Uint8Array }[] = []; @@ -955,6 +1119,8 @@ class TSDemuxer { nalu_hvc1.type === H265NaluType.kSliceCRA_NUT ) { keyframe = true; + } else if (random_access_indicator === 1) { + keyframe = true; } // Push samples to remuxer only if initialization metadata has been dispatched @@ -972,6 +1138,13 @@ class TSDemuxer { const pts_ms = Math.floor(pts / this.timescale_); const dts_ms = Math.floor(dts / this.timescale_); + if (this.drop_video_until_keyframe_ || !this.video_output_started_) { + if (!keyframe || units.length === 0) { + return; + } + this.resumeVideoOutputFromKeyframe(); + } + if (units.length) { const track = this.video_track_; const hvc_sample = { @@ -1107,6 +1280,9 @@ class TSDemuxer { } private dispatchVideoMediaSegment() { + if (this.shouldWaitForVideoKeyframe()) { + return; + } if (this.isInitSegmentDispatched()) { if (this.video_track_.length) { this.onDataAvailable?.(null, this.video_track_); @@ -1115,6 +1291,9 @@ class TSDemuxer { } private dispatchAudioMediaSegment() { + if (this.shouldWaitForVideoKeyframe()) { + return; + } if (this.isInitSegmentDispatched()) { if (this.audio_track_.length) { this.onDataAvailable?.(this.audio_track_, null); @@ -1123,6 +1302,9 @@ class TSDemuxer { } private dispatchAudioVideoMediaSegment() { + if (this.shouldWaitForVideoKeyframe()) { + return; + } if (this.isInitSegmentDispatched()) { if (this.audio_track_.length || this.video_track_.length) { this.onDataAvailable?.(this.audio_track_, this.video_track_); @@ -1136,6 +1318,9 @@ class TSDemuxer { // Wait for first IDR frame and video init segment being dispatched return; } + if (this.shouldWaitForVideoKeyframe()) { + return; + } if (this.aac_last_incomplete_data_) { const buf = new 
Uint8Array(data.byteLength + this.aac_last_incomplete_data_.byteLength); @@ -1176,6 +1361,9 @@ class TSDemuxer { let last_sample_pts_ms: number | undefined; aac_frame = adts_parser.readNextAACFrame(); + if (aac_frame != null) { + this.audio_drop_until_sync_ = false; + } while (aac_frame != null) { ref_sample_duration = (1024 / aac_frame.sampling_frequency) * 1000; const audio_sample = { @@ -1231,6 +1419,9 @@ class TSDemuxer { // Wait for first IDR frame and video init segment being dispatched return; } + if (this.shouldWaitForVideoKeyframe()) { + return; + } if (this.aac_last_incomplete_data_) { const buf = new Uint8Array(data.byteLength + this.aac_last_incomplete_data_.byteLength); @@ -1271,6 +1462,9 @@ class TSDemuxer { let last_sample_pts_ms: number | undefined; aac_frame = loas_parser.readNextAACFrame(this.loas_previous_frame ?? undefined); + if (aac_frame != null) { + this.audio_drop_until_sync_ = false; + } while (aac_frame != null) { this.loas_previous_frame = aac_frame; ref_sample_duration = (1024 / aac_frame.sampling_frequency) * 1000; @@ -1327,6 +1521,9 @@ class TSDemuxer { // Wait for first IDR frame and video init segment being dispatched return; } + if (this.shouldWaitForVideoKeyframe()) { + return; + } let ref_sample_duration: number; let base_pts_ms!: number; @@ -1351,6 +1548,9 @@ class TSDemuxer { let last_sample_pts_ms: number | undefined; ac3_frame = adts_parser.readNextAC3Frame(); + if (ac3_frame != null) { + this.audio_drop_until_sync_ = false; + } while (ac3_frame != null) { ref_sample_duration = (1536 / ac3_frame.sampling_frequency) * 1000; const audio_sample = { @@ -1403,6 +1603,9 @@ class TSDemuxer { // Wait for first IDR frame and video init segment being dispatched return; } + if (this.shouldWaitForVideoKeyframe()) { + return; + } let ref_sample_duration: number; let base_pts_ms!: number; @@ -1427,6 +1630,9 @@ class TSDemuxer { let last_sample_pts_ms: number | undefined; eac3_frame = adts_parser.readNextEAC3Frame(); + if (eac3_frame != 
null) { + this.audio_drop_until_sync_ = false; + } while (eac3_frame != null) { ref_sample_duration = (1536 / eac3_frame.sampling_frequency) * 1000; const audio_sample = { @@ -1479,6 +1685,9 @@ class TSDemuxer { // Wait for first IDR frame and video init segment being dispatched return; } + if (this.shouldWaitForVideoKeyframe()) { + return; + } const _mpegAudioV10SampleRateTable = [44100, 48000, 32000, 0]; const _mpegAudioV20SampleRateTable = [22050, 24000, 16000, 0]; @@ -1523,6 +1732,12 @@ class TSDemuxer { // A payload may start mid-frame (frame straddling a PES boundary); header // fields parsed from such payloads are garbage and must not drive metadata. const sync_at_start = data.length >= 4 && data[0] === 0xff && (data[1] & 0xe0) === 0xe0; + if (this.audio_drop_until_sync_) { + if (!sync_at_start) { + return; + } + this.audio_drop_until_sync_ = false; + } if (this.onRawAudioData && !soft_decode_active && !sync_at_start) { // Can't classify a payload that starts mid-frame; wait for an aligned one return; diff --git a/web-ui/src/mpegts/player/live-sync.ts b/web-ui/src/mpegts/player/live-sync.ts index ccf6b73d..205e1d3b 100644 --- a/web-ui/src/mpegts/player/live-sync.ts +++ b/web-ui/src/mpegts/player/live-sync.ts @@ -3,7 +3,6 @@ import Log from "../utils/logger"; import { type LiveSessionAnchor, lagBehindLiveEdge } from "./wall-clock"; const TAG = "LiveSync"; -const STALL_TAG = "StallJumper"; /** Each live-edge underrun raises the latency floor by this much (seconds). */ const UNDERRUN_BACKOFF_STEP = 1; @@ -104,48 +103,3 @@ export function setupLiveSync( video.playbackRate = 1; }; } - -/** - * Detect and fix stuck playback at startup. - * If the video is stalled or hasn't received canplay and the currentTime is before - * the first buffered range, seek to the start of the buffered range. 
- */ -export interface StallJumper { - check(): void; - destroy(): void; -} - -export function setupStartupStallJumper(video: HTMLMediaElement): StallJumper { - let canplayReceived = false; - - function onCanPlay(): void { - canplayReceived = true; - video.removeEventListener("canplay", onCanPlay); - } - - function detectAndFix(isStalled?: boolean): void { - const buffered = video.buffered; - if (isStalled || !canplayReceived || video.readyState < 2) { - if (buffered.length > 0 && video.currentTime < buffered.start(0)) { - const target = buffered.start(0); - Log.w(STALL_TAG, `Playback stuck at ${video.currentTime}, seeking to ${target}`); - video.currentTime = target; - } - } - } - - function onStalled(): void { - detectAndFix(true); - } - - video.addEventListener("canplay", onCanPlay); - video.addEventListener("stalled", onStalled); - - return { - check: () => detectAndFix(), - destroy: () => { - video.removeEventListener("canplay", onCanPlay); - video.removeEventListener("stalled", onStalled); - }, - }; -} diff --git a/web-ui/src/mpegts/player/mpegts-player.ts b/web-ui/src/mpegts/player/mpegts-player.ts index fbfdb550..3e0634b8 100644 --- a/web-ui/src/mpegts/player/mpegts-player.ts +++ b/web-ui/src/mpegts/player/mpegts-player.ts @@ -1,38 +1,12 @@ import { markPlaybackUnlocked, PCMAudioPlayer } from "../audio/pcm-audio-player"; import type { PlayerConfig } from "../config"; import type { PlayerImpl, PlayerSegment } from "../types"; -import Log from "../utils/logger"; import type { WorkerCommand, WorkerEvent } from "../worker/messages"; import TransmuxWorker from "../worker/transmux-worker.ts?worker&inline"; -import { type StallJumper, setupLiveSync, setupStartupStallJumper } from "./live-sync"; +import { setupLiveSync } from "./live-sync"; import { createMSE, type MSE } from "./mse"; import type { LiveSessionAnchor } from "./wall-clock"; -const TAG = "Player"; - -/** Attach verbose listeners to media element events for diagnosing playback stalls. 
*/ -function setupVideoDebugLogs(video: HTMLVideoElement): () => void { - const events = ["loadedmetadata", "canplay", "playing", "waiting", "stalled", "pause", "seeking", "seeked", "error"]; - const handler = (e: Event) => { - const buffered: string[] = []; - for (let i = 0; i < video.buffered.length; i++) { - buffered.push(`${video.buffered.start(i).toFixed(2)}-${video.buffered.end(i).toFixed(2)}`); - } - Log.v( - TAG, - `video event '${e.type}': currentTime=${video.currentTime.toFixed(2)}, readyState=${video.readyState}, paused=${video.paused}, buffered=[${buffered.join(",")}]${e.type === "error" ? `, error=${video.error?.code}:${video.error?.message}` : ""}`, - ); - }; - for (const ev of events) { - video.addEventListener(ev, handler); - } - return () => { - for (const ev of events) { - video.removeEventListener(ev, handler); - } - }; -} - /** Check if a given time position is within any buffered range of the video element. */ export function isBuffered(video: HTMLMediaElement, seconds: number): boolean { const buffered = video.buffered; @@ -58,7 +32,6 @@ export function createMpegtsPlayer( let workerInitialized = false; let pendingSegments: PlayerSegment[] | null = null; let destroyLiveSync: (() => void) | null = null; - let stallJumper: StallJumper | null = null; let mseGeneration = 0; let liveSyncEnabled = config.liveSync; /** Live edge assuming continuous playback since session start. */ @@ -267,10 +240,6 @@ export function createMpegtsPlayer( // request, which restarts a live stream mid-flow and corrupts the timeline. // The MSE layer already defers appends while ManagedMediaSource streaming=false. - // Buffered ranges change exactly on SourceBuffer updateend; re-check for startup - // stalls there (iOS does not reliably fire progress/stalled on the media element). - mse.onBufferedChange = () => stallJumper?.check(); - mse.onSourceClose = () => { // The UA killed the media pipeline (e.g. iOS reclaiming resources in // background). 
Stop fetching — this session cannot be revived. @@ -297,16 +266,10 @@ export function createMpegtsPlayer( }; } - let destroyVideoDebugLogs: (() => void) | null = null; - function initLiveHelpers(): void { if (!destroyLiveSync && liveSyncEnabled) { destroyLiveSync = setupLiveSync(video, config, () => liveSessionAnchor); } - stallJumper?.destroy(); - stallJumper = setupStartupStallJumper(video); - destroyVideoDebugLogs?.(); - destroyVideoDebugLogs = setupVideoDebugLogs(video); } const onVideoPlay = () => markPlaybackUnlocked(); @@ -369,10 +332,6 @@ export function createMpegtsPlayer( destroyPCMPlayer(); destroyLiveSync?.(); destroyLiveSync = null; - stallJumper?.destroy(); - stallJumper = null; - destroyVideoDebugLogs?.(); - destroyVideoDebugLogs = null; }, destroy() { diff --git a/web-ui/src/mpegts/player/mse.ts b/web-ui/src/mpegts/player/mse.ts index 51574b10..12763b60 100644 --- a/web-ui/src/mpegts/player/mse.ts +++ b/web-ui/src/mpegts/player/mse.ts @@ -42,8 +42,6 @@ export interface MSE { onBufferFull: (() => void) | null; /** Fired when buffer space becomes available again after a previous onBufferFull. */ onBufferAvailable: (() => void) | null; - /** Fired after each SourceBuffer update completes (buffered ranges may have changed). */ - onBufferedChange: (() => void) | null; /** ManagedMediaSource: UA wants more media data appended (streaming → true). */ onStartStreaming: (() => void) | null; /** ManagedMediaSource: UA has enough buffered data (streaming → false). 
*/ @@ -291,7 +289,6 @@ export function createMSE(video: HTMLVideoElement, config: PlayerConfig): MSE { } function onSourceBufferUpdateEnd(): void { - mse.onBufferedChange?.(); tryApplyDuration(); if (hasPendingRemoveRanges()) { doRemoveRanges(); @@ -368,7 +365,6 @@ export function createMSE(video: HTMLVideoElement, config: PlayerConfig): MSE { const mse: MSE = { onBufferFull: null, onBufferAvailable: null, - onBufferedChange: null, onStartStreaming: null, onEndStreaming: null, onSourceClose: null, @@ -587,7 +583,6 @@ export function createMSE(video: HTMLVideoElement, config: PlayerConfig): MSE { mse.onBufferFull = null; mse.onBufferAvailable = null; - mse.onBufferedChange = null; mse.onStartStreaming = null; mse.onEndStreaming = null; mse.onSourceClose = null; diff --git a/web-ui/src/mpegts/remux/mp4-generator.ts b/web-ui/src/mpegts/remux/mp4-generator.ts index 627dd091..f5d6ba17 100644 --- a/web-ui/src/mpegts/remux/mp4-generator.ts +++ b/web-ui/src/mpegts/remux/mp4-generator.ts @@ -1176,10 +1176,10 @@ class MP4 { data.set( [ - 0x00, + 0x01, // version 0x00, 0x0f, - 0x01, // version(0) & flags + 0x01, // flags: data-offset + sample duration/size/flags/cts (sampleCount >>> 24) & 0xff, // sample_count (sampleCount >>> 16) & 0xff, (sampleCount >>> 8) & 0xff, @@ -1211,7 +1211,7 @@ class MP4 { (flags.isDependedOn << 6) | (flags.hasRedundancy << 4) | (flags.isNonSync || 0), 0x00, 0x00, // sample_degradation_priority - (cts >>> 24) & 0xff, // sample_composition_time_offset + (cts >>> 24) & 0xff, // sample_composition_time_offset (signed in trun version 1) (cts >>> 16) & 0xff, (cts >>> 8) & 0xff, cts & 0xff, diff --git a/web-ui/src/mpegts/remux/mp4-remuxer.ts b/web-ui/src/mpegts/remux/mp4-remuxer.ts index 82abd165..5c3395d4 100644 --- a/web-ui/src/mpegts/remux/mp4-remuxer.ts +++ b/web-ui/src/mpegts/remux/mp4-remuxer.ts @@ -83,6 +83,13 @@ interface MP4Sample { flags: MP4SampleFlags; } +interface TrackTimingState { + lastOriginalEndDts: number | undefined; + 
lastOutputEndDts: number | undefined; + lastOutputDuration: number | undefined; + durationResidual: number; +} + type InitSegmentCallback = (type: string, segment: InitSegment) => void; type MediaSegmentCallback = (type: string, segment: MediaSegment) => void; @@ -111,6 +118,12 @@ class MP4Remuxer { private _videoNextDts: number | undefined; private _audioStashedLastSample: AudioSample | null; private _videoStashedLastSample: VideoSample | null; + private _audioTiming: TrackTimingState; + private _videoTiming: TrackTimingState; + private _pcmTiming: TrackTimingState; + private _videoCtsOffset: number | undefined; + private _videoInitialCtsOffset: number | undefined; + private _videoInitialOutputTime: number | undefined; private _audioMeta: TrackMetadata | null; private _videoMeta: TrackMetadata | null; @@ -135,6 +148,12 @@ class MP4Remuxer { this._videoNextDts = undefined; this._audioStashedLastSample = null; this._videoStashedLastSample = null; + this._audioTiming = this._createTrackTimingState(); + this._videoTiming = this._createTrackTimingState(); + this._pcmTiming = this._createTrackTimingState(); + this._videoCtsOffset = undefined; + this._videoInitialCtsOffset = undefined; + this._videoInitialOutputTime = undefined; this._audioMeta = null; this._videoMeta = null; @@ -154,6 +173,12 @@ class MP4Remuxer { this._dtsBaseInited = false; this._silentAudioMode = false; this._silentAudioLastDts = undefined; + this._audioTiming = this._createTrackTimingState(); + this._videoTiming = this._createTrackTimingState(); + this._pcmTiming = this._createTrackTimingState(); + this._videoCtsOffset = undefined; + this._videoInitialCtsOffset = undefined; + this._videoInitialOutputTime = undefined; this._audioMeta = null; this._videoMeta = null; this._onInitSegment = null; @@ -196,13 +221,59 @@ class MP4Remuxer { this._silentAudioLastDts = undefined; } + private _createTrackTimingState(): TrackTimingState { + return { + lastOriginalEndDts: undefined, + lastOutputEndDts: undefined, 
+ lastOutputDuration: undefined, + durationResidual: 0, + }; + } + /** * Map upstream timestamps onto a continuous output timeline. * When `_nextDts` is set (continuous playback), always splice to it. - * After a discontinuity, keep upstream timing relative to the current remux base. + * After a discontinuity, preserve the last timeline correction and bridge + * forward holes so MSE stays on a continuous output timeline. */ - private _computeDtsCorrection(firstSampleOriginalDts: number, nextDts: number | undefined): number { - return nextDts !== undefined ? firstSampleOriginalDts - nextDts : 0; + private _computeDtsCorrection( + type: "audio" | "video", + firstSampleOriginalDts: number, + nextDts: number | undefined, + timing: TrackTimingState, + ): number { + if (nextDts !== undefined) { + return firstSampleOriginalDts - nextDts; + } + + if (timing.lastOriginalEndDts === undefined || timing.lastOutputEndDts === undefined) { + return firstSampleOriginalDts - this._dtsBaseOffset; + } + + const distance = firstSampleOriginalDts - timing.lastOriginalEndDts; + const bridgedDistance = distance > 0 ? 0 : distance; + if (bridgedDistance !== distance) { + Log.v(this.TAG, `${type}: bridging ${Math.round(distance)}ms timestamp hole after discontinuity`); + } + const expectedDts = timing.lastOutputEndDts + bridgedDistance; + return firstSampleOriginalDts - expectedDts; + } + + private _recordTrackTiming(timing: TrackTimingState, sample: MP4Sample): void { + timing.lastOriginalEndDts = sample.originalDts + sample.duration; + timing.lastOutputEndDts = sample.dts + sample.duration; + } + + private _nextSampleDuration(timing: TrackTimingState, refSampleDuration: unknown, fallbackDuration: number): number { + const reference = + typeof refSampleDuration === "number" && Number.isFinite(refSampleDuration) && refSampleDuration > 0 + ? refSampleDuration + : (timing.lastOutputDuration ?? 
fallbackDuration); + const withResidual = reference + timing.durationResidual; + const duration = Math.max(1, Math.round(withResidual)); + timing.durationResidual = withResidual - duration; + timing.lastOutputDuration = duration; + return duration; } /** @@ -213,7 +284,7 @@ class MP4Remuxer { this._dtsBaseOffset = offsetMs; } - remux(audioTrack: DemuxTrack | undefined, videoTrack: DemuxTrack | undefined): void { + remux(audioTrack: DemuxTrack | null | undefined, videoTrack: DemuxTrack | null | undefined, force = false): void { if (!this._onMediaSegment) { throw new IllegalStateException("MP4Remuxer: onMediaSegment callback must be specificed!"); } @@ -221,14 +292,10 @@ class MP4Remuxer { this._calculateDtsBase(audioTrack, videoTrack); } if (videoTrack) { - this._remuxVideo(videoTrack); + this._remuxVideo(videoTrack, force); } if (audioTrack) { - this._remuxAudio(audioTrack); - } - // In silent audio mode, generate silent frames synced to video - if (this._silentAudioMode && videoTrack?.samples?.length) { - this._generateSilentAudio(videoTrack); + this._remuxAudio(audioTrack, force); } } @@ -237,7 +304,7 @@ class MP4Remuxer { * Used in soft decode mode to keep MSE audio track active (prevents * Safari/Chrome from pausing video when tab goes to background). 
*/ - private _generateSilentAudio(videoTrack: DemuxTrack): void { + private _generateSilentAudio(videoSamples: MP4Sample[]): void { if (!this._audioMeta || !this._onMediaSegment) { return; } @@ -251,11 +318,14 @@ class MP4Remuxer { return; } - const videoSamples = videoTrack.samples as VideoSample[]; - const videoEndDts = videoSamples[videoSamples.length - 1].dts - this._dtsBase; + if (videoSamples.length === 0) { + return; + } + + const videoEndDts = videoSamples[videoSamples.length - 1].dts + videoSamples[videoSamples.length - 1].duration; if (this._silentAudioLastDts === undefined) { - this._silentAudioLastDts = videoSamples[0].dts - this._dtsBase; + this._silentAudioLastDts = videoSamples[0].dts; } const samples: Array<{ unit: Uint8Array; dts: number; pts: number }> = []; @@ -373,7 +443,10 @@ class MP4Remuxer { }); } - private _calculateDtsBase(audioTrack: DemuxTrack | undefined, videoTrack: DemuxTrack | undefined): void { + private _calculateDtsBase( + audioTrack: DemuxTrack | null | undefined, + videoTrack: DemuxTrack | null | undefined, + ): void { if (this._dtsBaseInited) { return; } @@ -385,11 +458,13 @@ class MP4Remuxer { this._videoDtsBase = (videoTrack.samples[0] as VideoSample).dts; } - // In silent audio mode, use video DTS as base (no real audio samples) - if (this._silentAudioMode) { + // With video present, the output timeline starts at the first emitted + // keyframe. Audio before that point is discarded/compressed onto this same + // video anchor so MSE never starts at an audio-only offset. + if (this._videoDtsBase !== Infinity) { this._dtsBase = this._videoDtsBase; } else { - this._dtsBase = Math.min(this._audioDtsBase, this._videoDtsBase); + this._dtsBase = this._audioDtsBase; } this._dtsBase -= this._dtsBaseOffset; this._dtsBaseInited = true; @@ -402,6 +477,39 @@ class MP4Remuxer { return this._dtsBase; } + getInitialPresentationOffset(): number { + return this._videoInitialCtsOffset ?? 
0; + } + + getInitialOutputTime(): number { + return this._videoInitialOutputTime ?? 0; + } + + mapPcmTimestamp(ptsMs: number, durationMs: number): number | undefined { + if (!this._dtsBaseInited) { + return undefined; + } + + const originalTime = ptsMs - this._dtsBase; + const duration = Math.max(0, durationMs); + let outputTime: number; + + if (this._pcmTiming.lastOriginalEndDts === undefined || this._pcmTiming.lastOutputEndDts === undefined) { + outputTime = Math.max(this.getInitialOutputTime(), originalTime - this.getInitialPresentationOffset()); + } else { + const distance = originalTime - this._pcmTiming.lastOriginalEndDts; + outputTime = this._pcmTiming.lastOutputEndDts + (distance > 0 ? 0 : distance); + if (distance > 0) { + Log.v(this.TAG, `PCM: bridging ${Math.round(distance)}ms timestamp hole`); + } + } + + this._pcmTiming.lastOriginalEndDts = originalTime + duration; + this._pcmTiming.lastOutputEndDts = outputTime + duration; + this._pcmTiming.lastOutputDuration = duration; + return outputTime / 1000; + } + flushStashedSamples(): void { const videoSample = this._videoStashedLastSample; const audioSample = this._audioStashedLastSample; @@ -446,7 +554,6 @@ class MP4Remuxer { const track = audioTrack; const samples = track.samples; - let dtsCorrection: number | undefined; let firstDts = -1; const refSampleDuration = this._audioMeta.refSampleDuration; @@ -500,55 +607,28 @@ class MP4Remuxer { const firstSampleOriginalDts = (samples[0] as AudioSample).dts - this._dtsBase; - dtsCorrection = this._computeDtsCorrection(firstSampleOriginalDts, this._audioNextDts); + const dtsCorrection = this._computeDtsCorrection( + "audio", + firstSampleOriginalDts, + this._audioNextDts, + this._audioTiming, + ); const mp4Samples: MP4Sample[] = []; + let nextOutputDts = firstSampleOriginalDts - dtsCorrection; // Correct dts for each sample, and calculate sample duration. 
Then output to mp4Samples for (let i = 0; i < samples.length; i++) { const sample = samples[i] as AudioSample; const originalDts = sample.dts - this._dtsBase; - let sampleDuration = 0; if (originalDts < -0.001) { continue; //pass the first sample with the invalid dts } - const dts = originalDts - (dtsCorrection as number); - - if (i !== samples.length - 1) { - const nextDts = (samples[i + 1] as AudioSample).dts - this._dtsBase - (dtsCorrection as number); - sampleDuration = nextDts - dts; - } else { - // the last sample - if (lastSample != null) { - // use stashed sample's dts to calculate sample duration - const nextDts = lastSample.dts - this._dtsBase - (dtsCorrection as number); - sampleDuration = nextDts - dts; - } else if (mp4Samples.length >= 1) { - // use second last sample duration - sampleDuration = mp4Samples[mp4Samples.length - 1].duration as number; - } else { - // the only one sample, use reference sample duration - sampleDuration = Math.floor(refSampleDuration as number); - } - } - - if (sampleDuration <= 0) { - const fallbackDuration = - Math.floor(refSampleDuration as number) || - (mp4Samples.length >= 1 ? 
(mp4Samples[mp4Samples.length - 1].duration as number) : 0) || - 26; - Log.w( - this.TAG, - `Audio: non-monotonic dts detected (dts: ${dts} ms, duration: ${Math.round(sampleDuration)} ms), ` + - `clamping sample duration to ${fallbackDuration} ms`, - ); - dtsCorrection = (dtsCorrection as number) + (sampleDuration - fallbackDuration); - sampleDuration = fallbackDuration; - } - - this._audioNextDts = dts + sampleDuration; + const dts = nextOutputDts; + const sampleDuration = this._nextSampleDuration(this._audioTiming, refSampleDuration, 26); + nextOutputDts += sampleDuration; if (firstDts === -1) { firstDts = dts; @@ -602,6 +682,9 @@ class MP4Remuxer { track.samples = mp4Samples; track.sequenceNumber++; + const latest = mp4Samples[mp4Samples.length - 1]; + this._audioNextDts = latest.dts + latest.duration; + this._recordTrackTiming(this._audioTiming, latest); let moofbox: Uint8Array; @@ -637,7 +720,6 @@ class MP4Remuxer { const track = videoTrack; const samples = track.samples; - let dtsCorrection: number | undefined; let firstDts = -1; if (!samples || samples.length === 0) { @@ -674,61 +756,45 @@ class MP4Remuxer { const firstSampleOriginalDts = (samples[0] as VideoSample).dts - this._dtsBase; - dtsCorrection = this._computeDtsCorrection(firstSampleOriginalDts, this._videoNextDts); + const dtsCorrection = this._computeDtsCorrection( + "video", + firstSampleOriginalDts, + this._videoNextDts, + this._videoTiming, + ); const mp4Samples: MP4Sample[] = []; + let nextOutputDts = firstSampleOriginalDts - dtsCorrection; + const presentationFloor = this._videoInitialOutputTime ?? this._dtsBaseOffset; // Correct dts for each sample, and calculate sample duration. 
Then output to mp4Samples for (let i = 0; i < samples.length; i++) { const sample = samples[i] as VideoSample; const originalDts = sample.dts - this._dtsBase; const isKeyframe = sample.isKeyframe; - const dts = originalDts - (dtsCorrection as number); - const cts = sample.cts; - const pts = dts + cts; - if (firstDts === -1) { - firstDts = dts; + if (this._videoCtsOffset === undefined) { + this._videoCtsOffset = sample.cts; + this._videoInitialCtsOffset = sample.cts; } - let sampleDuration = 0; - - if (i !== samples.length - 1) { - const nextDts = (samples[i + 1] as VideoSample).dts - this._dtsBase - (dtsCorrection as number); - sampleDuration = nextDts - dts; - } else { - // the last sample - if (lastSample != null) { - // use stashed sample's dts to calculate sample duration - const nextDts = lastSample.dts - this._dtsBase - (dtsCorrection as number); - sampleDuration = nextDts - dts; - } else if (mp4Samples.length >= 1) { - // use second last sample duration - sampleDuration = mp4Samples[mp4Samples.length - 1].duration as number; - } else { - // the only one sample, use reference sample duration - sampleDuration = Math.floor(this._videoMeta?.refSampleDuration ?? 0); - } + const dts = nextOutputDts; + const pts = originalDts - dtsCorrection + sample.cts - this._videoCtsOffset; + if (pts < presentationFloor - 0.001) { + mdatBytes -= sample.length; + continue; } - if (sampleDuration <= 0) { - // Spliced streams (e.g. telco catchup recordings) can regress dts mid-batch. A - // non-positive duration would be written into the trun box as a huge unsigned - // value and trigger a decode error, so clamp it to keep the timeline monotonic. - const fallbackDuration = - Math.floor(this._videoMeta?.refSampleDuration ?? 0) || - (mp4Samples.length >= 1 ? 
(mp4Samples[mp4Samples.length - 1].duration as number) : 0) || - 40; - Log.w( - this.TAG, - `Video: non-monotonic dts detected (dts: ${dts} ms, duration: ${Math.round(sampleDuration)} ms), ` + - `clamping sample duration to ${fallbackDuration} ms`, - ); - // Re-anchor the remaining samples of this batch so their dts continue right - // after the clamped sample (mirrors the inter-batch dtsCorrection behavior) - dtsCorrection = (dtsCorrection as number) + (sampleDuration - fallbackDuration); - sampleDuration = fallbackDuration; + if (firstDts === -1) { + firstDts = dts; } + if (this._videoInitialOutputTime === undefined) { + this._videoInitialOutputTime = dts; + } + + const sampleDuration = this._nextSampleDuration(this._videoTiming, this._videoMeta?.refSampleDuration, 40); + nextOutputDts += sampleDuration; + const cts = pts - dts; mp4Samples.push({ dts: dts, @@ -749,8 +815,15 @@ class MP4Remuxer { }); } + if (mp4Samples.length === 0) { + track.samples = []; + track.length = 0; + return; + } + const latest = mp4Samples[mp4Samples.length - 1]; this._videoNextDts = latest.dts + latest.duration; + this._recordTrackTiming(this._videoTiming, latest); track.samples = mp4Samples; track.sequenceNumber++; @@ -783,6 +856,9 @@ class MP4Remuxer { type: "video", data: segment.buffer, }); + if (this._silentAudioMode) { + this._generateSilentAudio(mp4Samples); + } } private _mergeBoxes(moof: Uint8Array, mdat: Uint8Array): Uint8Array { diff --git a/web-ui/src/mpegts/worker/pipeline.ts b/web-ui/src/mpegts/worker/pipeline.ts index c8cde906..b6856cb4 100644 --- a/web-ui/src/mpegts/worker/pipeline.ts +++ b/web-ui/src/mpegts/worker/pipeline.ts @@ -3,7 +3,7 @@ import { createDefaultConfig } from "../config"; import { WorkerAudioDecoder } from "../decoder/worker-audio-decoder"; import DemuxErrors from "../demux/demux-errors"; import TSDemuxer from "../demux/ts-demuxer"; -import { containsMoov, parseInitSegment, probeFmp4, splitInitFromSegment } from "../hls/fmp4"; +import { containsMoov, 
getSegmentStartTime, parseInitSegment, probeFmp4, splitInitFromSegment } from "../hls/fmp4"; import { type HlsInfo, HlsSource } from "../hls/hls-source"; import FetchLoader, { LoaderErrors } from "../io/fetch-loader"; import MP4Remuxer from "../remux/mp4-remuxer"; @@ -49,7 +49,6 @@ class LoadError extends Error { } const HLS_URL_RE = /\.m3u8?($|\?)/i; - /** Sentinel rejection value for intentionally cancelled segment loads. */ const CANCELLED = Symbol("cancelled"); @@ -98,6 +97,8 @@ class Pipeline { private _fmp4InitSent = false; private _fmp4Chunks: Uint8Array[] = []; private _lastInitUrl: string | null = null; + private _fmp4Timescales = new Map(); + private _fmp4TimestampOffsetWarningLogged = false; private _workerAudioDecoder: WorkerAudioDecoder | null = null; private _workerAudioDecoderInitPromise: Promise | null = null; @@ -108,7 +109,13 @@ class Pipeline { private _audioSamplesSinceAnchor = 0; private _audioSampleRate = 0; /** PCM decoded before the remuxer dts base is known (flushed once available). */ - private _pendingPcm: Array<{ pcm: Float32Array; channels: number; sampleRate: number; ptsMs: number }> = []; + private _pendingPcm: Array<{ + pcm: Float32Array; + channels: number; + sampleRate: number; + ptsMs: number; + durationMs: number; + }> = []; /** Incremented on audio timing resets to invalidate decode callbacks queued before the reset. 
*/ private _audioGen = 0; @@ -202,6 +209,8 @@ class Pipeline { this._fmp4InitSent = false; this._fmp4Chunks = []; this._lastInitUrl = null; + this._fmp4Timescales = new Map(); + this._fmp4TimestampOffsetWarningLogged = false; this._paused = false; this._resumeGate?.(); this._resumeGate = null; @@ -268,7 +277,7 @@ class Pipeline { if (this._runId !== runId) return; if (this._fmp4Mode) { - this._flushFmp4Segment(); + this._flushFmp4Segment(meta); } // Flush stashed samples at every segment boundary so the next segment's first // remux batch is not mixed with the previous segment's tail (which would share @@ -311,6 +320,10 @@ class Pipeline { this._resetAudioTiming(); } + private _shouldAnchorSegment(meta: SegmentMeta): boolean { + return meta.resetRemuxer || !this._hlsSource; + } + private _resetAudioTiming(): void { this._audioGen++; this._audioAnchorPtsMs = null; @@ -389,10 +402,19 @@ class Pipeline { // ---- MPEG-TS path ---- private _setupTSDemuxerRemuxer(probeData: unknown, meta: SegmentMeta): void { + const shouldAnchor = this._shouldAnchorSegment(meta); + if (this._hlsSource && !shouldAnchor && this._demuxer && this._remuxer) { + this._demuxer.resetSegmentBoundary(probeData as ConstructorParameters[0]); + return; + } + + const waitForInitialVideoKeyframe = shouldAnchor || !this._demuxer || !this._remuxer; if (this._demuxer) { this._demuxer.destroy(); } - const demuxer = new TSDemuxer(probeData as ConstructorParameters[0]); + const demuxer = new TSDemuxer(probeData as ConstructorParameters[0], { + waitForInitialVideoKeyframe, + }); this._demuxer = demuxer; if (!this._remuxer) { @@ -405,6 +427,14 @@ class Pipeline { demuxer.onError = this._onDemuxException.bind(this); demuxer.timestampBase = meta.timestampBase * 90000; // seconds → 90kHz ticks + demuxer.onTrackDiscontinuity = (track) => { + if (track === "video") { + this._remuxer?.flushStashedSamples(); + this._remuxer?.insertDiscontinuity(); + } + this._workerAudioDecoder?.reset(); + this._resetAudioTiming(); 
+ }; // Set up software audio decode callback when MP2 WASM URL is configured if (this._config.wasmDecoders.mp2) { @@ -455,7 +485,10 @@ class Pipeline { } private _sendFmp4Init(data: Uint8Array): void { - const codec = this._hlsSource?.info.codecs ?? parseInitSegment(data).codecs.join(","); + const initInfo = parseInitSegment(data); + this._fmp4Timescales = initInfo.timescales; + this._fmp4TimestampOffsetWarningLogged = false; + const codec = this._hlsSource?.info.codecs ?? initInfo.codecs.join(","); this._callbacks.onInitSegment("video", { type: "video", container: "video/mp4", @@ -465,13 +498,37 @@ class Pipeline { this._fmp4InitSent = true; } + private _warnFmp4TimestampOffsetUnavailable(reason: string): void { + if (this._fmp4TimestampOffsetWarningLogged) { + return; + } + this._fmp4TimestampOffsetWarningLogged = true; + Log.w(this.TAG, `fMP4 timestampOffset unavailable: ${reason}; appending media with original tfdt`); + } + + private _getFmp4TimestampOffset(meta: SegmentMeta, media: Uint8Array): number | undefined { + if (this._fmp4Timescales.size === 0) { + this._warnFmp4TimestampOffsetUnavailable("init segment timescales missing"); + return undefined; + } + + const segmentStart = getSegmentStartTime(media, this._fmp4Timescales); + if (segmentStart === null) { + this._warnFmp4TimestampOffsetUnavailable("media segment has no tfdt"); + return undefined; + } + + const timestampOffset = (meta.start - segmentStart) * 1000; + return Math.abs(timestampOffset) < 0.001 ? 0 : timestampOffset; + } + private _onFmp4Chunk(data: Uint8Array): number { this._fmp4Chunks.push(data); return data.byteLength; } /** Forward a fully buffered fMP4 segment to MSE (extracting the init part on first use). 
*/ - private _flushFmp4Segment(): void { + private _flushFmp4Segment(meta: SegmentMeta): void { if (this._fmp4Chunks.length === 0) { return; } @@ -496,7 +553,12 @@ class Pipeline { } if (media.byteLength > 0) { - this._callbacks.onMediaSegment("video", { type: "video", data: toArrayBuffer(media) }); + this._pendingDtsOffsetMs = 0; + this._callbacks.onMediaSegment("video", { + type: "video", + data: toArrayBuffer(media), + timestampOffset: this._getFmp4TimestampOffset(meta, media), + }); } } @@ -552,10 +614,10 @@ class Pipeline { * PCM decoded before the first remux (dts base unknown) is queued. */ private _emitPcm(pcm: Float32Array, channels: number, sampleRate: number, ptsMs: number): void { - this._pendingPcm.push({ pcm, channels, sampleRate, ptsMs }); + const durationMs = (Math.floor(pcm.length / channels) / sampleRate) * 1000; + this._pendingPcm.push({ pcm, channels, sampleRate, ptsMs, durationMs }); - const dtsBase = this._remuxer?.getTimestampBase(); - if (dtsBase === undefined) { + if (this._remuxer?.getTimestampBase() === undefined) { // Bound the queue: ~25s of audio at one payload per ~72ms is plenty if (this._pendingPcm.length > 512) { this._pendingPcm.shift(); @@ -564,7 +626,11 @@ class Pipeline { } for (const item of this._pendingPcm) { - this._callbacks.onPCMAudioData(item.pcm, item.channels, item.sampleRate, (item.ptsMs - dtsBase) / 1000); + const time = this._remuxer?.mapPcmTimestamp(item.ptsMs, item.durationMs); + if (time === undefined) { + break; + } + this._callbacks.onPCMAudioData(item.pcm, item.channels, item.sampleRate, time); } this._pendingPcm = []; }