diff --git a/.changeset/av1-packet-trailers.md b/.changeset/av1-packet-trailers.md new file mode 100644 index 000000000..220c648f5 --- /dev/null +++ b/.changeset/av1-packet-trailers.md @@ -0,0 +1,8 @@ +--- +webrtc-sys: patch +libwebrtc: patch +livekit: patch +livekit-ffi: patch +--- + +Fix AV1 subscriber decode when packet trailers are enabled. diff --git a/livekit/tests/packet_trailer_test.rs b/livekit/tests/packet_trailer_test.rs index b89108d73..c0aae32a1 100644 --- a/livekit/tests/packet_trailer_test.rs +++ b/livekit/tests/packet_trailer_test.rs @@ -104,6 +104,17 @@ async fn test_timestamp_and_frame_id_vp8_e2ee() -> Result<()> { .await } +#[test_log::test(tokio::test)] +async fn test_timestamp_and_frame_id_av1() -> Result<()> { + run_packet_trailer_test(PacketTrailerTestParams { + attach_timestamp: true, + attach_frame_id: true, + e2ee: false, + codec: VideoCodec::AV1, + }) + .await +} + // ==================== Implementation ==================== /// Publishes solid-color video frames with packet trailer metadata (user_timestamp diff --git a/webrtc-sys/build.rs b/webrtc-sys/build.rs index 4535fc5e4..fa68a91dc 100644 --- a/webrtc-sys/build.rs +++ b/webrtc-sys/build.rs @@ -94,6 +94,7 @@ fn main() { "src/apm.cpp", "src/audio_mixer.cpp", "src/packet_trailer.cpp", + "src/packet_trailer_av1.cpp", ]); if is_desktop { diff --git a/webrtc-sys/include/livekit/packet_trailer.h b/webrtc-sys/include/livekit/packet_trailer.h index 5b30701eb..191dab2df 100644 --- a/webrtc-sys/include/livekit/packet_trailer.h +++ b/webrtc-sys/include/livekit/packet_trailer.h @@ -78,6 +78,14 @@ struct PacketTrailerMetadata { uint32_t ssrc; // SSRC that produced this entry (for simulcast tracking) }; +/// Parses a trailer payload (TLV region followed by the trailer envelope) and +/// returns the embedded metadata, or `std::nullopt` if the payload is missing +/// the magic envelope or contains no recognized TLV elements. +/// +/// Shared by the codec-agnostic trailer path and the AV1 OBU path. +std::optional ParseTrailerPayload( + webrtc::ArrayView trailer); + /// Frame transformer that appends/extracts packet trailers. /// This transformer can be used standalone or in conjunction with e2ee. /// @@ -168,12 +176,14 @@ class PacketTrailerTransformer : public webrtc::FrameTransformerInterface { std::vector AppendTrailer( webrtc::ArrayView data, uint64_t user_timestamp, - uint32_t frame_id); + uint32_t frame_id, + bool is_av1); /// Extract and remove frame metadata trailer from frame data std::optional ExtractTrailer( webrtc::ArrayView data, - std::vector& out_data); + std::vector& out_data, + bool is_av1); const Direction direction_; std::atomic enabled_{true}; diff --git a/webrtc-sys/include/livekit/packet_trailer_av1.h b/webrtc-sys/include/livekit/packet_trailer_av1.h new file mode 100644 index 000000000..dabbb9740 --- /dev/null +++ b/webrtc-sys/include/livekit/packet_trailer_av1.h @@ -0,0 +1,53 @@ +/* + * Copyright 2026 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "api/array_view.h" +#include "api/frame_transformer_interface.h" +#include "livekit/packet_trailer.h" + +namespace livekit_ffi { +namespace av1 { + +/// Returns true if the frame's MIME type identifies it as AV1. +bool IsAv1Frame(const webrtc::TransformableFrameInterface& frame); + +/// Inserts a LiveKit packet-trailer metadata OBU into an AV1 temporal unit. +/// +/// `trailer` is the already-built TLV trailer payload (see +/// [`PacketTrailerTransformer`]). The OBU is placed after any temporal +/// delimiter and sequence header OBUs so it is not mistaken for frame data. +std::vector InsertTrailerObu( + webrtc::ArrayView data, + webrtc::ArrayView trailer); + +/// Extracts and removes a LiveKit packet-trailer metadata OBU from an AV1 +/// temporal unit. +/// +/// On success the parsed metadata is returned and `out_data` receives the +/// frame data with the metadata OBU removed. Otherwise `out_data` receives +/// an unchanged copy of `data` and `std::nullopt` is returned. +std::optional ExtractTrailer( + webrtc::ArrayView data, + std::vector& out_data); + +} // namespace av1 +} // namespace livekit_ffi diff --git a/webrtc-sys/src/packet_trailer.cpp b/webrtc-sys/src/packet_trailer.cpp index 96ff6418f..aa7b43d69 100644 --- a/webrtc-sys/src/packet_trailer.cpp +++ b/webrtc-sys/src/packet_trailer.cpp @@ -21,6 +21,7 @@ #include #include "api/make_ref_counted.h" +#include "livekit/packet_trailer_av1.h" #include "livekit/peer_connection_factory.h" #include "livekit/rtp_receiver.h" #include "livekit/rtp_sender.h" @@ -37,8 +38,97 @@ uint64_t CurrentUnixTimeMicros() { std::chrono::duration_cast(now).count()); } +std::vector BuildTrailerPayload(uint64_t user_timestamp, + uint32_t frame_id) { + const bool has_frame_id = frame_id != 0; + const size_t trailer_len = kTimestampTlvSize + + (has_frame_id ? kFrameIdTlvSize : 0) + + kTrailerEnvelopeSize; + std::vector trailer; + trailer.reserve(trailer_len); + + // All TLV bytes are XORed with 0xFF to prevent H.264 NAL start code + // sequences (0x000001 / 0x00000001) from appearing inside the trailer. + trailer.push_back(kTagTimestampUs ^ 0xFF); + trailer.push_back(8 ^ 0xFF); + for (int i = 7; i >= 0; --i) { + trailer.push_back( + static_cast(((user_timestamp >> (i * 8)) & 0xFF) ^ 0xFF)); + } + + if (has_frame_id) { + trailer.push_back(kTagFrameId ^ 0xFF); + trailer.push_back(4 ^ 0xFF); + for (int i = 3; i >= 0; --i) { + trailer.push_back( + static_cast(((frame_id >> (i * 8)) & 0xFF) ^ 0xFF)); + } + } + + trailer.push_back(static_cast(trailer_len ^ 0xFF)); + trailer.insert(trailer.end(), std::begin(kPacketTrailerMagic), + std::end(kPacketTrailerMagic)); + return trailer; +} + } // namespace +std::optional ParseTrailerPayload( + webrtc::ArrayView trailer) { + if (trailer.size() < kTrailerEnvelopeSize) { + return std::nullopt; + } + + const uint8_t* magic_start = trailer.data() + trailer.size() - 4; + if (std::memcmp(magic_start, kPacketTrailerMagic, 4) != 0) { + return std::nullopt; + } + + uint8_t trailer_len = trailer[trailer.size() - 5] ^ 0xFF; + if (trailer_len != trailer.size() || trailer_len < kTrailerEnvelopeSize) { + return std::nullopt; + } + + size_t tlv_region_len = trailer_len - kTrailerEnvelopeSize; + PacketTrailerMetadata meta{0, 0, 0}; + bool found_any = false; + size_t pos = 0; + + while (pos + 2 <= tlv_region_len) { + uint8_t tag = trailer[pos] ^ 0xFF; + uint8_t len = trailer[pos + 1] ^ 0xFF; + pos += 2; + + if (pos + len > tlv_region_len) { + break; + } + + const uint8_t* val = trailer.data() + pos; + if (tag == kTagTimestampUs && len == 8) { + uint64_t ts = 0; + for (int i = 0; i < 8; ++i) { + ts = (ts << 8) | (val[i] ^ 0xFF); + } + meta.user_timestamp = ts; + found_any = true; + } else if (tag == kTagFrameId && len == 4) { + uint32_t fid = 0; + for (int i = 0; i < 4; ++i) { + fid = (fid << 8) | (val[i] ^ 0xFF); + } + meta.frame_id = fid; + found_any = true; + } + + pos += len; + } + + if (!found_any) { + return std::nullopt; + } + return meta; +} + // PacketTrailerTransformer implementation PacketTrailerTransformer::PacketTrailerTransformer(Direction direction) @@ -87,6 +177,7 @@ void PacketTrailerTransformer::TransformSend( uint32_t ssrc = frame->GetSsrc(); auto data = frame->GetData(); + const bool is_av1 = av1::IsAv1Frame(*frame); PacketTrailerMetadata meta_to_embed = LookupSendMetadata(*frame, ssrc, rtp_timestamp); emit_publish_timing(VideoPublishTimingStage::EncoderOutput, @@ -97,7 +188,7 @@ void PacketTrailerTransformer::TransformSend( std::vector new_data; if (enabled_.load()) { new_data = AppendTrailer(data, meta_to_embed.user_timestamp, - meta_to_embed.frame_id); + meta_to_embed.frame_id, is_av1); frame->SetData(webrtc::ArrayView(new_data)); } @@ -158,9 +249,10 @@ void PacketTrailerTransformer::TransformReceive( uint32_t ssrc = frame->GetSsrc(); uint32_t rtp_timestamp = frame->GetTimestamp(); auto data = frame->GetData(); + const bool is_av1 = av1::IsAv1Frame(*frame); std::vector stripped_data; - auto meta = ExtractTrailer(data, stripped_data); + auto meta = ExtractTrailer(data, stripped_data, is_av1); PacketTrailerMetadata timing_meta{0, 0, ssrc}; if (meta.has_value()) { @@ -238,49 +330,29 @@ void PacketTrailerTransformer::TransformReceive( std::vector PacketTrailerTransformer::AppendTrailer( webrtc::ArrayView data, uint64_t user_timestamp, - uint32_t frame_id) { - const bool has_frame_id = frame_id != 0; - const size_t trailer_len = kTimestampTlvSize + - (has_frame_id ? kFrameIdTlvSize : 0) + - kTrailerEnvelopeSize; - std::vector result; - result.reserve(data.size() + trailer_len); - - // Copy original data - result.insert(result.end(), data.begin(), data.end()); - - // All TLV bytes are XORed with 0xFF to prevent H.264 NAL start code - // sequences (0x000001 / 0x00000001) from appearing inside the trailer. - - // TLV: timestamp_us (tag=0x01, len=8, 8 bytes big-endian) - result.push_back(kTagTimestampUs ^ 0xFF); - result.push_back(8 ^ 0xFF); - for (int i = 7; i >= 0; --i) { - result.push_back( - static_cast(((user_timestamp >> (i * 8)) & 0xFF) ^ 0xFF)); - } + uint32_t frame_id, + bool is_av1) { + std::vector trailer = BuildTrailerPayload(user_timestamp, frame_id); - if (has_frame_id) { - // TLV: frame_id (tag=0x02, len=4, 4 bytes big-endian) - result.push_back(kTagFrameId ^ 0xFF); - result.push_back(4 ^ 0xFF); - for (int i = 3; i >= 0; --i) { - result.push_back( - static_cast(((frame_id >> (i * 8)) & 0xFF) ^ 0xFF)); - } + if (is_av1) { + return av1::InsertTrailerObu(data, trailer); } - // Envelope: trailer_len (1B, XORed) + magic (4B, NOT XORed) - result.push_back(static_cast(trailer_len ^ 0xFF)); - result.insert(result.end(), std::begin(kPacketTrailerMagic), - std::end(kPacketTrailerMagic)); - + std::vector result; + result.reserve(data.size() + trailer.size()); + result.insert(result.end(), data.begin(), data.end()); + result.insert(result.end(), trailer.begin(), trailer.end()); return result; } std::optional PacketTrailerTransformer::ExtractTrailer( webrtc::ArrayView data, - std::vector& out_data) { + std::vector& out_data, + bool is_av1) { + if (is_av1) { + return av1::ExtractTrailer(data, out_data); + } + if (data.size() < kTrailerEnvelopeSize) { out_data.assign(data.begin(), data.end()); return std::nullopt; @@ -302,48 +374,14 @@ std::optional PacketTrailerTransformer::ExtractTrailer( // Walk the TLV region: everything from trailer_start up to the envelope. const uint8_t* trailer_start = data.data() + data.size() - trailer_len; - size_t tlv_region_len = trailer_len - kTrailerEnvelopeSize; - - PacketTrailerMetadata meta{0, 0, 0}; - bool found_any = false; - size_t pos = 0; - - while (pos + 2 <= tlv_region_len) { - uint8_t tag = trailer_start[pos] ^ 0xFF; - uint8_t len = trailer_start[pos + 1] ^ 0xFF; - pos += 2; - - if (pos + len > tlv_region_len) { - break; - } - - const uint8_t* val = trailer_start + pos; - - if (tag == kTagTimestampUs && len == 8) { - uint64_t ts = 0; - for (int i = 0; i < 8; ++i) { - ts = (ts << 8) | (val[i] ^ 0xFF); - } - meta.user_timestamp = ts; - found_any = true; - } else if (tag == kTagFrameId && len == 4) { - uint32_t fid = 0; - for (int i = 0; i < 4; ++i) { - fid = (fid << 8) | (val[i] ^ 0xFF); - } - meta.frame_id = fid; - found_any = true; - } - // Unknown tags are silently skipped. - - pos += len; + auto meta = ParseTrailerPayload( + webrtc::ArrayView(trailer_start, trailer_len)); + if (!meta.has_value()) { + out_data.assign(data.begin(), data.end()); + return std::nullopt; } out_data.assign(data.begin(), data.end() - trailer_len); - - if (!found_any) { - return std::nullopt; - } return meta; } diff --git a/webrtc-sys/src/packet_trailer_av1.cpp b/webrtc-sys/src/packet_trailer_av1.cpp new file mode 100644 index 000000000..c1349feb7 --- /dev/null +++ b/webrtc-sys/src/packet_trailer_av1.cpp @@ -0,0 +1,224 @@ +/* + * Copyright 2026 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "livekit/packet_trailer_av1.h" + +#include +#include +#include +#include + +namespace livekit_ffi { +namespace av1 { + +namespace { + +constexpr uint8_t kAv1ObuSizePresentBit = 0b0000'0010; +constexpr uint8_t kAv1ObuExtensionFlag = 0b0000'0100; +constexpr uint8_t kAv1ObuTypeMask = 0b0111'1000; +constexpr uint8_t kAv1ObuTypeSequenceHeader = 1; +constexpr uint8_t kAv1ObuTypeTemporalDelimiter = 2; +constexpr uint8_t kAv1ObuTypeMetadata = 5; +constexpr uint64_t kAv1MetadataTypeLiveKitPacketTrailer = 31; + +void WriteLeb128(uint64_t value, std::vector& out) { + while (value >= 0x80) { + out.push_back(static_cast((value & 0x7F) | 0x80)); + value >>= 7; + } + out.push_back(static_cast(value)); +} + +bool ReadLeb128(webrtc::ArrayView data, + size_t& pos, + uint64_t& value) { + value = 0; + int shift = 0; + for (int bytes = 0; bytes < 8; ++bytes) { + if (pos >= data.size()) { + return false; + } + uint8_t byte = data[pos++]; + value |= static_cast(byte & 0x7F) << shift; + if ((byte & 0x80) == 0) { + return true; + } + shift += 7; + } + return false; +} + +std::vector BuildMetadataObu( + webrtc::ArrayView trailer) { + std::vector metadata_payload; + WriteLeb128(kAv1MetadataTypeLiveKitPacketTrailer, metadata_payload); + metadata_payload.insert(metadata_payload.end(), trailer.begin(), trailer.end()); + + std::vector obu; + obu.reserve(1 + 8 + metadata_payload.size()); + obu.push_back(static_cast((kAv1ObuTypeMetadata << 3) | + kAv1ObuSizePresentBit)); + WriteLeb128(metadata_payload.size(), obu); + obu.insert(obu.end(), metadata_payload.begin(), metadata_payload.end()); + return obu; +} + +size_t FindMetadataInsertOffset(webrtc::ArrayView data) { + size_t pos = 0; + size_t insert_offset = 0; + + while (pos < data.size()) { + const size_t obu_start = pos; + uint8_t obu_header = data[pos++]; + if ((obu_header & 0x80) != 0) { + return 0; + } + + const uint8_t obu_type = (obu_header & kAv1ObuTypeMask) >> 3; + if ((obu_header & kAv1ObuExtensionFlag) != 0) { + if (pos >= data.size()) { + return 0; + } + ++pos; + } + + size_t payload_size = data.size() - pos; + if ((obu_header & kAv1ObuSizePresentBit) != 0) { + uint64_t leb_payload_size = 0; + if (!ReadLeb128(data, pos, leb_payload_size) || + leb_payload_size > data.size() - pos) { + return 0; + } + payload_size = static_cast(leb_payload_size); + } + + const size_t obu_end = pos + payload_size; + if (obu_type == kAv1ObuTypeTemporalDelimiter) { + pos = obu_end; + continue; + } + + if (obu_type != kAv1ObuTypeSequenceHeader) { + break; + } + + insert_offset = obu_end; + pos = obu_end; + + if ((data[obu_start] & kAv1ObuSizePresentBit) == 0) { + break; + } + } + + return insert_offset; +} + +} // namespace + +bool IsAv1Frame(const webrtc::TransformableFrameInterface& frame) { + std::string mime_type = frame.GetMimeType(); + std::transform(mime_type.begin(), mime_type.end(), mime_type.begin(), + [](unsigned char c) { + return static_cast(std::tolower(c)); + }); + return mime_type.find("av1") != std::string::npos; +} + +std::vector InsertTrailerObu( + webrtc::ArrayView data, + webrtc::ArrayView trailer) { + std::vector obu = BuildMetadataObu(trailer); + if (data.empty()) { + return obu; + } + + const size_t insert_offset = FindMetadataInsertOffset(data); + std::vector result; + result.reserve(data.size() + obu.size()); + result.insert(result.end(), data.begin(), data.begin() + insert_offset); + result.insert(result.end(), obu.begin(), obu.end()); + result.insert(result.end(), data.begin() + insert_offset, data.end()); + return result; +} + +std::optional ExtractTrailer( + webrtc::ArrayView data, + std::vector& out_data) { + std::vector stripped_data; + stripped_data.reserve(data.size()); + size_t pos = 0; + + while (pos < data.size()) { + const size_t obu_start = pos; + uint8_t obu_header = data[pos++]; + if ((obu_header & 0x80) != 0) { + out_data.assign(data.begin(), data.end()); + return std::nullopt; + } + + const uint8_t obu_type = (obu_header & kAv1ObuTypeMask) >> 3; + if ((obu_header & kAv1ObuExtensionFlag) != 0) { + if (pos >= data.size()) { + out_data.assign(data.begin(), data.end()); + return std::nullopt; + } + ++pos; + } + + size_t payload_size = data.size() - pos; + if ((obu_header & kAv1ObuSizePresentBit) != 0) { + uint64_t leb_payload_size = 0; + if (!ReadLeb128(data, pos, leb_payload_size) || + leb_payload_size > data.size() - pos) { + out_data.assign(data.begin(), data.end()); + return std::nullopt; + } + payload_size = static_cast(leb_payload_size); + } + + const size_t payload_start = pos; + const size_t obu_end = payload_start + payload_size; + + if (obu_type == kAv1ObuTypeMetadata) { + auto metadata_payload = data.subview(payload_start, obu_end - payload_start); + size_t metadata_pos = 0; + uint64_t metadata_type = 0; + if (ReadLeb128(metadata_payload, metadata_pos, metadata_type) && + metadata_type == kAv1MetadataTypeLiveKitPacketTrailer && + metadata_pos <= metadata_payload.size()) { + auto trailer_payload = metadata_payload.subview( + metadata_pos, metadata_payload.size() - metadata_pos); + if (auto meta = ParseTrailerPayload(trailer_payload)) { + stripped_data.insert(stripped_data.end(), data.begin() + obu_end, + data.end()); + out_data = std::move(stripped_data); + return meta; + } + } + } + + stripped_data.insert(stripped_data.end(), data.begin() + obu_start, + data.begin() + obu_end); + + pos = obu_end; + } + + out_data.assign(data.begin(), data.end()); + return std::nullopt; +} + +} // namespace av1 +} // namespace livekit_ffi