From 5ccdaa2b6b637fbd511b5c14344cbf45bd1efcee Mon Sep 17 00:00:00 2001 From: Jonathan Lennox Date: Thu, 23 May 2024 17:29:28 -0400 Subject: [PATCH] fix(AV1 DD): Separate AV1 DD template's decodeTargetLayers and decodeTargetProtectedBy fields. (#2136) The former can be present when the latter is not. --- .../jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt | 2 +- .../Av1DDAdaptiveSourceProjectionContext.kt | 6 +- .../videobridge/cc/av1/Av1DDQualityFilter.kt | 3 +- .../Av1DependencyDescriptorHeaderExtension.kt | 45 +++++++------- ...DependencyDescriptorHeaderExtensionTest.kt | 61 +++++++++++++++++++ 5 files changed, 89 insertions(+), 28 deletions(-) diff --git a/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt b/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt index dbb83f0d58..27ea15920f 100644 --- a/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt +++ b/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt @@ -193,7 +193,7 @@ fun Av1DependencyDescriptorHeaderExtension.getScalabilityStructure( val layers = ArrayList() - structure.decodeTargetInfo.forEachIndexed { i, dt -> + structure.decodeTargetLayers.forEachIndexed { i, dt -> if (!activeDecodeTargetsBitmask.containsDecodeTarget(i)) { return@forEachIndexed } diff --git a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt index 6388dc3f04..af464bef34 100644 --- a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt +++ b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt @@ -182,7 +182,7 @@ class Av1DDAdaptiveSourceProjectionContext( } else { frame.frameInfo?.dtisPresent ?: emptySet() } - val chainsToCheck = dtsToCheck.map { structure.decodeTargetInfo[it].protectedBy }.toSet() + val chainsToCheck = dtsToCheck.mapNotNull { structure.decodeTargetProtectedBy.getOrNull(it) }.toSet() return map.nextFrameWith(frame) { if (it.isAccepted) return@nextFrameWith false if (it.frameInfo == null) { @@ -197,8 +197,8 @@ class Av1DDAdaptiveSourceProjectionContext( private fun Av1DDFrame.partOfActiveChain(chainIdx: Int): Boolean { val structure = structure ?: return false val frameInfo = frameInfo ?: return false - for (i in structure.decodeTargetInfo.indices) { - if (structure.decodeTargetInfo[i].protectedBy != chainIdx) { + for (i in structure.decodeTargetProtectedBy.indices) { + if (structure.decodeTargetProtectedBy[i] != chainIdx) { continue } if (frameInfo.dti[i] == DTI.NOT_PRESENT || frameInfo.dti[i] == DTI.DISCARDABLE) { diff --git a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt index 01808bd5a2..dc83977718 100644 --- a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt +++ b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt @@ -19,7 +19,6 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings import org.jitsi.nlj.RtpLayerDesc.Companion.SUSPENDED_ENCODING_ID import org.jitsi.nlj.RtpLayerDesc.Companion.SUSPENDED_INDEX import org.jitsi.nlj.RtpLayerDesc.Companion.getEidFromIndex -import org.jitsi.nlj.RtpLayerDesc.Companion.indexString import org.jitsi.nlj.rtp.codec.av1.Av1DDRtpLayerDesc import org.jitsi.nlj.rtp.codec.av1.Av1DDRtpLayerDesc.Companion.SUSPENDED_DT import org.jitsi.nlj.rtp.codec.av1.Av1DDRtpLayerDesc.Companion.getDtFromIndex @@ -108,7 +107,7 @@ internal class Av1DDQualityFilter( val accept = doAcceptFrame(frame, incomingEncoding, externalTargetIndex, receivedTime) val currentDt = getDtFromIndex(currentIndex) val mark = currentDt != SUSPENDED_DT && - (frame.frameInfo?.spatialId == frame.structure?.decodeTargetInfo?.get(currentDt)?.spatialId) + (frame.frameInfo?.spatialId == frame.structure?.decodeTargetLayers?.get(currentDt)?.spatialId) val isResumption = (prevIndex == SUSPENDED_INDEX && currentIndex != SUSPENDED_INDEX) if (isResumption) { check(accept) { diff --git a/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt b/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt index 679c6e3e83..c18061c8f7 100644 --- a/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt +++ b/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt @@ -284,7 +284,8 @@ fun Int.bitsForFdiff() = when { class Av1TemplateDependencyStructure( var templateIdOffset: Int, val templateInfo: List, - val decodeTargetInfo: List, + val decodeTargetProtectedBy: List, + val decodeTargetLayers: List, val maxRenderResolutions: List, val maxSpatialId: Int, val maxTemporalId: Int @@ -293,7 +294,7 @@ class Av1TemplateDependencyStructure( get() = templateInfo.size val decodeTargetCount - get() = decodeTargetInfo.size + get() = decodeTargetLayers.size val chainCount: Int = templateInfo.first().chains.size @@ -327,8 +328,8 @@ class Av1TemplateDependencyStructure( // TemplateChains length += nsBits(decodeTargetCount + 1, chainCount) if (chainCount > 0) { - decodeTargetInfo.forEach { - length += nsBits(chainCount, it.protectedBy) + decodeTargetProtectedBy.forEach { + length += nsBits(chainCount, it) } length += templateCount * chainCount * 4 } @@ -343,7 +344,8 @@ class Av1TemplateDependencyStructure( templateIdOffset, // These objects are not mutable so it's safe to copy them by reference templateInfo, - decodeTargetInfo, + decodeTargetProtectedBy, + decodeTargetLayers, maxRenderResolutions, maxSpatialId, maxTemporalId @@ -412,8 +414,8 @@ class Av1TemplateDependencyStructure( private fun writeTemplateChains(writer: BitWriter) { writer.writeNs(decodeTargetCount + 1, chainCount) - decodeTargetInfo.forEach { - writer.writeNs(chainCount, it.protectedBy) + decodeTargetProtectedBy.forEach { + writer.writeNs(chainCount, it) } templateInfo.forEach { t -> t.chains.forEach { chain -> @@ -461,7 +463,8 @@ class Av1TemplateDependencyStructure( return OrderedJsonObject().apply { put("templateIdOffset", templateIdOffset) put("templateInfo", templateInfo.toIndexedMap()) - put("decodeTargetInfo", decodeTargetInfo.toIndexedMap()) + put("decodeTargetProtectedBy", decodeTargetProtectedBy.toIndexedMap()) + put("decodeTargetLayers", decodeTargetLayers.toIndexedMap()) if (maxRenderResolutions.isNotEmpty()) { put("maxRenderResolutions", maxRenderResolutions.toIndexedMap()) } @@ -612,7 +615,8 @@ class Av1DependencyDescriptorReader( /* Data for template dependency structure */ private var templateIdOffset: Int = 0 private val templateInfo = mutableListOf() - private val decodeTargetInfo = mutableListOf() + private val decodeTargetProtectedBy = mutableListOf() + private val decodeTargetLayers = mutableListOf() private val maxRenderResolutions = mutableListOf() private var dtCnt = 0 @@ -623,7 +627,8 @@ class Av1DependencyDescriptorReader( */ templateCnt = 0 templateInfo.clear() - decodeTargetInfo.clear() + decodeTargetProtectedBy.clear() + decodeTargetLayers.clear() maxRenderResolutions.clear() } @@ -650,7 +655,8 @@ class Av1DependencyDescriptorReader( return Av1TemplateDependencyStructure( templateIdOffset, templateInfo.toList(), - decodeTargetInfo.toList(), + decodeTargetProtectedBy.toList(), + decodeTargetLayers.toList(), maxRenderResolutions.toList(), maxSpatialId, maxTemporalId @@ -733,7 +739,7 @@ class Av1DependencyDescriptorReader( val chainCount = reader.ns(dtCnt + 1) if (chainCount != 0) { for (dtIndex in 0 until dtCnt) { - decodeTargetInfo.add(dtIndex, DecodeTargetInfo(reader.ns(chainCount))) + decodeTargetProtectedBy.add(dtIndex, reader.ns(chainCount)) } for (templateIndex in 0 until templateCnt) { for (chainIndex in 0 until chainCount) { @@ -764,10 +770,9 @@ class Av1DependencyDescriptorReader( } } } - decodeTargetInfo[dtIndex].spatialId = spatialId - decodeTargetInfo[dtIndex].temporalId = temporalId + decodeTargetLayers.add(dtIndex, DecodeTargetLayer(spatialId, temporalId)) } - check(decodeTargetInfo.size == dtCnt) + check(decodeTargetLayers.size == dtCnt) } private fun readRenderResolutions() { @@ -843,16 +848,12 @@ class TemplateFrameInfo( override val chains: MutableList = mutableListOf() ) : FrameInfo(spatialId, temporalId, dti, fdiff, chains) -class DecodeTargetInfo( - val protectedBy: Int +class DecodeTargetLayer( + val spatialId: Int, + val temporalId: Int ) : JSONAware { - /** Todo: only want to be able to set these from the constructor */ - var spatialId: Int = -1 - var temporalId: Int = -1 - override fun toJSONString(): String { return OrderedJsonObject().apply { - put("protectedBy", protectedBy) put("spatialId", spatialId) put("temporalId", temporalId) }.toJSONString() diff --git a/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt b/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt index 4a69bb9c93..6be28bd3de 100644 --- a/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt +++ b/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt @@ -68,6 +68,11 @@ class Av1DependencyDescriptorHeaderExtensionTest : ShouldSpec() { "8700ed80e3061eaa82804028280514d14134518010a091889a09409fc059c13fc0b3c0" ) + /* The header Chrome 126 generates for VP8 keyframes when AV1 DD is enabled for VP8. It has no chains. */ + val descNoChains = parseHexBinary( + "8000138002044eaaaf2860414d34538a0940413fc0b3c0" + ) + init { context("AV1 Dependency Descriptors") { context("a descriptor with a single-layer dependency structure") { @@ -544,6 +549,62 @@ class Av1DependencyDescriptorHeaderExtensionTest : ShouldSpec() { buf shouldBe descS3T3 } } + context("A descriptor with no chains") { + val ldsr = Av1DependencyDescriptorReader(descNoChains, 0, descNoChains.size) + val lds = ldsr.parse(null) + should("be parsed properly") { + lds.startOfFrame shouldBe true + lds.endOfFrame shouldBe false + lds.frameNumber shouldBe 0x0013 + lds.activeDecodeTargetsBitmask shouldBe 0x7 + + val structure = lds.newTemplateDependencyStructure + structure shouldNotBe null + structure!!.decodeTargetCount shouldBe 3 + structure.maxTemporalId shouldBe 2 + structure.maxSpatialId shouldBe 0 + } + should("calculate correct frame info") { + val ldsi = lds.frameInfo + ldsi.spatialId shouldBe 0 + ldsi.temporalId shouldBe 0 + } + should("calculate correctly whether layer switching needs keyframes") { + val structure = lds.newTemplateDependencyStructure!! + val fromS = 0 + for (fromT in 0..2) { + val fromDT = 3 * fromS + fromT + val toS = 0 + for (toT in 0..2) { + val toDT = 3 * toS + toT + /* With this structure you can switch down spatial layers, or to other temporal + * layers within the same spatial layer, without a keyframe; but switching up + * spatial layers needs a keyframe. + */ + withClue("from DT $fromDT to DT $toDT") { + structure.canSwitchWithoutKeyframe( + fromDt = fromDT, + toDt = toDT + ) shouldBe true + } + } + } + } + should("calculate DTI bitmasks corresponding to a given DT") { + val structure = lds.newTemplateDependencyStructure!! + structure.getDtBitmaskForDt(0) shouldBe 0b001 + structure.getDtBitmaskForDt(1) shouldBe 0b011 + structure.getDtBitmaskForDt(2) shouldBe 0b111 + } + should("Calculate its own length properly") { + lds.encodedLength shouldBe descNoChains.size + } + should("Be re-encoded to the same bytes") { + val buf = ByteArray(lds.encodedLength) + lds.write(buf, 0, buf.size) + buf shouldBe descNoChains + } + } } } }