From 5ccdaa2b6b637fbd511b5c14344cbf45bd1efcee Mon Sep 17 00:00:00 2001
From: Jonathan Lennox <jonathan.lennox@8x8.com>
Date: Thu, 23 May 2024 17:29:28 -0400
Subject: [PATCH] fix(AV1 DD): Separate AV1 DD template's decodeTargetLayers
 and decodeTargetProtectedBy fields. (#2136)

The former can be present when the latter is not.
---
 .../jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt    |  2 +-
 .../Av1DDAdaptiveSourceProjectionContext.kt   |  6 +-
 .../videobridge/cc/av1/Av1DDQualityFilter.kt  |  3 +-
 .../Av1DependencyDescriptorHeaderExtension.kt | 45 +++++++-------
 ...DependencyDescriptorHeaderExtensionTest.kt | 61 +++++++++++++++++++
 5 files changed, 89 insertions(+), 28 deletions(-)

diff --git a/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt b/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt
index dbb83f0d58..27ea15920f 100644
--- a/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt
+++ b/jitsi-media-transform/src/main/kotlin/org/jitsi/nlj/rtp/codec/av1/Av1DDPacket.kt
@@ -193,7 +193,7 @@ fun Av1DependencyDescriptorHeaderExtension.getScalabilityStructure(
 
     val layers = ArrayList<Av1DDRtpLayerDesc>()
 
-    structure.decodeTargetInfo.forEachIndexed { i, dt ->
+    structure.decodeTargetLayers.forEachIndexed { i, dt ->
         if (!activeDecodeTargetsBitmask.containsDecodeTarget(i)) {
             return@forEachIndexed
         }
diff --git a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt
index 6388dc3f04..af464bef34 100644
--- a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt
+++ b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDAdaptiveSourceProjectionContext.kt
@@ -182,7 +182,7 @@ class Av1DDAdaptiveSourceProjectionContext(
         } else {
             frame.frameInfo?.dtisPresent ?: emptySet()
         }
-        val chainsToCheck = dtsToCheck.map { structure.decodeTargetInfo[it].protectedBy }.toSet()
+        val chainsToCheck = dtsToCheck.mapNotNull { structure.decodeTargetProtectedBy.getOrNull(it) }.toSet()
         return map.nextFrameWith(frame) {
             if (it.isAccepted) return@nextFrameWith false
             if (it.frameInfo == null) {
@@ -197,8 +197,8 @@ class Av1DDAdaptiveSourceProjectionContext(
     private fun Av1DDFrame.partOfActiveChain(chainIdx: Int): Boolean {
         val structure = structure ?: return false
         val frameInfo = frameInfo ?: return false
-        for (i in structure.decodeTargetInfo.indices) {
-            if (structure.decodeTargetInfo[i].protectedBy != chainIdx) {
+        for (i in structure.decodeTargetProtectedBy.indices) {
+            if (structure.decodeTargetProtectedBy[i] != chainIdx) {
                 continue
             }
             if (frameInfo.dti[i] == DTI.NOT_PRESENT || frameInfo.dti[i] == DTI.DISCARDABLE) {
diff --git a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt
index 01808bd5a2..dc83977718 100644
--- a/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt
+++ b/jvb/src/main/kotlin/org/jitsi/videobridge/cc/av1/Av1DDQualityFilter.kt
@@ -19,7 +19,6 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
 import org.jitsi.nlj.RtpLayerDesc.Companion.SUSPENDED_ENCODING_ID
 import org.jitsi.nlj.RtpLayerDesc.Companion.SUSPENDED_INDEX
 import org.jitsi.nlj.RtpLayerDesc.Companion.getEidFromIndex
-import org.jitsi.nlj.RtpLayerDesc.Companion.indexString
 import org.jitsi.nlj.rtp.codec.av1.Av1DDRtpLayerDesc
 import org.jitsi.nlj.rtp.codec.av1.Av1DDRtpLayerDesc.Companion.SUSPENDED_DT
 import org.jitsi.nlj.rtp.codec.av1.Av1DDRtpLayerDesc.Companion.getDtFromIndex
@@ -108,7 +107,7 @@ internal class Av1DDQualityFilter(
         val accept = doAcceptFrame(frame, incomingEncoding, externalTargetIndex, receivedTime)
         val currentDt = getDtFromIndex(currentIndex)
         val mark = currentDt != SUSPENDED_DT &&
-            (frame.frameInfo?.spatialId == frame.structure?.decodeTargetInfo?.get(currentDt)?.spatialId)
+            (frame.frameInfo?.spatialId == frame.structure?.decodeTargetLayers?.get(currentDt)?.spatialId)
         val isResumption = (prevIndex == SUSPENDED_INDEX && currentIndex != SUSPENDED_INDEX)
         if (isResumption) {
             check(accept) {
diff --git a/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt b/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt
index 679c6e3e83..c18061c8f7 100644
--- a/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt
+++ b/rtp/src/main/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtension.kt
@@ -284,7 +284,8 @@ fun Int.bitsForFdiff() = when {
 class Av1TemplateDependencyStructure(
     var templateIdOffset: Int,
     val templateInfo: List<FrameInfo>,
-    val decodeTargetInfo: List<DecodeTargetInfo>,
+    val decodeTargetProtectedBy: List<Int>,
+    val decodeTargetLayers: List<DecodeTargetLayer>,
     val maxRenderResolutions: List<Resolution>,
     val maxSpatialId: Int,
     val maxTemporalId: Int
@@ -293,7 +294,7 @@ class Av1TemplateDependencyStructure(
         get() = templateInfo.size
 
     val decodeTargetCount
-        get() = decodeTargetInfo.size
+        get() = decodeTargetLayers.size
 
     val chainCount: Int =
         templateInfo.first().chains.size
@@ -327,8 +328,8 @@ class Av1TemplateDependencyStructure(
             // TemplateChains
             length += nsBits(decodeTargetCount + 1, chainCount)
             if (chainCount > 0) {
-                decodeTargetInfo.forEach {
-                    length += nsBits(chainCount, it.protectedBy)
+                decodeTargetProtectedBy.forEach {
+                    length += nsBits(chainCount, it)
                 }
                 length += templateCount * chainCount * 4
             }
@@ -343,7 +344,8 @@ class Av1TemplateDependencyStructure(
             templateIdOffset,
             // These objects are not mutable so it's safe to copy them by reference
             templateInfo,
-            decodeTargetInfo,
+            decodeTargetProtectedBy,
+            decodeTargetLayers,
             maxRenderResolutions,
             maxSpatialId,
             maxTemporalId
@@ -412,8 +414,8 @@ class Av1TemplateDependencyStructure(
 
     private fun writeTemplateChains(writer: BitWriter) {
         writer.writeNs(decodeTargetCount + 1, chainCount)
-        decodeTargetInfo.forEach {
-            writer.writeNs(chainCount, it.protectedBy)
+        decodeTargetProtectedBy.forEach {
+            writer.writeNs(chainCount, it)
         }
         templateInfo.forEach { t ->
             t.chains.forEach { chain ->
@@ -461,7 +463,8 @@ class Av1TemplateDependencyStructure(
         return OrderedJsonObject().apply {
             put("templateIdOffset", templateIdOffset)
             put("templateInfo", templateInfo.toIndexedMap())
-            put("decodeTargetInfo", decodeTargetInfo.toIndexedMap())
+            put("decodeTargetProtectedBy", decodeTargetProtectedBy.toIndexedMap())
+            put("decodeTargetLayers", decodeTargetLayers.toIndexedMap())
             if (maxRenderResolutions.isNotEmpty()) {
                 put("maxRenderResolutions", maxRenderResolutions.toIndexedMap())
             }
@@ -612,7 +615,8 @@ class Av1DependencyDescriptorReader(
     /* Data for template dependency structure */
     private var templateIdOffset: Int = 0
     private val templateInfo = mutableListOf<TemplateFrameInfo>()
-    private val decodeTargetInfo = mutableListOf<DecodeTargetInfo>()
+    private val decodeTargetProtectedBy = mutableListOf<Int>()
+    private val decodeTargetLayers = mutableListOf<DecodeTargetLayer>()
     private val maxRenderResolutions = mutableListOf<Resolution>()
 
     private var dtCnt = 0
@@ -623,7 +627,8 @@ class Av1DependencyDescriptorReader(
          */
         templateCnt = 0
         templateInfo.clear()
-        decodeTargetInfo.clear()
+        decodeTargetProtectedBy.clear()
+        decodeTargetLayers.clear()
         maxRenderResolutions.clear()
     }
 
@@ -650,7 +655,8 @@ class Av1DependencyDescriptorReader(
         return Av1TemplateDependencyStructure(
             templateIdOffset,
             templateInfo.toList(),
-            decodeTargetInfo.toList(),
+            decodeTargetProtectedBy.toList(),
+            decodeTargetLayers.toList(),
             maxRenderResolutions.toList(),
             maxSpatialId,
             maxTemporalId
@@ -733,7 +739,7 @@ class Av1DependencyDescriptorReader(
         val chainCount = reader.ns(dtCnt + 1)
         if (chainCount != 0) {
             for (dtIndex in 0 until dtCnt) {
-                decodeTargetInfo.add(dtIndex, DecodeTargetInfo(reader.ns(chainCount)))
+                decodeTargetProtectedBy.add(dtIndex, reader.ns(chainCount))
             }
             for (templateIndex in 0 until templateCnt) {
                 for (chainIndex in 0 until chainCount) {
@@ -764,10 +770,9 @@ class Av1DependencyDescriptorReader(
                     }
                 }
             }
-            decodeTargetInfo[dtIndex].spatialId = spatialId
-            decodeTargetInfo[dtIndex].temporalId = temporalId
+            decodeTargetLayers.add(dtIndex, DecodeTargetLayer(spatialId, temporalId))
         }
-        check(decodeTargetInfo.size == dtCnt)
+        check(decodeTargetLayers.size == dtCnt)
     }
 
     private fun readRenderResolutions() {
@@ -843,16 +848,12 @@ class TemplateFrameInfo(
     override val chains: MutableList<Int> = mutableListOf()
 ) : FrameInfo(spatialId, temporalId, dti, fdiff, chains)
 
-class DecodeTargetInfo(
-    val protectedBy: Int
+class DecodeTargetLayer(
+    val spatialId: Int,
+    val temporalId: Int
 ) : JSONAware {
-    /** Todo: only want to be able to set these from the constructor */
-    var spatialId: Int = -1
-    var temporalId: Int = -1
-
     override fun toJSONString(): String {
         return OrderedJsonObject().apply {
-            put("protectedBy", protectedBy)
             put("spatialId", spatialId)
             put("temporalId", temporalId)
         }.toJSONString()
diff --git a/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt b/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt
index 4a69bb9c93..6be28bd3de 100644
--- a/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt
+++ b/rtp/src/test/kotlin/org/jitsi/rtp/rtp/header_extensions/Av1DependencyDescriptorHeaderExtensionTest.kt
@@ -68,6 +68,11 @@ class Av1DependencyDescriptorHeaderExtensionTest : ShouldSpec() {
         "8700ed80e3061eaa82804028280514d14134518010a091889a09409fc059c13fc0b3c0"
     )
 
+    /* The header Chrome 126 generates for VP8 keyframes when AV1 DD is enabled for VP8.  It has no chains. */
+    val descNoChains = parseHexBinary(
+        "8000138002044eaaaf2860414d34538a0940413fc0b3c0"
+    )
+
     init {
         context("AV1 Dependency Descriptors") {
             context("a descriptor with a single-layer dependency structure") {
@@ -544,6 +549,62 @@ class Av1DependencyDescriptorHeaderExtensionTest : ShouldSpec() {
                     buf shouldBe descS3T3
                 }
             }
+            context("A descriptor with no chains") {
+                val ldsr = Av1DependencyDescriptorReader(descNoChains, 0, descNoChains.size)
+                val lds = ldsr.parse(null)
+                should("be parsed properly") {
+                    lds.startOfFrame shouldBe true
+                    lds.endOfFrame shouldBe false
+                    lds.frameNumber shouldBe 0x0013
+                    lds.activeDecodeTargetsBitmask shouldBe 0x7
+
+                    val structure = lds.newTemplateDependencyStructure
+                    structure shouldNotBe null
+                    structure!!.decodeTargetCount shouldBe 3
+                    structure.maxTemporalId shouldBe 2
+                    structure.maxSpatialId shouldBe 0
+                }
+                should("calculate correct frame info") {
+                    val ldsi = lds.frameInfo
+                    ldsi.spatialId shouldBe 0
+                    ldsi.temporalId shouldBe 0
+                }
+                should("calculate correctly whether layer switching needs keyframes") {
+                    val structure = lds.newTemplateDependencyStructure!!
+                    val fromS = 0
+                    for (fromT in 0..2) {
+                        val fromDT = 3 * fromS + fromT
+                        val toS = 0
+                        for (toT in 0..2) {
+                            val toDT = 3 * toS + toT
+                            /* With this structure you can switch down spatial layers, or to other temporal
+                             * layers within the same spatial layer, without a keyframe; but switching up
+                             * spatial layers needs a keyframe.
+                             */
+                            withClue("from DT $fromDT to DT $toDT") {
+                                structure.canSwitchWithoutKeyframe(
+                                    fromDt = fromDT,
+                                    toDt = toDT
+                                ) shouldBe true
+                            }
+                        }
+                    }
+                }
+                should("calculate DTI bitmasks corresponding to a given DT") {
+                    val structure = lds.newTemplateDependencyStructure!!
+                    structure.getDtBitmaskForDt(0) shouldBe 0b001
+                    structure.getDtBitmaskForDt(1) shouldBe 0b011
+                    structure.getDtBitmaskForDt(2) shouldBe 0b111
+                }
+                should("Calculate its own length properly") {
+                    lds.encodedLength shouldBe descNoChains.size
+                }
+                should("Be re-encoded to the same bytes") {
+                    val buf = ByteArray(lds.encodedLength)
+                    lds.write(buf, 0, buf.size)
+                    buf shouldBe descNoChains
+                }
+            }
         }
     }
 }