[codec context] Let encode return a generator

Some codecs (e.g. VP9) can both buffer _many_ frames and take a long time to encode each frame. Accumulated, the final `encode(None)` flush can end up taking a long time (>30s) without any detectable progress. FFmpeg and many encoders themselves output one frame at a time, but PyAV currently buffers them all up into the lists it returns. This change redefines `encode` to yield packets as they are made available. The change was also benchmarked to yield a net performance improvement: about 16% less elapsed time when encoding really small (24x18) frames using the `mpeg4` encoder.
PyAV-Org · Feb 25, 2023 · 7adfa9f · 7adfa9f
1 parent 6982d81
commit 7adfa9f
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 15 deletions.
diff --git a/av/codec/context.pxd b/av/codec/context.pxd
@@ -33,8 +33,7 @@ cdef class CodecContext(object):
 
     cdef _set_default_time_base(self)
 
-    # Wraps both versions of the transcode API, returning lists.
-    cpdef encode(self, Frame frame=?)
+    # Wraps the decode API, returning a list of frames
     cpdef decode(self, Packet packet=?)
 
     # Used by both transcode APIs to setup user-land objects.
@@ -50,7 +49,6 @@ cdef class CodecContext(object):
     # resampling audio to a higher rate but with fixed size frames), and the
     # send/recv buffer may be limited to a single frame. Ergo, we need to flush
     # the buffer as often as possible.
-    cdef _send_frame_and_recv(self, Frame frame)
     cdef _recv_packet(self)
     cdef _send_packet_and_recv(self, Packet packet)
     cdef _recv_frame(self)

diff --git a/av/codec/context.pyx b/av/codec/context.pyx
@@ -399,7 +399,7 @@ cdef class CodecContext(object):
 
         return packets
 
-    cdef _send_frame_and_recv(self, Frame frame):
+    def _send_frame_and_recv(self, Frame frame):
 
         cdef Packet packet
 
@@ -408,14 +408,10 @@ cdef class CodecContext(object):
             res = lib.avcodec_send_frame(self.ptr, frame.ptr if frame is not None else NULL)
         err_check(res)
 
-        out = []
-        while True:
+        packet = self._recv_packet()
+        while packet:
+            yield packet
             packet = self._recv_packet()
-            if packet:
-                out.append(packet)
-            else:
-                break
-        return out
 
     cdef _send_packet_and_recv(self, Packet packet):
 
@@ -473,7 +469,7 @@ cdef class CodecContext(object):
         if not res:
             return packet
 
-    cpdef encode(self, Frame frame=None):
+    def encode(self, Frame frame=None):
         """Encode a list of :class:`.Packet` from the given :class:`.Frame`."""
 
         if self.ptr.codec_type not in [lib.AVMEDIA_TYPE_VIDEO, lib.AVMEDIA_TYPE_AUDIO]:
@@ -489,12 +485,10 @@ cdef class CodecContext(object):
             if frame is not None:
                 frame._rebase_time(self.ptr.time_base)
 
-        res = []
         for frame in frames:
             for packet in self._send_frame_and_recv(frame):
                 self._setup_encoded_packet(packet)
-                res.append(packet)
-        return res
+                yield packet
 
     cdef _setup_encoded_packet(self, Packet packet):
         # We coerced the frame's time_base into the CodecContext's during encoding,