[codec context] Add encode_lazy() returning a generator

Some codecs (VP9) can both buffer _many_ frames and take a long time encoding each frame. Accumulated, the last `encode(None)` flush can end up taking a long time (>30s) without detectable progress. FFmpeg and many encoders themselves output one frame at a time, but PyAV currently buffers them all up into the lists it returns. This change adds an `encode_lazy` yielding frames as they are made ready. The change was benchmarked to also yield a net performance improvement. For both `encode()` and `encode_lazy`, encoding really small (24x18) frames using the `mpeg4` encoder seems to take ~11% less time.
PyAV-Org · Feb 25, 2023 · 9f3b419 · 9f3b419
1 parent 6982d81
commit 9f3b419
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 13 deletions.
diff --git a/av/codec/context.pxd b/av/codec/context.pxd
@@ -40,6 +40,7 @@ cdef class CodecContext(object):
     # Used by both transcode APIs to setup user-land objects.
     # TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing
     # packets are bogus). It should take all info it needs from the context and/or stream.
+    cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame)
     cdef _prepare_frames_for_encode(self, Frame frame)
     cdef _setup_encoded_packet(self, Packet)
     cdef _setup_decoded_frame(self, Frame, Packet)
@@ -50,7 +51,6 @@ cdef class CodecContext(object):
     # resampling audio to a higher rate but with fixed size frames), and the
     # send/recv buffer may be limited to a single frame. Ergo, we need to flush
     # the buffer as often as possible.
-    cdef _send_frame_and_recv(self, Frame frame)
     cdef _recv_packet(self)
     cdef _send_packet_and_recv(self, Packet packet)
     cdef _recv_frame(self)

diff --git a/av/codec/context.pyx b/av/codec/context.pyx
@@ -399,7 +399,7 @@ cdef class CodecContext(object):
 
         return packets
 
-    cdef _send_frame_and_recv(self, Frame frame):
+    def _send_frame_and_recv(self, Frame frame):
 
         cdef Packet packet
 
@@ -408,14 +408,10 @@ cdef class CodecContext(object):
             res = lib.avcodec_send_frame(self.ptr, frame.ptr if frame is not None else NULL)
         err_check(res)
 
-        out = []
-        while True:
+        packet = self._recv_packet()
+        while packet:
+            yield packet
             packet = self._recv_packet()
-            if packet:
-                out.append(packet)
-            else:
-                break
-        return out
 
     cdef _send_packet_and_recv(self, Packet packet):
 
@@ -473,9 +469,7 @@ cdef class CodecContext(object):
         if not res:
             return packet
 
-    cpdef encode(self, Frame frame=None):
-        """Encode a list of :class:`.Packet` from the given :class:`.Frame`."""
-
+    cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame):
         if self.ptr.codec_type not in [lib.AVMEDIA_TYPE_VIDEO, lib.AVMEDIA_TYPE_AUDIO]:
             raise NotImplementedError('Encoding is only supported for audio and video.')
 
@@ -489,13 +483,23 @@ cdef class CodecContext(object):
             if frame is not None:
                 frame._rebase_time(self.ptr.time_base)
 
+        return frames
+
+    cpdef encode(self, Frame frame=None):
+        """Encode a list of :class:`.Packet` from the given :class:`.Frame`."""
         res = []
-        for frame in frames:
+        for frame in self._prepare_and_time_rebase_frames_for_encode(frame):
             for packet in self._send_frame_and_recv(frame):
                 self._setup_encoded_packet(packet)
                 res.append(packet)
         return res
 
+    def encode_lazy(self, Frame frame=None):
+        for frame in self._prepare_and_time_rebase_frames_for_encode(frame):
+            for packet in self._send_frame_and_recv(frame):
+                self._setup_encoded_packet(packet)
+                yield packet
+
     cdef _setup_encoded_packet(self, Packet packet):
         # We coerced the frame's time_base into the CodecContext's during encoding,
         # and FFmpeg copied the frame's pts/dts to the packet, so keep track of