[Spice-devel] [PATCH spice 10/13] server: Avoid copying the input frame in the GStreamer encoder. (take 4)

Tue Jul 21 11:01:37 PDT 2015

To do so we reference the source bitmap chunks in the GStreamer buffer and rely on the buffer's lifetime being short enough.
Note that we can only avoid copies for the first 1 Mpixels or so. That's because Spice splits larger frames into more chunks and we can fit memory fragments inside a GStreamer buffer. So for those we will avoid copies for the first 3840 KB and copy the rest.

Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
---

This makes it possible to avoid copying the source frame when using
GStreamer 1.0. Paradoxically we are still forced to do some copying for
the larger frames.

Here is how it works: Spice's frame buffer is composed of multiple 
memory chunks. So we use GStreamer's GstMemory objects to wrap each 
chunk and put them all to form the GstBuffer we pass to the pipeline. 
However there's a limit to the number of memory objects that we can put 
in a GstBuffer. Usually that limit is 16. Furthermore Spice splits the 
frame into 256KB chunks. so this approach only works up to 4MB or 
between 1 and 1.3Mpixels. For larger frames we use the zero-copy 
approach for the first 15 frame chunks, and copy all the rest into the 
16th memory chunk.

So on my machine, for a 720x304 video the frame copy time goes from 
around 265us (line-by-line) to 5us. For a 1440x900 frame it goes from 
about 1610us with the old line-by-line copy code, down to 1490us for the 
new chunk-by-chunk one, and down to about 440us when minimizing copies. 
The latter matches well with the ~0.3Mpixels we have to copy after the 
first 1Mpixels.

To avoid these copies it will be necessary to either increase the size 
of the Spice chunks or to increase the number of memory objects that 
GstBuffers can hold.

Changes since take 3:
 - None.

 server/gstreamer_encoder.c | 107 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 101 insertions(+), 6 deletions(-)

diff --git a/server/gstreamer_encoder.c b/server/gstreamer_encoder.c
index ac8f5c0..2ad1890 100644
--- a/server/gstreamer_encoder.c
+++ b/server/gstreamer_encoder.c
@@ -35,6 +35,10 @@ typedef struct GstEncoder GstEncoder;
 
 #define GSTE_DEFAULT_FPS 30
 
+#ifndef HAVE_GSTREAMER_0_10
+# define DO_ZERO_COPY
+#endif
+
 
 typedef struct {
     SpiceBitmapFmt  spice_format;
@@ -86,6 +90,11 @@ struct GstEncoder {
     /* The default video buffer */
     GstVideoBuffer *default_buffer;
 
+#ifdef DO_ZERO_COPY
+    /* Set to TRUE until GStreamer no longer needs the raw bitmap buffer. */
+    gboolean needs_bitmap;
+#endif
+
     /* The frame counter for GStreamer buffers */
     uint32_t frame;
 
@@ -355,6 +364,15 @@ static void reconfigure_pipeline(GstEncoder *encoder)
     }
 }
 
+#ifdef DO_ZERO_COPY
+/* A helper for push_raw_frame() */
+static void unref_bitmap(gpointer mem)
+{
+    GstEncoder *encoder = (GstEncoder*)mem;
+    encoder->needs_bitmap = FALSE;
+}
+#endif
+
 /* A helper for gst_encoder_encode_frame(). */
 static int push_raw_frame(GstEncoder *encoder, const SpiceBitmap *bitmap,
                           const SpiceRect *src, int top_down)
@@ -362,15 +380,14 @@ static int push_raw_frame(GstEncoder *encoder, const SpiceBitmap *bitmap,
     const uint32_t stream_height = src->bottom - src->top;
     const uint32_t stream_stride = (src->right - src->left) * encoder->format->bpp / 8;
     uint32_t len = stream_stride * stream_height;
-    GstBuffer *buffer = gst_buffer_new_and_alloc(len);
 #ifdef HAVE_GSTREAMER_0_10
+    GstBuffer *buffer = gst_buffer_new_and_alloc(len);
     uint8_t *b = GST_BUFFER_DATA(buffer);
+    uint8_t *dst = b;
 #else
-    GstMapInfo map;
-    gst_buffer_map(buffer, &map, GST_MAP_WRITE);
-    uint8_t *b = map.data;
+    GstBuffer *buffer = gst_buffer_new();
+    GstMapInfo map = { .memory = NULL };
 #endif
-    uint8_t *dst = b;
 
     /* Note that we should not reorder the lines, even if top_down is false.
      * It just changes the number of lines to skip at the start of the bitmap.
@@ -382,6 +399,70 @@ static int push_raw_frame(GstEncoder *encoder, const SpiceBitmap *bitmap,
 
     if (stream_stride == bitmap->stride) {
         /* We can copy the bitmap chunk by chunk */
+#ifdef DO_ZERO_COPY
+        /* We cannot control the lifetime of the bitmap but we can wrap it in
+         * the buffer anyway because:
+         * - Before returning from gst_encoder_encode_frame() we wait for the
+         *   pipeline to have converted this frame into a compressed buffer.
+         *   So it has to have gone through the frame at least once.
+         * - For all encoders but MJPEG, the first element of the pipeline will
+         *   convert the bitmap to another image format which entails copying
+         *   it. So by the time the encoder starts its work, this buffer will
+         *   not be needed anymore.
+         * - The MJPEG encoder does not perform inter-frame compression and thus
+         *   does not need to keep hold of this buffer once it has processed it.
+         */
+        while (chunk_offset >= chunks->chunk[chunk].len) {
+            /* Make sure that the chunk is not padded */
+            if (chunks->chunk[chunk].len % bitmap->stride != 0) {
+                gst_buffer_unref(buffer);
+                return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+            }
+            chunk_offset -= chunks->chunk[chunk].len;
+            chunk++;
+        }
+
+        int max_mem = gst_buffer_get_max_memory();
+        if (chunks->num_chunks - chunk > max_mem) {
+            /* There are more chunks than we can fit memory objects in a
+             * buffer. This will cause the buffer to merge memory objects to
+             * fit the extra chunks, which means doing wasteful memory copies.
+             * So use the zero-copy approach for the first max_mem-1 chunks,
+             * and directly copy the remainder to the last memory object.
+             */
+            max_mem = chunk + max_mem - 1;
+        } else {
+            max_mem = chunks->num_chunks;
+        }
+
+        while (len && chunk < max_mem) {
+            /* Make sure that the chunk is not padded */
+            if (chunks->chunk[chunk].len % bitmap->stride != 0) {
+                gst_buffer_unref(buffer);
+                return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+            }
+            uint32_t thislen = MIN(chunks->chunk[chunk].len - chunk_offset, len);
+            GstMemory *mem = gst_memory_new_wrapped(GST_MEMORY_FLAG_READONLY, chunks->chunk[chunk].data, chunks->chunk[chunk].len, chunk_offset, thislen, encoder, &unref_bitmap);
+            gst_buffer_append_memory(buffer, mem);
+            len -= thislen;
+            chunk_offset = 0;
+            chunk++;
+        }
+        encoder->needs_bitmap = TRUE;
+        /* Now we must avoid any write to the GstBuffer object as it would
+         * cause a copy of the read-only memory objects we just added.
+         * Fortunately we can append extra writable memory objects instead.
+         */
+#endif
+
+#ifndef HAVE_GSTREAMER_0_10
+        if (len) {
+            GstMemory *mem = gst_allocator_alloc(NULL, len, NULL);
+            gst_memory_map(mem, &map, GST_MAP_WRITE);
+        }
+        uint8_t *dst = map.data;
+#endif
+
         while (len && chunk < chunks->num_chunks) {
             /* Make sure that the chunk is not padded */
             if (chunks->chunk[chunk].len % bitmap->stride != 0) {
@@ -408,6 +489,13 @@ static int push_raw_frame(GstEncoder *encoder, const SpiceBitmap *bitmap,
         /* We have to do a line-by-line copy because for each we have to leave
          * out pixels on the left or right.
          */
+#ifndef HAVE_GSTREAMER_0_10
+        GstMemory *mem = gst_allocator_alloc(NULL, len, NULL);
+        gst_memory_map(mem, &map, GST_MAP_WRITE);
+        uint8_t *b = map.data;
+        uint8_t *dst = b;
+#endif
+
         chunk_offset += src->left * encoder->format->bpp / 8;
         for (int l = 0; l < stream_height; l++) {
             /* We may have to move forward by more than one chunk the first
@@ -434,7 +522,10 @@ static int push_raw_frame(GstEncoder *encoder, const SpiceBitmap *bitmap,
 #ifdef HAVE_GSTREAMER_0_10
     gst_buffer_set_caps(buffer, encoder->src_caps);
 #else
-    gst_buffer_unmap(buffer, &map);
+    if (map.memory) {
+        gst_memory_unmap(map.memory, &map);
+        gst_buffer_append_memory(buffer, map.memory);
+    }
 #endif
     GST_BUFFER_OFFSET(buffer) = encoder->frame++;
 
@@ -530,6 +621,10 @@ static int gst_encoder_encode_frame(GstEncoder *encoder,
     int rc = push_raw_frame(encoder, bitmap, src, top_down);
     if (rc == VIDEO_ENCODER_FRAME_ENCODE_DONE) {
         rc = pull_compressed_buffer(encoder, buffer);
+#ifdef DO_ZERO_COPY
+        /* GStreamer should have released the source frame buffer by now */
+        spice_assert(!encoder->needs_bitmap);
+#endif
     }
     return rc;
 }
-- 
2.1.4