[Spice-devel] [spice v11 09/27] server: Avoid copying the input frame in the GStreamer encoder

Fri Mar 25 12:38:17 UTC 2016

Note that we can only avoid copies for the first 1 Mpixels or so.
That's because Spice splits larger frames into more chunks than the
number of GstMemory fragments a GStreamer buffer can hold. So if there
are more pixels we will avoid copies for the first 3840 KB and copy the
rest. Furthermore, while in practice the GStreamer encoder will only
modify the RedDrawable refcount during encode_frame(), in theory the
refcount could be decremented from the GStreamer thread after
encode_frame() returns.

Signed-off-by: Francois Gouget <fgouget at codeweavers.com>
---

Changed the zero-copy approach so that ref/unref() of RedDrawable
objects no longer needs to be thread-safe. Thread-safety is now handled
in gstreamer-encoder.c instead.

 server/dcc-send.c          |  13 +--
 server/gstreamer-encoder.c | 199 +++++++++++++++++++++++++++++++++++++++++----
 server/mjpeg-encoder.c     |   5 +-
 server/stream.c            |  18 +++-
 server/video-encoder.h     |  25 +++++-
 5 files changed, 233 insertions(+), 27 deletions(-)

diff --git a/server/dcc-send.c b/server/dcc-send.c
index 3be443e..322f6eb 100644
--- a/server/dcc-send.c
+++ b/server/dcc-send.c
@@ -1656,12 +1656,12 @@ static void red_release_video_encoder_buffer(uint8_t *data, void *opaque)
 }
 
 static int red_marshall_stream_data(RedChannelClient *rcc,
-                                    SpiceMarshaller *base_marshaller, Drawable *drawable)
+                                    SpiceMarshaller *base_marshaller,
+                                    Drawable *drawable)
 {
     DisplayChannelClient *dcc = RCC_TO_DCC(rcc);
     DisplayChannel *display = DCC_TO_DC(dcc);
     Stream *stream = drawable->stream;
-    SpiceImage *image;
     uint32_t frame_mm_time;
     int width, height;
     int ret;
@@ -1670,10 +1670,10 @@ static int red_marshall_stream_data(RedChannelClient *rcc,
         spice_assert(drawable->sized_stream);
         stream = drawable->sized_stream;
     }
-    spice_assert(drawable->red_drawable->type == QXL_DRAW_COPY);
-
-    image = drawable->red_drawable->u.copy.src_bitmap;
+    RedDrawable *red_drawable = drawable->red_drawable;
+    spice_assert(red_drawable->type == QXL_DRAW_COPY);
 
+    SpiceImage *image = red_drawable->u.copy.src_bitmap;
     if (image->descriptor.type != SPICE_IMAGE_TYPE_BITMAP) {
         return FALSE;
     }
@@ -1715,7 +1715,8 @@ static int red_marshall_stream_data(RedChannelClient *rcc,
                                              frame_mm_time,
                                              &image->u.bitmap, width, height,
                                              &drawable->red_drawable->u.copy.src_area,
-                                             stream->top_down, &outbuf);
+                                             stream->top_down, red_drawable,
+                                             &outbuf);
     switch (ret) {
     case VIDEO_ENCODER_FRAME_DROP:
         spice_assert(dcc->use_video_encoder_rate_control);
diff --git a/server/gstreamer-encoder.c b/server/gstreamer-encoder.c
index fc04f78..b754676 100644
--- a/server/gstreamer-encoder.c
+++ b/server/gstreamer-encoder.c
@@ -30,6 +30,8 @@
 
 #define SPICE_GST_DEFAULT_FPS 30
 
+#define DO_ZERO_COPY /* enables the zero_copy() path in push_raw_frame(); undefine to always copy frames */
+
 
 typedef struct {
     SpiceBitmapFmt spice_format;
@@ -46,6 +48,14 @@ typedef struct SpiceGstVideoBuffer {
 typedef struct SpiceGstEncoder {
     VideoEncoder base;
 
+    /* Callbacks to adjust the refcount of the bitmap being encoded. */
+    bitmap_ref_t bitmap_ref;
+    bitmap_unref_t bitmap_unref;
+
+#ifdef DO_ZERO_COPY
+    GAsyncQueue *unused_bitmap_opaques;
+#endif
+
     /* Rate control callbacks */
     VideoEncoderRateControlCbs cbs;
 
@@ -404,12 +414,109 @@ static inline int line_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
      return TRUE;
 }
 
+#ifdef DO_ZERO_COPY
+typedef struct {
+    gint refs;                /* one per GstMemory sharing this wrapper; dropped in bitmap_wrapper_unref() */
+    SpiceGstEncoder *encoder; /* encoder whose unused_bitmap_opaques queue receives opaque */
+    gpointer opaque;          /* value handed to the bitmap_ref() / bitmap_unref() callbacks */
+} BitmapWrapper;
+/* Calls bitmap_unref() on every opaque queued by bitmap_wrapper_unref(); also unrefs the queue if unref_queue. */
+static void clear_zero_copy_queue(SpiceGstEncoder *encoder, gboolean unref_queue)
+{
+    gpointer bitmap_opaque;
+    while ((bitmap_opaque = g_async_queue_try_pop(encoder->unused_bitmap_opaques))) { /* try_pop: never blocks */
+        encoder->bitmap_unref(bitmap_opaque);
+    }
+    if (unref_queue) {
+        g_async_queue_unref(encoder->unused_bitmap_opaques);
+    }
+}
+/* Allocates a wrapper holding one reference and calls the encoder's bitmap_ref() on bitmap_opaque. */
+static BitmapWrapper *bitmap_wrapper_new(SpiceGstEncoder *encoder, gpointer bitmap_opaque)
+{
+    BitmapWrapper *wrapper = spice_new(BitmapWrapper, 1);
+    wrapper->refs = 1;
+    wrapper->encoder = encoder;
+    wrapper->opaque = bitmap_opaque;
+    encoder->bitmap_ref(bitmap_opaque); /* balanced later by clear_zero_copy_queue() */
+    return wrapper;
+}
+/* Destroy notify of the wrapped GstMemory objects: drops one wrapper reference, frees the wrapper on the last. */
+static void bitmap_wrapper_unref(gpointer data)
+{
+    BitmapWrapper *wrapper = data;
+    if (g_atomic_int_dec_and_test(&wrapper->refs)) {
+        /* bitmap_unref() is not called here: the opaque is queued for clear_zero_copy_queue() to release */
+        g_async_queue_push(wrapper->encoder->unused_bitmap_opaques, wrapper->opaque);
+        free(wrapper);
+    }
+}
+
+/* A helper for push_raw_frame(): appends the bitmap's chunks to buffer as read-only wrapped memory objects,
+ * advancing *chunk_index / *chunk_offset and leaving in *len the bytes the caller must still copy. */
+static inline int zero_copy(SpiceGstEncoder *encoder,
+                            const SpiceBitmap *bitmap, gpointer bitmap_opaque,
+                            GstBuffer *buffer, uint32_t *chunk_index,
+                            uint32_t *chunk_offset, uint32_t *len)
+{
+    const SpiceChunks *chunks = bitmap->data;
+    while (*chunk_index < chunks->num_chunks &&
+           *chunk_offset >= chunks->chunk[*chunk_index].len) { /* skip chunks before the frame start */
+        if (is_chunk_padded(bitmap, *chunk_index)) {
+            return FALSE;
+        }
+        *chunk_offset -= chunks->chunk[*chunk_index].len;
+        (*chunk_index)++;
+    }
+
+    guint max_mem = gst_buffer_get_max_memory(); /* unsigned, like the chunk counters it is compared with */
+    if (chunks->num_chunks - *chunk_index > max_mem) {
+        /* There are more chunks than we can fit memory objects in a
+         * buffer. This would cause the buffer to merge memory objects to
+         * fit the extra chunks, which means doing wasteful memory copies.
+         * So use the zero-copy approach for the first max_mem-1 chunks, and
+         * let push_raw_frame() deal with the rest.
+         */
+        max_mem = *chunk_index + max_mem - 1; /* from here on max_mem is the chunk index to stop at */
+    } else {
+        max_mem = chunks->num_chunks;
+    }
+
+    BitmapWrapper *wrapper = NULL;
+    while (*len && *chunk_index < max_mem) {
+        if (is_chunk_padded(bitmap, *chunk_index)) {
+            return FALSE; /* the caller unrefs buffer on failure */
+        }
+        if (wrapper) {
+            g_atomic_int_inc(&wrapper->refs); /* one reference per memory; matches the atomic decrement */
+        } else {
+            wrapper = bitmap_wrapper_new(encoder, bitmap_opaque);
+        }
+        uint32_t thislen = MIN(chunks->chunk[*chunk_index].len - *chunk_offset, *len);
+        GstMemory *mem = gst_memory_new_wrapped(GST_MEMORY_FLAG_READONLY,
+                                                chunks->chunk[*chunk_index].data,
+                                                chunks->chunk[*chunk_index].len,
+                                                *chunk_offset, thislen,
+                                                wrapper, bitmap_wrapper_unref);
+        gst_buffer_append_memory(buffer, mem);
+        *len -= thislen;
+        *chunk_offset = 0; /* only the first wrapped chunk can start mid-chunk */
+        (*chunk_index)++;
+    }
+    return TRUE;
+}
+#else
+static void clear_zero_copy_queue(SpiceGstEncoder *encoder, gboolean unref_queue)
+{
+    /* Nothing to do: without DO_ZERO_COPY there is no queue of bitmap opaques */
+}
+#endif
+
 /* A helper for push_raw_frame() */
 static inline int chunk_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
-                             uint32_t chunk_offset, uint32_t len, uint8_t *dst)
+                             uint32_t chunk_index, uint32_t chunk_offset,
+                             uint32_t len, uint8_t *dst)
 {
     SpiceChunks *chunks = bitmap->data;
-    uint32_t chunk_index = 0;
     /* Skip chunks until we find the start of the frame */
     while (chunk_index < chunks->num_chunks &&
            chunk_offset >= chunks->chunk[chunk_index].len) {
@@ -437,17 +544,35 @@ static inline int chunk_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap
     return TRUE;
 }
 
+/* A helper for push_raw_frame(): allocates size bytes and maps them for writing; NULL on failure */
+static uint8_t *allocate_and_map_memory(gsize size, GstMapInfo *map, GstBuffer *buffer)
+{
+    GstMemory *mem = gst_allocator_alloc(NULL, size, NULL);
+    if (!mem) {
+        gst_buffer_unref(buffer); /* failure also releases the caller's buffer */
+        return NULL;
+    }
+    if (!gst_memory_map(mem, map, GST_MAP_WRITE))
+    {
+        gst_memory_unref(mem);
+        gst_buffer_unref(buffer); /* failure also releases the caller's buffer */
+        return NULL;
+    }
+    return map->data; /* the memory is not in buffer yet: the caller unmaps it, then appends or unrefs it */
+}
+
 /* A helper for spice_gst_encoder_encode_frame() */
-static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
-                          const SpiceRect *src, int top_down)
+static int push_raw_frame(SpiceGstEncoder *encoder,
+                          const SpiceBitmap *bitmap,
+                          const SpiceRect *src, int top_down,
+                          gpointer bitmap_opaque)
 {
     uint32_t height = src->bottom - src->top;
     uint32_t stream_stride = (src->right - src->left) * encoder->format->bpp / 8;
     uint32_t len = stream_stride * height;
-    GstBuffer *buffer = gst_buffer_new_and_alloc(len);
-    GstMapInfo map;
-    gst_buffer_map(buffer, &map, GST_MAP_WRITE);
-    uint8_t *dst = map.data;
+    GstBuffer *buffer = gst_buffer_new();
+    /* TODO Use GST_MAP_INFO_INIT once GStreamer 1.4.5 is no longer relevant */
+    GstMapInfo map = { .memory = NULL };
 
     /* Note that we should not reorder the lines, even if top_down is false.
      * It just changes the number of lines to skip at the start of the bitmap.
@@ -459,20 +584,50 @@ static int push_raw_frame(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
         /* We have to do a line-by-line copy because for each we have to
          * leave out pixels on the left or right.
          */
+        uint8_t *dst = allocate_and_map_memory(len, &map, buffer);
+        if (!dst) {
+            return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+        }
+
         chunk_offset += src->left * encoder->format->bpp / 8;
         if (!line_copy(encoder, bitmap, chunk_offset, stream_stride, height, dst)) {
-            gst_buffer_unmap(buffer, &map);
+            gst_memory_unmap(map.memory, &map);
+            gst_memory_unref(map.memory);
             gst_buffer_unref(buffer);
             return VIDEO_ENCODER_FRAME_UNSUPPORTED;
         }
     } else {
-        if (!chunk_copy(encoder, bitmap, chunk_offset, len, dst)) {
-            gst_buffer_unmap(buffer, &map);
+        uint32_t chunk_index = 0;
+        /* We can copy the bitmap chunk by chunk */
+#ifdef DO_ZERO_COPY
+        if (!zero_copy(encoder, bitmap, bitmap_opaque, buffer, &chunk_index, &chunk_offset, &len)) {
             gst_buffer_unref(buffer);
             return VIDEO_ENCODER_FRAME_UNSUPPORTED;
         }
+        /* len now contains the remaining number of bytes to copy.
+         * However we must avoid any write to the GstBuffer object as it
+         * would cause a copy of the read-only memory objects we just added.
+         * Fortunately we can append extra writable memory objects instead.
+         */
+#endif
+
+        if (len) {
+            uint8_t *dst = allocate_and_map_memory(len, &map, buffer);
+            if (!dst) {
+                return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+            }
+            if (!chunk_copy(encoder, bitmap, chunk_index, chunk_offset, len, dst)) {
+                gst_memory_unmap(map.memory, &map);
+                gst_memory_unref(map.memory);
+                gst_buffer_unref(buffer);
+                return VIDEO_ENCODER_FRAME_UNSUPPORTED;
+            }
+        }
+    }
+    if (map.memory) {
+        gst_memory_unmap(map.memory, &map);
+        gst_buffer_append_memory(buffer, map.memory);
     }
-    gst_buffer_unmap(buffer, &map);
     GST_BUFFER_OFFSET(buffer) = encoder->frame++;
 
     GstFlowReturn ret = gst_app_src_push_buffer(encoder->appsrc, buffer);
@@ -515,6 +670,8 @@ static void spice_gst_encoder_destroy(VideoEncoder *video_encoder)
 {
     SpiceGstEncoder *encoder = (SpiceGstEncoder*)video_encoder;
     free_pipeline(encoder);
+    /* Unref any lingering bitmap opaque structures from past frames */
+    clear_zero_copy_queue(encoder, TRUE);
     free(encoder);
 }
 
@@ -523,12 +680,16 @@ static int spice_gst_encoder_encode_frame(VideoEncoder *video_encoder,
                                           const SpiceBitmap *bitmap,
                                           int width, int height,
                                           const SpiceRect *src, int top_down,
+                                          gpointer bitmap_opaque,
                                           VideoBuffer **outbuf)
 {
     SpiceGstEncoder *encoder = (SpiceGstEncoder*)video_encoder;
     g_return_val_if_fail(outbuf != NULL, VIDEO_ENCODER_FRAME_UNSUPPORTED);
     *outbuf = NULL;
 
+    /* Unref the last frame's bitmap_opaque structures if any */
+    clear_zero_copy_queue(encoder, FALSE);
+
     if (width != encoder->width || height != encoder->height ||
         encoder->spice_format != bitmap->format) {
         spice_debug("video format change: width %d -> %d, height %d -> %d, format %d -> %d",
@@ -551,7 +712,7 @@ static int spice_gst_encoder_encode_frame(VideoEncoder *video_encoder,
         return VIDEO_ENCODER_FRAME_UNSUPPORTED;
     }
 
-    int rc = push_raw_frame(encoder, bitmap, src, top_down);
+    int rc = push_raw_frame(encoder, bitmap, src, top_down, bitmap_opaque);
     if (rc == VIDEO_ENCODER_FRAME_ENCODE_DONE) {
         rc = pull_compressed_buffer(encoder, outbuf);
         if (rc != VIDEO_ENCODER_FRAME_ENCODE_DONE) {
@@ -561,6 +722,9 @@ static int spice_gst_encoder_encode_frame(VideoEncoder *video_encoder,
             free_pipeline(encoder);
         }
     }
+
+    /* Unref the last frame's bitmap_opaque structures if any */
+    clear_zero_copy_queue(encoder, FALSE);
     return rc;
 }
 
@@ -608,7 +772,9 @@ static void spice_gst_encoder_get_stats(VideoEncoder *video_encoder,
 
 VideoEncoder *gstreamer_encoder_new(SpiceVideoCodecType codec_type,
                                     uint64_t starting_bit_rate,
-                                    VideoEncoderRateControlCbs *cbs)
+                                    VideoEncoderRateControlCbs *cbs,
+                                    bitmap_ref_t bitmap_ref,
+                                    bitmap_unref_t bitmap_unref)
 {
     spice_return_val_if_fail(codec_type == SPICE_VIDEO_CODEC_TYPE_MJPEG ||
                              codec_type == SPICE_VIDEO_CODEC_TYPE_VP8, NULL);
@@ -628,11 +794,16 @@ VideoEncoder *gstreamer_encoder_new(SpiceVideoCodecType codec_type,
     encoder->base.get_bit_rate = spice_gst_encoder_get_bit_rate;
     encoder->base.get_stats = spice_gst_encoder_get_stats;
     encoder->base.codec_type = codec_type;
+#ifdef DO_ZERO_COPY
+    encoder->unused_bitmap_opaques = g_async_queue_new();
+#endif
 
     if (cbs) {
         encoder->cbs = *cbs;
     }
     encoder->starting_bit_rate = starting_bit_rate;
+    encoder->bitmap_ref = bitmap_ref;
+    encoder->bitmap_unref = bitmap_unref;
 
     /* All the other fields are initialized to zero by spice_new0(). */
 
diff --git a/server/mjpeg-encoder.c b/server/mjpeg-encoder.c
index debe50a..e8dcaf9 100644
--- a/server/mjpeg-encoder.c
+++ b/server/mjpeg-encoder.c
@@ -949,6 +949,7 @@ static int mjpeg_encoder_encode_frame(VideoEncoder *video_encoder,
                                       const SpiceBitmap *bitmap,
                                       int width, int height,
                                       const SpiceRect *src, int top_down,
+                                      gpointer bitmap_opaque,
                                       VideoBuffer **outbuf)
 {
     MJpegEncoder *encoder = (MJpegEncoder*)video_encoder;
@@ -1369,7 +1370,9 @@ static void mjpeg_encoder_get_stats(VideoEncoder *video_encoder,
 
 VideoEncoder *mjpeg_encoder_new(SpiceVideoCodecType codec_type,
                                 uint64_t starting_bit_rate,
-                                VideoEncoderRateControlCbs *cbs)
+                                VideoEncoderRateControlCbs *cbs,
+                                bitmap_ref_t bitmap_ref,
+                                bitmap_unref_t bitmap_unref)
 {
     MJpegEncoder *encoder = spice_new0(MJpegEncoder, 1);
 
diff --git a/server/stream.c b/server/stream.c
index 0aa10c3..4413680 100644
--- a/server/stream.c
+++ b/server/stream.c
@@ -696,6 +696,20 @@ static void update_client_playback_delay(void *opaque, uint32_t delay_ms)
                                         agent->dcc->streams_max_latency);
 }
 
+void bitmap_ref(gpointer data) /* bitmap_ref_t callback handed to the video encoders; data is a RedDrawable */
+{
+    RedDrawable *red_drawable = (RedDrawable*)data;
+    spice_debug("red_drawable %p", red_drawable);
+    red_drawable_ref(red_drawable);
+}
+/* bitmap_unref_t callback: unrefs the RedDrawable in data. NOTE(review): non-static, no prototype seen - confirm. */
+void bitmap_unref(gpointer data)
+{
+    RedDrawable *red_drawable = (RedDrawable*)data;
+    spice_debug("red_drawable %p", red_drawable);
+    red_drawable_unref(red_drawable);
+}
+
 /* A helper for dcc_create_stream(). */
 static VideoEncoder* dcc_create_video_encoder(DisplayChannelClient *dcc,
                                               uint64_t starting_bit_rate,
@@ -720,7 +734,7 @@ static VideoEncoder* dcc_create_video_encoder(DisplayChannelClient *dcc,
             continue;
         }
 
-        VideoEncoder* video_encoder = video_codec->create(video_codec->type, starting_bit_rate, cbs);
+        VideoEncoder* video_encoder = video_codec->create(video_codec->type, starting_bit_rate, cbs, bitmap_ref, bitmap_unref);
         if (video_encoder) {
             return video_encoder;
         }
@@ -728,7 +742,7 @@ static VideoEncoder* dcc_create_video_encoder(DisplayChannelClient *dcc,
 
     /* Try to use the builtin MJPEG video encoder as a fallback */
     if (!client_has_multi_codec || red_channel_client_test_remote_cap(rcc, SPICE_DISPLAY_CAP_CODEC_MJPEG)) {
-        return mjpeg_encoder_new(SPICE_VIDEO_CODEC_TYPE_MJPEG, starting_bit_rate, cbs);
+        return mjpeg_encoder_new(SPICE_VIDEO_CODEC_TYPE_MJPEG, starting_bit_rate, cbs, bitmap_ref, bitmap_unref);
     }
 
     return NULL;
diff --git a/server/video-encoder.h b/server/video-encoder.h
index f94fd69..41c7f17 100644
--- a/server/video-encoder.h
+++ b/server/video-encoder.h
@@ -61,6 +61,8 @@ struct VideoEncoder {
      * @bitmap:        The Spice screen.
      * @src:           A rectangle specifying the area occupied by the video.
      * @top_down:      If true the first video line is specified by src.top.
+     * @bitmap_opaque: The parameter for the bitmap_ref() and bitmap_unref()
+     *                 callbacks.
      * @outbuf:        A pointer to a VideoBuffer structure containing the
      *                 compressed frame if successful. Call the buffer's
      *                 free() method as soon as it is no longer needed.
@@ -73,7 +75,7 @@ struct VideoEncoder {
     int (*encode_frame)(VideoEncoder *encoder, uint32_t frame_mm_time,
                         const SpiceBitmap *bitmap, int width, int height,
                         const SpiceRect *src, int top_down,
-                        VideoBuffer** outbuf);
+                        gpointer bitmap_opaque, VideoBuffer** outbuf);
 
     /*
      * Bit rate control methods.
@@ -162,6 +164,8 @@ typedef struct VideoEncoderRateControlCbs {
     void (*update_client_playback_delay)(void *opaque, uint32_t delay_ms);
 } VideoEncoderRateControlCbs;
 
+typedef void (*bitmap_ref_t)(gpointer data);   /* increases the refcount of the bitmap identified by data */
+typedef void (*bitmap_unref_t)(gpointer data); /* decreases the refcount of the bitmap identified by data */
 
 /* Instantiates the video encoder.
  *
@@ -169,22 +173,35 @@ typedef struct VideoEncoderRateControlCbs {
  * @starting_bit_rate: An initial estimate of the available stream bit rate
  *                     or zero if the client does not support rate control.
  * @cbs:               A set of callback methods to be used for rate control.
+ * @bitmap_ref:        A callback that the encoder can use to increase the
+ *                     bitmap refcount.
+ *                     This must be called from the main context.
+ * @bitmap_unref:      A callback that the encoder can use to decrease the
+ *                     bitmap refcount.
+ *                     This must be called from the main context.
  * @return:            A pointer to a structure implementing the VideoEncoder
  *                     methods.
  */
 typedef VideoEncoder* (*new_video_encoder_t)(SpiceVideoCodecType codec_type,
                                              uint64_t starting_bit_rate,
-                                             VideoEncoderRateControlCbs *cbs);
+                                             VideoEncoderRateControlCbs *cbs,
+                                             bitmap_ref_t bitmap_ref,
+                                             bitmap_unref_t bitmap_unref);
 
 VideoEncoder* mjpeg_encoder_new(SpiceVideoCodecType codec_type,
                                 uint64_t starting_bit_rate,
-                                VideoEncoderRateControlCbs *cbs);
+                                VideoEncoderRateControlCbs *cbs,
+                                bitmap_ref_t bitmap_ref,
+                                bitmap_unref_t bitmap_unref);
 #ifdef HAVE_GSTREAMER_1_0
 VideoEncoder* gstreamer_encoder_new(SpiceVideoCodecType codec_type,
                                     uint64_t starting_bit_rate,
-                                    VideoEncoderRateControlCbs *cbs);
+                                    VideoEncoderRateControlCbs *cbs,
+                                    bitmap_ref_t bitmap_ref,
+                                    bitmap_unref_t bitmap_unref);
 #endif
 
+
 typedef struct RedVideoCodec {
     new_video_encoder_t create;
     SpiceVideoCodecType type;
-- 
2.7.0