[Cogl] [PATCH 2/2] Add a workaround for slow read pixels on Mesa

Robert Bragg robert at sixbynine.org
Thu Apr 5 05:50:03 PDT 2012


thanks, this all looks good to me now.

Reviewed-by: Robert Bragg <robert at linux.intel.com>

On Thu, Apr 5, 2012 at 1:22 PM, Neil Roberts <neil at linux.intel.com> wrote:
> Mesa before version 8.0.2 has a slow read pixels path that gets used
> with the Intel driver where it converts all of the pixels into a
> floating point representation and back even if the data is being read
> into exactly the same format. There is however a faster path using the
> blitter when reading into a PBO with BGRA format. It works out faster
> to read into a PBO and then memcpy back out into the application's
> buffer even though it adds an extra memcpy. This patch adds a
> workaround in cogl_framebuffer_read_pixels_into_bitmap when it detects
> this situation. In that case it will create a temporary CoglBitmap
> using cogl_bitmap_new_with_size, read into it and then memcpy the data
> back out.
>
> The main impetus for this patch is that Gnome Shell has implemented
> this workaround directly using GL calls but it seems like the kind of
> thing that would sit better at the Cogl layer.
> ---
>  cogl/cogl-framebuffer-private.h |   10 +++
>  cogl/cogl-framebuffer.c         |  127 +++++++++++++++++++++++++++++++++++++-
>  cogl/cogl-gpu-info-private.h    |    7 ++-
>  cogl/cogl-gpu-info.c            |   12 ++++-
>  4 files changed, 150 insertions(+), 6 deletions(-)
>
> diff --git a/cogl/cogl-framebuffer-private.h b/cogl/cogl-framebuffer-private.h
> index 0081a2f..4c759cf 100644
> --- a/cogl/cogl-framebuffer-private.h
> +++ b/cogl/cogl-framebuffer-private.h
> @@ -93,6 +93,16 @@ typedef enum _CoglFramebufferState
>
>  #define COGL_FRAMEBUFFER_STATE_ALL ((1<<COGL_FRAMEBUFFER_STATE_INDEX_MAX) - 1)
>
> +/* Private flags that can internally be added to CoglReadPixelsFlags */
> +typedef enum
> +{
> +  /* If this is set then the data will not be flipped to compensate
> +     for GL's upside-down coordinate system but instead will be left
> +     in whatever order GL gives us (which will depend on whether the
> +     framebuffer is offscreen or not) */
> +  COGL_READ_PIXELS_NO_FLIP = 1L << 30
> +} CoglPrivateReadPixelsFlags;
> +
>  struct _CoglFramebuffer
>  {
>   CoglObject          _parent;
> diff --git a/cogl/cogl-framebuffer.c b/cogl/cogl-framebuffer.c
> index b55b016..bc6bad8 100644
> --- a/cogl/cogl-framebuffer.c
> +++ b/cogl/cogl-framebuffer.c
> @@ -1942,6 +1942,97 @@ _cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
>   return FALSE;
>  }
>
> +static gboolean
> +_cogl_framebuffer_slow_read_pixels_workaround (CoglFramebuffer *framebuffer,
> +                                               int x,
> +                                               int y,
> +                                               CoglReadPixelsFlags source,
> +                                               CoglBitmap *bitmap)
> +{
> +  CoglContext *ctx;
> +  CoglPixelFormat format;
> +  CoglBitmap *pbo;
> +  int width;
> +  int height;
> +  gboolean res;
> +
> +  _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
> +  _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
> +
> +  ctx = cogl_framebuffer_get_context (framebuffer);
> +
> +  width = cogl_bitmap_get_width (bitmap);
> +  height = cogl_bitmap_get_height (bitmap);
> +  format = cogl_bitmap_get_format (bitmap);
> +
> +  pbo = cogl_bitmap_new_with_size (ctx, width, height, format);
> +  if (pbo == NULL)
> +    return FALSE;
> +
> +  /* Read into the pbo. We need to disable the flipping because the
> +     blit fast path in the driver does not work with
> +     GL_PACK_INVERT_MESA is set */
> +  res = cogl_framebuffer_read_pixels_into_bitmap (framebuffer,
> +                                                  x, y,
> +                                                  source |
> +                                                  COGL_READ_PIXELS_NO_FLIP,
> +                                                  pbo);
> +
> +  if (res)
> +    {
> +      guint8 *dst;
> +
> +      /* Copy the pixels back into application's buffer */
> +      dst = _cogl_bitmap_map (bitmap,
> +                              COGL_BUFFER_ACCESS_WRITE,
> +                              COGL_BUFFER_MAP_HINT_DISCARD);
> +
> +      if (dst == NULL)
> +        res = FALSE;
> +      else
> +        {
> +          const guint8 *src;
> +
> +          src = _cogl_bitmap_map (pbo,
> +                                  COGL_BUFFER_ACCESS_READ,
> +                                  0 /* hints */);
> +          if (src == NULL)
> +            res = FALSE;
> +          else
> +            {
> +              int src_rowstride = cogl_bitmap_get_rowstride (pbo);
> +              int dst_rowstride = cogl_bitmap_get_rowstride (bitmap);
> +              int to_copy =
> +                _cogl_pixel_format_get_bytes_per_pixel (format) * width;
> +              int y;
> +
> +              /* If the framebuffer is onscreen we need to flip the
> +                 data while copying */
> +              if (!cogl_is_offscreen (framebuffer))
> +                {
> +                  src += src_rowstride * (height - 1);
> +                  src_rowstride = -src_rowstride;
> +                }
> +
> +              for (y = 0; y < height; y++)
> +                {
> +                  memcpy (dst, src, to_copy);
> +                  dst += dst_rowstride;
> +                  src += src_rowstride;
> +                }
> +
> +              _cogl_bitmap_unmap (pbo);
> +            }
> +
> +          _cogl_bitmap_unmap (bitmap);
> +        }
> +    }
> +
> +  cogl_object_unref (pbo);
> +
> +  return res;
> +}
> +
>  gboolean
>  cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>                                           int x,
> @@ -1960,7 +2051,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>   int width;
>   int height;
>
> -  _COGL_RETURN_VAL_IF_FAIL (source == COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
> +  _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
>   _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
>
>   if (!cogl_framebuffer_allocate (framebuffer, NULL))
> @@ -1970,6 +2061,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>
>   width = cogl_bitmap_get_width (bitmap);
>   height = cogl_bitmap_get_height (bitmap);
> +  format = cogl_bitmap_get_format (bitmap);
>
>   if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty)
>     {
> @@ -1986,6 +2078,32 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>         return TRUE;
>     }
>
> +  /* Workaround for cases where its faster to read into a temporary
> +   * PBO. This is only worth doing if:
> +   *
> +   * • The GPU is an Intel GPU. In that case there is a known
> +   *   fast-path when reading into a PBO that will use the blitter
> +   *   instead of the Mesa fallback code. The driver bug will only be
> +   *   set if this is the case.
> +   * • We're not already reading into a PBO.
> +   * • The target format is BGRA. The fast-path blit does not get hit
> +   *   otherwise.
> +   * • The size of the data is not trivially small. This isn't a
> +   *   requirement to hit the fast-path blit but intuitively it feels
> +   *   like if the amount of data is too small then the cost of
> +   *   allocating a PBO will outweigh the cost of temporarily
> +   *   converting the data to floats.
> +   */
> +  if ((ctx->gpu.driver_bugs &
> +       COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS) &&
> +      (width > 8 || height > 8) &&
> +      (format & ~COGL_PREMULT_BIT) == COGL_PIXEL_FORMAT_BGRA_8888 &&
> +      cogl_bitmap_get_buffer (bitmap) == NULL)
> +    return _cogl_framebuffer_slow_read_pixels_workaround (framebuffer,
> +                                                          x, y,
> +                                                          source,
> +                                                          bitmap);
> +
>   /* make sure any batched primitives get emitted to the GL driver
>    * before issuing our read pixels...
>    */
> @@ -2006,8 +2124,6 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>   if (!cogl_is_offscreen (framebuffer))
>     y = framebuffer_height - y - height;
>
> -  format = cogl_bitmap_get_format (bitmap);
> -
>   required_format = ctx->driver_vtable->pixel_format_to_gl (ctx,
>                                                             format,
>                                                             &gl_intformat,
> @@ -2017,6 +2133,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>   /* NB: All offscreen rendering is done upside down so there is no need
>    * to flip in this case... */
>   if ((ctx->private_feature_flags & COGL_PRIVATE_FEATURE_MESA_PACK_INVERT) &&
> +      (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
>       !cogl_is_offscreen (framebuffer))
>     {
>       GE (ctx, glPixelStorei (GL_PACK_INVERT_MESA, TRUE));
> @@ -2152,7 +2269,9 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>
>   /* NB: All offscreen rendering is done upside down so there is no need
>    * to flip in this case... */
> -  if (!cogl_is_offscreen (framebuffer) && !pack_invert_set)
> +  if (!cogl_is_offscreen (framebuffer) &&
> +      (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
> +      !pack_invert_set)
>     {
>       guint8 *temprow;
>       int rowstride;
> diff --git a/cogl/cogl-gpu-info-private.h b/cogl/cogl-gpu-info-private.h
> index 2930d6b..70f150a 100644
> --- a/cogl/cogl-gpu-info-private.h
> +++ b/cogl/cogl-gpu-info-private.h
> @@ -40,7 +40,12 @@ typedef enum
>
>  typedef enum
>  {
> -  COGL_GPU_INFO_DRIVER_STUB
> +  /* If this bug is present then it is faster to read pixels into a
> +   * PBO and then memcpy out of the PBO into system memory rather than
> +   * directly read into system memory.
> +   * https://bugs.freedesktop.org/show_bug.cgi?id=46631
> +   */
> +  COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS = 1 << 0
>  } CoglGpuInfoDriverBug;
>
>  typedef struct _CoglGpuInfoVersion CoglGpuInfoVersion;
> diff --git a/cogl/cogl-gpu-info.c b/cogl/cogl-gpu-info.c
> index f62bbc3..e64fdbd 100644
> --- a/cogl/cogl-gpu-info.c
> +++ b/cogl/cogl-gpu-info.c
> @@ -250,5 +250,15 @@ _cogl_gpu_info_init (CoglContext *ctx,
>     }
>
>   /* Determine the driver bugs */
> -  gpu->driver_bugs = 0;
> +
> +  /* In Mesa < 8.0.2 the glReadPixels implementation is really slow
> +     because it converts each pixel to a floating point representation
> +     and back even if the data could just be memcpy'd. The Intel
> +     driver has a fast blit path when reading into a PBO. Reading into
> +     a temporary PBO and then memcpying back out to the application's
> +     memory is faster than a regular glReadPixels in this case */
> +  if (gpu->vendor == COGL_GPU_INFO_VENDOR_INTEL &&
> +      gpu->driver_package == COGL_GPU_INFO_DRIVER_PACKAGE_MESA &&
> +      gpu->driver_package_version < COGL_VERSION_ENCODE (8, 0, 2))
> +    gpu->driver_bugs |= COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS;
>  }
> --
> 1.7.3.16.g9464b
>
> _______________________________________________
> Cogl mailing list
> Cogl at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/cogl


More information about the Cogl mailing list