[Cogl] [PATCH 2/2] Add a workaround for slow read pixels on Mesa
Robert Bragg
robert at sixbynine.org
Thu Apr 5 05:50:03 PDT 2012
thanks, this all looks good to me now.
Reviewed-by: Robert Bragg <robert at linux.intel.com>
On Thu, Apr 5, 2012 at 1:22 PM, Neil Roberts <neil at linux.intel.com> wrote:
> Mesa before version 8.0.2 has a slow read pixels path that gets used
> with the Intel driver where it converts all of the pixels into a
> floating point representation and back even if the data is being read
> into exactly the same format. There is however a faster path using the
> blitter when reading into a PBO with BGRA format. It works out faster
> to read into a PBO and then memcpy back out into the application's
> buffer even though it adds an extra memcpy. This patch adds a
> workaround in cogl_framebuffer_read_pixels_into_bitmap when it detects
> this situation. In that case it will create a temporary CoglBitmap
> using cogl_bitmap_new_with_size, read into it and then memcpy the data
> back out.
>
> The main impetus for this patch is that Gnome Shell has implemented
> this workaround directly using GL calls but it seems like the kind of
> thing that would sit better at the Cogl layer.
> ---
> cogl/cogl-framebuffer-private.h | 10 +++
> cogl/cogl-framebuffer.c | 127 +++++++++++++++++++++++++++++++++++++-
> cogl/cogl-gpu-info-private.h | 7 ++-
> cogl/cogl-gpu-info.c | 12 ++++-
> 4 files changed, 150 insertions(+), 6 deletions(-)
>
> diff --git a/cogl/cogl-framebuffer-private.h b/cogl/cogl-framebuffer-private.h
> index 0081a2f..4c759cf 100644
> --- a/cogl/cogl-framebuffer-private.h
> +++ b/cogl/cogl-framebuffer-private.h
> @@ -93,6 +93,16 @@ typedef enum _CoglFramebufferState
>
> #define COGL_FRAMEBUFFER_STATE_ALL ((1<<COGL_FRAMEBUFFER_STATE_INDEX_MAX) - 1)
>
> +/* Private flags that can internally be added to CoglReadPixelsFlags */
> +typedef enum
> +{
> + /* If this is set then the data will not be flipped to compensate
> + for GL's upside-down coordinate system but instead will be left
> + in whatever order GL gives us (which will depend on whether the
> + framebuffer is offscreen or not) */
> + COGL_READ_PIXELS_NO_FLIP = 1L << 30
> +} CoglPrivateReadPixelsFlags;
> +
> struct _CoglFramebuffer
> {
> CoglObject _parent;
> diff --git a/cogl/cogl-framebuffer.c b/cogl/cogl-framebuffer.c
> index b55b016..bc6bad8 100644
> --- a/cogl/cogl-framebuffer.c
> +++ b/cogl/cogl-framebuffer.c
> @@ -1942,6 +1942,97 @@ _cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
> return FALSE;
> }
>
> +static gboolean
> +_cogl_framebuffer_slow_read_pixels_workaround (CoglFramebuffer *framebuffer,
> + int x,
> + int y,
> + CoglReadPixelsFlags source,
> + CoglBitmap *bitmap)
> +{
> + CoglContext *ctx;
> + CoglPixelFormat format;
> + CoglBitmap *pbo;
> + int width;
> + int height;
> + gboolean res;
> +
> + _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
> + _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
> +
> + ctx = cogl_framebuffer_get_context (framebuffer);
> +
> + width = cogl_bitmap_get_width (bitmap);
> + height = cogl_bitmap_get_height (bitmap);
> + format = cogl_bitmap_get_format (bitmap);
> +
> + pbo = cogl_bitmap_new_with_size (ctx, width, height, format);
> + if (pbo == NULL)
> + return FALSE;
> +
> + /* Read into the pbo. We need to disable the flipping because the
> + blit fast path in the driver does not work with
> + GL_PACK_INVERT_MESA is set */
> + res = cogl_framebuffer_read_pixels_into_bitmap (framebuffer,
> + x, y,
> + source |
> + COGL_READ_PIXELS_NO_FLIP,
> + pbo);
> +
> + if (res)
> + {
> + guint8 *dst;
> +
> + /* Copy the pixels back into application's buffer */
> + dst = _cogl_bitmap_map (bitmap,
> + COGL_BUFFER_ACCESS_WRITE,
> + COGL_BUFFER_MAP_HINT_DISCARD);
> +
> + if (dst == NULL)
> + res = FALSE;
> + else
> + {
> + const guint8 *src;
> +
> + src = _cogl_bitmap_map (pbo,
> + COGL_BUFFER_ACCESS_READ,
> + 0 /* hints */);
> + if (src == NULL)
> + res = FALSE;
> + else
> + {
> + int src_rowstride = cogl_bitmap_get_rowstride (pbo);
> + int dst_rowstride = cogl_bitmap_get_rowstride (bitmap);
> + int to_copy =
> + _cogl_pixel_format_get_bytes_per_pixel (format) * width;
> + int y;
> +
> + /* If the framebuffer is onscreen we need to flip the
> + data while copying */
> + if (!cogl_is_offscreen (framebuffer))
> + {
> + src += src_rowstride * (height - 1);
> + src_rowstride = -src_rowstride;
> + }
> +
> + for (y = 0; y < height; y++)
> + {
> + memcpy (dst, src, to_copy);
> + dst += dst_rowstride;
> + src += src_rowstride;
> + }
> +
> + _cogl_bitmap_unmap (pbo);
> + }
> +
> + _cogl_bitmap_unmap (bitmap);
> + }
> + }
> +
> + cogl_object_unref (pbo);
> +
> + return res;
> +}
> +
> gboolean
> cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
> int x,
> @@ -1960,7 +2051,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
> int width;
> int height;
>
> - _COGL_RETURN_VAL_IF_FAIL (source == COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
> + _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
> _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
>
> if (!cogl_framebuffer_allocate (framebuffer, NULL))
> @@ -1970,6 +2061,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>
> width = cogl_bitmap_get_width (bitmap);
> height = cogl_bitmap_get_height (bitmap);
> + format = cogl_bitmap_get_format (bitmap);
>
> if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty)
> {
> @@ -1986,6 +2078,32 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
> return TRUE;
> }
>
> + /* Workaround for cases where its faster to read into a temporary
> + * PBO. This is only worth doing if:
> + *
> + * • The GPU is an Intel GPU. In that case there is a known
> + * fast-path when reading into a PBO that will use the blitter
> + * instead of the Mesa fallback code. The driver bug will only be
> + * set if this is the case.
> + * • We're not already reading into a PBO.
> + * • The target format is BGRA. The fast-path blit does not get hit
> + * otherwise.
> + * • The size of the data is not trivially small. This isn't a
> + * requirement to hit the fast-path blit but intuitively it feels
> + * like if the amount of data is too small then the cost of
> + * allocating a PBO will outweigh the cost of temporarily
> + * converting the data to floats.
> + */
> + if ((ctx->gpu.driver_bugs &
> + COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS) &&
> + (width > 8 || height > 8) &&
> + (format & ~COGL_PREMULT_BIT) == COGL_PIXEL_FORMAT_BGRA_8888 &&
> + cogl_bitmap_get_buffer (bitmap) == NULL)
> + return _cogl_framebuffer_slow_read_pixels_workaround (framebuffer,
> + x, y,
> + source,
> + bitmap);
> +
> /* make sure any batched primitives get emitted to the GL driver
> * before issuing our read pixels...
> */
> @@ -2006,8 +2124,6 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
> if (!cogl_is_offscreen (framebuffer))
> y = framebuffer_height - y - height;
>
> - format = cogl_bitmap_get_format (bitmap);
> -
> required_format = ctx->driver_vtable->pixel_format_to_gl (ctx,
> format,
> &gl_intformat,
> @@ -2017,6 +2133,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
> /* NB: All offscreen rendering is done upside down so there is no need
> * to flip in this case... */
> if ((ctx->private_feature_flags & COGL_PRIVATE_FEATURE_MESA_PACK_INVERT) &&
> + (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
> !cogl_is_offscreen (framebuffer))
> {
> GE (ctx, glPixelStorei (GL_PACK_INVERT_MESA, TRUE));
> @@ -2152,7 +2269,9 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
>
> /* NB: All offscreen rendering is done upside down so there is no need
> * to flip in this case... */
> - if (!cogl_is_offscreen (framebuffer) && !pack_invert_set)
> + if (!cogl_is_offscreen (framebuffer) &&
> + (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
> + !pack_invert_set)
> {
> guint8 *temprow;
> int rowstride;
> diff --git a/cogl/cogl-gpu-info-private.h b/cogl/cogl-gpu-info-private.h
> index 2930d6b..70f150a 100644
> --- a/cogl/cogl-gpu-info-private.h
> +++ b/cogl/cogl-gpu-info-private.h
> @@ -40,7 +40,12 @@ typedef enum
>
> typedef enum
> {
> - COGL_GPU_INFO_DRIVER_STUB
> + /* If this bug is present then it is faster to read pixels into a
> + * PBO and then memcpy out of the PBO into system memory rather than
> + * directly read into system memory.
> + * https://bugs.freedesktop.org/show_bug.cgi?id=46631
> + */
> + COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS = 1 << 0
> } CoglGpuInfoDriverBug;
>
> typedef struct _CoglGpuInfoVersion CoglGpuInfoVersion;
> diff --git a/cogl/cogl-gpu-info.c b/cogl/cogl-gpu-info.c
> index f62bbc3..e64fdbd 100644
> --- a/cogl/cogl-gpu-info.c
> +++ b/cogl/cogl-gpu-info.c
> @@ -250,5 +250,15 @@ _cogl_gpu_info_init (CoglContext *ctx,
> }
>
> /* Determine the driver bugs */
> - gpu->driver_bugs = 0;
> +
> + /* In Mesa < 8.0.2 the glReadPixels implementation is really slow
> + because it converts each pixel to a floating point representation
> + and back even if the data could just be memcpy'd. The Intel
> + driver has a fast blit path when reading into a PBO. Reading into
> + a temporary PBO and then memcpying back out to the application's
> + memory is faster than a regular glReadPixels in this case */
> + if (gpu->vendor == COGL_GPU_INFO_VENDOR_INTEL &&
> + gpu->driver_package == COGL_GPU_INFO_DRIVER_PACKAGE_MESA &&
> + gpu->driver_package_version < COGL_VERSION_ENCODE (8, 0, 2))
> + gpu->driver_bugs |= COGL_GPU_INFO_DRIVER_BUG_MESA_46631_SLOW_READ_PIXELS;
> }
> --
> 1.7.3.16.g9464b
>
> _______________________________________________
> Cogl mailing list
> Cogl at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/cogl
More information about the Cogl
mailing list