[Cogl] [PATCH 5/5] Add a workaround for slow read pixels on Mesa
Neil Roberts
neil at linux.intel.com
Tue Mar 27 09:56:25 PDT 2012
Mesa before version 8.0.2 has a slow read pixels path that gets used
with the Intel driver where it converts all of the pixels into a
floating point representation and back even if the data is being read
into exactly the same format. There is however a faster path using the
blitter when reading into a PBO with BGRA format. It works out faster
to read into a PBO and then memcpy back out into the application's
buffer even though it adds an extra memcpy. This patch adds a
workaround in cogl_framebuffer_read_pixels_into_bitmap when it detects
this situation. In that case it will create a temporary CoglBitmap
using cogl_bitmap_new_with_size, read into it and then memcpy the data
back out.
The main impetus for this patch is that Gnome Shell has implemented
this workaround directly using GL calls but it seems like the kind of
thing that would sit better at the Cogl layer.
---
cogl/cogl-framebuffer-private.h | 10 ++++
cogl/cogl-framebuffer.c | 112 +++++++++++++++++++++++++++++++++++++--
cogl/cogl-gpu-private.h | 5 ++-
cogl/cogl-gpu.c | 14 +++++
4 files changed, 136 insertions(+), 5 deletions(-)
diff --git a/cogl/cogl-framebuffer-private.h b/cogl/cogl-framebuffer-private.h
index 0081a2f..4c759cf 100644
--- a/cogl/cogl-framebuffer-private.h
+++ b/cogl/cogl-framebuffer-private.h
@@ -93,6 +93,16 @@ typedef enum _CoglFramebufferState
#define COGL_FRAMEBUFFER_STATE_ALL ((1<<COGL_FRAMEBUFFER_STATE_INDEX_MAX) - 1)
+/* Private flags that can internally be added to CoglReadPixelsFlags */
+typedef enum
+{
+ /* If this is set then the data will not be flipped to compensate
+ for GL's upside-down coordinate system but instead will be left
+ in whatever order GL gives us (which will depend on whether the
+ framebuffer is offscreen or not) */
+ COGL_READ_PIXELS_NO_FLIP = 1L << 30
+} CoglPrivateReadPixelsFlags;
+
struct _CoglFramebuffer
{
CoglObject _parent;
diff --git a/cogl/cogl-framebuffer.c b/cogl/cogl-framebuffer.c
index ee11f7c..ee66e6f 100644
--- a/cogl/cogl-framebuffer.c
+++ b/cogl/cogl-framebuffer.c
@@ -1942,6 +1942,97 @@ _cogl_framebuffer_try_fast_read_pixel (CoglFramebuffer *framebuffer,
return FALSE;
}
+static gboolean
+_cogl_framebuffer_slow_read_pixels_workaround (CoglFramebuffer *framebuffer,
+ int x,
+ int y,
+ CoglReadPixelsFlags source,
+ CoglBitmap *bitmap)
+{
+ CoglContext *ctx;
+ CoglPixelFormat format;
+ CoglBitmap *pbo;
+ int width;
+ int height;
+ gboolean res;
+
+ _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
+ _COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
+
+ ctx = cogl_framebuffer_get_context (framebuffer);
+
+ width = cogl_bitmap_get_width (bitmap);
+ height = cogl_bitmap_get_height (bitmap);
+ format = cogl_bitmap_get_format (bitmap);
+
+ pbo = cogl_bitmap_new_with_size (ctx, width, height, format);
+ if (pbo == NULL)
+ return FALSE;
+
+ /* Read into the pbo. We need to disable the flipping because the
+ blit fast path in the driver does not work with
+ GL_PACK_INVERT_MESA is set */
+ res = cogl_framebuffer_read_pixels_into_bitmap (framebuffer,
+ x, y,
+ source |
+ COGL_READ_PIXELS_NO_FLIP,
+ pbo);
+
+ if (res)
+ {
+ guint8 *dst;
+
+ /* Copy the pixels back into application's buffer */
+ dst = _cogl_bitmap_map (bitmap,
+ COGL_BUFFER_ACCESS_WRITE,
+ COGL_BUFFER_MAP_HINT_DISCARD);
+
+ if (dst == NULL)
+ res = FALSE;
+ else
+ {
+ const guint8 *src;
+
+ src = _cogl_bitmap_map (pbo,
+ COGL_BUFFER_ACCESS_READ,
+ 0 /* hints */);
+ if (src == NULL)
+ res = FALSE;
+ else
+ {
+ int src_rowstride = cogl_bitmap_get_rowstride (pbo);
+ int dst_rowstride = cogl_bitmap_get_rowstride (bitmap);
+ int to_copy =
+ _cogl_pixel_format_get_bytes_per_pixel (format) * width;
+ int y;
+
+ /* If the framebuffer is onscreen we need to flip the
+ data while copying */
+ if (!cogl_is_offscreen (framebuffer))
+ {
+ src += src_rowstride * (height - 1);
+ src_rowstride = -src_rowstride;
+ }
+
+ for (y = 0; y < height; y++)
+ {
+ memcpy (dst, src, to_copy);
+ dst += dst_rowstride;
+ src += src_rowstride;
+ }
+
+ _cogl_bitmap_unmap (pbo);
+ }
+
+ _cogl_bitmap_unmap (bitmap);
+ }
+ }
+
+ cogl_object_unref (pbo);
+
+ return res;
+}
+
gboolean
cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
int x,
@@ -1960,13 +2051,14 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
int width;
int height;
- _COGL_RETURN_VAL_IF_FAIL (source == COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
+ _COGL_RETURN_VAL_IF_FAIL (source & COGL_READ_PIXELS_COLOR_BUFFER, FALSE);
_COGL_RETURN_VAL_IF_FAIL (cogl_is_framebuffer (framebuffer), FALSE);
ctx = cogl_framebuffer_get_context (framebuffer);
width = cogl_bitmap_get_width (bitmap);
height = cogl_bitmap_get_height (bitmap);
+ format = cogl_bitmap_get_format (bitmap);
if (width == 1 && height == 1 && !framebuffer->clear_clip_dirty)
{
@@ -1983,6 +2075,17 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
return TRUE;
}
+ /* Workaround for cases where its faster to read into a temporary
+ PBO */
+ if ((ctx->gpu.driver_bugs & COGL_GPU_DRIVER_BUG_SLOW_READ_PIXELS) &&
+ (width > 8 || height > 8) &&
+ (format & ~COGL_PREMULT_BIT) == COGL_PIXEL_FORMAT_BGRA_8888 &&
+ cogl_bitmap_get_buffer (bitmap) == NULL)
+ return _cogl_framebuffer_slow_read_pixels_workaround (framebuffer,
+ x, y,
+ source,
+ bitmap);
+
/* make sure any batched primitives get emitted to the GL driver
* before issuing our read pixels...
*/
@@ -2003,8 +2106,6 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
if (!cogl_is_offscreen (framebuffer))
y = framebuffer_height - y - height;
- format = cogl_bitmap_get_format (bitmap);
-
required_format = ctx->driver_vtable->pixel_format_to_gl (ctx,
format,
&gl_intformat,
@@ -2014,6 +2115,7 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
/* NB: All offscreen rendering is done upside down so there is no need
* to flip in this case... */
if ((ctx->private_feature_flags & COGL_PRIVATE_FEATURE_MESA_PACK_INVERT) &&
+ (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
!cogl_is_offscreen (framebuffer))
{
GE (ctx, glPixelStorei (GL_PACK_INVERT_MESA, TRUE));
@@ -2149,7 +2251,9 @@ cogl_framebuffer_read_pixels_into_bitmap (CoglFramebuffer *framebuffer,
/* NB: All offscreen rendering is done upside down so there is no need
* to flip in this case... */
- if (!cogl_is_offscreen (framebuffer) && !pack_invert_set)
+ if (!cogl_is_offscreen (framebuffer) &&
+ (source & COGL_READ_PIXELS_NO_FLIP) == 0 &&
+ !pack_invert_set)
{
guint8 *temprow;
int rowstride;
diff --git a/cogl/cogl-gpu-private.h b/cogl/cogl-gpu-private.h
index a32de0a..47b2008 100644
--- a/cogl/cogl-gpu-private.h
+++ b/cogl/cogl-gpu-private.h
@@ -40,7 +40,10 @@ typedef enum
typedef enum
{
- COGL_GPU_DRIVER_STUB
+ /* If this bug is present then it is faster to read pixels into a
+ PBO and then memcpy out of the PBO into system memory rather than
+ directly read into system memory. */
+ COGL_GPU_DRIVER_BUG_SLOW_READ_PIXELS = 1 << 0
} CoglGpuDriverBug;
typedef struct _CoglGpuVersion CoglGpuVersion;
diff --git a/cogl/cogl-gpu.c b/cogl/cogl-gpu.c
index 2ca38db..88f5124 100644
--- a/cogl/cogl-gpu.c
+++ b/cogl/cogl-gpu.c
@@ -108,4 +108,18 @@ _cogl_gpu_init (CoglContext *ctx,
/* Determine the driver bugs */
gpu->driver_bugs = 0;
+
+ /* In Mesa < 8.0.2 the glReadPixels implementation is really slow
+ because it converts each pixel to a floating point representation
+ and back even if the data could just be memcpy'd. The Intel
+ driver has a fast blit path when reading into a PBO. Reading into
+ a temporary PBO and then memcpying back out to the application's
+ memory is faster than a regular glReadPixels in this case */
+ if (gpu->vendor == COGL_GPU_VENDOR_INTEL &&
+ gpu->driver_package == COGL_GPU_DRIVER_PACKAGE_MESA &&
+ !_cogl_gpu_version_check (&gpu->driver_package_version,
+ 8, 0, 2))
+ gpu->driver_bugs |= COGL_GPU_DRIVER_BUG_SLOW_READ_PIXELS;
+
+ gpu->driver_bugs = COGL_GPU_DRIVER_BUG_SLOW_READ_PIXELS;
}
--
1.7.3.16.g9464b
More information about the Cogl
mailing list