[Mesa-dev] [PATCH RFC v1] i965: Implement CopyTexSubImage2D via BLORP (and use it by default).

Kenneth Graunke kenneth at whitecape.org
Sat Jan 19 11:06:22 PST 2013


The BLT engine has many limitations.  Currently, it can only blit
X-tiled buffers (since we don't have a kernel API to whack the BLT
tiling mode register), which means all depth/stencil operations get
punted to meta code, which can be very CPU-intensive.

Even if we used the BLT engine, it can't blit between buffers with
different tiling modes, such as an X-tiled non-MSAA ARGB8888 texture
and a Y-tiled CMS ARGB8888 renderbuffer.  This is a fundamental
limitation, and the only way around that is to use BLORP.

Previously, BLORP only handled BlitFramebuffer.  This patch adds an
additional frontend for doing CopyTexSubImage.  It also makes it the
default.  This is partly to increase testing and avoid hiding bugs,
and partly because the BLORP path can already handle more cases.  With
trivial extensions, it should be able to handle everything the BLT can.

This helps PlaneShift massively, which tries to CopyTexSubImage2D
between depth buffers whenever a player casts a spell.  Since these
are Y-tiled, we hit meta and software ReadPixels paths, eating 99% CPU
while delivering ~1 FPS.  This is particularly bad in an MMO setting
because people cast spells all the time.

It also helps Xonotic in 4X MSAA mode.  At default power management
settings, I measured a 6.35138% +/- 0.672548% performance boost (n=5).

No Piglit regressions on Ivybridge.  I have not tested Sandybridge.

Cc: Paul Berry <stereotype441 at gmail.com>
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 106 +++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_context.h      |   8 ++
 src/mesa/drivers/dri/intel/intel_tex_copy.c  |  32 ++++++--
 3 files changed, 138 insertions(+), 8 deletions(-)

Paul,

I'd appreciate your feedback on this patch.  It's my first time really
working with BLORP, so I'm bound to have screwed up something. :)

I'm not sure about the HiZ and downsample resolves (hence the //'d out lines).

I'm also tempted to merge do_blorp_copytexsubimage into the caller.
I mostly created it for symmetry with the existing code, but it's so small
that the distinction is rather meaningless.

Thanks in advance!
 --Ken

Also, here's the Xonotic ministat in case anyone cares:

x before
+ after
+--------------------------------------------------------------------------+
|x xxx                                                      + +    +     ++|
| |A|                                                        |_____A______||
+--------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x   5     110.64783     111.05587     110.87269     110.87019    0.15317753
+   5     117.07382     118.66559     117.92314     117.91198    0.70663046
Difference at 95.0% confidence
        7.04179 +/- 0.745655
        6.35138% +/- 0.672548%
        (Student's t, pooled s = 0.511268)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index bc7916a..b6d7738 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -295,6 +295,112 @@ try_blorp_blit(struct intel_context *intel,
    return true;
 }
 
+static void
+do_blorp_copytexsubimage(struct intel_context *intel,
+                         struct intel_renderbuffer *src_irb,
+                         struct gl_texture_image *dst_image,
+                         int srcX0, int srcY0,
+                         int dstX0, int dstY0,
+                         int dstX1, int dstY1,
+                         bool mirror_y)
+{
+   /* Find source/dst miptrees */
+   struct intel_mipmap_tree *src_mt = src_irb->mt;
+   struct intel_mipmap_tree *dst_mt = intel_texture_image(dst_image)->mt;
+
+   /* Get ready to blit.  This includes depth resolving the src and dst
+    * buffers if necessary.
+    */
+   intel_renderbuffer_resolve_depth(intel, src_irb);
+   intel_miptree_slice_resolve_hiz(intel, dst_mt,
+                                   dst_image->Level, dst_image->Face);
+
+   /* Do the blit */
+   brw_blorp_blit_miptrees(intel,
+                           src_mt, src_irb->mt_level, src_irb->mt_layer,
+                           dst_mt, dst_image->Level, dst_image->Face,
+                           srcX0, srcY0, dstX0, dstY0, dstX1, dstY1,
+                           false, mirror_y);
+
+   //intel_renderbuffer_set_needs_hiz_resolve(dst_irb);
+   //intel_renderbuffer_set_needs_downsample(dst_irb);
+}
+
+
+bool
+brw_blorp_copytexsubimage(struct intel_context *intel,
+                          struct gl_renderbuffer *src_rb,
+                          struct gl_texture_image *dst_image,
+                          int srcX0, int srcY0,
+                          int dstX0, int dstY0,
+                          int width, int height)
+{
+   struct gl_context *ctx = &intel->ctx;
+
+   /* BLORP is not supported before Gen6. */
+   if (intel->gen < 6)
+      return false;
+
+   /* CopyTexSubImage should happen even in conditional rendering.  We could
+    * turn it off and back on again.  For now, just bail instead.
+    */
+   if (ctx->Query.CondRenderQuery)
+      return false;
+
+   /* BLORP requires the formats to match.  In the future, we should relax
+    * this for simple format mismatches like XRGB <-> ARGB.
+    */
+   if (_mesa_get_srgb_format_linear(src_rb->Format) !=
+       _mesa_get_srgb_format_linear(dst_image->TexFormat))
+      return false;
+
+   /* Sync up the state of window system buffers.  We need to do this before
+    * we go looking for the buffers.
+    */
+   intel_prepare_render(intel);
+
+   /* Detect if the blit needs to be mirrored */
+   bool mirror_y = false;
+
+   int srcX1 = srcX0 + width;
+   int srcY1 = srcY0 + height;
+   int dstX1 = dstX0 + width;
+   int dstY1 = dstY0 + height;
+
+   /* If the destination rectangle needs to be clipped or scissored, do so. */
+   if (!(clip_or_scissor(false, srcX0, srcX1, dstX0, dstX1,
+                         0, dst_image->Width) &&
+         clip_or_scissor(mirror_y, srcY0, srcY1, dstY0, dstY1,
+                         0, dst_image->Height))) {
+      /* Everything got clipped/scissored away, so the blit was successful. */
+      return true;
+   }
+
+   /* If the source rectangle needs to be clipped or scissored, do so. */
+   if (!(clip_or_scissor(false, dstX0, dstX1, srcX0, srcX1,
+                         0, src_rb->Width) &&
+         clip_or_scissor(mirror_y, dstY0, dstY1, srcY0, srcY1,
+                         0, src_rb->Height))) {
+      /* Everything got clipped/scissored away, so the blit was successful. */
+      return true;
+   }
+
+   /* Account for the fact that in the system framebuffer, the origin is at
+    * the lower left.
+    */
+   if (_mesa_is_winsys_fbo(ctx->ReadBuffer)) {
+      GLint tmp = src_rb->Height - srcY0;
+      srcY0 = src_rb->Height - srcY1;
+      srcY1 = tmp;
+      mirror_y = !mirror_y;
+   }
+
+   do_blorp_copytexsubimage(intel, intel_renderbuffer(src_rb), dst_image,
+                            srcX0, srcY0, dstX0, dstY0, dstX1, dstY1, mirror_y);
+   return true;
+}
+
+
 GLbitfield
 brw_blorp_framebuffer(struct intel_context *intel,
                       GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index f3a3efe..3976ef8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1219,6 +1219,14 @@ brw_blorp_framebuffer(struct intel_context *intel,
                       GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                       GLbitfield mask, GLenum filter);
 
+bool
+brw_blorp_copytexsubimage(struct intel_context *intel,
+                          struct gl_renderbuffer *src_rb,
+                          struct gl_texture_image *dst_image,
+                          int srcX0, int srcY0,
+                          int dstX0, int dstY0,
+                          int width, int height);
+
 /* gen6_multisample_state.c */
 void
 gen6_emit_3dstate_multisample(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 1af7b1c..0a3dfef 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -41,6 +41,9 @@
 #include "intel_fbo.h"
 #include "intel_tex.h"
 #include "intel_blit.h"
+#ifndef I915
+#include "brw_context.h"
+#endif
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
 
@@ -156,15 +159,28 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
                      GLint x, GLint y,
                      GLsizei width, GLsizei height)
 {
-   if (dims == 3 || !intel_copy_texsubimage(intel_context(ctx),
-                               intel_texture_image(texImage),
-                               xoffset, yoffset,
-                               intel_renderbuffer(rb), x, y, width, height)) {
-      fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
-      _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
-                                 xoffset, yoffset, zoffset,
-                                 rb, x, y, width, height);
+   struct intel_context *intel = intel_context(ctx);
+   if (dims != 3) {
+#ifndef I915
+      /* Try BLORP first.  It can handle almost everything. */
+      if (brw_blorp_copytexsubimage(intel, rb, texImage, x, y,
+                                    xoffset, yoffset, width, height))
+         return;
+#endif
+
+      /* Next, try the BLT engine. */
+      if (intel_copy_texsubimage(intel_context(ctx),
+                                 intel_texture_image(texImage),
+                                 xoffset, yoffset,
+                                 intel_renderbuffer(rb), x, y, width, height))
+         return;
    }
+
+   /* Finally, fall back to meta.  This will likely be slow. */
+   fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
+   _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
+                              xoffset, yoffset, zoffset,
+                              rb, x, y, width, height);
 }
 
 
-- 
1.8.1.1



More information about the mesa-dev mailing list