[Mesa-dev] [PATCH v2] i965: Avoid unnecessary copy when depthstencil workaround invoked by clear.

Paul Berry stereotype441 at gmail.com
Thu Mar 14 08:54:47 PDT 2013


Since apps typically begin rendering with a call to glClear(), it is
likely that when brw_workaround_depthstencil_alignment() moves a
miplevel to a temporary buffer, it can avoid doing a blit, since the
contents of the miplevel are about to be erased.

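This patch adds the necessary plumbing to determine when
brw_workaround_depthstencil_alignment() is being called as a
consequence of glClear(), and avoids the unnecessary blit when it is
safe to do so: the clear must not be restricted by a scissor
rectangle, and a depth buffer stored in a packed depth/stencil format
is only invalidated when stencil is being cleared too and both
attachments share the same miptree.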

Reviewed-by: Chad Versace <chad.versace at linux.intel.com>
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

v2: Eliminate unnecessary call to _mesa_is_depthstencil_format().  Fix
handling of depth buffer in depth/stencil format.
---
 src/mesa/drivers/dri/i965/brw_clear.c            |  4 +++-
 src/mesa/drivers/dri/i965/brw_context.h          |  3 ++-
 src/mesa/drivers/dri/i965/brw_draw.c             |  2 +-
 src/mesa/drivers/dri/i965/brw_misc_state.c       | 24 +++++++++++++++++++-----
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  2 +-
 src/mesa/drivers/dri/intel/intel_fbo.c           | 10 ++++++++--
 src/mesa/drivers/dri/intel/intel_fbo.h           |  3 ++-
 7 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index cde1a06..e740f65 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -223,6 +223,8 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
 {
    struct brw_context *brw = brw_context(ctx);
    struct intel_context *intel = &brw->intel;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   bool partial_clear = ctx->Scissor.Enabled && !noop_scissor(ctx, fb);
 
    if (!_mesa_check_conditional_render(ctx))
       return;
@@ -232,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
    }
 
    intel_prepare_render(intel);
-   brw_workaround_depthstencil_alignment(brw);
+   brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);
 
    if (mask & BUFFER_BIT_DEPTH) {
       if (brw_fast_clear_depth(ctx)) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index d2e2ade..8069567 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1130,7 +1130,8 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                      struct intel_mipmap_tree *stencil_mt,
                                      uint32_t *out_tile_mask_x,
                                      uint32_t *out_tile_mask_y);
-void brw_workaround_depthstencil_alignment(struct brw_context *brw);
+void brw_workaround_depthstencil_alignment(struct brw_context *brw,
+                                           GLbitfield clear_mask);
 
 /*======================================================================
  * brw_queryobj.c
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index e408185..809bcc5 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -439,7 +439,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
    /* This workaround has to happen outside of brw_upload_state() because it
     * may flush the batchbuffer for a blit, affecting the state flags.
     */
-   brw_workaround_depthstencil_alignment(brw);
+   brw_workaround_depthstencil_alignment(brw, 0);
 
    /* Resolves must occur after updating renderbuffers, updating context state,
     * and finalizing textures but before setting up any hardware state for
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 1024c42..c0d6243 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -41,6 +41,7 @@
 #include "brw_defines.h"
 
 #include "main/fbobject.h"
+#include "main/glformats.h"
 
 /* Constant single cliprect for framebuffer object or DRI2 drawing */
 static void upload_drawing_rect(struct brw_context *brw)
@@ -328,7 +329,8 @@ get_stencil_miptree(struct intel_renderbuffer *irb)
 }
 
 void
-brw_workaround_depthstencil_alignment(struct brw_context *brw)
+brw_workaround_depthstencil_alignment(struct brw_context *brw,
+                                      GLbitfield clear_mask)
 {
    struct intel_context *intel = &brw->intel;
    struct gl_context *ctx = &intel->ctx;
@@ -341,10 +343,22 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
    struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
    uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
    uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
+   bool invalidate_depth = clear_mask & GL_DEPTH_BUFFER_BIT;
+   bool invalidate_stencil = clear_mask & GL_STENCIL_BUFFER_BIT;
 
    if (depth_irb)
       depth_mt = depth_irb->mt;
 
+   if (depth_irb && invalidate_depth
+       && _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL) {
+      /* Depth buffer is in depth/stencil format, so it's only safe to
+       * invalidate it if we're also clearing stencil, and both depth_irb and
+       * stencil_irb point to the same miptree.
+       */
+      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
+         && depth_irb->mt == stencil_irb->mt;
+   }
+
    uint32_t tile_mask_x, tile_mask_y;
    brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
                                    &tile_mask_x, &tile_mask_y);
@@ -373,8 +387,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
          perf_debug("HW workaround: blitting depth level %d to a temporary "
                     "to fix alignment (depth tile offset %d,%d)\n",
                     depth_irb->mt_level, tile_x, tile_y);
-
-         intel_renderbuffer_move_to_temp(intel, depth_irb);
+         intel_renderbuffer_move_to_temp(intel, depth_irb, invalidate_depth);
          /* In the case of stencil_irb being the same packed depth/stencil
           * texture but not the same rb, make it point at our rebased mt, too.
           */
@@ -435,7 +448,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
                  "to fix alignment (stencil tile offset %d,%d)\n",
                  stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);
 
-      intel_renderbuffer_move_to_temp(intel, stencil_irb);
+      intel_renderbuffer_move_to_temp(intel, stencil_irb, invalidate_stencil);
       stencil_mt = get_stencil_miptree(stencil_irb);
 
       intel_miptree_get_image_offset(stencil_mt,
@@ -459,7 +472,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
                        tile_x, tile_y,
                        stencil_tile_x, stencil_tile_y);
 
-            intel_renderbuffer_move_to_temp(intel, depth_irb);
+            intel_renderbuffer_move_to_temp(intel, depth_irb,
+                                            invalidate_depth);
 
             tile_x = depth_irb->draw_x & tile_mask_x;
             tile_y = depth_irb->draw_y & tile_mask_y;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 7979487..2d722d2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1224,7 +1224,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 	  * select the image.  So, instead, we just make a new single-level
 	  * miptree and render into that.
 	  */
-	 intel_renderbuffer_move_to_temp(intel, irb);
+	 intel_renderbuffer_move_to_temp(intel, irb, false);
 	 mt = irb->mt;
       }
    }
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 7186978..410c393 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -991,7 +991,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel,
 
 void
 intel_renderbuffer_move_to_temp(struct intel_context *intel,
-                                struct intel_renderbuffer *irb)
+                                struct intel_renderbuffer *irb,
+                                bool invalidate)
 {
    struct intel_texture_image *intel_image =
       intel_texture_image(irb->tex_image);
@@ -1009,7 +1010,12 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel,
                                  irb->mt->num_samples,
                                  false /* force_y_tiling */);
 
-   intel_miptree_copy_teximage(intel, intel_image, new_mt);
+   /* If the invalidate flag is set, we don't need to blit the data across
+    * because it is about to be overwritten.
+    */
+   if (!invalidate)
+      intel_miptree_copy_teximage(intel, intel_image, new_mt);
+
    intel_miptree_reference(&irb->mt, intel_image->mt);
    intel_renderbuffer_set_draw_offset(irb);
    intel_miptree_release(&new_mt);
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index ce744bf..9313c35 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -198,7 +198,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel,
 				 struct intel_renderbuffer *irb);
 
 void intel_renderbuffer_move_to_temp(struct intel_context *intel,
-                                     struct intel_renderbuffer *irb);
+                                     struct intel_renderbuffer *irb,
+                                     bool invalidate);
 
 unsigned
 intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
-- 
1.8.2



More information about the mesa-dev mailing list