[Mesa-dev] [PATCH 2/2] i965: Avoid unnecessary copy when depthstencil workaround invoked by clear.
Paul Berry
stereotype441 at gmail.com
Mon Mar 11 11:29:56 PDT 2013
Since apps typically begin rendering with a call to glClear(), it is
likely that when brw_workaround_depthstencil_alignment() moves a
miplevel to a temporary buffer, it can avoid doing a blit, since the
contents of the miplevel are about to be erased.
This patch adds the necessary plumbing to determine when
brw_workaround_depthstencil_alignment() is being called as a
consequence of glClear(), and avoids the unnecessary blit when it is
safe to do so.
---
src/mesa/drivers/dri/i965/brw_clear.c | 4 +++-
src/mesa/drivers/dri/i965/brw_context.h | 3 ++-
src/mesa/drivers/dri/i965/brw_draw.c | 2 +-
src/mesa/drivers/dri/i965/brw_misc_state.c | 26 +++++++++++++++++++-----
src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
src/mesa/drivers/dri/intel/intel_fbo.c | 10 +++++++--
src/mesa/drivers/dri/intel/intel_fbo.h | 3 ++-
7 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_clear.c b/src/mesa/drivers/dri/i965/brw_clear.c
index cde1a06..e740f65 100644
--- a/src/mesa/drivers/dri/i965/brw_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_clear.c
@@ -223,6 +223,8 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
{
struct brw_context *brw = brw_context(ctx);
struct intel_context *intel = &brw->intel;
+ struct gl_framebuffer *fb = ctx->DrawBuffer;
+ bool partial_clear = ctx->Scissor.Enabled && !noop_scissor(ctx, fb);
if (!_mesa_check_conditional_render(ctx))
return;
@@ -232,7 +234,7 @@ brw_clear(struct gl_context *ctx, GLbitfield mask)
}
intel_prepare_render(intel);
- brw_workaround_depthstencil_alignment(brw);
+ brw_workaround_depthstencil_alignment(brw, partial_clear ? 0 : mask);
if (mask & BUFFER_BIT_DEPTH) {
if (brw_fast_clear_depth(ctx)) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c34d6b1..5aa0081 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1129,7 +1129,8 @@ void brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
struct intel_mipmap_tree *stencil_mt,
uint32_t *out_tile_mask_x,
uint32_t *out_tile_mask_y);
-void brw_workaround_depthstencil_alignment(struct brw_context *brw);
+void brw_workaround_depthstencil_alignment(struct brw_context *brw,
+ GLbitfield clear_mask);
/*======================================================================
* brw_queryobj.c
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 9c96f69..149497f 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -439,7 +439,7 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
/* This workaround has to happen outside of brw_state_upload() because it
* may flush the batchbuffer for a blit, affecting the state flags.
*/
- brw_workaround_depthstencil_alignment(brw);
+ brw_workaround_depthstencil_alignment(brw, 0);
/* Resolves must occur after updating renderbuffers, updating context state,
* and finalizing textures but before setting up any hardware state for
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 1024c42..bf367d0 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -41,6 +41,7 @@
#include "brw_defines.h"
#include "main/fbobject.h"
+#include "main/glformats.h"
/* Constant single cliprect for framebuffer object or DRI2 drawing */
static void upload_drawing_rect(struct brw_context *brw)
@@ -328,7 +329,8 @@ get_stencil_miptree(struct intel_renderbuffer *irb)
}
void
-brw_workaround_depthstencil_alignment(struct brw_context *brw)
+brw_workaround_depthstencil_alignment(struct brw_context *brw,
+ GLbitfield clear_mask)
{
struct intel_context *intel = &brw->intel;
struct gl_context *ctx = &intel->ctx;
@@ -341,10 +343,24 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
+ bool invalidate_depth = clear_mask & GL_DEPTH_BUFFER_BIT;
+ bool invalidate_stencil = clear_mask & GL_STENCIL_BUFFER_BIT;
if (depth_irb)
depth_mt = depth_irb->mt;
+ if (depth_irb && invalidate_depth
+ && _mesa_is_depthstencil_format(
+ _mesa_get_format_base_format(depth_mt->format))
+ && !depth_mt->stencil_mt) {
+ /* Depth buffer contains interleaved stencil data, so it's only safe to
+ * invalidate it if we're also clearing stencil, and both depth_irb and
+ * stencil_irb point to the same miptree.
+ */
+ invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
+ && depth_irb->mt == stencil_irb->mt;
+ }
+
uint32_t tile_mask_x, tile_mask_y;
brw_get_depthstencil_tile_masks(depth_mt, stencil_mt,
&tile_mask_x, &tile_mask_y);
@@ -373,8 +389,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
perf_debug("HW workaround: blitting depth level %d to a temporary "
"to fix alignment (depth tile offset %d,%d)\n",
depth_irb->mt_level, tile_x, tile_y);
-
- intel_renderbuffer_move_to_temp(intel, depth_irb);
+ intel_renderbuffer_move_to_temp(intel, depth_irb, invalidate_depth);
/* In the case of stencil_irb being the same packed depth/stencil
* texture but not the same rb, make it point at our rebased mt, too.
*/
@@ -435,7 +450,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
"to fix alignment (stencil tile offset %d,%d)\n",
stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);
- intel_renderbuffer_move_to_temp(intel, stencil_irb);
+ intel_renderbuffer_move_to_temp(intel, stencil_irb, invalidate_stencil);
stencil_mt = get_stencil_miptree(stencil_irb);
intel_miptree_get_image_offset(stencil_mt,
@@ -459,7 +474,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
tile_x, tile_y,
stencil_tile_x, stencil_tile_y);
- intel_renderbuffer_move_to_temp(intel, depth_irb);
+ intel_renderbuffer_move_to_temp(intel, depth_irb,
+ invalidate_depth);
tile_x = depth_irb->draw_x & tile_mask_x;
tile_y = depth_irb->draw_y & tile_mask_y;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 7979487..2d722d2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1224,7 +1224,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
* select the image. So, instead, we just make a new single-level
* miptree and render into that.
*/
- intel_renderbuffer_move_to_temp(intel, irb);
+ intel_renderbuffer_move_to_temp(intel, irb, false);
mt = irb->mt;
}
}
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 7186978..410c393 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -991,7 +991,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel,
void
intel_renderbuffer_move_to_temp(struct intel_context *intel,
- struct intel_renderbuffer *irb)
+ struct intel_renderbuffer *irb,
+ bool invalidate)
{
struct intel_texture_image *intel_image =
intel_texture_image(irb->tex_image);
@@ -1009,7 +1010,12 @@ intel_renderbuffer_move_to_temp(struct intel_context *intel,
irb->mt->num_samples,
false /* force_y_tiling */);
- intel_miptree_copy_teximage(intel, intel_image, new_mt);
+ /* If the invalidate flag is set, we don't need to blit the data across
+ * because it is about to be overwritten.
+ */
+ if (!invalidate)
+ intel_miptree_copy_teximage(intel, intel_image, new_mt);
+
intel_miptree_reference(&irb->mt, intel_image->mt);
intel_renderbuffer_set_draw_offset(irb);
intel_miptree_release(&new_mt);
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index ce744bf..9313c35 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -198,7 +198,8 @@ intel_renderbuffer_resolve_depth(struct intel_context *intel,
struct intel_renderbuffer *irb);
void intel_renderbuffer_move_to_temp(struct intel_context *intel,
- struct intel_renderbuffer *irb);
+ struct intel_renderbuffer *irb,
+ bool invalidate);
unsigned
intel_quantize_num_samples(struct intel_screen *intel, unsigned num_samples);
--
1.8.1.5
More information about the mesa-dev mailing list