[Mesa-dev] [PATCH] i965: Disable depth writes when depth test is GL_EQUAL.

Kenneth Graunke kenneth at whitecape.org
Thu Nov 17 19:33:53 UTC 2016


There's no point in performing depth writes when the depth test
comparison function is set to GL_EQUAL - it would just write out
the same value that's already there (if it is written at all).  While
this is harmless from a functional perspective, it hurts performance.
Obviously, writing to memory is not free, but there's another more
subtle impact as well: it can prevent early depth optimizations.

Depth writes aren't supposed to happen for pixels that are killed
by fragment shader discard statements or the alpha test.  So, with
depth writes enabled and either of those, the pixel shader must be
invoked to determine whether or not to perform the write.  This is
fairly stupid in the EQUAL case - we're running a shader to decide
whether to replace the existing depth value with itself.

By disabling these pointless writes, we allow early depth even with
discards and alpha testing, allowing the hardware to skip the pixel
shader altogether if the depth test fails.

Improves performance of Unigine Valley:

- Skylake GT2:    +17.8%
- Broadwell GT3e: +11.5%
- Cherrytrail:    +19.4%

Huge thanks to Mark Janes for building frameretrace [1], the performance
analysis tool that helped us find this issue, and to Robert Bragg for
providing us performance metrics on Linux.  Mark also spent the time to
analyze Valley performance on Windows vs. Linux and discovered a
discrepancy in early depth test metrics.  Once he had isolated a draw
call and drawn attention to the problem, fixing it was pretty simple.

[1] https://github.com/janesma/apitrace/wiki/frameretrace-branch

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_cc.c                | 2 +-
 src/mesa/drivers/dri/i965/brw_context.h           | 7 +++++++
 src/mesa/drivers/dri/i965/brw_draw.c              | 2 +-
 src/mesa/drivers/dri/i965/brw_wm.c                | 2 +-
 src/mesa/drivers/dri/i965/gen6_depthstencil.c     | 2 +-
 src/mesa/drivers/dri/i965/gen7_misc_state.c       | 2 +-
 src/mesa/drivers/dri/i965/gen8_depth_state.c      | 4 ++--
 src/mesa/drivers/dri/i965/gen8_wm_depth_stencil.c | 2 +-
 8 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
index b11d7c8..c3b00e1 100644
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -225,7 +225,7 @@ static void upload_cc_unit(struct brw_context *brw)
       cc->cc2.depth_test = 1;
       cc->cc2.depth_test_function =
 	 intel_translate_compare_func(ctx->Depth.Func);
-      cc->cc2.depth_write_enable = ctx->Depth.Mask;
+      cc->cc2.depth_write_enable = brw_depth_writes_enabled(brw);
    }
 
    if (brw->stats_wm || unlikely(INTEL_DEBUG & DEBUG_STATS))
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 7604d26..a268c94 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1714,6 +1714,13 @@ bool brw_lower_texture_gradients(struct brw_context *brw,
 extern const char * const conditional_modifier[16];
 extern const char *const pred_ctrl_align16[16];
 
+static inline bool
+brw_depth_writes_enabled(const struct brw_context *brw)
+{
+   const struct gl_context *ctx = &brw->ctx;
+   return ctx->Depth.Test && ctx->Depth.Mask && ctx->Depth.Func != GL_EQUAL;
+}
+
 void
 brw_emit_depthbuffer(struct brw_context *brw);
 
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index b093020..7904ef5 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -372,7 +372,7 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
       front_irb->need_downsample = true;
    if (back_irb)
       back_irb->need_downsample = true;
-   if (depth_irb && ctx->Depth.Mask) {
+   if (depth_irb && brw_depth_writes_enabled(brw)) {
       intel_renderbuffer_att_set_needs_depth_resolve(depth_att);
       brw_render_cache_set_add_bo(brw, depth_irb->mt->bo);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index e6f68c4..dab2e33 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -460,7 +460,7 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
       if (ctx->Depth.Test)
          lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
 
-      if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
+      if (brw_depth_writes_enabled(brw))
          lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
 
       /* _NEW_STENCIL | _NEW_BUFFERS */
diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
index a3de844..79d4d5d 100644
--- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c
+++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
@@ -83,7 +83,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
    if (ctx->Depth.Test && depth_irb) {
       ds->ds2.depth_test_enable = ctx->Depth.Test;
       ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func);
-      ds->ds2.depth_write_enable = ctx->Depth.Mask;
+      ds->ds2.depth_write_enable = brw_depth_writes_enabled(brw);
    }
 
    /* Point the GPU at the new indirect state. */
diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c b/src/mesa/drivers/dri/i965/gen7_misc_state.c
index 7bd5cd5..af9be66 100644
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -109,7 +109,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
              (depthbuffer_format << 18) |
              ((hiz ? 1 : 0) << 22) |
              ((stencil_mt != NULL && ctx->Stencil._WriteEnabled) << 27) |
-             ((ctx->Depth.Mask != 0) << 28) |
+             (brw_depth_writes_enabled(brw) << 28) |
              (surftype << 29));
 
    /* 3DSTATE_DEPTH_BUFFER dw2 */
diff --git a/src/mesa/drivers/dri/i965/gen8_depth_state.c b/src/mesa/drivers/dri/i965/gen8_depth_state.c
index 3604aee..14689f4 100644
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -218,7 +218,7 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
    }
 
    emit_depth_packets(brw, depth_mt, brw_depthbuffer_format(brw), surftype,
-                      ctx->Depth.Mask != 0,
+                      brw_depth_writes_enabled(brw),
                       stencil_mt, ctx->Stencil._WriteEnabled,
                       hiz, width, height, depth, lod, min_array_element);
 }
@@ -280,7 +280,7 @@ pma_fix_enable(const struct brw_context *brw)
     * 3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
     * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE.
     */
-   const bool depth_writes_enabled = ctx->Depth.Mask;
+   const bool depth_writes_enabled = brw_depth_writes_enabled(brw);
 
    /* _NEW_STENCIL:
     * !DEPTH_STENCIL_STATE::Stencil Buffer Write Enable ||
diff --git a/src/mesa/drivers/dri/i965/gen8_wm_depth_stencil.c b/src/mesa/drivers/dri/i965/gen8_wm_depth_stencil.c
index e49103c..9a6c9e0 100644
--- a/src/mesa/drivers/dri/i965/gen8_wm_depth_stencil.c
+++ b/src/mesa/drivers/dri/i965/gen8_wm_depth_stencil.c
@@ -90,7 +90,7 @@ gen8_upload_wm_depth_stencil(struct brw_context *brw)
          GEN8_WM_DS_DEPTH_TEST_ENABLE |
          FUNC(ctx->Depth.Func) << GEN8_WM_DS_DEPTH_FUNC_SHIFT;
 
-      if (ctx->Depth.Mask)
+      if (brw_depth_writes_enabled(brw))
          dw1 |= GEN8_WM_DS_DEPTH_BUFFER_WRITE_ENABLE;
    }
 
-- 
2.10.2



More information about the mesa-dev mailing list