Mesa (master): i965: Disable thread dispatch when the FS doesn' t do any work.

Eric Anholt anholt at kemper.freedesktop.org
Tue Oct 19 17:53:23 UTC 2010


Module: Mesa
Branch: master
Commit: f30de6964018619658439216cd8bf9371ee6256d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f30de6964018619658439216cd8bf9371ee6256d

Author: Eric Anholt <eric at anholt.net>
Date:   Mon Oct 18 15:43:20 2010 -0700

i965: Disable thread dispatch when the FS doesn't do any work.

This should reduce the cost of generating shadow maps, for example.
No performance difference measured in nexuiz, though it does trigger
this path.

---

 src/mesa/drivers/dri/i965/brw_wm.h        |    3 ++
 src/mesa/drivers/dri/i965/brw_wm_state.c  |   41 +++++++++++++++++++++++++++-
 src/mesa/drivers/dri/i965/gen6_wm_state.c |    9 +++++-
 3 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index a094c45..99bd15c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -33,6 +33,7 @@
 #ifndef BRW_WM_H
 #define BRW_WM_H
 
+#include <stdbool.h>
 
 #include "program/prog_instruction.h"
 #include "brw_context.h"
@@ -473,4 +474,6 @@ GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog
 struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type);
 struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name);
 
+bool brw_color_buffer_write_enabled(struct brw_context *brw);
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 817adef..411204e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -52,9 +52,36 @@ struct brw_wm_unit_key {
    unsigned int nr_surfaces, sampler_count;
    GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
    GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+   GLboolean color_write_enable;
    GLfloat offset_units, offset_factor;
 };
 
+bool
+brw_color_buffer_write_enabled(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   const struct gl_fragment_program *fp = brw->fragment_program;
+   int i;
+
+   /* _NEW_BUFFERS */
+   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+
+      /* _NEW_COLOR */
+      if (rb &&
+	  (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
+	   fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
+	  (ctx->Color.ColorMask[i][0] ||
+	   ctx->Color.ColorMask[i][1] ||
+	   ctx->Color.ColorMask[i][2] ||
+	   ctx->Color.ColorMask[i][3])) {
+	 return true;
+      }
+   }
+
+   return false;
+}
+
 static void
 wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 {
@@ -100,6 +127,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    if (brw->state.depth_region == NULL)
       key->computes_depth = 0;
 
+   /* _NEW_BUFFERS | _NEW_COLOR */
+   key->color_write_enable = brw_color_buffer_write_enabled(brw);
+
    /* _NEW_COLOR */
    key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
    key->is_glsl = bfp->isGLSL;
@@ -188,7 +218,13 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
       wm.wm5.enable_16_pix = 1;
 
    wm.wm5.max_threads = brw->wm_max_threads - 1;
-   wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
+
+   if (key->color_write_enable ||
+       key->uses_kill ||
+       key->computes_depth) {
+      wm.wm5.thread_dispatch_enable = 1;
+   }
+
    wm.wm5.legacy_line_rast = 0;
    wm.wm5.legacy_global_depth_bias = 0;
    wm.wm5.early_depth_test = 1;	        /* never need to disable */
@@ -293,7 +329,8 @@ const struct brw_tracked_state brw_wm_unit = {
 	       _NEW_POLYGONSTIPPLE | 
 	       _NEW_LINE | 
 	       _NEW_COLOR |
-	       _NEW_DEPTH),
+	       _NEW_DEPTH |
+	       _NEW_BUFFERS),
 
       .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
 	      BRW_NEW_CURBE_OFFSETS |
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 7ef99ee..21059be 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -29,6 +29,7 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_util.h"
+#include "brw_wm.h"
 #include "program/prog_parameter.h"
 #include "program/prog_statevars.h"
 #include "intel_batchbuffer.h"
@@ -123,7 +124,6 @@ upload_wm_state(struct brw_context *brw)
 	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
 
    dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT;
-   dw5 |= GEN6_WM_DISPATCH_ENABLE;
 
    /* BRW_NEW_FRAGMENT_PROGRAM */
    if (fp->isGLSL)
@@ -149,6 +149,11 @@ upload_wm_state(struct brw_context *brw)
    if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
       dw5 |= GEN6_WM_KILL_ENABLE;
 
+   if (brw_color_buffer_write_enabled(brw) ||
+       dw5 & (GEN6_WM_KILL_ENABLE | GEN6_WM_COMPUTED_DEPTH)) {
+      dw5 |= GEN6_WM_DISPATCH_ENABLE;
+   }
+
    dw6 |= GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
 
    dw6 |= brw_count_bits(brw->fragment_program->Base.InputsRead) <<
@@ -169,7 +174,7 @@ upload_wm_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen6_wm_state = {
    .dirty = {
-      .mesa  = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR |
+      .mesa  = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS |
 		_NEW_PROGRAM_CONSTANTS),
       .brw   = (BRW_NEW_CURBE_OFFSETS |
 		BRW_NEW_FRAGMENT_PROGRAM |




More information about the mesa-commit mailing list