Mesa (master): i965/fs: Use conditional sends to do FB writes on HSW+.

Eric Anholt anholt at kemper.freedesktop.org
Tue Feb 18 18:37:22 UTC 2014


Module: Mesa
Branch: master
Commit: d92f593d8776ec157ad0e7fa2ee8c9a17fd744ce
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d92f593d8776ec157ad0e7fa2ee8c9a17fd744ce

Author: Eric Anholt <eric at anholt.net>
Date:   Thu Feb 13 21:37:50 2014 -0800

i965/fs: Use conditional sends to do FB writes on HSW+.

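This drops the MOVs for header setup, which are totally mis-scheduled.
On Haswell and later, a shader that uses discard and writes a single
color region without dual-source blending no longer needs a message
header: the FB write is predicated on the discard flag (f0.1) instead
of copying that flag into the header's pixel mask.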

total instructions in shared programs: 1590047 -> 1589331 (-0.05%)
instructions in affected programs:     43729 -> 43013 (-1.64%)
GAINED:                                0
LOST:                                  0

glb27-trex:
x before
+ after
+-----------------------------------------------------------------------------+
|               +      x     xx        +  +    +                              |
|              ++  + xxx ++x xx + ** *x+  +  + +  x *                         |
|+x xx x*    x+++xx*x*xx+++*+*xx++** *x* x+***x*+xx+*     + *    +  +        *|
|               |__|__________MA___A___________|___|                          |
+-----------------------------------------------------------------------------+
    N           Min           Max        Median           Avg        Stddev
x  49         62.33         65.41         63.49      63.53449    0.62757822
+  50         62.28          65.4          63.7       63.6982      0.656564
No difference proven at 95.0% confidence

Reviewed-by: Matt Turner <mattst88 at gmail.com>

---

 src/mesa/drivers/dri/i965/brw_eu_emit.c         |    2 --
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp  |   22 +++++++++++--------
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp    |   14 +++++++++++-
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp |   26 +++++++++++++++++------
 4 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 8ab043f..5360b56 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2241,8 +2241,6 @@ void brw_fb_WRITE(struct brw_compile *p,
    } else {
       insn = next_insn(p, BRW_OPCODE_SEND);
    }
-   /* The execution mask is ignored for render target writes. */
-   insn->header.predicate_control = 0;
    insn->header.compression_control = BRW_COMPRESSION_NONE;
 
    if (brw->gen >= 6) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 00f19dc..ee13ced 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -114,18 +114,22 @@ fs_generator::generate_fb_write(fs_inst *inst)
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
-   if ((fp && fp->UsesKill) || c->key.alpha_test_func) {
-      struct brw_reg pixel_mask;
+   if (inst->header_present) {
+      /* On HSW, the GPU will use the predicate on SENDC, unless the header is
+       * present.
+       */
+      if (!brw->is_haswell && ((fp && fp->UsesKill) ||
+                               c->key.alpha_test_func)) {
+         struct brw_reg pixel_mask;
 
-      if (brw->gen >= 6)
-         pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-      else
-         pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+         if (brw->gen >= 6)
+            pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+         else
+            pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 
-      brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
-   }
+         brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
+      }
 
-   if (inst->header_present) {
       if (brw->gen >= 6) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 	 brw_MOV(p,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 45b053d..70b7c66 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2743,7 +2743,7 @@ fs_visitor::emit_fb_writes()
     *      thread message and on all dual-source messages."
     */
    if (brw->gen >= 6 &&
-       !this->fp->UsesKill &&
+       (brw->is_haswell || brw->gen >= 8 || !this->fp->UsesKill) &&
        !do_dual_src &&
        c->key.nr_color_regions == 1) {
       header_present = false;
@@ -2840,6 +2840,10 @@ fs_visitor::emit_fb_writes()
       inst->mlen = nr - base_mrf;
       inst->eot = true;
       inst->header_present = header_present;
+      if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->flag_subreg = 1;
+      }
 
       c->prog_data.dual_src_blend = true;
       this->current_annotation = NULL;
@@ -2885,6 +2889,10 @@ fs_visitor::emit_fb_writes()
          inst->mlen = nr - base_mrf;
       inst->eot = eot;
       inst->header_present = header_present;
+      if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->flag_subreg = 1;
+      }
    }
 
    if (c->key.nr_color_regions == 0) {
@@ -2902,6 +2910,10 @@ fs_visitor::emit_fb_writes()
       inst->mlen = nr - base_mrf;
       inst->eot = true;
       inst->header_present = header_present;
+      if ((brw->gen >= 8 || brw->is_haswell) && fp->UsesKill) {
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->flag_subreg = 1;
+      }
    }
 
    this->current_annotation = NULL;
diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
index 332b14f..ea16320 100644
--- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
@@ -61,14 +61,21 @@ gen8_fs_generator::mark_surface_used(unsigned surf_index)
 void
 gen8_fs_generator::generate_fb_write(fs_inst *ir)
 {
-   if (fp && fp->UsesKill) {
-      gen8_instruction *mov =
-         MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW),
-             brw_flag_reg(0, 1));
-      gen8_set_mask_control(mov, BRW_MASK_DISABLE);
-   }
+   /* Disable the discard condition while setting up the header. */
+   default_state.predicate = BRW_PREDICATE_NONE;
+   default_state.predicate_inverse = false;
+   default_state.flag_subreg_nr = 0;
 
    if (ir->header_present) {
+      /* The GPU will use the predicate on SENDC, unless the header is present.
+       */
+      if (fp && fp->UsesKill) {
+         gen8_instruction *mov =
+            MOV(retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW),
+                brw_flag_reg(0, 1));
+         gen8_set_mask_control(mov, BRW_MASK_DISABLE);
+      }
+
       gen8_instruction *mov =
          MOV_RAW(brw_message_reg(ir->base_mrf), brw_vec8_grf(0, 0));
       gen8_set_exec_size(mov, BRW_EXECUTE_16);
@@ -88,6 +95,13 @@ gen8_fs_generator::generate_fb_write(fs_inst *ir)
       }
    }
 
+   /* Set the predicate back to get the conditional write if necessary for
+    * discards.
+    */
+   default_state.predicate = ir->predicate;
+   default_state.predicate_inverse = ir->predicate_inverse;
+   default_state.flag_subreg_nr = ir->flag_subreg;
+
    gen8_instruction *inst = next_inst(BRW_OPCODE_SENDC);
    gen8_set_dst(brw, inst, retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW));
    gen8_set_src0(brw, inst, brw_message_reg(ir->base_mrf));




More information about the mesa-commit mailing list