[Mesa-dev] [PATCH 7/8] i965/fs: Rewrite discards to use a flag subreg to track discarded pixels.

Eric Anholt eric at anholt.net
Fri Dec 7 14:08:10 PST 2012


This makes much more sense on gen6+, and will also prove useful for
early exit of shaders on discard.
---
 src/mesa/drivers/dri/i965/brw_defines.h            |    1 -
 src/mesa/drivers/dri/i965/brw_fs.cpp               |    8 +++
 src/mesa/drivers/dri/i965/brw_fs.h                 |    1 -
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          |   76 +++++---------------
 src/mesa/drivers/dri/i965/brw_fs_fp.cpp            |   17 ++---
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |    3 -
 .../dri/i965/brw_fs_schedule_instructions.cpp      |    1 -
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       |   12 +++-
 8 files changed, 46 insertions(+), 73 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 5e00b40..2b77ae6 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -673,7 +673,6 @@ enum opcode {
    FS_OPCODE_PIXEL_Y,
    FS_OPCODE_CINTERP,
    FS_OPCODE_LINTERP,
-   FS_OPCODE_DISCARD,
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index dbf48f8..b4e8d68 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2434,6 +2434,14 @@ fs_visitor::run()
       else
 	 emit_interpolation_setup_gen6();
 
+      /* On gen6+, we handle discards by keeping track of the still-live
+       * pixels in f0.1.  If so, initialize it with the dispatched pixels.
+       */
+      if (fp->UsesKill) {
+         fs_inst *discard_init = emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+         discard_init->flag_subreg = 1;
+      }
+
       /* Generate FS IR for main().  (the visitor only descends into
        * functions called "main").
        */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b60a37e..b00755f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -501,7 +501,6 @@ private:
    void generate_math_gen4(fs_inst *inst,
 			   struct brw_reg dst,
 			   struct brw_reg src);
-   void generate_discard(fs_inst *inst);
    void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
    void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
                      bool negate_value);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index b3d7f81..f185eb5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -74,6 +74,17 @@ fs_generator::generate_fb_write(fs_inst *inst)
    brw_set_mask_control(p, BRW_MASK_DISABLE);
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
+   if (fp->UsesKill) {
+      struct brw_reg pixel_mask;
+
+      if (intel->gen >= 6)
+         pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+      else
+         pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+      brw_MOV(p, pixel_mask, brw_flag_reg(0, 1));
+   }
+
    if (inst->header_present) {
       if (intel->gen >= 6) {
 	 brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
@@ -514,58 +525,6 @@ fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src
 }
 
 void
-fs_generator::generate_discard(fs_inst *inst)
-{
-   struct brw_reg f0 = brw_flag_reg(0, 0);
-
-   if (intel->gen >= 6) {
-      struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-      struct brw_reg some_register;
-
-      /* As of gen6, we no longer have the mask register to look at,
-       * so life gets a bit more complicated.
-       */
-
-      /* Load the flag register with all ones. */
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_MOV(p, f0, brw_imm_uw(0xffff));
-      brw_pop_insn_state(p);
-
-      /* Do a comparison that should always fail, to produce 0s in the flag
-       * reg where we have active channels.
-       */
-      some_register = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
-      brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
-	      BRW_CONDITIONAL_NZ, some_register, some_register);
-
-      /* Undo CMP's whacking of predication*/
-      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_AND(p, g1, f0, g1);
-      brw_pop_insn_state(p);
-   } else {
-      struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-
-      brw_push_insn_state(p);
-      brw_set_mask_control(p, BRW_MASK_DISABLE);
-      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-
-      /* Unlike the 965, we have the mask reg, so we just need
-       * somewhere to invert that (containing channels to be disabled)
-       * so it can be ANDed with the mask of pixels still to be
-       * written. Use the flag reg for consistency with gen6+.
-       */
-      brw_NOT(p, f0, brw_mask_reg(1)); /* IMASK */
-      brw_AND(p, g0, f0, g0);
-
-      brw_pop_insn_state(p);
-   }
-}
-
-void
 fs_generator::generate_spill(fs_inst *inst, struct brw_reg src)
 {
    assert(inst->mlen != 0);
@@ -745,12 +704,16 @@ void
 fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
 {
    struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
-   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+   struct brw_reg dispatch_mask;
+
+   if (intel->gen >= 6)
+      dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+   else
+      dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 
-   assert (intel->gen >= 6);
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, flags, g1);
+   brw_MOV(p, flags, dispatch_mask);
    brw_pop_insn_state(p);
 }
 
@@ -1083,9 +1046,6 @@ fs_generator::generate_code(exec_list *instructions)
       case SHADER_OPCODE_TXS:
 	 generate_tex(inst, dst, src[0]);
 	 break;
-      case FS_OPCODE_DISCARD:
-	 generate_discard(inst);
-	 break;
       case FS_OPCODE_DDX:
 	 generate_ddx(inst, dst, src[0]);
 	 break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
index 4be7779..bebf059 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -252,14 +252,15 @@ fs_visitor::emit_fragment_program_code()
                continue;
             }
 
-            emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
-                     BRW_CONDITIONAL_L));
-
-            if (intel->gen < 6 && dispatch_width == 16)
-               fail("Can't support (non-uniform) control flow on 16-wide");
-            emit(IF(BRW_PREDICATE_NORMAL));
-            emit(FS_OPCODE_DISCARD);
-            emit(BRW_OPCODE_ENDIF);
+
+            /* Emit an instruction that's predicated on the current
+             * undiscarded pixels, and updates just those pixels to be
+             * turned off.
+             */
+            fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
+                                    BRW_CONDITIONAL_GE));
+            cmp->predicate = BRW_PREDICATE_NORMAL;
+            cmp->flag_subreg = 1;
          }
          break;
       }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index a4fc032..d1147f5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -277,9 +277,6 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
          payload_last_use_ip[0 / reg_width] = use_ip;
          payload_last_use_ip[1 / reg_width] = use_ip;
          break;
-      case FS_OPCODE_DISCARD:
-         payload_last_use_ip[1 / reg_width] = use_ip;
-         break;
 
       case FS_OPCODE_LINTERP:
          /* On gen6+ in 16-wide, there are 4 adjacent registers (so 2 nodes)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 96d1131..e9c25b0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -299,7 +299,6 @@ instruction_scheduler::calculate_deps()
       }
 
       if (inst->predicate) {
-	 assert(last_conditional_mod[inst->flag_subreg]);
 	 add_dep(last_conditional_mod[inst->flag_subreg], n);
       }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f1c6860..98cd064 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1435,7 +1435,17 @@ fs_visitor::visit(ir_discard *ir)
 {
    assert(ir->condition == NULL); /* FINISHME */
 
-   emit(FS_OPCODE_DISCARD);
+   /* We track our discarded pixels in f0.1.  By predicating on it, we can
+    * update just the flag bits that aren't yet discarded.  By emitting a
+    * CMP of g0 != g0, all our currently executing channels will get turned
+    * off.
+    */
+   fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
+                                   BRW_REGISTER_TYPE_UW));
+   fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
+                           BRW_CONDITIONAL_NZ));
+   cmp->predicate = BRW_PREDICATE_NORMAL;
+   cmp->flag_subreg = 1;
 }
 
 void
-- 
1.7.10.4



More information about the mesa-dev mailing list