No subject


Mon Apr 30 13:12:18 PDT 2012


     The discard keyword is only allowed within fragment shaders. It
     can be used within a fragment shader to abandon the operation on
     the current fragment. This keyword causes the fragment to be
     discarded and no updates to any buffers will occur. Control flow
     exits the shader, and subsequent implicit or explicit derivatives
     are undefined when this control flow is non-uniform (meaning
     different fragments within the primitive take different control
     paths).

v2: Don't emit the final HALT if no other HALTs were emitted.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org> (v1)

---

 src/mesa/drivers/dri/i965/brw_eu.h        |    1 +
 src/mesa/drivers/dri/i965/brw_eu_emit.c   |   53 +++++++++++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_fs.h        |   22 +++++++++++
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp |   55 +++++++++++++++++++++++++++++
 4 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index f660222..5064c18 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -1048,6 +1048,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p);
 struct brw_instruction *brw_BREAK(struct brw_compile *p);
 struct brw_instruction *brw_CONT(struct brw_compile *p);
 struct brw_instruction *gen6_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_HALT(struct brw_compile *p);
 /* Forward jumps:
  */
 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index b2581da..21d3c5a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1339,6 +1339,20 @@ struct brw_instruction *brw_CONT(struct brw_compile *p)
    return insn;
 }
 
+struct brw_instruction *gen6_HALT(struct brw_compile *p)
+{
+   struct brw_instruction *insn;
+
+   insn = next_insn(p, BRW_OPCODE_HALT);
+   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = BRW_EXECUTE_8;
+   return insn;
+}
+
 /* DO/WHILE loop:
  *
  * The DO/WHILE is just an unterminated loop -- break or continue are
@@ -2395,8 +2409,8 @@ brw_find_next_block_end(struct brw_compile *p, int start)
 	 return ip;
       }
    }
-   assert(!"not reached");
-   return start + 1;
+
+   return 0;
 }
 
 /* There is no DO instruction on gen6, so to find the end of the loop
@@ -2425,7 +2439,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
 }
 
 /* After program generation, go back and update the UIP and JIP of
- * BREAK and CONT instructions to their correct locations.
+ * BREAK, CONT, and HALT instructions to their correct locations.
  */
 void
 brw_set_uip_jip(struct brw_compile *p)
@@ -2439,21 +2453,50 @@ brw_set_uip_jip(struct brw_compile *p)
 
    for (ip = 0; ip < p->nr_insn; ip++) {
       struct brw_instruction *insn = &p->store[ip];
+      int block_end_ip = 0;
+
+      if (insn->header.opcode == BRW_OPCODE_BREAK ||
+	  insn->header.opcode == BRW_OPCODE_CONTINUE ||
+	  insn->header.opcode == BRW_OPCODE_HALT) {
+	 block_end_ip = brw_find_next_block_end(p, ip);
+      }
 
       switch (insn->header.opcode) {
       case BRW_OPCODE_BREAK:
-	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+	 assert(block_end_ip != 0);
+	 insn->bits3.break_cont.jip = br * (block_end_ip - ip);
 	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
 	 insn->bits3.break_cont.uip =
 	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
 	 break;
       case BRW_OPCODE_CONTINUE:
-	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+	 assert(block_end_ip != 0);
+	 insn->bits3.break_cont.jip = br * (block_end_ip - ip);
 	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
 
 	 assert(insn->bits3.break_cont.uip != 0);
 	 assert(insn->bits3.break_cont.jip != 0);
 	 break;
+      case BRW_OPCODE_HALT:
+	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
+	  *
+	  *    "In case of the halt instruction not inside any conditional code
+	  *     block, the value of <JIP> and <UIP> should be the same. In case
+	  *     of the halt instruction inside conditional code block, the <UIP>
+	  *     should be the end of the program, and the <JIP> should be end of
+	  *     the most inner conditional code block."
+	  *
+	  * The uip will have already been set by whoever set up the
+	  * instruction.
+	  */
+	 if (block_end_ip == 0) {
+	    insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
+	 } else {
+	    insn->bits3.break_cont.jip = br * (block_end_ip - ip);
+	 }
+	 assert(insn->bits3.break_cont.uip != 0);
+	 assert(insn->bits3.break_cont.jip != 0);
+	 break;
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9a2cc08..b9cd42f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -171,6 +171,26 @@ static const fs_reg reg_undef;
 static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
 static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
 
+class ip_record : public exec_node {
+public:
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   ip_record(int ip)
+   {
+      this->ip = ip;
+   }
+
+   int ip;
+};
+
 class fs_inst : public exec_node {
 public:
    /* Callers of this ralloc-based new need not call delete. It's
@@ -489,6 +509,7 @@ public:
    bool remove_duplicate_mrf_writes();
    bool virtual_grf_interferes(int a, int b);
    void schedule_instructions();
+   void patch_discard_jumps_to_fb_writes();
    void fail(const char *msg, ...);
 
    void push_force_uncompressed();
@@ -571,6 +592,7 @@ public:
    struct gl_shader_program *prog;
    void *mem_ctx;
    exec_list instructions;
+   exec_list discard_halt_patches;
 
    /* Delayed setup of c->prog_data.params[] due to realloc of
     * ParamValues[] during compile.
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index b68d8cb..cc70904 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -37,11 +37,55 @@ extern "C" {
 #include "glsl/ir_print_visitor.h"
 
 void
+fs_visitor::patch_discard_jumps_to_fb_writes()
+{
+   if (intel->gen < 6 || this->discard_halt_patches.is_empty())
+      return;
+
+   /* There is a somewhat strange undocumented requirement of using
+    * HALT, according to the simulator.  If some channel has HALTed to
+    * a particular UIP, then by the end of the program, every channel
+    * must have HALTed to that UIP.  Furthermore, the tracking is a
+    * stack, so you can't do the final halt of a UIP after starting
+    * halting to a new UIP.
+    *
+    * Symptoms of not emitting this instruction on actual hardware
+    * included GPU hangs and sparkly rendering on the piglit discard
+    * tests.
+    */
+   struct brw_instruction *last_halt = gen6_HALT(p);
+   last_halt->bits3.break_cont.uip = 2;
+   last_halt->bits3.break_cont.jip = 2;
+
+   int ip = p->nr_insn;
+
+   foreach_list(node, &this->discard_halt_patches) {
+      ip_record *patch_ip = (ip_record *)node;
+      struct brw_instruction *patch = &p->store[patch_ip->ip];
+      int br = (intel->gen >= 5) ? 2 : 1;
+
+      /* HALT takes a distance from the pre-incremented IP, so '1'
+       * would be the next instruction after jmpi.
+       */
+      assert(patch->header.opcode == BRW_OPCODE_HALT);
+      patch->bits3.break_cont.uip = (ip - patch_ip->ip) * br;
+   }
+
+   this->discard_halt_patches.make_empty();
+}
+
+void
 fs_visitor::generate_fb_write(fs_inst *inst)
 {
    bool eot = inst->eot;
    struct brw_reg implied_header;
 
+   /* Note that the jumps emitted to this point mean that the g0 ->
+    * base_mrf setup must be inside of this function, so that we jump
+    * to a point containing it.
+    */
+   patch_discard_jumps_to_fb_writes();
+
    /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
     * move, here's g1.
     */
@@ -482,6 +526,17 @@ fs_visitor::generate_discard(fs_inst *inst)
       brw_set_mask_control(p, BRW_MASK_DISABLE);
       brw_AND(p, g1, f0, g1);
       brw_pop_insn_state(p);
+
+      /* GLSL 1.30+ say that discarded channels should stop executing
+       * (so, for example, an infinite loop that would otherwise in
+       * just that channel does not occur.
+       *
+       * This HALT will be patched up at FB write time to point UIP at
+       * the end of the program, and at brw_uip_jip() JIP will be set
+       * to the end of the current block (or the program).
+       */
+      this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
+      gen6_HALT(p);
    } else {
       struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 



More information about the mesa-commit mailing list