[Mesa-dev] [PATCH 2/2] i965/fs: Add helper functions for IF and CMP and use them.

Eric Anholt eric at anholt.net
Fri Nov 9 13:34:44 PST 2012


---
 src/mesa/drivers/dri/i965/brw_fs.cpp         |   62 ++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_fs.h           |    3 +-
 src/mesa/drivers/dri/i965/brw_fs_fp.cpp      |   23 +++----
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |   87 ++++++++------------------
 4 files changed, 95 insertions(+), 80 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 950053e..9851ad4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -162,6 +162,63 @@ ALU2(SHL)
 ALU2(SHR)
 ALU2(ASR)
 
+/** Gen4 predicated IF. */
+fs_inst *
+fs_visitor::IF(uint32_t predicate)
+{
+   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF);
+   inst->predicate = predicate;
+   return inst;
+}
+
+/** Gen6+ IF with embedded comparison. */
+fs_inst *
+fs_visitor::IF(fs_reg src0, fs_reg src1, uint32_t condition)
+{
+   assert(intel->gen >= 6);
+   fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF,
+                                        reg_null_d, src0, src1);
+   inst->conditional_mod = condition;
+   return inst;
+}
+
+/**
+ * CMP: Sets the low bit of the destination channels with the result
+ * of the comparison, while the upper bits are undefined, and updates
+ * the flag register with the packed 16 bits of the result.
+ */
+fs_inst *
+fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition)
+{
+   fs_inst *inst;
+
+   /* Take the instruction:
+    *
+    * CMP null<d> src0<f> src1<f>
+    *
+    * Original gen4 does type conversion to the destination type before
+    * comparison, producing garbage results for floating point comparisons.
+    * gen5 does the comparison on the execution type (resolved source types),
+    * so dst type doesn't matter.  gen6 does comparison and then uses the
+    * result as if it was the dst type with no conversion, which happens to
+    * mostly work out for float-interpreted-as-int since our comparisons are
+    * for >0, =0, <0.
+    */
+   if (intel->gen == 4) {
+      dst.type = src0.type;
+      if (dst.file == FIXED_HW_REG)
+	 dst.fixed_hw_reg.type = dst.type;
+   }
+
+   resolve_ud_negate(&src0);
+   resolve_ud_negate(&src1);
+
+   inst = new(mem_ctx) fs_inst(BRW_OPCODE_CMP, dst, src0, src1);
+   inst->conditional_mod = condition;
+
+   return inst;
+}
+
 bool
 fs_inst::equals(fs_inst *inst)
 {
@@ -800,10 +857,7 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
       /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
        * us front face
        */
-      fs_inst *inst = emit(BRW_OPCODE_CMP, *reg,
-			   fs_reg(r1_6ud),
-			   fs_reg(1u << 31));
-      inst->conditional_mod = BRW_CONDITIONAL_L;
+      emit(CMP(*reg, fs_reg(r1_6ud), fs_reg(1u << 31), BRW_CONDITIONAL_L));
       emit(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u));
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 75b3b00..12234be 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -238,6 +238,8 @@ public:
    fs_inst *AND(fs_reg dst, fs_reg src0, fs_reg src1);
    fs_inst *OR(fs_reg dst, fs_reg src0, fs_reg src1);
    fs_inst *XOR(fs_reg dst, fs_reg src0, fs_reg src1);
+   fs_inst *IF(uint32_t predicate);
+   fs_inst *IF(fs_reg src0, fs_reg src1, uint32_t condition);
    fs_inst *CMP(fs_reg dst, fs_reg src0, fs_reg src1,
                 uint32_t condition);
 
@@ -440,7 +442,6 @@ public:
    fs_reg pixel_w;
    fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
    fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
-   fs_reg reg_null_cmp;
 
    int grf_used;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
index a5b3f7a..88b0976 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -88,9 +88,8 @@ fs_visitor::emit_fp_sop(uint32_t conditional_mod,
       if (fpi->DstReg.WriteMask & (1 << i)) {
          fs_inst *inst;
 
-         inst = emit(BRW_OPCODE_CMP, fs_reg(brw_null_reg()),
-                     regoffset(src0, i), regoffset(src1, i));
-         inst->conditional_mod = conditional_mod;
+         emit(CMP(reg_null_d, regoffset(src0, i), regoffset(src1, i),
+                  conditional_mod));
 
          inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
          inst->predicate = BRW_PREDICATE_NORMAL;
@@ -171,9 +170,8 @@ fs_visitor::emit_fragment_program_code()
             if (fpi->DstReg.WriteMask & (1 << i)) {
                fs_inst *inst;
 
-               inst = emit(BRW_OPCODE_CMP, null,
-                           regoffset(src[0], i), fs_reg(0.0f));
-               inst->conditional_mod = BRW_CONDITIONAL_L;
+               emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
+                        BRW_CONDITIONAL_L));
 
                inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
                            regoffset(src[1], i), regoffset(src[2], i));
@@ -254,12 +252,10 @@ fs_visitor::emit_fragment_program_code()
                continue;
             }
 
-            fs_inst *inst = emit(BRW_OPCODE_CMP, null,
-                                 regoffset(src[0], i), 0.0f);
-            inst->conditional_mod = BRW_CONDITIONAL_L;
+            emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
+                     BRW_CONDITIONAL_L));
 
-            inst = emit(BRW_OPCODE_IF);
-            inst->predicate = BRW_PREDICATE_NORMAL;
+            emit(IF(BRW_PREDICATE_NORMAL));
             emit(FS_OPCODE_DISCARD);
             emit(BRW_OPCODE_ENDIF);
          }
@@ -291,9 +287,8 @@ fs_visitor::emit_fragment_program_code()
 
          if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
             fs_inst *inst;
-            inst = emit(BRW_OPCODE_CMP, null,
-                        regoffset(src[0], 0), fs_reg(0.0f));
-            inst->conditional_mod = BRW_CONDITIONAL_LE;
+            emit(CMP(null, regoffset(src[0], 0), fs_reg(0.0f),
+                     BRW_CONDITIONAL_LE));
 
             if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
                emit(MOV(regoffset(dst, 1), regoffset(src[0], 0)));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 41ecaa2..e0ecde5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -187,8 +187,7 @@ fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst,
       inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
       inst->conditional_mod = conditionalmod;
    } else {
-      inst = emit(BRW_OPCODE_CMP, reg_null_cmp, src0, src1);
-      inst->conditional_mod = conditionalmod;
+      emit(CMP(reg_null_d, src0, src1, conditionalmod));
 
       inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
       inst->predicate = BRW_PREDICATE_NORMAL;
@@ -326,13 +325,11 @@ fs_visitor::visit(ir_expression *ir)
 
       emit(MOV(this->result, fs_reg(0.0f)));
 
-      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
-      inst->conditional_mod = BRW_CONDITIONAL_G;
+      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
       inst = emit(MOV(this->result, fs_reg(1.0f)));
       inst->predicate = BRW_PREDICATE_NORMAL;
 
-      inst = emit(BRW_OPCODE_CMP, reg_null_f, op[0], fs_reg(0.0f));
-      inst->conditional_mod = BRW_CONDITIONAL_L;
+      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
       inst = emit(MOV(this->result, fs_reg(-1.0f)));
       inst->predicate = BRW_PREDICATE_NORMAL;
 
@@ -432,8 +429,8 @@ fs_visitor::visit(ir_expression *ir)
       resolve_bool_comparison(ir->operands[0], &op[0]);
       resolve_bool_comparison(ir->operands[1], &op[1]);
 
-      inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
-      inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+      emit(CMP(temp, op[0], op[1],
+               brw_conditional_for_comparison(ir->operation)));
       break;
 
    case ir_binop_logic_xor:
@@ -501,15 +498,13 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_f2b:
-      inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f));
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
       emit(AND(this->result, this->result, fs_reg(1)));
       break;
    case ir_unop_i2b:
       assert(op[0].type == BRW_REGISTER_TYPE_D);
 
-      inst = emit(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0));
-      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
       emit(AND(this->result, this->result, fs_reg(1)));
       break;
 
@@ -588,9 +583,7 @@ fs_visitor::visit(ir_expression *ir)
           * values with the low bit set to 1.  Convert them using CMP.
           */
          if (ir->type->base_type == GLSL_TYPE_BOOL) {
-            fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, result,
-                                         packed_consts, fs_reg(0u)));
-            inst->conditional_mod = BRW_CONDITIONAL_NZ;
+            emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
          } else {
             emit(MOV(result, packed_consts));
          }
@@ -1518,20 +1511,20 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
 
       case ir_unop_f2b:
 	 if (intel->gen >= 6) {
-	    inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f));
+	    emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 	 } else {
 	    inst = emit(MOV(reg_null_f, op[0]));
+            inst->conditional_mod = BRW_CONDITIONAL_NZ;
 	 }
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
 	 break;
 
       case ir_unop_i2b:
 	 if (intel->gen >= 6) {
-	    inst = emit(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0));
+	    emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
 	 } else {
 	    inst = emit(MOV(reg_null_d, op[0]));
+            inst->conditional_mod = BRW_CONDITIONAL_NZ;
 	 }
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
 	 break;
 
       case ir_binop_greater:
@@ -1545,9 +1538,8 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
 	 resolve_bool_comparison(expr->operands[0], &op[0]);
 	 resolve_bool_comparison(expr->operands[1], &op[1]);
 
-	 inst = emit(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1]);
-	 inst->conditional_mod =
-	    brw_conditional_for_comparison(expr->operation);
+	 emit(CMP(reg_null_d, op[0], op[1],
+                  brw_conditional_for_comparison(expr->operation)));
 	 break;
 
       default:
@@ -1594,22 +1586,19 @@ fs_visitor::emit_if_gen6(ir_if *ir)
 	 return;
 
       case ir_binop_logic_xor:
-	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
 	 return;
 
       case ir_binop_logic_or:
 	 temp = fs_reg(this, glsl_type::bool_type);
 	 emit(OR(temp, op[0], op[1]));
-	 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
 	 return;
 
       case ir_binop_logic_and:
 	 temp = fs_reg(this, glsl_type::bool_type);
 	 emit(AND(temp, op[0], op[1]));
-	 inst = emit(BRW_OPCODE_IF, reg_null_d, temp, fs_reg(0));
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+         emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
 	 return;
 
       case ir_unop_f2b:
@@ -1618,8 +1607,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
 	 return;
 
       case ir_unop_i2b:
-	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
 	 return;
 
       case ir_binop_greater:
@@ -1630,14 +1618,12 @@ fs_visitor::emit_if_gen6(ir_if *ir)
       case ir_binop_all_equal:
       case ir_binop_nequal:
       case ir_binop_any_nequal:
-	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], op[1]);
-	 inst->conditional_mod =
-	    brw_conditional_for_comparison(expr->operation);
+	 emit(IF(op[0], op[1],
+                 brw_conditional_for_comparison(expr->operation)));
 	 return;
       default:
 	 assert(!"not reached");
-	 inst = emit(BRW_OPCODE_IF, reg_null_d, op[0], fs_reg(0));
-	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
+	 emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
 	 fail("bad condition\n");
 	 return;
       }
@@ -1646,15 +1632,12 @@ fs_visitor::emit_if_gen6(ir_if *ir)
 
    ir->condition->accept(this);
 
-   fs_inst *inst = emit(BRW_OPCODE_IF, reg_null_d, this->result, fs_reg(0));
-   inst->conditional_mod = BRW_CONDITIONAL_NZ;
+   emit(IF(this->result, fs_reg(0), BRW_CONDITIONAL_NZ));
 }
 
 void
 fs_visitor::visit(ir_if *ir)
 {
-   fs_inst *inst;
-
    if (intel->gen < 6 && c->dispatch_width == 16) {
       fail("Can't support (non-uniform) control flow on 16-wide\n");
    }
@@ -1669,8 +1652,7 @@ fs_visitor::visit(ir_if *ir)
    } else {
       emit_bool_to_cond_code(ir->condition);
 
-      inst = emit(BRW_OPCODE_IF);
-      inst->predicate = BRW_PREDICATE_NORMAL;
+      emit(IF(BRW_PREDICATE_NORMAL));
    }
 
    foreach_list(node, &ir->then_instructions) {
@@ -1723,10 +1705,10 @@ fs_visitor::visit(ir_loop *ir)
       this->base_ir = ir->to;
       ir->to->accept(this);
 
-      fs_inst *inst = emit(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result);
-      inst->conditional_mod = brw_conditional_for_comparison(ir->cmp);
+      emit(CMP(reg_null_d, counter, this->result,
+               brw_conditional_for_comparison(ir->cmp)));
 
-      inst = emit(BRW_OPCODE_BREAK);
+      fs_inst *inst = emit(BRW_OPCODE_BREAK);
       inst->predicate = BRW_PREDICATE_NORMAL;
    }
 
@@ -2232,23 +2214,6 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,
                                        hash_table_pointer_hash,
                                        hash_table_pointer_compare);
 
-   /* There's a question that appears to be left open in the spec:
-    * How do implicit dst conversions interact with the CMP
-    * instruction or conditional mods?  On gen6, the instruction:
-    *
-    * CMP null<d> src0<f> src1<f>
-    *
-    * will do src1 - src0 and compare that result as if it was an
-    * integer.  On gen4, it will do src1 - src0 as float, convert
-    * the result to int, and compare as int.  In between, it
-    * appears that it does src1 - src0 and does the compare in the
-    * execution type so dst type doesn't matter.
-    */
-   if (this->intel->gen > 4)
-      this->reg_null_cmp = reg_null_d;
-   else
-      this->reg_null_cmp = reg_null_f;
-
    memset(this->outputs, 0, sizeof(this->outputs));
    memset(this->output_components, 0, sizeof(this->output_components));
    this->first_non_payload_grf = 0;
-- 
1.7.10.4



More information about the mesa-dev mailing list