[Mesa-dev] [PATCH 10/10] i965/vs: Add a peephole pass to combine ADD with ADDC/SUBB.

Matt Turner mattst88 at gmail.com
Thu Oct 3 11:00:38 PDT 2013


v2: Check fixed_hw_reg.{file,nr} instead of dst.reg.
v3: Store the bool emitted_addc_or_subb in the class, not static.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           |   3 +
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 104 ++++++++++++++++++++++++-
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 25427d7..9e2204d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -507,6 +507,7 @@ public:
 
    bool try_emit_sat(ir_expression *ir);
    bool try_emit_mad(ir_expression *ir, int mul_arg);
+   void try_combine_add_with_addc_subb();
    void resolve_ud_negate(src_reg *reg);
 
    src_reg get_timestamp();
@@ -530,6 +531,8 @@ protected:
    virtual int compute_array_stride(ir_dereference_array *ir);
 
    const bool debug_flag;
+
+   bool emitted_addc_or_subb;
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ffb2cfc..74bdd4d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1122,6 +1122,102 @@ vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
    return true;
 }
 
+/**
+ * The addition and carry in the uaddCarry() built-in function are implemented
+ * separately as ir_binop_add and ir_binop_carry respectively. i965 generates
+ * ADDC and a MOV from the accumulator for the carry.
+ *
+ * The generated code for uaddCarry(uint x, uint y, out uint carry) would look
+ * like this:
+ *
+ *    addc null, x, y
+ *    mov  carry, acc0
+ *    add  sum, x, y
+ *
+ * This peephole pass optimizes this into
+ *
+ *    addc sum, x, y
+ *    mov carry, acc0
+ *
+ * usubBorrow() works in the same fashion.
+ */
+void
+vec4_visitor::try_combine_add_with_addc_subb()
+{
+   /* ADDC/SUBB was introduced in gen7. */
+   if (brw->gen < 7)
+      return;
+
+   vec4_instruction *add_inst = (vec4_instruction *) instructions.get_tail();
+   assert(add_inst->opcode == BRW_OPCODE_ADD);
+
+   /* ADDC/SUBB only operates on UD. */
+   if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
+       add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
+       add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
+      return;
+
+   bool found = false;
+   vec4_instruction *match = (vec4_instruction *) add_inst->prev;
+   /* The ADDC should appear within 2 instructions of ADD. SUBB should appear
+    * farther away because of the extra MOV negate.
+    */
+   for (int i = 0; i < 4; i++, match = (vec4_instruction *) match->prev) {
+      if (match->is_head_sentinel())
+         return;
+
+      /* Look for an ADDC/SUBB instruction whose destination is the null
+       * register (ir_binop_carry emits ADDC with null destination; same for
+       * ir_binop_borrow with SUBB) and whose sources are identical to those
+       * of the ADD.
+       */
+      if (match->opcode != BRW_OPCODE_ADDC && match->opcode != BRW_OPCODE_SUBB)
+         continue;
+
+      /* Only look for newly emitted ADDC/SUBB with null destination. */
+      if (match->dst.file != HW_REG ||
+          match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+          match->dst.fixed_hw_reg.nr != BRW_ARF_NULL)
+         continue;
+
+      src_reg *src0 = &add_inst->src[0];
+      src_reg *src1 = &add_inst->src[1];
+
+      /* For SUBB, the ADD's second source will contain a negate modifier
+       * which at this point will be in the form of a
+       *
+       *    MOV dst, -src
+       *
+       * instruction, so src[1].file will be GRF, even if it's a uniform push
+       * constant.
+       */
+      if (match->src[1].reg != add_inst->src[1].reg) {
+         /* The negating MOV should be immediately before the ADD. */
+         vec4_instruction *mov_inst = (vec4_instruction *) add_inst->prev;
+         if (mov_inst->opcode != BRW_OPCODE_MOV)
+            continue;
+
+         src1 = &mov_inst->src[0];
+      }
+
+      /* If everything matches, we're done. */
+      if (match->src[0].file == src0->file &&
+          match->src[1].file == src1->file &&
+          match->src[0].reg == src0->reg &&
+          match->src[1].reg == src1->reg &&
+          match->src[0].reg_offset == src0->reg_offset &&
+          match->src[1].reg_offset == src1->reg_offset) {
+         found = true;
+         break;
+      }
+   }
+
+   if (found) {
+      match->dst = add_inst->dst;
+      add_inst->remove();
+   }
+}
+
 void
 vec4_visitor::emit_bool_comparison(unsigned int op,
 				 dst_reg dst, src_reg src0, src_reg src1)
@@ -1319,6 +1415,8 @@ vec4_visitor::visit(ir_expression *ir)
 
    case ir_binop_add:
       emit(ADD(result_dst, op[0], op[1]));
+      if (emitted_addc_or_subb)
+         try_combine_add_with_addc_subb();
       break;
    case ir_binop_sub:
       assert(!"not reached: should be handled by ir_sub_to_add_neg");
@@ -1359,6 +1457,8 @@ vec4_visitor::visit(ir_expression *ir)
       emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
       break;
    case ir_binop_carry: {
+      emitted_addc_or_subb = true;
+
       struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
 
       emit(ADDC(dst_null_ud(), op[0], op[1]));
@@ -1366,6 +1466,8 @@ vec4_visitor::visit(ir_expression *ir)
       break;
    }
    case ir_binop_borrow: {
+      emitted_addc_or_subb = true;
+
       struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
 
       emit(SUBB(dst_null_ud(), op[0], op[1]));
@@ -3058,7 +3160,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
 			   struct brw_shader *shader,
 			   void *mem_ctx,
                            bool debug_flag)
-   : debug_flag(debug_flag)
+   : debug_flag(debug_flag), emitted_addc_or_subb(false)
 {
    this->brw = brw;
    this->ctx = &brw->ctx;
-- 
1.8.3.2



More information about the mesa-dev mailing list