[Mesa-dev] [PATCH 09/10] i965/fs: Add a peephole pass to combine ADD with ADDC/SUBB.

Matt Turner mattst88 at gmail.com
Mon Sep 23 16:13:06 PDT 2013


---
 src/mesa/drivers/dri/i965/brw_fs.h           |   1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 101 +++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index a56f561..d776b5a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -373,6 +373,7 @@ public:
                     fs_reg src0, fs_reg src1);
    bool try_emit_saturate(ir_expression *ir);
    bool try_emit_mad(ir_expression *ir, int mul_arg);
+   void try_combine_add_with_addc_subb();
    void try_replace_with_sel();
    void emit_bool_to_cond_code(ir_rvalue *condition);
    void emit_if_gen6(ir_if *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 23cdc2e..531ef52 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -313,9 +313,104 @@ fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
    return true;
 }
 
+/**
+ * The addition and carry in the uaddCarry() built-in function are implemented
+ * separately as ir_binop_add and ir_binop_carry respectively. i965 generates
+ * ADDC and a MOV from the accumulator for the carry.
+ *
+ * The generated code for uaddCarry(uint x, uint y, out uint carry) would look
+ * like this:
+ *
+ *    addc null, x, y
+ *    mov  carry, acc0
+ *    add  sum, x, y
+ *
+ * This peephole pass optimizes this into
+ *
+ *    addc sum, x, y
+ *    mov carry, acc0
+ *
+ * usubBorrow() works in the same fashion.
+ */
+void
+fs_visitor::try_combine_add_with_addc_subb()
+{
+   /* ADDC/SUBB was introduced in gen7. */
+   if (brw->gen < 7)
+      return;
+
+   fs_inst *add_inst = (fs_inst *) instructions.get_tail();
+   assert(add_inst->opcode == BRW_OPCODE_ADD);
+
+   /* ADDC/SUBB only operates on UD. */
+   if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
+       add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
+       add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
+      return;
+
+   bool found = false;
+   fs_inst *match = (fs_inst *) add_inst->prev;
+   /* The ADDC should appear within 8 instructions of ADD for a vec4. SUBB
+    * should appear farther away because of the extra MOV negates.
+    */
+   for (int i = 0; i < 16; i++, match = (fs_inst *) match->prev) {
+      if (match->is_head_sentinel())
+         return;
+
+      /* Look for an ADDC/SUBB instruction whose destination is the null
+       * register (ir_binop_carry emits ADDC with null destination; same for
+       * ir_binop_borrow with SUBB) and whose sources are identical to those
+       * of the ADD.
+       */
+      if (match->opcode != BRW_OPCODE_ADDC && match->opcode != BRW_OPCODE_SUBB)
+         continue;
+
+      /* Only look for newly emitted ADDC/SUBB with null destination. */
+      if (match->dst.file != ARF || match->dst.reg != BRW_ARF_NULL)
+         continue;
+
+      fs_reg *src0 = &add_inst->src[0];
+      fs_reg *src1 = &add_inst->src[1];
+
+      /* For SUBB, the ADD's second source will contain a negate modifier
+       * which at this point will be in the form of a
+       *
+       *    MOV dst, -src
+       *
+       * instruction, so src[1].file will be GRF, even if it's a uniform push
+       * constant.
+       */
+      if (match->src[1].reg != add_inst->src[1].reg) {
+         /* The negating MOV should be immediately before the ADD. */
+         fs_inst *mov_inst = (fs_inst *) add_inst->prev;
+         if (mov_inst->opcode != BRW_OPCODE_MOV)
+            continue;
+
+         src1 = &mov_inst->src[0];
+      }
+
+      /* If everything matches, we're done. */
+      if (match->src[0].file == src0->file &&
+          match->src[1].file == src1->file &&
+          match->src[0].reg == src0->reg &&
+          match->src[1].reg == src1->reg &&
+          match->src[0].reg_offset == src0->reg_offset &&
+          match->src[1].reg_offset == src1->reg_offset) {
+         found = true;
+         break;
+      }
+   }
+
+   if (found) {
+      match->dst = add_inst->dst;
+      add_inst->remove();
+   }
+}
+
 void
 fs_visitor::visit(ir_expression *ir)
 {
+   static bool emitted_addc_or_subb = false;
    unsigned int operand;
    fs_reg op[3], temp;
    fs_inst *inst;
@@ -415,6 +510,8 @@ fs_visitor::visit(ir_expression *ir)
 
    case ir_binop_add:
       emit(ADD(this->result, op[0], op[1]));
+      if (emitted_addc_or_subb)
+         try_combine_add_with_addc_subb();
       break;
    case ir_binop_sub:
       assert(!"not reached: should be handled by ir_sub_to_add_neg");
@@ -451,6 +548,8 @@ fs_visitor::visit(ir_expression *ir)
       if (brw->gen >= 7 && dispatch_width == 16)
          fail("16-wide explicit accumulator operands unsupported\n");
 
+      emitted_addc_or_subb = true;
+
       struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
 
       emit(ADDC(reg_null_ud, op[0], op[1]));
@@ -461,6 +560,8 @@ fs_visitor::visit(ir_expression *ir)
       if (brw->gen >= 7 && dispatch_width == 16)
          fail("16-wide explicit accumulator operands unsupported\n");
 
+      emitted_addc_or_subb = true;
+
       struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
 
       emit(SUBB(reg_null_ud, op[0], op[1]));
-- 
1.8.3.2



More information about the mesa-dev mailing list