[Mesa-dev] [PATCH 09/10] i965/fs: Add a peephole pass to combine ADD with ADDC/SUBB.
Matt Turner
mattst88 at gmail.com
Fri Sep 27 18:15:36 PDT 2013
v2: Check fixed_hw_reg.{file,nr} instead of dst.reg.
---
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 103 +++++++++++++++++++++++++++
2 files changed, 104 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 6a53e59..1d33522 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -345,6 +345,7 @@ public:
fs_reg src0, fs_reg src1);
bool try_emit_saturate(ir_expression *ir);
bool try_emit_mad(ir_expression *ir, int mul_arg);
+ void try_combine_add_with_addc_subb();
void try_replace_with_sel();
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b8c30e6..989cd6b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -313,9 +313,106 @@ fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
return true;
}
+/** Returns true if \p a and \p b name the same file, register and offset. */
+static bool
+same_storage(const fs_reg &a, const fs_reg &b)
+{
+   return a.file == b.file &&
+          a.reg == b.reg &&
+          a.reg_offset == b.reg_offset;
+}
+
+/**
+ * Returns true if \p inst, which sits between the candidate ADDC/SUBB
+ * \p match and the ADD, makes it unsafe to move the ADD's destination write
+ * up into \p match: either it is a control flow instruction (the two
+ * instructions might then not execute under the same conditions), or it
+ * writes a GRF that \p match reads (the ADD would then see a different
+ * operand value than \p match did).
+ */
+static bool
+blocks_addc_subb_merge(const fs_inst *inst, const fs_inst *match)
+{
+   switch (inst->opcode) {
+   case BRW_OPCODE_IF:
+   case BRW_OPCODE_ELSE:
+   case BRW_OPCODE_ENDIF:
+   case BRW_OPCODE_DO:
+   case BRW_OPCODE_WHILE:
+   case BRW_OPCODE_BREAK:
+   case BRW_OPCODE_CONTINUE:
+      return true;
+   default:
+      break;
+   }
+
+   if (inst->dst.file != GRF)
+      return false;
+
+   /* Compare whole registers (ignoring reg_offset) to stay conservative. */
+   for (int i = 0; i < 2; i++) {
+      if (match->src[i].file == GRF && match->src[i].reg == inst->dst.reg)
+         return true;
+   }
+
+   return false;
+}
+
+/**
+ * The addition and carry in the uaddCarry() built-in function are implemented
+ * separately as ir_binop_add and ir_binop_carry respectively. i965 generates
+ * ADDC and a MOV from the accumulator for the carry.
+ *
+ * The generated code for uaddCarry(uint x, uint y, out uint carry) would look
+ * like this:
+ *
+ *    addc null, x, y
+ *    mov carry, acc0
+ *    add sum, x, y
+ *
+ * This peephole pass optimizes this into
+ *
+ *    addc sum, x, y
+ *    mov carry, acc0
+ *
+ * usubBorrow() works in the same fashion, except that the subtraction reaches
+ * us as an ADD of a negated copy of the second operand:
+ *
+ *    subb null, x, y
+ *    mov borrow, acc0
+ *    mov tmp, -y
+ *    add diff, x, tmp
+ *
+ * which becomes
+ *
+ *    subb diff, x, y
+ *    mov borrow, acc0
+ *    mov tmp, -y
+ *
+ * This function does not remove the negating MOV; only the ADD is dropped.
+ *
+ * Must be called right after an ADD has been emitted, i.e. the ADD has to be
+ * the last instruction in the list.
+ */
+void
+fs_visitor::try_combine_add_with_addc_subb()
+{
+   /* ADDC/SUBB was introduced in gen7. */
+   if (brw->gen < 7)
+      return;
+
+   fs_inst *add_inst = (fs_inst *) instructions.get_tail();
+   assert(add_inst->opcode == BRW_OPCODE_ADD);
+
+   /* ADDC/SUBB only operates on UD. */
+   if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
+       add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
+       add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
+      return;
+
+   fs_inst *match = (fs_inst *) add_inst->prev;
+   /* The ADDC should appear within 8 instructions of ADD for a vec4. SUBB
+    * should appear farther away because of the extra MOV negates.
+    */
+   for (int i = 0; i < 16; i++, match = (fs_inst *) match->prev) {
+      if (match->is_head_sentinel())
+         return;
+
+      /* Look for an ADDC/SUBB instruction whose destination is the null
+       * register (ir_binop_carry emits ADDC with null destination; same for
+       * ir_binop_borrow with SUBB) and whose sources are identical to those
+       * of the ADD.
+       */
+      if (match->opcode != BRW_OPCODE_ADDC && match->opcode != BRW_OPCODE_SUBB)
+         continue;
+
+      /* Only look for newly emitted ADDC/SUBB with null destination. */
+      if (match->dst.file != HW_REG ||
+          match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+          match->dst.fixed_hw_reg.nr != BRW_ARF_NULL)
+         continue;
+
+      const fs_reg *rhs = &add_inst->src[1];
+      bool rhs_negate = match->src[1].negate;
+
+      if (match->opcode == BRW_OPCODE_SUBB) {
+         /* For SUBB, the ADD's second source will contain a negate modifier
+          * which at this point will be in the form of a
+          *
+          *    MOV dst, -src
+          *
+          * instruction, so src[1].file will be GRF, even if it's a uniform
+          * push constant.
+          *
+          * The negating MOV should be immediately before the ADD, and it has
+          * to be the instruction that produced the ADD's second source;
+          * otherwise the ADD is not computing src0 - src1 of this SUBB.
+          */
+         fs_inst *mov_inst = (fs_inst *) add_inst->prev;
+         if (mov_inst->opcode != BRW_OPCODE_MOV ||
+             !same_storage(mov_inst->dst, add_inst->src[1]) ||
+             add_inst->src[1].negate ||
+             add_inst->src[1].abs)
+            continue;
+
+         rhs = &mov_inst->src[0];
+         /* The MOV must flip the sign relative to SUBB's own operand. */
+         rhs_negate = !match->src[1].negate;
+      }
+
+      /* Both operands have to refer to the same storage with the expected
+       * source modifiers. An ADDC is only ever paired with an un-negated
+       * ADD, a SUBB only with an ADD of the negated copy.
+       */
+      if (!same_storage(match->src[0], add_inst->src[0]) ||
+          match->src[0].negate != add_inst->src[0].negate ||
+          match->src[0].abs != add_inst->src[0].abs)
+         continue;
+
+      if (!same_storage(match->src[1], *rhs) ||
+          rhs->negate != rhs_negate ||
+          rhs->abs != match->src[1].abs)
+         continue;
+
+      /* Writing the result from the ADDC/SUBB moves the write earlier in
+       * the program. Give up if anything in between makes that unsafe; an
+       * even earlier ADDC/SUBB would span the same instructions, so there
+       * is no point in searching further.
+       */
+      for (fs_inst *scan = (fs_inst *) match->next; scan != add_inst;
+           scan = (fs_inst *) scan->next) {
+         if (blocks_addc_subb_merge(scan, match))
+            return;
+      }
+
+      /* Everything matches: let the ADDC/SUBB produce the result itself. */
+      match->dst = add_inst->dst;
+      add_inst->remove();
+      return;
+   }
+}
+
void
fs_visitor::visit(ir_expression *ir)
{
+ static bool emitted_addc_or_subb = false;
unsigned int operand;
fs_reg op[3], temp;
fs_inst *inst;
@@ -415,6 +512,8 @@ fs_visitor::visit(ir_expression *ir)
case ir_binop_add:
emit(ADD(this->result, op[0], op[1]));
+ if (emitted_addc_or_subb)
+ try_combine_add_with_addc_subb();
break;
case ir_binop_sub:
assert(!"not reached: should be handled by ir_sub_to_add_neg");
@@ -451,6 +550,8 @@ fs_visitor::visit(ir_expression *ir)
if (brw->gen >= 7 && dispatch_width == 16)
fail("16-wide explicit accumulator operands unsupported\n");
+ emitted_addc_or_subb = true;
+
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
emit(ADDC(reg_null_ud, op[0], op[1]));
@@ -461,6 +562,8 @@ fs_visitor::visit(ir_expression *ir)
if (brw->gen >= 7 && dispatch_width == 16)
fail("16-wide explicit accumulator operands unsupported\n");
+ emitted_addc_or_subb = true;
+
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
emit(SUBB(reg_null_ud, op[0], op[1]));
--
1.8.3.2
More information about the mesa-dev
mailing list