[Mesa-dev] [PATCH 10/10] i965/vs: Add a peephole pass to combine ADD with ADDC/SUBB.
Matt Turner
mattst88 at gmail.com
Fri Sep 27 18:13:58 PDT 2013
v2: Check fixed_hw_reg.{file,nr} instead of dst.reg.
---
src/mesa/drivers/dri/i965/brw_vec4.h | 1 +
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 103 +++++++++++++++++++++++++
2 files changed, 104 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 25427d7..66e1ff0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -507,6 +507,7 @@ public:
bool try_emit_sat(ir_expression *ir);
bool try_emit_mad(ir_expression *ir, int mul_arg);
+ void try_combine_add_with_addc_subb();
void resolve_ud_negate(src_reg *reg);
src_reg get_timestamp();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ffb2cfc..781f60b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1122,6 +1122,102 @@ vec4_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
return true;
}
+/**
+ * The addition and carry in the uaddCarry() built-in function are implemented
+ * separately as ir_binop_add and ir_binop_carry respectively. i965 generates
+ * ADDC and a MOV from the accumulator for the carry.
+ *
+ * The generated code for uaddCarry(uint x, uint y, out uint carry) would look
+ * like this:
+ *
+ * addc null, x, y
+ * mov carry, acc0
+ * add sum, x, y
+ *
+ * This peephole pass optimizes this into
+ *
+ * addc sum, x, y
+ * mov carry, acc0
+ *
+ * usubBorrow() works in the same fashion.
+ */
+void
+vec4_visitor::try_combine_add_with_addc_subb()
+{
+ /* ADDC/SUBB was introduced in gen7. */
+ if (brw->gen < 7)
+ return;
+
+ vec4_instruction *add_inst = (vec4_instruction *) instructions.get_tail();
+ assert(add_inst->opcode == BRW_OPCODE_ADD);
+
+ /* ADDC/SUBB only operates on UD. */
+ if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
+ add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
+ add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
+ return;
+
+ bool found = false;
+ vec4_instruction *match = (vec4_instruction *) add_inst->prev;
+ /* The ADDC should appear within 2 instructions of ADD. SUBB should appear
+ * farther away because of the extra MOV negate.
+ */
+ for (int i = 0; i < 4; i++, match = (vec4_instruction *) match->prev) {
+ if (match->is_head_sentinel())
+ return;
+
+ /* Look for an ADDC/SUBB instruction whose destination is the null
+ * register (ir_binop_carry emits ADDC with null destination; same for
+ * ir_binop_borrow with SUBB) and whose sources are identical to those
+ * of the ADD.
+ */
+ if (match->opcode != BRW_OPCODE_ADDC && match->opcode != BRW_OPCODE_SUBB)
+ continue;
+
+ /* Only look for newly emitted ADDC/SUBB with null destination. */
+ if (match->dst.file != HW_REG ||
+ match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
+ match->dst.fixed_hw_reg.nr != BRW_ARF_NULL)
+ continue;
+
+ src_reg *src0 = &add_inst->src[0];
+ src_reg *src1 = &add_inst->src[1];
+
+ /* For SUBB, the ADD's second source will contain a negate modifier
+ * which at this point will be in the form of a
+ *
+ * MOV dst, -src
+ *
+ * instruction, so src[1].file will be GRF, even if it's a uniform push
+ * constant.
+ */
+ if (match->src[1].reg != add_inst->src[1].reg) {
+ /* The negating MOV should be immediately before the ADD. */
+ vec4_instruction *mov_inst = (vec4_instruction *) add_inst->prev;
+ if (mov_inst->opcode != BRW_OPCODE_MOV)
+ continue;
+
+ src1 = &mov_inst->src[0];
+ }
+
+ /* If everything matches, we're done. */
+ if (match->src[0].file == src0->file &&
+ match->src[1].file == src1->file &&
+ match->src[0].reg == src0->reg &&
+ match->src[1].reg == src1->reg &&
+ match->src[0].reg_offset == src0->reg_offset &&
+ match->src[1].reg_offset == src1->reg_offset) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found) {
+ match->dst = add_inst->dst;
+ add_inst->remove();
+ }
+}
+
void
vec4_visitor::emit_bool_comparison(unsigned int op,
dst_reg dst, src_reg src0, src_reg src1)
@@ -1170,6 +1266,7 @@ is_16bit_constant(ir_rvalue *rvalue)
void
vec4_visitor::visit(ir_expression *ir)
{
+ static bool emitted_addc_or_subb = true;
unsigned int operand;
src_reg op[Elements(ir->operands)];
src_reg result_src;
@@ -1319,6 +1416,8 @@ vec4_visitor::visit(ir_expression *ir)
case ir_binop_add:
emit(ADD(result_dst, op[0], op[1]));
+ if (emitted_addc_or_subb)
+ try_combine_add_with_addc_subb();
break;
case ir_binop_sub:
assert(!"not reached: should be handled by ir_sub_to_add_neg");
@@ -1359,6 +1458,8 @@ vec4_visitor::visit(ir_expression *ir)
emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
break;
case ir_binop_carry: {
+ emitted_addc_or_subb = true;
+
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
emit(ADDC(dst_null_ud(), op[0], op[1]));
@@ -1366,6 +1467,8 @@ vec4_visitor::visit(ir_expression *ir)
break;
}
case ir_binop_borrow: {
+ emitted_addc_or_subb = true;
+
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
emit(SUBB(dst_null_ud(), op[0], op[1]));
--
1.8.3.2
More information about the mesa-dev
mailing list