[Mesa-dev] [PATCH 09/10] i965/fs: Add a peephole pass to combine ADD with ADDC/SUBB.

Ian Romanick idr at freedesktop.org
Tue Sep 24 14:41:27 PDT 2013


For our own edification, we should add some feedback in the
INTEL_DEBUG=perf case.  If there is any case that an ADDC (or SUBB) from
the frontend doesn't get merged with an add, we should generate a perf
warning.  This probably indicates a failing of the optimization pass.

That could probably be a follow-on patch...

On 09/23/2013 04:13 PM, Matt Turner wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h           |   1 +
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 101 +++++++++++++++++++++++++++
>  2 files changed, 102 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index a56f561..d776b5a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -373,6 +373,7 @@ public:
>                      fs_reg src0, fs_reg src1);
>     bool try_emit_saturate(ir_expression *ir);
>     bool try_emit_mad(ir_expression *ir, int mul_arg);
> +   void try_combine_add_with_addc_subb();
>     void try_replace_with_sel();
>     void emit_bool_to_cond_code(ir_rvalue *condition);
>     void emit_if_gen6(ir_if *ir);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 23cdc2e..531ef52 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -313,9 +313,104 @@ fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
>     return true;
>  }
>  
> +/**
> + * The addition and carry in the uaddCarry() built-in function are implemented
> + * separately as ir_binop_add and ir_binop_carry respectively. i965 generates
> + * ADDC and a MOV from the accumulator for the carry.
> + *
> + * The generated code for uaddCarry(uint x, uint y, out uint carry) would look
> + * like this:
> + *
> + *    addc null, x, y
> + *    mov  carry, acc0
> + *    add  sum, x, y
> + *
> + * This peephole pass optimizes this into
> + *
> + *    addc sum, x, y
> + *    mov carry, acc0
> + *
> + * usubBorrow() works in the same fashion.
> + */
> +void
> +fs_visitor::try_combine_add_with_addc_subb()
> +{
> +   /* ADDC/SUBB was introduced in gen7. */
> +   if (brw->gen < 7)
> +      return;
> +
> +   fs_inst *add_inst = (fs_inst *) instructions.get_tail();
> +   assert(add_inst->opcode == BRW_OPCODE_ADD);
> +
> +   /* ADDC/SUBB only operates on UD. */
> +   if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
> +       add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
> +       add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
> +      return;
> +
> +   bool found = false;
> +   fs_inst *match = (fs_inst *) add_inst->prev;
> +   /* The ADDC should appear within 8 instructions of ADD for a vec4. SUBB
> +    * should appear farther away because of the extra MOV negates.
> +    */
> +   for (int i = 0; i < 16; i++, match = (fs_inst *) match->prev) {
> +      if (match->is_head_sentinel())
> +         return;
> +
> +      /* Look for an ADDC/SUBB instruction whose destination is the null
> +       * register (ir_binop_carry emits ADDC with null destination; same for
> +       * ir_binop_borrow with SUBB) and whose sources are identical to those
> +       * of the ADD.
> +       */
> +      if (match->opcode != BRW_OPCODE_ADDC && match->opcode != BRW_OPCODE_SUBB)
> +         continue;
> +
> +      /* Only look for newly emitted ADDC/SUBB with null destination. */
> +      if (match->dst.file != ARF || match->dst.reg != BRW_ARF_NULL)
> +         continue;
> +
> +      fs_reg *src0 = &add_inst->src[0];
> +      fs_reg *src1 = &add_inst->src[1];
> +
> +      /* For SUBB, the ADD's second source will contain a negate modifier
> +       * which at this point will be in the form of a
> +       *
> +       *    MOV dst, -src
> +       *
> +       * instruction, so src[1].file will be GRF, even if it's a uniform push
> +       * constant.
> +       */
> +      if (match->src[1].reg != add_inst->src[1].reg) {
> +         /* The negating MOV should be immediately before the ADD. */
> +         fs_inst *mov_inst = (fs_inst *) add_inst->prev;
> +         if (mov_inst->opcode != BRW_OPCODE_MOV)
> +            continue;
> +
> +         src1 = &mov_inst->src[0];
> +      }
> +
> +      /* If everything matches, we're done. */
> +      if (match->src[0].file == src0->file &&
> +          match->src[1].file == src1->file &&
> +          match->src[0].reg == src0->reg &&
> +          match->src[1].reg == src1->reg &&
> +          match->src[0].reg_offset == src0->reg_offset &&
> +          match->src[1].reg_offset == src1->reg_offset) {
> +         found = true;
> +         break;
> +      }
> +   }
> +
> +   if (found) {
> +      match->dst = add_inst->dst;
> +      add_inst->remove();
> +   }
> +}
> +
>  void
>  fs_visitor::visit(ir_expression *ir)
>  {
> +   static bool emitted_addc_or_subb = false;
>     unsigned int operand;
>     fs_reg op[3], temp;
>     fs_inst *inst;
> @@ -415,6 +510,8 @@ fs_visitor::visit(ir_expression *ir)
>  
>     case ir_binop_add:
>        emit(ADD(this->result, op[0], op[1]));
> +      if (emitted_addc_or_subb)
> +         try_combine_add_with_addc_subb();
>        break;
>     case ir_binop_sub:
>        assert(!"not reached: should be handled by ir_sub_to_add_neg");
> @@ -451,6 +548,8 @@ fs_visitor::visit(ir_expression *ir)
>        if (brw->gen >= 7 && dispatch_width == 16)
>           fail("16-wide explicit accumulator operands unsupported\n");
>  
> +      emitted_addc_or_subb = true;
> +
>        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>  
>        emit(ADDC(reg_null_ud, op[0], op[1]));
> @@ -461,6 +560,8 @@ fs_visitor::visit(ir_expression *ir)
>        if (brw->gen >= 7 && dispatch_width == 16)
>           fail("16-wide explicit accumulator operands unsupported\n");
>  
> +      emitted_addc_or_subb = true;
> +
>        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>  
>        emit(SUBB(reg_null_ud, op[0], op[1]));
> 



More information about the mesa-dev mailing list