[Mesa-dev] [PATCH 09/10] i965/fs: Add a peephole pass to combine ADD with ADDC/SUBB.

Paul Berry stereotype441 at gmail.com
Fri Oct 4 13:51:51 PDT 2013


On 3 October 2013 10:59, Matt Turner <mattst88 at gmail.com> wrote:

> v2: Check fixed_hw_reg.{file,nr} instead of dst.reg.
> v3: Store the bool emitted_addc_or_subb in the class, not static.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.h           |   3 +
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 104
> +++++++++++++++++++++++++++
>  2 files changed, 107 insertions(+)
>

Reviewed-by: Paul Berry <stereotype441 at gmail.com>


>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 6a53e59..c703c2b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -345,6 +345,7 @@ public:
>                      fs_reg src0, fs_reg src1);
>     bool try_emit_saturate(ir_expression *ir);
>     bool try_emit_mad(ir_expression *ir, int mul_arg);
> +   void try_combine_add_with_addc_subb();
>     void try_replace_with_sel();
>     void emit_bool_to_cond_code(ir_rvalue *condition);
>     void emit_if_gen6(ir_if *ir);
> @@ -458,6 +459,8 @@ public:
>
>     int force_uncompressed_stack;
>     int force_sechalf_stack;
> +
> +   bool emitted_addc_or_subb;
>  };
>
>  /**
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index b8c30e6..8accbd6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -313,6 +313,102 @@ fs_visitor::try_emit_mad(ir_expression *ir, int
> mul_arg)
>     return true;
>  }
>
> +/**
> + * The addition and carry in the uaddCarry() built-in function are
> implemented
> + * separately as ir_binop_add and ir_binop_carry respectively. i965
> generates
> + * ADDC and a MOV from the accumulator for the carry.
> + *
> + * The generated code for uaddCarry(uint x, uint y, out uint carry) would
> look
> + * like this:
> + *
> + *    addc null, x, y
> + *    mov  carry, acc0
> + *    add  sum, x, y
> + *
> + * This peephole pass optimizes this into
> + *
> + *    addc sum, x, y
> + *    mov carry, acc0
> + *
> + * usubBorrow() works in the same fashion.
> + */
> +void
> +fs_visitor::try_combine_add_with_addc_subb()
> +{
> +   /* ADDC/SUBB was introduced in gen7. */
> +   if (brw->gen < 7)
> +      return;
> +
> +   fs_inst *add_inst = (fs_inst *) instructions.get_tail();
> +   assert(add_inst->opcode == BRW_OPCODE_ADD);
> +
> +   /* ADDC/SUBB only operates on UD. */
> +   if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
> +       add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
> +       add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
> +      return;
> +
> +   bool found = false;
> +   fs_inst *match = (fs_inst *) add_inst->prev;
> +   /* The ADDC should appear within 8 instructions of ADD for a vec4. SUBB
> +    * should appear farther away because of the extra MOV negates.
> +    */
> +   for (int i = 0; i < 16; i++, match = (fs_inst *) match->prev) {
> +      if (match->is_head_sentinel())
> +         return;
> +
> +      /* Look for an ADDC/SUBB instruction whose destination is the null
> +       * register (ir_binop_carry emits ADDC with null destination; same
> for
> +       * ir_binop_borrow with SUBB) and whose sources are identical to
> those
> +       * of the ADD.
> +       */
> +      if (match->opcode != BRW_OPCODE_ADDC && match->opcode !=
> BRW_OPCODE_SUBB)
> +         continue;
> +
> +      /* Only look for newly emitted ADDC/SUBB with null destination. */
> +      if (match->dst.file != HW_REG ||
> +          match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE
> ||
> +          match->dst.fixed_hw_reg.nr != BRW_ARF_NULL)
> +         continue;
> +
> +      fs_reg *src0 = &add_inst->src[0];
> +      fs_reg *src1 = &add_inst->src[1];
> +
> +      /* For SUBB, the ADD's second source will contain a negate modifier
> +       * which at this point will be in the form of a
> +       *
> +       *    MOV dst, -src
> +       *
> +       * instruction, so src[1].file will be GRF, even if it's a uniform
> push
> +       * constant.
> +       */
> +      if (match->src[1].reg != add_inst->src[1].reg) {
> +         /* The negating MOV should be immediately before the ADD. */
> +         fs_inst *mov_inst = (fs_inst *) add_inst->prev;
> +         if (mov_inst->opcode != BRW_OPCODE_MOV)
> +            continue;
> +
> +         src1 = &mov_inst->src[0];
> +      }
> +
> +      /* If everything matches, we're done. */
> +      if (match->src[0].file == src0->file &&
> +          match->src[1].file == src1->file &&
> +          match->src[0].reg == src0->reg &&
> +          match->src[1].reg == src1->reg &&
> +          match->src[0].reg_offset == src0->reg_offset &&
> +          match->src[1].reg_offset == src1->reg_offset) {
> +         found = true;
> +         break;
> +      }
> +   }
> +
> +   if (found) {
> +      match->dst = add_inst->dst;
> +      add_inst->remove();
> +   }
> +}
> +
>  void
>  fs_visitor::visit(ir_expression *ir)
>  {
> @@ -415,6 +511,8 @@ fs_visitor::visit(ir_expression *ir)
>
>     case ir_binop_add:
>        emit(ADD(this->result, op[0], op[1]));
> +      if (emitted_addc_or_subb)
> +         try_combine_add_with_addc_subb();
>        break;
>     case ir_binop_sub:
>        assert(!"not reached: should be handled by ir_sub_to_add_neg");
> @@ -451,6 +549,8 @@ fs_visitor::visit(ir_expression *ir)
>        if (brw->gen >= 7 && dispatch_width == 16)
>           fail("16-wide explicit accumulator operands unsupported\n");
>
> +      emitted_addc_or_subb = true;
> +
>        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>
>        emit(ADDC(reg_null_ud, op[0], op[1]));
> @@ -461,6 +561,8 @@ fs_visitor::visit(ir_expression *ir)
>        if (brw->gen >= 7 && dispatch_width == 16)
>           fail("16-wide explicit accumulator operands unsupported\n");
>
> +      emitted_addc_or_subb = true;
> +
>        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>
>        emit(SUBB(reg_null_ud, op[0], op[1]));
> @@ -2610,6 +2712,8 @@ fs_visitor::fs_visitor(struct brw_context *brw,
>     this->force_uncompressed_stack = 0;
>     this->force_sechalf_stack = 0;
>
> +   this->emitted_addc_or_subb = false;
> +
>     memset(&this->param_size, 0, sizeof(this->param_size));
>  }
>
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131004/6b06bd04/attachment.html>


More information about the mesa-dev mailing list