[Mesa-dev] [PATCH 10/10] i965/vs: Add a peephole pass to combine ADD with ADDC/SUBB.

Paul Berry stereotype441 at gmail.com
Fri Oct 4 14:21:00 PDT 2013


On 3 October 2013 11:00, Matt Turner <mattst88 at gmail.com> wrote:

> v2: Check fixed_hw_reg.{file,nr} instead of dst.reg.
> v3: Store the bool emitted_addc_or_subb in the class, not static.
> ---
>  src/mesa/drivers/dri/i965/brw_vec4.h           |   3 +
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 104
> ++++++++++++++++++++++++-
>  2 files changed, 106 insertions(+), 1 deletion(-)
>

My concerns about patch 9/10 apply to this patch as well.


>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 25427d7..9e2204d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -507,6 +507,7 @@ public:
>
>     bool try_emit_sat(ir_expression *ir);
>     bool try_emit_mad(ir_expression *ir, int mul_arg);
> +   void try_combine_add_with_addc_subb();
>     void resolve_ud_negate(src_reg *reg);
>
>     src_reg get_timestamp();
> @@ -530,6 +531,8 @@ protected:
>     virtual int compute_array_stride(ir_dereference_array *ir);
>
>     const bool debug_flag;
> +
> +   bool emitted_addc_or_subb;
>  };
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index ffb2cfc..74bdd4d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1122,6 +1122,102 @@ vec4_visitor::try_emit_mad(ir_expression *ir, int
> mul_arg)
>     return true;
>  }
>
> +/**
> + * The addition and carry in the uaddCarry() built-in function are
> implemented
> + * separately as ir_binop_add and ir_binop_carry respectively. i965
> generates
> + * ADDC and a MOV from the accumulator for the carry.
> + *
> + * The generated code for uaddCarry(uint x, uint y, out uint carry) would
> look
> + * like this:
> + *
> + *    addc null, x, y
> + *    mov  carry, acc0
> + *    add  sum, x, y
> + *
> + * This peephole pass optimizes this into
> + *
> + *    addc sum, x, y
> + *    mov carry, acc0
> + *
> + * usubBorrow() works in the same fashion.
> + */
> +void
> +vec4_visitor::try_combine_add_with_addc_subb()
> +{
> +   /* ADDC/SUBB was introduced in gen7. */
> +   if (brw->gen < 7)
> +      return;
> +
> +   vec4_instruction *add_inst = (vec4_instruction *)
> instructions.get_tail();
> +   assert(add_inst->opcode == BRW_OPCODE_ADD);
> +
> +   /* ADDC/SUBB only operates on UD. */
> +   if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
> +       add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
> +       add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
> +      return;
> +
> +   bool found = false;
> +   vec4_instruction *match = (vec4_instruction *) add_inst->prev;
> +   /* The ADDC should appear within 2 instructions of ADD. SUBB should
> appear
> +    * farther away because of the extra MOV negate.
> +    */
> +   for (int i = 0; i < 4; i++, match = (vec4_instruction *) match->prev) {
> +      if (match->is_head_sentinel())
> +         return;
> +
> +      /* Look for an ADDC/SUBB instruction whose destination is the null
> +       * register (ir_binop_carry emits ADDC with null destination; same
> for
> +       * ir_binop_borrow with SUBB) and whose sources are identical to
> those
> +       * of the ADD.
> +       */
> +      if (match->opcode != BRW_OPCODE_ADDC && match->opcode !=
> BRW_OPCODE_SUBB)
> +         continue;
> +
> +      /* Only look for newly emitted ADDC/SUBB with null destination. */
> +      if (match->dst.file != HW_REG ||
> +          match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE
> ||
> +          match->dst.fixed_hw_reg.nr != BRW_ARF_NULL)
> +         continue;
> +
> +      src_reg *src0 = &add_inst->src[0];
> +      src_reg *src1 = &add_inst->src[1];
> +
> +      /* For SUBB, the ADD's second source will contain a negate modifier
> +       * which at this point will be in the form of a
> +       *
> +       *    MOV dst, -src
> +       *
> +       * instruction, so src[1].file will be GRF, even if it's a uniform
> push
> +       * constant.
> +       */
> +      if (match->src[1].reg != add_inst->src[1].reg) {
> +         /* The negating MOV should be immediately before the ADD. */
> +         vec4_instruction *mov_inst = (vec4_instruction *) add_inst->prev;
> +         if (mov_inst->opcode != BRW_OPCODE_MOV)
> +            continue;
> +
> +         src1 = &mov_inst->src[0];
> +      }
> +
> +      /* If everything matches, we're done. */
> +      if (match->src[0].file == src0->file &&
> +          match->src[1].file == src1->file &&
> +          match->src[0].reg == src0->reg &&
> +          match->src[1].reg == src1->reg &&
> +          match->src[0].reg_offset == src0->reg_offset &&
> +          match->src[1].reg_offset == src1->reg_offset) {
> +         found = true;
> +         break;
> +      }
> +   }
> +
> +   if (found) {
> +      match->dst = add_inst->dst;
> +      add_inst->remove();
> +   }
> +}
> +
>  void
>  vec4_visitor::emit_bool_comparison(unsigned int op,
>                                  dst_reg dst, src_reg src0, src_reg src1)
> @@ -1319,6 +1415,8 @@ vec4_visitor::visit(ir_expression *ir)
>
>     case ir_binop_add:
>        emit(ADD(result_dst, op[0], op[1]));
> +      if (emitted_addc_or_subb)
> +         try_combine_add_with_addc_subb();
>        break;
>     case ir_binop_sub:
>        assert(!"not reached: should be handled by ir_sub_to_add_neg");
> @@ -1359,6 +1457,8 @@ vec4_visitor::visit(ir_expression *ir)
>        emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
>        break;
>     case ir_binop_carry: {
> +      emitted_addc_or_subb = true;
> +
>        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>
>        emit(ADDC(dst_null_ud(), op[0], op[1]));
> @@ -1366,6 +1466,8 @@ vec4_visitor::visit(ir_expression *ir)
>        break;
>     }
>     case ir_binop_borrow: {
> +      emitted_addc_or_subb = true;
> +
>        struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>
>        emit(SUBB(dst_null_ud(), op[0], op[1]));
> @@ -3058,7 +3160,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
>                            struct brw_shader *shader,
>                            void *mem_ctx,
>                             bool debug_flag)
> -   : debug_flag(debug_flag)
> +   : debug_flag(debug_flag), emitted_addc_or_subb(false)
>  {
>     this->brw = brw;
>     this->ctx = &brw->ctx;
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131004/33b922b5/attachment-0001.html>


More information about the mesa-dev mailing list