[Mesa-dev] [PATCH 10/10] i965/vs: Add a peephole pass to combine ADD with ADDC/SUBB.
Paul Berry
stereotype441 at gmail.com
Fri Oct 4 14:21:00 PDT 2013
On 3 October 2013 11:00, Matt Turner <mattst88 at gmail.com> wrote:
> v2: Check fixed_hw_reg.{file,nr} instead of dst.reg.
> v3: Store the bool emitted_addc_or_subb in the class, not static.
> ---
> src/mesa/drivers/dri/i965/brw_vec4.h | 3 +
> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 104
> ++++++++++++++++++++++++-
> 2 files changed, 106 insertions(+), 1 deletion(-)
>
My concerns about patch 9/10 apply to this patch as well.
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 25427d7..9e2204d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -507,6 +507,7 @@ public:
>
> bool try_emit_sat(ir_expression *ir);
> bool try_emit_mad(ir_expression *ir, int mul_arg);
> + void try_combine_add_with_addc_subb();
> void resolve_ud_negate(src_reg *reg);
>
> src_reg get_timestamp();
> @@ -530,6 +531,8 @@ protected:
> virtual int compute_array_stride(ir_dereference_array *ir);
>
> const bool debug_flag;
> +
> + bool emitted_addc_or_subb;
> };
>
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index ffb2cfc..74bdd4d 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1122,6 +1122,102 @@ vec4_visitor::try_emit_mad(ir_expression *ir, int
> mul_arg)
> return true;
> }
>
> +/**
> + * The addition and carry in the uaddCarry() built-in function are
> implemented
> + * separately as ir_binop_add and ir_binop_carry respectively. i965
> generates
> + * ADDC and a MOV from the accumulator for the carry.
> + *
> + * The generated code for uaddCarry(uint x, uint y, out uint carry) would
> look
> + * like this:
> + *
> + * addc null, x, y
> + * mov carry, acc0
> + * add sum, x, y
> + *
> + * This peephole pass optimizes this into
> + *
> + * addc sum, x, y
> + * mov carry, acc0
> + *
> + * usubBorrow() works in the same fashion.
> + */
> +void
> +vec4_visitor::try_combine_add_with_addc_subb()
> +{
> + /* ADDC/SUBB was introduced in gen7. */
> + if (brw->gen < 7)
> + return;
> +
> + vec4_instruction *add_inst = (vec4_instruction *)
> instructions.get_tail();
> + assert(add_inst->opcode == BRW_OPCODE_ADD);
> +
> + /* ADDC/SUBB only operates on UD. */
> + if (add_inst->dst.type != BRW_REGISTER_TYPE_UD ||
> + add_inst->src[0].type != BRW_REGISTER_TYPE_UD ||
> + add_inst->src[1].type != BRW_REGISTER_TYPE_UD)
> + return;
> +
> + bool found = false;
> + vec4_instruction *match = (vec4_instruction *) add_inst->prev;
> + /* The ADDC should appear within 2 instructions of ADD. SUBB should
> appear
> + * farther away because of the extra MOV negate.
> + */
> + for (int i = 0; i < 4; i++, match = (vec4_instruction *) match->prev) {
> + if (match->is_head_sentinel())
> + return;
> +
> + /* Look for an ADDC/SUBB instruction whose destination is the null
> + * register (ir_binop_carry emits ADDC with null destination; same
> for
> + * ir_binop_borrow with SUBB) and whose sources are identical to
> those
> + * of the ADD.
> + */
> + if (match->opcode != BRW_OPCODE_ADDC && match->opcode !=
> BRW_OPCODE_SUBB)
> + continue;
> +
> + /* Only look for newly emitted ADDC/SUBB with null destination. */
> + if (match->dst.file != HW_REG ||
> + match->dst.fixed_hw_reg.file != BRW_ARCHITECTURE_REGISTER_FILE
> ||
> + match->dst.fixed_hw_reg.nr != BRW_ARF_NULL)
> + continue;
> +
> + src_reg *src0 = &add_inst->src[0];
> + src_reg *src1 = &add_inst->src[1];
> +
> + /* For SUBB, the ADD's second source will contain a negate modifier
> + * which at this point will be in the form of a
> + *
> + * MOV dst, -src
> + *
> + * instruction, so src[1].file will be GRF, even if it's a uniform
> push
> + * constant.
> + */
> + if (match->src[1].reg != add_inst->src[1].reg) {
> + /* The negating MOV should be immediately before the ADD. */
> + vec4_instruction *mov_inst = (vec4_instruction *) add_inst->prev;
> + if (mov_inst->opcode != BRW_OPCODE_MOV)
> + continue;
> +
> + src1 = &mov_inst->src[0];
> + }
> +
> + /* If everything matches, we're done. */
> + if (match->src[0].file == src0->file &&
> + match->src[1].file == src1->file &&
> + match->src[0].reg == src0->reg &&
> + match->src[1].reg == src1->reg &&
> + match->src[0].reg_offset == src0->reg_offset &&
> + match->src[1].reg_offset == src1->reg_offset) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (found) {
> + match->dst = add_inst->dst;
> + add_inst->remove();
> + }
> +}
> +
> void
> vec4_visitor::emit_bool_comparison(unsigned int op,
> dst_reg dst, src_reg src0, src_reg src1)
> @@ -1319,6 +1415,8 @@ vec4_visitor::visit(ir_expression *ir)
>
> case ir_binop_add:
> emit(ADD(result_dst, op[0], op[1]));
> + if (emitted_addc_or_subb)
> + try_combine_add_with_addc_subb();
> break;
> case ir_binop_sub:
> assert(!"not reached: should be handled by ir_sub_to_add_neg");
> @@ -1359,6 +1457,8 @@ vec4_visitor::visit(ir_expression *ir)
> emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
> break;
> case ir_binop_carry: {
> + emitted_addc_or_subb = true;
> +
> struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>
> emit(ADDC(dst_null_ud(), op[0], op[1]));
> @@ -1366,6 +1466,8 @@ vec4_visitor::visit(ir_expression *ir)
> break;
> }
> case ir_binop_borrow: {
> + emitted_addc_or_subb = true;
> +
> struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_UD);
>
> emit(SUBB(dst_null_ud(), op[0], op[1]));
> @@ -3058,7 +3160,7 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
> struct brw_shader *shader,
> void *mem_ctx,
> bool debug_flag)
> - : debug_flag(debug_flag)
> + : debug_flag(debug_flag), emitted_addc_or_subb(false)
> {
> this->brw = brw;
> this->ctx = &brw->ctx;
> --
> 1.8.3.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20131004/33b922b5/attachment-0001.html>
More information about the mesa-dev
mailing list