[Mesa-dev] [RFC PATCH 1/5] i965: Add writes_accumulator flag

Matt Turner mattst88 at gmail.com
Fri Mar 21 23:17:54 PDT 2014


On Fri, Mar 21, 2014 at 7:33 AM, Juha-Pekka Heikkila
<juhapekka.heikkila at gmail.com> wrote:
> Our hardware has an "accumulator" register, which can be used to store
> intermediate results across multiple instructions.  Many instructions
> can implicitly write a value to the accumulator in addition to their
> normal destination register.  This is enabled by the "AccWrEn" flag.
>
> This patch introduces a new flag, inst->writes_accumulator, which
> allows us to express the AccWrEn notion in the IR.  It also creates a
> n ALU2_ACC macro to easily define emitters for instructions that
> implicitly write the accumulator.
>
> Previously, we only supported implicit accumulator writes from the
> ADDC, SUBB, and MACH instructions.  We always enabled them on those
> instructions, and left them disabled for other instructions.
>
> To take advantage of the MAC (multiply-accumulate) instruction, we
> need to be able to set AccWrEn on other types of instructions.
>
> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp             | 34 +++++++++++-------------
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  7 +----
>  src/mesa/drivers/dri/i965/brw_shader.cpp         |  3 +++
>  src/mesa/drivers/dri/i965/brw_shader.h           |  1 +
>  src/mesa/drivers/dri/i965/brw_vec4.cpp           | 15 +----------
>  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  7 +----
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 21 +++++++++++----
>  7 files changed, 39 insertions(+), 49 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 27cf0f6..107d9e6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -64,6 +64,8 @@ fs_inst::init()
>
>     /* This will be the case for almost all instructions. */
>     this->regs_written = 1;
> +
> +   this->writes_accumulator = false;
>  }
>
>  fs_inst::fs_inst()
> @@ -151,6 +153,15 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
>        return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);    \
>     }
>
> +#define ALU2_ACC(op)                                                    \
> +   fs_inst *                                                            \
> +   fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1)                 \
> +   {                                                                    \
> +      fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
> +      inst->writes_accumulator = true;                                  \
> +      return inst;                                                      \
> +   }
> +
>  #define ALU3(op)                                                        \
>     fs_inst *                                                            \
>     fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)    \
> @@ -166,7 +177,7 @@ ALU1(RNDE)
>  ALU1(RNDZ)
>  ALU2(ADD)
>  ALU2(MUL)
> -ALU2(MACH)
> +ALU2_ACC(MACH)
>  ALU2(AND)
>  ALU2(OR)
>  ALU2(XOR)
> @@ -182,8 +193,8 @@ ALU1(FBH)
>  ALU1(FBL)
>  ALU1(CBIT)
>  ALU3(MAD)
> -ALU2(ADDC)
> -ALU2(SUBB)
> +ALU2_ACC(ADDC)
> +ALU2_ACC(SUBB)
>  ALU2(SEL)
>
>  /** Gen4 predicated IF. */
> @@ -2103,21 +2114,8 @@ fs_visitor::dead_code_eliminate()
>           }
>
>           if (dead) {
> -            /* Don't dead code eliminate instructions that write to the
> -             * accumulator as a side-effect. Instead just set the destination
> -             * to the null register to free it.
> -             */
> -            switch (inst->opcode) {
> -            case BRW_OPCODE_ADDC:
> -            case BRW_OPCODE_SUBB:
> -            case BRW_OPCODE_MACH:
> -               inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
> -               break;
> -            default:
> -               inst->remove();
> -               progress = true;
> -               break;
> -            }
> +            inst->remove();
> +            progress = true;

I think you meant something like

if (inst->writes_accumulator) {
   inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
} else {
   inst->remove();
}
progress = true;

rather than removing instructions that implicitly wrote the
accumulator when their destination was dead.

>           }
>        }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index e590bdf..1cf35b4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -1411,6 +1411,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
>        brw_set_flag_reg(p, 0, inst->flag_subreg);
>        brw_set_saturate(p, inst->saturate);
>        brw_set_mask_control(p, inst->force_writemask_all);
> +      brw_set_acc_write_control(p, inst->writes_accumulator);
>
>        if (inst->force_uncompressed || dispatch_width == 8) {
>          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
> @@ -1434,9 +1435,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
>          brw_AVG(p, dst, src[0], src[1]);
>          break;
>        case BRW_OPCODE_MACH:
> -        brw_set_acc_write_control(p, 1);
>          brw_MACH(p, dst, src[0], src[1]);
> -        brw_set_acc_write_control(p, 0);
>          break;
>
>        case BRW_OPCODE_MAD:
> @@ -1540,15 +1539,11 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
>           break;
>        case BRW_OPCODE_ADDC:
>           assert(brw->gen >= 7);
> -         brw_set_acc_write_control(p, 1);
>           brw_ADDC(p, dst, src[0], src[1]);
> -         brw_set_acc_write_control(p, 0);
>           break;
>        case BRW_OPCODE_SUBB:
>           assert(brw->gen >= 7);
> -         brw_set_acc_write_control(p, 1);
>           brw_SUBB(p, dst, src[0], src[1]);
> -         brw_set_acc_write_control(p, 0);
>           break;
>
>        case BRW_OPCODE_BFE:
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 407319b..7a26755 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -666,6 +666,9 @@ backend_instruction::can_do_saturate()
>  bool
>  backend_instruction::has_side_effects() const
>  {
> +   if (this->writes_accumulator)
> +      return true;
> +
>     switch (opcode) {
>     case SHADER_OPCODE_UNTYPED_ATOMIC:
>        return true;
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
> index 086d042..e0f6a06 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -59,6 +59,7 @@ public:
>
>     uint8_t predicate;
>     bool predicate_inverse;
> +   bool writes_accumulator; /**< instruction implicitly writes accumulator */
>  };
>
>  enum instruction_scheduler_mode {
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 87825f1..473f26c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -333,20 +333,7 @@ vec4_visitor::dead_code_eliminate()
>        if (inst->dst.file == GRF && !inst->has_side_effects()) {
>           assert(this->virtual_grf_end[inst->dst.reg] >= pc);
>           if (this->virtual_grf_end[inst->dst.reg] == pc) {
> -            /* Don't dead code eliminate instructions that write to the
> -             * accumulator as a side-effect. Instead just set the destination
> -             * to the null register to free it.
> -             */
> -            switch (inst->opcode) {
> -            case BRW_OPCODE_ADDC:
> -            case BRW_OPCODE_SUBB:
> -            case BRW_OPCODE_MACH:
> -               inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
> -               break;
> -            default:
> -               inst->remove();
> -               break;
> -            }
> +            inst->remove();
>              progress = true;

Same comment as above.

>           }
>        }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index a74514f..5f85d31 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -971,9 +971,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
>        brw_MUL(p, dst, src[0], src[1]);
>        break;
>     case BRW_OPCODE_MACH:
> -      brw_set_acc_write_control(p, 1);
>        brw_MACH(p, dst, src[0], src[1]);
> -      brw_set_acc_write_control(p, 0);
>        break;
>
>     case BRW_OPCODE_MAD:
> @@ -1077,15 +1075,11 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
>        break;
>     case BRW_OPCODE_ADDC:
>        assert(brw->gen >= 7);
> -      brw_set_acc_write_control(p, 1);
>        brw_ADDC(p, dst, src[0], src[1]);
> -      brw_set_acc_write_control(p, 0);
>        break;
>     case BRW_OPCODE_SUBB:
>        assert(brw->gen >= 7);
> -      brw_set_acc_write_control(p, 1);
>        brw_SUBB(p, dst, src[0], src[1]);
> -      brw_set_acc_write_control(p, 0);
>        break;
>
>     case BRW_OPCODE_BFE:
> @@ -1317,6 +1311,7 @@ vec4_generator::generate_code(exec_list *instructions)
>        brw_set_predicate_inverse(p, inst->predicate_inverse);
>        brw_set_saturate(p, inst->saturate);
>        brw_set_mask_control(p, inst->force_writemask_all);
> +      brw_set_acc_write_control(p, inst->writes_accumulator);
>
>        unsigned pre_emit_nr_insn = p->nr_insn;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index c73e58d..33df5d3 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -42,6 +42,7 @@ vec4_instruction::vec4_instruction(vec4_visitor *v,
>     this->force_writemask_all = false;
>     this->no_dd_clear = false;
>     this->no_dd_check = false;
> +   this->writes_accumulator = false;
>     this->conditional_mod = BRW_CONDITIONAL_NONE;
>     this->sampler = 0;
>     this->texture_offset = 0;
> @@ -113,7 +114,7 @@ vec4_visitor::emit(enum opcode opcode)
>     vec4_visitor::op(dst_reg dst, src_reg src0)                         \
>     {                                                                   \
>        return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
> -                                          src0);                       \
> +                                          src0);                       \

Spurious change.


More information about the mesa-dev mailing list