[Mesa-dev] [RFC PATCH 1/5] i965: Add writes_accumulator flag
Matt Turner
mattst88 at gmail.com
Fri Mar 21 23:17:54 PDT 2014
On Fri, Mar 21, 2014 at 7:33 AM, Juha-Pekka Heikkila
<juhapekka.heikkila at gmail.com> wrote:
> Our hardware has an "accumulator" register, which can be used to store
> intermediate results across multiple instructions. Many instructions
> can implicitly write a value to the accumulator in addition to their
> normal destination register. This is enabled by the "AccWrEn" flag.
>
> This patch introduces a new flag, inst->writes_accumulator, which
> allows us to express the AccWrEn notion in the IR. It also creates a
> n ALU2_ACC macro to easily define emitters for instructions that
> implicitly write the accumulator.
>
> Previously, we only supported implicit accumulator writes from the
> ADDC, SUBB, and MACH instructions. We always enabled them on those
> instructions, and left them disabled for other instructions.
>
> To take advantage of the MAC (multiply-accumulate) instruction, we
> need to be able to set AccWrEn on other types of instructions.
>
> Signed-off-by: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++++-------------
> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 +----
> src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +++
> src/mesa/drivers/dri/i965/brw_shader.h | 1 +
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 15 +----------
> src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 7 +----
> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 21 +++++++++++----
> 7 files changed, 39 insertions(+), 49 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 27cf0f6..107d9e6 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -64,6 +64,8 @@ fs_inst::init()
>
> /* This will be the case for almost all instructions. */
> this->regs_written = 1;
> +
> + this->writes_accumulator = false;
> }
>
> fs_inst::fs_inst()
> @@ -151,6 +153,15 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
> return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
> }
>
> +#define ALU2_ACC(op) \
> + fs_inst * \
> + fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1) \
> + { \
> + fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);\
> + inst->writes_accumulator = true; \
> + return inst; \
> + }
> +
> #define ALU3(op) \
> fs_inst * \
> fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \
> @@ -166,7 +177,7 @@ ALU1(RNDE)
> ALU1(RNDZ)
> ALU2(ADD)
> ALU2(MUL)
> -ALU2(MACH)
> +ALU2_ACC(MACH)
> ALU2(AND)
> ALU2(OR)
> ALU2(XOR)
> @@ -182,8 +193,8 @@ ALU1(FBH)
> ALU1(FBL)
> ALU1(CBIT)
> ALU3(MAD)
> -ALU2(ADDC)
> -ALU2(SUBB)
> +ALU2_ACC(ADDC)
> +ALU2_ACC(SUBB)
> ALU2(SEL)
>
> /** Gen4 predicated IF. */
> @@ -2103,21 +2114,8 @@ fs_visitor::dead_code_eliminate()
> }
>
> if (dead) {
> - /* Don't dead code eliminate instructions that write to the
> - * accumulator as a side-effect. Instead just set the destination
> - * to the null register to free it.
> - */
> - switch (inst->opcode) {
> - case BRW_OPCODE_ADDC:
> - case BRW_OPCODE_SUBB:
> - case BRW_OPCODE_MACH:
> - inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
> - break;
> - default:
> - inst->remove();
> - progress = true;
> - break;
> - }
> + inst->remove();
> + progress = true;
I think you meant something like
if (inst->writes_accumulator) {
inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
} else {
inst->remove();
}
progress = true;
rather than removing instructions that implicitly wrote the
accumulator when their destination was dead.
> }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index e590bdf..1cf35b4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -1411,6 +1411,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
> brw_set_flag_reg(p, 0, inst->flag_subreg);
> brw_set_saturate(p, inst->saturate);
> brw_set_mask_control(p, inst->force_writemask_all);
> + brw_set_acc_write_control(p, inst->writes_accumulator);
>
> if (inst->force_uncompressed || dispatch_width == 8) {
> brw_set_compression_control(p, BRW_COMPRESSION_NONE);
> @@ -1434,9 +1435,7 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
> brw_AVG(p, dst, src[0], src[1]);
> break;
> case BRW_OPCODE_MACH:
> - brw_set_acc_write_control(p, 1);
> brw_MACH(p, dst, src[0], src[1]);
> - brw_set_acc_write_control(p, 0);
> break;
>
> case BRW_OPCODE_MAD:
> @@ -1540,15 +1539,11 @@ fs_generator::generate_code(exec_list *instructions, FILE *dump_file)
> break;
> case BRW_OPCODE_ADDC:
> assert(brw->gen >= 7);
> - brw_set_acc_write_control(p, 1);
> brw_ADDC(p, dst, src[0], src[1]);
> - brw_set_acc_write_control(p, 0);
> break;
> case BRW_OPCODE_SUBB:
> assert(brw->gen >= 7);
> - brw_set_acc_write_control(p, 1);
> brw_SUBB(p, dst, src[0], src[1]);
> - brw_set_acc_write_control(p, 0);
> break;
>
> case BRW_OPCODE_BFE:
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 407319b..7a26755 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -666,6 +666,9 @@ backend_instruction::can_do_saturate()
> bool
> backend_instruction::has_side_effects() const
> {
> + if (this->writes_accumulator)
> + return true;
> +
> switch (opcode) {
> case SHADER_OPCODE_UNTYPED_ATOMIC:
> return true;
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
> index 086d042..e0f6a06 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -59,6 +59,7 @@ public:
>
> uint8_t predicate;
> bool predicate_inverse;
> + bool writes_accumulator; /**< instruction implicitly writes accumulator */
> };
>
> enum instruction_scheduler_mode {
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index 87825f1..473f26c 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -333,20 +333,7 @@ vec4_visitor::dead_code_eliminate()
> if (inst->dst.file == GRF && !inst->has_side_effects()) {
> assert(this->virtual_grf_end[inst->dst.reg] >= pc);
> if (this->virtual_grf_end[inst->dst.reg] == pc) {
> - /* Don't dead code eliminate instructions that write to the
> - * accumulator as a side-effect. Instead just set the destination
> - * to the null register to free it.
> - */
> - switch (inst->opcode) {
> - case BRW_OPCODE_ADDC:
> - case BRW_OPCODE_SUBB:
> - case BRW_OPCODE_MACH:
> - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
> - break;
> - default:
> - inst->remove();
> - break;
> - }
> + inst->remove();
> progress = true;
Same comment as above.
> }
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> index a74514f..5f85d31 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> @@ -971,9 +971,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
> brw_MUL(p, dst, src[0], src[1]);
> break;
> case BRW_OPCODE_MACH:
> - brw_set_acc_write_control(p, 1);
> brw_MACH(p, dst, src[0], src[1]);
> - brw_set_acc_write_control(p, 0);
> break;
>
> case BRW_OPCODE_MAD:
> @@ -1077,15 +1075,11 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
> break;
> case BRW_OPCODE_ADDC:
> assert(brw->gen >= 7);
> - brw_set_acc_write_control(p, 1);
> brw_ADDC(p, dst, src[0], src[1]);
> - brw_set_acc_write_control(p, 0);
> break;
> case BRW_OPCODE_SUBB:
> assert(brw->gen >= 7);
> - brw_set_acc_write_control(p, 1);
> brw_SUBB(p, dst, src[0], src[1]);
> - brw_set_acc_write_control(p, 0);
> break;
>
> case BRW_OPCODE_BFE:
> @@ -1317,6 +1311,7 @@ vec4_generator::generate_code(exec_list *instructions)
> brw_set_predicate_inverse(p, inst->predicate_inverse);
> brw_set_saturate(p, inst->saturate);
> brw_set_mask_control(p, inst->force_writemask_all);
> + brw_set_acc_write_control(p, inst->writes_accumulator);
>
> unsigned pre_emit_nr_insn = p->nr_insn;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index c73e58d..33df5d3 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -42,6 +42,7 @@ vec4_instruction::vec4_instruction(vec4_visitor *v,
> this->force_writemask_all = false;
> this->no_dd_clear = false;
> this->no_dd_check = false;
> + this->writes_accumulator = false;
> this->conditional_mod = BRW_CONDITIONAL_NONE;
> this->sampler = 0;
> this->texture_offset = 0;
> @@ -113,7 +114,7 @@ vec4_visitor::emit(enum opcode opcode)
> vec4_visitor::op(dst_reg dst, src_reg src0) \
> { \
> return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
> - src0); \
> + src0); \
Spurious change.
More information about the mesa-dev
mailing list