Mesa (master): aco: add v_nop in between exec write and VMEM/DS/FLAT

Fri Nov 29 18:07:21 UTC 2019

Module: Mesa
Branch: master
Commit: a9fc81b098ca36d063dbdb6f69ffde1ab215d34b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a9fc81b098ca36d063dbdb6f69ffde1ab215d34b

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Nov 27 17:20:15 2019 +0000

aco: add v_nop in between exec write and VMEM/DS/FLAT

LLVM and the proprietary compiler both seem to insert this v_nop as well.

Fixes: b01847bd9 ("aco/gfx10: Fix mitigation of VMEMtoScalarWriteHazard.")
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>

---

 src/amd/compiler/aco_insert_NOPs.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index b0404300e0e..4871bea4ba7 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -385,7 +385,7 @@ void insert_NOPs_gfx8_9(Program* program)
    }
 }
 
-void handle_instruction_gfx10(NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
+void handle_instruction_gfx10(Program *program, NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
                               std::vector<aco_ptr<Instruction>>& old_instructions,
                               std::vector<aco_ptr<Instruction>>& new_instructions)
 {
@@ -396,6 +396,9 @@ void handle_instruction_gfx10(NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
        instr->format == Format::SCRATCH || instr->format == Format::DS) {
       /* Remember all SGPRs that are read by the VMEM instruction */
       mark_read_regs(instr, ctx.sgprs_read_by_VMEM);
+      ctx.sgprs_read_by_VMEM.set(exec);
+      if (program->wave_size == 64)
+         ctx.sgprs_read_by_VMEM.set(exec_hi);
    } else if (instr->isSALU() || instr->format == Format::SMEM) {
       /* Check if SALU writes an SGPR that was previously read by the VALU */
       if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) {
@@ -528,7 +531,7 @@ void handle_instruction_gfx10(NOP_ctx_gfx10 &ctx, aco_ptr<Instruction>& instr,
    }
 }
 
-void handle_block_gfx10(NOP_ctx_gfx10& ctx, Block& block)
+void handle_block_gfx10(Program *program, NOP_ctx_gfx10& ctx, Block& block)
 {
    if (block.instructions.empty())
       return;
@@ -537,7 +540,7 @@ void handle_block_gfx10(NOP_ctx_gfx10& ctx, Block& block)
    instructions.reserve(block.instructions.size());
 
    for (aco_ptr<Instruction>& instr : block.instructions) {
-      handle_instruction_gfx10(ctx, instr, block.instructions, instructions);
+      handle_instruction_gfx10(program, ctx, instr, block.instructions, instructions);
       instructions.emplace_back(std::move(instr));
    }
 
@@ -562,7 +565,7 @@ void mitigate_hazards_gfx10(Program *program)
             for (unsigned b : program->blocks[idx].linear_preds)
                loop_block_ctx.join(all_ctx[b]);
 
-            handle_block_gfx10(loop_block_ctx, program->blocks[idx]);
+            handle_block_gfx10(program, loop_block_ctx, program->blocks[idx]);
 
             /* We only need to continue if the loop header context changed */
             if (idx == loop_header_indices.top() && loop_block_ctx == all_ctx[idx])
@@ -577,7 +580,7 @@ void mitigate_hazards_gfx10(Program *program)
       for (unsigned b : block.linear_preds)
          ctx.join(all_ctx[b]);
 
-      handle_block_gfx10(ctx, block);
+      handle_block_gfx10(program, ctx, block);
    }
 }