Mesa (master): aco: fix NSA MIMG followed by MUBUF/MTBUF

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Mar 17 12:41:34 UTC 2021


Module: Mesa
Branch: master
Commit: 194f3e4c69b10b9dee4d577ef02218bf37702860
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=194f3e4c69b10b9dee4d577ef02218bf37702860

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Mon Feb 22 11:12:15 2021 +0000

aco: fix NSA MIMG followed by MUBUF/MTBUF

No fossil-db changes on GFX10.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Fixes: c353895c922 ("aco: use non-sequential addressing")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9187>

---

 src/amd/compiler/aco_assembler.cpp   | 21 ++++++++++++---------
 src/amd/compiler/aco_insert_NOPs.cpp | 19 +++++++++++++++++++
 src/amd/compiler/aco_ir.h            |  2 ++
 3 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 994ed2860e1..19b63d12752 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -48,6 +48,15 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
    return sel & sdwa_asuint;
 }
 
+unsigned get_mimg_nsa_dwords(const Instruction *instr) {
+   unsigned addr_dwords = instr->operands.size() - 3;
+   for (unsigned i = 1; i < addr_dwords; i++) {
+      if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
+         return DIV_ROUND_UP(addr_dwords - 1, 4);
+   }
+   return 0;
+}
+
 void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
 {
    /* lower remaining pseudo-instructions */
@@ -412,14 +421,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    }
    case Format::MIMG: {
-      unsigned use_nsa = false;
-      unsigned addr_dwords = instr->operands.size() - 3;
-      for (unsigned i = 1; i < addr_dwords; i++) {
-         if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
-            use_nsa = true;
-      }
-      assert(!use_nsa || ctx.chip_class >= GFX10);
-      unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
+      unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
+      assert(!nsa_dwords || ctx.chip_class >= GFX10);
 
       MIMG_instruction& mimg = instr->mimg();
       uint32_t encoding = (0b111100 << 26);
@@ -463,7 +466,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       if (nsa_dwords) {
          out.resize(out.size() + nsa_dwords);
          std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
-         for (unsigned i = 0; i < addr_dwords - 1; i++)
+         for (unsigned i = 0; i < instr->operands.size() - 4u; i++)
             nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
       }
       break;
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index b6213165113..15c41a7c6d7 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -25,6 +25,7 @@
 #include <algorithm>
 
 #include "aco_ir.h"
+#include "aco_builder.h"
 #include <stack>
 #include <functional>
 
@@ -149,6 +150,7 @@ struct NOP_ctx_gfx10 {
    bool has_branch_after_VMEM = false;
    bool has_DS = false;
    bool has_branch_after_DS = false;
+   bool has_NSA_MIMG = false;
    std::bitset<128> sgprs_read_by_VMEM;
    std::bitset<128> sgprs_read_by_SMEM;
 
@@ -159,6 +161,7 @@ struct NOP_ctx_gfx10 {
       has_branch_after_VMEM |= other.has_branch_after_VMEM;
       has_DS |= other.has_DS;
       has_branch_after_DS |= other.has_branch_after_DS;
+      has_NSA_MIMG |= other.has_NSA_MIMG;
       sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
       sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
    }
@@ -172,6 +175,7 @@ struct NOP_ctx_gfx10 {
          has_branch_after_VMEM == other.has_branch_after_VMEM &&
          has_DS == other.has_DS &&
          has_branch_after_DS == other.has_branch_after_DS &&
+         has_NSA_MIMG == other.has_NSA_MIMG &&
          sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
          sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
    }
@@ -737,6 +741,21 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
       wait->imm = 0;
       new_instructions.emplace_back(std::move(wait));
    }
+
+   /* NSAToVMEMBug
+    * Handles NSA MIMG (4 or more dwords) immediately followed by MUBUF/MTBUF (with offset[2:1] != 0).
+    */
+   if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
+      ctx.has_NSA_MIMG = true;
+   } else if (ctx.has_NSA_MIMG) {
+      ctx.has_NSA_MIMG = false;
+
+      if (instr->isMUBUF() || instr->isMTBUF()) {
+         uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
+         if (offset & 6)
+            Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
+      }
+   }
 }
 
 template <typename Ctx>
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index ff3f3e63e7b..cf69e90ca52 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1615,6 +1615,8 @@ bool needs_exec_mask(const Instruction* instr);
 
 uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
 
+unsigned get_mimg_nsa_dwords(const Instruction *instr);
+
 enum block_kind {
    /* uniform indicates that leaving this block,
     * all actives lanes stay active */



More information about the mesa-commit mailing list