Mesa (master): aco: fix NSA MIMG followed by MUBUF/MTBUF
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Mar 17 12:41:34 UTC 2021
Module: Mesa
Branch: master
Commit: 194f3e4c69b10b9dee4d577ef02218bf37702860
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=194f3e4c69b10b9dee4d577ef02218bf37702860
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Mon Feb 22 11:12:15 2021 +0000
aco: fix NSA MIMG followed by MUBUF/MTBUF
No fossil-db changes on GFX10.
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Fixes: c353895c922 ("aco: use non-sequential addressing")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9187>
---
src/amd/compiler/aco_assembler.cpp | 21 ++++++++++++---------
src/amd/compiler/aco_insert_NOPs.cpp | 19 +++++++++++++++++++
src/amd/compiler/aco_ir.h | 2 ++
3 files changed, 33 insertions(+), 9 deletions(-)
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 994ed2860e1..19b63d12752 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -48,6 +48,15 @@ static uint32_t get_sdwa_sel(unsigned sel, PhysReg reg)
return sel & sdwa_asuint;
}
+unsigned get_mimg_nsa_dwords(const Instruction *instr) { /* Returns how many extra NSA dwords are needed to encode instr's address operands, or 0 when none are needed. */
+ unsigned addr_dwords = instr->operands.size() - 3; /* operands from index 3 onward are treated as the address operands */
+ for (unsigned i = 1; i < addr_dwords; i++) {
+ if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4)) /* operand i is not at the first address register advanced by i * 4 (presumably bytes, i.e. i registers -- confirm against PhysReg::advance) */
+ return DIV_ROUND_UP(addr_dwords - 1, 4); /* the assembler packs one 8-bit register index per address after the first, four per dword */
+ }
+ return 0; /* every address operand follows the first one consecutively, so no NSA dwords are required */
+}
+
void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* instr)
{
/* lower remaining pseudo-instructions */
@@ -412,14 +421,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::MIMG: {
- unsigned use_nsa = false;
- unsigned addr_dwords = instr->operands.size() - 3;
- for (unsigned i = 1; i < addr_dwords; i++) {
- if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
- use_nsa = true;
- }
- assert(!use_nsa || ctx.chip_class >= GFX10);
- unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
+ unsigned nsa_dwords = get_mimg_nsa_dwords(instr);
+ assert(!nsa_dwords || ctx.chip_class >= GFX10);
MIMG_instruction& mimg = instr->mimg();
uint32_t encoding = (0b111100 << 26);
@@ -463,7 +466,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
if (nsa_dwords) {
out.resize(out.size() + nsa_dwords);
std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
- for (unsigned i = 0; i < addr_dwords - 1; i++)
+ for (unsigned i = 0; i < instr->operands.size() - 4u; i++)
nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
}
break;
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index b6213165113..15c41a7c6d7 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -25,6 +25,7 @@
#include <algorithm>
#include "aco_ir.h"
+#include "aco_builder.h"
#include <stack>
#include <functional>
@@ -149,6 +150,7 @@ struct NOP_ctx_gfx10 {
bool has_branch_after_VMEM = false;
bool has_DS = false;
bool has_branch_after_DS = false;
+ bool has_NSA_MIMG = false;
std::bitset<128> sgprs_read_by_VMEM;
std::bitset<128> sgprs_read_by_SMEM;
@@ -159,6 +161,7 @@ struct NOP_ctx_gfx10 {
has_branch_after_VMEM |= other.has_branch_after_VMEM;
has_DS |= other.has_DS;
has_branch_after_DS |= other.has_branch_after_DS;
+ has_NSA_MIMG |= other.has_NSA_MIMG;
sgprs_read_by_VMEM |= other.sgprs_read_by_VMEM;
sgprs_read_by_SMEM |= other.sgprs_read_by_SMEM;
}
@@ -172,6 +175,7 @@ struct NOP_ctx_gfx10 {
has_branch_after_VMEM == other.has_branch_after_VMEM &&
has_DS == other.has_DS &&
has_branch_after_DS == other.has_branch_after_DS &&
+ has_NSA_MIMG == other.has_NSA_MIMG &&
sgprs_read_by_VMEM == other.sgprs_read_by_VMEM &&
sgprs_read_by_SMEM == other.sgprs_read_by_SMEM;
}
@@ -737,6 +741,21 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
wait->imm = 0;
new_instructions.emplace_back(std::move(wait));
}
+
+ /* NSAToVMEMBug
+ * Handles NSA MIMG (4 or more dwords) immediately followed by MUBUF/MTBUF (with offset[2:1] != 0).
+ */
+ if (instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 1) {
+ ctx.has_NSA_MIMG = true;
+ } else if (ctx.has_NSA_MIMG) {
+ ctx.has_NSA_MIMG = false;
+
+ if (instr->isMUBUF() || instr->isMTBUF()) {
+ uint32_t offset = instr->isMUBUF() ? instr->mubuf().offset : instr->mtbuf().offset;
+ if (offset & 6)
+ Builder(program, &new_instructions).sopp(aco_opcode::s_nop, -1, 0);
+ }
+ }
}
template <typename Ctx>
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index ff3f3e63e7b..cf69e90ca52 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1615,6 +1615,8 @@ bool needs_exec_mask(const Instruction* instr);
uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
+unsigned get_mimg_nsa_dwords(const Instruction *instr);
+
enum block_kind {
/* uniform indicates that leaving this block,
* all actives lanes stay active */
More information about the mesa-commit
mailing list