Mesa (master): aco: use non-sequential addressing
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jan 20 17:00:54 UTC 2021
Module: Mesa
Branch: master
Commit: c353895c92270c0e2a6e2b849c24d558efae0d5e
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c353895c92270c0e2a6e2b849c24d558efae0d5e
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Thu Jan 14 19:58:13 2021 +0000
aco: use non-sequential addressing
fossil-db (GFX10.3):
Totals from 70493 (50.57% of 139391) affected shaders:
SGPRs: 4232624 -> 4231808 (-0.02%); split: -0.09%, +0.07%
VGPRs: 2831772 -> 2764740 (-2.37%); split: -2.53%, +0.17%
CodeSize: 225584412 -> 225048740 (-0.24%); split: -0.44%, +0.21%
MaxWaves: 875319 -> 878837 (+0.40%); split: +0.44%, -0.04%
Instrs: 43157803 -> 42496421 (-1.53%); split: -1.54%, +0.01%
Cycles: 1656380132 -> 1641532056 (-0.90%); split: -0.94%, +0.04%
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8523>
---
src/amd/compiler/aco_assembler.cpp | 17 +++++++++
src/amd/compiler/aco_instruction_selection.cpp | 52 ++++++++++++++++----------
src/amd/compiler/aco_validate.cpp | 13 +++++--
src/amd/compiler/tests/test_isel.cpp | 16 ++++----
4 files changed, 68 insertions(+), 30 deletions(-)
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 35f07dddb85..05ec485a2cf 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -428,6 +428,15 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
break;
}
case Format::MIMG: {
+ unsigned use_nsa = false;
+ unsigned addr_dwords = instr->operands.size() - 3;
+ for (unsigned i = 1; i < addr_dwords; i++) {
+ if (instr->operands[3 + i].physReg() != instr->operands[3].physReg().advance(i * 4))
+ use_nsa = true;
+ }
+ assert(!use_nsa || ctx.chip_class >= GFX10);
+ unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
+
MIMG_instruction* mimg = static_cast<MIMG_instruction*>(instr);
uint32_t encoding = (0b111100 << 26);
encoding |= mimg->slc ? 1 << 25 : 0;
@@ -443,6 +452,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
encoding |= mimg->da ? 1 << 14 : 0;
} else {
encoding |= mimg->r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
+ encoding |= nsa_dwords << 1;
encoding |= mimg->dim << 3; /* GFX10: dimensionality instead of declare array */
encoding |= mimg->dlc ? 1 << 7 : 0;
}
@@ -465,6 +475,13 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
}
out.push_back(encoding);
+
+ if (nsa_dwords) {
+ out.resize(out.size() + nsa_dwords);
+ std::vector<uint32_t>::iterator nsa = std::prev(out.end(), nsa_dwords);
+ for (unsigned i = 0; i < addr_dwords - 1; i++)
+ nsa[i / 4] |= (0xFF & instr->operands[4 + i].physReg().reg()) << (i % 4 * 8);
+ }
break;
}
case Format::FLAT:
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index d97604f8571..08ad9572bdb 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5841,38 +5841,52 @@ static MIMG_instruction *emit_mimg(Builder& bld, aco_opcode op,
Definition dst,
Temp rsrc,
Operand samp,
- const std::vector<Temp>& coords,
+ std::vector<Temp> coords,
unsigned num_wqm_coords=0,
Operand vdata=Operand(v1))
{
- Temp coord = coords[0];
- if (coords.size() > 1) {
- coord = bld.tmp(RegType::vgpr, coords.size());
+ if (bld.program->chip_class < GFX10) {
+ Temp coord = coords[0];
+ if (coords.size() > 1) {
+ coord = bld.tmp(RegType::vgpr, coords.size());
- aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, coords.size(), 1)};
- for (unsigned i = 0; i < coords.size(); i++)
- vec->operands[i] = Operand(coords[i]);
- vec->definitions[0] = Definition(coord);
- bld.insert(std::move(vec));
- } else if (coord.type() == RegType::sgpr) {
- coord = bld.copy(bld.def(v1), coord);
- }
+ aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, coords.size(), 1)};
+ for (unsigned i = 0; i < coords.size(); i++)
+ vec->operands[i] = Operand(coords[i]);
+ vec->definitions[0] = Definition(coord);
+ bld.insert(std::move(vec));
+ } else if (coord.type() == RegType::sgpr) {
+ coord = bld.copy(bld.def(v1), coord);
+ }
- if (num_wqm_coords) {
- /* We don't need the bias, sample index, compare value or offset to be
- * computed in WQM but if the p_create_vector copies the coordinates, then it
- * needs to be in WQM. */
- coord = emit_wqm(bld, coord, bld.tmp(coord.regClass()), true);
+ if (num_wqm_coords) {
+ /* We don't need the bias, sample index, compare value or offset to be
+ * computed in WQM but if the p_create_vector copies the coordinates, then it
+ * needs to be in WQM. */
+ coord = emit_wqm(bld, coord, bld.tmp(coord.regClass()), true);
+ }
+
+ coords[0] = coord;
+ coords.resize(1);
+ } else {
+ for (unsigned i = 0; i < num_wqm_coords; i++)
+ coords[i] = emit_wqm(bld, coords[i], bld.tmp(coords[i].regClass()), true);
+
+ for (Temp& coord : coords) {
+ if (coord.type() == RegType::sgpr)
+ coord = bld.copy(bld.def(v1), coord);
+ }
}
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
- op, Format::MIMG, 4, dst.isTemp())};
+ op, Format::MIMG, 3 + coords.size(), dst.isTemp())};
if (dst.isTemp())
mimg->definitions[0] = dst;
mimg->operands[0] = Operand(rsrc);
mimg->operands[1] = samp;
mimg->operands[2] = vdata;
- mimg->operands[3] = Operand(coord);
+ for (unsigned i = 0; i < coords.size(); i++)
+ mimg->operands[3 + i] = Operand(coords[i]);
MIMG_instruction *res = mimg.get();
bld.insert(std::move(mimg));
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 3abb21b5796..4c707b49699 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -436,7 +436,7 @@ bool validate_ir(Program* program)
break;
}
case Format::MIMG: {
- check(instr->operands.size() == 4, "MIMG instructions must have 4 operands", instr.get());
+ check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands", instr.get());
check(instr->operands[0].hasRegClass() && (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
"MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
if (instr->operands[1].hasRegClass())
@@ -447,8 +447,15 @@ bool validate_ir(Program* program)
check(instr->definitions.empty() || (instr->definitions[0].regClass() == instr->operands[2].regClass() || is_cmpswap),
"MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and TFE/LWE loads", instr.get());
}
- check(instr->operands[3].hasRegClass() && instr->operands[3].regClass().type() == RegType::vgpr,
- "MIMG operands[3] (VADDR) must be VGPR", instr.get());
+ check(instr->operands.size() == 4 || program->chip_class >= GFX10, "NSA is only supported on GFX10+", instr.get());
+ for (unsigned i = 3; i < instr->operands.size(); i++) {
+ if (instr->operands.size() == 4) {
+ check(instr->operands[i].hasRegClass() && instr->operands[i].regClass().type() == RegType::vgpr,
+ "MIMG operands[3] (VADDR) must be VGPR", instr.get());
+ } else {
+ check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used", instr.get());
+ }
+ }
check(instr->definitions.empty() || (instr->definitions[0].isTemp() && instr->definitions[0].regClass().type() == RegType::vgpr),
"MIMG definitions[0] (VDATA) must be VGPR", instr.get());
break;
diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp
index c911fea839b..208833c54fc 100644
--- a/src/amd/compiler/tests/test_isel.cpp
+++ b/src/amd/compiler/tests/test_isel.cpp
@@ -149,18 +149,18 @@ BEGIN_TEST(isel.sparse.clause)
};
void main() {
//>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0
- //>> v5: %_ = image_sample_lz_o %_, %_, %zero0, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> v5: %_ = image_sample_lz_o %_, %_, %zero0, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0
- //>> v5: %_ = image_sample_lz_o %_, %_, %zero1, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> v5: %_ = image_sample_lz_o %_, %_, %zero1, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0
- //>> v5: %_ = image_sample_lz_o %_, %_, %zero2, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> v5: %_ = image_sample_lz_o %_, %_, %zero2, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0
- //>> v5: %_ = image_sample_lz_o %_, %_, %zero3, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> v5: %_ = image_sample_lz_o %_, %_, %zero3, %_, %_, %_ dmask:xyzw 2d tfe storage: semantics: scope:invocation
//>> s_clause 0x3
- //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
- //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
- //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
- //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ //! image_sample_lz_o v#_, [v#_, v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ //! image_sample_lz_o v#_, [v#_, v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ //! image_sample_lz_o v#_, [v#_, v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ //! image_sample_lz_o v#_, [v#_, v#_, v#_, v#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
code[0] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(1, 0), res[0]);
code[1] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(2, 0), res[1]);
code[2] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(3, 0), res[2]);
More information about the mesa-commit mailing list