Mesa (master): aco: form sparse load clauses
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jan 8 14:43:26 UTC 2021
Module: Mesa
Branch: master
Commit: a502aa7b043c1c7e1c33dc28f7ac4b066802571a
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a502aa7b043c1c7e1c33dc28f7ac4b066802571a
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Mon Nov 23 12:51:15 2020 +0000
aco: form sparse load clauses
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7775>
---
src/amd/compiler/aco_instruction_selection.cpp | 5 ++++
src/amd/compiler/aco_ir.h | 13 +++++++-
src/amd/compiler/aco_opt_value_numbering.cpp | 2 +-
src/amd/compiler/aco_print_ir.cpp | 2 ++
src/amd/compiler/tests/test_isel.cpp | 41 ++++++++++++++++++++++++++
5 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 016d3a043d9..94b35a3a063 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5887,6 +5887,11 @@ Operand emit_tfe_init(Builder& bld, Temp dst)
for (unsigned i = 0; i < dst.size(); i++)
vec->operands[i] = Operand(0u);
vec->definitions[0] = Definition(tmp);
+ /* Since this is fixed to an instruction's definition register, any CSE will
+ * just create copies. Copying costs about the same as zero-initialization,
+ * but these copies can break up clauses.
+ */
+ vec->definitions[0].setNoCSE(true);
bld.insert(std::move(vec));
return Operand(tmp);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 9345321c1dd..a97927b2036 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -851,7 +851,7 @@ class Definition final
{
public:
constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0),
- isKill_(0), isPrecise_(0), isNUW_(0) {}
+ isKill_(0), isPrecise_(0), isNUW_(0), isNoCSE_(0) {}
Definition(uint32_t index, RegClass type) noexcept
: temp(index, type) {}
explicit Definition(Temp tmp) noexcept
@@ -959,6 +959,16 @@ public:
return isNUW_;
}
+ constexpr void setNoCSE(bool noCSE) noexcept
+ {
+ isNoCSE_ = noCSE;
+ }
+
+ constexpr bool isNoCSE() const noexcept
+ {
+ return isNoCSE_;
+ }
+
private:
Temp temp = Temp(0, s1);
PhysReg reg_;
@@ -969,6 +979,7 @@ private:
uint8_t isKill_:1;
uint8_t isPrecise_:1;
uint8_t isNUW_:1;
+ uint8_t isNoCSE_:1;
};
/* can't initialize bit-fields in c++11, so work around using a union */
uint8_t control_ = 0;
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index af05df9c682..8dc2812bc7e 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -383,7 +383,7 @@ void process_block(vn_ctx& ctx, Block& block)
instr->opcode == aco_opcode::p_demote_to_helper)
ctx.exec_id++;
- if (instr->definitions.empty() || instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi) {
+ if (instr->definitions.empty() || is_phi(instr) || instr->definitions[0].isNoCSE()) {
new_instructions.emplace_back(std::move(instr));
continue;
}
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 6a5da5dac19..f99046da007 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -192,6 +192,8 @@ static void print_definition(const Definition *definition, FILE *output)
fprintf(output, "(precise)");
if (definition->isNUW())
fprintf(output, "(nuw)");
+ if (definition->isNoCSE())
+ fprintf(output, "(noCSE)");
fprintf(output, "%%%d", definition->tempId());
if (definition->isFixed())
diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp
index 83daf37b8d9..676ff2c83c2 100644
--- a/src/amd/compiler/tests/test_isel.cpp
+++ b/src/amd/compiler/tests/test_isel.cpp
@@ -133,3 +133,44 @@ BEGIN_TEST(isel.gs.no_verts)
fprintf(output, "success\n");
}
END_TEST
+
+BEGIN_TEST(isel.sparse.clause)
+ for (unsigned i = GFX10; i <= GFX10; i++) {
+ if (!set_variant((chip_class)i))
+ continue;
+
+ QoShaderModuleCreateInfo cs = qoShaderModuleCreateInfoGLSL(COMPUTE,
+ QO_EXTENSION GL_ARB_sparse_texture2 : require
+ layout(local_size_x=1) in;
+ layout(binding=0) uniform sampler2D tex;
+ layout(binding=0) buffer Buf {
+ vec4 res[4];
+ uint code[4];
+ };
+ void main() {
+ //>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0
+ //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero0 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0
+ //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero1 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0
+ //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero2 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0
+ //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero3 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+ //>> s_clause 0x3
+ //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+ code[0] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(1, 0), res[0]);
+ code[1] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(2, 0), res[1]);
+ code[2] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(3, 0), res[2]);
+ code[3] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(4, 0), res[3]);
+ }
+ );
+
+ PipelineBuilder pbld(get_vk_device((chip_class)i));
+ pbld.add_cs(cs);
+ pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "ACO IR", true);
+ pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "Assembly", true);
+ }
+END_TEST
More information about the mesa-commit mailing list