Mesa (master): aco: form sparse load clauses

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jan 8 14:43:26 UTC 2021


Module: Mesa
Branch: master
Commit: a502aa7b043c1c7e1c33dc28f7ac4b066802571a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a502aa7b043c1c7e1c33dc28f7ac4b066802571a

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Mon Nov 23 12:51:15 2020 +0000

aco: form sparse load clauses

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7775>

---

 src/amd/compiler/aco_instruction_selection.cpp |  5 ++++
 src/amd/compiler/aco_ir.h                      | 13 +++++++-
 src/amd/compiler/aco_opt_value_numbering.cpp   |  2 +-
 src/amd/compiler/aco_print_ir.cpp              |  2 ++
 src/amd/compiler/tests/test_isel.cpp           | 41 ++++++++++++++++++++++++++
 5 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 016d3a043d9..94b35a3a063 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5887,6 +5887,11 @@ Operand emit_tfe_init(Builder& bld, Temp dst)
    for (unsigned i = 0; i < dst.size(); i++)
       vec->operands[i] = Operand(0u);
    vec->definitions[0] = Definition(tmp);
+   /* Since this is fixed to an instruction's definition register, any CSE will
+    * just create copies. Copying costs about the same as zero-initialization,
+    * but these copies can break up clauses.
+    */
+   vec->definitions[0].setNoCSE(true);
    bld.insert(std::move(vec));
 
    return Operand(tmp);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 9345321c1dd..a97927b2036 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -851,7 +851,7 @@ class Definition final
 {
 public:
    constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0),
-                            isKill_(0), isPrecise_(0), isNUW_(0) {}
+                            isKill_(0), isPrecise_(0), isNUW_(0), isNoCSE_(0) {}
    Definition(uint32_t index, RegClass type) noexcept
       : temp(index, type) {}
    explicit Definition(Temp tmp) noexcept
@@ -959,6 +959,16 @@ public:
       return isNUW_;
    }
 
+   constexpr void setNoCSE(bool noCSE) noexcept
+   {
+      isNoCSE_ = noCSE;
+   }
+
+   constexpr bool isNoCSE() const noexcept
+   {
+      return isNoCSE_;
+   }
+
 private:
    Temp temp = Temp(0, s1);
    PhysReg reg_;
@@ -969,6 +979,7 @@ private:
          uint8_t isKill_:1;
          uint8_t isPrecise_:1;
          uint8_t isNUW_:1;
+         uint8_t isNoCSE_:1;
       };
       /* can't initialize bit-fields in c++11, so work around using a union */
       uint8_t control_ = 0;
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index af05df9c682..8dc2812bc7e 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -383,7 +383,7 @@ void process_block(vn_ctx& ctx, Block& block)
           instr->opcode == aco_opcode::p_demote_to_helper)
          ctx.exec_id++;
 
-      if (instr->definitions.empty() || instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi) {
+      if (instr->definitions.empty() || is_phi(instr) || instr->definitions[0].isNoCSE()) {
          new_instructions.emplace_back(std::move(instr));
          continue;
       }
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 6a5da5dac19..f99046da007 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -192,6 +192,8 @@ static void print_definition(const Definition *definition, FILE *output)
       fprintf(output, "(precise)");
    if (definition->isNUW())
       fprintf(output, "(nuw)");
+   if (definition->isNoCSE())
+      fprintf(output, "(noCSE)");
    fprintf(output, "%%%d", definition->tempId());
 
    if (definition->isFixed())
diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp
index 83daf37b8d9..676ff2c83c2 100644
--- a/src/amd/compiler/tests/test_isel.cpp
+++ b/src/amd/compiler/tests/test_isel.cpp
@@ -133,3 +133,44 @@ BEGIN_TEST(isel.gs.no_verts)
       fprintf(output, "success\n");
    }
 END_TEST
+
+BEGIN_TEST(isel.sparse.clause)
+   for (unsigned i = GFX10; i <= GFX10; i++) {
+      if (!set_variant((chip_class)i))
+         continue;
+
+      QoShaderModuleCreateInfo cs = qoShaderModuleCreateInfoGLSL(COMPUTE,
+         QO_EXTENSION GL_ARB_sparse_texture2 : require
+         layout(local_size_x=1) in;
+         layout(binding=0) uniform sampler2D tex;
+         layout(binding=0) buffer Buf {
+            vec4 res[4];
+            uint code[4];
+         };
+         void main() {
+            //>> v5: (noCSE)%zero0 = p_create_vector 0, 0, 0, 0, 0
+            //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero0 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+            //>> v5: (noCSE)%zero1 = p_create_vector 0, 0, 0, 0, 0
+            //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero1 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+            //>> v5: (noCSE)%zero2 = p_create_vector 0, 0, 0, 0, 0
+            //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero2 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+            //>> v5: (noCSE)%zero3 = p_create_vector 0, 0, 0, 0, 0
+            //>> v5: %_ = image_sample_lz_o %_, %_, %_, %zero3 dmask:xyzw 2d tfe storage: semantics: scope:invocation
+            //>> s_clause 0x3
+            //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+            //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+            //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+            //! image_sample_lz_o v#_, v[#_:#_], @s256(img), @s128(samp) dmask:0xf dim:SQ_RSRC_IMG_2D tfe
+            code[0] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(1, 0), res[0]);
+            code[1] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(2, 0), res[1]);
+            code[2] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(3, 0), res[2]);
+            code[3] = sparseTextureOffsetARB(tex, vec2(0.5), ivec2(4, 0), res[3]);
+         }
+      );
+
+      PipelineBuilder pbld(get_vk_device((chip_class)i));
+      pbld.add_cs(cs);
+      pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "ACO IR", true);
+      pbld.print_ir(VK_SHADER_STAGE_COMPUTE_BIT, "Assembly", true);
+   }
+END_TEST



More information about the mesa-commit mailing list