Mesa (master): aco: fix value numbering of reductions

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Sep 9 15:10:26 UTC 2020


Module: Mesa
Branch: master
Commit: 834b449a46716d64bd2cee99a029cdc48813cc9a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=834b449a46716d64bd2cee99a029cdc48813cc9a

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Mon Sep  7 20:44:54 2020 +0100

aco: fix value numbering of reductions

Non-ssa definitions caused an assertion in value numbering.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6662>

---

 src/amd/compiler/aco_instruction_selection.cpp | 65 ++++++++++++++++++++------
 src/amd/compiler/aco_reduce_assign.cpp         | 28 -----------
 src/amd/compiler/aco_util.h                    |  6 +++
 3 files changed, 57 insertions(+), 42 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 9c177338f20..ba1e3ddffd5 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7046,6 +7046,54 @@ void emit_uniform_subgroup(isel_context *ctx, nir_intrinsic_instr *instr, Temp s
    }
 }
 
+Pseudo_reduction_instruction *create_reduction_instr(isel_context *ctx, aco_opcode aco_op, ReduceOp op, Definition dst, Temp src)
+{
+   assert(src.bytes() <= 8);
+   Builder bld(ctx->program, ctx->block);
+
+   unsigned num_defs = 0;
+   Definition defs[5];
+   defs[num_defs++] = dst;
+   defs[num_defs++] = bld.def(bld.lm); /* used internally to save/restore exec */
+
+   /* scalar identity temporary */
+   bool need_sitmp = (ctx->program->chip_class <= GFX7 || ctx->program->chip_class >= GFX10) && aco_op != aco_opcode::p_reduce;
+   if (aco_op == aco_opcode::p_exclusive_scan) {
+      need_sitmp |=
+         (op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
+          op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
+          op == fmin16 || op == fmin32 || op == fmin64 ||
+          op == fmax16 || op == fmax32 || op == fmax64 ||
+          op == fmul16 || op == fmul64);
+   }
+   if (need_sitmp)
+      defs[num_defs++] = bld.def(RegType::sgpr, dst.size());
+
+   /* scc clobber */
+   defs[num_defs++] = bld.def(s1, scc);
+
+   /* vcc clobber */
+   bool clobber_vcc = false;
+   if ((op == iadd32 || op == imul64) && ctx->program->chip_class < GFX9)
+      clobber_vcc = true;
+   if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
+      clobber_vcc = true;
+
+   if (clobber_vcc)
+      defs[num_defs++] = bld.def(bld.lm, vcc);
+
+   Pseudo_reduction_instruction *reduce = create_instruction<Pseudo_reduction_instruction>(aco_op, Format::PSEUDO_REDUCTION, 3, num_defs);
+   reduce->operands[0] = Operand(src);
+   /* setup_reduce_temp will update these undef operands if needed */
+   reduce->operands[1] = Operand(RegClass(RegType::vgpr, dst.size()).as_linear());
+   reduce->operands[2] = Operand(v1.as_linear());
+   std::copy(defs, defs + num_defs, reduce->definitions.begin());
+
+   reduce->reduce_op = op;
+
+   return reduce;
+}
+
 void emit_interp_center(isel_context *ctx, Temp dst, Temp pos1, Temp pos2)
 {
    Builder bld(ctx->program, ctx->block);
@@ -7661,21 +7709,10 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
                unreachable("unknown reduce intrinsic");
          }
 
-         aco_ptr<Pseudo_reduction_instruction> reduce{create_instruction<Pseudo_reduction_instruction>(aco_op, Format::PSEUDO_REDUCTION, 3, 5)};
-         reduce->operands[0] = Operand(src);
-         // filled in by aco_reduce_assign.cpp, used internally as part of the
-         // reduce sequence
-         assert(dst.size() == 1 || dst.size() == 2);
-         reduce->operands[1] = Operand(RegClass(RegType::vgpr, dst.size()).as_linear());
-         reduce->operands[2] = Operand(v1.as_linear());
-
          Temp tmp_dst = bld.tmp(dst.regClass());
-         reduce->definitions[0] = Definition(tmp_dst);
-         reduce->definitions[1] = bld.def(ctx->program->lane_mask); // used internally
-         reduce->definitions[2] = Definition();
-         reduce->definitions[3] = Definition(scc, s1);
-         reduce->definitions[4] = Definition();
-         reduce->reduce_op = reduce_op;
+         aco_ptr<Pseudo_reduction_instruction> reduce{
+            create_reduction_instr(ctx, aco_op, reduce_op, Definition(tmp_dst), src)};
+
          reduce->cluster_size = cluster_size;
          ctx->block->instructions.emplace_back(std::move(reduce));
 
diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index 7bf7a6c3b68..301fe80911c 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -152,34 +152,6 @@ void setup_reduce_temp(Program* program)
          instr->operands[1] = Operand(reduceTmp);
          if (need_vtmp)
             instr->operands[2] = Operand(vtmp);
-
-         /* scalar temporary */
-         Builder bld(program);
-         instr->definitions[1] = bld.def(s2);
-
-         /* scalar identity temporary */
-         bool need_sitmp = (program->chip_class <= GFX7 || program->chip_class >= GFX10) && instr->opcode != aco_opcode::p_reduce;
-         if (instr->opcode == aco_opcode::p_exclusive_scan) {
-            need_sitmp |=
-               (op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
-                op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
-                op == fmin16 || op == fmin32 || op == fmin64 ||
-                op == fmax16 || op == fmax32 || op == fmax64 ||
-                op == fmul16 || op == fmul64);
-         }
-         if (need_sitmp) {
-            instr->definitions[2] = bld.def(RegClass(RegType::sgpr, instr->operands[0].size()));
-         }
-
-         /* vcc clobber */
-         bool clobber_vcc = false;
-         if ((op == iadd32 || op == imul64) && program->chip_class < GFX9)
-            clobber_vcc = true;
-         if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
-            clobber_vcc = true;
-
-         if (clobber_vcc)
-            instr->definitions[4] = Definition(vcc, bld.lm);
       }
    }
 }
diff --git a/src/amd/compiler/aco_util.h b/src/amd/compiler/aco_util.h
index b04eaba55a1..7728747ca57 100644
--- a/src/amd/compiler/aco_util.h
+++ b/src/amd/compiler/aco_util.h
@@ -216,6 +216,12 @@ public:
       --length;
    }
 
+   /*! \brief                 Adds an element to the end of the span
+   */
+   constexpr void push_back(const_reference val) noexcept {
+      *std::next(begin(), length++) = val;
+   }
+
    /*! \brief                 Clears the span
    */
    constexpr void clear() noexcept {



More information about the mesa-commit mailing list