Mesa (master): aco: propagate temporaries into PSEUDO instructions if it can take it

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jan 21 11:13:24 UTC 2021


Module: Mesa
Branch: master
Commit: 96fafcca63d8bfc83bb7991d3e5c843fa2fe05e9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=96fafcca63d8bfc83bb7991d3e5c843fa2fe05e9

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Thu Dec 31 11:01:08 2020 +0000

aco: propagate temporaries into PSEUDO instructions if it can take it

This patch relaxes copy-propagation for PSEUDO instructions with
subdword Operands / Definitions:
general:
- only propagate VGPR temps if the Definition is VGPR (or on p_as_uniform)

parallelcopy/create_vector/phis:
- size has to be the same

extract_vector/split_vector:
- propagate SGPR temps on GFX9+ or if the Definitions are not subdword
- split_vector: size must not increase

Totals from 282 (0.20% of 140985) affected shaders (Polaris10):
VGPRs: 14520 -> 14408 (-0.77%)
CodeSize: 2693956 -> 2694316 (+0.01%); split: -0.20%, +0.21%
Instrs: 512874 -> 512864 (-0.00%); split: -0.16%, +0.16%
Cycles: 26338860 -> 26320652 (-0.07%); split: -0.36%, +0.29%
VMEM: 49460 -> 49634 (+0.35%); split: +0.47%, -0.12%
SMEM: 10035 -> 10036 (+0.01%)
VClause: 7675 -> 7674 (-0.01%)
Copies: 66012 -> 65943 (-0.10%); split: -1.31%, +1.20%
Branches: 17265 -> 17281 (+0.09%); split: -0.10%, +0.19%
PreVGPRs: 12211 -> 12124 (-0.71%)

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8260>

---

 src/amd/compiler/aco_optimizer.cpp | 106 ++++++++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 36 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 04676516bbc..d7e5710aba8 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -627,6 +627,67 @@ bool can_use_VOP3(opt_ctx& ctx, const aco_ptr<Instruction>& instr)
           instr->opcode != aco_opcode::v_readfirstlane_b32;
 }
 
+bool pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr,
+                           Temp temp, unsigned index)
+{
+   if (instr->definitions.empty())
+      return false;
+
+   const bool vgpr = instr->opcode == aco_opcode::p_as_uniform ||
+                     std::all_of(instr->definitions.begin(), instr->definitions.end(),
+                                 [] (const Definition& def) { return def.regClass().type() == RegType::vgpr;});
+
+   /* don't propagate VGPRs into SGPR instructions */
+   if (temp.type() == RegType::vgpr && !vgpr)
+      return false;
+
+   bool can_accept_sgpr = ctx.program->chip_class >= GFX9 ||
+                          std::none_of(instr->definitions.begin(), instr->definitions.end(),
+                                       [] (const Definition& def) { return def.regClass().is_subdword();});
+
+   switch (instr->opcode) {
+   case aco_opcode::p_phi:
+   case aco_opcode::p_linear_phi:
+   case aco_opcode::p_parallelcopy:
+   case aco_opcode::p_create_vector:
+      if (temp.bytes() != instr->operands[index].bytes())
+         return false;
+      break;
+   case aco_opcode::p_extract_vector:
+      if (temp.type() == RegType::sgpr && !can_accept_sgpr)
+         return false;
+      break;
+   case aco_opcode::p_split_vector: {
+      if (temp.type() == RegType::sgpr && !can_accept_sgpr)
+         return false;
+      /* don't increase the vector size */
+      if (temp.bytes() > instr->operands[index].bytes())
+         return false;
+      /* We can decrease the vector size as smaller temporaries are only
+       * propagated by p_as_uniform instructions.
+       * If this propagation leads to invalid IR or hits the assertion below,
+       * it means that some undefined bytes within a dword are being accessed
+       * and a bug in instruction_selection is likely. */
+      int decrease = instr->operands[index].bytes() - temp.bytes();
+      while (decrease > 0) {
+         decrease -= instr->definitions.back().bytes();
+         instr->definitions.pop_back();
+      }
+      assert(decrease == 0);
+      break;
+   }
+   case aco_opcode::p_as_uniform:
+      if (temp.regClass() == instr->definitions[0].regClass())
+         instr->opcode = aco_opcode::p_parallelcopy;
+      break;
+   default:
+      return false;
+   }
+
+   instr->operands[index].setTemp(temp);
+   return true;
+}
+
 bool can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 {
    if (instr->isSDWA() && ctx.program->chip_class < GFX9)
@@ -839,48 +900,21 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
       if (info.is_undefined() && is_phi(instr))
          instr->operands[i] = Operand(instr->operands[i].regClass());
       /* propagate reg->reg of same type */
-      if (info.is_temp() && info.temp.regClass() == instr->operands[i].getTemp().regClass()) {
+      while (info.is_temp() && info.temp.regClass() == instr->operands[i].getTemp().regClass()) {
          instr->operands[i].setTemp(ctx.info[instr->operands[i].tempId()].temp);
          info = ctx.info[info.temp.id()];
       }
 
-      /* SALU / PSEUDO: propagate inline constants */
-      if (instr->isSALU() || instr->format == Format::PSEUDO) {
-         bool is_subdword = false;
-         // TODO: optimize SGPR propagation for subdword pseudo instructions on gfx9+
-         if (instr->format == Format::PSEUDO) {
-            is_subdword = std::any_of(instr->definitions.begin(), instr->definitions.end(),
-                                      [] (const Definition& def) { return def.regClass().is_subdword();});
-            is_subdword = is_subdword || std::any_of(instr->operands.begin(), instr->operands.end(),
-                                                     [] (const Operand& op) { return op.bytes() % 4;});
-            if (is_subdword && ctx.program->chip_class < GFX9)
-               continue;
-         }
-
-         if (info.is_temp() && info.temp.type() == RegType::sgpr) {
-            instr->operands[i].setTemp(info.temp);
+      /* PSEUDO: propagate temporaries */
+      if (instr->format == Format::PSEUDO) {
+         while (info.is_temp()) {
+            pseudo_propagate_temp(ctx, instr, info.temp, i);
             info = ctx.info[info.temp.id()];
-         } else if (info.is_temp() && info.temp.type() == RegType::vgpr &&
-                    info.temp.bytes() == instr->operands[i].bytes()) {
-            /* propagate vgpr if it can take it */
-            switch (instr->opcode) {
-            case aco_opcode::p_create_vector:
-            case aco_opcode::p_split_vector:
-            case aco_opcode::p_extract_vector:
-            case aco_opcode::p_phi:
-            case aco_opcode::p_parallelcopy: {
-               const bool all_vgpr = std::none_of(instr->definitions.begin(), instr->definitions.end(),
-                                                  [] (const Definition& def) { return def.getTemp().type() != RegType::vgpr;});
-               if (all_vgpr) {
-                  instr->operands[i] = Operand(info.temp);
-                  info = ctx.info[info.temp.id()];
-               }
-               break;
-            }
-            default:
-               break;
-            }
          }
+      }
+
+      /* SALU / PSEUDO: propagate inline constants */
+      if (instr->isSALU() || instr->format == Format::PSEUDO) {
          unsigned bits = get_operand_size(instr, i);
          if ((info.is_constant(bits) || (info.is_literal(bits) && instr->format == Format::PSEUDO)) &&
              !instr->operands[i].isFixed() && alu_can_accept_constant(instr->opcode, i)) {



More information about the mesa-commit mailing list