Mesa (main): aco: relax condition to remove branches in case of few instructions

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Feb 25 16:04:20 UTC 2022


Module: Mesa
Branch: main
Commit: f030b75b7d2c359b90c18ee4ed83fa05265c12e0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f030b75b7d2c359b90c18ee4ed83fa05265c12e0

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Fri Nov 27 15:23:34 2020 +0100

aco: relax condition to remove branches in case of few instructions

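This patch relaxes the conditions under which we remove branch
instructions that only jump over a few instructions: such a branch is
now dropped when the skipped instructions are estimated to be cheap
enough that executing them with an empty exec mask is faster than
taking the branch.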

Totals from 27246 (20.20% of 134913) affected shaders: (GFX10.3)
CodeSize: 193413312 -> 192924928 (-0.25%)
Instrs: 36146788 -> 36024692 (-0.34%)
Latency: 528374112 -> 528469044 (+0.02%); split: -0.01%, +0.02%
InvThroughput: 106198759 -> 106216583 (+0.02%); split: -0.00%, +0.02%
Branches: 1040640 -> 918543 (-11.73%)

Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8647>

---

 src/amd/compiler/aco_lower_to_hw_instr.cpp | 67 ++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 12 deletions(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 6fb6accb9a9..08830bfc57b 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2196,35 +2196,77 @@ lower_to_hw_instr(Program* program)
             }
          } else if (instr->isBranch()) {
             Pseudo_branch_instruction* branch = &instr->branch();
-            uint32_t target = branch->target[0];
+            const uint32_t target = branch->target[0];
+            const bool uniform_branch = !(branch->opcode == aco_opcode::p_cbranch_z &&
+                                          branch->operands[0].physReg() == exec);
 
-            /* check if all blocks from current to target are empty */
-            /* In case there are <= 4 SALU or <= 2 VALU instructions, remove the branch */
+            /* Check if the branch instruction can be removed.
+             * This is beneficial when executing the next block with an empty exec mask
+             * is faster than the branch instruction itself.
+             */
             bool can_remove = block->index < target;
             unsigned num_scalar = 0;
             unsigned num_vector = 0;
-            for (unsigned i = block->index + 1; can_remove && i < branch->target[0]; i++) {
-               /* uniform branches must not be ignored if they
+            bool has_sopp = false;
+
+            /* Check the instructions between branch and target */
+            for (unsigned i = block->index + 1; i < branch->target[0]; i++) {
+               /* Uniform conditional branches must not be ignored if they
                 * are about to jump over actual instructions */
-               if (!program->blocks[i].instructions.empty() &&
-                   (branch->opcode != aco_opcode::p_cbranch_z ||
-                    branch->operands[0].physReg() != exec)) {
+               if (uniform_branch && !program->blocks[i].instructions.empty())
                   can_remove = false;
+
+               if (!can_remove)
                   break;
-               }
 
                for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) {
                   if (inst->isSOPP()) {
-                     can_remove = false;
+                     /* we allow at most one inner branch */
+                     if (has_sopp)
+                        can_remove = false;
+
+                     /* These instructions must conditionally be jumped over */
+                     if (inst->opcode == aco_opcode::s_endpgm ||
+                         inst->opcode == aco_opcode::s_sendmsg ||
+                         inst->opcode == aco_opcode::s_sendmsghalt ||
+                         inst->opcode == aco_opcode::s_trap ||
+                         inst->opcode == aco_opcode::s_barrier)
+                        can_remove = false;
+
+                     has_sopp = true;
                   } else if (inst->isSALU()) {
                      num_scalar++;
-                  } else if (inst->isVALU()) {
+                  } else if (inst->isVALU() || inst->isVINTRP()) {
                      num_vector++;
+                     /* VALU which writes SGPRs are always executed on GFX10+ */
+                     if (ctx.program->chip_class >= GFX10) {
+                        for (Definition& def : inst->definitions) {
+                           if (def.regClass().type() == RegType::sgpr)
+                              num_scalar++;
+                        }
+                     }
+                  } else if (inst->isVMEM() || inst->isFlatLike() || inst->isDS() ||
+                             inst->isEXP()) {
+                     // TODO: GFX6-9 can use vskip
+                     can_remove = false;
+                  } else if (inst->isSMEM()) {
+                     /* SMEM are at least as expensive as branches */
+                     can_remove = false;
+                  } else if (inst->isBarrier()) {
+                     can_remove = false;
                   } else {
                      can_remove = false;
+                     assert(false && "Pseudo instructions should be lowered by this point.");
                   }
 
-                  if (num_scalar + num_vector * 2 > 4)
+                  /* Under these conditions, we shouldn't remove the branch */
+                  unsigned est_cycles;
+                  if (ctx.program->chip_class >= GFX10)
+                     est_cycles = num_scalar * 2 + num_vector;
+                  else
+                     est_cycles = num_scalar * 4 + num_vector * 4;
+
+                  if (est_cycles > 16)
                      can_remove = false;
 
                   if (!can_remove)
@@ -2235,6 +2277,7 @@ lower_to_hw_instr(Program* program)
             if (can_remove)
                continue;
 
+            /* emit branch instruction */
             switch (instr->opcode) {
             case aco_opcode::p_branch:
                assert(block->linear_succs[0] == target);



More information about the mesa-commit mailing list