Mesa (staging/21.0): aco: remove divergent branches which only jump over very few instructions

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jan 13 19:31:07 UTC 2021


Module: Mesa
Branch: staging/21.0
Commit: 3aaf5188fe0741776bb8218aee9c7507518cb0b8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3aaf5188fe0741776bb8218aee9c7507518cb0b8

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Thu Nov 26 17:36:47 2020 +0100

aco: remove divergent branches which only jump over very few instructions

Totals from 18436 (13.23% of 139391) affected shaders (NAVI10):
CodeSize: 138428504 -> 138172588 (-0.18%)
Instrs: 26605127 -> 26541176 (-0.24%)
Cycles: 1624994088 -> 1622461620 (-0.16%)
VMEM: 3689892 -> 3689102 (-0.02%)
SMEM: 1131767 -> 1131761 (-0.00%)
Branches: 851796 -> 787852 (-7.51%)

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7814>
(cherry picked from commit 288032a87316f8542d1d5de8b8e1d3a20359ceab)

---

 .pick_status.json                          |  2 +-
 src/amd/compiler/aco_lower_to_hw_instr.cpp | 56 +++++++++++++++++++++++-------
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index ee6bc6c3f46..eae4a01e3e8 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -274,7 +274,7 @@
         "description": "aco: remove divergent branches which only jump over very few instructions",
         "nominated": false,
         "nomination_type": null,
-        "resolution": 4,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index a5b939e905b..a4e2d5b6404 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1792,7 +1792,7 @@ void lower_to_hw_instr(Program* program)
 {
    Block *discard_block = NULL;
 
-   for (size_t block_idx = 0; block_idx < program->blocks.size(); block_idx++)
+   for (int block_idx = program->blocks.size() - 1; block_idx >= 0; block_idx--)
    {
       Block *block = &program->blocks[block_idx];
       lower_context ctx;
@@ -1980,40 +1980,70 @@ void lower_to_hw_instr(Program* program)
             }
          } else if (instr->format == Format::PSEUDO_BRANCH) {
             Pseudo_branch_instruction* branch = static_cast<Pseudo_branch_instruction*>(instr.get());
+            uint32_t target = branch->target[0];
+
             /* check if all blocks from current to target are empty */
-            bool can_remove = block->index < branch->target[0];
+            /* In case there are <= 4 SALU or <= 2 VALU instructions (SALU + 2 * VALU <= 4), remove the branch */
+            bool can_remove = block->index < target;
+            unsigned num_scalar = 0;
+            unsigned num_vector = 0;
             for (unsigned i = block->index + 1; can_remove && i < branch->target[0]; i++) {
-               if (program->blocks[i].instructions.size())
+               /* uniform branches must not be ignored if they
+                * are about to jump over actual instructions */
+               if (!program->blocks[i].instructions.empty() &&
+                   (branch->opcode != aco_opcode::p_cbranch_z ||
+                    branch->operands[0].physReg() != exec)) {
                   can_remove = false;
+                  break;
+               }
+
+               for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) {
+                  if (inst->format == Format::SOPP) {
+                     can_remove = false;
+                  } else if (inst->isSALU()) {
+                     num_scalar++;
+                  } else if (inst->isVALU()) {
+                     num_vector++;
+                  } else {
+                     can_remove = false;
+                  }
+
+                  if (num_scalar + num_vector * 2 > 4)
+                     can_remove = false;
+
+                  if (!can_remove)
+                     break;
+               }
             }
+
             if (can_remove)
                continue;
 
             switch (instr->opcode) {
                case aco_opcode::p_branch:
-                  assert(block->linear_succs[0] == branch->target[0]);
-                  bld.sopp(aco_opcode::s_branch, branch->definitions[0], branch->target[0]);
+                  assert(block->linear_succs[0] == target);
+                  bld.sopp(aco_opcode::s_branch, branch->definitions[0], target);
                   break;
                case aco_opcode::p_cbranch_nz:
-                  assert(block->linear_succs[1] == branch->target[0]);
+                  assert(block->linear_succs[1] == target);
                   if (branch->operands[0].physReg() == exec)
-                     bld.sopp(aco_opcode::s_cbranch_execnz, branch->definitions[0], branch->target[0]);
+                     bld.sopp(aco_opcode::s_cbranch_execnz, branch->definitions[0], target);
                   else if (branch->operands[0].physReg() == vcc)
-                     bld.sopp(aco_opcode::s_cbranch_vccnz, branch->definitions[0], branch->target[0]);
+                     bld.sopp(aco_opcode::s_cbranch_vccnz, branch->definitions[0], target);
                   else {
                      assert(branch->operands[0].physReg() == scc);
-                     bld.sopp(aco_opcode::s_cbranch_scc1, branch->definitions[0], branch->target[0]);
+                     bld.sopp(aco_opcode::s_cbranch_scc1, branch->definitions[0], target);
                   }
                   break;
                case aco_opcode::p_cbranch_z:
-                  assert(block->linear_succs[1] == branch->target[0]);
+                  assert(block->linear_succs[1] == target);
                   if (branch->operands[0].physReg() == exec)
-                     bld.sopp(aco_opcode::s_cbranch_execz, branch->definitions[0], branch->target[0]);
+                     bld.sopp(aco_opcode::s_cbranch_execz, branch->definitions[0], target);
                   else if (branch->operands[0].physReg() == vcc)
-                     bld.sopp(aco_opcode::s_cbranch_vccz, branch->definitions[0], branch->target[0]);
+                     bld.sopp(aco_opcode::s_cbranch_vccz, branch->definitions[0], target);
                   else {
                      assert(branch->operands[0].physReg() == scc);
-                     bld.sopp(aco_opcode::s_cbranch_scc0, branch->definitions[0], branch->target[0]);
+                     bld.sopp(aco_opcode::s_cbranch_scc0, branch->definitions[0], target);
                   }
                   break;
                default:



More information about the mesa-commit mailing list