Mesa (main): aco: relax condition to remove branches in case of few instructions
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Feb 25 16:04:20 UTC 2022
Module: Mesa
Branch: main
Commit: f030b75b7d2c359b90c18ee4ed83fa05265c12e0
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f030b75b7d2c359b90c18ee4ed83fa05265c12e0
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Fri Nov 27 15:23:34 2020 +0100
aco: relax condition to remove branches in case of few instructions
This patch relaxes the conditions under which
we remove branch instructions.
Totals from 27246 (20.20% of 134913) affected shaders: (GFX10.3)
CodeSize: 193413312 -> 192924928 (-0.25%)
Instrs: 36146788 -> 36024692 (-0.34%)
Latency: 528374112 -> 528469044 (+0.02%); split: -0.01%, +0.02%
InvThroughput: 106198759 -> 106216583 (+0.02%); split: -0.00%, +0.02%
Branches: 1040640 -> 918543 (-11.73%)
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8647>
---
src/amd/compiler/aco_lower_to_hw_instr.cpp | 67 ++++++++++++++++++++++++------
1 file changed, 55 insertions(+), 12 deletions(-)
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 6fb6accb9a9..08830bfc57b 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2196,35 +2196,77 @@ lower_to_hw_instr(Program* program)
}
} else if (instr->isBranch()) {
Pseudo_branch_instruction* branch = &instr->branch();
- uint32_t target = branch->target[0];
+ const uint32_t target = branch->target[0];
+ const bool uniform_branch = !(branch->opcode == aco_opcode::p_cbranch_z &&
+ branch->operands[0].physReg() == exec);
- /* check if all blocks from current to target are empty */
- /* In case there are <= 4 SALU or <= 2 VALU instructions, remove the branch */
+ /* Check if the branch instruction can be removed.
+ * This is beneficial when executing the next block with an empty exec mask
+ * is faster than the branch instruction itself.
+ */
bool can_remove = block->index < target;
unsigned num_scalar = 0;
unsigned num_vector = 0;
- for (unsigned i = block->index + 1; can_remove && i < branch->target[0]; i++) {
- /* uniform branches must not be ignored if they
+ bool has_sopp = false;
+
+ /* Check the instructions between branch and target */
+ for (unsigned i = block->index + 1; i < branch->target[0]; i++) {
+ /* Uniform conditional branches must not be ignored if they
* are about to jump over actual instructions */
- if (!program->blocks[i].instructions.empty() &&
- (branch->opcode != aco_opcode::p_cbranch_z ||
- branch->operands[0].physReg() != exec)) {
+ if (uniform_branch && !program->blocks[i].instructions.empty())
can_remove = false;
+
+ if (!can_remove)
break;
- }
for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) {
if (inst->isSOPP()) {
- can_remove = false;
+ /* we allow at most one inner branch */
+ if (has_sopp)
+ can_remove = false;
+
+ /* These instructions must conditionally be jumped over */
+ if (inst->opcode == aco_opcode::s_endpgm ||
+ inst->opcode == aco_opcode::s_sendmsg ||
+ inst->opcode == aco_opcode::s_sendmsghalt ||
+ inst->opcode == aco_opcode::s_trap ||
+ inst->opcode == aco_opcode::s_barrier)
+ can_remove = false;
+
+ has_sopp = true;
} else if (inst->isSALU()) {
num_scalar++;
- } else if (inst->isVALU()) {
+ } else if (inst->isVALU() || inst->isVINTRP()) {
num_vector++;
+ /* VALU which writes SGPRs are always executed on GFX10+ */
+ if (ctx.program->chip_class >= GFX10) {
+ for (Definition& def : inst->definitions) {
+ if (def.regClass().type() == RegType::sgpr)
+ num_scalar++;
+ }
+ }
+ } else if (inst->isVMEM() || inst->isFlatLike() || inst->isDS() ||
+ inst->isEXP()) {
+ // TODO: GFX6-9 can use vskip
+ can_remove = false;
+ } else if (inst->isSMEM()) {
+ /* SMEM are at least as expensive as branches */
+ can_remove = false;
+ } else if (inst->isBarrier()) {
+ can_remove = false;
} else {
can_remove = false;
+ assert(false && "Pseudo instructions should be lowered by this point.");
}
- if (num_scalar + num_vector * 2 > 4)
+ /* Under these conditions, we shouldn't remove the branch */
+ unsigned est_cycles;
+ if (ctx.program->chip_class >= GFX10)
+ est_cycles = num_scalar * 2 + num_vector;
+ else
+ est_cycles = num_scalar * 4 + num_vector * 4;
+
+ if (est_cycles > 16)
can_remove = false;
if (!can_remove)
@@ -2235,6 +2277,7 @@ lower_to_hw_instr(Program* program)
if (can_remove)
continue;
+ /* emit branch instruction */
switch (instr->opcode) {
case aco_opcode::p_branch:
assert(block->linear_succs[0] == target);
More information about the mesa-commit mailing list