Mesa (master): aco: fix combining add/sub to b2i if a new dest needs to be allocated
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Nov 10 09:55:55 UTC 2020
Module: Mesa
Branch: master
Commit: ec347ee9bc41f99dc8e398c652d873cc192bc99c
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ec347ee9bc41f99dc8e398c652d873cc192bc99c
Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date: Mon Nov 9 19:42:22 2020 +0100
aco: fix combining add/sub to b2i if a new dest needs to be allocated
The uses vector needs to be expanded to avoid an out-of-bounds access,
and to make sure the number of uses is initialized to 0.
This allows more v_and(a, v_subbrev_co_u32) sequences to be combined.
fossil-db (Vega10):
Totals from 4574 (3.28% of 139517) affected shaders:
SGPRs: 291625 -> 292217 (+0.20%); split: -0.01%, +0.21%
VGPRs: 276368 -> 276188 (-0.07%); split: -0.07%, +0.01%
SpillSGPRs: 455 -> 533 (+17.14%)
SpillVGPRs: 76 -> 78 (+2.63%)
CodeSize: 23327500 -> 23304152 (-0.10%); split: -0.17%, +0.07%
MaxWaves: 22044 -> 22066 (+0.10%)
Instrs: 4583064 -> 4576301 (-0.15%); split: -0.15%, +0.01%
Cycles: 47925276 -> 47871968 (-0.11%); split: -0.13%, +0.01%
VMEM: 1599363 -> 1597473 (-0.12%); split: +0.08%, -0.19%
SMEM: 331461 -> 331126 (-0.10%); split: +0.08%, -0.18%
VClause: 80639 -> 80696 (+0.07%); split: -0.02%, +0.09%
SClause: 155992 -> 155993 (+0.00%); split: -0.02%, +0.02%
Copies: 333482 -> 333318 (-0.05%); split: -0.12%, +0.07%
Branches: 70967 -> 70968 (+0.00%)
PreSGPRs: 187078 -> 187711 (+0.34%); split: -0.01%, +0.35%
PreVGPRs: 244918 -> 244785 (-0.05%)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7513>
---
src/amd/compiler/aco_optimizer.cpp | 13 ++++++++++---
src/amd/compiler/tests/test_optimizer.cpp | 6 ++++++
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index aa9fad589b3..9d6219037de 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2260,9 +2260,16 @@ bool combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode n
}
ctx.uses[instr->operands[i].tempId()]--;
new_instr->definitions[0] = instr->definitions[0];
- new_instr->definitions[1] =
- instr->definitions.size() == 2 ? instr->definitions[1] :
- Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
+ if (instr->definitions.size() == 2) {
+ new_instr->definitions[1] = instr->definitions[1];
+ } else {
+ new_instr->definitions[1] =
+ Definition(ctx.program->allocateTmp(ctx.program->lane_mask));
+ /* Make sure the uses vector is large enough and the number of
+ * uses properly initialized to 0.
+ */
+ ctx.uses.push_back(0);
+ }
new_instr->definitions[1].setHint(vcc);
new_instr->operands[0] = Operand(0u);
new_instr->operands[1] = instr->operands[!i];
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 4bb5898e236..f43ae731072 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -119,6 +119,12 @@ BEGIN_TEST(optimize.cndmask)
Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], subbrev);
writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, subbrev));
+ //! v1: %res4 = v_cndmask_b32 0, %a, %c
+ //! p_unit_test 4, %res4
+ Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), Operand(inputs[2]));
+ Temp sub = bld.vsub32(bld.def(v1), Operand(0u), cndmask);
+ writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(inputs[0]), sub));
+
finish_opt_test();
}
END_TEST
More information about the mesa-commit
mailing list