Mesa (master): aco/optimizer: convert extract_vector with index 0 into parallelcopies if possible
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Jan 21 11:13:24 UTC 2021
Module: Mesa
Branch: master
Commit: 7dcb9a0d8c90d4aecf325822eb8b2d121a020d1c
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7dcb9a0d8c90d4aecf325822eb8b2d121a020d1c
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Fri Jan 15 09:23:04 2021 +0100
aco/optimizer: convert extract_vector with index 0 into parallelcopies if possible
Totals from 273 (0.20% of 139391) affected shaders (Navi10):
VGPRs: 11600 -> 11792 (+1.66%)
CodeSize: 1389304 -> 1383152 (-0.44%); split: -0.53%, +0.08%
MaxWaves: 3848 -> 3752 (-2.49%)
Instrs: 240228 -> 239478 (-0.31%); split: -0.37%, +0.06%
Cycles: 20637708 -> 20580024 (-0.28%); split: -0.46%, +0.18%
VMEM: 39164 -> 38831 (-0.85%); split: +0.06%, -0.91%
SMEM: 21743 -> 22204 (+2.12%)
VClause: 4787 -> 4783 (-0.08%)
Copies: 39057 -> 38308 (-1.92%); split: -2.28%, +0.37%
Branches: 6556 -> 6557 (+0.02%)
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8260>
---
src/amd/compiler/aco_optimizer.cpp | 59 ++++++++++++++-----------------
src/amd/compiler/tests/test_optimizer.cpp | 3 +-
2 files changed, 28 insertions(+), 34 deletions(-)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index a88747775ff..b1e786408a1 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1224,42 +1224,37 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
const unsigned index = instr->operands[1].constantValue();
const unsigned dst_offset = index * instr->definitions[0].bytes();
- if (info.is_constant_or_literal(32)) {
- uint32_t mask = u_bit_consecutive(0, instr->definitions[0].bytes() * 8u);
- ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, (info.val >> (dst_offset * 8u)) & mask);
- break;
- } else if (!info.is_vec()) {
- break;
- }
-
- /* check if we index directly into a vector element */
- Instruction* vec = info.instr;
- unsigned offset = 0;
-
- for (const Operand& op : vec->operands) {
- if (offset < dst_offset) {
- offset += op.bytes();
- continue;
- } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) {
+ if (info.is_vec()) {
+ /* check if we index directly into a vector element */
+ Instruction* vec = info.instr;
+ unsigned offset = 0;
+
+ for (const Operand& op : vec->operands) {
+ if (offset < dst_offset) {
+ offset += op.bytes();
+ continue;
+ } else if (offset != dst_offset || op.bytes() != instr->definitions[0].bytes()) {
+ break;
+ }
+ instr->operands[0] = op;
break;
}
+ } else if (info.is_constant_or_literal(32)) {
+ /* propagate constants */
+ uint32_t mask = u_bit_consecutive(0, instr->definitions[0].bytes() * 8u);
+ uint32_t val = (info.val >> (dst_offset * 8u)) & mask;
+ instr->operands[0] = Operand::get_const(ctx.program->chip_class, val, instr->definitions[0].bytes());;
+ } else if (index == 0 && instr->operands[0].size() == instr->definitions[0].size()) {
+ ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
+ }
- /* convert this extract into a copy instruction */
- instr->opcode = aco_opcode::p_parallelcopy;
- instr->operands.pop_back();
- instr->operands[0] = op;
-
- if (op.isConstant()) {
- ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, op.constantValue64());
- } else if (op.isUndefined()) {
- ctx.info[instr->definitions[0].tempId()].set_undefined();
- } else {
- assert(op.isTemp());
- ctx.info[instr->definitions[0].tempId()].set_temp(op.getTemp());
- }
+ if (instr->operands[0].bytes() != instr->definitions[0].bytes())
break;
- }
- break;
+
+ /* convert this extract into a copy instruction */
+ instr->opcode = aco_opcode::p_parallelcopy;
+ instr->operands.pop_back();
+ FALLTHROUGH;
}
case aco_opcode::p_parallelcopy: /* propagate */
if (instr->operands[0].isTemp() && ctx.info[instr->operands[0].tempId()].is_vec() &&
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index e885001c614..679812faac8 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -676,8 +676,7 @@ BEGIN_TEST(optimize.const_comparison_ordering)
writeout(9, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1));
/* bit sizes */
- //! v2b: %b16 = p_extract_vector %b, 0
- //! s2: %res10 = v_cmp_nge_f16 4.0, %b16
+ //! s2: %res10 = v_cmp_nge_f16 4.0, %b
//! p_unit_test 10, %res10
Temp input1_16 = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand(0u));
writeout(10, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc),
More information about the mesa-commit mailing list