Mesa (main): aco/optimizer: optimize extract(extract())
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Dec 31 15:13:09 UTC 2021
Module: Mesa
Branch: main
Commit: fb622775b5e583784cd836afa4e00faf538ae178
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fb622775b5e583784cd836afa4e00faf538ae178
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Tue Oct 5 13:09:02 2021 +0100
aco/optimizer: optimize extract(extract())
Totals from 53 (0.04% of 134572) affected shaders: (GFX10.3)
SpillVGPRs: 1780 -> 1776 (-0.22%); split: -0.34%, +0.11%
CodeSize: 968352 -> 963196 (-0.53%); split: -0.55%, +0.02%
Scratch: 180224 -> 178176 (-1.14%)
Instrs: 169800 -> 169158 (-0.38%); split: -0.39%, +0.01%
Latency: 6186064 -> 6141408 (-0.72%); split: -1.16%, +0.44%
InvThroughput: 2605044 -> 2582967 (-0.85%); split: -1.37%, +0.52%
VClause: 4851 -> 4866 (+0.31%); split: -0.16%, +0.47%
SClause: 1744 -> 1746 (+0.11%)
Copies: 42874 -> 42325 (-1.28%); split: -1.40%, +0.12%
Branches: 5762 -> 5765 (+0.05%); split: -0.02%, +0.07%
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13576>
---
src/amd/compiler/aco_optimizer.cpp | 40 +++++++++++++++++++++++++++++++++++---
1 file changed, 37 insertions(+), 3 deletions(-)
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index bc842a58906..1fd3cf61941 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -527,6 +527,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
return false;
break;
case aco_opcode::p_extract_vector:
+ case aco_opcode::p_extract:
if (temp.type() == RegType::sgpr && !can_accept_sgpr)
return false;
break;
@@ -991,9 +992,21 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
can_use_opsel(ctx.program->chip_class, instr->opcode, idx, sel.offset()) &&
!(instr->vop3().opsel & (1 << idx))) {
return true;
- } else {
- return false;
+ } else if (instr->opcode == aco_opcode::p_extract) {
+ SubdwordSel instrSel = parse_extract(instr.get());
+
+ /* the outer offset must be within extracted range */
+ if (instrSel.offset() >= sel.size())
+ return false;
+
+ /* don't remove the sign-extension when increasing the size further */
+ if (instrSel.size() > sel.size() && !instrSel.sign_extend() && sel.sign_extend())
+ return false;
+
+ return true;
}
+
+ return false;
}
/* Combine a p_extract (or p_insert, in some cases) instruction with instr.
@@ -1033,6 +1046,18 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
} else if (instr->isVOP3()) {
if (sel.offset())
instr->vop3().opsel |= 1 << idx;
+ } else if (instr->opcode == aco_opcode::p_extract) {
+ SubdwordSel instrSel = parse_extract(instr.get());
+
+ unsigned size = std::min(sel.size(), instrSel.size());
+ unsigned offset = sel.offset() + instrSel.offset();
+ unsigned sign_extend =
+ instrSel.sign_extend() && (sel.sign_extend() || instrSel.size() <= sel.size());
+
+ instr->operands[1] = Operand::c32(offset / size);
+ instr->operands[2] = Operand::c32(size * 8u);
+ instr->operands[3] = Operand::c32(sign_extend);
+ return;
}
/* output modifier and label_vopc seem to be the only ones worth keeping at the moment */
@@ -3406,8 +3431,17 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (instr->isSDWA() || instr->isDPP())
return;
- if (instr->opcode == aco_opcode::p_extract)
+ if (instr->opcode == aco_opcode::p_extract) {
+ ssa_info& info = ctx.info[instr->operands[0].tempId()];
+ if (info.is_extract() && can_apply_extract(ctx, instr, 0, info)) {
+ apply_extract(ctx, instr, 0, info);
+ if (--ctx.uses[instr->operands[0].tempId()])
+ ctx.uses[info.instr->operands[0].tempId()]++;
+ instr->operands[0].setTemp(info.instr->operands[0].getTemp());
+ }
+
apply_ds_extract(ctx, instr);
+ }
/* TODO: There are still some peephole optimizations that could be done:
* - abs(a - b) -> s_absdiff_i32
More information about the mesa-commit
mailing list