Mesa (main): aco/optimizer: optimize extract(extract())

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 31 15:13:09 UTC 2021


Module: Mesa
Branch: main
Commit: fb622775b5e583784cd836afa4e00faf538ae178
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fb622775b5e583784cd836afa4e00faf538ae178

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Tue Oct  5 13:09:02 2021 +0100

aco/optimizer: optimize extract(extract())

Totals from 53 (0.04% of 134572) affected shaders: (GFX10.3)
SpillVGPRs: 1780 -> 1776 (-0.22%); split: -0.34%, +0.11%
CodeSize: 968352 -> 963196 (-0.53%); split: -0.55%, +0.02%
Scratch: 180224 -> 178176 (-1.14%)
Instrs: 169800 -> 169158 (-0.38%); split: -0.39%, +0.01%
Latency: 6186064 -> 6141408 (-0.72%); split: -1.16%, +0.44%
InvThroughput: 2605044 -> 2582967 (-0.85%); split: -1.37%, +0.52%
VClause: 4851 -> 4866 (+0.31%); split: -0.16%, +0.47%
SClause: 1744 -> 1746 (+0.11%)
Copies: 42874 -> 42325 (-1.28%); split: -1.40%, +0.12%
Branches: 5762 -> 5765 (+0.05%); split: -0.02%, +0.07%

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13576>

---

 src/amd/compiler/aco_optimizer.cpp | 40 +++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index bc842a58906..1fd3cf61941 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -527,6 +527,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
          return false;
       break;
    case aco_opcode::p_extract_vector:
+   case aco_opcode::p_extract:
       if (temp.type() == RegType::sgpr && !can_accept_sgpr)
          return false;
       break;
@@ -991,9 +992,21 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
               can_use_opsel(ctx.program->chip_class, instr->opcode, idx, sel.offset()) &&
               !(instr->vop3().opsel & (1 << idx))) {
       return true;
-   } else {
-      return false;
+   } else if (instr->opcode == aco_opcode::p_extract) {
+      SubdwordSel instrSel = parse_extract(instr.get());
+
+      /* the outer offset must be within extracted range */
+      if (instrSel.offset() >= sel.size())
+         return false;
+
+      /* don't remove the sign-extension when increasing the size further */
+      if (instrSel.size() > sel.size() && !instrSel.sign_extend() && sel.sign_extend())
+         return false;
+
+      return true;
    }
+
+   return false;
 }
 
 /* Combine an p_extract (or p_insert, in some cases) instruction with instr.
@@ -1033,6 +1046,18 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
    } else if (instr->isVOP3()) {
       if (sel.offset())
          instr->vop3().opsel |= 1 << idx;
+   } else if (instr->opcode == aco_opcode::p_extract) {
+      SubdwordSel instrSel = parse_extract(instr.get());
+
+      unsigned size = std::min(sel.size(), instrSel.size());
+      unsigned offset = sel.offset() + instrSel.offset();
+      unsigned sign_extend =
+         instrSel.sign_extend() && (sel.sign_extend() || instrSel.size() <= sel.size());
+
+      instr->operands[1] = Operand::c32(offset / size);
+      instr->operands[2] = Operand::c32(size * 8u);
+      instr->operands[3] = Operand::c32(sign_extend);
+      return;
    }
 
    /* output modifier and label_vopc seem to be the only one worth keeping at the moment */
@@ -3406,8 +3431,17 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    if (instr->isSDWA() || instr->isDPP())
       return;
 
-   if (instr->opcode == aco_opcode::p_extract)
+   if (instr->opcode == aco_opcode::p_extract) {
+      ssa_info& info = ctx.info[instr->operands[0].tempId()];
+      if (info.is_extract() && can_apply_extract(ctx, instr, 0, info)) {
+         apply_extract(ctx, instr, 0, info);
+         if (--ctx.uses[instr->operands[0].tempId()])
+            ctx.uses[info.instr->operands[0].tempId()]++;
+         instr->operands[0].setTemp(info.instr->operands[0].getTemp());
+      }
+
       apply_ds_extract(ctx, instr);
+   }
 
    /* TODO: There are still some peephole optimizations that could be done:
     * - abs(a - b) -> s_absdiff_i32



More information about the mesa-commit mailing list