Mesa (staging/21.0): aco: optimize packed clamp

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Jan 13 19:31:07 UTC 2021


Module: Mesa
Branch: staging/21.0
Commit: 3760fdf7e6f14cef928fcf8971289cddcad93c9b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3760fdf7e6f14cef928fcf8971289cddcad93c9b

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Fri Sep 11 15:54:39 2020 +0100

aco: optimize packed clamp

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6680>
(cherry picked from commit e3790fc4587485b75a389a5a640846c8b5ffa33f)

---

 .pick_status.json                  |  2 +-
 src/amd/compiler/aco_optimizer.cpp | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index a0b5e11268a..1fa8f5c86de 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -310,7 +310,7 @@
         "description": "aco: optimize packed clamp",
         "nominated": false,
         "nomination_type": null,
-        "resolution": 4,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index d4b95f50538..768037a5294 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2728,7 +2728,27 @@ bool combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
 void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
 {
-   // TODO: clamp, fneg?
+   VOP3P_instruction* vop3p = static_cast<VOP3P_instruction*>(instr.get());
+
+   /* apply clamp */
+   if (instr->opcode == aco_opcode::v_pk_mul_f16 &&
+       instr->operands[1].constantEquals(0x3C00) &&
+       vop3p->clamp &&
+       vop3p->opsel_lo == 0x0 &&
+       vop3p->opsel_hi == 0x1 &&
+       instr->operands[0].isTemp() &&
+       ctx.uses[instr->operands[0].tempId()] == 1) {
+
+      ssa_info& info = ctx.info[instr->operands[0].tempId()];
+      if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
+         Instruction* candidate = ctx.info[instr->operands[0].tempId()].instr;
+         static_cast<VOP3P_instruction*>(candidate)->clamp = true;
+         std::swap(instr->definitions[0], candidate->definitions[0]);
+         ctx.info[candidate->definitions[0].tempId()].instr = candidate;
+         ctx.uses[instr->definitions[0].tempId()]--;
+         return;
+      }
+   }
 
    if (instr->opcode == aco_opcode::v_pk_add_f16) {
       if (instr->definitions[0].isPrecise())
@@ -2781,7 +2801,6 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
       assert(mul_instr->format == Format::VOP3P);
       aco_ptr<VOP3P_instruction> fma{create_instruction<VOP3P_instruction>(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)};
       VOP3P_instruction* mul = static_cast<VOP3P_instruction*>(mul_instr);
-      VOP3P_instruction* vop3p = static_cast<VOP3P_instruction*>(instr.get());
       for (unsigned i = 0; i < 2; i++) {
          fma->operands[i] = op[i];
          fma->neg_lo[i] = mul->neg_lo[i];



More information about the mesa-commit mailing list