Mesa (master): aco: optimize out a*1.0 if it's used as a float

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jan 26 11:59:24 UTC 2021


Module: Mesa
Branch: master
Commit: 2f0d480c73b33c08adefe87723b30c0c6567ee86
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f0d480c73b33c08adefe87723b30c0c6567ee86

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Jun 17 15:02:30 2020 +0100

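aco: optimize out a*1.0 if it's used as a float

When a multiplication by 1.0 cannot be replaced by its operand outright
(the existing set_temp case), label its result as an fcanonicalize of that
operand instead. VALU users whose opcode accepts input modifiers (other
than v_cndmask_b32) can then read the operand directly.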

fossil-db (GFX10):
Totals from 370 (0.27% of 139391) affected shaders:
CodeSize: 641436 -> 634156 (-1.13%); split: -1.14%, +0.00%
Instrs: 117668 -> 115739 (-1.64%); split: -1.64%, +0.00%

fossil-db (GFX10.3):
Totals from 370 (0.27% of 139391) affected shaders:
SGPRs: 26888 -> 26912 (+0.09%)
VGPRs: 13964 -> 13916 (-0.34%)
CodeSize: 692008 -> 679008 (-1.88%)
MaxWaves: 4779 -> 4783 (+0.08%)
Instrs: 134265 -> 132026 (-1.67%); split: -1.67%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5523>

---

 src/amd/compiler/aco_optimizer.cpp | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index b324f484075..4266696b9fb 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -114,6 +114,7 @@ enum Label {
    label_vcc_hint = 1 << 25,
    label_scc_needed = 1 << 26,
    label_b2i = 1 << 27,
+   label_fcanonicalize = 1 << 28,
    label_constant_16bit = 1 << 29,
    label_usedef = 1 << 30, /* generic label */
    label_vop3p = 1 << 31,
@@ -125,7 +126,7 @@ static constexpr uint64_t instr_mod_labels = label_omod2 | label_omod4 | label_o
 
 static constexpr uint64_t instr_labels = instr_usedef_labels | instr_mod_labels;
 static constexpr uint64_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool |
-                                        label_scc_invert | label_b2i;
+                                        label_scc_invert | label_b2i | label_fcanonicalize;
 static constexpr uint32_t val_labels = label_constant_32bit | label_constant_64bit | label_constant_16bit | label_literal;
 
 static_assert((instr_labels & temp_labels) == 0, "labels cannot intersect");
@@ -511,6 +512,18 @@ struct ssa_info {
    {
       return label & label_vop3p;
    }
+
+   void set_fcanonicalize(Temp tmp)
+   {
+      add_label(label_fcanonicalize);
+      temp = tmp;
+   }
+
+   bool is_fcanonicalize()
+   {
+      return label & label_fcanonicalize;
+   }
+
 };
 
 struct opt_ctx {
@@ -876,6 +889,11 @@ bool fixed_to_exec(Operand op)
    return op.isFixed() && op.physReg() == exec;
 }
 
+bool can_eliminate_fcanonicalize(aco_opcode op)
+{
+   return instr_info.can_use_input_modifiers[(int)op] && op != aco_opcode::v_cndmask_b32;
+}
+
 void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
 {
    if (instr->isSALU() || instr->isVALU() || instr->isPseudo()) {
@@ -925,7 +943,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
 
       /* VALU: propagate neg, abs & inline constants */
       else if (instr->isVALU()) {
-         if (info.is_temp() && info.temp.type() == RegType::vgpr && valu_can_accept_vgpr(instr, i)) {
+         bool is_fp = can_eliminate_fcanonicalize(instr->opcode);
+         if ((info.is_temp() || (info.is_fcanonicalize() && is_fp)) && info.temp.type() == RegType::vgpr && valu_can_accept_vgpr(instr, i)) {
             instr->operands[i].setTemp(info.temp);
             info = ctx.info[info.temp.id()];
          }
@@ -1316,6 +1335,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
                        !(fp16 ? block.fp_mode.must_flush_denorms16_64 : block.fp_mode.must_flush_denorms32) &&
                        !instr->definitions[0].isPrecise()) { /* 1.0 */
                ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[i].getTemp());
+            } else if (instr->operands[!i].constantValue() == (fp16 ? 0x3c00 : 0x3f800000)) {
+               ctx.info[instr->definitions[0].tempId()].set_fcanonicalize(instr->operands[i].getTemp());
             } else if (instr->operands[!i].constantValue() == 0u &&
                        !(fp16 ? block.fp_mode.preserve_signed_zero_inf_nan16_64 : block.fp_mode.preserve_signed_zero_inf_nan32)) { /* 0.0 */
                ctx.info[instr->definitions[0].tempId()].set_constant(ctx.program->chip_class, 0u);
@@ -2530,7 +2551,8 @@ void apply_sgprs(opt_ctx &ctx, aco_ptr<Instruction>& instr)
             sgpr_ids[!!sgpr_ids[0]] = instr->operands[i].tempId();
       }
       ssa_info& info = ctx.info[instr->operands[i].tempId()];
-      if (info.is_temp() && info.temp.type() == RegType::sgpr)
+      if ((info.is_temp() || (info.is_fcanonicalize() && can_eliminate_fcanonicalize(instr->opcode))) &&
+          info.temp.type() == RegType::sgpr)
          operand_mask |= 1u << i;
    }
    unsigned max_sgprs = 1;



More information about the mesa-commit mailing list