Mesa (master): aco: restrict copying of create_vector operands to GFX9+

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu May 21 12:21:18 UTC 2020


Module: Mesa
Branch: master
Commit: 5201985332dae703566764606cbbf4d6f56fc40d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5201985332dae703566764606cbbf4d6f56fc40d

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Fri May  8 15:52:47 2020 +0100

aco: restrict copying of create_vector operands to GFX9+

This improves code size for Polaris and earlier due to less register swapping

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5070>

---

 src/amd/compiler/aco_register_allocation.cpp | 41 ++++++++++++++++++----------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 613e55f0034..07b5130aa63 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -806,7 +806,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
    /* now, we figured the placement for our definition */
    std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, reg_file, PhysReg{best_pos}, size);
 
-   if (instr->opcode == aco_opcode::p_create_vector) {
+   if (instr->opcode == aco_opcode::p_create_vector && ctx.program->chip_class >= GFX9) {
       /* move killed operands which aren't yet at the correct position */
       for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].size(), i++) {
          if (instr->operands[i].isTemp() && instr->operands[i].isFirstKillBeforeDef() &&
@@ -1118,18 +1118,26 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
    /* collect variables to be moved */
    std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, reg_file, PhysReg{best_pos}, size);
 
-   /* move killed operands which aren't yet at the correct position */
-   uint64_t moved_operand_mask = 0;
-   for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].bytes(), i++) {
-      if (instr->operands[i].isTemp() &&
-          instr->operands[i].isFirstKillBeforeDef() &&
-          instr->operands[i].getTemp().type() == rc.type() &&
-          instr->operands[i].physReg().reg_b != best_pos * 4 + offset) {
-         vars.emplace(instr->operands[i].bytes(), instr->operands[i].tempId());
-         moved_operand_mask |= (uint64_t)1 << i;
+   /* GFX9+: move killed operands which aren't yet at the correct position
+    * Moving all killed operands generally leads to more register swaps.
+    * This is only done on GFX9+ because of the cheap v_swap instruction.
+    */
+   if (ctx.program->chip_class >= GFX9) {
+      for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].bytes(), i++) {
+         if (instr->operands[i].isTemp() &&
+             instr->operands[i].isFirstKillBeforeDef() &&
+             instr->operands[i].getTemp().type() == rc.type() &&
+             instr->operands[i].physReg().reg_b != best_pos * 4 + offset) {
+            vars.emplace(instr->operands[i].bytes(), instr->operands[i].tempId());
+         }
+      }
+   } else {
+      /* re-enable the killed operands */
+      for (unsigned j = 0; j < instr->operands.size(); j++) {
+         if (instr->operands[j].isTemp() && instr->operands[j].isFirstKill())
+            reg_file.fill(instr->operands[j]);
       }
    }
-
    ASSERTED bool success = false;
    success = get_regs_for_copies(ctx, reg_file, parallelcopies, vars, lb, ub, instr, best_pos, best_pos + size - 1);
    assert(success);
@@ -1137,10 +1145,13 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
    update_renames(ctx, reg_file, parallelcopies, instr);
    adjust_max_used_regs(ctx, rc, best_pos);
 
-   while (moved_operand_mask) {
-      unsigned i = u_bit_scan64(&moved_operand_mask);
-      assert(instr->operands[i].isFirstKillBeforeDef());
-      reg_file.clear(instr->operands[i]);
+   /* remove killed operands from reg_file once again */
+   for (unsigned i = 0; i < instr->operands.size(); i++) {
+      if (!instr->operands[i].isTemp() || !instr->operands[i].isFixed())
+         continue;
+      assert(!instr->operands[i].isUndefined());
+      if (instr->operands[i].isFirstKillBeforeDef())
+         reg_file.clear(instr->operands[i]);
    }
 
    return PhysReg{best_pos};



More information about the mesa-commit mailing list