Mesa (main): aco/ra: fix get_reg_for_operand() with no free registers

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon May 17 13:45:41 UTC 2021


Module: Mesa
Branch: main
Commit: bc95d55e1f4ce54ff086352d5e52dc733d85118d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=bc95d55e1f4ce54ff086352d5e52dc733d85118d

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Fri Apr 23 14:29:22 2021 +0100

aco/ra: fix get_reg_for_operand() with no free registers

fossil-db (Sienna Cichlid):
Totals from 195 (0.13% of 149839) affected shaders:
CodeSize: 2352160 -> 2356720 (+0.19%); split: -0.00%, +0.20%
Instrs: 431976 -> 433124 (+0.27%); split: -0.00%, +0.27%
Latency: 10174434 -> 10174897 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 4044388 -> 4044425 (+0.00%); split: -0.00%, +0.00%
Copies: 67634 -> 68762 (+1.67%); split: -0.00%, +1.67%

fossil-db (Polaris):
Totals from 186 (0.12% of 151365) affected shaders:
CodeSize: 2272356 -> 2276848 (+0.20%); split: -0.00%, +0.20%
Instrs: 432390 -> 433513 (+0.26%); split: -0.00%, +0.26%
Latency: 13153394 -> 13160194 (+0.05%); split: -0.00%, +0.05%
InvThroughput: 10889509 -> 10889967 (+0.00%); split: -0.00%, +0.00%
SClause: 12745 -> 12747 (+0.02%)
Copies: 74832 -> 75945 (+1.49%); split: -0.01%, +1.50%

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10459>

---

 src/amd/compiler/aco_register_allocation.cpp | 10 ++++++++--
 src/amd/compiler/tests/test_regalloc.cpp     | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index d7e64f88a89..0eb52ecb33c 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1728,10 +1728,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
                          aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index)
 {
    /* check if the operand is fixed */
+   PhysReg src = ctx.assignments[operand.tempId()].reg;
    PhysReg dst;
    bool blocking_var = false;
    if (operand.isFixed()) {
-      assert(operand.physReg() != ctx.assignments[operand.tempId()].reg);
+      assert(operand.physReg() != src);
 
       /* check if target reg is blocked, and move away the blocking var */
       if (register_file[operand.physReg()]) {
@@ -1741,6 +1742,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
          Operand pc_op = Operand(Temp{blocking_id, rc});
          pc_op.setFixed(operand.physReg());
 
+         /* make space in the register file for get_reg() and then block the target reg */
+         register_file.clear(src, operand.regClass());
+         register_file.clear(pc_op.physReg(), rc);
+         register_file.block(operand.physReg(), operand.regClass());
+
          /* find free reg */
          PhysReg reg = get_reg(ctx, register_file, pc_op.getTemp(), parallelcopy, ctx.pseudo_dummy);
          update_renames(ctx, register_file, parallelcopy, ctx.pseudo_dummy, true);
@@ -1756,7 +1762,7 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
    }
 
    Operand pc_op = operand;
-   pc_op.setFixed(ctx.assignments[operand.tempId()].reg);
+   pc_op.setFixed(src);
    Definition pc_def = Definition(dst, pc_op.regClass());
    parallelcopy.emplace_back(pc_op, pc_def);
    update_renames(ctx, register_file, parallelcopy, instr, true);
diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp
index 0bf0a237118..2f1565d1c37 100644
--- a/src/amd/compiler/tests/test_regalloc.cpp
+++ b/src/amd/compiler/tests/test_regalloc.cpp
@@ -77,3 +77,22 @@ BEGIN_TEST(regalloc.32bit_partial_write)
 
    finish_ra_test(ra_test_policy());
 END_TEST
+
+BEGIN_TEST(regalloc.precolor.swap)
+   //>> s2: %op0:s[0-1] = p_startpgm
+   if (!setup_cs("s2", GFX10))
+      return;
+
+   program->dev.sgpr_limit = 4;
+
+   //! s2: %op1:s[2-3] = p_unit_test
+   Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
+
+   //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
+   //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
+   Operand op(inputs[0]);
+   op.setFixed(PhysReg(2));
+   bld.pseudo(aco_opcode::p_unit_test, op, op1);
+
+   finish_ra_test(ra_test_policy());
+END_TEST



More information about the mesa-commit mailing list