Mesa (master): aco: allocate full register for subdword definitions if HW doesn't support it

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 22 18:36:13 UTC 2020


Module: Mesa
Branch: master
Commit: edc2b57ac14c6f9f3dadd3d7282e9d6ac1bc4304
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=edc2b57ac14c6f9f3dadd3d7282e9d6ac1bc4304

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Mon Apr 13 17:23:38 2020 +0100

aco: allocate full register for subdword definitions if HW doesn't support it

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4573>

---

 src/amd/compiler/aco_register_allocation.cpp | 13 +++++++++++--
 src/amd/compiler/aco_validate.cpp            | 18 +++++++++++++++---
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 0857084a486..a6792679da1 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1839,8 +1839,17 @@ void register_allocation(Program *program, std::vector<TempSet>& live_out_per_bl
                definition.setFixed(reg);
             }
 
-            if (!definition.isFixed())
-               definition.setFixed(get_reg(ctx, register_file, definition.getTemp(), parallelcopy, instr));
+            if (!definition.isFixed()) {
+               Temp tmp = definition.getTemp();
+               /* subdword instructions before RDNA write full registers */
+               if (tmp.regClass().is_subdword() &&
+                   !instr_can_access_subdword(instr) &&
+                   ctx.program->chip_class <= GFX9) {
+                  assert(tmp.bytes() <= 4);
+                  tmp = Temp(definition.tempId(), v1);
+               }
+               definition.setFixed(get_reg(ctx, register_file, tmp, parallelcopy, instr));
+            }
 
             assert(definition.isFixed() && ((definition.getTemp().type() == RegType::vgpr && definition.physReg() >= 256) ||
                                             (definition.getTemp().type() != RegType::vgpr && definition.physReg() < 256)));
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 3e0f4584cd3..2f51b8a2eb1 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -46,6 +46,11 @@ void perfwarn(bool cond, const char *msg, Instruction *instr)
 }
 #endif
 
+bool instr_can_access_subdword(aco_ptr<Instruction>& instr)
+{ /* true iff instr is SDWA-encoded or has Format::PSEUDO; other instructions are treated as unable to address sub-dword registers */
+   return instr->isSDWA() || instr->format == Format::PSEUDO;
+}
+
 void validate(Program* program, FILE * output)
 {
    if (!(debug_flags & DEBUG_VALIDATE))
@@ -162,7 +167,7 @@ void validate(Program* program, FILE * output)
          /* check subdword definitions */
          for (unsigned i = 0; i < instr->definitions.size(); i++) {
             if (instr->definitions[i].regClass().is_subdword())
-               check(instr->isSDWA() || instr->format == Format::PSEUDO, "Only SDWA and Pseudo instructions can write subdword registers", instr.get());
+               check(instr_can_access_subdword(instr) || instr->definitions[i].bytes() <= 4, "Only SDWA and Pseudo instructions can write subdword registers larger than 4 bytes", instr.get());
          }
 
          if (instr->isSALU() || instr->isVALU()) {
@@ -456,7 +461,7 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio
                err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d has an out-of-bounds register assignment", i);
             if (op.physReg() == vcc && !program->needs_vcc)
                err |= ra_fail(output, loc, Location(), "Operand %d fixed to vcc but needs_vcc=false", i);
-            if (!(instr->isSDWA() || instr->format == Format::PSEUDO) && op.regClass().is_subdword() && op.physReg().byte())
+            if (!instr_can_access_subdword(instr) && op.regClass().is_subdword() && op.physReg().byte())
                err |= ra_fail(output, loc, assignments.at(op.tempId()).firstloc, "Operand %d must be aligned to a full register", i);
             if (!assignments[op.tempId()].firstloc.block)
                assignments[op.tempId()].firstloc = loc;
@@ -477,6 +482,8 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio
                err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d has an out-of-bounds register assignment", i);
             if (def.physReg() == vcc && !program->needs_vcc)
                err |= ra_fail(output, loc, Location(), "Definition %d fixed to vcc but needs_vcc=false", i);
+            if (!instr_can_access_subdword(instr) && def.regClass().is_subdword() && def.physReg().byte())
+               err |= ra_fail(output, loc, assignments.at(def.tempId()).firstloc, "Definition %d must be aligned to a full register", i);
             if (!assignments[def.tempId()].firstloc.block)
                assignments[def.tempId()].firstloc = loc;
             assignments[def.tempId()].defloc = loc;
@@ -579,9 +586,14 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio
             PhysReg reg = assignments.at(tmp.id()).reg;
             for (unsigned j = 0; j < tmp.bytes(); j++) {
                if (regs[reg.reg_b + j])
-                  err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + i]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
+                  err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d already taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
                regs[reg.reg_b + j] = tmp.id();
             }
+            if (def.regClass().is_subdword() && !instr_can_access_subdword(instr)) { /* such a definition overwrites the full register */
+               for (unsigned j = tmp.bytes(); j < 4; j++) /* bytes of the dword beyond the temporary itself */
+                  if (regs[reg.reg_b + j]) /* test the occupancy entry, not the byte offset (which is practically never 0) */
+                     err |= ra_fail(output, loc, assignments.at(regs[reg.reg_b + j]).defloc, "Assignment of element %d of %%%d overwrites the full register taken by %%%d from instruction", i, tmp.id(), regs[reg.reg_b + j]);
+            }
          }
 
          for (const Definition& def : instr->definitions) {



More information about the mesa-commit mailing list