Mesa (master): aco: only use single-dword loads/stores for spilling

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Nov 4 19:14:40 UTC 2019


Module: Mesa
Branch: master
Commit: 5c7dcb15e0cc98fe9fa5fa25f320f2bdd71187c3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c7dcb15e0cc98fe9fa5fa25f320f2bdd71187c3

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Fri Nov  1 09:06:26 2019 +0100

aco: only use single-dword loads/stores for spilling

Fixes: 86786999189c43b4a2c8e1c1a18b55cd2f369fff "aco: implement VGPR spilling"

Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>

---

 src/amd/compiler/aco_spill.cpp | 51 +++++++++---------------------------------
 1 file changed, 10 insertions(+), 41 deletions(-)

diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 9f687da4b98..276dcbd7c5b 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1291,9 +1291,9 @@ Temp load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
       rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
    }
-   /* older generations need element size = 16 bytes. element size removed in GFX9 */
+   /* older generations need element size = 4 bytes. element size removed in GFX9 */
    if (ctx.program->chip_class <= GFX8)
-      rsrc_conf |= S_008F0C_ELEMENT_SIZE(3);
+      rsrc_conf |= S_008F0C_ELEMENT_SIZE(1);
 
    return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
                      private_segment_buffer, Operand(-1u),
@@ -1544,37 +1544,21 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
                }
 
                unsigned offset = base_offset + spill_slot * 4;
-               aco_opcode opcode;
+               aco_opcode opcode = aco_opcode::buffer_store_dword;
                assert((*it)->operands[0].isTemp());
                Temp temp = (*it)->operands[0].getTemp();
                assert(temp.type() == RegType::vgpr && !temp.is_linear());
-               switch (temp.size()) {
-               case 1: opcode = aco_opcode::buffer_store_dword; break;
-               case 2: opcode = aco_opcode::buffer_store_dwordx2; break;
-               case 6: temp = bld.tmp(v3); /* fallthrough */
-               case 3: opcode = aco_opcode::buffer_store_dwordx3; break;
-               case 8: temp = bld.tmp(v4); /* fallthrough */
-               case 4: opcode = aco_opcode::buffer_store_dwordx4; break;
-               default: {
+               if (temp.size() > 1) {
                   Instruction* split{create_instruction<Pseudo_instruction>(aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())};
                   split->operands[0] = Operand(temp);
                   for (unsigned i = 0; i < temp.size(); i++)
                      split->definitions[i] = bld.def(v1);
                   bld.insert(split);
-                  opcode = aco_opcode::buffer_store_dword;
                   for (unsigned i = 0; i < temp.size(); i++)
                      bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
-                  continue;
-               }
+               } else {
+                  bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
                }
-
-               if ((*it)->operands[0].size() > 4) {
-                  Temp temp2 = bld.pseudo(aco_opcode::p_split_vector, bld.def(temp.regClass()), Definition(temp), (*it)->operands[0]);
-                  bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp2, offset, false);
-                  offset += temp.size() * 4;
-               }
-               bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
-
             } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
                ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
 
@@ -1629,35 +1613,20 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
                }
 
                unsigned offset = base_offset + spill_slot * 4;
-               aco_opcode opcode;
+               aco_opcode opcode = aco_opcode::buffer_load_dword;
                Definition def = (*it)->definitions[0];
-               switch (def.size()) {
-               case 1: opcode = aco_opcode::buffer_load_dword; break;
-               case 2: opcode = aco_opcode::buffer_load_dwordx2; break;
-               case 6: def = bld.def(v3); /* fallthrough */
-               case 3: opcode = aco_opcode::buffer_load_dwordx3; break;
-               case 8: def = bld.def(v4); /* fallthrough */
-               case 4: opcode = aco_opcode::buffer_load_dwordx4; break;
-               default: {
+               if (def.size() > 1) {
                   Instruction* vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
                   vec->definitions[0] = def;
-                  opcode = aco_opcode::buffer_load_dword;
                   for (unsigned i = 0; i < def.size(); i++) {
                      Temp tmp = bld.tmp(v1);
                      vec->operands[i] = Operand(tmp);
                      bld.mubuf(opcode, Definition(tmp), Operand(), scratch_rsrc, scratch_offset, offset + i * 4, false);
                   }
                   bld.insert(vec);
-                  continue;
-               }
+               } else {
+                  bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
                }
-
-               bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
-               if ((*it)->definitions[0].size() > 4) {
-                  Temp temp2 = bld.mubuf(opcode, bld.def(def.regClass()), Operand(), scratch_rsrc, scratch_offset, offset + def.size() * 4, false);
-                  bld.pseudo(aco_opcode::p_create_vector, (*it)->definitions[0], def.getTemp(), temp2);
-               }
-
             } else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
                uint32_t spill_slot = sgpr_slot[spill_id];
                reload_in_loop[spill_slot / 64] = block.loop_nest_depth > 0;




More information about the mesa-commit mailing list