Mesa (main): aco: ensure that definitions fixed to operands have matching regclasses

Thu May 5 20:41:25 UTC 2022

Module: Mesa
Branch: main
Commit: 2f0bb39e1621ab610cd6ca788635c64320917404
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f0bb39e1621ab610cd6ca788635c64320917404

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Fri Apr 22 16:46:21 2022 +0100

aco: ensure that definitions fixed to operands have matching regclasses

If the operand is not killed, the definition needs to be large enough so
that the new location for the operand does not intersect with the old
location.

Fixes with zink:
KHR-GL45.shader_image_load_store.basic-allTargets-atomicCS
KHR-GL45.shader_image_load_store.basic-allTargets-atomicGS
KHR-GL45.shader_image_load_store.basic-allTargets-atomicVS

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Gitlab: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6276
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16106>

---

 src/amd/compiler/aco_instruction_selection.cpp  | 23 ++++++++++++++++++-----
 src/amd/compiler/aco_register_allocation.cpp    | 12 +++++++++++-
 src/gallium/drivers/zink/ci/zink-radv-fails.txt |  3 ---
 3 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 1fe28c82657..540886ba2eb 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -6184,10 +6184,11 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
    Builder bld(ctx->program, ctx->block);
 
    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[3].ssa));
+   bool cmpswap = instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
    bool is_64bit = data.bytes() == 8;
    assert((data.bytes() == 4 || data.bytes() == 8) && "only 32/64-bit image atomics implemented.");
 
-   if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap)
+   if (cmpswap)
       data = bld.pseudo(aco_opcode::p_create_vector, bld.def(is_64bit ? v4 : v2),
                         get_ssa_temp(ctx, instr->src[4].ssa), data);
 
@@ -6272,8 +6273,10 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
       mubuf->operands[1] = Operand(vindex);
       mubuf->operands[2] = Operand::c32(0);
       mubuf->operands[3] = Operand(data);
+      Definition def =
+         return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
       if (return_previous)
-         mubuf->definitions[0] = Definition(dst);
+         mubuf->definitions[0] = def;
       mubuf->offset = 0;
       mubuf->idxen = true;
       mubuf->glc = return_previous;
@@ -6282,12 +6285,15 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
       mubuf->sync = sync;
       ctx->program->needs_exact = true;
       ctx->block->instructions.emplace_back(std::move(mubuf));
+      if (return_previous && cmpswap)
+         bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
       return;
    }
 
    std::vector<Temp> coords = get_image_coords(ctx, instr);
    Temp resource = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
-   Definition def = return_previous ? Definition(dst) : Definition();
+   Definition def =
+      return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
    MIMG_instruction* mimg =
       emit_mimg(bld, image_op, def, resource, Operand(s4), coords, 0, Operand(data));
    mimg->glc = return_previous;
@@ -6299,6 +6305,8 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
    mimg->disable_wqm = true;
    mimg->sync = sync;
    ctx->program->needs_exact = true;
+   if (return_previous && cmpswap)
+      bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
    return;
 }
 
@@ -6481,8 +6489,9 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
    Builder bld(ctx->program, ctx->block);
    bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
+   bool cmpswap = instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap;
 
-   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
+   if (cmpswap)
       data = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, data.size() * 2),
                         get_ssa_temp(ctx, instr->src[3].ssa), data);
 
@@ -6552,8 +6561,10 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
    mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
    mubuf->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0);
    mubuf->operands[3] = Operand(data);
+   Definition def =
+      return_previous ? (cmpswap ? bld.def(data.regClass()) : Definition(dst)) : Definition();
    if (return_previous)
-      mubuf->definitions[0] = Definition(dst);
+      mubuf->definitions[0] = def;
    mubuf->offset = 0;
    mubuf->offen = (offset.type() == RegType::vgpr);
    mubuf->glc = return_previous;
@@ -6562,6 +6573,8 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
    mubuf->sync = get_memory_sync_info(instr, storage_buffer, semantic_atomicrmw);
    ctx->program->needs_exact = true;
    ctx->block->instructions.emplace_back(std::move(mubuf));
+   if (return_previous && cmpswap)
+      bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), def.getTemp(), Operand::zero());
 }
 
 void
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 61e384dbdaa..108dfd04b8c 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2711,7 +2711,12 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
             }
          }
 
-         /* handle definitions which must have the same register as an operand */
+         /* Handle definitions which must have the same register as an operand.
+          * We expect that the definition has the same size as the operand, otherwise the new
+          * location for the operand (if it's not killed) might intersect with the old one.
+          * We can't read from the old location because it's corrupted, and we can't write the new
+          * location because that's used by a live-through operand.
+          */
          if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
              instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 ||
              instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 ||
@@ -2721,15 +2726,20 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
              instr->opcode == aco_opcode::v_writelane_b32 ||
              instr->opcode == aco_opcode::v_writelane_b32_e64 ||
              instr->opcode == aco_opcode::v_dot4c_i32_i8) {
+            assert(instr->definitions[0].bytes() == instr->operands[2].bytes() ||
+                   instr->operands[2].regClass() == v1);
             instr->definitions[0].setFixed(instr->operands[2].physReg());
          } else if (instr->opcode == aco_opcode::s_addk_i32 ||
                     instr->opcode == aco_opcode::s_mulk_i32) {
+            assert(instr->definitions[0].bytes() == instr->operands[0].bytes());
             instr->definitions[0].setFixed(instr->operands[0].physReg());
          } else if (instr->isMUBUF() && instr->definitions.size() == 1 &&
                     instr->operands.size() == 4) {
+            assert(instr->definitions[0].bytes() == instr->operands[3].bytes());
             instr->definitions[0].setFixed(instr->operands[3].physReg());
          } else if (instr->isMIMG() && instr->definitions.size() == 1 &&
                     !instr->operands[2].isUndefined()) {
+            assert(instr->definitions[0].bytes() == instr->operands[2].bytes());
             instr->definitions[0].setFixed(instr->operands[2].physReg());
          }
 
diff --git a/src/gallium/drivers/zink/ci/zink-radv-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-fails.txt
index 33c55926244..fe90384874d 100644
--- a/src/gallium/drivers/zink/ci/zink-radv-fails.txt
+++ b/src/gallium/drivers/zink/ci/zink-radv-fails.txt
@@ -1,7 +1,4 @@
 # probable ACO bug: #6276
-KHR-GL46.shader_image_load_store.basic-allTargets-atomicCS,Fail
-KHR-GL46.shader_image_load_store.basic-allTargets-atomicGS,Fail
-KHR-GL46.shader_image_load_store.basic-allTargets-atomicVS,Fail
 KHR-GL46.shader_storage_buffer_object.basic-operations-case1-cs,Fail
 KHR-GL46.shader_storage_buffer_object.basic-operations-case1-vs,Fail