Mesa (main): freedreno/ir3: handle global atomics

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Nov 23 19:10:17 UTC 2021


Module: Mesa
Branch: main
Commit: 99388f0c27046374466b711efed29820554aa81d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=99388f0c27046374466b711efed29820554aa81d

Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date:   Mon Jul 26 13:50:03 2021 +0300

freedreno/ir3: handle global atomics

Only for a6xx, since we don't know the instructions for global
atomics on previous gens. Per Qualcomm's docs, in OpenCL, atomics
are only supported since a5xx, together with the Generic memory space.

Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8717>

---

 src/compiler/nir/nir_gather_info.c    | 10 ++++++
 src/compiler/nir/nir_intrinsics.py    |  7 ++++
 src/freedreno/ir3/ir3_a4xx.c          |  7 ++++
 src/freedreno/ir3/ir3_a6xx.c          | 68 +++++++++++++++++++++++++++++++++++
 src/freedreno/ir3/ir3_compiler_nir.c  | 14 ++++++++
 src/freedreno/ir3/ir3_context.h       |  2 ++
 src/freedreno/ir3/ir3_nir_lower_64b.c | 47 ++++++++++++++++++++++--
 7 files changed, 152 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c
index 30df0631008..b5f3ddc2ea7 100644
--- a/src/compiler/nir/nir_gather_info.c
+++ b/src/compiler/nir/nir_gather_info.c
@@ -344,6 +344,16 @@ nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
    case nir_intrinsic_global_atomic_umax:
    case nir_intrinsic_global_atomic_umin:
    case nir_intrinsic_global_atomic_xor:
+   case nir_intrinsic_global_atomic_add_ir3:
+   case nir_intrinsic_global_atomic_and_ir3:
+   case nir_intrinsic_global_atomic_comp_swap_ir3:
+   case nir_intrinsic_global_atomic_exchange_ir3:
+   case nir_intrinsic_global_atomic_imax_ir3:
+   case nir_intrinsic_global_atomic_imin_ir3:
+   case nir_intrinsic_global_atomic_or_ir3:
+   case nir_intrinsic_global_atomic_umax_ir3:
+   case nir_intrinsic_global_atomic_umin_ir3:
+   case nir_intrinsic_global_atomic_xor_ir3:
    case nir_intrinsic_image_atomic_add:
    case nir_intrinsic_image_atomic_and:
    case nir_intrinsic_image_atomic_comp_swap:
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 00a72d63084..457ccbd3f57 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -659,18 +659,25 @@ intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
 # 1: The data parameter to the atomic function (i.e. the value to add
 #    in shared_atomic_add, etc).
 # 2: For CompSwap only: the second data parameter.
+#
+# IR3 global operations take 32b vec2 as memory address. IR3 doesn't support
+# float atomics.
 
 def memory_atomic_data1(name):
     intrinsic("deref_atomic_" + name,  src_comp=[-1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("ssbo_atomic_" + name,  src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("shared_atomic_" + name,  src_comp=[1, 1], dest_comp=1, indices=[BASE])
     intrinsic("global_atomic_" + name,  src_comp=[1, 1], dest_comp=1, indices=[BASE])
+    if not name.startswith('f'):
+        intrinsic("global_atomic_" + name + "_ir3",  src_comp=[2, 1], dest_comp=1, indices=[BASE])
 
 def memory_atomic_data2(name):
     intrinsic("deref_atomic_" + name,  src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("ssbo_atomic_" + name,  src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS])
     intrinsic("shared_atomic_" + name,  src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
     intrinsic("global_atomic_" + name,  src_comp=[1, 1, 1], dest_comp=1, indices=[BASE])
+    if not name.startswith('f'):
+        intrinsic("global_atomic_" + name + "_ir3",  src_comp=[2, 1, 1], dest_comp=1, indices=[BASE])
 
 memory_atomic_data1("add")
 memory_atomic_data1("imin")
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index 1b69a6c1fe3..4939ba98373 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -355,6 +355,12 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    return atomic;
 }
 
+static struct ir3_instruction *
+emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+{
+   unreachable("Global atomic are unimplemented on A5xx");
+}
+
 const struct ir3_context_funcs ir3_a4xx_funcs = {
    .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
    .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
@@ -365,4 +371,5 @@ const struct ir3_context_funcs ir3_a4xx_funcs = {
    .emit_intrinsic_image_size = emit_intrinsic_image_size_tex,
    .emit_intrinsic_load_global_ir3 = NULL,
    .emit_intrinsic_store_global_ir3 = NULL,
+   .emit_intrinsic_atomic_global = emit_intrinsic_atomic_global,
 };
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index 70a4752417b..f6ddc61e4a3 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -441,6 +441,73 @@ emit_intrinsic_store_global_ir3(struct ir3_context *ctx,
    stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
 }
 
+static struct ir3_instruction *
+emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+{
+   struct ir3_block *b = ctx->block;
+   struct ir3_instruction *addr, *atomic, *src1;
+   struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[1])[0];
+   type_t type = TYPE_U32;
+
+   addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0],
+                      ir3_get_src(ctx, &intr->src[0])[1]);
+
+   if (intr->intrinsic == nir_intrinsic_global_atomic_comp_swap_ir3) {
+      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[2])[0];
+      src1 = ir3_collect(b, compare, value);
+   } else {
+      src1 = value;
+   }
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_global_atomic_add_ir3:
+      atomic = ir3_ATOMIC_G_ADD(b, addr, 0, src1, 0);
+      break;
+   case nir_intrinsic_global_atomic_imin_ir3:
+      atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
+      type = TYPE_S32;
+      break;
+   case nir_intrinsic_global_atomic_umin_ir3:
+      atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
+      break;
+   case nir_intrinsic_global_atomic_imax_ir3:
+      atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
+      type = TYPE_S32;
+      break;
+   case nir_intrinsic_global_atomic_umax_ir3:
+      atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
+      break;
+   case nir_intrinsic_global_atomic_and_ir3:
+      atomic = ir3_ATOMIC_G_AND(b, addr, 0, src1, 0);
+      break;
+   case nir_intrinsic_global_atomic_or_ir3:
+      atomic = ir3_ATOMIC_G_OR(b, addr, 0, src1, 0);
+      break;
+   case nir_intrinsic_global_atomic_xor_ir3:
+      atomic = ir3_ATOMIC_G_XOR(b, addr, 0, src1, 0);
+      break;
+   case nir_intrinsic_global_atomic_exchange_ir3:
+      atomic = ir3_ATOMIC_G_XCHG(b, addr, 0, src1, 0);
+      break;
+   case nir_intrinsic_global_atomic_comp_swap_ir3:
+      atomic = ir3_ATOMIC_G_CMPXCHG(b, addr, 0, src1, 0);
+      break;
+   default:
+      unreachable("Unknown global atomic op");
+   }
+
+   atomic->cat6.iim_val = 1;
+   atomic->cat6.d = 1;
+   atomic->cat6.type = type;
+   atomic->barrier_class = IR3_BARRIER_BUFFER_W;
+   atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+
+   /* even if nothing consumes the result, we can't DCE the instruction: */
+   array_insert(b, b->keeps, atomic);
+
+   return atomic;
+}
+
 const struct ir3_context_funcs ir3_a6xx_funcs = {
    .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
    .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
@@ -451,4 +518,5 @@ const struct ir3_context_funcs ir3_a6xx_funcs = {
    .emit_intrinsic_image_size = emit_intrinsic_image_size,
    .emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
    .emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
+   .emit_intrinsic_atomic_global = emit_intrinsic_atomic_global,
 };
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index ee2b327a9d5..05c93726a32 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2250,6 +2250,20 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    case nir_intrinsic_bindless_resource_ir3:
       dst[0] = ir3_get_src(ctx, &intr->src[0])[0];
       break;
+   case nir_intrinsic_global_atomic_add_ir3:
+   case nir_intrinsic_global_atomic_imin_ir3:
+   case nir_intrinsic_global_atomic_umin_ir3:
+   case nir_intrinsic_global_atomic_imax_ir3:
+   case nir_intrinsic_global_atomic_umax_ir3:
+   case nir_intrinsic_global_atomic_and_ir3:
+   case nir_intrinsic_global_atomic_or_ir3:
+   case nir_intrinsic_global_atomic_xor_ir3:
+   case nir_intrinsic_global_atomic_exchange_ir3:
+   case nir_intrinsic_global_atomic_comp_swap_ir3: {
+      dst[0] = ctx->funcs->emit_intrinsic_atomic_global(ctx, intr);
+      break;
+   }
+
    default:
       ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
                         nir_intrinsic_infos[intr->intrinsic].name);
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 0e78e9153ff..98aa1085e81 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -188,6 +188,8 @@ struct ir3_context_funcs {
                                           struct ir3_instruction **dst);
    void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx,
                                            nir_intrinsic_instr *intr);
+   struct ir3_instruction *(*emit_intrinsic_atomic_global)(
+      struct ir3_context *ctx, nir_intrinsic_instr *intr);
 };
 
 extern const struct ir3_context_funcs ir3_a4xx_funcs;
diff --git a/src/freedreno/ir3/ir3_nir_lower_64b.c b/src/freedreno/ir3/ir3_nir_lower_64b.c
index e02a6dfc564..9c6ae66dc09 100644
--- a/src/freedreno/ir3/ir3_nir_lower_64b.c
+++ b/src/freedreno/ir3/ir3_nir_lower_64b.c
@@ -229,9 +229,24 @@ lower_64b_global_filter(const nir_instr *instr, const void *unused)
       return false;
 
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-   return (intr->intrinsic == nir_intrinsic_load_global) ||
-          (intr->intrinsic == nir_intrinsic_load_global_constant) ||
-          (intr->intrinsic == nir_intrinsic_store_global);
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_global:
+   case nir_intrinsic_load_global_constant:
+   case nir_intrinsic_store_global:
+   case nir_intrinsic_global_atomic_add:
+   case nir_intrinsic_global_atomic_imin:
+   case nir_intrinsic_global_atomic_umin:
+   case nir_intrinsic_global_atomic_imax:
+   case nir_intrinsic_global_atomic_umax:
+   case nir_intrinsic_global_atomic_and:
+   case nir_intrinsic_global_atomic_or:
+   case nir_intrinsic_global_atomic_xor:
+   case nir_intrinsic_global_atomic_exchange:
+   case nir_intrinsic_global_atomic_comp_swap:
+      return true;
+   default:
+      return false;
+   }
 }
 
 static nir_ssa_def *
@@ -250,6 +265,32 @@ lower_64b_global(nir_builder *b, nir_instr *instr, void *unused)
     * those up into max 4 components per load/store.
     */
 
+#define GLOBAL_IR3_2SRC(name)                                                 \
+   case nir_intrinsic_##name: {                                               \
+      return nir_build_##name##_ir3(b, nir_dest_bit_size(intr->dest), addr,   \
+                                  nir_ssa_for_src(b, intr->src[1], 1));       \
+   }
+
+   switch (intr->intrinsic) {
+   GLOBAL_IR3_2SRC(global_atomic_add)
+   GLOBAL_IR3_2SRC(global_atomic_imin)
+   GLOBAL_IR3_2SRC(global_atomic_umin)
+   GLOBAL_IR3_2SRC(global_atomic_imax)
+   GLOBAL_IR3_2SRC(global_atomic_umax)
+   GLOBAL_IR3_2SRC(global_atomic_and)
+   GLOBAL_IR3_2SRC(global_atomic_or)
+   GLOBAL_IR3_2SRC(global_atomic_xor)
+   GLOBAL_IR3_2SRC(global_atomic_exchange)
+   case nir_intrinsic_global_atomic_comp_swap:
+      return nir_build_global_atomic_comp_swap_ir3(
+         b, nir_dest_bit_size(intr->dest), addr,
+         nir_ssa_for_src(b, intr->src[1], 1),
+         nir_ssa_for_src(b, intr->src[2], 1));
+   default:
+      break;
+   }
+#undef GLOBAL_IR3_2SRC
+
    if (load) {
       unsigned num_comp = nir_intrinsic_dest_components(intr);
       nir_ssa_def *components[num_comp];



More information about the mesa-commit mailing list