Mesa (main): freedreno/ir3: add a6xx global atomics and separate atomic opcodes

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Nov 23 19:10:17 UTC 2021


Module: Mesa
Branch: main
Commit: 5d5b1fc4722fa8db9b74b20d113c3f85d3f6bcb9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d5b1fc4722fa8db9b74b20d113c3f85d3f6bcb9

Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date:   Fri Jan 22 19:51:56 2021 +0200

freedreno/ir3: add a6xx global atomics and separate atomic opcodes

Separating atomic opcodes makes it possible to express a6xx global
atomics, which take an iova in SRC1. They would be needed by
VK_KHR_buffer_device_address.
The change also makes it easier to distinguish atomics in conditions.

Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8717>

---

 src/freedreno/.gitlab-ci/reference/crash.log |  12 +-
 src/freedreno/ir3/disasm-a3xx.c              |  33 +++++
 src/freedreno/ir3/instr-a3xx.h               | 106 +++++++++++++++-
 src/freedreno/ir3/ir3.c                      |   5 +-
 src/freedreno/ir3/ir3.h                      |  68 ++++++-----
 src/freedreno/ir3/ir3_a4xx.c                 |  36 +++---
 src/freedreno/ir3/ir3_a6xx.c                 |  36 +++---
 src/freedreno/ir3/ir3_legalize.c             |  15 ++-
 src/freedreno/ir3/ir3_lexer.l                |  23 ++++
 src/freedreno/ir3/ir3_parser.y               |  81 ++++++++++---
 src/freedreno/ir3/tests/disasm.c             |  11 +-
 src/freedreno/isa/encode.c                   |   5 +-
 src/freedreno/isa/ir3-cat6.xml               | 173 +++++++++++++++++++++++----
 13 files changed, 466 insertions(+), 138 deletions(-)

diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log
index e26db4d25f3..45edfb3aeb7 100644
--- a/src/freedreno/.gitlab-ci/reference/crash.log
+++ b/src/freedreno/.gitlab-ci/reference/crash.log
@@ -4638,12 +4638,12 @@ shader-blocks:
       size: 2048
 	:0:0000:0000[00000000x_00003002x] nop
 	:0:0001:0001[00000000x_00000000x] nop
-	:6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222	; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
-	:6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222	; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
-	:6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222	; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
-	:6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222	; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
-	:6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222	; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
-	:6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222	; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
+	:6:0002:0002[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+	:6:0003:0003[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+	:6:0004:0004[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+	:6:0005:0005[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+	:6:0006:0006[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+	:6:0007:0007[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
 	-----------------------------------------------
 	8192 (0x2000) bytes
 	000000: 00003002 00000000 00000000 00000000	|.0..............|
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index 3044fe145de..24b3805085c 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -333,6 +333,39 @@ static const struct opc_info {
    OPC(6, OPC_ATOMIC_AND,     atomic.and),
    OPC(6, OPC_ATOMIC_OR,      atomic.or),
    OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
+   OPC(6, OPC_ATOMIC_B_ADD,     atomic.b.add),
+   OPC(6, OPC_ATOMIC_B_SUB,     atomic.b.sub),
+   OPC(6, OPC_ATOMIC_B_XCHG,    atomic.b.xchg),
+   OPC(6, OPC_ATOMIC_B_INC,     atomic.b.inc),
+   OPC(6, OPC_ATOMIC_B_DEC,     atomic.b.dec),
+   OPC(6, OPC_ATOMIC_B_CMPXCHG, atomic.b.cmpxchg),
+   OPC(6, OPC_ATOMIC_B_MIN,     atomic.b.min),
+   OPC(6, OPC_ATOMIC_B_MAX,     atomic.b.max),
+   OPC(6, OPC_ATOMIC_B_AND,     atomic.b.and),
+   OPC(6, OPC_ATOMIC_B_OR,      atomic.b.or),
+   OPC(6, OPC_ATOMIC_B_XOR,     atomic.b.xor),
+   OPC(6, OPC_ATOMIC_S_ADD,     atomic.s.add),
+   OPC(6, OPC_ATOMIC_S_SUB,     atomic.s.sub),
+   OPC(6, OPC_ATOMIC_S_XCHG,    atomic.s.xchg),
+   OPC(6, OPC_ATOMIC_S_INC,     atomic.s.inc),
+   OPC(6, OPC_ATOMIC_S_DEC,     atomic.s.dec),
+   OPC(6, OPC_ATOMIC_S_CMPXCHG, atomic.s.cmpxchg),
+   OPC(6, OPC_ATOMIC_S_MIN,     atomic.s.min),
+   OPC(6, OPC_ATOMIC_S_MAX,     atomic.s.max),
+   OPC(6, OPC_ATOMIC_S_AND,     atomic.s.and),
+   OPC(6, OPC_ATOMIC_S_OR,      atomic.s.or),
+   OPC(6, OPC_ATOMIC_S_XOR,     atomic.s.xor),
+   OPC(6, OPC_ATOMIC_G_ADD,     atomic.g.add),
+   OPC(6, OPC_ATOMIC_G_SUB,     atomic.g.sub),
+   OPC(6, OPC_ATOMIC_G_XCHG,    atomic.g.xchg),
+   OPC(6, OPC_ATOMIC_G_INC,     atomic.g.inc),
+   OPC(6, OPC_ATOMIC_G_DEC,     atomic.g.dec),
+   OPC(6, OPC_ATOMIC_G_CMPXCHG, atomic.g.cmpxchg),
+   OPC(6, OPC_ATOMIC_G_MIN,     atomic.g.min),
+   OPC(6, OPC_ATOMIC_G_MAX,     atomic.g.max),
+   OPC(6, OPC_ATOMIC_G_AND,     atomic.g.and),
+   OPC(6, OPC_ATOMIC_G_OR,      atomic.g.or),
+   OPC(6, OPC_ATOMIC_G_XOR,     atomic.g.xor),
    OPC(6, OPC_LDGB,         ldgb),
    OPC(6, OPC_STGB,         stgb),
    OPC(6, OPC_STIB,         stib),
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index 8957182b2aa..8a85f575ddb 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -306,11 +306,35 @@ typedef enum {
    OPC_ATOMIC_B_OR       = _OPC(6, 53),
    OPC_ATOMIC_B_XOR      = _OPC(6, 54),
 
-   OPC_LDG_A           = _OPC(6, 55),
-   OPC_STG_A           = _OPC(6, 56),
-
-   OPC_SPILL_MACRO     = _OPC(6, 57),
-   OPC_RELOAD_MACRO    = _OPC(6, 58),
+   OPC_ATOMIC_S_ADD      = _OPC(6, 55),
+   OPC_ATOMIC_S_SUB      = _OPC(6, 56),
+   OPC_ATOMIC_S_XCHG     = _OPC(6, 57),
+   OPC_ATOMIC_S_INC      = _OPC(6, 58),
+   OPC_ATOMIC_S_DEC      = _OPC(6, 59),
+   OPC_ATOMIC_S_CMPXCHG  = _OPC(6, 60),
+   OPC_ATOMIC_S_MIN      = _OPC(6, 61),
+   OPC_ATOMIC_S_MAX      = _OPC(6, 62),
+   OPC_ATOMIC_S_AND      = _OPC(6, 63),
+   OPC_ATOMIC_S_OR       = _OPC(6, 64),
+   OPC_ATOMIC_S_XOR      = _OPC(6, 65),
+
+   OPC_ATOMIC_G_ADD      = _OPC(6, 66),
+   OPC_ATOMIC_G_SUB      = _OPC(6, 67),
+   OPC_ATOMIC_G_XCHG     = _OPC(6, 68),
+   OPC_ATOMIC_G_INC      = _OPC(6, 69),
+   OPC_ATOMIC_G_DEC      = _OPC(6, 70),
+   OPC_ATOMIC_G_CMPXCHG  = _OPC(6, 71),
+   OPC_ATOMIC_G_MIN      = _OPC(6, 72),
+   OPC_ATOMIC_G_MAX      = _OPC(6, 73),
+   OPC_ATOMIC_G_AND      = _OPC(6, 74),
+   OPC_ATOMIC_G_OR       = _OPC(6, 75),
+   OPC_ATOMIC_G_XOR      = _OPC(6, 76),
+
+   OPC_LDG_A           = _OPC(6, 77),
+   OPC_STG_A           = _OPC(6, 78),
+
+   OPC_SPILL_MACRO     = _OPC(6, 79),
+   OPC_RELOAD_MACRO    = _OPC(6, 80),
 
    /* category 7: */
    OPC_BAR             = _OPC(7, 0),
@@ -592,7 +616,7 @@ is_madsh(opc_t opc)
 }
 
 static inline bool
-is_atomic(opc_t opc)
+is_local_atomic(opc_t opc)
 {
    switch (opc) {
    case OPC_ATOMIC_ADD:
@@ -612,6 +636,76 @@ is_atomic(opc_t opc)
    }
 }
 
+static inline bool
+is_global_a3xx_atomic(opc_t opc)
+{
+   switch (opc) {
+   case OPC_ATOMIC_S_ADD:
+   case OPC_ATOMIC_S_SUB:
+   case OPC_ATOMIC_S_XCHG:
+   case OPC_ATOMIC_S_INC:
+   case OPC_ATOMIC_S_DEC:
+   case OPC_ATOMIC_S_CMPXCHG:
+   case OPC_ATOMIC_S_MIN:
+   case OPC_ATOMIC_S_MAX:
+   case OPC_ATOMIC_S_AND:
+   case OPC_ATOMIC_S_OR:
+   case OPC_ATOMIC_S_XOR:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+is_global_a6xx_atomic(opc_t opc)
+{
+   switch (opc) {
+   case OPC_ATOMIC_G_ADD:
+   case OPC_ATOMIC_G_SUB:
+   case OPC_ATOMIC_G_XCHG:
+   case OPC_ATOMIC_G_INC:
+   case OPC_ATOMIC_G_DEC:
+   case OPC_ATOMIC_G_CMPXCHG:
+   case OPC_ATOMIC_G_MIN:
+   case OPC_ATOMIC_G_MAX:
+   case OPC_ATOMIC_G_AND:
+   case OPC_ATOMIC_G_OR:
+   case OPC_ATOMIC_G_XOR:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+is_bindless_atomic(opc_t opc)
+{
+   switch (opc) {
+   case OPC_ATOMIC_B_ADD:
+   case OPC_ATOMIC_B_SUB:
+   case OPC_ATOMIC_B_XCHG:
+   case OPC_ATOMIC_B_INC:
+   case OPC_ATOMIC_B_DEC:
+   case OPC_ATOMIC_B_CMPXCHG:
+   case OPC_ATOMIC_B_MIN:
+   case OPC_ATOMIC_B_MAX:
+   case OPC_ATOMIC_B_AND:
+   case OPC_ATOMIC_B_OR:
+   case OPC_ATOMIC_B_XOR:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static inline bool
+is_atomic(opc_t opc)
+{
+   return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
+          is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
+}
+
 static inline bool
 is_ssbo(opc_t opc)
 {
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index db116f01017..766a7adec7b 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -952,10 +952,11 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
          /* disallow immediates in anything but the SSBO slot argument for
           * cat6 instructions:
           */
-         if (is_atomic(instr->opc) && (n != 0))
+         if (is_global_a3xx_atomic(instr->opc) && (n != 0))
             return false;
 
-         if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
+         if (is_local_atomic(instr->opc) || is_global_a6xx_atomic(instr->opc) ||
+             is_bindless_atomic(instr->opc))
             return false;
 
          if (instr->opc == OPC_STG && (n == 2))
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index c5470d28ad4..25a5f36731f 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -282,19 +282,18 @@ struct ir3_instruction {
       IR3_INSTR_P = 0x080,
       IR3_INSTR_S = 0x100,
       IR3_INSTR_S2EN = 0x200,
-      IR3_INSTR_G = 0x400,
-      IR3_INSTR_SAT = 0x800,
+      IR3_INSTR_SAT = 0x400,
       /* (cat5/cat6) Bindless */
-      IR3_INSTR_B = 0x1000,
+      IR3_INSTR_B = 0x800,
       /* (cat5/cat6) nonuniform */
-      IR3_INSTR_NONUNIF = 0x02000,
+      IR3_INSTR_NONUNIF = 0x1000,
       /* (cat5-only) Get some parts of the encoding from a1.x */
-      IR3_INSTR_A1EN = 0x04000,
+      IR3_INSTR_A1EN = 0x02000,
       /* meta-flags, for intermediate stages of IR, ie.
        * before register assignment is done:
        */
-      IR3_INSTR_MARK = 0x08000,
-      IR3_INSTR_UNUSED = 0x10000,
+      IR3_INSTR_MARK = 0x04000,
+      IR3_INSTR_UNUSED = 0x08000,
    } flags;
    uint8_t repeat;
    uint8_t nop;
@@ -2183,17 +2182,28 @@ INSTR3NODST(STIB);
 INSTR2(LDIB);
 INSTR5(LDG_A);
 INSTR6NODST(STG_A);
-INSTR3F(G, ATOMIC_ADD)
-INSTR3F(G, ATOMIC_SUB)
-INSTR3F(G, ATOMIC_XCHG)
-INSTR3F(G, ATOMIC_INC)
-INSTR3F(G, ATOMIC_DEC)
-INSTR3F(G, ATOMIC_CMPXCHG)
-INSTR3F(G, ATOMIC_MIN)
-INSTR3F(G, ATOMIC_MAX)
-INSTR3F(G, ATOMIC_AND)
-INSTR3F(G, ATOMIC_OR)
-INSTR3F(G, ATOMIC_XOR)
+INSTR2(ATOMIC_G_ADD)
+INSTR2(ATOMIC_G_SUB)
+INSTR2(ATOMIC_G_XCHG)
+INSTR2(ATOMIC_G_INC)
+INSTR2(ATOMIC_G_DEC)
+INSTR2(ATOMIC_G_CMPXCHG)
+INSTR2(ATOMIC_G_MIN)
+INSTR2(ATOMIC_G_MAX)
+INSTR2(ATOMIC_G_AND)
+INSTR2(ATOMIC_G_OR)
+INSTR2(ATOMIC_G_XOR)
+INSTR3(ATOMIC_B_ADD)
+INSTR3(ATOMIC_B_SUB)
+INSTR3(ATOMIC_B_XCHG)
+INSTR3(ATOMIC_B_INC)
+INSTR3(ATOMIC_B_DEC)
+INSTR3(ATOMIC_B_CMPXCHG)
+INSTR3(ATOMIC_B_MIN)
+INSTR3(ATOMIC_B_MAX)
+INSTR3(ATOMIC_B_AND)
+INSTR3(ATOMIC_B_OR)
+INSTR3(ATOMIC_B_XOR)
 #elif GPU >= 400
 INSTR3(LDGB)
 #if GPU >= 500
@@ -2201,17 +2211,17 @@ INSTR3(LDIB)
 #endif
 INSTR4NODST(STGB)
 INSTR4NODST(STIB)
-INSTR4F(G, ATOMIC_ADD)
-INSTR4F(G, ATOMIC_SUB)
-INSTR4F(G, ATOMIC_XCHG)
-INSTR4F(G, ATOMIC_INC)
-INSTR4F(G, ATOMIC_DEC)
-INSTR4F(G, ATOMIC_CMPXCHG)
-INSTR4F(G, ATOMIC_MIN)
-INSTR4F(G, ATOMIC_MAX)
-INSTR4F(G, ATOMIC_AND)
-INSTR4F(G, ATOMIC_OR)
-INSTR4F(G, ATOMIC_XOR)
+INSTR4(ATOMIC_S_ADD)
+INSTR4(ATOMIC_S_SUB)
+INSTR4(ATOMIC_S_XCHG)
+INSTR4(ATOMIC_S_INC)
+INSTR4(ATOMIC_S_DEC)
+INSTR4(ATOMIC_S_CMPXCHG)
+INSTR4(ATOMIC_S_MIN)
+INSTR4(ATOMIC_S_MAX)
+INSTR4(ATOMIC_S_AND)
+INSTR4(ATOMIC_S_OR)
+INSTR4(ATOMIC_S_XOR)
 #endif
 
 /* cat7 instructions: */
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index 2339fa3d67a..1b69a6c1fe3 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -135,39 +135,39 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 
    switch (intr->intrinsic) {
    case nir_intrinsic_ssbo_atomic_add_ir3:
-      atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_ADD(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       break;
    case nir_intrinsic_ssbo_atomic_imin_ir3:
-      atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       type = TYPE_S32;
       break;
    case nir_intrinsic_ssbo_atomic_umin_ir3:
-      atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       break;
    case nir_intrinsic_ssbo_atomic_imax_ir3:
-      atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       type = TYPE_S32;
       break;
    case nir_intrinsic_ssbo_atomic_umax_ir3:
-      atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       break;
    case nir_intrinsic_ssbo_atomic_and_ir3:
-      atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_AND(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       break;
    case nir_intrinsic_ssbo_atomic_or_ir3:
-      atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_OR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       break;
    case nir_intrinsic_ssbo_atomic_xor_ir3:
-      atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_XOR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       break;
    case nir_intrinsic_ssbo_atomic_exchange_ir3:
-      atomic = ir3_ATOMIC_XCHG_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+      atomic = ir3_ATOMIC_S_XCHG(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
       break;
    case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
       /* for cmpxchg, src0 is [ui]vec2(data, compare): */
       data = ir3_collect(b, src3, data);
       struct ir3_instruction *dword_offset = ir3_get_src(ctx, &intr->src[4])[0];
-      atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, data, 0, dword_offset, 0,
+      atomic = ir3_ATOMIC_S_CMPXCHG(b, ssbo, 0, data, 0, dword_offset, 0,
                                     byte_offset, 0);
       break;
    default:
@@ -311,32 +311,32 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 
    switch (intr->intrinsic) {
    case nir_intrinsic_image_atomic_add:
-      atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_ADD(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    case nir_intrinsic_image_atomic_imin:
    case nir_intrinsic_image_atomic_umin:
-      atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_MIN(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    case nir_intrinsic_image_atomic_imax:
    case nir_intrinsic_image_atomic_umax:
-      atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_MAX(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    case nir_intrinsic_image_atomic_and:
-      atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_AND(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    case nir_intrinsic_image_atomic_or:
-      atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_OR(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    case nir_intrinsic_image_atomic_xor:
-      atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_XOR(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    case nir_intrinsic_image_atomic_exchange:
-      atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_XCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    case nir_intrinsic_image_atomic_comp_swap:
       /* for cmpxchg, src0 is [ui]vec2(data, compare): */
       src0 = ir3_collect(b, ir3_get_src(ctx, &intr->src[4])[0], src0);
-      atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+      atomic = ir3_ATOMIC_S_CMPXCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
       break;
    default:
       unreachable("boo");
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index 04efc0adfe0..70a4752417b 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -144,36 +144,36 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 
    switch (intr->intrinsic) {
    case nir_intrinsic_ssbo_atomic_add_ir3:
-      atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_ssbo_atomic_imin_ir3:
-      atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
       type = TYPE_S32;
       break;
    case nir_intrinsic_ssbo_atomic_umin_ir3:
-      atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_ssbo_atomic_imax_ir3:
-      atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
       type = TYPE_S32;
       break;
    case nir_intrinsic_ssbo_atomic_umax_ir3:
-      atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_ssbo_atomic_and_ir3:
-      atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_ssbo_atomic_or_ir3:
-      atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_ssbo_atomic_xor_ir3:
-      atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_ssbo_atomic_exchange_ir3:
-      atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
-      atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
       break;
    default:
       unreachable("boo");
@@ -288,39 +288,39 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
    switch (intr->intrinsic) {
    case nir_intrinsic_image_atomic_add:
    case nir_intrinsic_bindless_image_atomic_add:
-      atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_image_atomic_imin:
    case nir_intrinsic_image_atomic_umin:
    case nir_intrinsic_bindless_image_atomic_imin:
    case nir_intrinsic_bindless_image_atomic_umin:
-      atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_image_atomic_imax:
    case nir_intrinsic_image_atomic_umax:
    case nir_intrinsic_bindless_image_atomic_imax:
    case nir_intrinsic_bindless_image_atomic_umax:
-      atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_image_atomic_and:
    case nir_intrinsic_bindless_image_atomic_and:
-      atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_image_atomic_or:
    case nir_intrinsic_bindless_image_atomic_or:
-      atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_image_atomic_xor:
    case nir_intrinsic_bindless_image_atomic_xor:
-      atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_image_atomic_exchange:
    case nir_intrinsic_bindless_image_atomic_exchange:
-      atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
       break;
    case nir_intrinsic_image_atomic_comp_swap:
    case nir_intrinsic_bindless_image_atomic_comp_swap:
-      atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
+      atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
       break;
    default:
       unreachable("boo");
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 59765f7317b..eaa393bc1a2 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -273,19 +273,18 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
          else
             regmask_set(&state->needs_sy, n->dsts[0]);
       } else if (is_atomic(n->opc)) {
-         if (n->flags & IR3_INSTR_G) {
-            if (ctx->compiler->gen >= 6) {
-               /* New encoding, returns  result via second src: */
-               regmask_set(&state->needs_sy, n->srcs[2]);
-            } else {
-               regmask_set(&state->needs_sy, n->dsts[0]);
-            }
+         if (is_bindless_atomic(n->opc)) {
+            regmask_set(&state->needs_sy, n->srcs[2]);
+         } else if (is_global_a3xx_atomic(n->opc) ||
+                    is_global_a6xx_atomic(n->opc)) {
+            regmask_set(&state->needs_sy, n->dsts[0]);
          } else {
             regmask_set(&state->needs_ss, n->dsts[0]);
          }
       }
 
-      if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G)))
+      if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) ||
+          is_bindless_atomic(n->opc))
          ctx->so->has_ssbo = true;
 
       /* both tex/sfu appear to not always immediately consume
diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l
index 6b9ecab8f5a..2353a049eb9 100644
--- a/src/freedreno/ir3/ir3_lexer.l
+++ b/src/freedreno/ir3/ir3_lexer.l
@@ -339,6 +339,29 @@ static int parse_w(const char *str)
 "atomic.b.and"                    return TOKEN(T_OP_ATOMIC_B_AND);
 "atomic.b.or"                     return TOKEN(T_OP_ATOMIC_B_OR);
 "atomic.b.xor"                    return TOKEN(T_OP_ATOMIC_B_XOR);
+"atomic.s.add"                    return TOKEN(T_OP_ATOMIC_S_ADD);
+"atomic.s.sub"                    return TOKEN(T_OP_ATOMIC_S_SUB);
+"atomic.s.xchg"                   return TOKEN(T_OP_ATOMIC_S_XCHG);
+"atomic.s.inc"                    return TOKEN(T_OP_ATOMIC_S_INC);
+"atomic.s.dec"                    return TOKEN(T_OP_ATOMIC_S_DEC);
+"atomic.s.cmpxchg"                return TOKEN(T_OP_ATOMIC_S_CMPXCHG);
+"atomic.s.min"                    return TOKEN(T_OP_ATOMIC_S_MIN);
+"atomic.s.max"                    return TOKEN(T_OP_ATOMIC_S_MAX);
+"atomic.s.and"                    return TOKEN(T_OP_ATOMIC_S_AND);
+"atomic.s.or"                     return TOKEN(T_OP_ATOMIC_S_OR);
+"atomic.s.xor"                    return TOKEN(T_OP_ATOMIC_S_XOR);
+"atomic.g.add"                    return TOKEN(T_OP_ATOMIC_G_ADD);
+"atomic.g.sub"                    return TOKEN(T_OP_ATOMIC_G_SUB);
+"atomic.g.xchg"                   return TOKEN(T_OP_ATOMIC_G_XCHG);
+"atomic.g.inc"                    return TOKEN(T_OP_ATOMIC_G_INC);
+"atomic.g.dec"                    return TOKEN(T_OP_ATOMIC_G_DEC);
+"atomic.g.cmpxchg"                return TOKEN(T_OP_ATOMIC_G_CMPXCHG);
+"atomic.g.min"                    return TOKEN(T_OP_ATOMIC_G_MIN);
+"atomic.g.max"                    return TOKEN(T_OP_ATOMIC_G_MAX);
+"atomic.g.and"                    return TOKEN(T_OP_ATOMIC_G_AND);
+"atomic.g.or"                     return TOKEN(T_OP_ATOMIC_G_OR);
+"atomic.g.xor"                    return TOKEN(T_OP_ATOMIC_G_XOR);
+
 "ldgb"                            return TOKEN(T_OP_LDGB);
 "stgb"                            return TOKEN(T_OP_STGB);
 "stib"                            return TOKEN(T_OP_STIB);
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index d41c38bc681..674e2fe2003 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -569,6 +569,28 @@ static void print_token(FILE *file, int type, YYSTYPE value)
 %token <tok> T_OP_ATOMIC_B_AND
 %token <tok> T_OP_ATOMIC_B_OR
 %token <tok> T_OP_ATOMIC_B_XOR
+%token <tok> T_OP_ATOMIC_S_ADD
+%token <tok> T_OP_ATOMIC_S_SUB
+%token <tok> T_OP_ATOMIC_S_XCHG
+%token <tok> T_OP_ATOMIC_S_INC
+%token <tok> T_OP_ATOMIC_S_DEC
+%token <tok> T_OP_ATOMIC_S_CMPXCHG
+%token <tok> T_OP_ATOMIC_S_MIN
+%token <tok> T_OP_ATOMIC_S_MAX
+%token <tok> T_OP_ATOMIC_S_AND
+%token <tok> T_OP_ATOMIC_S_OR
+%token <tok> T_OP_ATOMIC_S_XOR
+%token <tok> T_OP_ATOMIC_G_ADD
+%token <tok> T_OP_ATOMIC_G_SUB
+%token <tok> T_OP_ATOMIC_G_XCHG
+%token <tok> T_OP_ATOMIC_G_INC
+%token <tok> T_OP_ATOMIC_G_DEC
+%token <tok> T_OP_ATOMIC_G_CMPXCHG
+%token <tok> T_OP_ATOMIC_G_MIN
+%token <tok> T_OP_ATOMIC_G_MAX
+%token <tok> T_OP_ATOMIC_G_AND
+%token <tok> T_OP_ATOMIC_G_OR
+%token <tok> T_OP_ATOMIC_G_XOR
 %token <tok> T_OP_LDGB
 %token <tok> T_OP_STGB
 %token <tok> T_OP_STIB
@@ -1020,7 +1042,7 @@ cat6_imm_offset:   offset    { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
 cat6_offset:       cat6_imm_offset
 |                  '+' src
 cat6_dst_offset:   offset    { instr->cat6.dst_offset = $1; }
-|                  '+' src   { instr->flags |= IR3_INSTR_G; }
+|                  '+' src
 
 cat6_immed:        integer   { instr->cat6.iim_val = $1; }
 
@@ -1068,14 +1090,39 @@ cat6_atomic_opc:   T_OP_ATOMIC_ADD     { new_instr(OPC_ATOMIC_ADD); }
 |                  T_OP_ATOMIC_OR      { new_instr(OPC_ATOMIC_OR); }
 |                  T_OP_ATOMIC_XOR     { new_instr(OPC_ATOMIC_XOR); }
 
-cat6_atomic_g:     cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src {
-                       instr->flags |= IR3_INSTR_G;
-                   }
+cat6_a3xx_atomic_opc:   T_OP_ATOMIC_S_ADD     { new_instr(OPC_ATOMIC_S_ADD); }
+|                       T_OP_ATOMIC_S_SUB     { new_instr(OPC_ATOMIC_S_SUB); }
+|                       T_OP_ATOMIC_S_XCHG    { new_instr(OPC_ATOMIC_S_XCHG); }
+|                       T_OP_ATOMIC_S_INC     { new_instr(OPC_ATOMIC_S_INC); }
+|                       T_OP_ATOMIC_S_DEC     { new_instr(OPC_ATOMIC_S_DEC); }
+|                       T_OP_ATOMIC_S_CMPXCHG { new_instr(OPC_ATOMIC_S_CMPXCHG); }
+|                       T_OP_ATOMIC_S_MIN     { new_instr(OPC_ATOMIC_S_MIN); }
+|                       T_OP_ATOMIC_S_MAX     { new_instr(OPC_ATOMIC_S_MAX); }
+|                       T_OP_ATOMIC_S_AND     { new_instr(OPC_ATOMIC_S_AND); }
+|                       T_OP_ATOMIC_S_OR      { new_instr(OPC_ATOMIC_S_OR); }
+|                       T_OP_ATOMIC_S_XOR     { new_instr(OPC_ATOMIC_S_XOR); }
+
+cat6_a6xx_atomic_opc:   T_OP_ATOMIC_G_ADD     { new_instr(OPC_ATOMIC_G_ADD); }
+|                       T_OP_ATOMIC_G_SUB     { new_instr(OPC_ATOMIC_G_SUB); }
+|                       T_OP_ATOMIC_G_XCHG    { new_instr(OPC_ATOMIC_G_XCHG); }
+|                       T_OP_ATOMIC_G_INC     { new_instr(OPC_ATOMIC_G_INC); }
+|                       T_OP_ATOMIC_G_DEC     { new_instr(OPC_ATOMIC_G_DEC); }
+|                       T_OP_ATOMIC_G_CMPXCHG { new_instr(OPC_ATOMIC_G_CMPXCHG); }
+|                       T_OP_ATOMIC_G_MIN     { new_instr(OPC_ATOMIC_G_MIN); }
+|                       T_OP_ATOMIC_G_MAX     { new_instr(OPC_ATOMIC_G_MAX); }
+|                       T_OP_ATOMIC_G_AND     { new_instr(OPC_ATOMIC_G_AND); }
+|                       T_OP_ATOMIC_G_OR      { new_instr(OPC_ATOMIC_G_OR); }
+|                       T_OP_ATOMIC_G_XOR     { new_instr(OPC_ATOMIC_G_XOR); }
+
+cat6_a3xx_atomic_s: cat6_a3xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src
+
+cat6_a6xx_atomic_g: cat6_a6xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' src ',' src
 
 cat6_atomic_l:     cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'l' dst_reg ',' 'l' '[' cat6_reg_or_immed ']' ',' src
 
-cat6_atomic:       cat6_atomic_g
-|                  cat6_atomic_l
+cat6_atomic:       cat6_atomic_l
+|                  cat6_a3xx_atomic_s
+|                  cat6_a6xx_atomic_g
 
 cat6_ibo_opc_1src: T_OP_RESINFO   { new_instr(OPC_RESINFO); }
 
@@ -1104,17 +1151,17 @@ cat6_reg_or_immed: src
 
 cat6_bindless_ibo_opc_1src: T_OP_RESINFO_B       { new_instr(OPC_RESINFO); }
 
-cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD        { new_instr(OPC_ATOMIC_ADD)->flags  |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_SUB        { new_instr(OPC_ATOMIC_SUB)->flags  |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_XCHG       { new_instr(OPC_ATOMIC_XCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_INC        { new_instr(OPC_ATOMIC_INC)->flags  |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_DEC        { new_instr(OPC_ATOMIC_DEC)->flags  |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_CMPXCHG    { new_instr(OPC_ATOMIC_CMPXCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_MIN        { new_instr(OPC_ATOMIC_MIN)->flags  |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_MAX        { new_instr(OPC_ATOMIC_MAX)->flags  |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_AND        { new_instr(OPC_ATOMIC_AND)->flags  |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_OR         { new_instr(OPC_ATOMIC_OR)->flags   |= IR3_INSTR_G; dummy_dst(); }
-|                  T_OP_ATOMIC_B_XOR        { new_instr(OPC_ATOMIC_XOR)->flags  |= IR3_INSTR_G; dummy_dst(); }
+cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD        { new_instr(OPC_ATOMIC_B_ADD); dummy_dst(); }
+|                  T_OP_ATOMIC_B_SUB        { new_instr(OPC_ATOMIC_B_SUB); dummy_dst(); }
+|                  T_OP_ATOMIC_B_XCHG       { new_instr(OPC_ATOMIC_B_XCHG); dummy_dst(); }
+|                  T_OP_ATOMIC_B_INC        { new_instr(OPC_ATOMIC_B_INC); dummy_dst(); }
+|                  T_OP_ATOMIC_B_DEC        { new_instr(OPC_ATOMIC_B_DEC); dummy_dst(); }
+|                  T_OP_ATOMIC_B_CMPXCHG    { new_instr(OPC_ATOMIC_B_CMPXCHG); dummy_dst(); }
+|                  T_OP_ATOMIC_B_MIN        { new_instr(OPC_ATOMIC_B_MIN); dummy_dst(); }
+|                  T_OP_ATOMIC_B_MAX        { new_instr(OPC_ATOMIC_B_MAX); dummy_dst(); }
+|                  T_OP_ATOMIC_B_AND        { new_instr(OPC_ATOMIC_B_AND); dummy_dst(); }
+|                  T_OP_ATOMIC_B_OR         { new_instr(OPC_ATOMIC_B_OR); dummy_dst(); }
+|                  T_OP_ATOMIC_B_XOR        { new_instr(OPC_ATOMIC_B_XOR); dummy_dst(); }
 |                  T_OP_STIB_B              { new_instr(OPC_STIB); dummy_dst(); }
 
 cat6_bindless_ibo_opc_2src_dst: T_OP_LDIB_B              { new_instr(OPC_LDIB); }
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 77b42e3f120..679c843bb3f 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -336,14 +336,17 @@ static const struct test {
    /* Atomic: */
 #if 0
    /* TODO our encoding differs in b53 for these two */
-   INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
-   INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+   INSTR_5XX(c4d60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+   INSTR_5XX(c4160205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
 #else
-   INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
-   INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+   INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+   INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
 #endif
    INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
 
+   /* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */
+   INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"),
+
    /* Bindless atomic: */
    INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
    INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c
index ad9dbd2e39b..1c638cc171d 100644
--- a/src/freedreno/isa/encode.c
+++ b/src/freedreno/isa/encode.c
@@ -113,9 +113,6 @@ __instruction_case(struct encode_state *s, struct ir3_instruction *instr)
 		}
 	} else if (instr->opc == OPC_DEMOTE) {
 		return OPC_KILL;
-	} else if ((instr->block->shader->compiler->gen >= 6) &&
-			is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) {
-		return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD;
 	} else if (s->compiler->gen >= 6) {
 		if (instr->opc == OPC_RESINFO) {
 			return OPC_RESINFO_B;
@@ -243,7 +240,7 @@ extract_cat6_DESC_MODE(struct ir3_instruction *instr)
 static inline struct ir3_register *
 extract_cat6_SRC(struct ir3_instruction *instr, unsigned n)
 {
-	if (instr->flags & IR3_INSTR_G) {
+	if (is_global_a3xx_atomic(instr->opc)) {
 		n++;
 	}
 	assert(n < instr->srcs_count);
diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml
index 8d1b9aa6e24..220ac9f0401 100644
--- a/src/freedreno/isa/ir3-cat6.xml
+++ b/src/freedreno/isa/ir3-cat6.xml
@@ -26,6 +26,14 @@ SOFTWARE.
 
 <!--
 	Cat6 Instructions:  load/store/atomic instructions
+
+	There are instructions with suffixes like:
+	"stg.a", "ldib.b", "atomic.g.add", "atomic.s.add"
+	They have the following meaning:
+	 '.a' - "addrcalc" stg/ldg with complex address computations
+	 '.b' - "bindless" instructions
+	 '.g' - "global" atomics that operate on raw iova addresses
+	 '.s' - "ssbo" pre-a6xx image/ssbo atomics
  -->
 
 <bitset name="#instruction-cat6" extends="#instruction">
@@ -482,16 +490,6 @@ SOFTWARE.
 		to still have an extra src.  For now, match that.
 	</doc>
 
-	<override expr="#cat6-global">
-		<display>
-			{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
-		</display>
-		<field   low="1"  high="8"  name="SRC3" type="#reg-gpr"/>
-		<field   low="41" high="48" name="SSBO" type="#cat6-src">   <!-- SSBO/image binding point -->
-			<param name="SSBO_IM" as="SRC_IM"/>
-		</field>
-		<field   pos="53"           name="SSBO_IM" type="bool"/>
-	</override>
 	<display>
 		{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.l {DST}, l[{SRC1}], {SRC2}
 	</display>
@@ -500,7 +498,6 @@ SOFTWARE.
 	<derived name="TYPE_SIZE" expr="#cat6-type-size" type="uint"/>
 
 	<pattern pos="0"           >1</pattern>
-	<pattern low="1"  high="8" >xxxxxxxx</pattern>       <!-- SRC3 -->
 	<field   low="9"  high="10" name="D_MINUS_ONE" type="uint"/>
 	<field   pos="11"           name="TYPED" type="#cat6-typed"/>
 	<field   low="12" high="13" name="TYPE_SIZE_MINUS_ONE" type="uint"/>
@@ -514,30 +511,29 @@ SOFTWARE.
 	</field>
 	<field   low="32" high="39" name="DST" type="#reg-gpr"/>
 	<pattern pos="40"          >x</pattern>
-	<assert  low="41" high="48">00000000</assert>       <!-- SSBO/image binding point -->
-	<field   pos="52"           name="G" type="bool"/>
-	<assert  pos="53"          >0</assert>              <!-- SSBO_IM -->
 	<encode>
-		<map name="G">!!(src->flags & IR3_INSTR_G)</map>
 		<map name="TYPED">src</map>
 		<map name="D_MINUS_ONE">src->cat6.d - 1</map>
 		<map name="TYPE_SIZE_MINUS_ONE">src->cat6.iim_val - 1</map>
-		<map name="SSBO">src->srcs[0]</map>
-		<map name="SSBO_IM">!!(src->srcs[0]->flags & IR3_REG_IMMED)</map>
 		<map name="SRC1">extract_cat6_SRC(src, 0)</map>
 		<map name="SRC1_IM">!!(extract_cat6_SRC(src, 0)->flags & IR3_REG_IMMED)</map>
 		<map name="SRC2">extract_cat6_SRC(src, 1)</map>
 		<map name="SRC2_IM">!!(extract_cat6_SRC(src, 1)->flags & IR3_REG_IMMED)</map>
-		<map name="SRC3">extract_cat6_SRC(src, 2)</map>
-		<map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED)</map>
 	</encode>
 </bitset>
 
-<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic">
+<bitset name="#instruction-cat6-a3xx-atomic-local" extends="#instruction-cat6-a3xx-atomic">
+	<pattern low="1"  high="8"  >00000000</pattern> <!-- SRC3 -->
+	<pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
+	<pattern pos="52"           >0</pattern> <!-- "G" -->
+	<pattern pos="53"           >0</pattern> <!-- SSBO_IM -->
+</bitset>
+
+<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic-local">
 	<!-- TODO when asm parser is updated, shift display templates, etc, here -->
 </bitset>
 
-<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic">
+<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic-local">
 	<!-- TODO when asm parser is updated, shift display templates, etc, here -->
 </bitset>
 
@@ -585,6 +581,136 @@ SOFTWARE.
 	<pattern low="54" high="58">11010</pattern>  <!-- OPC -->
 </bitset>
 
+<bitset name="#instruction-cat6-a3xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
+	<doc>
+		Pre-a6xx atomics for Image/SSBO
+	</doc>
+
+	<gen max="599"/>
+
+	<display>
+		{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
+	</display>
+
+	<field   low="1"  high="8"  name="SRC3" type="#reg-gpr"/>
+	<field   low="41" high="48" name="SSBO" type="#cat6-src">   <!-- SSBO/image binding point -->
+		<param name="SSBO_IM" as="SRC_IM"/>
+	</field>
+	<pattern pos="52" >1</pattern> <!-- "G" -->
+	<field   pos="53" name="SSBO_IM" type="bool"/>
+
+	<encode>
+		<map name="SSBO">src->srcs[0]</map>
+		<map name="SSBO_IM">!!(src->srcs[0]->flags & IR3_REG_IMMED)</map>
+		<map name="SRC3">extract_cat6_SRC(src, 2)</map>
+		<map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED)</map>
+	</encode>
+</bitset>
+
+<bitset name="atomic.s.add" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10000</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.sub" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10001</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.xchg" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10010</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.inc" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10011</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.dec" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10100</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.cmpxchg" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10101</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.min" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10110</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.max" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">10111</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.and" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">11000</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.or" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">11001</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.xor" extends="#instruction-cat6-a3xx-atomic-global">
+	<pattern low="54" high="58">11010</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="#instruction-cat6-a6xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
+	<doc>
+		a6xx+ global atomics which take iova in SRC1
+	</doc>
+
+	<gen min="600"/>
+
+	<display>
+		{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, {SRC1}, {SRC2}
+	</display>
+
+	<pattern low="1"  high="8"  >00000000</pattern> <!-- SRC3 -->
+	<pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
+	<pattern pos="52"           >1</pattern> <!-- "G" -->
+	<pattern pos="53"           >0</pattern> <!-- SSBO_IM -->
+</bitset>
+
+<bitset name="atomic.g.add" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10000</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.sub" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10001</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.xchg" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10010</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.inc" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10011</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.dec" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10100</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.cmpxchg" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10101</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.min" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10110</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.max" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">10111</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.and" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">11000</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.or" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">11001</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.xor" extends="#instruction-cat6-a6xx-atomic-global">
+	<pattern low="54" high="58">11010</pattern>  <!-- OPC -->
+</bitset>
 
 <!--
 	New a6xx+ encodings for potentially bindless image/ssbo:
@@ -850,11 +976,6 @@ SOFTWARE.
 	{TYPE_SIZE_MINUS_ONE} + 1
 </expr>
 
-<!-- Image/SSBO (ie. not local) -->
-<expr name="#cat6-global">
-	{G}
-</expr>
-
 <bitset name="#cat6-typed" size="1">
 	<override>
 		<expr>{TYPED}</expr>



More information about the mesa-commit mailing list