Mesa (main): freedreno/ir3: add a6xx global atomics and separate atomic opcodes
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Nov 23 19:10:17 UTC 2021
Module: Mesa
Branch: main
Commit: 5d5b1fc4722fa8db9b74b20d113c3f85d3f6bcb9
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d5b1fc4722fa8db9b74b20d113c3f85d3f6bcb9
Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date: Fri Jan 22 19:51:56 2021 +0200
freedreno/ir3: add a6xx global atomics and separate atomic opcodes
Separating atomic opcodes makes it possible to express a6xx global
atomics which take iova in SRC1. They would be needed by
VK_KHR_buffer_device_address.
The change also makes it easier to distinguish atomics in conditions.
Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8717>
---
src/freedreno/.gitlab-ci/reference/crash.log | 12 +-
src/freedreno/ir3/disasm-a3xx.c | 33 +++++
src/freedreno/ir3/instr-a3xx.h | 106 +++++++++++++++-
src/freedreno/ir3/ir3.c | 5 +-
src/freedreno/ir3/ir3.h | 68 ++++++-----
src/freedreno/ir3/ir3_a4xx.c | 36 +++---
src/freedreno/ir3/ir3_a6xx.c | 36 +++---
src/freedreno/ir3/ir3_legalize.c | 15 ++-
src/freedreno/ir3/ir3_lexer.l | 23 ++++
src/freedreno/ir3/ir3_parser.y | 81 ++++++++++---
src/freedreno/ir3/tests/disasm.c | 11 +-
src/freedreno/isa/encode.c | 5 +-
src/freedreno/isa/ir3-cat6.xml | 173 +++++++++++++++++++++++----
13 files changed, 466 insertions(+), 138 deletions(-)
diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log
index e26db4d25f3..45edfb3aeb7 100644
--- a/src/freedreno/.gitlab-ci/reference/crash.log
+++ b/src/freedreno/.gitlab-ci/reference/crash.log
@@ -4638,12 +4638,12 @@ shader-blocks:
size: 2048
:0:0000:0000[00000000x_00003002x] nop
:0:0001:0001[00000000x_00000000x] nop
- :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
- :6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
- :6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
- :6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
- :6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
- :6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000
+ :6:0002:0002[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+ :6:0003:0003[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+ :6:0004:0004[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+ :6:0005:0005[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+ :6:0006:0006[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
+ :6:0007:0007[deadbeefx_deadbeefx] no match: deadbeefdeadbeef
-----------------------------------------------
8192 (0x2000) bytes
000000: 00003002 00000000 00000000 00000000 |.0..............|
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index 3044fe145de..24b3805085c 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -333,6 +333,39 @@ static const struct opc_info {
OPC(6, OPC_ATOMIC_AND, atomic.and),
OPC(6, OPC_ATOMIC_OR, atomic.or),
OPC(6, OPC_ATOMIC_XOR, atomic.xor),
+ OPC(6, OPC_ATOMIC_B_ADD, atomic.b.add),
+ OPC(6, OPC_ATOMIC_B_SUB, atomic.b.sub),
+ OPC(6, OPC_ATOMIC_B_XCHG, atomic.b.xchg),
+ OPC(6, OPC_ATOMIC_B_INC, atomic.b.inc),
+ OPC(6, OPC_ATOMIC_B_DEC, atomic.b.dec),
+ OPC(6, OPC_ATOMIC_B_CMPXCHG, atomic.b.cmpxchg),
+ OPC(6, OPC_ATOMIC_B_MIN, atomic.b.min),
+ OPC(6, OPC_ATOMIC_B_MAX, atomic.b.max),
+ OPC(6, OPC_ATOMIC_B_AND, atomic.b.and),
+ OPC(6, OPC_ATOMIC_B_OR, atomic.b.or),
+ OPC(6, OPC_ATOMIC_B_XOR, atomic.b.xor),
+ OPC(6, OPC_ATOMIC_S_ADD, atomic.s.add),
+ OPC(6, OPC_ATOMIC_S_SUB, atomic.s.sub),
+ OPC(6, OPC_ATOMIC_S_XCHG, atomic.s.xchg),
+ OPC(6, OPC_ATOMIC_S_INC, atomic.s.inc),
+ OPC(6, OPC_ATOMIC_S_DEC, atomic.s.dec),
+ OPC(6, OPC_ATOMIC_S_CMPXCHG, atomic.s.cmpxchg),
+ OPC(6, OPC_ATOMIC_S_MIN, atomic.s.min),
+ OPC(6, OPC_ATOMIC_S_MAX, atomic.s.max),
+ OPC(6, OPC_ATOMIC_S_AND, atomic.s.and),
+ OPC(6, OPC_ATOMIC_S_OR, atomic.s.or),
+ OPC(6, OPC_ATOMIC_S_XOR, atomic.s.xor),
+ OPC(6, OPC_ATOMIC_G_ADD, atomic.g.add),
+ OPC(6, OPC_ATOMIC_G_SUB, atomic.g.sub),
+ OPC(6, OPC_ATOMIC_G_XCHG, atomic.g.xchg),
+ OPC(6, OPC_ATOMIC_G_INC, atomic.g.inc),
+ OPC(6, OPC_ATOMIC_G_DEC, atomic.g.dec),
+ OPC(6, OPC_ATOMIC_G_CMPXCHG, atomic.g.cmpxchg),
+ OPC(6, OPC_ATOMIC_G_MIN, atomic.g.min),
+ OPC(6, OPC_ATOMIC_G_MAX, atomic.g.max),
+ OPC(6, OPC_ATOMIC_G_AND, atomic.g.and),
+ OPC(6, OPC_ATOMIC_G_OR, atomic.g.or),
+ OPC(6, OPC_ATOMIC_G_XOR, atomic.g.xor),
OPC(6, OPC_LDGB, ldgb),
OPC(6, OPC_STGB, stgb),
OPC(6, OPC_STIB, stib),
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index 8957182b2aa..8a85f575ddb 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -306,11 +306,35 @@ typedef enum {
OPC_ATOMIC_B_OR = _OPC(6, 53),
OPC_ATOMIC_B_XOR = _OPC(6, 54),
- OPC_LDG_A = _OPC(6, 55),
- OPC_STG_A = _OPC(6, 56),
-
- OPC_SPILL_MACRO = _OPC(6, 57),
- OPC_RELOAD_MACRO = _OPC(6, 58),
+ OPC_ATOMIC_S_ADD = _OPC(6, 55),
+ OPC_ATOMIC_S_SUB = _OPC(6, 56),
+ OPC_ATOMIC_S_XCHG = _OPC(6, 57),
+ OPC_ATOMIC_S_INC = _OPC(6, 58),
+ OPC_ATOMIC_S_DEC = _OPC(6, 59),
+ OPC_ATOMIC_S_CMPXCHG = _OPC(6, 60),
+ OPC_ATOMIC_S_MIN = _OPC(6, 61),
+ OPC_ATOMIC_S_MAX = _OPC(6, 62),
+ OPC_ATOMIC_S_AND = _OPC(6, 63),
+ OPC_ATOMIC_S_OR = _OPC(6, 64),
+ OPC_ATOMIC_S_XOR = _OPC(6, 65),
+
+ OPC_ATOMIC_G_ADD = _OPC(6, 66),
+ OPC_ATOMIC_G_SUB = _OPC(6, 67),
+ OPC_ATOMIC_G_XCHG = _OPC(6, 68),
+ OPC_ATOMIC_G_INC = _OPC(6, 69),
+ OPC_ATOMIC_G_DEC = _OPC(6, 70),
+ OPC_ATOMIC_G_CMPXCHG = _OPC(6, 71),
+ OPC_ATOMIC_G_MIN = _OPC(6, 72),
+ OPC_ATOMIC_G_MAX = _OPC(6, 73),
+ OPC_ATOMIC_G_AND = _OPC(6, 74),
+ OPC_ATOMIC_G_OR = _OPC(6, 75),
+ OPC_ATOMIC_G_XOR = _OPC(6, 76),
+
+ OPC_LDG_A = _OPC(6, 77),
+ OPC_STG_A = _OPC(6, 78),
+
+ OPC_SPILL_MACRO = _OPC(6, 79),
+ OPC_RELOAD_MACRO = _OPC(6, 80),
/* category 7: */
OPC_BAR = _OPC(7, 0),
@@ -592,7 +616,7 @@ is_madsh(opc_t opc)
}
static inline bool
-is_atomic(opc_t opc)
+is_local_atomic(opc_t opc)
{
switch (opc) {
case OPC_ATOMIC_ADD:
@@ -612,6 +636,76 @@ is_atomic(opc_t opc)
}
}
+static inline bool
+is_global_a3xx_atomic(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ATOMIC_S_ADD:
+ case OPC_ATOMIC_S_SUB:
+ case OPC_ATOMIC_S_XCHG:
+ case OPC_ATOMIC_S_INC:
+ case OPC_ATOMIC_S_DEC:
+ case OPC_ATOMIC_S_CMPXCHG:
+ case OPC_ATOMIC_S_MIN:
+ case OPC_ATOMIC_S_MAX:
+ case OPC_ATOMIC_S_AND:
+ case OPC_ATOMIC_S_OR:
+ case OPC_ATOMIC_S_XOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+is_global_a6xx_atomic(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ATOMIC_G_ADD:
+ case OPC_ATOMIC_G_SUB:
+ case OPC_ATOMIC_G_XCHG:
+ case OPC_ATOMIC_G_INC:
+ case OPC_ATOMIC_G_DEC:
+ case OPC_ATOMIC_G_CMPXCHG:
+ case OPC_ATOMIC_G_MIN:
+ case OPC_ATOMIC_G_MAX:
+ case OPC_ATOMIC_G_AND:
+ case OPC_ATOMIC_G_OR:
+ case OPC_ATOMIC_G_XOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+is_bindless_atomic(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ATOMIC_B_ADD:
+ case OPC_ATOMIC_B_SUB:
+ case OPC_ATOMIC_B_XCHG:
+ case OPC_ATOMIC_B_INC:
+ case OPC_ATOMIC_B_DEC:
+ case OPC_ATOMIC_B_CMPXCHG:
+ case OPC_ATOMIC_B_MIN:
+ case OPC_ATOMIC_B_MAX:
+ case OPC_ATOMIC_B_AND:
+ case OPC_ATOMIC_B_OR:
+ case OPC_ATOMIC_B_XOR:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool
+is_atomic(opc_t opc)
+{
+ return is_local_atomic(opc) || is_global_a3xx_atomic(opc) ||
+ is_global_a6xx_atomic(opc) || is_bindless_atomic(opc);
+}
+
static inline bool
is_ssbo(opc_t opc)
{
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index db116f01017..766a7adec7b 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -952,10 +952,11 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
/* disallow immediates in anything but the SSBO slot argument for
* cat6 instructions:
*/
- if (is_atomic(instr->opc) && (n != 0))
+ if (is_global_a3xx_atomic(instr->opc) && (n != 0))
return false;
- if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
+ if (is_local_atomic(instr->opc) || is_global_a6xx_atomic(instr->opc) ||
+ is_bindless_atomic(instr->opc))
return false;
if (instr->opc == OPC_STG && (n == 2))
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index c5470d28ad4..25a5f36731f 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -282,19 +282,18 @@ struct ir3_instruction {
IR3_INSTR_P = 0x080,
IR3_INSTR_S = 0x100,
IR3_INSTR_S2EN = 0x200,
- IR3_INSTR_G = 0x400,
- IR3_INSTR_SAT = 0x800,
+ IR3_INSTR_SAT = 0x400,
/* (cat5/cat6) Bindless */
- IR3_INSTR_B = 0x1000,
+ IR3_INSTR_B = 0x800,
/* (cat5/cat6) nonuniform */
- IR3_INSTR_NONUNIF = 0x02000,
+ IR3_INSTR_NONUNIF = 0x1000,
/* (cat5-only) Get some parts of the encoding from a1.x */
- IR3_INSTR_A1EN = 0x04000,
+ IR3_INSTR_A1EN = 0x02000,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
- IR3_INSTR_MARK = 0x08000,
- IR3_INSTR_UNUSED = 0x10000,
+ IR3_INSTR_MARK = 0x04000,
+ IR3_INSTR_UNUSED = 0x08000,
} flags;
uint8_t repeat;
uint8_t nop;
@@ -2183,17 +2182,28 @@ INSTR3NODST(STIB);
INSTR2(LDIB);
INSTR5(LDG_A);
INSTR6NODST(STG_A);
-INSTR3F(G, ATOMIC_ADD)
-INSTR3F(G, ATOMIC_SUB)
-INSTR3F(G, ATOMIC_XCHG)
-INSTR3F(G, ATOMIC_INC)
-INSTR3F(G, ATOMIC_DEC)
-INSTR3F(G, ATOMIC_CMPXCHG)
-INSTR3F(G, ATOMIC_MIN)
-INSTR3F(G, ATOMIC_MAX)
-INSTR3F(G, ATOMIC_AND)
-INSTR3F(G, ATOMIC_OR)
-INSTR3F(G, ATOMIC_XOR)
+INSTR2(ATOMIC_G_ADD)
+INSTR2(ATOMIC_G_SUB)
+INSTR2(ATOMIC_G_XCHG)
+INSTR2(ATOMIC_G_INC)
+INSTR2(ATOMIC_G_DEC)
+INSTR2(ATOMIC_G_CMPXCHG)
+INSTR2(ATOMIC_G_MIN)
+INSTR2(ATOMIC_G_MAX)
+INSTR2(ATOMIC_G_AND)
+INSTR2(ATOMIC_G_OR)
+INSTR2(ATOMIC_G_XOR)
+INSTR3(ATOMIC_B_ADD)
+INSTR3(ATOMIC_B_SUB)
+INSTR3(ATOMIC_B_XCHG)
+INSTR3(ATOMIC_B_INC)
+INSTR3(ATOMIC_B_DEC)
+INSTR3(ATOMIC_B_CMPXCHG)
+INSTR3(ATOMIC_B_MIN)
+INSTR3(ATOMIC_B_MAX)
+INSTR3(ATOMIC_B_AND)
+INSTR3(ATOMIC_B_OR)
+INSTR3(ATOMIC_B_XOR)
#elif GPU >= 400
INSTR3(LDGB)
#if GPU >= 500
@@ -2201,17 +2211,17 @@ INSTR3(LDIB)
#endif
INSTR4NODST(STGB)
INSTR4NODST(STIB)
-INSTR4F(G, ATOMIC_ADD)
-INSTR4F(G, ATOMIC_SUB)
-INSTR4F(G, ATOMIC_XCHG)
-INSTR4F(G, ATOMIC_INC)
-INSTR4F(G, ATOMIC_DEC)
-INSTR4F(G, ATOMIC_CMPXCHG)
-INSTR4F(G, ATOMIC_MIN)
-INSTR4F(G, ATOMIC_MAX)
-INSTR4F(G, ATOMIC_AND)
-INSTR4F(G, ATOMIC_OR)
-INSTR4F(G, ATOMIC_XOR)
+INSTR4(ATOMIC_S_ADD)
+INSTR4(ATOMIC_S_SUB)
+INSTR4(ATOMIC_S_XCHG)
+INSTR4(ATOMIC_S_INC)
+INSTR4(ATOMIC_S_DEC)
+INSTR4(ATOMIC_S_CMPXCHG)
+INSTR4(ATOMIC_S_MIN)
+INSTR4(ATOMIC_S_MAX)
+INSTR4(ATOMIC_S_AND)
+INSTR4(ATOMIC_S_OR)
+INSTR4(ATOMIC_S_XOR)
#endif
/* cat7 instructions: */
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index 2339fa3d67a..1b69a6c1fe3 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -135,39 +135,39 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add_ir3:
- atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_ADD(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_imin_ir3:
- atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umin_ir3:
- atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_imax_ir3:
- atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umax_ir3:
- atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_and_ir3:
- atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_AND(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_or_ir3:
- atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_OR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_xor_ir3:
- atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_XOR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_exchange_ir3:
- atomic = ir3_ATOMIC_XCHG_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
+ atomic = ir3_ATOMIC_S_XCHG(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0);
break;
case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
data = ir3_collect(b, src3, data);
struct ir3_instruction *dword_offset = ir3_get_src(ctx, &intr->src[4])[0];
- atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, data, 0, dword_offset, 0,
+ atomic = ir3_ATOMIC_S_CMPXCHG(b, ssbo, 0, data, 0, dword_offset, 0,
byte_offset, 0);
break;
default:
@@ -311,32 +311,32 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_image_atomic_add:
- atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_ADD(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
- atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_MIN(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
- atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_MAX(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_and:
- atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_AND(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_or:
- atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_OR(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_xor:
- atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_XOR(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_exchange:
- atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_XCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
case nir_intrinsic_image_atomic_comp_swap:
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
src0 = ir3_collect(b, ir3_get_src(ctx, &intr->src[4])[0], src0);
- atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
+ atomic = ir3_ATOMIC_S_CMPXCHG(b, image, 0, src0, 0, src1, 0, src2, 0);
break;
default:
unreachable("boo");
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index 04efc0adfe0..70a4752417b 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -144,36 +144,36 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_ssbo_atomic_add_ir3:
- atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_imin_ir3:
- atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umin_ir3:
- atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_imax_ir3:
- atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
type = TYPE_S32;
break;
case nir_intrinsic_ssbo_atomic_umax_ir3:
- atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_and_ir3:
- atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_or_ir3:
- atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_xor_ir3:
- atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_exchange_ir3:
- atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
- atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
break;
default:
unreachable("boo");
@@ -288,39 +288,39 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_bindless_image_atomic_add:
- atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_bindless_image_atomic_imin:
case nir_intrinsic_bindless_image_atomic_umin:
- atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_bindless_image_atomic_imax:
case nir_intrinsic_bindless_image_atomic_umax:
- atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_bindless_image_atomic_and:
- atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_bindless_image_atomic_or:
- atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_bindless_image_atomic_xor:
- atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_exchange:
case nir_intrinsic_bindless_image_atomic_exchange:
- atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
break;
case nir_intrinsic_image_atomic_comp_swap:
case nir_intrinsic_bindless_image_atomic_comp_swap:
- atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
+ atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
break;
default:
unreachable("boo");
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 59765f7317b..eaa393bc1a2 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -273,19 +273,18 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
else
regmask_set(&state->needs_sy, n->dsts[0]);
} else if (is_atomic(n->opc)) {
- if (n->flags & IR3_INSTR_G) {
- if (ctx->compiler->gen >= 6) {
- /* New encoding, returns result via second src: */
- regmask_set(&state->needs_sy, n->srcs[2]);
- } else {
- regmask_set(&state->needs_sy, n->dsts[0]);
- }
+ if (is_bindless_atomic(n->opc)) {
+ regmask_set(&state->needs_sy, n->srcs[2]);
+ } else if (is_global_a3xx_atomic(n->opc) ||
+ is_global_a6xx_atomic(n->opc)) {
+ regmask_set(&state->needs_sy, n->dsts[0]);
} else {
regmask_set(&state->needs_ss, n->dsts[0]);
}
}
- if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G)))
+ if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) ||
+ is_bindless_atomic(n->opc))
ctx->so->has_ssbo = true;
/* both tex/sfu appear to not always immediately consume
diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l
index 6b9ecab8f5a..2353a049eb9 100644
--- a/src/freedreno/ir3/ir3_lexer.l
+++ b/src/freedreno/ir3/ir3_lexer.l
@@ -339,6 +339,29 @@ static int parse_w(const char *str)
"atomic.b.and" return TOKEN(T_OP_ATOMIC_B_AND);
"atomic.b.or" return TOKEN(T_OP_ATOMIC_B_OR);
"atomic.b.xor" return TOKEN(T_OP_ATOMIC_B_XOR);
+"atomic.s.add" return TOKEN(T_OP_ATOMIC_S_ADD);
+"atomic.s.sub" return TOKEN(T_OP_ATOMIC_S_SUB);
+"atomic.s.xchg" return TOKEN(T_OP_ATOMIC_S_XCHG);
+"atomic.s.inc" return TOKEN(T_OP_ATOMIC_S_INC);
+"atomic.s.dec" return TOKEN(T_OP_ATOMIC_S_DEC);
+"atomic.s.cmpxchg" return TOKEN(T_OP_ATOMIC_S_CMPXCHG);
+"atomic.s.min" return TOKEN(T_OP_ATOMIC_S_MIN);
+"atomic.s.max" return TOKEN(T_OP_ATOMIC_S_MAX);
+"atomic.s.and" return TOKEN(T_OP_ATOMIC_S_AND);
+"atomic.s.or" return TOKEN(T_OP_ATOMIC_S_OR);
+"atomic.s.xor" return TOKEN(T_OP_ATOMIC_S_XOR);
+"atomic.g.add" return TOKEN(T_OP_ATOMIC_G_ADD);
+"atomic.g.sub" return TOKEN(T_OP_ATOMIC_G_SUB);
+"atomic.g.xchg" return TOKEN(T_OP_ATOMIC_G_XCHG);
+"atomic.g.inc" return TOKEN(T_OP_ATOMIC_G_INC);
+"atomic.g.dec" return TOKEN(T_OP_ATOMIC_G_DEC);
+"atomic.g.cmpxchg" return TOKEN(T_OP_ATOMIC_G_CMPXCHG);
+"atomic.g.min" return TOKEN(T_OP_ATOMIC_G_MIN);
+"atomic.g.max" return TOKEN(T_OP_ATOMIC_G_MAX);
+"atomic.g.and" return TOKEN(T_OP_ATOMIC_G_AND);
+"atomic.g.or" return TOKEN(T_OP_ATOMIC_G_OR);
+"atomic.g.xor" return TOKEN(T_OP_ATOMIC_G_XOR);
+
"ldgb" return TOKEN(T_OP_LDGB);
"stgb" return TOKEN(T_OP_STGB);
"stib" return TOKEN(T_OP_STIB);
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index d41c38bc681..674e2fe2003 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -569,6 +569,28 @@ static void print_token(FILE *file, int type, YYSTYPE value)
%token <tok> T_OP_ATOMIC_B_AND
%token <tok> T_OP_ATOMIC_B_OR
%token <tok> T_OP_ATOMIC_B_XOR
+%token <tok> T_OP_ATOMIC_S_ADD
+%token <tok> T_OP_ATOMIC_S_SUB
+%token <tok> T_OP_ATOMIC_S_XCHG
+%token <tok> T_OP_ATOMIC_S_INC
+%token <tok> T_OP_ATOMIC_S_DEC
+%token <tok> T_OP_ATOMIC_S_CMPXCHG
+%token <tok> T_OP_ATOMIC_S_MIN
+%token <tok> T_OP_ATOMIC_S_MAX
+%token <tok> T_OP_ATOMIC_S_AND
+%token <tok> T_OP_ATOMIC_S_OR
+%token <tok> T_OP_ATOMIC_S_XOR
+%token <tok> T_OP_ATOMIC_G_ADD
+%token <tok> T_OP_ATOMIC_G_SUB
+%token <tok> T_OP_ATOMIC_G_XCHG
+%token <tok> T_OP_ATOMIC_G_INC
+%token <tok> T_OP_ATOMIC_G_DEC
+%token <tok> T_OP_ATOMIC_G_CMPXCHG
+%token <tok> T_OP_ATOMIC_G_MIN
+%token <tok> T_OP_ATOMIC_G_MAX
+%token <tok> T_OP_ATOMIC_G_AND
+%token <tok> T_OP_ATOMIC_G_OR
+%token <tok> T_OP_ATOMIC_G_XOR
%token <tok> T_OP_LDGB
%token <tok> T_OP_STGB
%token <tok> T_OP_STIB
@@ -1020,7 +1042,7 @@ cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
cat6_offset: cat6_imm_offset
| '+' src
cat6_dst_offset: offset { instr->cat6.dst_offset = $1; }
-| '+' src { instr->flags |= IR3_INSTR_G; }
+| '+' src
cat6_immed: integer { instr->cat6.iim_val = $1; }
@@ -1068,14 +1090,39 @@ cat6_atomic_opc: T_OP_ATOMIC_ADD { new_instr(OPC_ATOMIC_ADD); }
| T_OP_ATOMIC_OR { new_instr(OPC_ATOMIC_OR); }
| T_OP_ATOMIC_XOR { new_instr(OPC_ATOMIC_XOR); }
-cat6_atomic_g: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src {
- instr->flags |= IR3_INSTR_G;
- }
+cat6_a3xx_atomic_opc: T_OP_ATOMIC_S_ADD { new_instr(OPC_ATOMIC_S_ADD); }
+| T_OP_ATOMIC_S_SUB { new_instr(OPC_ATOMIC_S_SUB); }
+| T_OP_ATOMIC_S_XCHG { new_instr(OPC_ATOMIC_S_XCHG); }
+| T_OP_ATOMIC_S_INC { new_instr(OPC_ATOMIC_S_INC); }
+| T_OP_ATOMIC_S_DEC { new_instr(OPC_ATOMIC_S_DEC); }
+| T_OP_ATOMIC_S_CMPXCHG { new_instr(OPC_ATOMIC_S_CMPXCHG); }
+| T_OP_ATOMIC_S_MIN { new_instr(OPC_ATOMIC_S_MIN); }
+| T_OP_ATOMIC_S_MAX { new_instr(OPC_ATOMIC_S_MAX); }
+| T_OP_ATOMIC_S_AND { new_instr(OPC_ATOMIC_S_AND); }
+| T_OP_ATOMIC_S_OR { new_instr(OPC_ATOMIC_S_OR); }
+| T_OP_ATOMIC_S_XOR { new_instr(OPC_ATOMIC_S_XOR); }
+
+cat6_a6xx_atomic_opc: T_OP_ATOMIC_G_ADD { new_instr(OPC_ATOMIC_G_ADD); }
+| T_OP_ATOMIC_G_SUB { new_instr(OPC_ATOMIC_G_SUB); }
+| T_OP_ATOMIC_G_XCHG { new_instr(OPC_ATOMIC_G_XCHG); }
+| T_OP_ATOMIC_G_INC { new_instr(OPC_ATOMIC_G_INC); }
+| T_OP_ATOMIC_G_DEC { new_instr(OPC_ATOMIC_G_DEC); }
+| T_OP_ATOMIC_G_CMPXCHG { new_instr(OPC_ATOMIC_G_CMPXCHG); }
+| T_OP_ATOMIC_G_MIN { new_instr(OPC_ATOMIC_G_MIN); }
+| T_OP_ATOMIC_G_MAX { new_instr(OPC_ATOMIC_G_MAX); }
+| T_OP_ATOMIC_G_AND { new_instr(OPC_ATOMIC_G_AND); }
+| T_OP_ATOMIC_G_OR { new_instr(OPC_ATOMIC_G_OR); }
+| T_OP_ATOMIC_G_XOR { new_instr(OPC_ATOMIC_G_XOR); }
+
+cat6_a3xx_atomic_s: cat6_a3xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src
+
+cat6_a6xx_atomic_g: cat6_a6xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' src ',' src
cat6_atomic_l: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'l' dst_reg ',' 'l' '[' cat6_reg_or_immed ']' ',' src
-cat6_atomic: cat6_atomic_g
-| cat6_atomic_l
+cat6_atomic: cat6_atomic_l
+| cat6_a3xx_atomic_s
+| cat6_a6xx_atomic_g
cat6_ibo_opc_1src: T_OP_RESINFO { new_instr(OPC_RESINFO); }
@@ -1104,17 +1151,17 @@ cat6_reg_or_immed: src
cat6_bindless_ibo_opc_1src: T_OP_RESINFO_B { new_instr(OPC_RESINFO); }
-cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_ADD)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_SUB)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_XCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_INC)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_DEC)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_CMPXCHG)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_MIN)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_MAX)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_AND)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_OR)->flags |= IR3_INSTR_G; dummy_dst(); }
-| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_XOR)->flags |= IR3_INSTR_G; dummy_dst(); }
+cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_B_ADD); dummy_dst(); }
+| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_B_SUB); dummy_dst(); }
+| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_B_XCHG); dummy_dst(); }
+| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_B_INC); dummy_dst(); }
+| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_B_DEC); dummy_dst(); }
+| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_B_CMPXCHG); dummy_dst(); }
+| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_B_MIN); dummy_dst(); }
+| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_B_MAX); dummy_dst(); }
+| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_B_AND); dummy_dst(); }
+| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_B_OR); dummy_dst(); }
+| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_B_XOR); dummy_dst(); }
| T_OP_STIB_B { new_instr(OPC_STIB); dummy_dst(); }
cat6_bindless_ibo_opc_2src_dst: T_OP_LDIB_B { new_instr(OPC_LDIB); }
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 77b42e3f120..679c843bb3f 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -336,14 +336,17 @@ static const struct test {
/* Atomic: */
#if 0
/* TODO our encoding differs in b53 for these two */
- INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
- INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+ INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+ INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#else
- INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
- INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
+ INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
+ INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
#endif
INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
+ /* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */
+ INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"),
+
/* Bindless atomic: */
INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c
index ad9dbd2e39b..1c638cc171d 100644
--- a/src/freedreno/isa/encode.c
+++ b/src/freedreno/isa/encode.c
@@ -113,9 +113,6 @@ __instruction_case(struct encode_state *s, struct ir3_instruction *instr)
}
} else if (instr->opc == OPC_DEMOTE) {
return OPC_KILL;
- } else if ((instr->block->shader->compiler->gen >= 6) &&
- is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) {
- return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD;
} else if (s->compiler->gen >= 6) {
if (instr->opc == OPC_RESINFO) {
return OPC_RESINFO_B;
@@ -243,7 +240,7 @@ extract_cat6_DESC_MODE(struct ir3_instruction *instr)
static inline struct ir3_register *
extract_cat6_SRC(struct ir3_instruction *instr, unsigned n)
{
- if (instr->flags & IR3_INSTR_G) {
+ if (is_global_a3xx_atomic(instr->opc)) {
n++;
}
assert(n < instr->srcs_count);
diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml
index 8d1b9aa6e24..220ac9f0401 100644
--- a/src/freedreno/isa/ir3-cat6.xml
+++ b/src/freedreno/isa/ir3-cat6.xml
@@ -26,6 +26,14 @@ SOFTWARE.
<!--
Cat6 Instructions: load/store/atomic instructions
+
+ There are instructions with suffixes like:
+ "stg.a", "ldib.b", "atomic.g.add", "atomic.s.add"
+ They have the following meaning:
+ '.a' - "addrcalc" stg/ldg with complex address computations
+ '.b' - "bindless" instructions
+ '.g' - "global" atomics that operate on raw iova addresses
+ '.s' - "ssbo" pre-a6xx image/ssbo atomics
-->
<bitset name="#instruction-cat6" extends="#instruction">
@@ -482,16 +490,6 @@ SOFTWARE.
to still have an extra src. For now, match that.
</doc>
- <override expr="#cat6-global">
- <display>
- {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
- </display>
- <field low="1" high="8" name="SRC3" type="#reg-gpr"/>
- <field low="41" high="48" name="SSBO" type="#cat6-src"> <!-- SSBO/image binding point -->
- <param name="SSBO_IM" as="SRC_IM"/>
- </field>
- <field pos="53" name="SSBO_IM" type="bool"/>
- </override>
<display>
{SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.l {DST}, l[{SRC1}], {SRC2}
</display>
@@ -500,7 +498,6 @@ SOFTWARE.
<derived name="TYPE_SIZE" expr="#cat6-type-size" type="uint"/>
<pattern pos="0" >1</pattern>
- <pattern low="1" high="8" >xxxxxxxx</pattern> <!-- SRC3 -->
<field low="9" high="10" name="D_MINUS_ONE" type="uint"/>
<field pos="11" name="TYPED" type="#cat6-typed"/>
<field low="12" high="13" name="TYPE_SIZE_MINUS_ONE" type="uint"/>
@@ -514,30 +511,29 @@ SOFTWARE.
</field>
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<pattern pos="40" >x</pattern>
- <assert low="41" high="48">00000000</assert> <!-- SSBO/image binding point -->
- <field pos="52" name="G" type="bool"/>
- <assert pos="53" >0</assert> <!-- SSBO_IM -->
<encode>
- <map name="G">!!(src->flags & IR3_INSTR_G)</map>
<map name="TYPED">src</map>
<map name="D_MINUS_ONE">src->cat6.d - 1</map>
<map name="TYPE_SIZE_MINUS_ONE">src->cat6.iim_val - 1</map>
- <map name="SSBO">src->srcs[0]</map>
- <map name="SSBO_IM">!!(src->srcs[0]->flags & IR3_REG_IMMED)</map>
<map name="SRC1">extract_cat6_SRC(src, 0)</map>
<map name="SRC1_IM">!!(extract_cat6_SRC(src, 0)->flags & IR3_REG_IMMED)</map>
<map name="SRC2">extract_cat6_SRC(src, 1)</map>
<map name="SRC2_IM">!!(extract_cat6_SRC(src, 1)->flags & IR3_REG_IMMED)</map>
- <map name="SRC3">extract_cat6_SRC(src, 2)</map>
- <map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED)</map>
</encode>
</bitset>
-<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic">
+<bitset name="#instruction-cat6-a3xx-atomic-local" extends="#instruction-cat6-a3xx-atomic">
+ <pattern low="1" high="8" >00000000</pattern> <!-- SRC3 -->
+ <pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
+ <pattern pos="52" >0</pattern> <!-- "G" -->
+ <pattern pos="53" >0</pattern> <!-- SSBO_IM -->
+</bitset>
+
+<bitset name="#instruction-cat6-a3xx-atomic-1src" extends="#instruction-cat6-a3xx-atomic-local">
<!-- TODO when asm parser is updated, shift display templates, etc, here -->
</bitset>
-<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic">
+<bitset name="#instruction-cat6-a3xx-atomic-2src" extends="#instruction-cat6-a3xx-atomic-local">
<!-- TODO when asm parser is updated, shift display templates, etc, here -->
</bitset>
@@ -585,6 +581,136 @@ SOFTWARE.
<pattern low="54" high="58">11010</pattern> <!-- OPC -->
</bitset>
+<bitset name="#instruction-cat6-a3xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
+ <doc>
+ Pre-a6xx atomics for Image/SSBO
+ </doc>
+
+ <gen max="599"/>
+
+ <display>
+ {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3}
+ </display>
+
+ <field low="1" high="8" name="SRC3" type="#reg-gpr"/>
+ <field low="41" high="48" name="SSBO" type="#cat6-src"> <!-- SSBO/image binding point -->
+ <param name="SSBO_IM" as="SRC_IM"/>
+ </field>
+ <pattern pos="52" >1</pattern> <!-- "G" -->
+ <field pos="53" name="SSBO_IM" type="bool"/>
+
+ <encode>
+ <map name="SSBO">src->srcs[0]</map>
+ <map name="SSBO_IM">!!(src->srcs[0]->flags & IR3_REG_IMMED)</map>
+ <map name="SRC3">extract_cat6_SRC(src, 2)</map>
+ <map name="SRC3_IM">!!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED)</map>
+ </encode>
+</bitset>
+
+<bitset name="atomic.s.add" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10000</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.sub" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10001</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.xchg" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10010</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.inc" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10011</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.dec" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10100</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.cmpxchg" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10101</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.min" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10110</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.max" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">10111</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.and" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">11000</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.or" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">11001</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.s.xor" extends="#instruction-cat6-a3xx-atomic-global">
+ <pattern low="54" high="58">11010</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="#instruction-cat6-a6xx-atomic-global" extends="#instruction-cat6-a3xx-atomic">
+ <doc>
+ a6xx+ global atomics which take iova in SRC1
+ </doc>
+
+ <gen min="600"/>
+
+ <display>
+ {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, {SRC1}, {SRC2}
+ </display>
+
+ <pattern low="1" high="8" >00000000</pattern> <!-- SRC3 -->
+ <pattern low="41" high="48" >00000000</pattern> <!-- SSBO/image binding point -->
+ <pattern pos="52" >1</pattern> <!-- "G" -->
+ <pattern pos="53" >0</pattern> <!-- SSBO_IM -->
+</bitset>
+
+<bitset name="atomic.g.add" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10000</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.sub" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10001</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.xchg" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10010</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.inc" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10011</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.dec" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10100</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.cmpxchg" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10101</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.min" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10110</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.max" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">10111</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.and" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">11000</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.or" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">11001</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="atomic.g.xor" extends="#instruction-cat6-a6xx-atomic-global">
+ <pattern low="54" high="58">11010</pattern> <!-- OPC -->
+</bitset>
<!--
New a6xx+ encodings for potentially bindless image/ssbo:
@@ -850,11 +976,6 @@ SOFTWARE.
{TYPE_SIZE_MINUS_ONE} + 1
</expr>
-<!-- Image/SSBO (ie. not local) -->
-<expr name="#cat6-global">
- {G}
-</expr>
-
<bitset name="#cat6-typed" size="1">
<override>
<expr>{TYPED}</expr>
More information about the mesa-commit mailing list