Mesa (main): ir3: add ldg.a,stg.a which allow complex in-place offset calculation

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jun 25 15:56:42 UTC 2021


Module: Mesa
Branch: main
Commit: fdc0f489e098d320593a1c6837a19726c84d90e9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fdc0f489e098d320593a1c6837a19726c84d90e9

Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date:   Wed Jun 16 14:43:19 2021 +0300

ir3: add ldg.a,stg.a which allow complex in-place offset calculation

The full form for ldg.a/stg.a offset is:
 g[reg_address + (reg_offset << (imm_shift + 2)) + (imm_offset << 2)]

where imm_shift is in [0, 3] and imm_offset is in [0, 3]

The a6xx blob was found to produce slightly simpler offset calculations
for TES/TCS shaders in GTA V:

 [c002000a_03c14215] ldg.a.f32 r2.z, g[r1.y+((r2.z+1)<<2)], 3;
 [c0020004_01c14609] ldg.a.f32 r1.x, g[r1.y+((r1.x+3)<<2)], 1;

Our new syntax:
 stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1
 stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1
 ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3
 ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3

Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11431>

---

 .../computerator/examples/stg_ldg_offset.asm       |  17 +++
 src/freedreno/ir3/disasm-a3xx.c                    |   2 +
 src/freedreno/ir3/instr-a3xx.h                     |   3 +
 src/freedreno/ir3/ir3.c                            |   7 +-
 src/freedreno/ir3/ir3.h                            |  56 +++++++-
 src/freedreno/ir3/ir3_a4xx.c                       |   2 +
 src/freedreno/ir3/ir3_a6xx.c                       |  64 +++++++++
 src/freedreno/ir3/ir3_compiler_nir.c               |  58 ++-------
 src/freedreno/ir3/ir3_context.h                    |   3 +
 src/freedreno/ir3/ir3_lexer.l                      |   2 +
 src/freedreno/ir3/ir3_parser.y                     |  41 +++---
 src/freedreno/ir3/ir3_validate.c                   |  12 ++
 src/freedreno/ir3/tests/disasm.c                   |  22 +++-
 src/freedreno/isa/ir3-cat6.xml                     | 144 +++++++++++++++------
 14 files changed, 317 insertions(+), 116 deletions(-)

diff --git a/src/freedreno/computerator/examples/stg_ldg_offset.asm b/src/freedreno/computerator/examples/stg_ldg_offset.asm
new file mode 100644
index 00000000000..53b379d47ff
--- /dev/null
+++ b/src/freedreno/computerator/examples/stg_ldg_offset.asm
@@ -0,0 +1,17 @@
+@localsize 16, 1, 1
+@buf 128 (c2.x)  ; c2.xy
+@invocationid(r0.x) ; r0.xyz
+mov.u32u32 r0.y, r0.x
+mov.u32u32 r1.x, c2.x
+mov.u32u32 r1.y, c2.y
+mov.u32u32 r2.x, 0xff
+(rpt5)nop
+stg.a.u32 g[r1.x+r0.y<<4+2<<2], r2.x, 1
+nop(sy)
+ldg.a.u32 r4.x, g[r1.x+r0.y<<4+2<<2], 1
+nop(sy)
+add.u r4.x, r4.x, 1
+(rpt3)nop
+stg.a.u32 g[r1.x+r0.y<<4+1<<2], r4.x, 1
+end
+nop
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index f0b9731fee1..b0cfa73a4c1 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -303,9 +303,11 @@ static const struct opc_info {
 
 	/* category 6: */
 	OPC(6, OPC_LDG,          ldg),
+	OPC(6, OPC_LDG_A,        ldg.a),
 	OPC(6, OPC_LDL,          ldl),
 	OPC(6, OPC_LDP,          ldp),
 	OPC(6, OPC_STG,          stg),
+	OPC(6, OPC_STG_A,        stg.a),
 	OPC(6, OPC_STL,          stl),
 	OPC(6, OPC_STP,          stp),
 	OPC(6, OPC_LDIB,         ldib),
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index 656ee2b6db3..8fb954e0f57 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -291,6 +291,9 @@ typedef enum {
 	OPC_ATOMIC_B_OR       = _OPC(6, 53),
 	OPC_ATOMIC_B_XOR      = _OPC(6, 54),
 
+	OPC_LDG_A           = _OPC(6, 55),
+	OPC_STG_A           = _OPC(6, 56),
+
 	/* category 7: */
 	OPC_BAR             = _OPC(7, 0),
 	OPC_FENCE           = _OPC(7, 1),
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 5ba9134909f..7a340b78eb7 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -817,7 +817,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
 			 * but for load instructions this arg is the address (and not
 			 * really sure any good way to test a hard-coded immed addr src)
 			 */
-			if (is_store(instr) && (n == 1))
+			if (is_store(instr) && (instr->opc != OPC_STG) && (n == 1))
 				return false;
 
 			if ((instr->opc == OPC_LDL) && (n == 0))
@@ -847,7 +847,10 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
 			if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
 				return false;
 
-			if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
+			if (instr->opc == OPC_STG && (n == 2))
+				return false;
+
+			if (instr->opc == OPC_STG_A && (n == 4))
 				return false;
 
 			/* as with atomics, these cat6 instrs can only have an immediate
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 2bd2eebc83e..6dac29a78fe 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -827,6 +827,7 @@ is_store(struct ir3_instruction *instr)
 	 */
 	switch (instr->opc) {
 	case OPC_STG:
+	case OPC_STG_A:
 	case OPC_STGB:
 	case OPC_STIB:
 	case OPC_STP:
@@ -844,6 +845,7 @@ static inline bool is_load(struct ir3_instruction *instr)
 {
 	switch (instr->opc) {
 	case OPC_LDG:
+	case OPC_LDG_A:
 	case OPC_LDGB:
 	case OPC_LDIB:
 	case OPC_LDL:
@@ -1731,6 +1733,54 @@ ir3_##name(struct ir3_block *block,                                      \
 #define INSTR4F(f, name)    __INSTR4(IR3_INSTR_##f, name##_##f, OPC_##name)
 #define INSTR4(name)        __INSTR4(0, name, OPC_##name)
 
+#define __INSTR5(flag, name, opc)                                        \
+static inline struct ir3_instruction *                                   \
+ir3_##name(struct ir3_block *block,                                      \
+		struct ir3_instruction *a, unsigned aflags,                      \
+		struct ir3_instruction *b, unsigned bflags,                      \
+		struct ir3_instruction *c, unsigned cflags,                      \
+		struct ir3_instruction *d, unsigned dflags,                      \
+		struct ir3_instruction *e, unsigned eflags)                      \
+{                                                                        \
+	struct ir3_instruction *instr =                                      \
+		ir3_instr_create(block, opc, 1, 5);                              \
+	__ssa_dst(instr);                                                    \
+	__ssa_src(instr, a, aflags);                                         \
+	__ssa_src(instr, b, bflags);                                         \
+	__ssa_src(instr, c, cflags);                                         \
+	__ssa_src(instr, d, dflags);                                         \
+	__ssa_src(instr, e, eflags);                                         \
+	instr->flags |= flag;                                                \
+	return instr;                                                        \
+}
+#define INSTR5F(f, name)    __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
+#define INSTR5(name)        __INSTR5(0, name, OPC_##name)
+
+#define __INSTR6(flag, name, opc)                                        \
+static inline struct ir3_instruction *                                   \
+ir3_##name(struct ir3_block *block,                                      \
+		struct ir3_instruction *a, unsigned aflags,                      \
+		struct ir3_instruction *b, unsigned bflags,                      \
+		struct ir3_instruction *c, unsigned cflags,                      \
+		struct ir3_instruction *d, unsigned dflags,                      \
+		struct ir3_instruction *e, unsigned eflags,                      \
+		struct ir3_instruction *f, unsigned fflags)                      \
+{                                                                        \
+	struct ir3_instruction *instr =                                      \
+		ir3_instr_create(block, opc, 1, 6);                              \
+	__ssa_dst(instr);                                                    \
+	__ssa_src(instr, a, aflags);                                         \
+	__ssa_src(instr, b, bflags);                                         \
+	__ssa_src(instr, c, cflags);                                         \
+	__ssa_src(instr, d, dflags);                                         \
+	__ssa_src(instr, e, eflags);                                         \
+	__ssa_src(instr, f, fflags);                                         \
+	instr->flags |= flag;                                                \
+	return instr;                                                        \
+}
+#define INSTR6F(f, name)    __INSTR6(IR3_INSTR_##f, name##_##f, OPC_##name)
+#define INSTR6(name)        __INSTR6(0, name, OPC_##name)
+
 /* cat0 instructions: */
 INSTR1(B)
 INSTR0(JUMP)
@@ -1872,7 +1922,7 @@ INSTR3(LDG)
 INSTR3(LDL)
 INSTR3(LDLW)
 INSTR3(LDP)
-INSTR3(STG)
+INSTR4(STG)
 INSTR3(STL)
 INSTR3(STLW)
 INSTR3(STP)
@@ -1893,6 +1943,8 @@ INSTR2(LDC)
 #if GPU >= 600
 INSTR3(STIB);
 INSTR2(LDIB);
+INSTR5(LDG_A);
+INSTR6(STG_A);
 INSTR3F(G, ATOMIC_ADD)
 INSTR3F(G, ATOMIC_SUB)
 INSTR3F(G, ATOMIC_XCHG)
@@ -1921,8 +1973,6 @@ INSTR4F(G, ATOMIC_OR)
 INSTR4F(G, ATOMIC_XOR)
 #endif
 
-INSTR4F(G, STG)
-
 /* cat7 instructions: */
 INSTR0(BAR)
 INSTR0(FENCE)
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index 486dd1f7cfe..57e5b304ff3 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -357,4 +357,6 @@ const struct ir3_context_funcs ir3_a4xx_funcs = {
 		.emit_intrinsic_store_image = emit_intrinsic_store_image,
 		.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
 		.emit_intrinsic_image_size = emit_intrinsic_image_size_tex,
+		.emit_intrinsic_load_global_ir3 = NULL,
+		.emit_intrinsic_store_global_ir3 = NULL,
 };
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index 1fbc8f1248d..501a02ae3d0 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -371,6 +371,68 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 	ir3_split_dest(b, dst, resinfo, 0, intr->num_components);
 }
 
+static void
+emit_intrinsic_load_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+		struct ir3_instruction **dst)
+{
+	struct ir3_block *b = ctx->block;
+	unsigned dest_components = nir_intrinsic_dest_components(intr);
+	struct ir3_instruction *addr, *offset;
+
+	addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
+			ir3_get_src(ctx, &intr->src[0])[0],
+			ir3_get_src(ctx, &intr->src[0])[1]
+	}, 2);
+
+	offset = ir3_get_src(ctx, &intr->src[1])[0];
+
+	struct ir3_instruction *load =
+		ir3_LDG_A(b, addr, 0, offset, 0,
+				create_immed(b, 0), 0,
+				create_immed(b, 0), 0,
+				create_immed(b, dest_components), 0);
+	load->cat6.type = TYPE_U32;
+	load->dsts[0]->wrmask = MASK(dest_components);
+
+	load->barrier_class = IR3_BARRIER_BUFFER_R;
+	load->barrier_conflict = IR3_BARRIER_BUFFER_W;
+
+	ir3_split_dest(b, dst, load, 0, dest_components);
+}
+
+static void
+emit_intrinsic_store_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+{
+	struct ir3_block *b = ctx->block;
+	struct ir3_instruction *value, *addr, *offset;
+	unsigned ncomp = nir_intrinsic_src_components(intr, 0);
+
+	addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
+			ir3_get_src(ctx, &intr->src[1])[0],
+			ir3_get_src(ctx, &intr->src[1])[1]
+	}, 2);
+
+	offset = ir3_get_src(ctx, &intr->src[2])[0];
+
+	value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
+
+	struct ir3_instruction *stg =
+		ir3_STG_A(b,
+					addr, 0,
+					offset, 0,
+					create_immed(b, 0), 0,
+					create_immed(b, 0), 0,
+					value, 0,
+					create_immed(b, ncomp), 0);
+	stg->cat6.type = TYPE_U32;
+	stg->cat6.iim_val = 1;
+
+	array_insert(b, b->keeps, stg);
+
+	stg->barrier_class = IR3_BARRIER_BUFFER_W;
+	stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+}
+
 const struct ir3_context_funcs ir3_a6xx_funcs = {
 		.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
 		.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
@@ -379,5 +441,7 @@ const struct ir3_context_funcs ir3_a6xx_funcs = {
 		.emit_intrinsic_store_image = emit_intrinsic_store_image,
 		.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
 		.emit_intrinsic_image_size = emit_intrinsic_image_size,
+		.emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
+		.emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
 };
 
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index e314f1e79de..88cc9251b7b 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1735,54 +1735,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		end->barrier_conflict = IR3_BARRIER_EVERYTHING;
 		break;
 
-	case nir_intrinsic_store_global_ir3: {
-		struct ir3_instruction *value, *addr, *offset;
-		unsigned ncomp = nir_intrinsic_src_components(intr, 0);
-
-		addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
-				ir3_get_src(ctx, &intr->src[1])[0],
-				ir3_get_src(ctx, &intr->src[1])[1]
-		}, 2);
-
-		offset = ir3_get_src(ctx, &intr->src[2])[0];
-
-		value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
-
-		struct ir3_instruction *stg =
-			ir3_STG_G(ctx->block, addr, 0, value, 0,
-					  create_immed(ctx->block, ncomp), 0, offset, 0);
-		stg->cat6.type = TYPE_U32;
-		stg->cat6.iim_val = 1;
-
-		array_insert(b, b->keeps, stg);
-
-		stg->barrier_class = IR3_BARRIER_BUFFER_W;
-		stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+	case nir_intrinsic_store_global_ir3:
+		ctx->funcs->emit_intrinsic_store_global_ir3(ctx, intr);
 		break;
-	}
-
-	case nir_intrinsic_load_global_ir3: {
-		struct ir3_instruction *addr, *offset;
-
-		addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
-				ir3_get_src(ctx, &intr->src[0])[0],
-				ir3_get_src(ctx, &intr->src[0])[1]
-		}, 2);
-
-		offset = ir3_get_src(ctx, &intr->src[1])[0];
-
-		struct ir3_instruction *load =
-			ir3_LDG(b, addr, 0, offset, 0,
-					create_immed(ctx->block, dest_components), 0);
-		load->cat6.type = TYPE_U32;
-		load->dsts[0]->wrmask = MASK(dest_components);
-
-		load->barrier_class = IR3_BARRIER_BUFFER_R;
-		load->barrier_conflict = IR3_BARRIER_BUFFER_W;
-
-		ir3_split_dest(b, dst, load, 0, dest_components);
+	case nir_intrinsic_load_global_ir3:
+		ctx->funcs->emit_intrinsic_load_global_ir3(ctx, intr, dst);
 		break;
-	}
 
 	case nir_intrinsic_load_ubo:
 		emit_intrinsic_load_ubo(ctx, intr, dst);
@@ -3085,10 +3043,12 @@ emit_stream_out(struct ir3_context *ctx)
 			base = bases[strmout->output[i].output_buffer];
 			out = ctx->outputs[regid(strmout->output[i].register_index, c)];
 
-			stg = ir3_STG(ctx->block, base, 0, out, 0,
-					create_immed(ctx->block, 1), 0);
+			stg = ir3_STG(ctx->block,
+						  base, 0,
+						  create_immed(ctx->block, (strmout->output[i].dst_offset + j) * 4), 0,
+						  out, 0,
+						  create_immed(ctx->block, 1), 0);
 			stg->cat6.type = TYPE_U32;
-			stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4;
 
 			array_insert(ctx->block, ctx->block->keeps, stg);
 		}
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 2a0066e069e..31ab63c2129 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -165,6 +165,9 @@ struct ir3_context_funcs {
 	struct ir3_instruction * (*emit_intrinsic_atomic_image)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
 	void (*emit_intrinsic_image_size)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
 			struct ir3_instruction **dst);
+	void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+			struct ir3_instruction **dst);
+	void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
 };
 
 extern const struct ir3_context_funcs ir3_a4xx_funcs;
diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l
index f7d9783c628..c5b263af910 100644
--- a/src/freedreno/ir3/ir3_lexer.l
+++ b/src/freedreno/ir3/ir3_lexer.l
@@ -296,9 +296,11 @@ static int parse_w(const char *str)
 
                                   /* category 6: */
 "ldg"                             return TOKEN(T_OP_LDG);
+"ldg.a"                           return TOKEN(T_OP_LDG_A);
 "ldl"                             return TOKEN(T_OP_LDL);
 "ldp"                             return TOKEN(T_OP_LDP);
 "stg"                             return TOKEN(T_OP_STG);
+"stg.a"                           return TOKEN(T_OP_STG_A);
 "stl"                             return TOKEN(T_OP_STL);
 "stp"                             return TOKEN(T_OP_STP);
 "ldib"                            return TOKEN(T_OP_LDIB);
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index 713676e24b1..47ca9eb0f59 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -92,7 +92,7 @@ static void new_label(const char *name)
 
 static struct ir3_instruction * new_instr(opc_t opc)
 {
-	instr = ir3_instr_create(block, opc, 4, 4);
+	instr = ir3_instr_create(block, opc, 4, 6);
 	instr->flags = iflags.flags;
 	instr->repeat = iflags.repeat;
 	instr->nop = iflags.nop;
@@ -525,9 +525,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
 
 /* category 6: */
 %token <tok> T_OP_LDG
+%token <tok> T_OP_LDG_A
 %token <tok> T_OP_LDL
 %token <tok> T_OP_LDP
 %token <tok> T_OP_STG
+%token <tok> T_OP_STG_A
 %token <tok> T_OP_STL
 %token <tok> T_OP_STP
 %token <tok> T_OP_LDIB
@@ -995,33 +997,40 @@ cat6_dim:          '.' T_1D  { instr->cat6.d = 1; }
 |                  '.' T_4D  { instr->cat6.d = 4; }
 
 cat6_type:         '.' type  { instr->cat6.type = $2; }
-cat6_offset:       offset    { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
+cat6_imm_offset:   offset    { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
+cat6_offset:       cat6_imm_offset
 |                  '+' src
 cat6_dst_offset:   offset    { instr->cat6.dst_offset = $1; }
 |                  '+' src   { instr->flags |= IR3_INSTR_G; }
 
 cat6_immed:        integer   { instr->cat6.iim_val = $1; }
 
-cat6_load:         T_OP_LDG  { new_instr(OPC_LDG); }  cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate
-|                  T_OP_LDP  { new_instr(OPC_LDP); }  cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate
-|                  T_OP_LDL  { new_instr(OPC_LDL); }  cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
-|                  T_OP_LDLW { new_instr(OPC_LDLW); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
-|                  T_OP_LDLV { new_instr(OPC_LDLV); } cat6_type dst_reg ',' 'l' '[' integer ']' {
+cat6_stg_ldg_a6xx_offset:
+                    '+' '(' src offset ')' '<' '<' integer {
+                        assert($8 == 2);
+                        new_src(0, IR3_REG_IMMED)->uim_val = 0;
+                        new_src(0, IR3_REG_IMMED)->uim_val = $4;
+                    }
+|                  '+' src '<' '<' integer offset '<' '<' integer {
+                        assert($9 == 2);
+                        new_src(0, IR3_REG_IMMED)->uim_val = $5 - 2;
+                        new_src(0, IR3_REG_IMMED)->uim_val = $6;
+                    }
+
+cat6_load:         T_OP_LDG   { new_instr(OPC_LDG); }   cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate
+|                  T_OP_LDG_A { new_instr(OPC_LDG_A); } cat6_type dst_reg ',' 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' immediate
+|                  T_OP_LDP   { new_instr(OPC_LDP); }   cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate
+|                  T_OP_LDL   { new_instr(OPC_LDL); }   cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
+|                  T_OP_LDLW  { new_instr(OPC_LDLW); }  cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
+|                  T_OP_LDLV  { new_instr(OPC_LDLV); }  cat6_type dst_reg ',' 'l' '[' integer ']' {
                        new_src(0, IR3_REG_IMMED)->iim_val = $8;
                    } ',' immediate
 
 // TODO some of the cat6 instructions have different syntax for a6xx..
 //|                  T_OP_LDIB { new_instr(OPC_LDIB); } cat6_type dst_reg cat6_offset ',' reg ',' cat6_immed
 
-cat6_store:        T_OP_STG  { new_instr(OPC_STG); dummy_dst(); }  cat6_type 'g' '[' src cat6_dst_offset ']' ',' src ',' immediate {
-                       /* fixup src order, the offset reg is expected last currently */
-                       if (instr->flags & IR3_INSTR_G) {
-                           struct ir3_register *offset = instr->srcs[1];
-                           instr->srcs[1] = instr->srcs[2];
-                           instr->srcs[2] = instr->srcs[3];
-                           instr->srcs[3] = offset;
-                       }
-                   }
+cat6_store:        T_OP_STG   { new_instr(OPC_STG); dummy_dst(); }   cat6_type 'g' '[' src cat6_imm_offset ']' ',' src ',' immediate
+|                  T_OP_STG_A { new_instr(OPC_STG_A); dummy_dst(); } cat6_type 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' src ',' immediate
 |                  T_OP_STP  { new_instr(OPC_STP); dummy_dst(); }  cat6_type 'p' '[' src cat6_dst_offset ']' ',' src ',' immediate
 |                  T_OP_STL  { new_instr(OPC_STL); dummy_dst(); }  cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate
 |                  T_OP_STLW { new_instr(OPC_STLW); dummy_dst(); } cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate
diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c
index e2c132f89b0..38e3ecaea3b 100644
--- a/src/freedreno/ir3/ir3_validate.c
+++ b/src/freedreno/ir3/ir3_validate.c
@@ -247,6 +247,18 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
 			validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
 			break;
 		case OPC_STG:
+			validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
+			validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
+			validate_reg_size(ctx, instr->srcs[2], instr->cat6.type);
+			validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
+			break;
+		case OPC_STG_A:
+			validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
+			validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
+			validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
+			validate_reg_size(ctx, instr->srcs[4], instr->cat6.type);
+			validate_assert(ctx, !(instr->srcs[5]->flags & IR3_REG_HALF));
+			break;
 		case OPC_STL:
 		case OPC_STP:
 		case OPC_STLW:
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 6f500f0253e..31fcf97bf4a 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -153,20 +153,30 @@ static const struct test {
 	// TODO is this a real instruction?  Or float -6.0 ?
 	// INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
 	/* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
-	INSTR_6XX(c0d20906_02800004, "stg.f32 g[r1.x+r1.z], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
-	INSTR_6XX(c0da052e_01800042, "stg.s32 g[r0.z+r11.z], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
+	INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
+	INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
 	INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"),
 	INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"),
 	INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"),
 
-	INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
-	INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
+	/* Customely crafted */
+	INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"),
+	INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"),
+
+	INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
+	INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
 	INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
 	INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
 	INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"),
 
-	INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
-	INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
+	/* Found in TCS/TES shaders of GTA V */
+	INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */
+
+	/* Customely crafted */
+	INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"),
+
+	INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
+	INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
 	INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
 	INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
 
diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml
index 9283d30d653..e4bccd0fc16 100644
--- a/src/freedreno/isa/ir3-cat6.xml
+++ b/src/freedreno/isa/ir3-cat6.xml
@@ -42,84 +42,148 @@ SOFTWARE.
 	<!-- TODO pull more fields up to this level, when they are common across sub-encodings -->
 </bitset>
 
-<bitset name="ldg" extends="#instruction-cat6-a3xx">
-	<doc>
-		LoaD Global
-	</doc>
-	<display>
-		{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}], {SIZE}
-	</display>
-
-	<override>
-		<display>
-			{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE}
-		</display>
-		<expr>!{SRC2_REG}</expr>
-		<field low="1" high="13" name="OFF" type="offset"/>
-	</override>
-
+<bitset name="#instruction-cat6-ldg" extends="#instruction-cat6-a3xx">
 	<pattern pos="0"           >1</pattern>
-	<field   low="1"  high="8"  name="SRC2"  type="#reg-gpr"/>
-	<assert  low="9"  high="13">00000</assert>
 	<field   low="14" high="21" name="SRC1" type="#reg-gpr"/>
-	<field   pos="22"           name="SRC2_REG" type="bool"/>
 	<pattern pos="23"          >1</pattern>
 	<field   low="24" high="31" name="SIZE" type="uint"/>
 	<field   low="32" high="39" name="DST" type="#reg-gpr"/>
 	<pattern low="40" high="48">xxxxxxxxx</pattern>
 	<pattern low="52" high="53">00</pattern>
 	<pattern low="54" high="58">00000</pattern>  <!-- OPC -->
+</bitset>
+
+<bitset name="ldg" extends="#instruction-cat6-ldg">
+	<doc>
+		LoaD Global
+	</doc>
+
+	<display>
+		{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE}
+	</display>
+
+	<field low="1" high="13" name="OFF" type="offset"/>
+	<pattern pos="22"          >0</pattern> <!-- Imm offset ldg form -->
+
 	<encode>
-		<map name="SRC2_REG">!(src->srcs[1]->flags & IR3_REG_IMMED)</map>
-		<map name="SRC2">src->srcs[1]</map>
 		<map name="OFF">src->srcs[1]->iim_val</map>
 		<map name="SIZE">src->srcs[2]->uim_val</map>
 	</encode>
 </bitset>
 
-<bitset name="stg" extends="#instruction-cat6-a3xx">
+<bitset name="ldg.a" extends="#instruction-cat6-ldg">
 	<doc>
-		STore Global
+		LoaD Global
 	</doc>
+
+	<gen min="600"/>
+
 	<display>
-		{SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}], {SRC3}, {SIZE}
+		{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+({SRC2}{OFF})<<{SRC2_BYTE_SHIFT}], {SIZE}
 	</display>
 
 	<override>
 		<display>
-			{SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE}
+			{SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}<<{SRC2_BYTE_SHIFT}{OFF}<<2], {SIZE}
 		</display>
-		<expr>!{G}</expr>
-		<derived name="OFF" width="13" type="offset">
-			<expr>({OFF_HI} << 8) | {OFF_LO}</expr>
-		</derived>
-		<field   low="9"  high="13" name="OFF_HI" type="uint"/>
-		<field   low="32" high="39" name="OFF_LO" type="uint"/>
+		<expr>{SRC2_ADD_DWORD_SHIFT} > 0</expr>
 	</override>
 
+	<field   low="1"  high="8"  name="SRC2"  type="#reg-gpr"/>
+	<field   low="9"  high="10" name="OFF" type="uoffset"/>
+	<assert  pos="11"          >0</assert>
+	<field   low="12" high="13" name="SRC2_ADD_DWORD_SHIFT" type="uint"/>
+	<pattern pos="22"          >1</pattern> <!-- Reg offset ldg form -->
+
+	<derived name="SRC2_BYTE_SHIFT" width="3" type="uint">
+		<expr>{SRC2_ADD_DWORD_SHIFT} + 2</expr>
+	</derived>
+
+	<encode>
+		<map name="SRC2">src->srcs[1]</map>
+		<map name="SRC2_ADD_DWORD_SHIFT">src->srcs[2]->uim_val</map>
+		<map name="OFF">src->srcs[3]->uim_val</map>
+		<map name="SIZE">src->srcs[4]->uim_val</map>
+	</encode>
+</bitset>
+
+<bitset name="#instruction-cat6-stg" extends="#instruction-cat6-a3xx">
 	<pattern pos="0"           >x</pattern>
 	<field   low="1"  high="8"  name="SRC3" type="#reg-gpr"/>
-	<assert  low="9"  high="13">00000</assert>  <!-- OFF_HI -->
 	<pattern low="14" high="21">xxxxxxxx</pattern>
 	<pattern low="22" high="23">1x</pattern>
 	<field   low="24" high="31" name="SIZE" type="uint"/>
-	<field   low="32" high="39" name="SRC2" type="#reg-gpr"/>
 	<field   pos="40"           name="DST_OFF" type="bool"/>
 	<field   low="41" high="48" name="SRC1" type="#reg-gpr"/>
-	<field   pos="52"           name="G" type="bool"/>
 	<pattern pos="53"          >x</pattern>
 	<pattern low="54" high="58">00011</pattern>  <!-- OPC -->
+
 	<encode>
-		<map name="SIZE">src->srcs[2]->uim_val</map>
-		<map name="SRC2">src->srcs[3]</map>
 		<map name="DST_OFF" force="true">1</map>
-		<map name="SRC3">src->srcs[1]</map>
-		<map name="G">(src->flags & IR3_INSTR_G) && !(src->srcs[3]->flags & IR3_REG_IMMED)</map>
-		<map name="OFF_LO">src->cat6.dst_offset</map>
-		<map name="OFF_HI">src->cat6.dst_offset >> 8</map>
 	</encode>
 </bitset>
 
+<bitset name="stg" extends="#instruction-cat6-stg">
+	<doc>
+		STore Global
+	</doc>
+
+	<display>
+		{SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE}
+	</display>
+
+	<derived name="OFF" width="13" type="offset">
+		<expr>({OFF_HI} << 8) | {OFF_LO}</expr>
+	</derived>
+
+	<field   low="9"  high="13" name="OFF_HI" type="uint"/>
+	<field   low="32" high="39" name="OFF_LO" type="uint"/>
+	<pattern pos="52" >0</pattern> <!-- Imm offset stg form -->
+
+	<encode>
+		<map name="OFF_LO">src->srcs[1]->iim_val</map>
+		<map name="OFF_HI">src->srcs[1]->iim_val >> 8</map>
+		<map name="SRC3">src->srcs[2]</map>
+		<map name="SIZE">src->srcs[3]->uim_val</map>
+	</encode>
+</bitset>
+
+<bitset name="stg.a" extends="#instruction-cat6-stg">
+	<doc>
+		STore Global
+	</doc>
+
+	<gen min="600"/>
+
+	<display>
+		{SY}{JP}{NAME}.{TYPE} g[{SRC1}+({SRC2}{OFF})<<{DST_BYTE_SHIFT}], {SRC3}, {SIZE}
+	</display>
+
+	<override>
+		<display>
+			{SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}<<{DST_BYTE_SHIFT}{OFF}<<2], {SRC3}, {SIZE}
+		</display>
+		<expr>{SRC2_ADD_DWORD_SHIFT} > 0</expr>
+	</override>
+
+	<derived name="DST_BYTE_SHIFT" width="3" type="uint">
+		<expr>{SRC2_ADD_DWORD_SHIFT} + 2</expr>
+	</derived>
+
+	<field   low="9"  high="10" name="OFF" type="uoffset"/>
+	<assert  pos="11"          >0</assert>
+	<field   low="12" high="13" name="SRC2_ADD_DWORD_SHIFT" type="uint"/>
+	<field   low="32" high="39" name="SRC2" type="#reg-gpr"/>
+	<pattern pos="52" >1</pattern> <!-- Reg offset stg form -->
+
+	<encode>
+		<map name="SRC2">src->srcs[1]</map>
+		<map name="SRC2_ADD_DWORD_SHIFT">src->srcs[2]->uim_val</map>
+		<map name="OFF">src->srcs[3]->uim_val</map>
+		<map name="SRC3">src->srcs[4]</map>
+		<map name="SIZE">src->srcs[5]->uim_val</map>
+	</encode>
+</bitset>
 
 <bitset name="#instruction-cat6-a3xx-ld" extends="#instruction-cat6-a3xx">
 	<pattern pos="0"           >1</pattern>



More information about the mesa-commit mailing list