Mesa (main): ir3: add ldg.a,stg.a which allow complex in-place offset calculation
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jun 25 15:56:42 UTC 2021
Module: Mesa
Branch: main
Commit: fdc0f489e098d320593a1c6837a19726c84d90e9
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fdc0f489e098d320593a1c6837a19726c84d90e9
Author: Danylo Piliaiev <dpiliaiev at igalia.com>
Date: Wed Jun 16 14:43:19 2021 +0300
ir3: add ldg.a,stg.a which allow complex in-place offset calculation
The full form of the ldg.a/stg.a offset is:
g[reg_address + (reg_offset << (imm_shift + 2)) + (imm_offset << 2)]
where imm_shift and imm_offset are each 2-bit immediates in the range [0, 3].
The a6xx blob was found to produce slightly simpler offset calculations
for TES/TCS shaders in GTA V:
[c002000a_03c14215] ldg.a.f32 r2.z, g[r1.y+((r2.z+1)<<2)], 3;
[c0020004_01c14609] ldg.a.f32 r1.x, g[r1.y+((r1.x+3)<<2)], 1;
Our new syntax:
stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1
stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1
ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3
ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3
Signed-off-by: Danylo Piliaiev <dpiliaiev at igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11431>
---
.../computerator/examples/stg_ldg_offset.asm | 17 +++
src/freedreno/ir3/disasm-a3xx.c | 2 +
src/freedreno/ir3/instr-a3xx.h | 3 +
src/freedreno/ir3/ir3.c | 7 +-
src/freedreno/ir3/ir3.h | 56 +++++++-
src/freedreno/ir3/ir3_a4xx.c | 2 +
src/freedreno/ir3/ir3_a6xx.c | 64 +++++++++
src/freedreno/ir3/ir3_compiler_nir.c | 58 ++-------
src/freedreno/ir3/ir3_context.h | 3 +
src/freedreno/ir3/ir3_lexer.l | 2 +
src/freedreno/ir3/ir3_parser.y | 41 +++---
src/freedreno/ir3/ir3_validate.c | 12 ++
src/freedreno/ir3/tests/disasm.c | 22 +++-
src/freedreno/isa/ir3-cat6.xml | 144 +++++++++++++++------
14 files changed, 317 insertions(+), 116 deletions(-)
diff --git a/src/freedreno/computerator/examples/stg_ldg_offset.asm b/src/freedreno/computerator/examples/stg_ldg_offset.asm
new file mode 100644
index 00000000000..53b379d47ff
--- /dev/null
+++ b/src/freedreno/computerator/examples/stg_ldg_offset.asm
@@ -0,0 +1,17 @@
+@localsize 16, 1, 1
+@buf 128 (c2.x) ; c2.xy
+@invocationid(r0.x) ; r0.xyz
+mov.u32u32 r0.y, r0.x
+mov.u32u32 r1.x, c2.x
+mov.u32u32 r1.y, c2.y
+mov.u32u32 r2.x, 0xff
+(rpt5)nop
+stg.a.u32 g[r1.x+r0.y<<4+2<<2], r2.x, 1
+nop(sy)
+ldg.a.u32 r4.x, g[r1.x+r0.y<<4+2<<2], 1
+nop(sy)
+add.u r4.x, r4.x, 1
+(rpt3)nop
+stg.a.u32 g[r1.x+r0.y<<4+1<<2], r4.x, 1
+end
+nop
diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index f0b9731fee1..b0cfa73a4c1 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c
@@ -303,9 +303,11 @@ static const struct opc_info {
/* category 6: */
OPC(6, OPC_LDG, ldg),
+ OPC(6, OPC_LDG_A, ldg.a),
OPC(6, OPC_LDL, ldl),
OPC(6, OPC_LDP, ldp),
OPC(6, OPC_STG, stg),
+ OPC(6, OPC_STG_A, stg.a),
OPC(6, OPC_STL, stl),
OPC(6, OPC_STP, stp),
OPC(6, OPC_LDIB, ldib),
diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index 656ee2b6db3..8fb954e0f57 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h
@@ -291,6 +291,9 @@ typedef enum {
OPC_ATOMIC_B_OR = _OPC(6, 53),
OPC_ATOMIC_B_XOR = _OPC(6, 54),
+ OPC_LDG_A = _OPC(6, 55),
+ OPC_STG_A = _OPC(6, 56),
+
/* category 7: */
OPC_BAR = _OPC(7, 0),
OPC_FENCE = _OPC(7, 1),
diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 5ba9134909f..7a340b78eb7 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -817,7 +817,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
* but for load instructions this arg is the address (and not
* really sure any good way to test a hard-coded immed addr src)
*/
- if (is_store(instr) && (n == 1))
+ if (is_store(instr) && (instr->opc != OPC_STG) && (n == 1))
return false;
if ((instr->opc == OPC_LDL) && (n == 0))
@@ -847,7 +847,10 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
return false;
- if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
+ if (instr->opc == OPC_STG && (n == 2))
+ return false;
+
+ if (instr->opc == OPC_STG_A && (n == 4))
return false;
/* as with atomics, these cat6 instrs can only have an immediate
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 2bd2eebc83e..6dac29a78fe 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -827,6 +827,7 @@ is_store(struct ir3_instruction *instr)
*/
switch (instr->opc) {
case OPC_STG:
+ case OPC_STG_A:
case OPC_STGB:
case OPC_STIB:
case OPC_STP:
@@ -844,6 +845,7 @@ static inline bool is_load(struct ir3_instruction *instr)
{
switch (instr->opc) {
case OPC_LDG:
+ case OPC_LDG_A:
case OPC_LDGB:
case OPC_LDIB:
case OPC_LDL:
@@ -1731,6 +1733,54 @@ ir3_##name(struct ir3_block *block, \
#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR4(name) __INSTR4(0, name, OPC_##name)
+#define __INSTR5(flag, name, opc) \
+static inline struct ir3_instruction * \
+ir3_##name(struct ir3_block *block, \
+ struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, \
+ struct ir3_instruction *c, unsigned cflags, \
+ struct ir3_instruction *d, unsigned dflags, \
+ struct ir3_instruction *e, unsigned eflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, opc, 1, 5); \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ __ssa_src(instr, b, bflags); \
+ __ssa_src(instr, c, cflags); \
+ __ssa_src(instr, d, dflags); \
+ __ssa_src(instr, e, eflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
+#define INSTR5(name) __INSTR5(0, name, OPC_##name)
+
+#define __INSTR6(flag, name, opc) \
+static inline struct ir3_instruction * \
+ir3_##name(struct ir3_block *block, \
+ struct ir3_instruction *a, unsigned aflags, \
+ struct ir3_instruction *b, unsigned bflags, \
+ struct ir3_instruction *c, unsigned cflags, \
+ struct ir3_instruction *d, unsigned dflags, \
+ struct ir3_instruction *e, unsigned eflags, \
+ struct ir3_instruction *f, unsigned fflags) \
+{ \
+ struct ir3_instruction *instr = \
+ ir3_instr_create(block, opc, 1, 6); \
+ __ssa_dst(instr); \
+ __ssa_src(instr, a, aflags); \
+ __ssa_src(instr, b, bflags); \
+ __ssa_src(instr, c, cflags); \
+ __ssa_src(instr, d, dflags); \
+ __ssa_src(instr, e, eflags); \
+ __ssa_src(instr, f, fflags); \
+ instr->flags |= flag; \
+ return instr; \
+}
+#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, name##_##f, OPC_##name)
+#define INSTR6(name) __INSTR6(0, name, OPC_##name)
+
/* cat0 instructions: */
INSTR1(B)
INSTR0(JUMP)
@@ -1872,7 +1922,7 @@ INSTR3(LDG)
INSTR3(LDL)
INSTR3(LDLW)
INSTR3(LDP)
-INSTR3(STG)
+INSTR4(STG)
INSTR3(STL)
INSTR3(STLW)
INSTR3(STP)
@@ -1893,6 +1943,8 @@ INSTR2(LDC)
#if GPU >= 600
INSTR3(STIB);
INSTR2(LDIB);
+INSTR5(LDG_A);
+INSTR6(STG_A);
INSTR3F(G, ATOMIC_ADD)
INSTR3F(G, ATOMIC_SUB)
INSTR3F(G, ATOMIC_XCHG)
@@ -1921,8 +1973,6 @@ INSTR4F(G, ATOMIC_OR)
INSTR4F(G, ATOMIC_XOR)
#endif
-INSTR4F(G, STG)
-
/* cat7 instructions: */
INSTR0(BAR)
INSTR0(FENCE)
diff --git a/src/freedreno/ir3/ir3_a4xx.c b/src/freedreno/ir3/ir3_a4xx.c
index 486dd1f7cfe..57e5b304ff3 100644
--- a/src/freedreno/ir3/ir3_a4xx.c
+++ b/src/freedreno/ir3/ir3_a4xx.c
@@ -357,4 +357,6 @@ const struct ir3_context_funcs ir3_a4xx_funcs = {
.emit_intrinsic_store_image = emit_intrinsic_store_image,
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
.emit_intrinsic_image_size = emit_intrinsic_image_size_tex,
+ .emit_intrinsic_load_global_ir3 = NULL,
+ .emit_intrinsic_store_global_ir3 = NULL,
};
diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c
index 1fbc8f1248d..501a02ae3d0 100644
--- a/src/freedreno/ir3/ir3_a6xx.c
+++ b/src/freedreno/ir3/ir3_a6xx.c
@@ -371,6 +371,68 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
ir3_split_dest(b, dst, resinfo, 0, intr->num_components);
}
+static void
+emit_intrinsic_load_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+ struct ir3_instruction **dst)
+{
+ struct ir3_block *b = ctx->block;
+ unsigned dest_components = nir_intrinsic_dest_components(intr);
+ struct ir3_instruction *addr, *offset;
+
+ addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
+ ir3_get_src(ctx, &intr->src[0])[0],
+ ir3_get_src(ctx, &intr->src[0])[1]
+ }, 2);
+
+ offset = ir3_get_src(ctx, &intr->src[1])[0];
+
+ struct ir3_instruction *load =
+ ir3_LDG_A(b, addr, 0, offset, 0,
+ create_immed(b, 0), 0,
+ create_immed(b, 0), 0,
+ create_immed(b, dest_components), 0);
+ load->cat6.type = TYPE_U32;
+ load->dsts[0]->wrmask = MASK(dest_components);
+
+ load->barrier_class = IR3_BARRIER_BUFFER_R;
+ load->barrier_conflict = IR3_BARRIER_BUFFER_W;
+
+ ir3_split_dest(b, dst, load, 0, dest_components);
+}
+
+static void
+emit_intrinsic_store_global_ir3(struct ir3_context *ctx, nir_intrinsic_instr *intr)
+{
+ struct ir3_block *b = ctx->block;
+ struct ir3_instruction *value, *addr, *offset;
+ unsigned ncomp = nir_intrinsic_src_components(intr, 0);
+
+ addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
+ ir3_get_src(ctx, &intr->src[1])[0],
+ ir3_get_src(ctx, &intr->src[1])[1]
+ }, 2);
+
+ offset = ir3_get_src(ctx, &intr->src[2])[0];
+
+ value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
+
+ struct ir3_instruction *stg =
+ ir3_STG_A(b,
+ addr, 0,
+ offset, 0,
+ create_immed(b, 0), 0,
+ create_immed(b, 0), 0,
+ value, 0,
+ create_immed(b, ncomp), 0);
+ stg->cat6.type = TYPE_U32;
+ stg->cat6.iim_val = 1;
+
+ array_insert(b, b->keeps, stg);
+
+ stg->barrier_class = IR3_BARRIER_BUFFER_W;
+ stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+}
+
const struct ir3_context_funcs ir3_a6xx_funcs = {
.emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
.emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
@@ -379,5 +441,7 @@ const struct ir3_context_funcs ir3_a6xx_funcs = {
.emit_intrinsic_store_image = emit_intrinsic_store_image,
.emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
.emit_intrinsic_image_size = emit_intrinsic_image_size,
+ .emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
+ .emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
};
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index e314f1e79de..88cc9251b7b 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1735,54 +1735,12 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
end->barrier_conflict = IR3_BARRIER_EVERYTHING;
break;
- case nir_intrinsic_store_global_ir3: {
- struct ir3_instruction *value, *addr, *offset;
- unsigned ncomp = nir_intrinsic_src_components(intr, 0);
-
- addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
- ir3_get_src(ctx, &intr->src[1])[0],
- ir3_get_src(ctx, &intr->src[1])[1]
- }, 2);
-
- offset = ir3_get_src(ctx, &intr->src[2])[0];
-
- value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), ncomp);
-
- struct ir3_instruction *stg =
- ir3_STG_G(ctx->block, addr, 0, value, 0,
- create_immed(ctx->block, ncomp), 0, offset, 0);
- stg->cat6.type = TYPE_U32;
- stg->cat6.iim_val = 1;
-
- array_insert(b, b->keeps, stg);
-
- stg->barrier_class = IR3_BARRIER_BUFFER_W;
- stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+ case nir_intrinsic_store_global_ir3:
+ ctx->funcs->emit_intrinsic_store_global_ir3(ctx, intr);
break;
- }
-
- case nir_intrinsic_load_global_ir3: {
- struct ir3_instruction *addr, *offset;
-
- addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){
- ir3_get_src(ctx, &intr->src[0])[0],
- ir3_get_src(ctx, &intr->src[0])[1]
- }, 2);
-
- offset = ir3_get_src(ctx, &intr->src[1])[0];
-
- struct ir3_instruction *load =
- ir3_LDG(b, addr, 0, offset, 0,
- create_immed(ctx->block, dest_components), 0);
- load->cat6.type = TYPE_U32;
- load->dsts[0]->wrmask = MASK(dest_components);
-
- load->barrier_class = IR3_BARRIER_BUFFER_R;
- load->barrier_conflict = IR3_BARRIER_BUFFER_W;
-
- ir3_split_dest(b, dst, load, 0, dest_components);
+ case nir_intrinsic_load_global_ir3:
+ ctx->funcs->emit_intrinsic_load_global_ir3(ctx, intr, dst);
break;
- }
case nir_intrinsic_load_ubo:
emit_intrinsic_load_ubo(ctx, intr, dst);
@@ -3085,10 +3043,12 @@ emit_stream_out(struct ir3_context *ctx)
base = bases[strmout->output[i].output_buffer];
out = ctx->outputs[regid(strmout->output[i].register_index, c)];
- stg = ir3_STG(ctx->block, base, 0, out, 0,
- create_immed(ctx->block, 1), 0);
+ stg = ir3_STG(ctx->block,
+ base, 0,
+ create_immed(ctx->block, (strmout->output[i].dst_offset + j) * 4), 0,
+ out, 0,
+ create_immed(ctx->block, 1), 0);
stg->cat6.type = TYPE_U32;
- stg->cat6.dst_offset = (strmout->output[i].dst_offset + j) * 4;
array_insert(ctx->block, ctx->block->keeps, stg);
}
diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h
index 2a0066e069e..31ab63c2129 100644
--- a/src/freedreno/ir3/ir3_context.h
+++ b/src/freedreno/ir3/ir3_context.h
@@ -165,6 +165,9 @@ struct ir3_context_funcs {
struct ir3_instruction * (*emit_intrinsic_atomic_image)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
void (*emit_intrinsic_image_size)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst);
+ void (*emit_intrinsic_load_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr,
+ struct ir3_instruction **dst);
+ void (*emit_intrinsic_store_global_ir3)(struct ir3_context *ctx, nir_intrinsic_instr *intr);
};
extern const struct ir3_context_funcs ir3_a4xx_funcs;
diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l
index f7d9783c628..c5b263af910 100644
--- a/src/freedreno/ir3/ir3_lexer.l
+++ b/src/freedreno/ir3/ir3_lexer.l
@@ -296,9 +296,11 @@ static int parse_w(const char *str)
/* category 6: */
"ldg" return TOKEN(T_OP_LDG);
+"ldg.a" return TOKEN(T_OP_LDG_A);
"ldl" return TOKEN(T_OP_LDL);
"ldp" return TOKEN(T_OP_LDP);
"stg" return TOKEN(T_OP_STG);
+"stg.a" return TOKEN(T_OP_STG_A);
"stl" return TOKEN(T_OP_STL);
"stp" return TOKEN(T_OP_STP);
"ldib" return TOKEN(T_OP_LDIB);
diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y
index 713676e24b1..47ca9eb0f59 100644
--- a/src/freedreno/ir3/ir3_parser.y
+++ b/src/freedreno/ir3/ir3_parser.y
@@ -92,7 +92,7 @@ static void new_label(const char *name)
static struct ir3_instruction * new_instr(opc_t opc)
{
- instr = ir3_instr_create(block, opc, 4, 4);
+ instr = ir3_instr_create(block, opc, 4, 6);
instr->flags = iflags.flags;
instr->repeat = iflags.repeat;
instr->nop = iflags.nop;
@@ -525,9 +525,11 @@ static void print_token(FILE *file, int type, YYSTYPE value)
/* category 6: */
%token <tok> T_OP_LDG
+%token <tok> T_OP_LDG_A
%token <tok> T_OP_LDL
%token <tok> T_OP_LDP
%token <tok> T_OP_STG
+%token <tok> T_OP_STG_A
%token <tok> T_OP_STL
%token <tok> T_OP_STP
%token <tok> T_OP_LDIB
@@ -995,33 +997,40 @@ cat6_dim: '.' T_1D { instr->cat6.d = 1; }
| '.' T_4D { instr->cat6.d = 4; }
cat6_type: '.' type { instr->cat6.type = $2; }
-cat6_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
+cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; }
+cat6_offset: cat6_imm_offset
| '+' src
cat6_dst_offset: offset { instr->cat6.dst_offset = $1; }
| '+' src { instr->flags |= IR3_INSTR_G; }
cat6_immed: integer { instr->cat6.iim_val = $1; }
-cat6_load: T_OP_LDG { new_instr(OPC_LDG); } cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate
-| T_OP_LDP { new_instr(OPC_LDP); } cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate
-| T_OP_LDL { new_instr(OPC_LDL); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
-| T_OP_LDLW { new_instr(OPC_LDLW); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
-| T_OP_LDLV { new_instr(OPC_LDLV); } cat6_type dst_reg ',' 'l' '[' integer ']' {
+cat6_stg_ldg_a6xx_offset:
+ '+' '(' src offset ')' '<' '<' integer {
+ assert($8 == 2);
+ new_src(0, IR3_REG_IMMED)->uim_val = 0;
+ new_src(0, IR3_REG_IMMED)->uim_val = $4;
+ }
+| '+' src '<' '<' integer offset '<' '<' integer {
+ assert($9 == 2);
+ new_src(0, IR3_REG_IMMED)->uim_val = $5 - 2;
+ new_src(0, IR3_REG_IMMED)->uim_val = $6;
+ }
+
+cat6_load: T_OP_LDG { new_instr(OPC_LDG); } cat6_type dst_reg ',' 'g' '[' src cat6_offset ']' ',' immediate
+| T_OP_LDG_A { new_instr(OPC_LDG_A); } cat6_type dst_reg ',' 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' immediate
+| T_OP_LDP { new_instr(OPC_LDP); } cat6_type dst_reg ',' 'p' '[' src cat6_offset ']' ',' immediate
+| T_OP_LDL { new_instr(OPC_LDL); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
+| T_OP_LDLW { new_instr(OPC_LDLW); } cat6_type dst_reg ',' 'l' '[' src cat6_offset ']' ',' immediate
+| T_OP_LDLV { new_instr(OPC_LDLV); } cat6_type dst_reg ',' 'l' '[' integer ']' {
new_src(0, IR3_REG_IMMED)->iim_val = $8;
} ',' immediate
// TODO some of the cat6 instructions have different syntax for a6xx..
//| T_OP_LDIB { new_instr(OPC_LDIB); } cat6_type dst_reg cat6_offset ',' reg ',' cat6_immed
-cat6_store: T_OP_STG { new_instr(OPC_STG); dummy_dst(); } cat6_type 'g' '[' src cat6_dst_offset ']' ',' src ',' immediate {
- /* fixup src order, the offset reg is expected last currently */
- if (instr->flags & IR3_INSTR_G) {
- struct ir3_register *offset = instr->srcs[1];
- instr->srcs[1] = instr->srcs[2];
- instr->srcs[2] = instr->srcs[3];
- instr->srcs[3] = offset;
- }
- }
+cat6_store: T_OP_STG { new_instr(OPC_STG); dummy_dst(); } cat6_type 'g' '[' src cat6_imm_offset ']' ',' src ',' immediate
+| T_OP_STG_A { new_instr(OPC_STG_A); dummy_dst(); } cat6_type 'g' '[' src cat6_stg_ldg_a6xx_offset ']' ',' src ',' immediate
| T_OP_STP { new_instr(OPC_STP); dummy_dst(); } cat6_type 'p' '[' src cat6_dst_offset ']' ',' src ',' immediate
| T_OP_STL { new_instr(OPC_STL); dummy_dst(); } cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate
| T_OP_STLW { new_instr(OPC_STLW); dummy_dst(); } cat6_type 'l' '[' src cat6_dst_offset ']' ',' src ',' immediate
diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c
index e2c132f89b0..38e3ecaea3b 100644
--- a/src/freedreno/ir3/ir3_validate.c
+++ b/src/freedreno/ir3/ir3_validate.c
@@ -247,6 +247,18 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
break;
case OPC_STG:
+ validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
+ validate_assert(ctx, !(instr->srcs[1]->flags & IR3_REG_HALF));
+ validate_reg_size(ctx, instr->srcs[2], instr->cat6.type);
+ validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
+ break;
+ case OPC_STG_A:
+ validate_assert(ctx, !(instr->srcs[0]->flags & IR3_REG_HALF));
+ validate_assert(ctx, !(instr->srcs[2]->flags & IR3_REG_HALF));
+ validate_assert(ctx, !(instr->srcs[3]->flags & IR3_REG_HALF));
+ validate_reg_size(ctx, instr->srcs[4], instr->cat6.type);
+ validate_assert(ctx, !(instr->srcs[5]->flags & IR3_REG_HALF));
+ break;
case OPC_STL:
case OPC_STP:
case OPC_STLW:
diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c
index 6f500f0253e..31fcf97bf4a 100644
--- a/src/freedreno/ir3/tests/disasm.c
+++ b/src/freedreno/ir3/tests/disasm.c
@@ -153,20 +153,30 @@ static const struct test {
// TODO is this a real instruction? Or float -6.0 ?
// INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
/* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
- INSTR_6XX(c0d20906_02800004, "stg.f32 g[r1.x+r1.z], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
- INSTR_6XX(c0da052e_01800042, "stg.s32 g[r0.z+r11.z], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
+ INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
+ INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"),
INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"),
INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"),
- INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
- INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
+ /* Customely crafted */
+ INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"),
+ INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"),
+
+ INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
+ INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"),
- INSTR_6XX(c0020011_04c08023, "ldg.f32 r4.y, g[r0.z+r4.y], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
- INSTR_6XX(c0060006_01c18017, "ldg.u32 r1.z, g[r1.z+r2.w], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
+ /* Found in TCS/TES shaders of GTA V */
+ INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */
+
+ /* Customely crafted */
+ INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"),
+
+ INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
+ INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml
index 9283d30d653..e4bccd0fc16 100644
--- a/src/freedreno/isa/ir3-cat6.xml
+++ b/src/freedreno/isa/ir3-cat6.xml
@@ -42,84 +42,148 @@ SOFTWARE.
<!-- TODO pull more fields up to this level, when they are common across sub-encodings -->
</bitset>
-<bitset name="ldg" extends="#instruction-cat6-a3xx">
- <doc>
- LoaD Global
- </doc>
- <display>
- {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}], {SIZE}
- </display>
-
- <override>
- <display>
- {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE}
- </display>
- <expr>!{SRC2_REG}</expr>
- <field low="1" high="13" name="OFF" type="offset"/>
- </override>
-
+<bitset name="#instruction-cat6-ldg" extends="#instruction-cat6-a3xx">
<pattern pos="0" >1</pattern>
- <field low="1" high="8" name="SRC2" type="#reg-gpr"/>
- <assert low="9" high="13">00000</assert>
<field low="14" high="21" name="SRC1" type="#reg-gpr"/>
- <field pos="22" name="SRC2_REG" type="bool"/>
<pattern pos="23" >1</pattern>
<field low="24" high="31" name="SIZE" type="uint"/>
<field low="32" high="39" name="DST" type="#reg-gpr"/>
<pattern low="40" high="48">xxxxxxxxx</pattern>
<pattern low="52" high="53">00</pattern>
<pattern low="54" high="58">00000</pattern> <!-- OPC -->
+</bitset>
+
+<bitset name="ldg" extends="#instruction-cat6-ldg">
+ <doc>
+ LoaD Global
+ </doc>
+
+ <display>
+ {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}{OFF}], {SIZE}
+ </display>
+
+ <field low="1" high="13" name="OFF" type="offset"/>
+ <pattern pos="22" >0</pattern> <!-- Imm offset ldg form -->
+
<encode>
- <map name="SRC2_REG">!(src->srcs[1]->flags & IR3_REG_IMMED)</map>
- <map name="SRC2">src->srcs[1]</map>
<map name="OFF">src->srcs[1]->iim_val</map>
<map name="SIZE">src->srcs[2]->uim_val</map>
</encode>
</bitset>
-<bitset name="stg" extends="#instruction-cat6-a3xx">
+<bitset name="ldg.a" extends="#instruction-cat6-ldg">
<doc>
- STore Global
+ LoaD Global
</doc>
+
+ <gen min="600"/>
+
<display>
- {SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}], {SRC3}, {SIZE}
+ {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+({SRC2}{OFF})<<{SRC2_BYTE_SHIFT}], {SIZE}
</display>
<override>
<display>
- {SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE}
+ {SY}{JP}{NAME}.{TYPE} {DST}, g[{SRC1}+{SRC2}<<{SRC2_BYTE_SHIFT}{OFF}<<2], {SIZE}
</display>
- <expr>!{G}</expr>
- <derived name="OFF" width="13" type="offset">
- <expr>({OFF_HI} << 8) | {OFF_LO}</expr>
- </derived>
- <field low="9" high="13" name="OFF_HI" type="uint"/>
- <field low="32" high="39" name="OFF_LO" type="uint"/>
+ <expr>{SRC2_ADD_DWORD_SHIFT} > 0</expr>
</override>
+ <field low="1" high="8" name="SRC2" type="#reg-gpr"/>
+ <field low="9" high="10" name="OFF" type="uoffset"/>
+ <assert pos="11" >0</assert>
+ <field low="12" high="13" name="SRC2_ADD_DWORD_SHIFT" type="uint"/>
+ <pattern pos="22" >1</pattern> <!-- Reg offset ldg form -->
+
+ <derived name="SRC2_BYTE_SHIFT" width="3" type="uint">
+ <expr>{SRC2_ADD_DWORD_SHIFT} + 2</expr>
+ </derived>
+
+ <encode>
+ <map name="SRC2">src->srcs[1]</map>
+ <map name="SRC2_ADD_DWORD_SHIFT">src->srcs[2]->uim_val</map>
+ <map name="OFF">src->srcs[3]->uim_val</map>
+ <map name="SIZE">src->srcs[4]->uim_val</map>
+ </encode>
+</bitset>
+
+<bitset name="#instruction-cat6-stg" extends="#instruction-cat6-a3xx">
<pattern pos="0" >x</pattern>
<field low="1" high="8" name="SRC3" type="#reg-gpr"/>
- <assert low="9" high="13">00000</assert> <!-- OFF_HI -->
<pattern low="14" high="21">xxxxxxxx</pattern>
<pattern low="22" high="23">1x</pattern>
<field low="24" high="31" name="SIZE" type="uint"/>
- <field low="32" high="39" name="SRC2" type="#reg-gpr"/>
<field pos="40" name="DST_OFF" type="bool"/>
<field low="41" high="48" name="SRC1" type="#reg-gpr"/>
- <field pos="52" name="G" type="bool"/>
<pattern pos="53" >x</pattern>
<pattern low="54" high="58">00011</pattern> <!-- OPC -->
+
<encode>
- <map name="SIZE">src->srcs[2]->uim_val</map>
- <map name="SRC2">src->srcs[3]</map>
<map name="DST_OFF" force="true">1</map>
- <map name="SRC3">src->srcs[1]</map>
- <map name="G">(src->flags & IR3_INSTR_G) && !(src->srcs[3]->flags & IR3_REG_IMMED)</map>
- <map name="OFF_LO">src->cat6.dst_offset</map>
- <map name="OFF_HI">src->cat6.dst_offset >> 8</map>
</encode>
</bitset>
+<bitset name="stg" extends="#instruction-cat6-stg">
+ <doc>
+ STore Global
+ </doc>
+
+ <display>
+ {SY}{JP}{NAME}.{TYPE} g[{SRC1}{OFF}], {SRC3}, {SIZE}
+ </display>
+
+ <derived name="OFF" width="13" type="offset">
+ <expr>({OFF_HI} << 8) | {OFF_LO}</expr>
+ </derived>
+
+ <field low="9" high="13" name="OFF_HI" type="uint"/>
+ <field low="32" high="39" name="OFF_LO" type="uint"/>
+ <pattern pos="52" >0</pattern> <!-- Imm offset stg form -->
+
+ <encode>
+ <map name="OFF_LO">src->srcs[1]->iim_val</map>
+ <map name="OFF_HI">src->srcs[1]->iim_val >> 8</map>
+ <map name="SRC3">src->srcs[2]</map>
+ <map name="SIZE">src->srcs[3]->uim_val</map>
+ </encode>
+</bitset>
+
+<bitset name="stg.a" extends="#instruction-cat6-stg">
+ <doc>
+ STore Global
+ </doc>
+
+ <gen min="600"/>
+
+ <display>
+ {SY}{JP}{NAME}.{TYPE} g[{SRC1}+({SRC2}{OFF})<<{DST_BYTE_SHIFT}], {SRC3}, {SIZE}
+ </display>
+
+ <override>
+ <display>
+ {SY}{JP}{NAME}.{TYPE} g[{SRC1}+{SRC2}<<{DST_BYTE_SHIFT}{OFF}<<2], {SRC3}, {SIZE}
+ </display>
+ <expr>{SRC2_ADD_DWORD_SHIFT} > 0</expr>
+ </override>
+
+ <derived name="DST_BYTE_SHIFT" width="3" type="uint">
+ <expr>{SRC2_ADD_DWORD_SHIFT} + 2</expr>
+ </derived>
+
+ <field low="9" high="10" name="OFF" type="uoffset"/>
+ <assert pos="11" >0</assert>
+ <field low="12" high="13" name="SRC2_ADD_DWORD_SHIFT" type="uint"/>
+ <field low="32" high="39" name="SRC2" type="#reg-gpr"/>
+ <pattern pos="52" >1</pattern> <!-- Reg offset stg form -->
+
+ <encode>
+ <map name="SRC2">src->srcs[1]</map>
+ <map name="SRC2_ADD_DWORD_SHIFT">src->srcs[2]->uim_val</map>
+ <map name="OFF">src->srcs[3]->uim_val</map>
+ <map name="SRC3">src->srcs[4]</map>
+ <map name="SIZE">src->srcs[5]->uim_val</map>
+ </encode>
+</bitset>
<bitset name="#instruction-cat6-a3xx-ld" extends="#instruction-cat6-a3xx">
<pattern pos="0" >1</pattern>
More information about the mesa-commit
mailing list