[Mesa-dev] [PATCH] r600g: Add get/set to handle ALLOC_EXPORT_RAT_WORD0
Vincent Lejeune
vljn at ovi.com
Tue Mar 19 10:09:33 PDT 2013
---
src/gallium/drivers/r600/eg_asm.c | 38 +++++++++++
src/gallium/drivers/r600/eg_sq.h | 59 ++++++++++++++++
src/gallium/drivers/r600/r600_asm.c | 119 +++++++++++++++++++++++++++++++++
src/gallium/drivers/r600/r600_asm.h | 8 ++-
src/gallium/drivers/r600/r600_shader.c | 34 +++++++---
5 files changed, 248 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index fffc436..cacb82f 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -106,6 +106,22 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
id++;
+ } else if (cfop->flags & CF_MEM) {
+ /* MEM_RAT_CACHELESS instructions */
+ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(cf->output.gpr) |
+ S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(cf->output.elem_size) |
+ S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(cf->output.rat_id) |
+ S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(cf->output.rat_inst) |
+ S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(cf->output.index_gpr) |
+ S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
+ bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
+ if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ id++;
} else {
/* branch, loop, call, return instructions */
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
@@ -118,6 +134,28 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
return 0;
}
+void eg_bytecode_export_rat_read(struct r600_bytecode *bc, /* bc: only bc->isa is read here, for the opcode lookup */
+ struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) { /* Unpacks the two CF dwords word0/word1 of a RAT export into *output; members not assigned below are left untouched */
+ output->rat_id = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(word0); /* word0 is decoded with the ALLOC_EXPORT_WORD0_RAT getters */
+ output->rat_inst = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(word0);
+ output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(word0);
+ output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(word0);
+ output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(word0);
+ output->index_gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(word0);
+
+ output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1); /* NOTE(review): SWIZ_SEL_* and BUF_* getters are all applied to the same word1; the CF_MEM encoder in eg_bytecode_cf_build writes only the BUF_* fields - confirm the swizzles are meaningful for RAT exports */
+ output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+ output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+ output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+ output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1); /* NOTE(review): raw field value is stored; the CF_MEM encoder in eg_bytecode_cf_build writes burst_count - 1 - confirm whether a +1 is expected here */
+ output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+ output->op = r600_isa_cf_by_opcode(bc->isa, /* translate the raw CF_INST field into the value stored in output->op */
+ G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), /* is_cf_alu = */ 0 );
+ output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+ output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
+ output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
+}
+
void eg_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872..83588de 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -176,6 +176,65 @@
#define G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x) (((x) >> 30) & 0x3)
#define C_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE 0x3FFFFFFF
/* done */
+#define P_SQ_CF_ALLOC_EXPORT_WORD0_RAT /* RAT layout of CF_ALLOC_EXPORT word0: S_* masks a value and shifts it into its field, G_* extracts the field from a dword, V_* are named field values */
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x) (((x) & 0xF) << 0) /* RAT_ID: bits [3:0] */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x) (((x) >> 0) & 0xF)
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x) (((x) & 0x3F) << 4) /* RAT_INST: bits [9:4], 6-bit opcode */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x) (((x) >> 4) & 0x3F)
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_NOP 0x00000000 /* RAT_INST opcode values follow; the list is sparse */
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_TYPED 0x00000001
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_CMPX_INT 0x00000004
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_ADD 0x00000007
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_SUB 0x00000008
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_RSUB 0x00000009
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_INT 0x0000000A
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_UINT 0x0000000B
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_INT 0x0000000C
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_UINT 0x0000000D
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_AND 0x0000000E
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_OR 0x0000000F
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XOR 0x00000010
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_INC_UINT 0x00000012
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_DEC_UINT 0x00000013
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_DWORD 0x00000014
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_SHORT 0x00000015
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_BYTE 0x00000016
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_NOP_RTN 0x00000020 /* the *_RTN values below equal the corresponding non-RTN value + 0x20 where both are defined */
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XCHG_RTN 0x00000022
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_CMPXCHG_INT_RTN 0x00000024
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_ADD_RTN 0x00000027
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_SUB_RTN 0x00000028
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_RSUB_RTN 0x00000029
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_INT_RTN 0x0000002A
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_UINT_RTN 0x0000002B
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_INT_RTN 0x0000002C
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_UINT_RTN 0x0000002D
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_AND_RTN 0x0000002E
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_OR_RTN 0x0000002F
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XOR_RTN 0x00000030
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_INC_UINT_RTN 0x00000032
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_DEC_UINT_RTN 0x00000033
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_MODE(x) (((x) & 0x3) << 11) /* RAT_INDEX_MODE: bits [12:11]; bit 10 is not covered by any field defined here */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_MODE(x) (((x) >> 11) & 0x3)
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_NONE 0x00000000 /* presumably the RAT_INDEX_MODE values - confirm against the ISA document */
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_0 0x00000001
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_1 0x00000002
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_INVALID 0x00000003
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(x) (((x) & 0x3) << 13) /* TYPE: bits [14:13] */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(x) (((x) >> 13) & 0x3)
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE 0x00000000 /* presumably the TYPE values - confirm against the ISA document */
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_IND 0x00000001
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_ACK 0x00000002
+#define V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_IND_ACK 0x00000003
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(x) (((x) & 0x7F) << 15) /* RW_GPR: bits [21:15] */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(x) (((x) >> 15) & 0x7F)
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_REL(x) (((x) & 0x1) << 22) /* RW_REL: bit 22 */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_REL(x) (((x) >> 22) & 0x1)
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(x) (((x) & 0x7F) << 23) /* INDEX_GPR: bits [29:23] */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(x) (((x) >> 23) & 0x7F)
+#define S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(x) (((x) & 0x3) << 30) /* ELEM_SIZE: bits [31:30] */
+#define G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(x) (((x) >> 30) & 0x3)
+/* done */
#define P_SQ_CF_ALLOC_EXPORT_WORD1
#define S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) & 0xF) << 16)
#define G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x) (((x) >> 16) & 0xF)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 0d570ca..b4ace0d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1927,6 +1927,125 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
if (cf->output.end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
+ } else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
+ int o = 0;
+ const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
+ "WRITE_IND_ACK"};
+ const char *eg_rat_inst[] = { /* RAT instruction names; indexed directly by cf->output.rat_inst when bc->chip_class < CAYMAN */
+ "NOP", // 0
+ "STORE_TYPED",
+ "STORE_RAW",
+ "STORE_RAW_FDENORM",
+ "CMPXCHG_INT",
+ "CMPXCHG_FLT",
+ "CMPXCHG_FDENORM",
+ "ADD",
+ "SUB",
+ "RSUB",
+ "MIN_INT",
+ "MIN_UINT",
+ "MAX_INT",
+ "MAX_UINT",
+ "AND",
+ "OR",
+ "XOR",
+ "MSKOR",
+ "INC_UINT",
+ "DEC_UINT", /* index 19 (0x13) */
+ "NOP_RTN", /* index 20 (0x14) because the table is dense. NOTE(review): eg_sq.h defines ..._RAT_INST_NOP_RTN as 0x20 and cm_rat_inst places it at 0x20 - confirm whether the *_RTN entries here should also start at 0x20 */
+ "XCHG_RTN",
+ "XCHG_FDENORM_RTN",
+ "CMPXCHG_INT_RTN",
+ "CMPXCHG_FLT_RTN",
+ "CMPXCHG_FDENORM_RTN",
+ "ADD_RTN",
+ "SUB_RTN",
+ "SUBR_RTN",
+ "MIN_INT_RTN",
+ "MIN_UINT_RTN",
+ "MAX_INT_RTN",
+ "MAX_UINT_RTN",
+ "AND_RTN",
+ "OR_RTN",
+ "XOR_RTN",
+ "MSKOR_RTN",
+ "INC_UINT_RTN",
+ "DEC_UINT_RTN" /* index 38, last entry */
+ }; /* 39 entries, but RAT_INST is a 6-bit field (0..63): NOTE(review) a rat_inst above 38 would index past the end - consider a bounds check where the table is used */
+
+ const char *cm_rat_inst[] = { /* RAT instruction names; indexed directly by cf->output.rat_inst when bc->chip_class >= CAYMAN; "?" fills opcode values that have no name here */
+ "NOP", // 0x00
+ "STORE_TYPED",
+ "?",
+ "?",
+ "CMPX_INT", // 0x04
+ "?", "?",
+ "ADD", // 0x07
+ "SUB",
+ "RSUB",
+ "MIN_INT",
+ "MIN_UINT",
+ "MAX_INT",
+ "MAX_UINT",
+ "AND",
+ "OR",
+ "XOR", // 0x10
+ "?",
+ "INC_UINT", // 0x12
+ "DEC_UINT",
+ "STORE_DWORD",
+ "STORE_SHORT",
+ "STORE_BYTE", // 0x16
+ "?", "?", "?", "?", "?", "?", "?", "?", "?", // 0x17..0x1F
+ "NOP_RTN", // 0x20
+ "?",
+ "XCHG_RTN", // 0x22
+ "?",
+ "CMPXCHG_INT_RTN", // 0x24
+ "?", "?",
+ "ADD_RTN", // 0x27
+ "SUB_RTN",
+ "SUBR_RTN",
+ "MIN_INT_RTN",
+ "MIN_UINT_RTN",
+ "MAX_INT_RTN",
+ "MAX_UINT_RTN",
+ "AND_RTN",
+ "OR_RTN",
+ "XOR_RTN", // 0x30
+ "?",
+ "INC_UINT_RTN", // 0x32
+ "DEC_UINT_RTN" // 0x33, last entry
+ }; /* 52 entries (0x00..0x33), but RAT_INST is a 6-bit field: NOTE(review) values 0x34..0x3F would index past the end - consider a bounds check where the table is used */
+ o += fprintf(stderr, "%04d %08X %08X %s ", id,
+ bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
+ o += print_indent(o, 35);
+ if (bc->chip_class < CAYMAN)
+ o += fprintf(stderr, "%s ", eg_rat_inst[cf->output.rat_inst]);
+ else
+ o += fprintf(stderr, "%s ", cm_rat_inst[cf->output.rat_inst]);
+ o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
+ o += print_indent(o, 55);
+ o += fprintf(stderr, "R%d.", cf->output.gpr);
+ for (i = 0; i < 4; ++i) {
+ if (cf->output.comp_mask & (1 << i))
+ o += print_swizzle(i);
+ else
+ o += print_swizzle(7);
+ }
+ o += fprintf(stderr, ", R%d, ", cf->output.index_gpr);
+ o += fprintf(stderr, "UAV:%d ", cf->output.rat_id);
+
+ o += print_indent(o, 67);
+
+ fprintf(stderr, " ES:%i ", cf->output.elem_size);
+ if (cf->output.array_size != 0xFFF)
+ fprintf(stderr, "AS:%i ", cf->output.array_size);
+ if (!cf->output.barrier)
+ fprintf(stderr, "NO_BARRIER ");
+ if (cf->output.end_of_program)
+ fprintf(stderr, "EOP ");
+ fprintf(stderr, "\n");
} else {
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
bc->bytecode[id + 1], cfop->name);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 1465c31..38e6845 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -127,6 +127,11 @@ struct r600_bytecode_output {
unsigned swizzle_w;
unsigned burst_count;
unsigned barrier;
+
+ unsigned rat_id;
+ unsigned rat_inst;
+ unsigned index_gpr;
+
};
struct r600_bytecode_kcache {
@@ -254,7 +259,8 @@ void r600_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
-
+void eg_bytecode_export_rat_read(struct r600_bytecode *bc, /* RAT variant of eg_bytecode_export_read; defined in eg_asm.c */
+ struct r600_bytecode_output *output, uint32_t word0, uint32_t word1); /* fills *output from the two CF dwords word0/word1 */
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
unsigned *num_format, unsigned *format_comp, unsigned *endian);
#endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 29facf7..44c7c12 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -568,6 +568,17 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
return bytes_read;
}
+static
+void r600_export_rat_from_byte_stream(struct r600_shader_ctx *ctx,
+ uint32_t word0, uint32_t word1)
+{ /* Decodes a RAT export from the two CF dwords word0/word1 and adds it to ctx->bc as an output */
+ struct r600_bytecode_output output;
+ memset(&output, 0, sizeof(struct r600_bytecode_output)); /* zero the whole struct before decoding - presumably so members the decoder does not assign start at 0 */
+ eg_bytecode_export_rat_read(ctx->bc, &output, word0, word1);
+ r600_bytecode_add_output(ctx->bc, &output); /* NOTE(review): any return value of r600_bytecode_add_output is not checked, and this function returns void */
+ return;
+}
+
static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
unsigned char * bytes, unsigned bytes_read)
{
@@ -588,7 +599,7 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
unsigned char * bytes, unsigned num_bytes)
{
unsigned bytes_read = 0;
- unsigned i, byte;
+ unsigned byte;
while (bytes_read < num_bytes) {
char inst_type = bytes[bytes_read++];
switch (inst_type) {
@@ -604,16 +615,21 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
bytes_read = r600_fc_from_byte_stream(ctx, bytes,
bytes_read);
break;
- case 3:
- r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
- for (i = 0; i < 2; i++) {
- for (byte = 0 ; byte < 4; byte++) {
- ctx->bc->cf_last->isa[i] |=
- (bytes[bytes_read++] << (byte * 8));
- }
+ case 3: {
+ uint32_t word0 = 0, word1 = 0;
+ for (byte = 0 ; byte < 4; byte++)
+ word0 |= (bytes[bytes_read++] << (byte * 8));
+ for (byte = 0 ; byte < 4; byte++)
+ word1 |= (bytes[bytes_read++] << (byte * 8));
+ if (((word1 >> 22) & 0xFF) == 0x57) { //CF_INST_MEM_RAT_CACHELESS
+ r600_export_rat_from_byte_stream(ctx, word0, word1);
+ } else {
+ r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
+ ctx->bc->cf_last->isa[0] |= word0;
+ ctx->bc->cf_last->isa[1] |= word1;
}
break;
-
+ }
case 4:
bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
bytes_read);
--
1.8.1.4
More information about the mesa-dev
mailing list