[Mesa-dev] [PATCH] r600g: Add get/set to handle ALLOC_EXPORT_RAT_WORD0

Vincent Lejeune vljn at ovi.com
Tue Mar 19 10:09:33 PDT 2013


---
 src/gallium/drivers/r600/eg_asm.c      |  38 +++++++++++
 src/gallium/drivers/r600/eg_sq.h       |  59 ++++++++++++++++
 src/gallium/drivers/r600/r600_asm.c    | 119 +++++++++++++++++++++++++++++++++
 src/gallium/drivers/r600/r600_asm.h    |   8 ++-
 src/gallium/drivers/r600/r600_shader.c |  34 +++++++---
 5 files changed, 248 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index fffc436..cacb82f 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -106,6 +106,22 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 			if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
 				bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
 			id++;
+		} else if (cfop->flags & CF_MEM) {
+			/* MEM_RAT_CACHELESS instructions. NOTE(review): reached for any CF_MEM op - confirm the RAT layout fits all. */
+			bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(cf->output.gpr) |
+					S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(cf->output.elem_size) |
+					S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(cf->output.rat_id) |
+					S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(cf->output.rat_inst) |
+					S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(cf->output.index_gpr) |
+					S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(cf->output.type); /* RAT setter, pairs with the G_..._RAT_TYPE reader */
+			bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) | /* field is stored minus one */
+					S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+					S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
+					S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
+					S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
+			if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
+				bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+			id++;
 		} else {
 			/* branch, loop, call, return instructions */
 			bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
@@ -118,6 +134,28 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 	return 0;
 }
 
+/* Unpack the two dwords of a RAT ALLOC_EXPORT CF instruction into *output. */
+void eg_bytecode_export_rat_read(struct r600_bytecode *bc,
+		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1) {
+	const unsigned cf_opcode = G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1);
+	output->rat_id = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(word0);
+	output->rat_inst = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(word0);
+	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(word0);
+	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(word0);
+	output->index_gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(word0);
+	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(word0);
+	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
+	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
+	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
+	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
+	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
+	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
+	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
+	output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
+	output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
+	output->op = r600_isa_cf_by_opcode(bc->isa, cf_opcode, /* is_cf_alu = */ 0);
+}
+
 void eg_bytecode_export_read(struct r600_bytecode *bc,
 		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
 {
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index b534872..83588de 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -176,6 +176,65 @@
 #define   G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(x)                    (((x) >> 30) & 0x3)
 #define   C_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE                       0x3FFFFFFF
 /* done */
+#define P_SQ_CF_ALLOC_EXPORT_WORD0_RAT /* WORD0 field layout for RAT exports: S_ packs a field, G_ extracts it */
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x)                       (((x) & 0xF) << 0) /* bits 3:0 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ID(x)                       (((x) >> 0) & 0xF)
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x)                 (((x) & 0x3F) << 4) /* bits 9:4 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST(x)                 (((x) >> 4) & 0x3F)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_NOP              0x00000000
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_TYPED      0x00000001
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_CMPX_INT         0x00000004 /* NOTE(review): the _RTN variant below is spelled CMPXCHG_INT */
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_ADD              0x00000007
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_SUB              0x00000008
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_RSUB             0x00000009
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_INT          0x0000000A
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_UINT         0x0000000B
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_INT          0x0000000C
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_UINT         0x0000000D
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_AND              0x0000000E
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_OR               0x0000000F
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XOR              0x00000010
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_INC_UINT         0x00000012
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_DEC_UINT         0x00000013
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_DWORD      0x00000014
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_SHORT      0x00000015
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_STORE_BYTE       0x00000016
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_NOP_RTN          0x00000020 /* the _RTN values below equal the plain opcode + 0x20 */
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XCHG_RTN         0x00000022
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_CMPXCHG_INT_RTN  0x00000024
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_ADD_RTN          0x00000027
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_SUB_RTN          0x00000028
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_RSUB_RTN         0x00000029
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_INT_RTN      0x0000002A
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MIN_UINT_RTN     0x0000002B
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_INT_RTN      0x0000002C
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_MAX_UINT_RTN     0x0000002D
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_AND_RTN          0x0000002E
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_OR_RTN           0x0000002F
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_XOR_RTN          0x00000030
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_INC_UINT_RTN     0x00000032
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INST_DEC_UINT_RTN     0x00000033
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_MODE(x)           (((x) & 0x3) << 11) /* bits 12:11 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_MODE(x)           (((x) >> 11) & 0x3)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_NONE            0x00000000
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_0               0x00000001
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_1               0x00000002
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RAT_INDEX_INVALID         0x00000003
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(x)                     (((x) & 0x3) << 13) /* bits 14:13 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_TYPE(x)                     (((x) >> 13) & 0x3)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE              0x00000000
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_IND          0x00000001
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_ACK          0x00000002
+#define     V_SQ_CF_ALLOC_EXPORT_WORD0_RAT_EXPORT_WRITE_IND_ACK      0x00000003
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(x)                   (((x) & 0x7F) << 15) /* bits 21:15 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_GPR(x)                   (((x) >> 15) & 0x7F)
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_REL(x)                   (((x) & 0x1) << 22) /* bit 22 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_RW_REL(x)                   (((x) >> 22) & 0x1)
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(x)                (((x) & 0x7F) << 23) /* bits 29:23 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_INDEX_GPR(x)                (((x) >> 23) & 0x7F)
+#define   S_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(x)                (((x) & 0x3) << 30) /* bits 31:30 */
+#define   G_SQ_CF_ALLOC_EXPORT_WORD0_RAT_ELEM_SIZE(x)                (((x) >> 30) & 0x3)
+/* done */
 #define P_SQ_CF_ALLOC_EXPORT_WORD1
 #define   S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x)                  (((x) & 0xF) << 16)
 #define   G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(x)                  (((x) >> 16) & 0xF)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 0d570ca..b4ace0d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1927,6 +1927,125 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 				if (cf->output.end_of_program)
 					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
+			} else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
+				int o = 0;
+				const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
+						"WRITE_IND_ACK"};
+				const char *eg_rat_inst[] = {
+				"NOP", // 0
+				"STORE_TYPED",
+				"STORE_RAW",
+				"STORE_RAW_FDENORM",
+				"CMPXCHG_INT",
+				"CMPXCHG_FLT",
+				"CMPXCHG_FDENORM",
+				"ADD",
+				"SUB",
+				"RSUB",
+				"MIN_INT",
+				"MIN_UINT",
+				"MAX_INT",
+				"MAX_UINT",
+				"AND",
+				"OR",
+				"XOR",
+				"MSKOR",
+				"INC_UINT",
+				"DEC_UINT",
+				"NOP_RTN",
+				"XCHG_RTN",
+				"XCHG_FDENORM_RTN",
+				"CMPXCHG_INT_RTN",
+				"CMPXCHG_FLT_RTN",
+				"CMPXCHG_FDENORM_RTN",
+				"ADD_RTN",
+				"SUB_RTN",
+				"SUBR_RTN",
+				"MIN_INT_RTN",
+				"MIN_UINT_RTN",
+				"MAX_INT_RTN",
+				"MAX_UINT_RTN",
+				"AND_RTN",
+				"OR_RTN",
+				"XOR_RTN",
+				"MSKOR_RTN",
+				"INC_UINT_RTN",
+				"DEC_UINT_RTN"
+				};
+
+				const char *cm_rat_inst[] = {
+				"NOP", // 0
+				"STORE_TYPED",
+				"?",
+				"?",
+				"CMPX_INT", //4
+				"?", "?",
+				"ADD", //7
+				"SUB",
+				"RSUB",
+				"MIN_INT",
+				"MIN_UINT",
+				"MAX_INT",
+				"MAX_UINT",
+				"AND",
+				"OR",
+				"XOR",
+				"?",
+				"INC_UINT", //12
+				"DEC_UINT",
+				"STORE_DWORD",
+				"STORE_SHORT",
+				"STORE_BYTE",
+				"?", "?", "?", "?", "?", "?", "?", "?", "?",
+				"NOP_RTN", //20
+				"?",
+				"XCHG_RTN", //22
+				"?",
+				"CMPXCHG_INT_RTN", //24
+				"?", "?",
+				"ADD_RTN", //27
+				"SUB_RTN",
+				"SUBR_RTN",
+				"MIN_INT_RTN",
+				"MIN_UINT_RTN",
+				"MAX_INT_RTN",
+				"MAX_UINT_RTN",
+				"AND_RTN",
+				"OR_RTN",
+				"XOR_RTN",
+				"?",
+				"INC_UINT_RTN", //32
+				"DEC_UINT_RTN"
+				};
+				o += fprintf(stderr, "%04d %08X %08X  %s ", id,
+						bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
+				o += print_indent(o, 35);
+				if (bc->chip_class < CAYMAN)
+					o += fprintf(stderr, "%s ", eg_rat_inst[cf->output.rat_inst]);
+				else
+					o += fprintf(stderr, "%s ", cm_rat_inst[cf->output.rat_inst]);
+				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
+				o += print_indent(o, 55);
+				o += fprintf(stderr, "R%d.", cf->output.gpr);
+				for (i = 0; i < 4; ++i) {
+					if (cf->output.comp_mask & (1 << i))
+						o += print_swizzle(i);
+					else
+						o += print_swizzle(7);
+					}
+				o += fprintf(stderr, ", R%d, ", cf->output.index_gpr);
+				o += fprintf(stderr, "UAV:%d ", cf->output.rat_id);
+
+				o += print_indent(o, 67);
+
+				fprintf(stderr, " ES:%i ", cf->output.elem_size);
+				if (cf->output.array_size != 0xFFF)
+					fprintf(stderr, "AS:%i ", cf->output.array_size);
+				if (!cf->output.barrier)
+					fprintf(stderr, "NO_BARRIER ");
+					if (cf->output.end_of_program)
+						fprintf(stderr, "EOP ");
+					fprintf(stderr, "\n");
 			} else {
 				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
 						bc->bytecode[id + 1], cfop->name);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 1465c31..38e6845 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -127,6 +127,11 @@ struct r600_bytecode_output {
 	unsigned			swizzle_w;
 	unsigned			burst_count;
 	unsigned			barrier;
+
+	unsigned			rat_id;
+	unsigned			rat_inst;
+	unsigned			index_gpr;
+	
 };
 
 struct r600_bytecode_kcache {
@@ -254,7 +259,8 @@ void r600_bytecode_export_read(struct r600_bytecode *bc,
 		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 void eg_bytecode_export_read(struct r600_bytecode *bc,
 		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
-
+void eg_bytecode_export_rat_read(struct r600_bytecode *bc,
+		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
 void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
 			   unsigned *num_format, unsigned *format_comp, unsigned *endian);
 #endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 29facf7..44c7c12 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -568,6 +568,17 @@ static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx,
 	return bytes_read;
 }
 
+/* Decode a RAT export from its two instruction dwords and append it to ctx->bc as an output. */
+static void r600_export_rat_from_byte_stream(struct r600_shader_ctx *ctx,
+	uint32_t word0, uint32_t word1)
+{
+	struct r600_bytecode_output rat_output;
+
+	memset(&rat_output, 0, sizeof(rat_output));
+	eg_bytecode_export_rat_read(ctx->bc, &rat_output, word0, word1);
+	r600_bytecode_add_output(ctx->bc, &rat_output);
+}
+
 static int r600_export_from_byte_stream(struct r600_shader_ctx *ctx,
 	unsigned char * bytes, unsigned bytes_read)
 {
@@ -588,7 +599,7 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
 				unsigned char * bytes,	unsigned num_bytes)
 {
 	unsigned bytes_read = 0;
-	unsigned i, byte;
+	unsigned byte;
 	while (bytes_read < num_bytes) {
 		char inst_type = bytes[bytes_read++];
 		switch (inst_type) {
@@ -604,16 +615,21 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
 			bytes_read = r600_fc_from_byte_stream(ctx, bytes,
 								bytes_read);
 			break;
-		case 3:
-			r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
-			for (i = 0; i < 2; i++) {
-				for (byte = 0 ; byte < 4; byte++) {
-					ctx->bc->cf_last->isa[i] |=
-					(bytes[bytes_read++] << (byte * 8));
-				}
+		case 3: { /* raw CF instruction: two dwords, least significant byte first */
+			uint32_t word0 = 0, word1 = 0;
+			for (byte = 0 ; byte < 4; byte++) /* widen before shifting: (int)byte << 24 can overflow */
+				word0 |= ((uint32_t)bytes[bytes_read++] << (byte * 8));
+			for (byte = 0 ; byte < 4; byte++)
+				word1 |= ((uint32_t)bytes[bytes_read++] << (byte * 8));
+			if (((word1 >> 22) & 0xFF) == 0x57) { /* CF_INST_MEM_RAT_CACHELESS */
+				r600_export_rat_from_byte_stream(ctx, word0, word1);
+			} else {
+				r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE);
+				ctx->bc->cf_last->isa[0] |= word0;
+				ctx->bc->cf_last->isa[1] |= word1;
 			}
 			break;
-
+		}
 		case 4:
 			bytes_read = r600_vtx_from_byte_stream(ctx, bytes,
 								bytes_read);
-- 
1.8.1.4



More information about the mesa-dev mailing list