Mesa (master): r300/compiler: Fix nested flow control in r500 vertex shaders

Tom Stellard tstellar at kemper.freedesktop.org
Sat Apr 14 02:32:10 UTC 2012


Module: Mesa
Branch: master
Commit: b2df031a959f36743527b9abc89913ce4f895de3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b2df031a959f36743527b9abc89913ce4f895de3

Author: Tom Stellard <tstellar at gmail.com>
Date:   Tue Sep 20 21:05:55 2011 -0700

r300/compiler: Fix nested flow control in r500 vertex shaders

---

 src/gallium/drivers/r300/Makefile.sources          |    1 +
 src/gallium/drivers/r300/compiler/r3xx_vertprog.c  |  217 ++++------------
 .../drivers/r300/compiler/r3xx_vertprog_dump.c     |   15 +-
 src/gallium/drivers/r300/compiler/radeon_code.h    |    6 +-
 .../drivers/r300/compiler/radeon_compiler.h        |    3 +-
 src/gallium/drivers/r300/compiler/radeon_opcodes.c |   72 +++++
 src/gallium/drivers/r300/compiler/radeon_opcodes.h |   15 +
 src/gallium/drivers/r300/compiler/radeon_program.h |    1 +
 .../r300/compiler/radeon_program_constants.h       |    6 +
 .../drivers/r300/compiler/radeon_program_print.c   |    6 +
 src/gallium/drivers/r300/compiler/radeon_vert_fc.c |  274 ++++++++++++++++++++
 11 files changed, 438 insertions(+), 178 deletions(-)

diff --git a/src/gallium/drivers/r300/Makefile.sources b/src/gallium/drivers/r300/Makefile.sources
index e27b14e..1e7d31b 100644
--- a/src/gallium/drivers/r300/Makefile.sources
+++ b/src/gallium/drivers/r300/Makefile.sources
@@ -46,6 +46,7 @@ C_SOURCES := \
 	compiler/radeon_optimize.c \
 	compiler/radeon_remove_constants.c \
 	compiler/radeon_rename_regs.c \
+	compiler/radeon_vert_fc.c \
 	compiler/radeon_variable.c \
 	compiler/r3xx_fragprog.c \
 	compiler/r300_fragprog.c \
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
index a8d8ebc..94733d7 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -28,17 +28,13 @@
 
 #include "radeon_compiler_util.h"
 #include "radeon_dataflow.h"
+#include "radeon_program.h"
 #include "radeon_program_alu.h"
 #include "radeon_swizzle.h"
 #include "radeon_emulate_branches.h"
 #include "radeon_emulate_loops.h"
 #include "radeon_remove_constants.h"
 
-struct loop {
-	int BgnLoop;
-
-};
-
 /*
  * Take an already-setup and valid source then swizzle it appropriately to
  * obtain a constant ZERO or ONE source.
@@ -359,140 +355,13 @@ static void ei_pow(struct r300_vertex_program_code *vp,
 	inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
 }
 
-static void mark_write(void * userdata,	struct rc_instruction * inst,
-		rc_register_file file,	unsigned int index, unsigned int mask)
-{
-	unsigned int * writemasks = userdata;
-
-	if (file != RC_FILE_TEMPORARY)
-		return;
-
-	if (index >= R300_VS_MAX_TEMPS)
-		return;
-
-	writemasks[index] |= mask;
-}
-
-static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
-{
-	return PVS_SRC_OPERAND(compiler->PredicateIndex,
-		t_swizzle(RC_SWIZZLE_ZERO),
-		t_swizzle(RC_SWIZZLE_ZERO),
-		t_swizzle(RC_SWIZZLE_ZERO),
-		t_swizzle(RC_SWIZZLE_W),
-		t_src_class(RC_FILE_TEMPORARY),
-		0);
-}
-
-static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
-					unsigned int hw_opcode, int is_math)
-{
-	return PVS_OP_DST_OPERAND(hw_opcode,
-	     is_math,
-	     0,
-	     compiler->PredicateIndex,
-	     RC_MASK_W,
-	     t_dst_class(RC_FILE_TEMPORARY));
-
-}
-
-static void ei_if(struct r300_vertex_program_compiler * compiler,
-					struct rc_instruction *rci,
-					unsigned int * inst,
-					unsigned int branch_depth)
-{
-	unsigned int predicate_opcode;
-	int is_math = 0;
-
-	if (!compiler->Base.is_r500) {
-		rc_error(&compiler->Base,"Opcode IF not supported\n");
-		return;
-	}
-
-	/* Reserve a temporary to use as our predicate stack counter, if we
-	 * don't already have one. */
-	if (!compiler->PredicateMask) {
-		unsigned int writemasks[RC_REGISTER_MAX_INDEX];
-		struct rc_instruction * inst;
-		unsigned int i;
-		memset(writemasks, 0, sizeof(writemasks));
-		for(inst = compiler->Base.Program.Instructions.Next;
-				inst != &compiler->Base.Program.Instructions;
-							inst = inst->Next) {
-			rc_for_all_writes_mask(inst, mark_write, writemasks);
-		}
-		for(i = 0; i < compiler->Base.max_temp_regs; i++) {
-			unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
-			/* Only the W component can be used fo the predicate
-			 * stack counter. */
-			if (mask & RC_MASK_W) {
-				compiler->PredicateMask = RC_MASK_W;
-				compiler->PredicateIndex = i;
-				break;
-			}
-		}
-		if (i == compiler->Base.max_temp_regs) {
-			rc_error(&compiler->Base, "No free temporary to use for"
-					" predicate stack counter.\n");
-			return;
-		}
-	}
-	predicate_opcode =
-			branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
-
-	rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
-	if (branch_depth == 0) {
-		is_math = 1;
-		predicate_opcode = ME_PRED_SET_NEQ;
-		inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
-		inst[2] = 0;
-	} else {
-		predicate_opcode = VE_PRED_SET_NEQ_PUSH;
-		inst[1] = t_pred_src(compiler);
-		inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
-	}
-
-	inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
-	inst[3] = 0;
-
-}
-
-static void ei_else(struct r300_vertex_program_compiler * compiler,
-							unsigned int * inst)
-{
-	if (!compiler->Base.is_r500) {
-		rc_error(&compiler->Base,"Opcode ELSE not supported\n");
-		return;
-	}
-	inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
-	inst[1] = t_pred_src(compiler);
-	inst[2] = 0;
-	inst[3] = 0;
-}
-
-static void ei_endif(struct r300_vertex_program_compiler *compiler,
-							unsigned int * inst)
-{
-	if (!compiler->Base.is_r500) {
-		rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
-		return;
-	}
-	inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
-	inst[1] = t_pred_src(compiler);
-	inst[2] = 0;
-	inst[3] = 0;
-}
-
 static void translate_vertex_program(struct radeon_compiler *c, void *user)
 {
 	struct r300_vertex_program_compiler *compiler = (struct r300_vertex_program_compiler*)c;
 	struct rc_instruction *rci;
 
-	struct loop * loops = NULL;
-	int current_loop_depth = 0;
-	int loops_reserved = 0;
-
-	unsigned int branch_depth = 0;
+	unsigned loops[R500_PVS_MAX_LOOP_DEPTH];
+	unsigned loop_depth = 0;
 
 	compiler->code->pos_end = 0;	/* Not supported yet */
 	compiler->code->length = 0;
@@ -532,12 +401,9 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
 		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
 		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
 		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
-		case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
-		case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
 		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
 		case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
 		case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
-		case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
 		case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
 		case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
 		case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
@@ -556,37 +422,27 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
 		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
 		case RC_OPCODE_BGNLOOP:
 		{
-			struct loop * l;
-
 			if ((!compiler->Base.is_r500
-				&& loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
-				|| loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+				&& loop_depth >= R300_VS_MAX_LOOP_DEPTH)
+				|| loop_depth >= R500_PVS_MAX_LOOP_DEPTH) {
 				rc_error(&compiler->Base,
 						"Loops are nested too deep.");
 				return;
 			}
-			memory_pool_array_reserve(&compiler->Base.Pool,
-					struct loop, loops, current_loop_depth,
-					loops_reserved, 1);
-			l = &loops[current_loop_depth++];
-			memset(l , 0, sizeof(struct loop));
-			l->BgnLoop = (compiler->code->length / 4);
-			continue;
+			loops[loop_depth++] = ((compiler->code->length)/ 4) + 1;
+			break;
 		}
 		case RC_OPCODE_ENDLOOP:
 		{
-			struct loop * l;
 			unsigned int act_addr;
 			unsigned int last_addr;
 			unsigned int ret_addr;
 
-			assert(loops);
-			l = &loops[current_loop_depth - 1];
-			act_addr = l->BgnLoop - 1;
+			ret_addr = loops[--loop_depth];
+			act_addr = ret_addr - 1;
 			last_addr = (compiler->code->length / 4) - 1;
-			ret_addr = l->BgnLoop;
 
-			if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+			if (loop_depth >= R300_VS_MAX_FC_OPS) {
 				rc_error(&compiler->Base,
 					"Too many flow control instructions.");
 				return;
@@ -595,7 +451,7 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
 				compiler->code->fc_op_addrs.r500
 					[compiler->code->num_fc_ops].lw =
 					R500_PVS_FC_ACT_ADRS(act_addr)
-					| R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+					| R500_PVS_FC_LOOP_CNT_JMP_INST(0x00ff)
 					;
 				compiler->code->fc_op_addrs.r500
 					[compiler->code->num_fc_ops].uw =
@@ -618,26 +474,51 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
 			compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
 						compiler->code->num_fc_ops);
 			compiler->code->num_fc_ops++;
-			current_loop_depth--;
-			continue;
+
+			break;
 		}
 
+		case RC_ME_PRED_SET_CLR:
+			ei_math1(compiler->code, ME_PRED_SET_CLR, vpi, inst);
+			break;
+
+		case RC_ME_PRED_SET_INV:
+			ei_math1(compiler->code, ME_PRED_SET_INV, vpi, inst);
+			break;
+
+		case RC_ME_PRED_SET_POP:
+			ei_math1(compiler->code, ME_PRED_SET_POP, vpi, inst);
+			break;
+
+		case RC_ME_PRED_SET_RESTORE:
+			ei_math1(compiler->code, ME_PRED_SET_RESTORE, vpi, inst);
+			break;
+
+		case RC_ME_PRED_SEQ:
+			ei_math1(compiler->code, ME_PRED_SET_EQ, vpi, inst);
+			break;
+
+		case RC_ME_PRED_SNEQ:
+			ei_math1(compiler->code, ME_PRED_SET_NEQ, vpi, inst);
+			break;
+
+		case RC_VE_PRED_SNEQ_PUSH:
+			ei_vector2(compiler->code, VE_PRED_SET_NEQ_PUSH,
+								vpi, inst);
+			break;
+
 		default:
 			rc_error(&compiler->Base, "Unknown opcode %s\n", info->Name);
 			return;
 		}
 
-		/* Non-flow control instructions that are inside an if statement
-		 * need to pay attention to the predicate bit. */
-		if (branch_depth
-			&& vpi->Opcode != RC_OPCODE_IF
-			&& vpi->Opcode != RC_OPCODE_ELSE
-			&& vpi->Opcode != RC_OPCODE_ENDIF) {
-
+		if (vpi->DstReg.Pred != RC_PRED_DISABLED) {
 			inst[0] |= (PVS_DST_PRED_ENABLE_MASK
 						<< PVS_DST_PRED_ENABLE_SHIFT);
-			inst[0] |= (PVS_DST_PRED_SENSE_MASK
+			if (vpi->DstReg.Pred == RC_PRED_SET) {
+				inst[0] |= (PVS_DST_PRED_SENSE_MASK
 						<< PVS_DST_PRED_SENSE_SHIFT);
+			}
 		}
 
 		/* Update the number of temporaries. */
@@ -650,10 +531,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user)
 			    vpi->SrcReg[i].Index >= compiler->code->num_temporaries)
 				compiler->code->num_temporaries = vpi->SrcReg[i].Index + 1;
 
-		if (compiler->PredicateMask)
-			if (compiler->PredicateIndex >= compiler->code->num_temporaries)
-				compiler->code->num_temporaries = compiler->PredicateIndex + 1;
-
 		if (compiler->code->num_temporaries > compiler->Base.max_temp_regs) {
 			rc_error(&compiler->Base, "Too many temporaries.\n");
 			return;
@@ -1018,7 +895,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
 	struct radeon_compiler_pass vs_list[] = {
 		/* NAME				DUMP PREDICATE	FUNCTION			PARAM */
 		{"add artificial outputs",	0, 1,		rc_vs_add_artificial_outputs,	NULL},
-		{"transform loops",		1, 1,		rc_transform_loops,		NULL},
 		{"emulate branches",		1, !is_r500,	rc_emulate_branches,		NULL},
 		{"emulate negative addressing", 1, 1,		rc_emulate_negative_addressing,	NULL},
 		{"native rewrite",		1, is_r500,	rc_local_transform,		alu_rewrite_r500},
@@ -1030,6 +906,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
 		{"source conflict resolve",	1, 1,		rc_local_transform,		resolve_src_conflicts},
 		{"register allocation",		1, opt,		allocate_temporary_registers,	NULL},
 		{"dead constants",		1, 1,		rc_remove_unused_constants,	&c->code->constants_remap_table},
+		{"lower control flow opcodes",	1, is_r500,	rc_vert_fc,			NULL},
 		{"final code validation",	0, 1,		rc_validate_final_shader,	NULL},
 		{"machine code generation",	0, 1,		translate_vertex_program,	NULL},
 		{"dump machine code",		0, c->Base.Debug & RC_DBG_LOG, r300_vertex_program_dump,	NULL},
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
index 2bc0a87..a41559c 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog_dump.c
@@ -190,16 +190,25 @@ void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user)
 
 	fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
 	for(i = 0; i < vs->num_fc_ops; i++) {
+		unsigned is_loop = 0;
 		switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
 		case 0: fprintf(stderr, "NOP"); break;
 		case 1: fprintf(stderr, "JUMP"); break;
-		case 2: fprintf(stderr, "LOOP"); break;
+		case 2: fprintf(stderr, "LOOP"); is_loop = 1; break;
 		case 3: fprintf(stderr, "JSR"); break;
 		}
 		if (c->Base.is_r500) {
-			fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+			fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x "
+							"loop data->0x%08x\n",
 				vs->fc_op_addrs.r500[i].uw,
-				vs->fc_op_addrs.r500[i].lw);
+				vs->fc_op_addrs.r500[i].lw,
+				vs->fc_loop_index[i]);
+			if (is_loop) {
+				fprintf(stderr, "Before = %u First = %u Last = %u\n",
+					vs->fc_op_addrs.r500[i].lw & 0xffff,
+					(vs->fc_op_addrs.r500[i].uw >> 16) & 0xffff,
+					vs->fc_op_addrs.r500[i].uw  & 0xffff);
+			}
 		} else {
 			fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
 		}
diff --git a/src/gallium/drivers/r300/compiler/radeon_code.h b/src/gallium/drivers/r300/compiler/radeon_code.h
index 4280d66..44d5500 100644
--- a/src/gallium/drivers/r300/compiler/radeon_code.h
+++ b/src/gallium/drivers/r300/compiler/radeon_code.h
@@ -40,6 +40,9 @@
 #define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
 #define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
 
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_PVS_MAX_LOOP_DEPTH 8
 
 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
 
@@ -262,9 +265,6 @@ struct rX00_fragment_program_code {
 #define R300_VS_MAX_TEMPS	32
 /* This is the max for all chipsets (r300-r500) */
 #define R300_VS_MAX_FC_OPS 16
-/* The r500 maximum depth is not just for loops, but any combination of loops
- * and subroutine jumps. */
-#define R500_VS_MAX_FC_DEPTH 8
 #define R300_VS_MAX_LOOP_DEPTH 1
 
 #define VSF_MAX_INPUTS 32
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
index e7ccbb7..d42cee9 100644
--- a/src/gallium/drivers/r300/compiler/radeon_compiler.h
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h
@@ -137,11 +137,10 @@ struct r300_vertex_program_compiler {
 	void * UserData;
 	void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
 
-	int PredicateIndex;
-	unsigned int PredicateMask;
 };
 
 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void rc_vert_fc(struct radeon_compiler *compiler, void *user);
 void r300_vertex_program_dump(struct radeon_compiler *compiler, void *user);
 
 struct radeon_compiler_pass {
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.c b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
index 3b49ad7..9bcb3c9 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.c
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.c
@@ -437,6 +437,78 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
 	{
 		.Opcode = RC_OPCODE_KILP,
 		.Name = "KILP",
+	},
+	{
+		.Opcode = RC_ME_PRED_SEQ,
+		.Name = "ME_PRED_SEQ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_ME_PRED_SGT,
+		.Name = "ME_PRED_SGT",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_ME_PRED_SGE,
+		.Name = "ME_PRED_SGE",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_ME_PRED_SNEQ,
+		.Name = "ME_PRED_SNEQ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_ME_PRED_SET_CLR,
+		.Name = "ME_PRED_SET_CLEAR",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_ME_PRED_SET_INV,
+		.Name = "ME_PRED_SET_INV",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_ME_PRED_SET_POP,
+		.Name = "ME_PRED_SET_POP",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_ME_PRED_SET_RESTORE,
+		.Name = "ME_PRED_SET_RESTORE",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_VE_PRED_SEQ_PUSH,
+		.Name = "VE_PRED_SEQ_PUSH",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_VE_PRED_SGT_PUSH,
+		.Name = "VE_PRED_SGT_PUSH",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_VE_PRED_SGE_PUSH,
+		.Name = "VE_PRED_SGE_PUSH",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_VE_PRED_SNEQ_PUSH,
+		.Name = "VE_PRED_SNEQ_PUSH",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
 	}
 };
 
diff --git a/src/gallium/drivers/r300/compiler/radeon_opcodes.h b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
index 0b881c2..9c4b456 100644
--- a/src/gallium/drivers/r300/compiler/radeon_opcodes.h
+++ b/src/gallium/drivers/r300/compiler/radeon_opcodes.h
@@ -217,6 +217,21 @@ typedef enum {
 	/** Stop execution of the shader (GLSL discard) */
 	RC_OPCODE_KILP,
 
+	/* Vertex shader CF Instructions */
+	RC_ME_PRED_SEQ,
+	RC_ME_PRED_SGT,
+	RC_ME_PRED_SGE,
+	RC_ME_PRED_SNEQ,
+	RC_ME_PRED_SET_CLR,
+	RC_ME_PRED_SET_INV,
+	RC_ME_PRED_SET_POP,
+	RC_ME_PRED_SET_RESTORE,
+
+	RC_VE_PRED_SEQ_PUSH,
+	RC_VE_PRED_SGT_PUSH,
+	RC_VE_PRED_SGE_PUSH,
+	RC_VE_PRED_SNEQ_PUSH,
+
 	MAX_RC_OPCODE
 } rc_opcode;
 
diff --git a/src/gallium/drivers/r300/compiler/radeon_program.h b/src/gallium/drivers/r300/compiler/radeon_program.h
index e68be93..67be1b9 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program.h
@@ -58,6 +58,7 @@ struct rc_dst_register {
 	unsigned int File:3;
 	unsigned int Index:RC_REGISTER_INDEX_BITS;
 	unsigned int WriteMask:4;
+	unsigned int Pred:2;
 };
 
 struct rc_presub_instruction {
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_constants.h b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
index c07c492..4dbf649 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_constants.h
+++ b/src/gallium/drivers/r300/compiler/radeon_program_constants.h
@@ -203,4 +203,10 @@ static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
 #define RC_SOURCE_RGB   0x1
 #define RC_SOURCE_ALPHA 0x2
 
+typedef enum {
+	RC_PRED_DISABLED,
+	RC_PRED_SET,
+	RC_PRED_INV
+} rc_predicate_mode;
+
 #endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_print.c b/src/gallium/drivers/r300/compiler/radeon_program_print.c
index e3d2104..29a349e 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_print.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_print.c
@@ -329,6 +329,12 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
 		fprintf(f, ")]");
 	}
 
+	if (inst->U.I.DstReg.Pred == RC_PRED_SET) {
+		fprintf(f, " PRED_SET");
+	} else if (inst->U.I.DstReg.Pred == RC_PRED_INV) {
+		fprintf(f, " PRED_INV");
+	}
+
 	fprintf(f, "\n");
 }
 
diff --git a/src/gallium/drivers/r300/compiler/radeon_vert_fc.c b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
new file mode 100644
index 0000000..3568b23
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/radeon_vert_fc.c
@@ -0,0 +1,274 @@
+
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_program.h"
+#include "radeon_program_constants.h"
+
+struct vert_fc_state {
+	struct radeon_compiler *C;
+	unsigned BranchDepth;
+	unsigned LoopDepth;
+	unsigned LoopsReserved;
+	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
+	int PredicateReg;
+	unsigned InCFBreak;
+};
+
+static void build_pred_src(
+	struct rc_src_register * src,
+	struct vert_fc_state * fc_state)
+{
+	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
+	src->File = RC_FILE_TEMPORARY;
+	src->Index = fc_state->PredicateReg;
+}
+
+static void build_pred_dst(
+	struct rc_dst_register * dst,
+	struct vert_fc_state * fc_state)
+{
+	dst->WriteMask = RC_MASK_W;
+	dst->File = RC_FILE_TEMPORARY;
+	dst->Index = fc_state->PredicateReg;
+}
+
+static void mark_write(void * userdata,	struct rc_instruction * inst,
+		rc_register_file file,	unsigned int index, unsigned int mask)
+{
+	unsigned int * writemasks = userdata;
+
+	if (file != RC_FILE_TEMPORARY)
+		return;
+
+	if (index >= R300_VS_MAX_TEMPS)
+		return;
+
+	writemasks[index] |= mask;
+}
+
+static int reserve_predicate_reg(struct vert_fc_state * fc_state)
+{
+	int i;
+	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
+	struct rc_instruction * inst;
+	memset(writemasks, 0, sizeof(writemasks));
+	for(inst = fc_state->C->Program.Instructions.Next;
+				inst != &fc_state->C->Program.Instructions;
+				inst = inst->Next) {
+		rc_for_all_writes_mask(inst, mark_write, writemasks);
+	}
+
+	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
+		/* Most of the control flow instructions only write the
+		 * W component of the Predicate Register, but
+		 * the docs say that ME_PRED_SET_CLR and
+		 * ME_PRED_SET_RESTORE write all components of the
+		 * register, so we must reserve a register that has
+		 * all its components free. */
+		if (!writemasks[i]) {
+			fc_state->PredicateReg = i;
+			break;
+		}
+	}
+	if (i == fc_state->C->max_temp_regs) {
+		rc_error(fc_state->C, "No free temporary to use for"
+				" predicate stack counter.\n");
+		return -1;
+	}
+	return 1;
+}
+
+static void lower_bgnloop(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	struct rc_instruction * new_inst =
+			rc_insert_new_instruction(fc_state->C, inst->Prev);
+
+	if ((!fc_state->C->is_r500
+		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
+	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
+		rc_error(fc_state->C, "Loops are nested too deep.");
+		return;
+	}
+
+	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
+		if (fc_state->PredicateReg == -1) {
+			if (reserve_predicate_reg(fc_state) == -1) {
+				return;
+			}
+		}
+
+		/* Initialize the predicate bit to true. */
+		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
+		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+		new_inst->U.I.SrcReg[0].Index = 0;
+		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+	} else {
+		fc_state->PredStack[fc_state->LoopDepth] =
+						fc_state->PredicateReg;
+		/* Copy the current predicate value to this loop's
+		 * predicate register */
+
+		/* Use the old predicate value for src0 */
+		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
+
+		/* Reserve this loop's predicate register */
+		if (reserve_predicate_reg(fc_state) == -1) {
+			return;
+		}
+
+		/* Copy the old predicate value to the new register */
+		new_inst->U.I.Opcode = RC_OPCODE_ADD;
+		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+		new_inst->U.I.SrcReg[1].Index = 0;
+		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
+		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
+	}
+
+}
+
+static void lower_brk(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	if (fc_state->LoopDepth == 1) {
+		inst->U.I.Opcode = RC_OPCODE_RCP;
+		inst->U.I.DstReg.Pred = RC_PRED_INV;
+		inst->U.I.SrcReg[0].Index = 0;
+		inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+	} else {
+		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
+		inst->U.I.DstReg.Pred = RC_PRED_SET;
+	}
+
+	build_pred_dst(&inst->U.I.DstReg, fc_state);
+}
+
+static void lower_endloop(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	struct rc_instruction * new_inst =
+			rc_insert_new_instruction(fc_state->C, inst);
+
+	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
+	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
+	/* Restore the previous predicate register. */
+	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
+	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
+}
+
+static void lower_if(
+	struct rc_instruction * inst,
+	struct vert_fc_state * fc_state)
+{
+	/* Reserve a temporary to use as our predicate stack counter, if we
+	 * don't already have one. */
+	if (fc_state->PredicateReg == -1) {
+		/* If we are inside a loop, the Predicate Register should
+		 * have already been defined. */
+		assert(fc_state->LoopDepth == 0);
+
+		if (reserve_predicate_reg(fc_state) == -1) {
+			return;
+		}
+	}
+
+	if (inst->Next->U.I.Opcode == RC_OPCODE_BRK) {
+		fc_state->InCFBreak = 1;
+	}
+	if ((fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0)
+			|| (fc_state->LoopDepth == 1 && fc_state->InCFBreak)) {
+		if (fc_state->InCFBreak) {
+			inst->U.I.Opcode = RC_ME_PRED_SEQ;
+			inst->U.I.DstReg.Pred = RC_PRED_SET;
+		} else {
+			inst->U.I.Opcode = RC_ME_PRED_SNEQ;
+		}
+	} else {
+		unsigned swz;
+		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
+		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
+						sizeof(inst->U.I.SrcReg[1]));
+		swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
+		/* VE_PRED_SNEQ_PUSH needs the branch condition to be in the
+		 * w component */
+		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
+				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
+		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
+	}
+	build_pred_dst(&inst->U.I.DstReg, fc_state);
+}
+
+void rc_vert_fc(struct radeon_compiler *c, void *user)
+{
+	struct rc_instruction * inst;
+	struct vert_fc_state fc_state;
+
+	memset(&fc_state, 0, sizeof(fc_state));
+	fc_state.PredicateReg = -1;
+	fc_state.C = c;
+
+	for(inst = c->Program.Instructions.Next;
+					inst != &c->Program.Instructions;
+					inst = inst->Next) {
+
+		switch (inst->U.I.Opcode) {
+
+		case RC_OPCODE_BGNLOOP:
+			lower_bgnloop(inst, &fc_state);
+			fc_state.LoopDepth++;
+			break;
+
+		case RC_OPCODE_BRK:
+			lower_brk(inst, &fc_state);
+			break;
+
+		case RC_OPCODE_ENDLOOP:
+			if (fc_state.BranchDepth != 0
+					|| fc_state.LoopDepth != 1) {
+				lower_endloop(inst, &fc_state);
+			}
+			fc_state.LoopDepth--;
+			/* Skip PRED_RESTORE */
+			inst = inst->Next;
+			break;
+		case RC_OPCODE_IF:
+			lower_if(inst, &fc_state);
+			fc_state.BranchDepth++;
+			break;
+
+		case RC_OPCODE_ELSE:
+			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
+			build_pred_dst(&inst->U.I.DstReg, &fc_state);
+			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
+			break;
+
+		case RC_OPCODE_ENDIF:
+			if (fc_state.LoopDepth == 1 && fc_state.InCFBreak) {
+				struct rc_instruction * to_delete = inst;
+				inst = inst->Prev;
+				rc_remove_instruction(to_delete);
+				/* XXX: Delete the endif instruction */
+			} else {
+				inst->U.I.Opcode = RC_ME_PRED_SET_POP;
+				build_pred_dst(&inst->U.I.DstReg, &fc_state);
+				build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
+			}
+			fc_state.InCFBreak = 0;
+			fc_state.BranchDepth--;
+			break;
+
+		default:
+			if (fc_state.BranchDepth || fc_state.LoopDepth) {
+				inst->U.I.DstReg.Pred = RC_PRED_SET;
+			}
+			break;
+		}
+	}
+}




More information about the mesa-commit mailing list