Mesa (master): r300/compiler: Emit flow control instructions and ALU result writes on R500

Nicolai Hähnle nh at kemper.freedesktop.org
Wed Oct 7 18:48:19 UTC 2009


Module: Mesa
Branch: master
Commit: 12e89e0e511d996db8e6eb11253dad4cdfab2083
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=12e89e0e511d996db8e6eb11253dad4cdfab2083

Author: Nicolai Hähnle <nhaehnle at gmail.com>
Date:   Sun Oct  4 17:53:08 2009 +0200

r300/compiler: Emit flow control instructions and ALU result writes on R500

Signed-off-by: Nicolai Hähnle <nhaehnle at gmail.com>

---

 .../drivers/dri/r300/compiler/r500_fragprog_emit.c |  180 +++++++++++++++++++-
 src/mesa/drivers/dri/r300/compiler/radeon_code.h   |    2 +
 .../dri/r300/compiler/radeon_dataflow_deadcode.c   |    2 +-
 .../drivers/dri/r300/compiler/radeon_opcodes.c     |    6 +-
 .../drivers/dri/r300/compiler/radeon_opcodes.h     |    2 +-
 .../dri/r300/compiler/radeon_pair_schedule.c       |    2 +-
 .../dri/r300/compiler/radeon_pair_translate.c      |    2 +-
 src/mesa/drivers/dri/r300/r300_reg.h               |    2 +
 8 files changed, 183 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 8f618d8..b1b1439 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -55,6 +55,23 @@
 	} while(0)
 
 
+struct branch_info { /* instruction slots belonging to one IF/ELSE/ENDIF construct */
+	int If; /* instruction index of the IF */
+	int Else; /* instruction index of the ELSE, or -1 while none has been seen */
+	int Endif; /* instruction index of the ENDIF, -1 until it is emitted */
+};
+
+struct emit_state { /* bookkeeping carried along while flow control is emitted */
+	struct radeon_compiler * C; /* used for error reporting and pool allocation */
+	struct r500_fragment_program_code * Code; /* hardware code being written */
+
+	struct branch_info * Branches; /* stack of the currently open branches */
+	unsigned int CurrentBranchDepth; /* number of entries in use in Branches */
+	unsigned int BranchesReserved; /* number of entries allocated for Branches */
+
+	unsigned int MaxBranchDepth; /* highest CurrentBranchDepth reached so far */
+};
+
 static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
 {
 	switch(opcode) {
@@ -131,6 +148,19 @@ static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
 	return t;
 }
 
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+	switch(func) { /* map the compare function to its R500_INST_ALU_RESULT_OP_* bits */
+	case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+	case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+	case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+	case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+	default: /* any other compare function is reported through rc_error() */
+		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+		return 0; /* the caller ORs this into inst0, so 0 adds no bits */
+	}
+}
+
 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
 {
 	if (index > code->max_temp_idx)
@@ -153,13 +183,13 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r
 /**
  * Emit a paired ALU instruction.
  */
-static int emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
 {
 	PROG_CODE;
 
 	if (code->inst_end >= 511) {
 		error("emit_alu: Too many instructions");
-		return 0;
+		return;
 	}
 
 	int ip = ++code->inst_end;
@@ -167,10 +197,15 @@ static int emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_
 	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
 	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
 
-	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
+	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
 		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
-	else
+		if (inst->WriteALUResult) {
+			error("%s: cannot write output and ALU result at the same time");
+			return;
+		}
+	} else {
 		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+	}
 	code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
 
 	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
@@ -206,7 +241,16 @@ static int emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_
 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
 	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
 
-	return 1;
+	if (inst->WriteALUResult) {
+		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+		else
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+	}
 }
 
 static unsigned int translate_strq_swizzle(unsigned int swizzle)
@@ -271,10 +315,118 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst
 	return 1;
 }
 
+static void grow_branches(struct emit_state * s)
+{
+	/* Double the capacity of the branch stack; the very first call reserves 4 entries. */
+	unsigned int newreserved = s->BranchesReserved ? s->BranchesReserved * 2 : 4;
+	struct branch_info * newbranches;
+
+	newbranches = memory_pool_malloc(&s->C->Pool, newreserved*sizeof(struct branch_info));
+	/* Branches is still NULL on the first call; memcpy() from NULL is undefined even for 0 bytes. */
+	if (s->CurrentBranchDepth)
+		memcpy(newbranches, s->Branches, s->CurrentBranchDepth*sizeof(struct branch_info));
+
+	s->Branches = newbranches;
+	s->BranchesReserved = newreserved;
+}
+
+/* Emit one FC instruction for IF/ELSE/ENDIF; the jump words of IF and ELSE are written when their ENDIF arrives. */
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+	if (s->Code->inst_end >= 511) {
+		rc_error(s->C, "%s: Too many instructions", __FUNCTION__);
+		return;
+	}
+
+	unsigned int newip = ++s->Code->inst_end;
+	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+	if (inst->U.I.Opcode == RC_OPCODE_IF) {
+		if (s->CurrentBranchDepth >= 32) {
+			rc_error(s->C, "Branch depth exceeds hardware limit");
+			return;
+		}
+
+		if (s->CurrentBranchDepth >= s->BranchesReserved)
+			grow_branches(s);
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+		branch->If = newip;
+		branch->Else = -1;
+		branch->Endif = -1;
+
+		if (s->CurrentBranchDepth > s->MaxBranchDepth)
+			s->MaxBranchDepth = s->CurrentBranchDepth;
+
+		/* actual instruction is filled in at ENDIF time */
+	} else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+			return;
+		}
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Else = newip;
+
+		/* actual instruction is filled in at ENDIF time */
+	} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ENDIF outside a branch", __FUNCTION__);
+			return;
+		}
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Endif = newip;
+
+		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
+		;
+
+		if (branch->Else >= 0) {
+			/* increment branch counter also if jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+				| R500_FC_A_OP_NONE /* no address stack */
+				| R500_FC_B_ELSE /* all active pixels want to jump */
+				| R500_FC_B_OP0_NONE /* no counter op if stay */
+				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+				| R500_FC_B_POP_CNT(1)
+			;
+			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		} else {
+			/* don't touch branch counter on jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		}
+
+		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+			| R500_FC_B_OP1_NONE /* no branch counter op if jump */
+			| R500_FC_B_POP_CNT(1)
+		;
+		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+
+		s->CurrentBranchDepth--;
+	} else {
+		rc_error(s->C, "%s: unknown opcode %i\n", __FUNCTION__, inst->U.I.Opcode);
+	}
+}
+
 void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
 {
+	struct emit_state s;
 	struct r500_fragment_program_code *code = &compiler->code->code.r500;
 
+	memset(&s, 0, sizeof(s));
+	s.C = &compiler->Base;
+	s.Code = code;
+
 	memset(code, 0, sizeof(*code));
 	code->max_temp_idx = 1;
 	code->inst_end = -1;
@@ -283,10 +435,15 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
 	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
 	    inst = inst->Next) {
 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
-			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX)
-				continue;
+			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
-			emit_tex(compiler, &inst->U.I);
+			if (opcode->IsFlowControl) {
+				emit_flowcontrol(&s, inst);
+			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+				continue;
+			} else {
+				emit_tex(compiler, &inst->U.I);
+			}
 		} else {
 			emit_paired(compiler, &inst->U.P);
 		}
@@ -309,4 +466,11 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
 		int ip = ++code->inst_end;
 		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
 	}
+
+	if (s.MaxBranchDepth >= 4) {
+		if (code->max_temp_idx < 1)
+			code->max_temp_idx = 1;
+
+		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+	}
 }
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 75069e8..902b7cf 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -182,6 +182,8 @@ struct r500_fragment_program_code {
 	int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
 
 	int max_temp_idx;
+
+	uint32_t us_fc_ctrl;
 };
 
 struct rX00_fragment_program_code {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index f30b1ff..e0c66c4 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -213,7 +213,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
 	    inst = inst->Prev) {
 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
-		if (opcode->IsControlFlow) {
+		if (opcode->IsFlowControl) {
 			if (opcode->Opcode == RC_OPCODE_ENDIF) {
 				push_branch(&s);
 			} else {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index a5072b5..c1c0181 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -344,19 +344,19 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
 	{
 		.Opcode = RC_OPCODE_IF,
 		.Name = "IF",
-		.IsControlFlow = 1,
+		.IsFlowControl = 1,
 		.NumSrcRegs = 1
 	},
 	{
 		.Opcode = RC_OPCODE_ELSE,
 		.Name = "ELSE",
-		.IsControlFlow = 1,
+		.IsFlowControl = 1,
 		.NumSrcRegs = 0
 	},
 	{
 		.Opcode = RC_OPCODE_ENDIF,
 		.Name = "ENDIF",
-		.IsControlFlow = 1,
+		.IsFlowControl = 1,
 		.NumSrcRegs = 0
 	},
 	{
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index c9c5b9f..a3c5b86 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -206,7 +206,7 @@ struct rc_opcode_info {
 	unsigned int HasDstReg:1;
 
 	/** true if this instruction affects control flow */
-	unsigned int IsControlFlow:1;
+	unsigned int IsFlowControl:1;
 
 	/** true if this is a vector instruction that operates on components in parallel
 	 * without any cross-component interaction */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 8a4b5ac..ea01bb7 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -454,7 +454,7 @@ static int is_controlflow(struct rc_instruction * inst)
 {
 	if (inst->Type == RC_INSTRUCTION_NORMAL) {
 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
-		return opcode->IsControlFlow;
+		return opcode->IsFlowControl;
 	}
 	return 0;
 }
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index c31891a..7211768 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -241,7 +241,7 @@ void rc_pair_translate(struct r300_fragment_program_compiler *c)
 
 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
-		if (opcode->HasTexture || opcode->IsControlFlow || opcode->Opcode == RC_OPCODE_KIL)
+		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
 			continue;
 
 		struct rc_sub_instruction copy = inst->U.I;
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index b9ccd09..623da60 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -3002,6 +3002,8 @@ enum {
 #   define R500_INST_RGB_CLAMP				(1 << 19)
 #   define R500_INST_ALPHA_CLAMP			(1 << 20)
 #   define R500_INST_ALU_RESULT_SEL			(1 << 21)
+#   define R500_INST_ALU_RESULT_SEL_RED		(0 << 21)
+#   define R500_INST_ALU_RESULT_SEL_ALPHA		(1 << 21)
 #   define R500_INST_ALPHA_PRED_INV			(1 << 22)
 #   define R500_INST_ALU_RESULT_OP_EQ			(0 << 23)
 #   define R500_INST_ALU_RESULT_OP_LT			(1 << 23)




More information about the mesa-commit mailing list