Mesa (master): r300/compiler: Use ALU Result for IF conditionals

Tom Stellard tstellar at kemper.freedesktop.org
Sun May 15 05:36:47 UTC 2011


Module: Mesa
Branch: master
Commit: 6d539579add0a9d3017f441d9fad5d4cd3ae7bb9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d539579add0a9d3017f441d9fad5d4cd3ae7bb9

Author: Tom Stellard <tstellar at gmail.com>
Date:   Sat May 14 21:47:26 2011 -0700

r300/compiler: Use ALU Result for IF conditionals

This saves one instruction per IF.

---

 src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c |    7 +-
 src/mesa/drivers/dri/r300/compiler/r500_fragprog.c |  152 +++++++++++++++++---
 src/mesa/drivers/dri/r300/compiler/r500_fragprog.h |    2 +-
 .../drivers/dri/r300/compiler/radeon_optimize.c    |    5 +-
 .../drivers/dri/r300/compiler/radeon_variable.c    |   61 ++++++++
 .../drivers/dri/r300/compiler/radeon_variable.h    |    5 +
 6 files changed, 212 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index e2441e9..bb6c010 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -109,8 +109,12 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
 		{ 0, 0 }
 	};
 
-	struct radeon_program_transformation native_rewrite_r500[] = {
+	struct radeon_program_transformation rewrite_if[] = {
 		{ &r500_transform_IF, 0 },
+		{0, 0}
+	};
+
+	struct radeon_program_transformation native_rewrite_r500[] = {
 		{ &radeonTransformALU, 0 },
 		{ &radeonTransformDeriv, 0 },
 		{ &radeonTransformTrigScale, 0 },
@@ -135,6 +139,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
 		{"emulate branches",		1, !is_r500,	rc_emulate_branches,		NULL},
 		{"saturate output writes",	1, sat_out,	rc_local_transform,		saturate_output},
 		{"transform TEX",		1, 1,		rc_local_transform,		rewrite_tex},
+		{"transform IF",		1, is_r500,	rc_local_transform,		rewrite_if},
 		{"native rewrite",		1, is_r500,	rc_local_transform,		native_rewrite_r500},
 		{"native rewrite",		1, !is_r500,	rc_local_transform,		native_rewrite_r300},
 		{"deadcode",			1, opt,		rc_dataflow_deadcode,		dataflow_outputs_mark_use},
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index 5e0be6b..cf99f5e 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -30,6 +30,8 @@
 #include <stdio.h>
 
 #include "radeon_compiler_util.h"
+#include "radeon_list.h"
+#include "radeon_variable.h"
 #include "../r300_reg.h"
 
 /**
@@ -37,27 +39,143 @@
  */
 int r500_transform_IF(
 	struct radeon_compiler * c,
-	struct rc_instruction * inst,
-	void* data)
+	struct rc_instruction * inst_if,
+	void *data)
 {
-	struct rc_instruction * inst_mov;
+	struct rc_variable * writer;
+	struct rc_list * writer_list, * list_ptr;
+	struct rc_list * var_list = rc_get_variables(c);
+	unsigned int generic_if = 0;
+	unsigned int alu_chan;
 
-	if (inst->U.I.Opcode != RC_OPCODE_IF)
+	if (inst_if->U.I.Opcode != RC_OPCODE_IF) {
 		return 0;
+	}
+
+	writer_list = rc_variable_list_get_writers(
+			var_list, inst_if->Type, &inst_if->U.I.SrcReg[0]);
+	if (!writer_list) {
+		generic_if = 1;
+	} else {
+
+		/* Make sure it is safe for the writers to write to
+		 * ALU Result */
+		for (list_ptr = writer_list; list_ptr;
+						list_ptr = list_ptr->Next) {
+			struct rc_instruction * inst;
+			writer = list_ptr->Item;
+			/* We are going to modify the destination register
+			 * of writer, so if it has a reader other than
+			 * inst_if (aka ReaderCount > 1) we must fall back to
+			 * our generic IF.
+			 * If the writer has a lower IP than inst_if, this
+			 * means that inst_if is above the writer in a loop.
+			 * I'm not sure why this would ever happen, but
+			 * if it does we want to make sure we fall back
+			 * to our generic IF. */
+			if (writer->ReaderCount > 1 || writer->Inst->IP < inst_if->IP) {
+				generic_if = 1;
+				break;
+			}
+
+			/* The ALU Result is not preserved across IF
+			 * instructions, so if there is another IF
+			 * instruction between writer and inst_if, then
+			 * we need to fall back to generic IF. */
+			for (inst = writer->Inst; inst != inst_if; inst = inst->Next) {
+				const struct rc_opcode_info * info =
+					rc_get_opcode_info(inst->U.I.Opcode);
+				if (info->IsFlowControl) {
+					generic_if = 1;
+					break;
+				}
+			}
+			if (generic_if) {
+				break;
+			}
+		}
+	}
+
+	if (GET_SWZ(inst_if->U.I.SrcReg[0].Swizzle, 0) == RC_SWIZZLE_X) {
+		alu_chan = RC_ALURESULT_X;
+	} else {
+		alu_chan = RC_ALURESULT_W;
+	}
+	if (generic_if) {
+		struct rc_instruction * inst_mov =
+				rc_insert_new_instruction(c, inst_if->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.WriteMask = 0;
+		inst_mov->U.I.DstReg.File = RC_FILE_NONE;
+		inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+		inst_mov->U.I.WriteALUResult = alu_chan;
+		inst_mov->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+		if (alu_chan == RC_ALURESULT_X) {
+			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+					inst_mov->U.I.SrcReg[0].Swizzle,
+					RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+		} else {
+			inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(
+					inst_mov->U.I.SrcReg[0].Swizzle,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
+					RC_SWIZZLE_UNUSED, RC_SWIZZLE_Z);
+		}
+	} else {
+		rc_compare_func compare_func = RC_COMPARE_FUNC_NEVER;
+		unsigned int reverse_srcs = 0;
+		unsigned int preserve_opcode = 0;
+		for (list_ptr = writer_list; list_ptr;
+						list_ptr = list_ptr->Next) {
+			writer = list_ptr->Item;
+			switch(writer->Inst->U.I.Opcode) {
+			case RC_OPCODE_SEQ:
+				compare_func = RC_COMPARE_FUNC_EQUAL;
+				break;
+			case RC_OPCODE_SNE:
+				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+				break;
+			case RC_OPCODE_SLE:
+				reverse_srcs = 1;
+				/* Fall through */
+			case RC_OPCODE_SGE:
+				compare_func = RC_COMPARE_FUNC_GEQUAL;
+				break;
+			case RC_OPCODE_SGT:
+				reverse_srcs = 1;
+				/* Fall through */
+			case RC_OPCODE_SLT:
+				compare_func = RC_COMPARE_FUNC_LESS;
+				break;
+			default:
+				compare_func = RC_COMPARE_FUNC_NOTEQUAL;
+				preserve_opcode = 1;
+				break;
+			}
+			if (!preserve_opcode) {
+				writer->Inst->U.I.Opcode = RC_OPCODE_SUB;
+			}
+			writer->Inst->U.I.DstReg.WriteMask = 0;
+			writer->Inst->U.I.DstReg.File = RC_FILE_NONE;
+			writer->Inst->U.I.WriteALUResult = alu_chan;
+			writer->Inst->U.I.ALUResultCompare = compare_func;
+			if (reverse_srcs) {
+				struct rc_src_register temp_src;
+				temp_src = writer->Inst->U.I.SrcReg[0];
+				writer->Inst->U.I.SrcReg[0] =
+					writer->Inst->U.I.SrcReg[1];
+				writer->Inst->U.I.SrcReg[1] = temp_src;
+			}
+		}
+	}
 
-	inst_mov = rc_insert_new_instruction(c, inst->Prev);
-	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
-	inst_mov->U.I.DstReg.WriteMask = 0;
-	inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
-	inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
-	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-	inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
-			RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
-
-	inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
-	inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
-	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
-	inst->U.I.SrcReg[0].Negate = 0;
+	inst_if->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+	inst_if->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+	inst_if->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE(
+				RC_SWIZZLE_X, RC_SWIZZLE_UNUSED,
+				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED);
+	inst_if->U.I.SrcReg[0].Negate = 0;
 
 	return 1;
 }
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 1e665e2..6aa448c 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -44,7 +44,7 @@ extern struct rc_swizzle_caps r500_swizzle_caps;
 
 extern int r500_transform_IF(
 	struct radeon_compiler * c,
-	struct rc_instruction * inst,
+	struct rc_instruction * inst_if,
 	void* data);
 
 #endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index 53ab5fb..ac73608 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -517,8 +517,11 @@ static int is_presub_candidate(
 
 	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
 
-	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
+	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
+			|| inst->U.I.SaturateMode
+			|| inst->U.I.WriteALUResult) {
 		return 0;
+	}
 
 	/* If both sources use a constant swizzle, then we can't convert it to
 	 * a presubtract operation.  In fact for the ADD and SUB presubtract
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
index 5b2295d..33181bd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c
@@ -469,6 +469,67 @@ struct rc_list * rc_variable_readers_union(struct rc_variable * var)
 	return list;
 }
 
+static unsigned int reader_equals_src(
+	struct rc_reader reader,
+	unsigned int src_type,
+	void * src)
+{
+	if (reader.Inst->Type != src_type) {
+		return 0;
+	}
+	if (src_type == RC_INSTRUCTION_NORMAL) {
+		return reader.U.I.Src == src;
+	} else {
+		return reader.U.P.Src == src;
+	}
+}
+
+static unsigned int variable_writes_src(
+	struct rc_variable * var,
+	unsigned int src_type,
+	void * src)
+{
+	unsigned int i;
+	for (i = 0; i < var->ReaderCount; i++) {
+		if (reader_equals_src(var->Readers[i], src_type, src)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+
+struct rc_list * rc_variable_list_get_writers(
+	struct rc_list * var_list,
+	unsigned int src_type,
+	void * src)
+{
+	struct rc_list * list_ptr;
+	struct rc_list * writer_list = NULL;
+	for (list_ptr = var_list; list_ptr; list_ptr = list_ptr->Next) {
+		struct rc_variable * var = list_ptr->Item;
+		if (variable_writes_src(var, src_type, src)) {
+			struct rc_variable * friend;
+			rc_list_add(&writer_list, rc_list(&var->C->Pool, var));
+			for (friend = var->Friend; friend;
+						friend = friend->Friend) {
+				if (variable_writes_src(friend, src_type, src)) {
+					rc_list_add(&writer_list,
+						rc_list(&var->C->Pool, friend));
+				}
+			}
+			/* Once we have identified the variable and its
+			 * friends that write this source, we can stop
+			 * searching, because we know none of the
+			 * other variables in the list will write this source.
+			 * If they did they would be friends of var.
+			 */
+			break;
+		}
+	}
+	return writer_list;
+}
+
 void rc_variable_print(struct rc_variable * var)
 {
 	unsigned int i;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
index b8fbcaa..9427bee 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h
@@ -79,6 +79,11 @@ unsigned int rc_variable_writemask_sum(struct rc_variable * var);
 
 struct rc_list * rc_variable_readers_union(struct rc_variable * var);
 
+struct rc_list * rc_variable_list_get_writers(
+	struct rc_list * var_list,
+	unsigned int src_type,
+	void * src);
+
 void rc_variable_print(struct rc_variable * var);
 
 #endif /* RADEON_VARIABLE_H */




More information about the mesa-commit mailing list