Mesa (master): r300/compiler: Add peephole optimization for the 'add' presubtract operation

Tom Stellard tstellar at kemper.freedesktop.org
Sat Sep 11 02:54:07 UTC 2010


Module: Mesa
Branch: master
Commit: a64b4a05af362fff52c9e52eb51cd92fe164afcc
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a64b4a05af362fff52c9e52eb51cd92fe164afcc

Author: Tom Stellard <tstellar at gmail.com>
Date:   Mon Aug 30 08:59:30 2010 -0700

r300/compiler: Add peephole optimization for the 'add' presubtract operation

---

 .../drivers/dri/r300/compiler/radeon_optimize.c    |  226 ++++++++++++++------
 .../dri/r300/compiler/radeon_pair_schedule.c       |   58 ++++--
 2 files changed, 200 insertions(+), 84 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index e01ba85..3ff07d6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -38,6 +38,10 @@ struct peephole_state {
 	unsigned int WriteMask;
 };
 
+typedef void (*rc_presub_replace_fn)(struct peephole_state *,
+						struct rc_instruction *,
+						unsigned int);
+
 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
 {
 	struct rc_src_register combine;
@@ -516,68 +520,26 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
 	}
 }
 
-/**
- * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
- * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
- * of the add instruction must have the constatnt 1 swizzle.  This function
- * does not check const registers to see if their value is 1.0, so it should
- * be called after the constant_folding optimization.
- * @return 
- * 	0 if the ADD instruction is still part of the program.
- * 	1 if the ADD instruction is no longer part of the program.
- */
-static int peephole_add_presub_inv(
+static int presub_helper(
 	struct radeon_compiler * c,
-	struct rc_instruction * inst_add)
+	struct peephole_state * s,
+	rc_presubtract_op presub_opcode,
+	rc_presub_replace_fn presub_replace)
 {
-	unsigned int i, swz, mask;
+	struct rc_instruction * inst;
 	unsigned int can_remove = 0;
 	unsigned int cant_sub = 0;
-	struct rc_instruction * inst;
-	struct peephole_state s;
-
-	if (inst_add->U.I.SaturateMode)
-		return 0;
-
-	mask = inst_add->U.I.DstReg.WriteMask;
-
-	/* Check if src0 is 1. */
-	/* XXX It would be nice to use is_src_uniform_constant here, but that
-	 * function only works if the register's file is RC_FILE_NONE */
-	for(i = 0; i < 4; i++ ) {
-		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
-		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
-						&& swz != RC_SWIZZLE_ONE) {
-			return 0;
-		}
-	}
 
-	/* Check src1. */
-	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
-						inst_add->U.I.DstReg.WriteMask
-		|| inst_add->U.I.SrcReg[1].Abs
-		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
-			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
-		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
-
-		return 0;
-	}
-
-	/* Setup the peephole_state information. */
-	s.Inst = inst_add;
-	s.WriteMask = inst_add->U.I.DstReg.WriteMask;
-
-	/* For all instructions that read inst_add->U.I.DstReg before it is
-	 * written again, use the 1 - src0 presubtact instead. */
-	for(inst = inst_add->Next; inst != &c->Program.Instructions;
+	for(inst = s->Inst->Next; inst != &c->Program.Instructions;
 							inst = inst->Next) {
+		unsigned int i;
 		const struct rc_opcode_info * info =
 					rc_get_opcode_info(inst->U.I.Opcode);
 
 		for(i = 0; i < info->NumSrcRegs; i++) {
-			if(inst_add->U.I.DstReg.WriteMask !=
+			if(s->Inst->U.I.DstReg.WriteMask !=
 					src_reads_dst_mask(inst->U.I.SrcReg[i],
-						inst_add->U.I.DstReg)) {
+						s->Inst->U.I.DstReg)) {
 				continue;
 			}
 			if (cant_sub) {
@@ -601,47 +563,173 @@ static int peephole_add_presub_inv(
 			 * instruction, unless the two prsubtract operations
 			 * are the same and read from the same registers. */
 			if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
-				if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
+				if (inst->U.I.PreSub.Opcode != presub_opcode
 					|| inst->U.I.PreSub.SrcReg[0].File !=
-						inst_add->U.I.SrcReg[1].File
+						s->Inst->U.I.SrcReg[1].File
 					|| inst->U.I.PreSub.SrcReg[0].Index !=
-						inst_add->U.I.SrcReg[1].Index) {
+						s->Inst->U.I.SrcReg[1].Index) {
 
 					can_remove = 0;
 					break;
 				}
 			}
-			/* We must be careful not to modify inst_add, since it
-			 * is possible it will remain part of the program. */
-			inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
-			inst->U.I.PreSub.SrcReg[0].Negate = 0;
-			inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
-			inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
-						inst->U.I.PreSub.SrcReg[0]);
-
-			inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
-			inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
+			presub_replace(s, inst, i);
 			can_remove = 1;
 		}
 		if(!can_remove)
 			break;
-		rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+		rc_for_all_writes_mask(inst, peephole_scan_write, s);
 		/* If all components of inst_add's destination register have
 		 * been written to by subsequent instructions, the original
 		 * value of the destination register is no longer valid and
 		 * we can't keep doing substitutions. */
-		if (!s.WriteMask){
+		if (!s->WriteMask){
 			break;
 		}
 		/* Make this instruction doesn't write to the presubtract source. */
 		if (inst->U.I.DstReg.WriteMask &
-				src_reads_dst_mask(inst_add->U.I.SrcReg[1],
+				src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
 							inst->U.I.DstReg)
 				|| info->IsFlowControl) {
 			cant_sub = 1;
 		}
 	}
-	if(can_remove) {
+	return can_remove;
+}
+
+static void presub_replace_add(struct peephole_state *s,
+						struct rc_instruction * inst,
+						unsigned int src_index)
+{
+	inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
+	inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
+	inst->U.I.PreSub.SrcReg[0].Negate = 0;
+	inst->U.I.PreSub.SrcReg[1].Negate = 0;
+	inst->U.I.PreSub.Opcode = RC_PRESUB_ADD;
+	inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
+						inst->U.I.PreSub.SrcReg[0]);
+	inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+	inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD;
+}
+
+static int peephole_add_presub_add(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add)
+{
+	struct rc_src_register * src0 = NULL;
+	struct rc_src_register * src1 = NULL;
+	unsigned int i;
+	struct peephole_state s;
+
+	if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+		return 0;
+
+	if (inst_add->U.I.SaturateMode)
+		return 0;
+
+	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+		return 0;
+
+	/* src0 and src1 can't have absolute values, only one of them can be negative, and each must be all negative or all positive. */
+	for (i = 0; i < 2; i++) {
+		if (inst_add->U.I.SrcReg[i].Abs)
+			return 0;
+		if ((inst_add->U.I.SrcReg[i].Negate
+					& inst_add->U.I.DstReg.WriteMask) ==
+						inst_add->U.I.DstReg.WriteMask) {
+			src0 = &inst_add->U.I.SrcReg[i];
+		} else if (!src1) {
+			src1 = &inst_add->U.I.SrcReg[i];
+		} else {
+			src0 = &inst_add->U.I.SrcReg[i];
+		}
+	}
+
+	if (!src1)
+		return 0;
+
+	/* XXX Only do add for now. */
+	if (src0->Negate)
+		return 0;
+
+	s.Inst = inst_add;
+	s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+	if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
+		rc_remove_instruction(inst_add);
+		return 1;
+	}
+	return 0;
+}
+
+static void presub_replace_inv(struct peephole_state * s,
+						struct rc_instruction * inst,
+						unsigned int src_index)
+{
+	/* We must be careful not to modify s->Inst, since it
+	 * is possible it will remain part of the program. 
+	 * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
+	inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
+	inst->U.I.PreSub.SrcReg[0].Negate = 0;
+	inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
+	inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
+						inst->U.I.PreSub.SrcReg[0]);
+
+	inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+	inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
+ * of the add instruction must have the constant 1 swizzle.  This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return 
+ * 	0 if the ADD instruction is still part of the program.
+ * 	1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst_add)
+{
+	unsigned int i, swz, mask;
+	struct peephole_state s;
+
+	if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+		return 0;
+
+	if (inst_add->U.I.SaturateMode)
+		return 0;
+
+	mask = inst_add->U.I.DstReg.WriteMask;
+
+	/* Check if src0 is 1. */
+	/* XXX It would be nice to use is_src_uniform_constant here, but that
+	 * function only works if the register's file is RC_FILE_NONE */
+	for(i = 0; i < 4; i++ ) {
+		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+		if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+						&& swz != RC_SWIZZLE_ONE) {
+			return 0;
+		}
+	}
+
+	/* Check src1. */
+	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+						inst_add->U.I.DstReg.WriteMask
+		|| inst_add->U.I.SrcReg[1].Abs
+		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+		return 0;
+	}
+
+	/* Setup the peephole_state information. */
+	s.Inst = inst_add;
+	s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+
+	if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
 		rc_remove_instruction(inst_add);
 		return 1;
 	}
@@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
 		if (c->has_presub) {
 			if(peephole_add_presub_inv(c, inst))
 				return 1;
+			if(peephole_add_presub_add(c, inst))
+				return 1;
 		}
 		break;
 	default:
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
index 32c54fd..5269d65 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -300,6 +300,7 @@ static int destructive_merge_instructions(
 		for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
 			unsigned int arg;
 			int free_source;
+			unsigned int one_way = 0;
 			struct radeon_pair_instruction_source srcp =
 						alpha->RGB.Src[srcp_src];
 			struct radeon_pair_instruction_source temp;
@@ -307,14 +308,27 @@ static int destructive_merge_instructions(
 			 * 3rd arg of 0 means this is not an alpha source. */
 			free_source = rc_pair_alloc_source(rgb, 1, 0,
 							srcp.File, srcp.Index);
-			/* If free_source == srcp_src, then either the
-			 * presubtract source is already in the correct place. */
-			if (free_source == srcp_src)
-				continue;
 			/* If free_source < 0 then there are no free source
 			 * slots. */
 			if (free_source < 0)
 				return 0;
+
+			temp = rgb->RGB.Src[srcp_src];
+			rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
+			/* srcp needs src0 and src1 to be the same */
+			if (free_source < srcp_src) {
+				if (!temp.Used)
+					continue;
+				free_source = rc_pair_alloc_source(rgb, 1, 0,
+							srcp.File, srcp.Index);
+				one_way = 1;
+			} else {
+				rgb->RGB.Src[free_source] = temp;
+			}
+			/* If free_source == srcp_src, then the presubtract
+			 * source is already in the correct place. */
+			if (free_source == srcp_src)
+				continue;
 			/* Shuffle the sources, so we can put the
 			 * presubtract source in the correct place. */
 			for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -331,12 +345,11 @@ static int destructive_merge_instructions(
 				/* We need to do this just in case register
 				 * is one of the sources already, but in the
 				 * wrong spot. */
-				else if(rgb->RGB.Arg[arg].Source == free_source)
+				else if(rgb->RGB.Arg[arg].Source == free_source
+								&& !one_way) {
 					rgb->RGB.Arg[arg].Source = srcp_src;
+				}
 			}
-			temp = rgb->RGB.Src[srcp_src];
-			rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
-			rgb->RGB.Src[free_source] = temp;
 		}
 	}
 
@@ -352,6 +365,7 @@ static int destructive_merge_instructions(
 		for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
 			unsigned int arg;
 			int free_source;
+			unsigned int one_way = 0;
 			struct radeon_pair_instruction_source srcp =
 						alpha->Alpha.Src[srcp_src];
 			struct radeon_pair_instruction_source temp;
@@ -359,14 +373,27 @@ static int destructive_merge_instructions(
 			 * 3rd arg of 1 means this is an alpha source. */
 			free_source = rc_pair_alloc_source(rgb, 0, 1,
 							srcp.File, srcp.Index);
-			/* If free_source == srcp_src, then either the
-			 * presubtract source is already in the correct place. */
-			if (free_source == srcp_src)
-				continue;
 			/* If free_source < 0 then there are no free source
 			 * slots. */
 			if (free_source < 0)
 				return 0;
+
+			temp = rgb->Alpha.Src[srcp_src];
+			rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
+			/* srcp needs src0 and src1 to be the same. */
+			if (free_source < srcp_src) {
+				if (!temp.Used)
+					continue;
+				free_source = rc_pair_alloc_source(rgb, 0, 1,
+							temp.File, temp.Index);
+				one_way = 1;
+			} else {
+				rgb->Alpha.Src[free_source] = temp;
+			}
+			/* If free_source == srcp_src, then the presubtract
+			 * source is already in the correct place. */
+			if (free_source == srcp_src)
+				continue;
 			/* Shuffle the sources, so we can put the
 			 * presubtract source in the correct place. */
 			for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -380,12 +407,11 @@ static int destructive_merge_instructions(
 				}
 				if (rgb->RGB.Arg[arg].Source == srcp_src)
 					rgb->RGB.Arg[arg].Source = free_source;
-				else if (rgb->RGB.Arg[arg].Source == free_source)
+				else if (rgb->RGB.Arg[arg].Source == free_source
+								&& !one_way) {
 					rgb->RGB.Arg[arg].Source = srcp_src;
+				}
 			}
-			temp = rgb->Alpha.Src[srcp_src];
-			rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
-			rgb->Alpha.Src[free_source] = temp;
 		}
 	}
 




More information about the mesa-commit mailing list