Mesa (master): r300/compiler: implement SIN+COS+SCS for vertex shaders

Marek Olšák mareko at kemper.freedesktop.org
Sat Jun 5 05:04:10 UTC 2010


Module: Mesa
Branch: master
Commit: f855e16afa086edfc2f03360a27f66c955a6c208
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f855e16afa086edfc2f03360a27f66c955a6c208

Author: Marek Olšák <maraeo at gmail.com>
Date:   Sat Jun  5 05:07:41 2010 +0200

r300/compiler: implement SIN+COS+SCS for vertex shaders

---

 src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c |    5 +-
 .../drivers/dri/r300/compiler/radeon_program_alu.c |   87 +++++++++++++++-----
 .../drivers/dri/r300/compiler/radeon_program_alu.h |    5 +
 3 files changed, 76 insertions(+), 21 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index dd30785..507b2e5 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -360,6 +360,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
 		switch (vpi->Opcode) {
 		case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
 		case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+		case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
 		case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
 		case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
 		case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
@@ -378,6 +379,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
 		case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
 		case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
 		case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+		case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
 		case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
 		case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
 		default:
@@ -605,8 +607,9 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
 	{
 		struct radeon_program_transformation transformations[] = {
 			{ &r300_transform_vertex_alu, 0 },
+			{ &r300_transform_trig_scale_vertex, 0 }
 		};
-		radeonLocalTransform(&compiler->Base, 1, transformations);
+		radeonLocalTransform(&compiler->Base, 2, transformations);
 	}
 
 	debug_program_log(compiler, "after native rewrite");
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index d14de79..c922d3d 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -848,6 +848,34 @@ int radeonTransformTrigSimple(struct radeon_compiler* c,
 	return 1;
 }
 
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	unsigned srctmp)
+{
+	if (inst->U.I.Opcode == RC_OPCODE_COS) {
+		emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+			srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+		emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+			inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+	} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+		struct rc_dst_register moddst = inst->U.I.DstReg;
+
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+			moddst.WriteMask = RC_MASK_X;
+			emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+		}
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+			moddst.WriteMask = RC_MASK_Y;
+			emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+		}
+	}
+
+	rc_remove_instruction(inst);
+}
+
 
 /**
  * Transform the trigonometric functions COS, SIN, and SCS
@@ -880,29 +908,48 @@ int radeonTransformTrigScale(struct radeon_compiler* c,
 	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
 		srcreg(RC_FILE_TEMPORARY, temp));
 
-	if (inst->U.I.Opcode == RC_OPCODE_COS) {
-		emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
-			srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
-		emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
-			inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-	} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
-		struct rc_dst_register moddst = inst->U.I.DstReg;
+	r300_transform_SIN_COS_SCS(c, inst, temp);
+	return 1;
+}
 
-		if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
-			moddst.WriteMask = RC_MASK_X;
-			emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
-				srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-		}
-		if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
-			moddst.WriteMask = RC_MASK_Y;
-			emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
-				srcregswz(RC_FILE_TEMPORARY, temp, RC_SWIZZLE_WWWW));
-		}
-	}
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void *unused)
+{
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
 
-	rc_remove_instruction(inst);
+	/* Repeat x in the range [-PI, PI]:
+	 *
+	 *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+	 */
+
+	static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+	unsigned int temp;
+	unsigned int constant;
+
+	temp = rc_find_free_temporary(c);
+	constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		swizzle_xxxx(inst->U.I.SrcReg[0]),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
 
+	r300_transform_SIN_COS_SCS(c, inst, temp);
 	return 1;
 }
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
index 7cb5f84..77d4444 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -50,6 +50,11 @@ int radeonTransformTrigScale(
 	struct rc_instruction * inst,
 	void*);
 
+int r300_transform_trig_scale_vertex(
+	struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void*);
+
 int radeonTransformDeriv(
 	struct radeon_compiler * c,
 	struct rc_instruction * inst,




More information about the mesa-commit mailing list