mesa: Branch 'master'

Jerome Glisse glisse at kemper.freedesktop.org
Wed Feb 14 22:10:41 UTC 2007


 src/mesa/drivers/dri/r300/r300_context.h  |    2 
 src/mesa/drivers/dri/r300/r300_fragprog.c |  270 ++++++++++++++++++++----------
 2 files changed, 184 insertions(+), 88 deletions(-)

New commits:
diff-tree e4c772bead57c0190f74f1fa43dd129e170567f7 (from 1e2b46963595e02172b4d651cc8a219e9f5c8176)
Author: Rune Peterson <rune at megahurts.dk>
Date:   Wed Feb 14 23:10:52 2007 +0100

    r300: Fix cos & add scs to fragment program.
    
    This commit:
     - Fixes COS.
     - Does range reductions for SIN & COS.
     - Adds SCS.
     - Removes the optimized version of SIN & COS.
     - Tweaks the weight constant (should help precision).
     - Fixes a copy-paste typo in emit_arith().

diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
index b140235..48b50bc 100644
--- a/src/mesa/drivers/dri/r300/r300_context.h
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -731,7 +731,7 @@ struct r300_fragment_program {
 	int max_temp_idx;
 
 	/* the index of the sin constant is stored here */
-	GLint const_sin;
+	GLint const_sin[2];
 	
 	GLuint optimization;
 };
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index b00cf9e..8e45bd5 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c
@@ -33,7 +33,6 @@
 
 /*TODO'S
  *
- * - SCS instructions
  * - Depth write, WPOS/FOGC inputs
  * - FogOption
  * - Verify results of opcodes for accuracy, I've only checked them
@@ -1081,7 +1080,7 @@ static void emit_arith(struct r300_fragm
 				break;
 			}
 			if (emit_sop &&
-			    (s_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_VECTOR)) {
+			    (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) {
 				vpos = spos = MAX2(vpos, spos);
 				break;
 			}
@@ -1204,6 +1203,25 @@ static GLuint get_attrib(struct r300_fra
 }
 #endif
 
+static void make_sin_const(struct r300_fragment_program *rp)
+{
+	if(rp->const_sin[0] == -1){
+	    GLfloat cnstv[4];
+
+	    cnstv[0] = 1.273239545; // 4/PI
+	    cnstv[1] =-0.405284735; // -4/(PI*PI)
+	    cnstv[2] = 3.141592654; // PI
+	    cnstv[3] = 0.2225;      // weight
+	    rp->const_sin[0] = emit_const4fv(rp, cnstv);
+
+	    cnstv[0] = 0.5;
+	    cnstv[1] = -1.5;
+	    cnstv[2] = 0.159154943; // 1/(2*PI)
+	    cnstv[3] = 6.283185307; // 2*PI
+	    rp->const_sin[1] = emit_const4fv(rp, cnstv);
+	}
+}
+
 static GLboolean parse_program(struct r300_fragment_program *rp)
 {	
 	struct gl_fragment_program *mp = &rp->mesa_program;
@@ -1260,84 +1278,68 @@ static GLboolean parse_program(struct r3
 			 * cos using a parabola (see SIN):
 			 * cos(x):
 			 *   x += PI/2
-			 *   x = (x < PI)?x : x-2*PI
+			 *   x = (x/(2*PI))+0.5
+			 *   x = frac(x)
+			 *   x = (x*2*PI)-PI
 			 *   result = sin(x)
 			 */
 			temp = get_temp_reg(rp);
-			if(rp->const_sin == -1){
-			    cnstv[0] = 1.273239545;
-			    cnstv[1] =-0.405284735;
-			    cnstv[2] = 3.141592654;
-			    cnstv[3] = 0.225;
-			    rp->const_sin = emit_const4fv(rp, cnstv);
-			}
-			cnst = rp->const_sin;			
+			make_sin_const(rp);
 			src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 
-			emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
-				   pfs_half,
-				   undef,
-				   undef,
-				   0);
+			/* add 0.5*PI and do range reduction */
 
 			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
-				   swizzle(cnst, Z, Z, Z, Z), //PI
+				   swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI
 				   pfs_half,
 				   swizzle(keep(src[0]), X, X, X, X),
 				   0);
 
-			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
-				   negate(swizzle(temp, W, W, W, W)), //-2
-				   swizzle(cnst, Z, Z, Z, Z), //PI
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
 				   swizzle(temp, X, X, X, X),
+				   swizzle(rp->const_sin[1], Z, Z, Z, Z),
+				   pfs_half,
 				   0);
 
-			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
-				   swizzle(cnst, Z, Z, Z, Z), //PI
-				   negate(pfs_half),
-				   swizzle(src[0], X, X, X, X),
+			emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
+				   swizzle(temp, X, X, X, X),
+				   undef,
+				   undef,
 				   0);
-			
-			emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
-				   swizzle(temp, W, W, W, W),
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
 				   swizzle(temp, X, X, X, X),
-				   swizzle(temp, Y, Y, Y, Y), 
+				   swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+				   negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI
 				   0);
 
 			/* SIN */
 
 			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
 				   swizzle(temp, Z, Z, Z, Z),
-				   cnst,
+				   rp->const_sin[0],
 				   pfs_zero,
 				   0);
 
-			if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
-			    emit_arith(rp, PFS_OP_MAD, dest, mask,
-				       swizzle(temp, Y, Y, Y, Y),
-				       absolute(swizzle(temp, Z, Z, Z, Z)),
-				       swizzle(temp, X, X, X, X),
-				       flags);
-			}else{
-			    emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
-				       swizzle(temp, Y, Y, Y, Y),
-				       absolute(swizzle(temp, Z, Z, Z, Z)),
-				       swizzle(temp, X, X, X, X),
-				       0);
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+				   swizzle(temp, Y, Y, Y, Y),
+				   absolute(swizzle(temp, Z, Z, Z, Z)),
+				   swizzle(temp, X, X, X, X),
+				   0);
 			
-			    emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
-				       swizzle(temp, X, X, X, X),
-				       absolute(swizzle(temp, X, X, X, X)),
-				       negate(swizzle(temp, X, X, X, X)),
-				       0);
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+				   swizzle(temp, X, X, X, X),
+				   absolute(swizzle(temp, X, X, X, X)),
+				   negate(swizzle(temp, X, X, X, X)),
+				   0);
 
 
-	    		    emit_arith(rp, PFS_OP_MAD, dest, mask,
-				       swizzle(temp, Y, Y, Y, Y),
-				       swizzle(cnst, W, W, W, W),
-				       swizzle(temp, X, X, X, X),
-				       flags);
-			}
+	    		emit_arith(rp, PFS_OP_MAD, dest, mask,
+				   swizzle(temp, Y, Y, Y, Y),
+				   swizzle(rp->const_sin[0], W, W, W, W),
+				   swizzle(temp, X, X, X, X),
+				   flags);
+
 			free_temp(rp, temp);
 			break;
 		case OPCODE_DP3:
@@ -1577,7 +1579,93 @@ static GLboolean parse_program(struct r3
 				   flags);
 			break;
 		case OPCODE_SCS:
-			ERROR("SCS not implemented\n");
+			/*
+			 * cos using a parabola (see SIN):
+			 * cos(x):
+			 *   x += PI/2
+			 *   x = (x/(2*PI))+0.5
+			 *   x = frac(x)
+			 *   x = (x*2*PI)-PI
+			 *   result = sin(x)
+			 */
+			temp = get_temp_reg(rp);
+			make_sin_const(rp);
+			src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
+
+			/* add 0.5*PI and do range reduction */
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X|WRITEMASK_Y,
+				   swizzle(rp->const_sin[0], Z, Z, Z, Z),
+				   rp->const_sin[1],
+				   swizzle(keep(src[0]), X, X, X, X),
+				   0);
+
+			emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_W,
+				   swizzle(rp->const_sin[0], Z, Z, Z, Z),
+				   negate(pfs_half),
+				   swizzle(keep(src[0]), X, X, X, X),
+				   0);
+
+			emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
+				   swizzle(temp, X, X, X, X),
+				   swizzle(temp, Y, Y, Y, Y),
+				   swizzle(temp, W, W, W, W),
+				   0);
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+			           swizzle(temp, Z, Z, Z, Z),
+				   rp->const_sin[0],
+			           pfs_zero,
+				   0);
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+				   swizzle(temp, Y, Y, Y, Y),
+				   absolute(swizzle(temp, Z, Z, Z, Z)),
+				   swizzle(temp, X, X, X, X),
+				   0);
+
+			if(mask & WRITEMASK_Y)
+			{
+			    emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+				       swizzle(keep(src[0]), X, X, X, X),
+				       rp->const_sin[0],
+				       pfs_zero,
+				       0);
+
+			    emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+				       swizzle(temp, Y, Y, Y, Y),
+				       absolute(swizzle(keep(src[0]), X, X, X, X)),
+				       swizzle(temp, X, X, X, X),
+				       0);
+			}
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+				   swizzle(temp, W, W, W, W),
+				   absolute(swizzle(temp, W, W, W, W)),
+				   negate(swizzle(temp, W, W, W, W)),
+				   0);
+
+			emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_X,
+				   swizzle(temp, Z, Z, Z, Z),
+				   swizzle(rp->const_sin[0], W, W, W, W),
+				   swizzle(temp, W, W, W, W),
+				   flags);
+
+			if(mask & WRITEMASK_Y)
+			{
+			    emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+				       swizzle(temp, X, X, X, X),
+				       absolute(swizzle(temp, X, X, X, X)),
+				       negate(swizzle(temp, X, X, X, X)),
+				       0);
+
+	    		    emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
+				       swizzle(temp, W, W, W, W),
+				       swizzle(rp->const_sin[0], W, W, W, W),
+				       swizzle(temp, X, X, X, X),
+				       flags);
+			}
+			free_temp(rp, temp);
 			break;
 		case OPCODE_SGE:
 			src[0] = t_src(rp, fpi->SrcReg[0]);
@@ -1603,48 +1691,56 @@ static GLboolean parse_program(struct r3
 			 */
 
 			temp = get_temp_reg(rp);
-			if(rp->const_sin == -1){
-			    cnstv[0] = 1.273239545;
-			    cnstv[1] =-0.405284735;
-			    cnstv[2] = 3.141592654;
-			    cnstv[3] = 0.225;
-			    rp->const_sin = emit_const4fv(rp, cnstv);
-			}
-			cnst = rp->const_sin;
+			make_sin_const(rp);
 			src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 
-			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+			/* do range reduction */
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
 				   swizzle(keep(src[0]), X, X, X, X),
-				   cnst,
+				   swizzle(rp->const_sin[1], Z, Z, Z, Z),
+				   pfs_half,
+				   0);
+
+			emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
+				   swizzle(temp, X, X, X, X),
+				   undef,
+				   undef,
+				   0);
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+				   swizzle(temp, X, X, X, X),
+				   swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+				   negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI
+				   0);
+
+			/* SIN */
+
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+				   swizzle(temp, Z, Z, Z, Z),
+				   rp->const_sin[0],
 				   pfs_zero,
 				   0);
 
-			if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
-			    emit_arith(rp, PFS_OP_MAD, dest, mask,
-				       swizzle(temp, Y, Y, Y, Y),
-				       absolute(swizzle(src[0], X, X, X, X)),
-				       swizzle(temp, X, X, X, X),
-				       flags);
-			}else{
-			    emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
-				       swizzle(temp, Y, Y, Y, Y),
-				       absolute(swizzle(src[0], X, X, X, X)),
-				       swizzle(temp, X, X, X, X),
-				       0);
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+				   swizzle(temp, Y, Y, Y, Y),
+				   absolute(swizzle(temp, Z, Z, Z, Z)),
+				   swizzle(temp, X, X, X, X),
+				   0);
 			
-			    emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
-				       swizzle(temp, X, X, X, X),
-				       absolute(swizzle(temp, X, X, X, X)),
-				       negate(swizzle(temp, X, X, X, X)),
-				       0);
+			emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+				   swizzle(temp, X, X, X, X),
+				   absolute(swizzle(temp, X, X, X, X)),
+				   negate(swizzle(temp, X, X, X, X)),
+				   0);
 
 
-			    emit_arith(rp, PFS_OP_MAD, dest, mask,
-				       swizzle(temp, Y, Y, Y, Y),
-				       swizzle(cnst, W, W, W, W),
-				       swizzle(temp, X, X, X, X),
-				       flags);
-			}
+	    		emit_arith(rp, PFS_OP_MAD, dest, mask,
+				   swizzle(temp, Y, Y, Y, Y),
+				   swizzle(rp->const_sin[0], W, W, W, W),
+				   swizzle(temp, X, X, X, X),
+				   flags);
+
 			free_temp(rp, temp);
 			break;
 		case OPCODE_SLT:
@@ -1739,7 +1835,7 @@ static void init_program(r300ContextPtr 
 	rp->max_temp_idx = 0;
 	rp->node[0].alu_end = -1;
 	rp->node[0].tex_end = -1;
-	rp->const_sin = -1;
+	rp->const_sin[0] = -1;
 	
 	_mesa_memset(cs, 0, sizeof(*rp->cs));
 	for (i=0;i<PFS_MAX_ALU_INST;i++) {



More information about the mesa-commit mailing list