Mesa (master): r300: rewrite and hopefully simplify RS setup

Nicolai Hähnle nh at kemper.freedesktop.org
Fri Mar 6 21:20:00 UTC 2009


Module: Mesa
Branch: master
Commit: d8b8fb68954e6eebd0b38708c25a5bec4cf1a26c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8b8fb68954e6eebd0b38708c25a5bec4cf1a26c

Author: Maciej Cencora <m.cencora at gmail.com>
Date:   Wed Feb 18 10:42:01 2009 +0100

r300: rewrite and hopefully simplify RS setup

Testing and regression fixes by Markus Amsler

Signed-off-by: Nicolai Haehnle <nhaehnle at gmail.com>

---

 src/mesa/drivers/dri/r300/r300_state.c          |  401 ++++++++++++-----------
 src/mesa/drivers/dri/r300/r300_vertprog.c       |    9 +-
 src/mesa/drivers/dri/r300/radeon_program_pair.c |   28 +-
 3 files changed, 225 insertions(+), 213 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index 37dae6c..171a154 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1595,18 +1595,14 @@ union r300_outputs_written {
 static void r300SetupRSUnit(GLcontext * ctx)
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-	/* I'm still unsure if these are needed */
-	GLuint interp_col[8];
         TNLcontext *tnl = TNL_CONTEXT(ctx);
 	struct vertex_buffer *VB = &tnl->vb;
 	union r300_outputs_written OutputsWritten;
 	GLuint InputsRead;
 	int fp_reg, high_rr;
-	int col_interp_nr;
-	int rs_tex_count = 0, rs_col_count = 0;
-	int i, count;
-
-	memset(interp_col, 0, sizeof(interp_col));
+	int col_ip, tex_ip;
+	int rs_tex_count = 0;
+	int i, count, col_fmt;
 
 	if (hw_tcl_on)
 		OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
@@ -1624,51 +1620,66 @@ static void r300SetupRSUnit(GLcontext * ctx)
 	R300_STATECHANGE(r300, rc);
 	R300_STATECHANGE(r300, rr);
 
-	fp_reg = col_interp_nr = high_rr = 0;
-
-	r300->hw.rr.cmd[R300_RR_INST_1] = 0;
+	fp_reg = col_ip = tex_ip = col_fmt = 0;
 
-	if (InputsRead & FRAG_BIT_WPOS) {
-		for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-			if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
-				break;
+	r300->hw.rc.cmd[1] = 0;
+	r300->hw.rc.cmd[2] = 0;
+	for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+		r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
 
-		if (i == ctx->Const.MaxTextureUnits) {
-			fprintf(stderr, "\tno free texcoord found...\n");
-			_mesa_exit(-1);
-		}
+	for (i=0; i<R300_RI_CMDSIZE-1; ++i)
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
 
-		InputsRead |= (FRAG_BIT_TEX0 << i);
-		InputsRead &= ~FRAG_BIT_WPOS;
-	}
 
 	if (InputsRead & FRAG_BIT_COL0) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
-		interp_col[0] |= R300_RS_COL_PTR(rs_col_count);
-		if (count == 3)
-			interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
-		rs_col_count += count;
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL0;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+		}
 	}
-	else
-		interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
 
 	if (InputsRead & FRAG_BIT_COL1) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
-		if (count == 3)
-			interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
-		interp_col[1] |= R300_RS_COL_PTR(1);
-		rs_col_count += count;
-	}
-
-	if (InputsRead & FRAG_BIT_FOGC) {
-		/* XXX FIX THIS
-		 * Just turn off the bit for now.
-		 * Need to do something similar to the color/texcoord inputs.
-		 */
-		InputsRead &= ~FRAG_BIT_FOGC;
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL1;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+		}
 	}
 
 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+		    continue;
+
+		if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+		    WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+		    continue;
+		}
+
 		int swiz;
 
 		/* with TCL we always seem to route 4 components */
@@ -1677,7 +1688,6 @@ static void r300SetupRSUnit(GLcontext * ctx)
 		else
 		  count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
 
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count;
 		switch(count) {
 		case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break;
 		case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
@@ -1686,63 +1696,48 @@ static void r300SetupRSUnit(GLcontext * ctx)
 		case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
 		};
 
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz;
-
-		r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
-		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-
-			rs_tex_count += count;
-
-			//assert(r300->state.texture.tc_count != 0);
-			r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i	/* source INTERP */
-			    | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT);
-			high_rr = fp_reg;
-
-			/* Passing invalid data here can lock the GPU. */
-			if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
-				InputsRead &= ~(FRAG_BIT_TEX0 << i);
-				fp_reg++;
-			} else {
-				WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
-			}
-		}
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count);
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~(FRAG_BIT_TEX0 << i);
+		rs_tex_count += count;
+		++tex_ip;
+		++fp_reg;
 	}
 
-	if (InputsRead & FRAG_BIT_COL0) {
-		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
-			r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
-			InputsRead &= ~FRAG_BIT_COL0;
-			col_interp_nr++;
+	if (InputsRead & FRAG_BIT_FOGC) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |=  R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) |  R300_RS_TEX_PTR(rs_tex_count);
+			r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_FOGC;
+			rs_tex_count += 4;
+			++tex_ip;
+			++fp_reg;
 		} else {
-			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+			WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
 		}
 	}
 
-	if (InputsRead & FRAG_BIT_COL1) {
-		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
-			r300->hw.rr.cmd[R300_RR_INST_1] |= R300_RS_INST_COL_ID(1) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
-			InputsRead &= ~FRAG_BIT_COL1;
-			if (high_rr < 1)
-				high_rr = 1;
-			col_interp_nr++;
-		} else {
-			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
-		}
+	if (InputsRead & FRAG_BIT_WPOS) {
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |=  R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) |  R300_RS_TEX_PTR(rs_tex_count);
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~FRAG_BIT_WPOS;
+		rs_tex_count += 4;
+		++tex_ip;
+		++fp_reg;
 	}
+	InputsRead &= ~FRAG_BIT_WPOS;
 
-	/* Need at least one. This might still lock as the values are undefined... */
-	if (rs_tex_count == 0 && col_interp_nr == 0) {
-		r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
-		col_interp_nr++;
+	/* Setup default color if no color or tex was set */
+	if (rs_tex_count == 0 && col_ip == 0) {
+		r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+		++col_ip;
 	}
 
-	r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT)
-	  | (col_interp_nr << R300_IC_COUNT_SHIFT)
-	  | R300_HIRES_EN;
+	high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT)  | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+	r300->hw.rc.cmd[2] |= high_rr - 1;
 
-	assert(high_rr >= 0);
-	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
-	r300->hw.rc.cmd[2] = high_rr;
+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr);
 
 	if (InputsRead)
 		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
@@ -1751,18 +1746,15 @@ static void r300SetupRSUnit(GLcontext * ctx)
 static void r500SetupRSUnit(GLcontext * ctx)
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-	/* I'm still unsure if these are needed */
-	GLuint interp_col[8];
-	union r300_outputs_written OutputsWritten;
         TNLcontext *tnl = TNL_CONTEXT(ctx);
 	struct vertex_buffer *VB = &tnl->vb;
+	union r300_outputs_written OutputsWritten;
 	GLuint InputsRead;
 	int fp_reg, high_rr;
-	int rs_col_count = 0;
-	int in_texcoords, col_interp_nr;
-	int i, count;
+	int col_ip, tex_ip;
+	int rs_tex_count = 0;
+	int i, count, col_fmt;
 
-	memset(interp_col, 0, sizeof(interp_col));
 	if (hw_tcl_on)
 		OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
 	else
@@ -1779,130 +1771,151 @@ static void r500SetupRSUnit(GLcontext * ctx)
 	R300_STATECHANGE(r300, rc);
 	R300_STATECHANGE(r300, rr);
 
-	fp_reg = col_interp_nr = high_rr = in_texcoords = 0;
-
-	r300->hw.rr.cmd[R300_RR_INST_1] = 0;
+	fp_reg = col_ip = tex_ip = col_fmt = 0;
 
-	if (InputsRead & FRAG_BIT_WPOS) {
-		for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-			if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
-				break;
+	r300->hw.rc.cmd[1] = 0;
+	r300->hw.rc.cmd[2] = 0;
+	for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+		r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
 
-		if (i == ctx->Const.MaxTextureUnits) {
-			fprintf(stderr, "\tno free texcoord found...\n");
-			_mesa_exit(-1);
-		}
+	for (i=0; i<R500_RI_CMDSIZE-1; ++i)
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
 
-		InputsRead |= (FRAG_BIT_TEX0 << i);
-		InputsRead &= ~FRAG_BIT_WPOS;
-	}
 
 	if (InputsRead & FRAG_BIT_COL0) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
-		interp_col[0] |= R500_RS_COL_PTR(rs_col_count);
-		if (count == 3)
-			interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
-		rs_col_count += count;
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL0;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+		}
 	}
-	else
-		interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
 
 	if (InputsRead & FRAG_BIT_COL1) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
-		interp_col[1] |= R500_RS_COL_PTR(1);
-		if (count == 3)
-			interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
-		rs_col_count += count;
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL1;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+		}
 	}
 
+
 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-		GLuint swiz = 0;
+		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+		    continue;
 
-		/* with TCL we always seem to route 4 components */
-		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-
-		  if (hw_tcl_on)
-		    count = 4;
-		  else
-		    count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
-
-		  /* always have on texcoord */
-		  swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT;
-		  if (count >= 2)
-		    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT;
-		  else
-		    swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
-
-		  if (count >= 3)
-		    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT;
-		  else
-		    swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
-
-		  if (count == 4)
-		    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT;
-		  else
-		    swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
-
-		} else
-		   swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-		          (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-		          (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-		          (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz;
-
-		r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
-		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-			//assert(r300->state.texture.tc_count != 0);
-			r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i	/* source INTERP */
-			    | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT);
-			high_rr = fp_reg;
-
-			/* Passing invalid data here can lock the GPU. */
-			if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
-				InputsRead &= ~(FRAG_BIT_TEX0 << i);
-				fp_reg++;
-			} else {
-				WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
-			}
+		if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+		    WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+		    continue;
 		}
-	}
 
-	if (InputsRead & FRAG_BIT_COL0) {
-		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
-			r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
-			InputsRead &= ~FRAG_BIT_COL0;
-			col_interp_nr++;
+		int swiz = 0;
+
+		/* with TCL we always seem to route 4 components */
+		if (hw_tcl_on)
+		  count = 4;
+		else
+		  count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
+
+		if (count == 4) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		} else if (count == 3) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		} else if (count == 2) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		} else if (count == 1) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
 		} else {
-			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
 		}
+
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz;
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~(FRAG_BIT_TEX0 << i);
+		rs_tex_count += count;
+		++tex_ip;
+		++fp_reg;
 	}
 
-	if (InputsRead & FRAG_BIT_COL1) {
-		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
-			r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE |  (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
-			InputsRead &= ~FRAG_BIT_COL1;
-			if (high_rr < 1)
-				high_rr = 1;
-			col_interp_nr++;
+	if (InputsRead & FRAG_BIT_FOGC) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
+				((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+				((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+				((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
+
+			r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_FOGC;
+			rs_tex_count += 4;
+			++tex_ip;
+			++fp_reg;
 		} else {
-			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+			WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
 		}
 	}
 
-	/* Need at least one. This might still lock as the values are undefined... */
-	if (in_texcoords == 0 && col_interp_nr == 0) {
-		r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
-		col_interp_nr++;
+	if (InputsRead & FRAG_BIT_WPOS) {
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
+				((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+				((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+				((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
+
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~FRAG_BIT_WPOS;
+		rs_tex_count += 4;
+		++tex_ip;
+		++fp_reg;
+	}
+
+	/* Setup default color if no color or tex was set */
+	if (rs_tex_count == 0 && col_ip == 0) {
+		r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+		++col_ip;
 	}
 
-	r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT)
-	  | (col_interp_nr << R300_IC_COUNT_SHIFT)
-	  | R300_HIRES_EN;
+	high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT)  | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+	r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1);
 
-	assert(high_rr >= 0);
-	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
-	r300->hw.rc.cmd[2] = 0xC0 | high_rr;
+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr);
 
 	if (InputsRead)
 		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index c4e325e..b6a4d30 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -993,17 +993,16 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
 		    vp->outputs[VERT_RESULT_COL0] + 3;
 		cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
 	}
-#if 0
-	if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
-		vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
-	}
-#endif
 
 	for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
 		if (vp->key.OutputsWritten & (1 << i)) {
 			vp->outputs[i] = cur_reg++;
 		}
 	}
+
+	if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+		vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+	}
 }
 
 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
index 365e7c1..f398404 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
@@ -451,19 +451,7 @@ static void allocate_input_registers(struct pair_state *s)
 	int i;
 	GLuint hwindex = 0;
 
-	/* Texcoords come first */
-	for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
-		if (InputsRead & (FRAG_BIT_TEX0 << i))
-			alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
-	}
-	InputsRead &= ~FRAG_BITS_TEX_ANY;
-
-	/* fragment position treated as a texcoord */
-	if (InputsRead & FRAG_BIT_WPOS)
-		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
-	InputsRead &= ~FRAG_BIT_WPOS;
-
-	/* Then primary colour */
+	/* Primary colour */
 	if (InputsRead & FRAG_BIT_COL0)
 		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
 	InputsRead &= ~FRAG_BIT_COL0;
@@ -473,11 +461,23 @@ static void allocate_input_registers(struct pair_state *s)
 		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
 	InputsRead &= ~FRAG_BIT_COL1;
 
-	/* Fog coordinate */
+	/* Texcoords */
+	for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
+		if (InputsRead & (FRAG_BIT_TEX0 << i))
+			alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
+	}
+	InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+	/* Fogcoords treated as a texcoord */
 	if (InputsRead & FRAG_BIT_FOGC)
 		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
 	InputsRead &= ~FRAG_BIT_FOGC;
 
+	/* fragment position treated as a texcoord */
+	if (InputsRead & FRAG_BIT_WPOS)
+		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
+	InputsRead &= ~FRAG_BIT_WPOS;
+
 	/* Anything else */
 	if (InputsRead)
 		error("Don't know how to handle inputs 0x%x\n", InputsRead);




More information about the mesa-commit mailing list