[PATCH] nv50: add support for two-sided lighting

Christoph Bumiller e0425955 at student.tuwien.ac.at
Sun Jun 21 09:03:29 PDT 2009


---
 src/gallium/drivers/nv50/nv50_context.h        |    1 +
 src/gallium/drivers/nv50/nv50_program.c        |  171 ++++++++++++++++++------
 src/gallium/drivers/nv50/nv50_program.h        |    1 +
 src/gallium/drivers/nv50/nv50_state_validate.c |    3 +
 4 files changed, 138 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 44463d6..c31c42a 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -190,6 +190,7 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
 /* nv50_program.c */
 extern void nv50_vertprog_validate(struct nv50_context *nv50);
 extern void nv50_fragprog_validate(struct nv50_context *nv50);
+extern void nv50_linkage_validate(struct nv50_context *nv50);
 extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
 
 /* nv50_state_validate.c */
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 7a4bc18..30a1d32 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1779,7 +1779,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 	struct tgsi_parse_context p;
 	boolean ret = FALSE;
 	unsigned i, c;
-	unsigned fcol, bcol, fcrd, depr;
+	unsigned fcol[2], bcol[2], fcrd, depr;
 
 	/* count (centroid) perspective interpolations */
 	unsigned centroid_loads = 0;
@@ -1791,7 +1791,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 	r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
 	r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
 
-	depr = fcol = bcol = fcrd = 0xffff;
+	fcol[0] = fcol[1] = 0xffff;
+	bcol[0] = bcol[1] = 0xffff;
+	depr = fcrd = 0xffff;
 
 	tgsi_parse_init(&p, pc->p->pipe.tokens);
 	while (!tgsi_parse_end_of_tokens(&p)) {
@@ -1826,12 +1828,21 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 				if (!d->Declaration.Semantic)
 					break;
 
+				c = d->Semantic.SemanticIndex;
 				switch (d->Semantic.SemanticName) {
 				case TGSI_SEMANTIC_POSITION:
 					depr = first;
 					pc->p->cfg.fp.regs[2] |= 0x00000100;
 					pc->p->cfg.fp.regs[3] |= 0x00000011;
 					break;
+				case TGSI_SEMANTIC_COLOR:
+					if (pc->p->type == PIPE_SHADER_VERTEX)
+						fcol[c] = first;
+					break;
+				case TGSI_SEMANTIC_BCOLOR:
+					if (pc->p->type == PIPE_SHADER_VERTEX)
+						bcol[c] = first;
+					break;
 				default:
 					break;
 				}
@@ -1854,17 +1865,14 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 					break;
 				}
 
+				c = d->Semantic.SemanticIndex;
 				if (d->Declaration.Semantic) {
 					switch (d->Semantic.SemanticName) {
 					case TGSI_SEMANTIC_POSITION:
 						fcrd = first;
 						break;
 					case TGSI_SEMANTIC_COLOR:
-						fcol = first;
-						mode = INTERP_PERSPECTIVE;
-						break;
-					case TGSI_SEMANTIC_BCOLOR:
-						bcol = first;
+						fcol[c] = first;
 						mode = INTERP_PERSPECTIVE;
 						break;
 					}
@@ -1931,10 +1939,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			/* position should be loaded first */
 			if (fcrd != 0xffff) {
 				unsigned mask;
-				mid = 0;
+				oid = mid = 0;
 				mask = load_fp_attrib(pc, fcrd, r_usage[1],
 						      &mid, &aid, &oid);
-				oid = 0;
 				pc->p->cfg.fp.regs[1] |= (mask << 24);
 				pc->p->cfg.fp.map[0] += 0x04040404 * fcrd;
 			}
@@ -1966,16 +1973,24 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 				pc->p->cfg.fp.regs[1] |= 0x08000000;
 			}
 
-			for (c = 0; c < 4; c++) {
-				/* I don't know what these values do, but
-				 * let's set them like the blob does:
-				 */
-				if (fcol != 0xffff && r_usage[1][fcol * 4 + c])
-					pc->p->cfg.fp.regs[0] += 0x00010000;
-				if (bcol != 0xffff && r_usage[1][bcol * 4 + c])
-					pc->p->cfg.fp.regs[0] += 0x00010000;
-			}
+			/* load colors directly after position - XXX: might
+			 * not be necessary if we always get colors first
+			 */
+			oid += fcol[0] * 4;
+			i = mid;
+
+			if (fcol[0] != 0xffff)
+				load_fp_attrib(pc, fcol[0], r_usage[1],
+					       &mid, &aid, &oid);
+			if (fcol[1] != 0xffff)
+				load_fp_attrib(pc, fcol[1], r_usage[1],
+					       &mid, &aid, &oid);
+
+			/* set count of mapped color components */
+			pc->p->cfg.fp.regs[0] |= (mid - i) << 16;
 
+			/* reset oid and load remaining attrs */
+			oid = (fcrd == 0xffff) ? 4 : 0;
 			for (i = 0; i < pc->attr_nr; i++)
 				load_fp_attrib(pc, i, r_usage[1],
 					       &mid, &aid, &oid);
@@ -1985,8 +2000,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			if (pc->iv_c)
 				free_temp(pc, pc->iv_c);
 
-			pc->p->cfg.fp.high_map = (mid / 4);
-			pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0);
+			pc->p->cfg.fp.high_map = mid;
 		} else {
 			/* vertex program */
 			for (i = 0; i < pc->attr_nr; i++) {
@@ -2011,6 +2025,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 		if (pc->p->type == PIPE_SHADER_VERTEX) {
 			for (i = 0; i < nr; i++)
 				ctor_reg(&pc->result[i], P_RESULT, i / 4, i);
+
+			/* output id offset bcol from fcol */
+			if (bcol[0] != 0xffff)
+				pc->p->cfg.vp.bcol = bcol[0] - fcol[0];
 		} else {
 			/* pc->p->type == PIPE_SHADER_FRAGMENT */
 			for (i = 0; i < pc->result_nr; i++) {
@@ -2101,7 +2119,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 		p->cfg.fp.regs[1] = 0x00000400;
 
 		p->cfg.fp.map[0] = 0x03020100;
-		p->cfg.fp.high_map = 1;
+		p->cfg.fp.high_map = 4;
 		break;
 	default:
 		assert(!"unsupported GPU program type");
@@ -2389,15 +2407,12 @@ nv50_vertprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(10, 0);
+	so = so_new(32, 0);
 	so_method(so, tesla, 0x1650, 2);
 	so_data  (so, p->cfg.vp.attr[0]);
 	so_data  (so, p->cfg.vp.attr[1]);
 	so_method(so, tesla, 0x16b8, 1);
 	so_data  (so, p->cfg.high_result);
-	so_method(so, tesla, 0x16ac, 2);
-	so_data  (so, p->cfg.high_result); //8);
-	so_data  (so, p->cfg.high_temp);
 	so_method(so, tesla, 0x140c, 1);
 	so_data  (so, p->code->start);
 	so_ref(so, &nv50->state.vertprog);
@@ -2410,7 +2425,6 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_program *p = nv50->fragprog;
 	struct nouveau_stateobj *so;
-	unsigned i;
 
 	if (!p->translated) {
 		nv50_program_validate(nv50, p);
@@ -2421,18 +2435,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(32, 0);
-	so_method(so, tesla, 0x1904, 4);
-	so_data  (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
-	so_data  (so, 0x00000004);
-	so_data  (so, 0x00000000);
-	so_data  (so, 0x00000000);
-	so_method(so, tesla, 0x16bc, p->cfg.fp.high_map);
-	for (i = 0; i < p->cfg.fp.high_map; i++)
-		so_data(so, p->cfg.fp.map[i]);
-	so_method(so, tesla, 0x1988, 2);
-	so_data  (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */
-	so_data  (so, p->cfg.high_temp);
+	so = so_new(8, 0);
 	so_method(so, tesla, 0x1298, 1);
 	so_data  (so, p->cfg.high_result);
 	so_method(so, tesla, 0x19a8, 1);
@@ -2445,6 +2448,98 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	so_ref(NULL, &so);
 }
 
+/*
+ * 1510 = bitmask to enable clipping planes
+ * 1688 = two-sided lighting enable
+ * 16ac = entry count of mapping table at [16bc]
+ * 16b0 = count of temporaries used in VP
+ *
+ * 1904 = 0x01CCBBFF (01 is sometimes 00 - ?)
+ *	CC = number of color components in map (primary + secondary)
+ *	BB = first back color's map index (colors should be contiguous)
+ *	FF = first front color's map index
+ *
+ * 1908 = 0x0000HHLL
+ *	LL = first clipping distance map index (4 if no UCPs)
+ *	HH = last clipping distance map index + 1 (0 if no UCPs)
+ *
+ * 1910 = 0x00000SSe
+ *	 e = enable point size output (0 / 1)
+ *	SS = point size map index (0 if disabled)
+ *
+ * 1988 = 0xMMIInnii
+ *	MM = bitmask to un-mask masked VP/GP outputs (i.e. HPOS, generic ?)
+ *	nn = map index of first non-masked output, where to put front color
+ *	II = count of non-masked interpolants
+ *	ii = almost always equal to II (except if II -> 00, why ?)
+ */
+void
+nv50_linkage_validate(struct nv50_context *nv50)
+{
+	/* Builds the mapping table (0x16bc) and the 0x1904-0x1910 / 0x1988
+	 * registers from the FP config; appends them to the vertprog stateobj.
+	 * Works like this for now; maybe we can simplify later. */
+	struct nouveau_stateobj *so = nv50->state.vertprog;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nv50_program *vp = nv50->vertprog;
+	struct nv50_program *fp = nv50->fragprog;
+
+	uint32_t regs[5];
+	uint32_t map[8], i, n, k, m = 4; /* m: next free byte slot in map[]; slots 0-3 are taken from fp map[0] */
+
+	memset(map, 0, 8 * sizeof(uint32_t));
+	map[0] = fp->cfg.fp.map[0];
+
+	regs[1] /* 1908 */ = 0x00000004;
+	regs[2] /* 190c */ = 0x00000000;
+	regs[3] /* 1910 */ = 0x00000000;
+	regs[0] /* 1904 */ = fp->cfg.fp.regs[0];
+	regs[4] /* 1988 */ = fp->cfg.fp.regs[1];
+
+	so_method(so, tesla, 0x1688, 1); /* 1688: two-sided lighting enable; data follows in either branch below */
+
+	if (nv50->rasterizer->pipe.light_twoside) {
+		so_data(so, 1);
+		n = (regs[0] >> 16) & 0xff; /* bits 16-23 of the FP's regs[0]: count of mapped color components */
+
+		/* copy front color mappings and add output offset to BFC0 */
+		for (i = 4; i < 4 + n; i++, m++) {
+			k = fp->cfg.fp.map[i / 4] >> (8 * (i % 4)); /* four 8-bit entries are packed per 32-bit word */
+			k &= 0xff;
+			map[m / 4] |= (k + vp->cfg.vp.bcol) << (8 * (m % 4));
+		}
+
+		regs[0] += n; /* NOTE(review): presumably moves the front color index past the n entries inserted above - confirm */
+		regs[2] += (n << 8);
+	} else
+		so_data(so, 0);
+
+	for (i = 4; i < fp->cfg.fp.high_map; i++, m++) { /* copy FP map entries 4..high_map-1 unchanged */
+		k = fp->cfg.fp.map[i / 4] >> (8 * (i % 4));
+		k &= 0xff;
+		map[m / 4] |= k << (8 * (m % 4)); /* NOTE(review): m is never checked against the 32 entries map[8] can hold */
+	}
+
+	so_method(so, tesla, 0x16ac, 2);
+	so_data  (so, m); /* 16ac: entry count of the mapping table */
+	so_data  (so, vp->cfg.high_temp);
+
+	so_method(so, tesla, 0x1904, 4);
+	so_data  (so, regs[0]);
+	so_data  (so, regs[1]);
+	so_data  (so, regs[2]);
+	so_data  (so, regs[3]);
+
+	n = (m / 4) + ((m % 4) ? 1 : 0); /* number of 32-bit words needed for m byte-sized entries, rounded up */
+	so_method(so, tesla, 0x16bc, n);
+	for (i = 0; i < n; i++)
+		so_data(so, map[i]);
+
+	so_method(so, tesla, 0x1988, 2);
+        so_data  (so, regs[4]);
+        so_data  (so, fp->cfg.high_temp);
+}
+
 void
 nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
 {
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index ed3f67b..b7921ad 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -39,6 +39,7 @@ struct nv50_program {
 		unsigned high_result;
 		struct {
 			unsigned attr[2];
+			unsigned bcol;
 		} vp;
 		struct {
 			unsigned regs[4];
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 0caf4b4..aa02947 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -199,6 +199,9 @@ nv50_state_validate(struct nv50_context *nv50)
 	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
 		nv50_fragprog_validate(nv50);
 
+	if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG))
+		nv50_linkage_validate(nv50);
+
 	if (nv50->dirty & NV50_NEW_RASTERIZER)
 		so_ref(nv50->rasterizer->so, &nv50->state.rast);
 
-- 
1.6.0.6


--------------090503050107050804030002
Content-Type: text/plain;
 name="0008-nv50-introduce-linkage-stateobj.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0008-nv50-introduce-linkage-stateobj.patch"



More information about the Nouveau mailing list