[PATCH] nv50: fix previous patches

Christoph Bumiller e0425955 at student.tuwien.ac.at
Wed Jun 24 13:18:37 PDT 2009


This fixes the previous patches and adds some debugging output
if NV50_PROGRAM_DUMP is un-commented.
I will merge this into the patches later.
---
 src/gallium/drivers/nv50/nv50_program.c |   94 +++++++++++++++++++------------
 1 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 4b05075..caf03c9 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -28,11 +28,12 @@
 #include "pipe/p_shader_tokens.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
 
 #include "nv50_context.h"
 
 #define NV50_SU_MAX_TEMP 64
-//#define NV50_PROGRAM_DUMP
+/* #define NV50_PROGRAM_DUMP */
 
 /* ARL - gallium craps itself on progs/vp/arl.txt
  *
@@ -44,7 +45,7 @@
  * case, if the emit_src() causes the inst to suddenly become long.
  *
  * Verify half-insns work where expected - if they are used, they have to
- * come in pairs. We cannot branch to between two half insns.
+ * come in pairs. Also, we cannot branch to a point between two half insns.
  *
  * Watch dst == src vectors, can overwrite components that are needed:
  *	p.e. SUB R0, R0.yzxw, R0
@@ -52,8 +53,8 @@
  * some cases (notably XPD) may still be bad though.
  *
  * Things to check with renouveau:
- *	FP results: can DEPR be mapped to another registers
- *	(currently it goes after all color outputs)
+ *	FP results: can DEPR output be mapped to another register?
+ *	(currently its index is that of the last color's register + 1)
  *
  * 1298 = 0x00000004; or 0x00000005 if DEPR is written
  *
@@ -444,7 +445,7 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 
 
 #define INTERP_LINEAR		0
-#define INTERP_FLAT			1
+#define INTERP_FLAT		1
 #define INTERP_PERSPECTIVE	2
 #define INTERP_CENTROID		4
 
@@ -1852,6 +1853,10 @@ prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
 	dst = &insn->FullDstRegisters[0].DstRegister;
 	mask = dst->WriteMask;
 
+#ifdef NV50_PROGRAM_DUMP
+	tgsi_dump_instruction(insn, 1);
+#endif
+
 	if (dst->File == TGSI_FILE_TEMPORARY) {
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
@@ -1900,13 +1905,14 @@ static unsigned
 prep_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *p_mid,
 	       int *aid, int *p_oid)
 {
-	int c, n, oid = *p_oid, mid = *p_mid;
+	int c, n, oid, mid = *p_mid;
 	unsigned mask = 0;
 
 	for (c = 0, n = i * 4; c < 4; c++, n++) {
 		pc->attr[n].type = P_TEMP;
 		pc->attr[n].hw = -1;
 		pc->attr[n].index = i;
+		oid = (*p_oid)++;
 
 		if (pc->attr[n].acc == acc[n])
 			continue;
@@ -1915,13 +1921,12 @@ prep_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *p_mid,
 		pc->attr[n].acc = acc[n];
 		pc->attr[n].rhw = (*aid)++;
 
-		pc->p->cfg.fp.map[mid / 4] |= (oid + c) << (8 * (mid % 4));
+		pc->p->cfg.fp.map[mid / 4] |= oid << (8 * (mid % 4));
 		mid++;
 		pc->p->cfg.fp.regs[1] += 0x00010001;
 	}
 
 	*p_mid = mid;
-	*p_oid = oid + 4;
 	return mask;
 }
 
@@ -1958,6 +1963,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			const struct tgsi_full_immediate *imm =
 				&p.FullToken.FullImmediate;
 
+#ifdef NV50_PROGRAM_DUMP
+			tgsi_dump_immediate(imm);
+#endif
+
 			ctor_immd(pc, imm->u.ImmediateFloat32[0].Float,
 				      imm->u.ImmediateFloat32[1].Float,
 				      imm->u.ImmediateFloat32[2].Float,
@@ -1973,6 +1982,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			first = d->DeclarationRange.First;
 			last = d->DeclarationRange.Last;
 
+#ifdef NV50_PROGRAM_DUMP
+			tgsi_dump_declaration(d);
+#endif
+
 			switch (d->Declaration.File) {
 			case TGSI_FILE_TEMPORARY:
 				break;
@@ -2094,38 +2107,36 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			/* position should be loaded first */
 			if (fcrd != 0xffff) {
 				unsigned mask;
-				oid = mid = 0;
+				oid = 0;
+				mid = 0;
 				mask = prep_fp_attrib(pc, fcrd, r_usage[1],
 						      &mid, &aid, &oid);
 				pc->p->cfg.fp.regs[1] |= (mask << 24);
 				pc->p->cfg.fp.map[0] += 0x04040404 * fcrd;
+				oid = 0;
 			}
 
 			/* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */
 
 			if (perspect_loads) {
 				pc->iv_p = alloc_temp(pc, NULL);
-
-				if (!(pc->p->cfg.fp.regs[1] & 0x08000000)) {
-					pc->p->cfg.fp.regs[1] |= 0x08000000;
+				pc->iv_p->rhw = aid - 1;
+				if (!(pc->p->cfg.fp.regs[1] & (1 << 27)))
 					pc->iv_p->rhw = aid++;
-					emit_interp(pc, pc->iv_p, NULL,
-						    INTERP_LINEAR);
-					emit_flop(pc, 0, pc->iv_p, pc->iv_p);
-				} else {
-					pc->iv_p->rhw = aid - 1;
-					emit_flop(pc, 0, pc->iv_p,
-						  &pc->attr[fcrd * 4 + 3]);
-				}
+				pc->p->cfg.fp.regs[1] |= (1 << 27);
+				emit_interp(pc, pc->iv_p, NULL, INTERP_LINEAR);
+				emit_flop(pc, 0, pc->iv_p, pc->iv_p);
 			}
 
 			if (centroid_loads) {
 				pc->iv_c = alloc_temp(pc, NULL);
-				pc->iv_c->rhw = pc->iv_p ? aid - 1 : aid++;
+				pc->iv_c->rhw = aid - 1;
+				if (!(pc->p->cfg.fp.regs[1] & (1 << 27)))
+					pc->iv_c->rhw = aid++;
+				pc->p->cfg.fp.regs[1] |= (1 << 27);
 				emit_interp(pc, pc->iv_c, NULL,
 					    INTERP_CENTROID);
 				emit_flop(pc, 0, pc->iv_c, pc->iv_c);
-				pc->p->cfg.fp.regs[1] |= 0x08000000;
 			}
 
 			/* load colors directly after position - XXX: might
@@ -2301,7 +2312,8 @@ nv50fp_move_outputs(struct nv50_pc *pc)
 	ctor_reg(&out, P_TEMP, -1, -1);
 
 	for (i = 0; i < pc->result_nr * 4; i++) {
-		if (pc->result[i].rhw < 0)
+		if (pc->result[i].rhw < 0 ||
+		    pc->result[i].rhw == pc->result[i].hw)
 			continue;
 		out.hw = pc->result[i].rhw;
 		emit_mov(pc, &out, &pc->result[i]);
@@ -2337,7 +2349,7 @@ static void nv50_program_tx_postprocess(struct nv50_pc *pc)
 	if (pc->p->type == PIPE_SHADER_FRAGMENT)
 		nv50fp_move_outputs(pc);
 	else
-	if (pc->p->type == PIPE_SHADER_VERTEX)
+	if (pc->p->type == PIPE_SHADER_VERTEX && pc->p->cfg.vp.ucp.nr > 0)
 		nv50vp_ucp_append(pc);
 
 	/* collect branching instructions, we need to adjust their target
@@ -2811,14 +2823,26 @@ nv50_linkage_create(struct nv50_context *nv50)
 	so_ref(so, &ln->so);
 	so_ref(NULL, &so);
 
+#ifdef NV50_PROGRAM_DUMP
+	fprintf(stderr, "LINKAGE:\n");
+	for (i = 0; i < n; i++)
+		fprintf(stderr, "MAP[%i] = 0x%08x\n",i,map[i]);
+	fprintf(stderr, "REG1904 = 0x%08x\n",regs[0]);
+	fprintf(stderr, "REG1908 = 0x%08x\n",regs[1]);
+	fprintf(stderr, "REG190c = 0x%08x\n",regs[2]);
+	fprintf(stderr, "REG1910 = 0x%08x\n",regs[3]);
+	fprintf(stderr, "REG1988 = 0x%08x\n",regs[4]);
+	fprintf(stderr, "REG19a8 = 0x%08x\n",fp->cfg.fp.regs[2]);
+	fprintf(stderr, "REG196c = 0x%08x\n",fp->cfg.fp.regs[3]);
+#endif
+
 	return ln;
 }
 
 void nv50_linkage_validate(struct nv50_context *nv50)
 {
-	struct nv50_linkage *it, *ln = NULL;
+	struct nv50_linkage *ln;
 	struct nv50_program *vp = nv50->vertprog;
-	struct nv50_program *fp = nv50->fragprog;
 	unsigned cfg;
 
 	cfg = nv50->rasterizer->pipe.light_twoside;
@@ -2827,20 +2851,18 @@ void nv50_linkage_validate(struct nv50_context *nv50)
 		cfg |= (1 << 2);
 
 	if (vp->ln) {
-		it = vp->ln->next[0];
+		ln = vp->ln->next[0];
 		do {
-			if (it->prog[1] == (void *)fp && it->cfg == cfg) {
-				ln = it;
-				break;
+			if (ln->prog[1] == nv50->fragprog && ln->cfg == cfg) {
+				so_ref(ln->so, &nv50->state.plinkage);
+				return;
 			}
-			it = it->next[0];
-		} while (it != vp->ln);
+			ln = ln->next[0];
+		} while (ln != vp->ln);
 	}
 
-	if (!ln) {
-		ln = nv50_linkage_create(nv50);
-		ln->cfg = cfg;
-	}
+	ln = nv50_linkage_create(nv50);
+	ln->cfg = cfg;
 
 	so_ref(ln->so, &nv50->state.plinkage);
 }
-- 
1.6.0.6


--------------090105040905020104000900--


More information about the Nouveau mailing list