[PATCH] nv50: defer FP attribute loading

Christoph Bumiller e0425955 at student.tuwien.ac.at
Sun Jun 21 07:54:43 PDT 2009


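Instead of allocating a TEMP and interpolating every fragment program
input up front, allocate the TEMP when the attribute is first used as a
source operand; the interpolation is emitted at that point too, except
for TXP. This might keep the number of used TEMPs down.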
---
 src/gallium/drivers/nv50/nv50_program.c |   59 ++++++++++++++++++++-----------
 1 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 28a9f2a..249f069 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -132,6 +132,7 @@ struct nv50_pc {
 
 	boolean allow32;
 	boolean join_on;
+	boolean preload;
 };
 
 static inline void
@@ -1242,6 +1243,23 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 	e->inst[1] |= q;
 }
 
+static void
+load_interpolant(struct nv50_pc *pc, struct nv50_reg *r)
+{
+	struct nv50_reg *iv = pc->iv_p;
+	int rhw = r->rhw;
+
+	if (pc->interp_mode[r->index] & INTERP_CENTROID)
+		iv = pc->iv_c;
+
+	r->rhw = -1;
+	alloc_reg(pc, r);
+	r->rhw = rhw;
+
+	if (pc->preload)
+		emit_interp(pc, r, iv, pc->interp_mode[r->index]);
+}
+
 static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
@@ -1297,6 +1315,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 		switch (src->SrcRegister.File) {
 		case TGSI_FILE_INPUT:
 			r = &pc->attr[src->SrcRegister.Index * 4 + c];
+			if (r->hw == -1 && r->rhw >= 0)
+				load_interpolant(pc, r);
 			break;
 		case TGSI_FILE_TEMPORARY:
 			r = &pc->temp[src->SrcRegister.Index * 4 + c];
@@ -1416,6 +1436,8 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 	if (*pp_rtmp && (*pp_rtmp)->type != P_TEMP && (nr_dst > 1 || sat))
 		pp_rtmp = &temp;
 
+	pc->preload = (inst->Instruction.Opcode != TGSI_OPCODE_TXP);
+
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fs =
 			&inst->FullSrcRegisters[i];
@@ -1860,18 +1882,15 @@ prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
 }
 
 static unsigned
-load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
+prep_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *p_mid,
 	       int *aid, int *p_oid)
 {
-	struct nv50_reg *iv;
-	int oid, c, n;
+	int c, n, oid = *p_oid, mid = *p_mid;
 	unsigned mask = 0;
 
-	iv = (pc->interp_mode[i] & INTERP_CENTROID) ? pc->iv_c : pc->iv_p;
-
 	for (c = 0, n = i * 4; c < 4; c++, n++) {
-		oid = (*p_oid)++;
 		pc->attr[n].type = P_TEMP;
+		pc->attr[n].hw = -1;
 		pc->attr[n].index = i;
 
 		if (pc->attr[n].acc == acc[n])
@@ -1879,17 +1898,15 @@ load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
 		mask |= (1 << c);
 
 		pc->attr[n].acc = acc[n];
-		pc->attr[n].rhw = pc->attr[n].hw = -1;
-		alloc_reg(pc, &pc->attr[n]);
-
 		pc->attr[n].rhw = (*aid)++;
-		emit_interp(pc, &pc->attr[n], iv, pc->interp_mode[i]);
 
-		pc->p->cfg.fp.map[(*mid) / 4] |= oid << (8 * ((*mid) % 4));
-		(*mid)++;
+		pc->p->cfg.fp.map[mid / 4] |= (oid + c) << (8 * (mid % 4));
+		mid++;
 		pc->p->cfg.fp.regs[1] += 0x00010001;
 	}
 
+	*p_mid = mid;
+	*p_oid = oid + 4;
 	return mask;
 }
 
@@ -2063,7 +2080,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			if (fcrd != 0xffff) {
 				unsigned mask;
 				oid = mid = 0;
-				mask = load_fp_attrib(pc, fcrd, r_usage[1],
+				mask = prep_fp_attrib(pc, fcrd, r_usage[1],
 						      &mid, &aid, &oid);
 				pc->p->cfg.fp.regs[1] |= (mask << 24);
 				pc->p->cfg.fp.map[0] += 0x04040404 * fcrd;
@@ -2103,10 +2120,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			i = mid;
 
 			if (fcol[0] != 0xffff)
-				load_fp_attrib(pc, fcol[0], r_usage[1],
+				prep_fp_attrib(pc, fcol[0], r_usage[1],
 					       &mid, &aid, &oid);
 			if (fcol[1] != 0xffff)
-				load_fp_attrib(pc, fcol[1], r_usage[1],
+				prep_fp_attrib(pc, fcol[1], r_usage[1],
 					       &mid, &aid, &oid);
 
 			/* set count of mapped color components */
@@ -2115,14 +2132,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			/* reset oid and load remaining attrs */
 			oid = (fcrd == 0xffff) ? 4 : 0;
 			for (i = 0; i < pc->attr_nr; i++)
-				load_fp_attrib(pc, i, r_usage[1],
+				prep_fp_attrib(pc, i, r_usage[1],
 					       &mid, &aid, &oid);
 
-			if (pc->iv_p)
-				free_temp(pc, pc->iv_p);
-			if (pc->iv_c)
-				free_temp(pc, pc->iv_c);
-
 			pc->p->cfg.fp.high_map = mid;
 		} else {
 			/* vertex program */
@@ -2228,6 +2240,11 @@ free_nv50_pc(struct nv50_pc *pc)
 	if (pc->temp)
 		FREE(pc->temp);
 
+	if (pc->iv_p)
+		free_temp(pc, pc->iv_p);
+	if (pc->iv_c)
+		free_temp(pc, pc->iv_c);
+
 	FREE(pc);
 }
 
-- 
1.6.0.6


--------------090503050107050804030002
Content-Type: text/plain;
 name="0016-nv50-update-comments.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0016-nv50-update-comments.patch"



More information about the Nouveau mailing list