[PATCH 09/13] nv50: move allocation of pc regs

Christoph Bumiller e0425955 at student.tuwien.ac.at
Sat Sep 12 06:36:09 PDT 2009


Make use of tgsi_shader_info to determine how many nv50_regs
we need to allocate, if program uses KIL, or writes DEPR.
---
 src/gallium/drivers/nv50/nv50_program.c |  284 ++++++++++++++-----------------
 1 files changed, 129 insertions(+), 155 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index e2adeca..c8157f1 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1575,7 +1575,6 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_kil(pc, src[0][1]);
 		emit_kil(pc, src[0][2]);
 		emit_kil(pc, src[0][3]);
-		pc->p->cfg.fp.regs[2] |= 0x00100000;
 		break;
 	case TGSI_OPCODE_LIT:
 		emit_lit(pc, &dst[0], mask, &src[0][0]);
@@ -1754,64 +1753,52 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 }
 
 static void
-prep_inspect_insn(struct nv50_pc *pc, const union tgsi_full_token *tok,
-		  unsigned *r_usage[2])
+prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 {
-	const struct tgsi_full_instruction *insn;
+	struct nv50_reg *reg = NULL;
 	const struct tgsi_full_src_register *src;
 	const struct tgsi_dst_register *dst;
+	unsigned i, c, k, mask;
 
-	unsigned i, c, k, n, mask, *acc_p;
-
-	insn = &tok->FullInstruction;
 	dst = &insn->FullDstRegisters[0].DstRegister;
 	mask = dst->WriteMask;
 
-	if (!r_usage[0])
-		r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
-	if (!r_usage[1])
-		r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
+        if (dst->File == TGSI_FILE_TEMPORARY)
+                reg = pc->temp;
+        else
+        if (dst->File == TGSI_FILE_OUTPUT)
+                reg = pc->result;
 
-	if (dst->File == TGSI_FILE_TEMPORARY) {
+	if (reg) {
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			r_usage[0][dst->Index * 4 + c] = pc->insn_nr;
+			reg[dst->Index * 4 + c].acc = pc->insn_nr;
 		}
 	}
 
 	for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
 		src = &insn->FullSrcRegisters[i];
 
-		switch (src->SrcRegister.File) {
-		case TGSI_FILE_TEMPORARY:
-			acc_p = r_usage[0];
-			break;
-		case TGSI_FILE_INPUT:
-			acc_p = r_usage[1];
-			break;
-		default:
+		if (src->SrcRegister.File == TGSI_FILE_TEMPORARY)
+			reg = pc->temp;
+		else
+		if (src->SrcRegister.File == TGSI_FILE_INPUT)
+			reg = pc->attr;
+		else
 			continue;
-		}
 
 		mask = nv50_tgsi_src_mask(insn, i);
 
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-
 			k = tgsi_util_get_full_src_register_extswizzle(src, c);
-			switch (k) {
-			case TGSI_EXTSWIZZLE_X:
-			case TGSI_EXTSWIZZLE_Y:
-			case TGSI_EXTSWIZZLE_Z:
-			case TGSI_EXTSWIZZLE_W:
-				n = src->SrcRegister.Index * 4 + k;
-				acc_p[n] = pc->insn_nr;
-				break;
-			default:
-				break;
-			}
+
+			if (k > TGSI_EXTSWIZZLE_W)
+				continue;
+
+			reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr;
 		}
 	}
 }
@@ -1986,8 +1973,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 }
 
 static unsigned
-load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
-	       int *aid, int *p_oid)
+load_fp_attrib(struct nv50_pc *pc, int i, int *mid, int *aid, int *p_oid)
 {
 	struct nv50_reg *iv;
 	int oid, c, n;
@@ -1997,15 +1983,11 @@ load_fp_attrib(struct nv50_pc *pc, int i, unsigned *acc, int *mid,
 
 	for (c = 0, n = i * 4; c < 4; c++, n++) {
 		oid = (*p_oid)++;
-		pc->attr[n].type = P_TEMP;
-		pc->attr[n].index = i;
 
-		if (pc->attr[n].acc == acc[n])
+		if (!pc->attr[n].acc)
 			continue;
 		mask |= (1 << c);
 
-		pc->attr[n].acc = acc[n];
-		pc->attr[n].rhw = pc->attr[n].hw = -1;
 		alloc_reg(pc, &pc->attr[n]);
 
 		pc->attr[n].rhw = (*aid)++;
@@ -2025,23 +2007,13 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 	struct tgsi_parse_context p;
 	boolean ret = FALSE;
 	unsigned i, c;
-	unsigned fcol, bcol, fcrd, depr;
+	unsigned fcol, bcol, fcrd;
 
 	/* count (centroid) perspective interpolations */
 	unsigned centroid_loads = 0;
 	unsigned perspect_loads = 0;
 
-	/* track register access for temps and attrs */
-	unsigned *r_usage[2];
-	r_usage[0] = NULL;
-	r_usage[1] = NULL;
-
-	depr = fcol = bcol = fcrd = 0xffff;
-
-	if (pc->p->type == PIPE_SHADER_FRAGMENT) {
-		pc->p->cfg.fp.regs[0] = 0x01000404;
-		pc->p->cfg.fp.regs[1] = 0x00000400;
-	}
+	fcol = bcol = fcrd = ~0;
 
 	tgsi_parse_init(&p, pc->p->pipe.tokens);
 	while (!tgsi_parse_end_of_tokens(&p)) {
@@ -2071,32 +2043,11 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
 			switch (d->Declaration.File) {
 			case TGSI_FILE_TEMPORARY:
-				if (pc->temp_nr < (last + 1))
-					pc->temp_nr = last + 1;
 				break;
 			case TGSI_FILE_OUTPUT:
-				if (pc->result_nr < (last + 1))
-					pc->result_nr = last + 1;
-
-				if (!d->Declaration.Semantic)
-					break;
-
-				switch (d->Semantic.SemanticName) {
-				case TGSI_SEMANTIC_POSITION:
-					depr = first;
-					pc->p->cfg.fp.regs[2] |= 0x00000100;
-					pc->p->cfg.fp.regs[3] |= 0x00000011;
-					break;
-				default:
-					break;
-				}
-
 				break;
 			case TGSI_FILE_INPUT:
 			{
-				if (pc->attr_nr < (last + 1))
-					pc->attr_nr = last + 1;
-
 				if (pc->p->type != PIPE_SHADER_FRAGMENT)
 					break;
 
@@ -2121,10 +2072,6 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 						fcol = first;
 						mode = INTERP_PERSPECTIVE;
 						break;
-					case TGSI_SEMANTIC_BCOLOR:
-						bcol = first;
-						mode = INTERP_PERSPECTIVE;
-						break;
 					}
 				}
 
@@ -2142,8 +2089,6 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			}
 				break;
 			case TGSI_FILE_CONSTANT:
-				if (pc->param_nr < (last + 1))
-					pc->param_nr = last + 1;
 				break;
 			case TGSI_FILE_SAMPLER:
 				break;
@@ -2156,49 +2101,29 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			break;
 		case TGSI_TOKEN_TYPE_INSTRUCTION:
 			pc->insn_nr++;
-			prep_inspect_insn(pc, tok, r_usage);
+			prep_inspect_insn(pc, &tok->FullInstruction);
 			break;
 		default:
 			break;
 		}
 	}
 
-	if (pc->temp_nr) {
-		pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg));
-		if (!pc->temp)
-			goto out_err;
-
-		for (i = 0; i < pc->temp_nr; i++) {
-			for (c = 0; c < 4; c++) {
-				ctor_reg(&pc->temp[i*4+c], P_TEMP, i, -1);
-				pc->temp[i*4+c].acc = r_usage[0][i*4+c];
-			}
-		}
-	}
-
 	if (pc->attr_nr) {
 		int oid = 4, mid = 4, aid = 0;
 		/* oid = VP output id
 		 * aid = FP attribute/interpolant id
 		 * mid = VP output mapping field ID
 		 */
-
-		pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg));
-		if (!pc->attr)
-			goto out_err;
-
 		if (pc->p->type == PIPE_SHADER_FRAGMENT) {
 			/* position should be loaded first */
-			if (fcrd != 0xffff) {
+			if (fcrd < 0x40) {
 				unsigned mask;
 				mid = 0;
-				mask = load_fp_attrib(pc, fcrd, r_usage[1],
-						      &mid, &aid, &oid);
-				oid = 0;
+				mask = load_fp_attrib(pc, fcrd, &mid, &aid,
+						      &oid);
 				pc->p->cfg.fp.regs[1] |= (mask << 24);
-				pc->p->cfg.fp.map[0] = 0x04040404 * fcrd;
+				pc->p->cfg.fp.map[0] += 0x04040404 * fcrd;
 			}
-			pc->p->cfg.fp.map[0] += 0x03020100;
 
 			/* should do MAD fcrd.xy, fcrd, SOME_CONST, fcrd */
 
@@ -2228,18 +2153,13 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			}
 
 			for (c = 0; c < 4; c++) {
-				/* I don't know what these values do, but
-				 * let's set them like the blob does:
-				 */
-				if (fcol != 0xffff && r_usage[1][fcol * 4 + c])
-					pc->p->cfg.fp.regs[0] += 0x00010000;
-				if (bcol != 0xffff && r_usage[1][bcol * 4 + c])
+				/* XXX: secondary colour, tbd */
+				if (fcol < 0x40 && pc->attr[fcol * 4 + c].acc)
 					pc->p->cfg.fp.regs[0] += 0x00010000;
 			}
 
-			for (i = 0; i < pc->attr_nr; i++)
-				load_fp_attrib(pc, i, r_usage[1],
-					       &mid, &aid, &oid);
+			for (i = ((fcrd < 0x40) ? 1 : 0); i < pc->attr_nr; i++)
+				load_fp_attrib(pc, i, &mid, &aid, &oid);
 
 			if (pc->iv_p)
 				free_temp(pc, pc->iv_p);
@@ -2253,48 +2173,26 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			for (i = 0; i < pc->attr_nr * 4; i++) {
 				pc->p->cfg.vp.attr[aid / 32] |=
 					(1 << (aid % 32));
-				ctor_reg(&pc->attr[i], P_ATTR, i / 4, aid++);
+				pc->attr[i].hw = aid++;
 			}
 		}
 	}
 
 	if (pc->result_nr) {
-		unsigned nr = pc->result_nr * 4;
-		int rid = 0;
-
-		pc->result = MALLOC(nr * sizeof(struct nv50_reg));
-		if (!pc->result)
-			goto out_err;
-
 		if (pc->p->type == PIPE_SHADER_VERTEX) {
-			for (i = 0; i < nr; i++)
-				ctor_reg(&pc->result[i], P_RESULT, i / 4, i);
+			for (i = 0; i < pc->result_nr * 4; i++)
+				pc->result[i].hw = i;
 		} else {
-			/* pc->p->type == PIPE_SHADER_FRAGMENT */
-			for (i = 0; i < pc->result_nr; i++) {
-				for (c = 0; c < 4; c++) {
-					ctor_reg(&pc->result[i*4+c],
-						 P_TEMP, i, -1);
-					if (i != depr)
-						pc->result[i*4+c].rhw = rid++;
-				}
-			}
-
-			if (depr != 0xffff)
-				pc->result[depr*4+2].rhw = rid++;
-		}
-	}
-
-	if (pc->param_nr) {
-		int rid = 0;
-
-		pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg));
-		if (!pc->param)
-			goto out_err;
+			/* type == PIPE_SHADER_FRAGMENT
+			 * FragDepth is always first TGSI and last HW output
+			 */
+			int rid = 0;
+			i = pc->p->info.writes_z ? 4 : 0;
 
-		for (i = 0; i < pc->param_nr; i++) {
-			for (c = 0; c < 4; c++, rid++)
-				ctor_reg(&pc->param[rid], P_CONST, i, rid);
+			for (; i < pc->result_nr * 4; i++)
+				pc->result[i].rhw = rid++;
+			if (pc->p->info.writes_z)
+				pc->result[2].rhw = rid;
 		}
 	}
 
@@ -2313,11 +2211,6 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
 	ret = TRUE;
 out_err:
-	if (r_usage[0])
-		FREE(r_usage[0]);
-	if (r_usage[1])
-		FREE(r_usage[1]);
-
 	tgsi_parse_free(&p);
 	return ret;
 }
@@ -2340,6 +2233,85 @@ free_nv50_pc(struct nv50_pc *pc)
 }
 
 static boolean
+ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
+{
+	int i, c;
+	unsigned rtype[2] = { P_ATTR, P_RESULT };
+
+	pc->p = p;
+	pc->temp_nr = p->info.file_max[TGSI_FILE_TEMPORARY] + 1;
+	pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
+	pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
+	pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+	p->cfg.high_temp = 4;
+
+	switch (p->type) {
+	case PIPE_SHADER_VERTEX:
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		p->cfg.fp.regs[0] = 0x01000404;
+		p->cfg.fp.regs[1] = 0x00000400;
+
+		p->cfg.fp.map[0] = 0x03020100;
+		p->cfg.fp.high_map = 1;
+
+		rtype[0] = rtype[1] = P_TEMP;
+
+		if (p->info.writes_z) {
+			p->cfg.fp.regs[2] |= 0x00000100;
+			p->cfg.fp.regs[3] |= 0x00000011;
+		}
+		if (p->info.uses_kill)
+			p->cfg.fp.regs[2] |= 0x00100000;
+		break;
+	}
+
+	if (pc->temp_nr) {
+		pc->temp = MALLOC(pc->temp_nr * 4 * sizeof(struct nv50_reg));
+		if (!pc->temp)
+			return FALSE;
+
+		for (i = 0; i < pc->temp_nr * 4; ++i)
+			ctor_reg(&pc->temp[i], P_TEMP, i / 4, -1);
+	}
+
+	if (pc->attr_nr) {
+		pc->attr = MALLOC(pc->attr_nr * 4 * sizeof(struct nv50_reg));
+		if (!pc->attr)
+			return FALSE;
+
+		for (i = 0; i < pc->attr_nr * 4; ++i)
+			ctor_reg(&pc->attr[i], rtype[0], i / 4, -1);
+	}
+
+	if (pc->result_nr) {
+		unsigned nr = pc->result_nr * 4;
+
+		pc->result = MALLOC(nr * sizeof(struct nv50_reg));
+		if (!pc->result)
+			return FALSE;
+
+		for (i = 0; i < nr; ++i)
+			ctor_reg(&pc->result[i], rtype[1], i / 4, -1);
+	}
+
+	if (pc->param_nr) {
+		int rid = 0;
+
+		pc->param = MALLOC(pc->param_nr * 4 * sizeof(struct nv50_reg));
+		if (!pc->param)
+			return FALSE;
+
+		for (i = 0; i < pc->param_nr; ++i)
+			for (c = 0; c < 4; ++c, ++rid)
+				ctor_reg(&pc->param[rid], P_CONST, i, rid);
+	}
+
+	return TRUE;
+}
+
+static boolean
 nv50_program_tx(struct nv50_program *p)
 {
 	struct tgsi_parse_context parse;
@@ -2350,8 +2322,10 @@ nv50_program_tx(struct nv50_program *p)
 	pc = CALLOC_STRUCT(nv50_pc);
 	if (!pc)
 		return FALSE;
-	pc->p = p;
-	pc->p->cfg.high_temp = 4;
+
+	ret = ctor_nv50_pc(pc, p);
+	if (ret == FALSE)
+		goto out_cleanup;
 
 	ret = nv50_program_tx_prep(pc);
 	if (ret == FALSE)
-- 
1.6.3.3


--------------010208090904000401070505
Content-Type: text/plain;
 name="0010-nv50-proper-linkage-between-VP-and-FP.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0010-nv50-proper-linkage-between-VP-and-FP.patch"



More information about the Nouveau mailing list