[PATCH] nv50: introduce linkage stateobj

Christoph Bumiller e0425955 at student.tuwien.ac.at
Sun Jun 21 09:16:52 PDT 2009


An attempt to improve performance, since assembling the VP output
to FP input map became a mess.

This probably makes it even worse: it creates the VP and FP stateobjs
only once and introduces a third shader-related stateobj, called
linkage, which is stored in a list object that is looked up or
created on validation. There is a separate linkage object for each
configuration (VP, FP, BFC, PTSZ).
---
 src/gallium/drivers/nv50/nv50_context.h        |    1 +
 src/gallium/drivers/nv50/nv50_program.c        |  143 +++++++++++++++++++-----
 src/gallium/drivers/nv50/nv50_program.h        |   10 ++
 src/gallium/drivers/nv50/nv50_state_validate.c |    2 +
 4 files changed, 129 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index c31c42a..aadcfda 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -117,6 +117,7 @@ struct nv50_state {
 	unsigned miptree_nr;
 	struct nouveau_stateobj *vertprog;
 	struct nouveau_stateobj *fragprog;
+	struct nouveau_stateobj *plinkage;
 	struct nouveau_stateobj *vtxfmt;
 	struct nouveau_stateobj *vtxbuf;
 };
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 30a1d32..5fae325 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2407,16 +2407,21 @@ nv50_vertprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(32, 0);
-	so_method(so, tesla, 0x1650, 2);
-	so_data  (so, p->cfg.vp.attr[0]);
-	so_data  (so, p->cfg.vp.attr[1]);
-	so_method(so, tesla, 0x16b8, 1);
-	so_data  (so, p->cfg.high_result);
-	so_method(so, tesla, 0x140c, 1);
-	so_data  (so, p->code->start);
-	so_ref(so, &nv50->state.vertprog);
-	so_ref(NULL, &so);
+	if (!p->so) {
+		so = so_new(7, 0);
+		so_method(so, tesla, 0x1650, 2);
+		so_data  (so, p->cfg.vp.attr[0]);
+		so_data  (so, p->cfg.vp.attr[1]);
+		so_method(so, tesla, 0x16b8, 1);
+		so_data  (so, p->cfg.high_result);
+		so_method(so, tesla, 0x140c, 1);
+		so_data  (so, p->code->start);
+		so_ref(so, &p->so);
+		so_ref(NULL, &so);
+
+	}
+
+	so_ref(p->so, &nv50->state.vertprog);
 }
 
 void
@@ -2435,17 +2440,64 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(8, 0);
-	so_method(so, tesla, 0x1298, 1);
-	so_data  (so, p->cfg.high_result);
-	so_method(so, tesla, 0x19a8, 1);
-	so_data  (so, p->cfg.fp.regs[2]);
-	so_method(so, tesla, 0x196c, 1);
-	so_data  (so, p->cfg.fp.regs[3]);
-	so_method(so, tesla, 0x1414, 1);
-	so_data  (so, p->code->start);
-	so_ref(so, &nv50->state.fragprog);
-	so_ref(NULL, &so);
+	if (!p->so) {
+		so = so_new(8, 0);
+		so_method(so, tesla, 0x1298, 1);
+		so_data  (so, p->cfg.high_result);
+		so_method(so, tesla, 0x19a8, 1);
+		so_data  (so, p->cfg.fp.regs[2]);
+		so_method(so, tesla, 0x196c, 1);
+		so_data  (so, p->cfg.fp.regs[3]);
+		so_method(so, tesla, 0x1414, 1);
+		so_data  (so, p->code->start);
+		so_ref(so, &p->so);
+		so_ref(NULL, &so);
+	}
+
+	so_ref(p->so, &nv50->state.fragprog);
+}
+
+static struct nv50_linkage *
+program_add_linkage(struct nv50_program *vp, struct nv50_program *fp)
+{
+	struct nv50_linkage *ln = CALLOC_STRUCT(nv50_linkage); /* NOTE(review): used below without a NULL check */
+	struct nv50_program *pg[2] = { vp, fp };
+	unsigned i;
+	/* Insert a new linkage into vp's list (index 0) and fp's list (index 1). */
+	for (i = 0; i < 2; i++) {
+		if (pg[i]->ln) {
+			ln->next[i] = pg[i]->ln->next[i]; /* splice in right after the head */
+			pg[i]->ln->next[i] = ln;
+		} else {
+			pg[i]->ln = ln; /* first entry: becomes the head ... */
+			ln->next[i] = ln; /* ... and points at itself (circular list) */
+		}
+		ln->prog[i] = (void *)pg[i]; /* remember the owner for program_del_linkage() */
+	}
+
+	return ln;
+}
+
+static void
+program_del_linkage(struct nv50_linkage *ln)
+{
+	struct nv50_linkage *it;
+	struct nv50_program *pg[2];
+	unsigned i;
+	/* Unlink ln from the lists of both programs it references, then free it. */
+	pg[0] = (struct nv50_program *)ln->prog[0];
+	pg[1] = (struct nv50_program *)ln->prog[1];
+	/* Per list: find ln's predecessor, bypass ln, then fix the head if it was ln. */
+	for (i = 0; i < 2; i++) {
+		for (it = pg[i]->ln; it->next[i] != ln; it = it->next[i]); /* empty body: stops at the predecessor */
+		it->next[i] = ln->next[i];
+		if (pg[i]->ln == ln)
+			pg[i]->ln = (ln->next[i] == ln) ? NULL : ln->next[i]; /* NULL when ln was the only entry */
+	}
+	/* Release ln->so via so_ref(NULL, ...) before freeing the node itself. */
+	if (ln->so)
+		so_ref(NULL, &ln->so);
+	FREE(ln);
 }
 
 /*
@@ -2473,16 +2525,14 @@ nv50_fragprog_validate(struct nv50_context *nv50)
  *	II = count of non-masked interpolants
  *	ii = almost always equal to II (except if II -> 00, why ?)
  */
-void
-nv50_linkage_validate(struct nv50_context *nv50)
+static struct nv50_linkage *
+nv50_linkage_create(struct nv50_context *nv50)
 {
-	/* this is going to be rather complicated at first, but it works
-	 * like this; maybe we can simplify later, though
-	 */
-	struct nouveau_stateobj *so = nv50->state.vertprog;
+	struct nv50_linkage *ln;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_program *vp = nv50->vertprog;
 	struct nv50_program *fp = nv50->fragprog;
+	struct nouveau_stateobj *so = so_new(32, 0);
 
 	uint32_t regs[5];
 	uint32_t map[8], i, n, k, m = 4;
@@ -2538,6 +2588,42 @@ nv50_linkage_validate(struct nv50_context *nv50)
 	so_method(so, tesla, 0x1988, 2);
         so_data  (so, regs[4]);
         so_data  (so, fp->cfg.high_temp);
+
+	ln = program_add_linkage(vp, fp);
+
+	so_ref(so, &ln->so);
+	so_ref(NULL, &so);
+
+	return ln;
+}
+
+void nv50_linkage_validate(struct nv50_context *nv50)
+{
+	struct nv50_linkage *it, *ln = NULL;
+	struct nv50_program *vp = nv50->vertprog;
+	struct nv50_program *fp = nv50->fragprog;
+	unsigned cfg;
+	/* Select the linkage for (vp, fp, cfg), creating it if none exists yet. */
+	cfg = nv50->rasterizer->pipe.light_twoside; /* bit 0 */
+	cfg |= nv50->rasterizer->pipe.point_size_per_vertex << 1; /* bit 1 */
+	/* Walk vp's circular list from the head, so the head is compared as well. */
+	if (vp->ln) {
+		it = vp->ln; /* starting at next[0] skipped the head in lists of 2+ entries */
+		do {
+			if (it->prog[1] == (void *)fp && it->cfg == cfg) {
+				ln = it;
+				break;
+			}
+			it = it->next[0];
+		} while (it != vp->ln);
+	}
+	/* No existing linkage matches: build one and tag it with this cfg. */
+	if (!ln) {
+		ln = nv50_linkage_create(nv50);
+		ln->cfg = cfg;
+	}
+	/* Make the context's plinkage state reference the selected stateobj. */
+	so_ref(ln->so, &nv50->state.plinkage);
 }
 
 void
@@ -2558,6 +2644,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
 	nouveau_resource_free(&p->data[1]);
 	nouveau_resource_free(&p->code);
 
+	while (p->ln)
+		program_del_linkage(p->ln);
+
 	p->translated = 0;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index b7921ad..6478338 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -15,6 +15,13 @@ struct nv50_program_exec {
 	} param;
 };
 
+struct nv50_linkage {
+	struct nv50_linkage *next[2]; /* circular list links: [0] in the VP's list, [1] in the FP's list */
+	struct nouveau_stateobj *so; /* stateobj attached by nv50_linkage_create() */
+	void *prog[2]; /* owning struct nv50_program pointers: [0] = VP, [1] = FP */
+	unsigned cfg; /* bit 0: light_twoside, bit 1: point_size_per_vertex */
+};
+
 struct nv50_program {
 	struct pipe_shader_state pipe;
 	struct tgsi_shader_info info;
@@ -34,6 +41,9 @@ struct nv50_program {
 	unsigned immd_nr;
 	unsigned param_nr;
 
+	struct nouveau_stateobj *so;
+	struct nv50_linkage *ln;
+
 	struct {
 		unsigned high_temp;
 		unsigned high_result;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index aa02947..cb9bb76 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -150,6 +150,8 @@ nv50_state_emit(struct nv50_context *nv50)
 		so_emit(chan, nv50->state.vertprog);
 	if (nv50->state.dirty & NV50_NEW_FRAGPROG)
 		so_emit(chan, nv50->state.fragprog);
+	if (nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG))
+		so_emit(chan, nv50->state.plinkage);
 	if (nv50->state.dirty & NV50_NEW_RASTERIZER)
 		so_emit(chan, nv50->state.rast);
 	if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
-- 
1.6.0.6


--------------090503050107050804030002
Content-Type: text/plain;
 name="0009-nv50-support-for-user-clip-planes.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0009-nv50-support-for-user-clip-planes.patch"



More information about the Nouveau mailing list