[PATCH] nv50: introduce linkage stateobj
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sun Jun 21 09:16:52 PDT 2009
An attempt to improve performance, since assembling the map from VP
outputs to FP inputs had become a mess.
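This probably makes the mess even worse: it creates the VP and FP
stateobjs only once and introduces a third shader-related stateobj,
called linkage. Linkage objects are kept in a list that is searched on
validation; a matching object is reused, otherwise a new one is created.
There is a separate linkage object for each configuration
(VP, FP, BFC, PTSZ), i.e. each combination of vertex program, fragment
program, two-sided lighting and per-vertex point size.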
---
src/gallium/drivers/nv50/nv50_context.h | 1 +
src/gallium/drivers/nv50/nv50_program.c | 143 +++++++++++++++++++-----
src/gallium/drivers/nv50/nv50_program.h | 10 ++
src/gallium/drivers/nv50/nv50_state_validate.c | 2 +
4 files changed, 129 insertions(+), 27 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index c31c42a..aadcfda 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -117,6 +117,7 @@ struct nv50_state {
unsigned miptree_nr;
struct nouveau_stateobj *vertprog;
struct nouveau_stateobj *fragprog;
+ struct nouveau_stateobj *plinkage;
struct nouveau_stateobj *vtxfmt;
struct nouveau_stateobj *vtxbuf;
};
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 30a1d32..5fae325 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2407,16 +2407,21 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(32, 0);
- so_method(so, tesla, 0x1650, 2);
- so_data (so, p->cfg.vp.attr[0]);
- so_data (so, p->cfg.vp.attr[1]);
- so_method(so, tesla, 0x16b8, 1);
- so_data (so, p->cfg.high_result);
- so_method(so, tesla, 0x140c, 1);
- so_data (so, p->code->start);
- so_ref(so, &nv50->state.vertprog);
- so_ref(NULL, &so);
+ if (!p->so) {
+ so = so_new(7, 0);
+ so_method(so, tesla, 0x1650, 2);
+ so_data (so, p->cfg.vp.attr[0]);
+ so_data (so, p->cfg.vp.attr[1]);
+ so_method(so, tesla, 0x16b8, 1);
+ so_data (so, p->cfg.high_result);
+ so_method(so, tesla, 0x140c, 1);
+ so_data (so, p->code->start);
+ so_ref(so, &p->so);
+ so_ref(NULL, &so);
+
+ }
+
+ so_ref(p->so, &nv50->state.vertprog);
}
void
@@ -2435,17 +2440,64 @@ nv50_fragprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(8, 0);
- so_method(so, tesla, 0x1298, 1);
- so_data (so, p->cfg.high_result);
- so_method(so, tesla, 0x19a8, 1);
- so_data (so, p->cfg.fp.regs[2]);
- so_method(so, tesla, 0x196c, 1);
- so_data (so, p->cfg.fp.regs[3]);
- so_method(so, tesla, 0x1414, 1);
- so_data (so, p->code->start);
- so_ref(so, &nv50->state.fragprog);
- so_ref(NULL, &so);
+ if (!p->so) {
+ so = so_new(8, 0);
+ so_method(so, tesla, 0x1298, 1);
+ so_data (so, p->cfg.high_result);
+ so_method(so, tesla, 0x19a8, 1);
+ so_data (so, p->cfg.fp.regs[2]);
+ so_method(so, tesla, 0x196c, 1);
+ so_data (so, p->cfg.fp.regs[3]);
+ so_method(so, tesla, 0x1414, 1);
+ so_data (so, p->code->start);
+ so_ref(so, &p->so);
+ so_ref(NULL, &so);
+ }
+
+ so_ref(p->so, &nv50->state.fragprog);
+}
+
+static struct nv50_linkage *
+program_add_linkage(struct nv50_program *vp, struct nv50_program *fp)
+{
+ struct nv50_linkage *ln = CALLOC_STRUCT(nv50_linkage);
+ struct nv50_program *pg[2] = { vp, fp };
+ unsigned i;
+
+ for (i = 0; i < 2; i++) {
+ if (pg[i]->ln) {
+ ln->next[i] = pg[i]->ln->next[i];
+ pg[i]->ln->next[i] = ln;
+ } else {
+ pg[i]->ln = ln;
+ ln->next[i] = ln;
+ }
+ ln->prog[i] = (void *)pg[i];
+ }
+
+ return ln;
+}
+
+static void
+program_del_linkage(struct nv50_linkage *ln)
+{
+ struct nv50_linkage *it;
+ struct nv50_program *pg[2];
+ unsigned i;
+
+ pg[0] = (struct nv50_program *)ln->prog[0];
+ pg[1] = (struct nv50_program *)ln->prog[1];
+
+ for (i = 0; i < 2; i++) {
+ for (it = pg[i]->ln; it->next[i] != ln; it = it->next[i]);
+ it->next[i] = ln->next[i];
+ if (pg[i]->ln == ln)
+ pg[i]->ln = (ln->next[i] == ln) ? NULL : ln->next[i];
+ }
+
+ if (ln->so)
+ so_ref(NULL, &ln->so);
+ FREE(ln);
}
/*
@@ -2473,16 +2525,14 @@ nv50_fragprog_validate(struct nv50_context *nv50)
* II = count of non-masked interpolants
* ii = almost always equal to II (except if II -> 00, why ?)
*/
-void
-nv50_linkage_validate(struct nv50_context *nv50)
+static struct nv50_linkage *
+nv50_linkage_create(struct nv50_context *nv50)
{
- /* this is going to be rather complicated at first, but it works
- * like this; maybe we can simplify later, though
- */
- struct nouveau_stateobj *so = nv50->state.vertprog;
+ struct nv50_linkage *ln;
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program *vp = nv50->vertprog;
struct nv50_program *fp = nv50->fragprog;
+ struct nouveau_stateobj *so = so_new(32, 0);
uint32_t regs[5];
uint32_t map[8], i, n, k, m = 4;
@@ -2538,6 +2588,42 @@ nv50_linkage_validate(struct nv50_context *nv50)
so_method(so, tesla, 0x1988, 2);
so_data (so, regs[4]);
so_data (so, fp->cfg.high_temp);
+
+ ln = program_add_linkage(vp, fp);
+
+ so_ref(so, &ln->so);
+ so_ref(NULL, &so);
+
+ return ln;
+}
+
+void nv50_linkage_validate(struct nv50_context *nv50)
+{
+ struct nv50_linkage *it, *ln = NULL;
+ struct nv50_program *vp = nv50->vertprog;
+ struct nv50_program *fp = nv50->fragprog;
+ unsigned cfg;
+
+ cfg = nv50->rasterizer->pipe.light_twoside;
+ cfg |= nv50->rasterizer->pipe.point_size_per_vertex << 1;
+
+ if (vp->ln) {
+ it = vp->ln->next[0];
+ do {
+ if (it->prog[1] == (void *)fp && it->cfg == cfg) {
+ ln = it;
+ break;
+ }
+ it = it->next[0];
+ } while (it != vp->ln);
+ }
+
+ if (!ln) {
+ ln = nv50_linkage_create(nv50);
+ ln->cfg = cfg;
+ }
+
+ so_ref(ln->so, &nv50->state.plinkage);
}
void
@@ -2558,6 +2644,9 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
nouveau_resource_free(&p->data[1]);
nouveau_resource_free(&p->code);
+ while (p->ln)
+ program_del_linkage(p->ln);
+
p->translated = 0;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index b7921ad..6478338 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -15,6 +15,13 @@ struct nv50_program_exec {
} param;
};
+struct nv50_linkage {
+ struct nv50_linkage *next[2];
+ struct nouveau_stateobj *so;
+ void *prog[2];
+ unsigned cfg;
+};
+
struct nv50_program {
struct pipe_shader_state pipe;
struct tgsi_shader_info info;
@@ -34,6 +41,9 @@ struct nv50_program {
unsigned immd_nr;
unsigned param_nr;
+ struct nouveau_stateobj *so;
+ struct nv50_linkage *ln;
+
struct {
unsigned high_temp;
unsigned high_result;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index aa02947..cb9bb76 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -150,6 +150,8 @@ nv50_state_emit(struct nv50_context *nv50)
so_emit(chan, nv50->state.vertprog);
if (nv50->state.dirty & NV50_NEW_FRAGPROG)
so_emit(chan, nv50->state.fragprog);
+ if (nv50->state.dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG))
+ so_emit(chan, nv50->state.plinkage);
if (nv50->state.dirty & NV50_NEW_RASTERIZER)
so_emit(chan, nv50->state.rast);
if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR)
--
1.6.0.6
--------------090503050107050804030002
Content-Type: text/plain;
name="0009-nv50-support-for-user-clip-planes.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="0009-nv50-support-for-user-clip-planes.patch"
More information about the Nouveau mailing list