[PATCH] nv50: add support for two-sided lighting
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sun Jun 21 09:03:29 PDT 2009
---
src/gallium/drivers/nv50/nv50_context.h | 1 +
src/gallium/drivers/nv50/nv50_program.c | 171 ++++++++++++++++++------
src/gallium/drivers/nv50/nv50_program.h | 1 +
src/gallium/drivers/nv50/nv50_state_validate.c | 3 +
4 files changed, 138 insertions(+), 38 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 44463d6..c31c42a 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -190,6 +190,7 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
/* nv50_program.c */
extern void nv50_vertprog_validate(struct nv50_context *nv50);
extern void nv50_fragprog_validate(struct nv50_context *nv50);
+extern void nv50_linkage_validate(struct nv50_context *nv50);
extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
/* nv50_state_validate.c */
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 7a4bc18..30a1d32 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1779,7 +1779,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
struct tgsi_parse_context p;
boolean ret = FALSE;
unsigned i, c;
- unsigned fcol, bcol, fcrd, depr;
+ unsigned fcol[2], bcol[2], fcrd, depr;
/* count (centroid) perspective interpolations */
unsigned centroid_loads = 0;
@@ -1791,7 +1791,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
r_usage[0] = CALLOC(pc->temp_nr * 4, sizeof(unsigned));
r_usage[1] = CALLOC(pc->attr_nr * 4, sizeof(unsigned));
- depr = fcol = bcol = fcrd = 0xffff;
+ fcol[0] = fcol[1] = 0xffff;
+ bcol[0] = bcol[1] = 0xffff;
+ depr = fcrd = 0xffff;
tgsi_parse_init(&p, pc->p->pipe.tokens);
while (!tgsi_parse_end_of_tokens(&p)) {
@@ -1826,12 +1828,21 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (!d->Declaration.Semantic)
break;
+ c = d->Semantic.SemanticIndex;
switch (d->Semantic.SemanticName) {
case TGSI_SEMANTIC_POSITION:
depr = first;
pc->p->cfg.fp.regs[2] |= 0x00000100;
pc->p->cfg.fp.regs[3] |= 0x00000011;
break;
+ case TGSI_SEMANTIC_COLOR:
+ if (pc->p->type == PIPE_SHADER_VERTEX)
+ fcol[c] = first;
+ break;
+ case TGSI_SEMANTIC_BCOLOR:
+ if (pc->p->type == PIPE_SHADER_VERTEX)
+ bcol[c] = first;
+ break;
default:
break;
}
@@ -1854,17 +1865,14 @@ nv50_program_tx_prep(struct nv50_pc *pc)
break;
}
+ c = d->Semantic.SemanticIndex;
if (d->Declaration.Semantic) {
switch (d->Semantic.SemanticName) {
case TGSI_SEMANTIC_POSITION:
fcrd = first;
break;
case TGSI_SEMANTIC_COLOR:
- fcol = first;
- mode = INTERP_PERSPECTIVE;
- break;
- case TGSI_SEMANTIC_BCOLOR:
- bcol = first;
+ fcol[c] = first;
mode = INTERP_PERSPECTIVE;
break;
}
@@ -1931,10 +1939,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
/* position should be loaded first */
if (fcrd != 0xffff) {
unsigned mask;
- mid = 0;
+ oid = mid = 0;
mask = load_fp_attrib(pc, fcrd, r_usage[1],
&mid, &aid, &oid);
- oid = 0;
pc->p->cfg.fp.regs[1] |= (mask << 24);
pc->p->cfg.fp.map[0] += 0x04040404 * fcrd;
}
@@ -1966,16 +1973,24 @@ nv50_program_tx_prep(struct nv50_pc *pc)
pc->p->cfg.fp.regs[1] |= 0x08000000;
}
- for (c = 0; c < 4; c++) {
- /* I don't know what these values do, but
- * let's set them like the blob does:
- */
- if (fcol != 0xffff && r_usage[1][fcol * 4 + c])
- pc->p->cfg.fp.regs[0] += 0x00010000;
- if (bcol != 0xffff && r_usage[1][bcol * 4 + c])
- pc->p->cfg.fp.regs[0] += 0x00010000;
- }
+ /* load colors directly after position - XXX: might
+ * not be necessary if we always get colors first
+ */
+ oid += fcol[0] * 4;
+ i = mid;
+
+ if (fcol[0] != 0xffff)
+ load_fp_attrib(pc, fcol[0], r_usage[1],
+ &mid, &aid, &oid);
+ if (fcol[1] != 0xffff)
+ load_fp_attrib(pc, fcol[1], r_usage[1],
+ &mid, &aid, &oid);
+
+ /* set count of mapped color components */
+ pc->p->cfg.fp.regs[0] |= (mid - i) << 16;
+ /* reset oid and load remaining attrs */
+ oid = (fcrd == 0xffff) ? 4 : 0;
for (i = 0; i < pc->attr_nr; i++)
load_fp_attrib(pc, i, r_usage[1],
&mid, &aid, &oid);
@@ -1985,8 +2000,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (pc->iv_c)
free_temp(pc, pc->iv_c);
- pc->p->cfg.fp.high_map = (mid / 4);
- pc->p->cfg.fp.high_map += ((mid % 4) ? 1 : 0);
+ pc->p->cfg.fp.high_map = mid;
} else {
/* vertex program */
for (i = 0; i < pc->attr_nr; i++) {
@@ -2011,6 +2025,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
if (pc->p->type == PIPE_SHADER_VERTEX) {
for (i = 0; i < nr; i++)
ctor_reg(&pc->result[i], P_RESULT, i / 4, i);
+
+ /* output id offset bcol from fcol */
+ if (bcol[0] != 0xffff)
+ pc->p->cfg.vp.bcol = bcol[0] - fcol[0];
} else {
/* pc->p->type == PIPE_SHADER_FRAGMENT */
for (i = 0; i < pc->result_nr; i++) {
@@ -2101,7 +2119,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
p->cfg.fp.regs[1] = 0x00000400;
p->cfg.fp.map[0] = 0x03020100;
- p->cfg.fp.high_map = 1;
+ p->cfg.fp.high_map = 4;
break;
default:
assert(!"unsupported GPU program type");
@@ -2389,15 +2407,12 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(10, 0);
+ so = so_new(32, 0);
so_method(so, tesla, 0x1650, 2);
so_data (so, p->cfg.vp.attr[0]);
so_data (so, p->cfg.vp.attr[1]);
so_method(so, tesla, 0x16b8, 1);
so_data (so, p->cfg.high_result);
- so_method(so, tesla, 0x16ac, 2);
- so_data (so, p->cfg.high_result); //8);
- so_data (so, p->cfg.high_temp);
so_method(so, tesla, 0x140c, 1);
so_data (so, p->code->start);
so_ref(so, &nv50->state.vertprog);
@@ -2410,7 +2425,6 @@ nv50_fragprog_validate(struct nv50_context *nv50)
struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program *p = nv50->fragprog;
struct nouveau_stateobj *so;
- unsigned i;
if (!p->translated) {
nv50_program_validate(nv50, p);
@@ -2421,18 +2435,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(32, 0);
- so_method(so, tesla, 0x1904, 4);
- so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
- so_data (so, 0x00000004);
- so_data (so, 0x00000000);
- so_data (so, 0x00000000);
- so_method(so, tesla, 0x16bc, p->cfg.fp.high_map);
- for (i = 0; i < p->cfg.fp.high_map; i++)
- so_data(so, p->cfg.fp.map[i]);
- so_method(so, tesla, 0x1988, 2);
- so_data (so, p->cfg.fp.regs[1]); /* 0x08040404 / 0x0f000401 */
- so_data (so, p->cfg.high_temp);
+ so = so_new(8, 0);
so_method(so, tesla, 0x1298, 1);
so_data (so, p->cfg.high_result);
so_method(so, tesla, 0x19a8, 1);
@@ -2445,6 +2448,98 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_ref(NULL, &so);
}
+/*
+ * 1510 = bitmask to enable clipping planes
+ * 1688 = two-sided lighting enable
+ * 16ac = entry count of mapping table at [16bc]
+ * 16b0 = count of temporaries used in VP
+ *
+ * 1904 = 0x01CCBBFF (01 is sometimes 00 - ?)
+ * CC = number of color components in map (primary + secondary)
+ * BB = first back color's map index (colors should be contiguous)
+ * FF = first front color's map index
+ *
+ * 1908 = 0x0000HHLL
+ * LL = first clipping distance map index (4 if no UCPs)
+ * HH = last clipping distance map index + 1 (0 if no UCPs)
+ *
+ * 1910 = 0x00000SSe
+ * e = enable point size output (0 / 1)
+ * SS = point size map index (0 if disabled)
+ *
+ * 1988 = 0xMMIInnii
+ * MM = bitmask to un-mask masked VP/GP outputs (i.e. HPOS, generic ?)
+ * nn = map index of first non-masked output, where to put front color
+ * II = count of non-masked interpolants
+ * ii = almost always equal to II (except if II -> 00, why ?)
+ */
+void
+nv50_linkage_validate(struct nv50_context *nv50)
+{
+ /* Build the output map from the FP's input map (adding back-colour
+ * entries when two-sided lighting is on) and emit it, together with
+ * the related registers, through nv50->state.vertprog.
+ */
+ struct nouveau_stateobj *so = nv50->state.vertprog;
+ struct nouveau_grobj *tesla = nv50->screen->tesla;
+ struct nv50_program *vp = nv50->vertprog;
+ struct nv50_program *fp = nv50->fragprog;
+
+ uint32_t regs[5];
+ uint32_t map[8], i, n, k, m = 4; /* m: next free byte slot in map[]; NOTE(review): never checked against the 32 slots map[] holds */
+
+ memset(map, 0, 8 * sizeof(uint32_t));
+ map[0] = fp->cfg.fp.map[0]; /* first four entries are taken from the FP map unchanged */
+
+ regs[1] /* 1908 */ = 0x00000004;
+ regs[2] /* 190c */ = 0x00000000;
+ regs[3] /* 1910 */ = 0x00000000;
+ regs[0] /* 1904 */ = fp->cfg.fp.regs[0];
+ regs[4] /* 1988 */ = fp->cfg.fp.regs[1];
+
+ so_method(so, tesla, 0x1688, 1); /* data word (1 or 0) is supplied by the branch below */
+
+ if (nv50->rasterizer->pipe.light_twoside) {
+ so_data(so, 1);
+ n = (regs[0] >> 16) & 0xff; /* bits 16..23 of 1904: count of mapped colour components */
+
+ /* copy front color mappings and add output offset to BFC0 */
+ for (i = 4; i < 4 + n; i++, m++) {
+ k = fp->cfg.fp.map[i / 4] >> (8 * (i % 4)); /* pick byte i out of the packed FP map */
+ k &= 0xff;
+ map[m / 4] |= (k + vp->cfg.vp.bcol) << (8 * (m % 4));
+ }
+
+ regs[0] += n; /* low byte of 1904 grows by the n entries inserted above */
+ regs[2] += (n << 8); /* NOTE(review): 190c layout is not described in the header comment - confirm */
+ } else
+ so_data(so, 0);
+
+ for (i = 4; i < fp->cfg.fp.high_map; i++, m++) { /* appends every FP entry from 4 on; with two-sided on, front colours are thus copied again after the back-colour entries */
+ k = fp->cfg.fp.map[i / 4] >> (8 * (i % 4));
+ k &= 0xff;
+ map[m / 4] |= k << (8 * (m % 4));
+ }
+
+ so_method(so, tesla, 0x16ac, 2);
+ so_data (so, m); /* total number of byte entries written to map[] */
+ so_data (so, vp->cfg.high_temp);
+
+ so_method(so, tesla, 0x1904, 4);
+ so_data (so, regs[0]);
+ so_data (so, regs[1]);
+ so_data (so, regs[2]);
+ so_data (so, regs[3]);
+
+ n = (m / 4) + ((m % 4) ? 1 : 0); /* 32-bit words needed for m byte entries, rounded up */
+ so_method(so, tesla, 0x16bc, n);
+ for (i = 0; i < n; i++)
+ so_data(so, map[i]);
+
+ so_method(so, tesla, 0x1988, 2);
+ so_data (so, regs[4]);
+ so_data (so, fp->cfg.high_temp);
+}
+
void
nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
{
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index ed3f67b..b7921ad 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -39,6 +39,7 @@ struct nv50_program {
unsigned high_result;
struct {
unsigned attr[2];
+ unsigned bcol;
} vp;
struct {
unsigned regs[4];
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 0caf4b4..aa02947 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -199,6 +199,9 @@ nv50_state_validate(struct nv50_context *nv50)
if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
nv50_fragprog_validate(nv50);
+ if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG))
+ nv50_linkage_validate(nv50);
+
if (nv50->dirty & NV50_NEW_RASTERIZER)
so_ref(nv50->rasterizer->so, &nv50->state.rast);
--
1.6.0.6
--------------090503050107050804030002
Content-Type: text/plain;
name="0008-nv50-introduce-linkage-stateobj.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="0008-nv50-introduce-linkage-stateobj.patch"
More information about the Nouveau mailing list