[PATCH] nv50: select shader program through VP/FP_START_ID
Christoph Bumiller
e0425955 at student.tuwien.ac.at
Sun Jun 21 09:00:48 PDT 2009
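Instead of specifying the program buffer address on every program
change, upload the code of all programs into a shared VRAM program
buffer (one for vertex programs, one for fragment programs) and select
the active program by setting its start offset within that buffer
through VP/FP_START_ID, like the binary driver does.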
---
src/gallium/drivers/nv50/nv50_context.h | 6 ++
src/gallium/drivers/nv50/nv50_program.c | 78 +++++++++++------------------
src/gallium/drivers/nv50/nv50_program.h | 1 +
src/gallium/drivers/nv50/nv50_screen.c | 27 ++++++++++-
src/gallium/drivers/nv50/nv50_screen.h | 2 +
src/gallium/drivers/nv50/nv50_transfer.c | 12 +++++
6 files changed, 77 insertions(+), 49 deletions(-)
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 9b8cc4d..44463d6 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -198,4 +198,10 @@ extern boolean nv50_state_validate(struct nv50_context *nv50);
/* nv50_tex.c */
extern void nv50_tex_validate(struct nv50_context *);
+/* nv50_transfer.c */
+extern void nv50_transfer_gart_vram(struct pipe_screen *pscreen,
+ struct nouveau_bo *dst, unsigned dst_off,
+ struct nouveau_bo *src, unsigned src_off,
+ unsigned size);
+
#endif
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 32d1bf8..4ef7748 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2289,19 +2289,22 @@ static void
nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
{
struct nouveau_channel *chan = nv50->screen->base.channel;
- struct nouveau_grobj *tesla = nv50->screen->tesla;
struct nv50_program_exec *e;
- struct nouveau_stateobj *so;
- const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
- unsigned start, count, *up, *ptr;
+ struct nouveau_resource *heap;
+ struct nouveau_bo *code;
+ int ret;
+ unsigned size, *ptr;
boolean upload = FALSE;
if (!p->bo) {
- nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100,
- p->exec_size * 4, &p->bo);
+ nouveau_bo_new(chan->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+ 0x100, p->exec_size * 4, &p->bo);
upload = TRUE;
}
+ heap = nv50->screen->code_heap[p->type];
+ code = nv50->screen->sprogbuf_code[p->type];
+
if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
(p->data[1] && p->data[1]->start != p->data_start[1])) {
for (e = p->exec_head; e; e = e->next) {
@@ -2338,44 +2341,32 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
}
#endif
- up = ptr = MALLOC(p->exec_size * 4);
+ ret = nouveau_bo_map(p->bo, NOUVEAU_BO_WR);
+ if (ret) {
+ NOUVEAU_ERR("Failed to map program upload buffer (%i).\n",ret);
+ abort();
+ }
+
+ ptr = (unsigned *)p->bo->map;
for (e = p->exec_head; e; e = e->next) {
*(ptr++) = e->inst[0];
if (is_long(e))
*(ptr++) = e->inst[1];
}
- so = so_new(4,2);
- so_method(so, nv50->screen->tesla, 0x1280, 3);
- so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0);
- so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
-
- start = 0; count = p->exec_size;
- while (count) {
- struct nouveau_channel *chan = nv50->screen->base.channel;
- unsigned nr;
-
- so_emit(chan, so);
+ nouveau_bo_unmap(p->bo);
- nr = MIN2(count, 2047);
- nr = MIN2(chan->pushbuf->remaining, nr);
- if (chan->pushbuf->remaining < (nr + 3)) {
- FIRE_RING(chan);
- continue;
+ size = align(p->exec_size * 4, 0x100);
+ if (!p->code) {
+ ret = nouveau_resource_alloc(heap, size, p, &p->code);
+ if (ret) {
+ NOUVEAU_ERR("Program VRAM buffer is full.\n");
+ abort();
}
-
- BEGIN_RING(chan, tesla, 0x0f00, 1);
- OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD);
- BEGIN_RING(chan, tesla, 0x40000f04, nr);
- OUT_RINGp (chan, up + start, nr);
-
- start += nr;
- count -= nr;
}
- FREE(up);
- so_ref(NULL, &so);
+ nv50_transfer_gart_vram(&nv50->screen->base.base,
+ code, p->code->start, p->bo, 0, size);
}
void
@@ -2394,12 +2385,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(13, 2);
- so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
+ so = so_new(10, 0);
so_method(so, tesla, 0x1650, 2);
so_data (so, p->cfg.vp.attr[0]);
so_data (so, p->cfg.vp.attr[1]);
@@ -2409,7 +2395,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
so_data (so, p->cfg.high_result); //8);
so_data (so, p->cfg.high_temp);
so_method(so, tesla, 0x140c, 1);
- so_data (so, 0); /* program start offset */
+ so_data (so, p->code->start);
so_ref(so, &nv50->state.vertprog);
so_ref(NULL, &so);
}
@@ -2431,12 +2417,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
nv50_program_validate_data(nv50, p);
nv50_program_validate_code(nv50, p);
- so = so_new(64, 2);
- so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_HIGH, 0, 0);
- so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
- NOUVEAU_BO_LOW, 0, 0);
+ so = so_new(32, 0);
so_method(so, tesla, 0x1904, 4);
so_data (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
so_data (so, 0x00000004);
@@ -2455,7 +2436,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
so_method(so, tesla, 0x196c, 1);
so_data (so, p->cfg.fp.regs[3]);
so_method(so, tesla, 0x1414, 1);
- so_data (so, 0); /* program start offset */
+ so_data (so, p->code->start);
so_ref(so, &nv50->state.fragprog);
so_ref(NULL, &so);
}
@@ -2476,6 +2457,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
nouveau_resource_free(&p->data[0]);
nouveau_resource_free(&p->data[1]);
+ nouveau_resource_free(&p->code);
p->translated = 0;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 096e047..ed3f67b 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -27,6 +27,7 @@ struct nv50_program {
struct nouveau_resource *data[2];
unsigned data_start[2];
+ struct nouveau_resource *code;
struct nouveau_bo *bo;
float *immd;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index f42b784..954b67a 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -266,7 +266,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_ref(NULL, &so);
/* Static tesla init */
- so = so_new(256, 20);
+ so = so_new(256, 24);
so_method(so, screen->tesla, 0x1558, 1);
so_data (so, 1);
@@ -290,6 +290,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
so_method(so, screen->tesla, 0x16b8, 1);
so_data (so, 8);
+ /* create VRAM buffers for shader programs */
+ for (i = 0; i < 2; i++) {
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0x100, 0x10000,
+ &screen->sprogbuf_code[i]);
+ if (ret || nouveau_resource_init(
+ &screen->code_heap[i], 0, 0x10000)) {
+ NOUVEAU_ERR("Failed to initialize program buffers.");
+ nv50_screen_destroy(pscreen);
+ return NULL;
+ }
+ }
+
+ /* set program buffer addresses */
+ so_method(so, screen->tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
+ so_reloc (so, screen->sprogbuf_code[0], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->sprogbuf_code[0], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+
+ so_method(so, screen->tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
+ so_reloc (so, screen->sprogbuf_code[1], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+ so_reloc (so, screen->sprogbuf_code[1], 0, NOUVEAU_BO_VRAM |
+ NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+
/* constant buffers for immediates and VP/FP parameters */
ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
&screen->constbuf_misc[0]);
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 61e24a5..2481492 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -17,9 +17,11 @@ struct nv50_screen {
struct nouveau_bo *constbuf_misc[1];
struct nouveau_bo *constbuf_parm[2];
+ struct nouveau_bo *sprogbuf_code[2];
struct nouveau_resource *immd_heap[1];
struct nouveau_resource *parm_heap[2];
+ struct nouveau_resource *code_heap[2];
struct nouveau_bo *tic;
struct nouveau_bo *tsc;
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index d0b7f0b..f7f5858 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -99,6 +99,18 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
}
}
+void
+nv50_transfer_gart_vram(struct pipe_screen *pscreen,
+ struct nouveau_bo *dst, unsigned dst_offset,
+ struct nouveau_bo *src, unsigned src_offset,
+ unsigned size)
+{
+ nv50_transfer_rect_m2mf(pscreen,
+ src, src_offset, size, 0, 0, 0, 0,
+ dst, dst_offset, size, 0, 0, 0, 0,
+ 1, size, 1, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM);
+}
+
static struct pipe_transfer *
nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
unsigned face, unsigned level, unsigned zslice,
--
1.6.0.6
--------------090503050107050804030002
Content-Type: text/plain;
name="0004-nv50-use-ctor_reg-to-initialize-nv50_regs.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="0004-nv50-use-ctor_reg-to-initialize-nv50_regs.patch"
More information about the Nouveau mailing list