[PATCH] nv50: select shader program through VP/FP_START_ID

Christoph Bumiller e0425955 at student.tuwien.ac.at
Sun Jun 21 09:00:48 PDT 2009


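Instead of specifying the program buffer address on every program
change, set the VP/FP code addresses once at screen creation and
select the active program by its offset within a shared per-type
VRAM code buffer, like the binary driver does. Program code is now
written to a per-program GART buffer and copied to its slot in the
shared buffer with nv50_transfer_gart_vram().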
---
 src/gallium/drivers/nv50/nv50_context.h  |    6 ++
 src/gallium/drivers/nv50/nv50_program.c  |   78 +++++++++++------------------
 src/gallium/drivers/nv50/nv50_program.h  |    1 +
 src/gallium/drivers/nv50/nv50_screen.c   |   27 ++++++++++-
 src/gallium/drivers/nv50/nv50_screen.h   |    2 +
 src/gallium/drivers/nv50/nv50_transfer.c |   12 +++++
 6 files changed, 77 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 9b8cc4d..44463d6 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -198,4 +198,10 @@ extern boolean nv50_state_validate(struct nv50_context *nv50);
 /* nv50_tex.c */
 extern void nv50_tex_validate(struct nv50_context *);
 
+/* nv50_transfer.c */
+extern void nv50_transfer_gart_vram(struct pipe_screen *pscreen,
+				    struct nouveau_bo *dst, unsigned dst_off,
+				    struct nouveau_bo *src, unsigned src_off,
+				    unsigned size);
+
 #endif
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 32d1bf8..4ef7748 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2289,19 +2289,22 @@ static void
 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_program_exec *e;
-	struct nouveau_stateobj *so;
-	const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
-	unsigned start, count, *up, *ptr;
+	struct nouveau_resource *heap;
+	struct nouveau_bo *code;
+	int ret;
+	unsigned size, *ptr;
 	boolean upload = FALSE;
 
 	if (!p->bo) {
-		nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100,
-			       p->exec_size * 4, &p->bo);
+		nouveau_bo_new(chan->device, NOUVEAU_BO_GART | NOUVEAU_BO_MAP,
+			       0x100, p->exec_size * 4, &p->bo);
 		upload = TRUE;
 	}
 
+	heap = nv50->screen->code_heap[p->type];
+	code = nv50->screen->sprogbuf_code[p->type];
+
 	if ((p->data[0] && p->data[0]->start != p->data_start[0]) ||
 		(p->data[1] && p->data[1]->start != p->data_start[1])) {
 		for (e = p->exec_head; e; e = e->next) {
@@ -2338,44 +2341,32 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 	}
 #endif
 
-	up = ptr = MALLOC(p->exec_size * 4);
+	ret = nouveau_bo_map(p->bo, NOUVEAU_BO_WR);
+	if (ret) {
+		NOUVEAU_ERR("Failed to map program upload buffer (%i).\n",ret);
+		abort();
+	}
+
+	ptr = (unsigned *)p->bo->map;
 	for (e = p->exec_head; e; e = e->next) {
 		*(ptr++) = e->inst[0];
 		if (is_long(e))
 			*(ptr++) = e->inst[1];
 	}
 
-	so = so_new(4,2);
-	so_method(so, nv50->screen->tesla, 0x1280, 3);
-	so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
-
-	start = 0; count = p->exec_size;
-	while (count) {
-		struct nouveau_channel *chan = nv50->screen->base.channel;
-		unsigned nr;
-
-		so_emit(chan, so);
+	nouveau_bo_unmap(p->bo);
 
-		nr = MIN2(count, 2047);
-		nr = MIN2(chan->pushbuf->remaining, nr);
-		if (chan->pushbuf->remaining < (nr + 3)) {
-			FIRE_RING(chan);
-			continue;
+	size = align(p->exec_size * 4, 0x100);
+	if (!p->code) {
+		ret = nouveau_resource_alloc(heap, size, p, &p->code);
+		if (ret) {
+			NOUVEAU_ERR("Program VRAM buffer is full.\n");
+			abort();
 		}
-
-		BEGIN_RING(chan, tesla, 0x0f00, 1);
-		OUT_RING  (chan, (start << 8) | NV50_CB_PUPLOAD);
-		BEGIN_RING(chan, tesla, 0x40000f04, nr);	
-		OUT_RINGp (chan, up + start, nr);
-
-		start += nr;
-		count -= nr;
 	}
 
-	FREE(up);
-	so_ref(NULL, &so);
+	nv50_transfer_gart_vram(&nv50->screen->base.base,
+				code, p->code->start, p->bo, 0, size);
 }
 
 void
@@ -2394,12 +2385,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(13, 2);
-	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
-	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
-		      NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
-		      NOUVEAU_BO_LOW, 0, 0);
+	so = so_new(10, 0);
 	so_method(so, tesla, 0x1650, 2);
 	so_data  (so, p->cfg.vp.attr[0]);
 	so_data  (so, p->cfg.vp.attr[1]);
@@ -2409,7 +2395,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
 	so_data  (so, p->cfg.high_result); //8);
 	so_data  (so, p->cfg.high_temp);
 	so_method(so, tesla, 0x140c, 1);
-	so_data  (so, 0); /* program start offset */
+	so_data  (so, p->code->start);
 	so_ref(so, &nv50->state.vertprog);
 	so_ref(NULL, &so);
 }
@@ -2431,12 +2417,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(64, 2);
-	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
-	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
-		      NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
-		      NOUVEAU_BO_LOW, 0, 0);
+	so = so_new(32, 0);
 	so_method(so, tesla, 0x1904, 4);
 	so_data  (so, p->cfg.fp.regs[0]); /* 0x01000404 / 0x00040404 */
 	so_data  (so, 0x00000004);
@@ -2455,7 +2436,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	so_method(so, tesla, 0x196c, 1);
 	so_data  (so, p->cfg.fp.regs[3]);
 	so_method(so, tesla, 0x1414, 1);
-	so_data  (so, 0); /* program start offset */
+	so_data  (so, p->code->start);
 	so_ref(so, &nv50->state.fragprog);
 	so_ref(NULL, &so);
 }
@@ -2476,6 +2457,7 @@ nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p)
 
 	nouveau_resource_free(&p->data[0]);
 	nouveau_resource_free(&p->data[1]);
+	nouveau_resource_free(&p->code);
 
 	p->translated = 0;
 }
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 096e047..ed3f67b 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -27,6 +27,7 @@ struct nv50_program {
 	struct nouveau_resource *data[2];
 	unsigned data_start[2];
 
+	struct nouveau_resource *code;
 	struct nouveau_bo *bo;
 
 	float *immd;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index f42b784..954b67a 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -266,7 +266,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_ref(NULL, &so);
 
 	/* Static tesla init */
-	so = so_new(256, 20);
+	so = so_new(256, 24);
 
 	so_method(so, screen->tesla, 0x1558, 1);
 	so_data  (so, 1);
@@ -290,6 +290,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, 0x16b8, 1);
 	so_data  (so, 8);
 
+	/* create VRAM buffers for shader programs */
+	for (i = 0; i < 2; i++) {
+		ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0x100, 0x10000,
+				     &screen->sprogbuf_code[i]);
+		if (ret || nouveau_resource_init(
+			    &screen->code_heap[i], 0, 0x10000)) {
+			NOUVEAU_ERR("Failed to initialize program buffers.");
+			nv50_screen_destroy(pscreen);
+			return NULL;
+		}
+	}
+
+	/* set program buffer addresses */
+	so_method(so, screen->tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
+	so_reloc (so, screen->sprogbuf_code[0], 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, screen->sprogbuf_code[0], 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+
+	so_method(so, screen->tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
+	so_reloc (so, screen->sprogbuf_code[1], 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, screen->sprogbuf_code[1], 0, NOUVEAU_BO_VRAM |
+		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
+
 	/* constant buffers for immediates and VP/FP parameters */
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
 			     &screen->constbuf_misc[0]);
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 61e24a5..2481492 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -17,9 +17,11 @@ struct nv50_screen {
 
 	struct nouveau_bo *constbuf_misc[1];
 	struct nouveau_bo *constbuf_parm[2];
+	struct nouveau_bo *sprogbuf_code[2];
 
 	struct nouveau_resource *immd_heap[1];
 	struct nouveau_resource *parm_heap[2];
+	struct nouveau_resource *code_heap[2];
 
 	struct nouveau_bo *tic;
 	struct nouveau_bo *tsc;
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index d0b7f0b..f7f5858 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -99,6 +99,18 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct nouveau_bo *src_bo,
 	}
 }
 
+void
+nv50_transfer_gart_vram(struct pipe_screen *pscreen,
+			struct nouveau_bo *dst, unsigned dst_offset,
+			struct nouveau_bo *src, unsigned src_offset,
+			unsigned size)
+{
+	nv50_transfer_rect_m2mf(pscreen,
+				src, src_offset, size, 0, 0, 0, 0,
+				dst, dst_offset, size, 0, 0, 0, 0,
+				1, size, 1, NOUVEAU_BO_GART, NOUVEAU_BO_VRAM);
+}
+
 static struct pipe_transfer *
 nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		  unsigned face, unsigned level, unsigned zslice,
-- 
1.6.0.6


--------------090503050107050804030002
Content-Type: text/plain;
 name="0004-nv50-use-ctor_reg-to-initialize-nv50_regs.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="0004-nv50-use-ctor_reg-to-initialize-nv50_regs.patch"



More information about the Nouveau mailing list