Mesa (master): nv50: use SIFC also for shader upload

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Sat Oct 31 12:41:05 UTC 2009


Module: Mesa
Branch: master
Commit: 9831e1f76cd020e1cde2b13e03149415319a8135
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=9831e1f76cd020e1cde2b13e03149415319a8135

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Sat Oct 31 13:38:22 2009 +0100

nv50: use SIFC also for shader upload

Adds a more generic SIFC transfer function.

---

 src/gallium/drivers/nv50/nv50_context.h  |   11 ++++-
 src/gallium/drivers/nv50/nv50_program.c  |   79 ++++++++-------------------
 src/gallium/drivers/nv50/nv50_transfer.c |   86 ++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 56 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 33667e8..890defb 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -196,7 +196,8 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
 extern void nv50_vertprog_validate(struct nv50_context *nv50);
 extern void nv50_fragprog_validate(struct nv50_context *nv50);
 extern void nv50_linkage_validate(struct nv50_context *nv50);
-extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
+extern void nv50_program_destroy(struct nv50_context *nv50,
+				 struct nv50_program *p);
 
 /* nv50_state_validate.c */
 extern boolean nv50_state_validate(struct nv50_context *nv50);
@@ -210,4 +211,12 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
 /* nv50_tex.c */
 extern void nv50_tex_validate(struct nv50_context *);
 
+/* nv50_transfer.c */
+extern void
+nv50_upload_sifc(struct nv50_context *nv50,
+		 struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
+		 unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
+		 void *src, unsigned src_format, int src_pitch,
+		 int x, int y, int w, int h, int cpp);
+
 #endif
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index c3edc02..faf6389 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2980,11 +2980,8 @@ static void
 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_program_exec *e;
-	struct nouveau_stateobj *so;
-	const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
-	unsigned start, count, *up, *ptr;
+	uint32_t *up, i;
 	boolean upload = FALSE;
 
 	if (!p->bo) {
@@ -2999,32 +2996,37 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 	if (!upload)
 		return;
 
-	for (e = p->exec_head; e; e = e->next) {
+	up = MALLOC(p->exec_size * 4);
+
+	for (i = 0, e = p->exec_head; e; e = e->next) {
 		unsigned ei, ci, bs;
 
-		if (e->param.index < 0)
-			continue;
+		if (e->param.index >= 0 && e->param.mask) {
+			bs = (e->inst[1] >> 22) & 0x07;
+			assert(bs < 2);
+			ei = e->param.shift >> 5;
+			ci = e->param.index;
+			if (bs == 0)
+				ci += p->data[bs]->start;
 
-		if (e->param.mask == 0) {
+			e->inst[ei] &= ~e->param.mask;
+			e->inst[ei] |= (ci << e->param.shift);
+		} else
+		if (e->param.index >= 0) {
+			/* zero mask means param is a jump/branch offset */
 			assert(!(e->param.index & 1));
 			/* seem to be 8 byte steps */
 			ei = (e->param.index >> 1) + 0 /* START_ID */;
 
 			e->inst[0] &= 0xf0000fff;
 			e->inst[0] |= ei << 12;
-			continue;
 		}
 
-		bs = (e->inst[1] >> 22) & 0x07;
-		assert(bs < 2);
-		ei = e->param.shift >> 5;
-		ci = e->param.index;
-		if (bs == 0)
-			ci += p->data[bs]->start;
-
-		e->inst[ei] &= ~e->param.mask;
-		e->inst[ei] |= (ci << e->param.shift);
+		up[i++] = e->inst[0];
+		if (is_long(e))
+			up[i++] = e->inst[1];
 	}
+	assert(i == p->exec_size);
 
 	if (p->data[0])
 		p->data_start[0] = p->data[0]->start;
@@ -3037,45 +3039,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 			NOUVEAU_ERR("0x%08x\n", e->inst[1]);
 	}
 #endif
-
-	up = ptr = MALLOC(p->exec_size * 4);
-	for (e = p->exec_head; e; e = e->next) {
-		*(ptr++) = e->inst[0];
-		if (is_long(e))
-			*(ptr++) = e->inst[1];
-	}
-
-	so = so_new(4,2);
-	so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
-	so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
-
-	start = 0; count = p->exec_size;
-	while (count) {
-		struct nouveau_channel *chan = nv50->screen->base.channel;
-		unsigned nr;
-
-		so_emit(chan, so);
-
-		nr = MIN2(count, 2047);
-		nr = MIN2(chan->pushbuf->remaining, nr);
-		if (chan->pushbuf->remaining < (nr + 3)) {
-			FIRE_RING(chan);
-			continue;
-		}
-
-		BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
-		OUT_RING  (chan, (start << 8) | NV50_CB_PUPLOAD);
-		BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr);
-		OUT_RINGp (chan, up + start, nr);
-
-		start += nr;
-		count -= nr;
-	}
+	nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM,
+			 NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144,
+			 up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0,
+			 0, 0, p->exec_size * 4, 1, 1);
 
 	FREE(up);
-	so_ref(NULL, &so);
 }
 
 void
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 9c28902..f1eb672 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -237,3 +237,89 @@ nv50_transfer_init_screen_functions(struct pipe_screen *pscreen)
 	pscreen->transfer_map = nv50_transfer_map;
 	pscreen->transfer_unmap = nv50_transfer_unmap;
 }
+
+void
+nv50_upload_sifc(struct nv50_context *nv50,
+		 struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
+		 unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
+		 void *src, unsigned src_format, int src_pitch,
+		 int x, int y, int w, int h, int cpp)
+{ /* Push h lines of CPU data from src into bo using the eng2d SIFC methods. */
+	struct nouveau_channel *chan = nv50->screen->base.channel;
+	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	unsigned line_dwords = (w * cpp + 3) / 4; /* w * cpp rounded up to whole 32-bit words */
+
+	reloc |= NOUVEAU_BO_WR; /* bo is the write target, whatever flags the caller passed */
+
+	WAIT_RING (chan, 32); /* NOTE(review): presumably reserves room for the setup methods below - confirm */
+
+	if (bo->tile_flags) { /* bo has tile flags: program bo->tile_mode, no pitch */
+		BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
+		OUT_RING  (chan, dst_format);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, bo->tile_mode << 4);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	} else { /* no tile flags: dst_pitch is sent explicitly */
+		BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
+		OUT_RING  (chan, dst_format);
+		OUT_RING  (chan, 1);
+		BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
+		OUT_RING  (chan, dst_pitch);
+	}
+
+	BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4);
+	OUT_RING  (chan, dst_w);
+	OUT_RING  (chan, dst_h);
+	OUT_RELOCh(chan, bo, dst_offset, reloc); /* destination address, high part */
+	OUT_RELOCl(chan, bo, dst_offset, reloc); /* destination address, low part */
+
+	/* NV50_2D_OPERATION_SRCCOPY assumed already set */
+
+	BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, src_format);
+	BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10); /* w, h, then 8 more words; only x and y vary */
+	OUT_RING  (chan, w);
+	OUT_RING  (chan, h);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, x);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, y);
+
+	while (h--) { /* one pass per source line */
+		const uint32_t *p = src; /* start of the current line */
+		unsigned count = line_dwords; /* dwords of this line still to send */
+
+		while (count) {
+			unsigned nr = MIN2(count, 1792); /* at most 1792 dwords per data packet */
+
+			if (chan->pushbuf->remaining <= nr) { /* not enough pushbuf space left: flush first */
+				FIRE_RING (chan);
+
+				BEGIN_RING(chan, eng2d,
+					   NV50_2D_DST_ADDRESS_HIGH, 2);
+				OUT_RELOCh(chan, bo, dst_offset, reloc); /* re-emit the destination address after the flush */
+				OUT_RELOCl(chan, bo, dst_offset, reloc);
+			}
+			assert(chan->pushbuf->remaining > nr);
+
+			BEGIN_RING(chan, eng2d,
+				   NV50_2D_SIFC_DATA | (2 << 29), nr); /* NOTE(review): (2 << 29) presumably selects non-incrementing mode - confirm */
+			OUT_RINGp (chan, p, nr);
+
+			p += nr;
+			count -= nr;
+		}
+
+		src += src_pitch; /* void * arithmetic (GNU C extension): steps src_pitch bytes */
+	}
+
+	BEGIN_RING(chan, tesla, 0x1440, 1); /* NOTE(review): tesla method 0x1440 has no symbolic name here; purpose not visible - confirm */
+	OUT_RING  (chan, 0);
+}




More information about the mesa-commit mailing list