[Nouveau] [PATCH] nv50 : fix too long shader uploads by splitting them

Xavier Chantry chantry.xavier at gmail.com
Sun May 9 02:37:50 PDT 2010


This fixes the fp-long-alu test, which failed/stopped at depth 3077 with the
following kernel errors:

[drm] nouveau 0000:01:00.0: Allocating FIFO number 3
[drm] nouveau 0000:01:00.0: nouveau_channel_alloc: initialised FIFO 3
[drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - Ch 3/3 Class 0x502d
Mthd 0x0838 Data 0x00000001:0x0001e0f8
[drm] nouveau 0000:01:00.0: PGRAPH_DATA_ERROR - INVALID_VALUE
[drm] nouveau 0000:01:00.0: PGRAPH_TRAP - Ch 3/7 Class 0x8297 Mthd
0x15e0 Data 0x00000000:0x00000000
[drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 0:
INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff
[drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 0 MP 1:
INVALID_OPCODE at 000000 warp 1, opcode ffffffff ffffffff
[drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 0:
INVALID_OPCODE at 000000 warp 3, opcode ffffffff ffffffff
[drm] nouveau 0000:01:00.0: PGRAPH_TRAP_MP_EXEC - TP 1 MP 1:
INVALID_OPCODE at 000000 warp 4, opcode ffffffff ffffffff
[drm] nouveau 0000:01:00.0: nouveau_channel_free: freeing fifo 3

One mystery remains: the fp-long-alu test still fails if I split the upload at
65536 (supposed to be the max value), without triggering any PGRAPH errors.
Any lower (256-aligned) value works.

Thanks to Christoph Bumiller and Marcin Kościelnicki for the help!

Signed-off-by: Xavier Chantry <chantry.xavier at gmail.com>
---
 src/gallium/drivers/nv50/nv50_program.c  |   23 +++++++++++++++++++----
 src/gallium/drivers/nv50/nv50_transfer.c |    3 ---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 0156ff9..9a41f44 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -4207,10 +4207,13 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
 static void
 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 {
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nouveau_channel *chan = nv50->screen->base.channel;
 	struct nv50_program_exec *e;
 	uint32_t *up, i;
 	boolean upload = FALSE;
+	unsigned offset;
+	int width;
 
 	if (!p->bo) {
 		nouveau_bo_new(chan->device, NOUVEAU_BO_VRAM, 0x100,
@@ -4267,10 +4270,22 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 			NOUVEAU_ERR("0x%08x\n", e->inst[1]);
 	}
 #endif
-	nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM,
-			 NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144,
-			 up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0,
-			 0, 0, p->exec_size * 4, 1, 1);
+
+	/* SIFC_HEIGHT/SIFC_WIDTH of 65536 do not work, and are not reported
+	 * as data error either. hw bug ? */
+#define SIFC_MAX_WIDTH (65536-256)
+	offset = 0;
+	width = p->exec_size * 4;
+	while(width > 0) {
+		nv50_upload_sifc(nv50, p->bo, offset, NOUVEAU_BO_VRAM,
+				NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144,
+				&up[offset / 4], NV50_2D_SIFC_FORMAT_R8_UNORM, 0,
+				0, 0, MIN2(SIFC_MAX_WIDTH, width), 1, 1);
+		width -= SIFC_MAX_WIDTH;
+		offset += SIFC_MAX_WIDTH;
+	}
+	BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
+	OUT_RING  (chan, 0);
 
 	FREE(up);
 }
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index c5581a6..8b66c42 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -274,7 +274,6 @@ nv50_upload_sifc(struct nv50_context *nv50,
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
 	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	unsigned line_dwords = (w * cpp + 3) / 4;
 
 	reloc |= NOUVEAU_BO_WR;
@@ -347,6 +346,4 @@ nv50_upload_sifc(struct nv50_context *nv50,
 		src = (uint8_t *) src + src_pitch;
 	}
 
-	BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
-	OUT_RING  (chan, 0);
 }
-- 
1.7.1



More information about the Nouveau mailing list