[Mesa-dev] [PATCH 13/15] radeonsi: add IB tracing support for debug contexts

Marek Olšák maraeo at gmail.com
Sun Aug 23 05:04:59 PDT 2015


From: Marek Olšák <marek.olsak at amd.com>

This adds trace points to all IBs and the parser prints them and also
prints which trace points were reached (executed) by the CP.
This can help pinpoint a problematic packet, draw call, etc.
---
 src/gallium/drivers/radeonsi/si_debug.c      | 67 ++++++++++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_hw_context.c | 24 +++++++++-
 src/gallium/drivers/radeonsi/si_pipe.c       |  2 +
 src/gallium/drivers/radeonsi/si_pipe.h       |  9 +++-
 src/gallium/drivers/radeonsi/si_state_draw.c | 19 ++++----
 5 files changed, 105 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 72b7989..cf09686 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -142,7 +142,8 @@ static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
 		si_dump_reg(f, reg + i*4, ib[2+i], ~0);
 }
 
-static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
+static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
+				  int trace_id)
 {
 	unsigned count = PKT_COUNT_G(ib[0]);
 	unsigned op = PKT3_IT_OPCODE_G(ib[0]);
@@ -232,6 +233,36 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
 		if (ib[0] == 0xffff1000) {
 			count = -1; /* One dword NOP. */
 			break;
+		} else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) {
+			unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]);
+
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id);
+
+			if (trace_id == -1)
+				break; /* tracing was disabled */
+
+			print_spaces(f, INDENT_PKT);
+			if (packet_id < trace_id)
+				fprintf(f, COLOR_RED
+					"This trace point was reached by the CP."
+					COLOR_RESET "\n");
+			else if (packet_id == trace_id)
+				fprintf(f, COLOR_RED
+					"!!!!! This is the last trace point that "
+					"was reached by the CP !!!!!"
+					COLOR_RESET "\n");
+			else if (packet_id+1 == trace_id)
+				fprintf(f, COLOR_RED
+					"!!!!! This is the first trace point that "
+					"was NOT been reached by the CP !!!!!"
+					COLOR_RESET "\n");
+			else
+				fprintf(f, COLOR_RED
+					"!!!!! This trace point was NOT reached "
+					"by the CP !!!!!"
+					COLOR_RESET "\n");
+			break;
 		}
 		/* fall through, print all dwords */
 	default:
@@ -246,7 +277,17 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
 	return ib;
 }
 
-static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw)
+/**
+ * Parse and print an IB into a file.
+ *
+ * \param f		file
+ * \param ib		IB
+ * \param num_dw	size of the IB
+ * \param chip_class	chip class
+ * \param trace_id	the last trace ID that is known to have been reached
+ *			and executed by the CP, typically read from a buffer
+ */
+static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id)
 {
 	fprintf(f, "------------------ IB begin ------------------\n");
 
@@ -255,7 +296,7 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw)
 
 		switch (type) {
 		case 3:
-			ib = si_parse_packet3(f, ib, &num_dw);
+			ib = si_parse_packet3(f, ib, &num_dw, trace_id);
 			break;
 		case 2:
 			/* type-2 nop */
@@ -342,9 +383,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
 	si_dump_shader(sctx->ps_shader, "Fragment", f);
 
 	if (sctx->last_ib) {
-		si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size);
+		int last_trace_id = -1;
+
+		if (sctx->last_trace_buf) {
+			/* We are expecting that the ddebug pipe has already
+			 * waited for the context, so this buffer should be idle.
+			 * If the GPU is hung, there is no point in waiting for it.
+			 */
+			uint32_t *map =
+				sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
+						       NULL,
+						       PIPE_TRANSFER_UNSYNCHRONIZED |
+						       PIPE_TRANSFER_READ);
+			if (map)
+				last_trace_id = *map;
+		}
+
+		si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
+			    last_trace_id);
 		free(sctx->last_ib); /* dump only once */
 		sctx->last_ib = NULL;
+		r600_resource_reference(&sctx->last_trace_buf, NULL);
 	}
 
 	fprintf(f, "Done.\n");
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index b726eb3..110e316 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -89,7 +89,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	num_dw += ctx->atoms.s.cache_flush->num_dw;
 
 	if (ctx->screen->b.trace_bo)
-		num_dw += SI_TRACE_CS_DWORDS;
+		num_dw += SI_TRACE_CS_DWORDS * 2;
 
 	/* Flush if there's not enough space. */
 	if (num_dw > cs->max_dw) {
@@ -127,12 +127,17 @@ void si_context_gfx_flush(void *context, unsigned flags,
 	/* force to keep tiling flags */
 	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
 
+	if (ctx->trace_buf)
+		si_trace_emit(ctx);
+
 	/* Save the IB for debug contexts. */
 	if (ctx->is_debug) {
 		free(ctx->last_ib);
 		ctx->last_ib_dw_size = cs->cdw;
 		ctx->last_ib = malloc(cs->cdw * 4);
 		memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
+		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
+		r600_resource_reference(&ctx->trace_buf, NULL);
 	}
 
 	/* Flush the CS. */
@@ -148,6 +153,23 @@ void si_context_gfx_flush(void *context, unsigned flags,
 
 void si_begin_new_cs(struct si_context *ctx)
 {
+	if (ctx->is_debug) {
+		uint32_t zero = 0;
+
+		/* Create a buffer used for writing trace IDs and initialize it to 0. */
+		assert(!ctx->trace_buf);
+		ctx->trace_buf = (struct r600_resource*)
+				 pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
+						    PIPE_USAGE_STAGING, 4);
+		if (ctx->trace_buf)
+			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
+						    0, sizeof(zero), &zero);
+		ctx->trace_id = 0;
+	}
+
+	if (ctx->trace_buf)
+		si_trace_emit(ctx);
+
 	/* Flush read caches at the beginning of CS. */
 	ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
 			SI_CONTEXT_INV_TC_L1 |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index e5900b7..92c6ae3 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -81,6 +81,8 @@ static void si_destroy_context(struct pipe_context *context)
 	LLVMDisposeTargetMachine(sctx->tm);
 #endif
 
+	r600_resource_reference(&sctx->trace_buf, NULL);
+	r600_resource_reference(&sctx->last_trace_buf, NULL);
 	free(sctx->last_ib);
 	FREE(sctx);
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 09a21ce..52167f2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -43,7 +43,7 @@
 #define SI_RESTART_INDEX_UNKNOWN INT_MIN
 #define SI_NUM_SMOOTH_AA_SAMPLES 8
 
-#define SI_TRACE_CS_DWORDS		6
+#define SI_TRACE_CS_DWORDS		7
 
 #define SI_MAX_DRAW_CS_DWORDS \
 	(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
@@ -81,6 +81,10 @@
 					      SI_CONTEXT_FLUSH_AND_INV_DB | \
 					      SI_CONTEXT_FLUSH_AND_INV_DB_META)
 
+#define SI_ENCODE_TRACE_POINT(id)	(0xcafe0000 | ((id) & 0xffff))
+#define SI_IS_TRACE_POINT(x)		(((x) & 0xcafe0000) == 0xcafe0000)
+#define SI_GET_TRACE_POINT_ID(x)	((x) & 0xffff)
+
 struct si_compute;
 
 struct si_screen {
@@ -247,6 +251,9 @@ struct si_context {
 	bool			is_debug;
 	uint32_t		*last_ib;
 	unsigned		last_ib_dw_size;
+	struct r600_resource	*last_trace_buf;
+	struct r600_resource	*trace_buf;
+	unsigned		trace_id;
 };
 
 /* cik_sdma.c */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e56c9e7..b1aba12 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -835,7 +835,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	si_emit_draw_registers(sctx, info);
 	si_emit_draw_packets(sctx, info, &ib);
 
-	if (sctx->screen->b.trace_bo)
+	if (sctx->trace_buf)
 		si_trace_emit(sctx);
 
 	/* Workaround for a VGT hang when streamout is enabled.
@@ -873,19 +873,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 void si_trace_emit(struct si_context *sctx)
 {
-	struct si_screen *sscreen = sctx->screen;
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	uint64_t va;
 
-	va = sscreen->b.trace_bo->gpu_address;
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo,
+	sctx->trace_id++;
+	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
 	radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
 				PKT3_WRITE_DATA_WR_CONFIRM |
 				PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
-	radeon_emit(cs, va & 0xFFFFFFFFUL);
-	radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
-	radeon_emit(cs, cs->cdw);
-	radeon_emit(cs, sscreen->b.cs_count);
+	radeon_emit(cs, sctx->trace_buf->gpu_address);
+	radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
+	radeon_emit(cs, sctx->trace_id);
+	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+	radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id));
 }
-- 
2.1.4



More information about the mesa-dev mailing list