[Mesa-dev] [PATCH 11/16] radeonsi: print saved CS to the log context

Nicolai Hähnle nhaehnle at gmail.com
Wed Aug 16 11:05:41 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Use the auto logger facility, so that CS chunks will be interleaved
with other log info.
---
 src/gallium/drivers/radeonsi/si_debug.c      | 215 +++++++++++++++++++++------
 src/gallium/drivers/radeonsi/si_hw_context.c |  64 +++++---
 src/gallium/drivers/radeonsi/si_pipe.c       |   7 +-
 src/gallium/drivers/radeonsi/si_pipe.h       |  31 +++-
 src/gallium/drivers/radeonsi/si_state_draw.c |  28 ++--
 5 files changed, 257 insertions(+), 88 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index e797f10f7f0..9a7b0908199 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -34,6 +34,9 @@
 #include "util/u_memory.h"
 #include "ac_debug.h"
 
+static void si_dump_bo_list(struct si_context *sctx,
+			    const struct radeon_saved_cs *saved, FILE *f);
+
 DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
 
 static void si_dump_shader(struct si_screen *sscreen,
@@ -266,51 +269,180 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
 	fprintf(f, "\n");
 }
 
-static void si_dump_last_ib(struct si_context *sctx, FILE *f)
+struct si_log_chunk_cs {
+	struct si_context *ctx;
+	struct si_saved_cs *cs;
+	bool dump_bo_list;
+	unsigned gfx_begin, gfx_end;
+	unsigned ce_begin, ce_end;
+};
+
+static void si_log_chunk_type_cs_destroy(void *data)
+{
+	struct si_log_chunk_cs *chunk = data;
+	si_saved_cs_reference(&chunk->cs, NULL);
+	free(chunk);
+}
+
+static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
+				unsigned begin, unsigned end,
+				unsigned last_trace_id, const char *name,
+				enum chip_class chip_class)
 {
+	unsigned orig_end = end;
+
+	assert(begin <= end);
+
+	fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
+		name, begin);
+
+	for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
+		struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];
+
+		if (begin < chunk->cdw) {
+			ac_parse_ib_chunk(f, chunk->buf + begin,
+					  MIN2(end, chunk->cdw) - begin,
+					  last_trace_id, chip_class, NULL, NULL);
+		}
+
+		if (end <= chunk->cdw)
+			return;
+
+		if (begin < chunk->cdw)
+			fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
+				name);
+
+		begin -= MAX2(begin, chunk->cdw) - chunk->cdw;
+		end -= chunk->cdw;
+	}
+
+	assert(end <= cs->current.cdw);
+
+	ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
+			  chip_class, NULL, NULL);
+
+	fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
+		name, orig_end);
+}
+
+static void si_log_chunk_type_cs_print(void *data, FILE *f)
+{
+	struct si_log_chunk_cs *chunk = data;
+	struct si_context *ctx = chunk->ctx;
+	struct si_saved_cs *scs = chunk->cs;
 	int last_trace_id = -1;
 	int last_ce_trace_id = -1;
 
-	if (!sctx->last_gfx.ib)
-		return;
+	/* We are expecting that the ddebug pipe has already
+	 * waited for the context, so this buffer should be idle.
+	 * If the GPU is hung, there is no point in waiting for it.
+	 */
+	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
+					      NULL,
+					      PIPE_TRANSFER_UNSYNCHRONIZED |
+					      PIPE_TRANSFER_READ);
+	if (map) {
+		last_trace_id = map[0];
+		last_ce_trace_id = map[1];
+	}
 
-	if (sctx->last_trace_buf) {
-		/* We are expecting that the ddebug pipe has already
-		 * waited for the context, so this buffer should be idle.
-		 * If the GPU is hung, there is no point in waiting for it.
-		 */
-		uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
-						       NULL,
-						       PIPE_TRANSFER_UNSYNCHRONIZED |
-						       PIPE_TRANSFER_READ);
-		if (map) {
-			last_trace_id = map[0];
-			last_ce_trace_id = map[1];
+	if (chunk->gfx_end != chunk->gfx_begin) {
+		if (chunk->gfx_begin == 0) {
+			if (ctx->init_config)
+				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
+					    -1, "IB2: Init config", ctx->b.chip_class,
+					    NULL, NULL);
+
+			if (ctx->init_config_gs_rings)
+				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
+					    ctx->init_config_gs_rings->ndw,
+					    -1, "IB2: Init GS rings", ctx->b.chip_class,
+					    NULL, NULL);
+		}
+
+		if (scs->flushed) {
+			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
+				    chunk->gfx_end - chunk->gfx_begin,
+				    last_trace_id, "IB", ctx->b.chip_class,
+				    NULL, NULL);
+		} else {
+			si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
+					    chunk->gfx_end, last_trace_id, "IB",
+					    ctx->b.chip_class);
 		}
 	}
 
-	if (sctx->init_config)
-		ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
-			    -1, "IB2: Init config", sctx->b.chip_class,
-			    NULL, NULL);
-
-	if (sctx->init_config_gs_rings)
-		ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
-			    sctx->init_config_gs_rings->ndw,
-			    -1, "IB2: Init GS rings", sctx->b.chip_class,
-			    NULL, NULL);
-
-	ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
-		    last_trace_id, "IB", sctx->b.chip_class,
-		     NULL, NULL);
-
-	if (sctx->last_ce.ib) {
-		ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw,
-			    last_ce_trace_id, "CE IB", sctx->b.chip_class,
-			    NULL, NULL);
+	if (chunk->ce_end != chunk->ce_begin) {
+		assert(ctx->ce_ib);
+
+		if (scs->flushed) {
+			ac_parse_ib(f, scs->ce.ib + chunk->ce_begin,
+				    chunk->ce_end - chunk->ce_begin,
+				    last_ce_trace_id, "CE IB", ctx->b.chip_class,
+				    NULL, NULL);
+		} else {
+			si_parse_current_ib(f, ctx->ce_ib, chunk->ce_begin,
+					    chunk->ce_end, last_ce_trace_id, "CE IB",
+					    ctx->b.chip_class);
+		}
+	}
+
+	if (chunk->dump_bo_list) {
+		fprintf(f, "Flushing.\n\n");
+		si_dump_bo_list(ctx, &scs->gfx, f);
 	}
 }
 
+static const struct u_log_chunk_type si_log_chunk_type_cs = {
+	.destroy = si_log_chunk_type_cs_destroy,
+	.print = si_log_chunk_type_cs_print,
+};
+
+static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
+		      bool dump_bo_list)
+{
+	assert(ctx->current_saved_cs);
+
+	struct si_saved_cs *scs = ctx->current_saved_cs;
+	unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
+	unsigned ce_cur = ctx->ce_ib->prev_dw + ctx->ce_ib->current.cdw;
+
+	if (!dump_bo_list &&
+	    gfx_cur == scs->gfx_last_dw &&
+	    ce_cur == scs->ce_last_dw)
+		return;
+
+	struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
+
+	chunk->ctx = ctx;
+	si_saved_cs_reference(&chunk->cs, scs);
+	chunk->dump_bo_list = dump_bo_list;
+
+	chunk->gfx_begin = scs->gfx_last_dw;
+	chunk->gfx_end = gfx_cur;
+	scs->gfx_last_dw = gfx_cur;
+
+	chunk->ce_begin = scs->ce_last_dw;
+	chunk->ce_end = ce_cur;
+	scs->ce_last_dw = ce_cur;
+
+	u_log_chunk(log, &si_log_chunk_type_cs, chunk);
+}
+
+void si_auto_log_cs(void *data, struct u_log_context *log)
+{
+	struct si_context *ctx = (struct si_context *)data;
+	si_log_cs(ctx, log, false);
+}
+
+void si_log_hw_flush(struct si_context *sctx)
+{
+	if (!sctx->b.log)
+		return;
+
+	si_log_cs(sctx, sctx->b.log, true);
+}
+
 static const char *priority_to_string(enum radeon_bo_priority priority)
 {
 #define ITEM(x) [RADEON_PRIO_##x] = #x
@@ -920,6 +1052,9 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
 {
 	struct si_context *sctx = (struct si_context*)ctx;
 
+	if (sctx->b.log)
+		u_log_flush(sctx->b.log);
+
 	if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
 		si_dump_debug_registers(sctx, f);
 
@@ -957,18 +1092,6 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
 
 	u_log_new_page_print(&log, f);
 	u_log_context_destroy(&log);
-
-	if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
-		si_dump_bo_list(sctx, &sctx->last_gfx, f);
-		si_dump_last_ib(sctx, f);
-
-		fprintf(f, "Done.\n");
-
-		/* dump only once */
-		radeon_clear_saved_cs(&sctx->last_gfx);
-		radeon_clear_saved_cs(&sctx->last_ce);
-		r600_resource_reference(&sctx->last_trace_buf, NULL);
-	}
 }
 
 static void si_dump_dma(struct si_context *sctx,
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index 3582cd711b5..73aea70434e 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -57,6 +57,14 @@ static unsigned si_ce_needed_cs_space(void)
 	return space;
 }
 
+void si_destroy_saved_cs(struct si_saved_cs *scs)
+{
+	radeon_clear_saved_cs(&scs->gfx);
+	radeon_clear_saved_cs(&scs->ce);
+	r600_resource_reference(&scs->trace_buf, NULL);
+	free(scs);
+}
+
 /* initialize */
 void si_need_cs_space(struct si_context *ctx)
 {
@@ -139,17 +147,14 @@ void si_context_gfx_flush(void *context, unsigned flags,
 
 	si_emit_cache_flush(ctx);
 
-	if (ctx->trace_buf)
+	if (ctx->current_saved_cs) {
 		si_trace_emit(ctx);
+		si_log_hw_flush(ctx);
 
-	if (ctx->is_debug) {
 		/* Save the IB for debug contexts. */
-		radeon_clear_saved_cs(&ctx->last_gfx);
-		radeon_save_cs(ws, cs, &ctx->last_gfx, true);
-		radeon_clear_saved_cs(&ctx->last_ce);
-		radeon_save_cs(ws, ctx->ce_ib, &ctx->last_ce, false);
-		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
-		r600_resource_reference(&ctx->trace_buf, NULL);
+		radeon_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
+		radeon_save_cs(ws, ctx->ce_ib, &ctx->current_saved_cs->ce, false);
+		ctx->current_saved_cs->flushed = true;
 	}
 
 	/* Flush the CS. */
@@ -165,31 +170,48 @@ void si_context_gfx_flush(void *context, unsigned flags,
 		 */
 		ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000);
 
-		si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX);
+		si_check_vm_faults(&ctx->b, &ctx->current_saved_cs->gfx, RING_GFX);
 	}
 
+	if (ctx->current_saved_cs)
+		si_saved_cs_reference(&ctx->current_saved_cs, NULL);
+
 	si_begin_new_cs(ctx);
 	ctx->gfx_flush_in_progress = false;
 }
 
-void si_begin_new_cs(struct si_context *ctx)
+static void si_begin_cs_debug(struct si_context *ctx)
 {
-	if (ctx->is_debug) {
-		static const uint32_t zeros[2];
+	static const uint32_t zeros[2];
+	assert(!ctx->current_saved_cs);
+
+	ctx->current_saved_cs = calloc(1, sizeof(*ctx->current_saved_cs));
 
-		/* Create a buffer used for writing trace IDs and initialize it to 0. */
-		assert(!ctx->trace_buf);
-		ctx->trace_buf = (struct r600_resource*)
+	pipe_reference_init(&ctx->current_saved_cs->reference, 1);
+
+	ctx->current_saved_cs->trace_buf = (struct r600_resource*)
 				 pipe_buffer_create(ctx->b.b.screen, 0,
 						    PIPE_USAGE_STAGING, 8);
-		if (ctx->trace_buf)
-			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
-						    0, sizeof(zeros), zeros);
-		ctx->trace_id = 0;
+	if (!ctx->current_saved_cs) {
+		free(ctx->current_saved_cs);
+		ctx->current_saved_cs = NULL;
+		return;
 	}
 
-	if (ctx->trace_buf)
-		si_trace_emit(ctx);
+	pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->current_saved_cs->trace_buf->b.b,
+				    0, sizeof(zeros), zeros);
+	ctx->current_saved_cs->trace_id = 0;
+
+	si_trace_emit(ctx);
+
+	radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, ctx->current_saved_cs->trace_buf,
+			      RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
+}
+
+void si_begin_new_cs(struct si_context *ctx)
+{
+	if (ctx->is_debug)
+		si_begin_cs_debug(ctx);
 
 	/* Flush read caches at the beginning of CS not flushed by the kernel. */
 	if (ctx->b.chip_class >= CIK)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4aca6104ee9..2b8f7f4f5c2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -96,9 +96,7 @@ static void si_destroy_context(struct pipe_context *context)
 
 	LLVMDisposeTargetMachine(sctx->tm);
 
-	r600_resource_reference(&sctx->trace_buf, NULL);
-	r600_resource_reference(&sctx->last_trace_buf, NULL);
-	radeon_clear_saved_cs(&sctx->last_gfx);
+	si_saved_cs_reference(&sctx->current_saved_cs, NULL);
 
 	pb_slabs_deinit(&sctx->bindless_descriptor_slabs);
 	util_dynarray_fini(&sctx->bindless_descriptors);
@@ -163,6 +161,9 @@ static void si_set_log_context(struct pipe_context *ctx,
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	sctx->b.log = log;
+
+	if (log)
+		u_log_add_auto_logger(log, si_auto_log_cs, sctx);
 }
 
 static struct pipe_context *si_create_context(struct pipe_screen *screen,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ee0ab1b37b9..4da7e833096 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -267,6 +267,19 @@ struct si_image_handle
 	struct pipe_image_view		view;
 };
 
+struct si_saved_cs {
+	struct pipe_reference	reference;
+	struct si_context	*ctx;
+	struct radeon_saved_cs	gfx;
+	struct radeon_saved_cs	ce;
+	struct r600_resource	*trace_buf;
+	unsigned		trace_id;
+
+	unsigned		gfx_last_dw;
+	unsigned		ce_last_dw;
+	bool			flushed;
+};
+
 struct si_context {
 	struct r600_common_context	b;
 	struct blitter_context		*blitter;
@@ -418,11 +431,7 @@ struct si_context {
 
 	/* Debug state. */
 	bool			is_debug;
-	struct radeon_saved_cs	last_gfx;
-	struct radeon_saved_cs	last_ce;
-	struct r600_resource	*last_trace_buf;
-	struct r600_resource	*trace_buf;
-	unsigned		trace_id;
+	struct si_saved_cs	*current_saved_cs;
 	uint64_t		dmesg_timestamp;
 	unsigned		apitrace_call_number;
 
@@ -496,6 +505,8 @@ void cik_emit_prefetch_L2(struct si_context *sctx);
 void si_init_cp_dma_functions(struct si_context *sctx);
 
 /* si_debug.c */
+void si_auto_log_cs(void *data, struct u_log_context *log);
+void si_log_hw_flush(struct si_context *sctx);
 void si_init_debug_functions(struct si_context *sctx);
 void si_check_vm_faults(struct r600_common_context *ctx,
 			struct radeon_saved_cs *saved, enum ring_type ring);
@@ -505,6 +516,7 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary);
 void si_init_dma_functions(struct si_context *sctx);
 
 /* si_hw_context.c */
+void si_destroy_saved_cs(struct si_saved_cs *scs);
 void si_context_gfx_flush(void *context, unsigned flags,
 			  struct pipe_fence_handle **fence);
 void si_begin_new_cs(struct si_context *ctx);
@@ -602,4 +614,13 @@ si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
 	return MIN2(alignment, tcc_cache_line_size);
 }
 
+static inline void
+si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
+{
+	if (pipe_reference(&(*dst)->reference, &src->reference))
+		si_destroy_saved_cs(*dst);
+
+	*dst = src;
+}
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index deb0691f5c3..971906bb306 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -30,6 +30,7 @@
 #include "gfx9d.h"
 
 #include "util/u_index_modify.h"
+#include "util/u_log.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_prim.h"
 
@@ -1435,7 +1436,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 	si_ce_post_draw_synchronization(sctx);
 
-	if (sctx->trace_buf)
+	if (unlikely(sctx->current_saved_cs))
 		si_trace_emit(sctx);
 
 	/* Workaround for a VGT hang when streamout is enabled.
@@ -1465,20 +1466,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 void si_trace_emit(struct si_context *sctx)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-
-	sctx->trace_id++;
-	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
-			      RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
+	uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
+	uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
 
 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
 	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
 		    S_370_WR_CONFIRM(1) |
 		    S_370_ENGINE_SEL(V_370_ME));
-	radeon_emit(cs, sctx->trace_buf->gpu_address);
-	radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
-	radeon_emit(cs, sctx->trace_id);
+	radeon_emit(cs, va);
+	radeon_emit(cs, va >> 32);
+	radeon_emit(cs, trace_id);
 	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
-	radeon_emit(cs, AC_ENCODE_TRACE_POINT(sctx->trace_id));
+	radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
 
 	if (sctx->ce_ib) {
 		struct radeon_winsys_cs *ce = sctx->ce_ib;
@@ -1487,10 +1486,13 @@ void si_trace_emit(struct si_context *sctx)
 		radeon_emit(ce, S_370_DST_SEL(V_370_MEM_ASYNC) |
 			    S_370_WR_CONFIRM(1) |
 			    S_370_ENGINE_SEL(V_370_CE));
-		radeon_emit(ce, sctx->trace_buf->gpu_address + 4);
-		radeon_emit(ce, (sctx->trace_buf->gpu_address + 4) >> 32);
-		radeon_emit(ce, sctx->trace_id);
+		radeon_emit(ce, va + 4);
+		radeon_emit(ce, (va + 4) >> 32);
+		radeon_emit(ce, trace_id);
 		radeon_emit(ce, PKT3(PKT3_NOP, 0, 0));
-		radeon_emit(ce, AC_ENCODE_TRACE_POINT(sctx->trace_id));
+		radeon_emit(ce, AC_ENCODE_TRACE_POINT(trace_id));
 	}
+
+	if (sctx->b.log)
+		u_log_flush(sctx->b.log);
 }
-- 
2.11.0



More information about the mesa-dev mailing list