[Mesa-dev] [PATCH 5/6] radeonsi: print CE IBs into ddebug reports

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Aug 1 12:08:21 UTC 2017


Looks good to me.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 08/01/2017 12:43 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>   src/gallium/drivers/r600/r600_hw_context.c    |  2 +-
>   src/gallium/drivers/radeon/r600_pipe_common.c |  7 +++++--
>   src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
>   src/gallium/drivers/radeonsi/si_debug.c       | 14 ++++++++++++--
>   src/gallium/drivers/radeonsi/si_hw_context.c  | 10 ++++++----
>   src/gallium/drivers/radeonsi/si_pipe.h        |  1 +
>   src/gallium/drivers/radeonsi/si_state_draw.c  | 15 +++++++++++++++
>   7 files changed, 41 insertions(+), 10 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index ca7f41d..a821c35 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -277,21 +277,21 @@ void r600_context_gfx_flush(void *context, unsigned flags,
>   	if (ctx->trace_buf)
>   		eg_trace_emit(ctx);
>   	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
>   	if (ctx->b.chip_class == R600) {
>   		radeon_set_context_reg(cs, R_028350_SX_MISC, 0);
>   	}
>   
>   	if (ctx->is_debug) {
>   		/* Save the IB for debug contexts. */
>   		radeon_clear_saved_cs(&ctx->last_gfx);
> -		radeon_save_cs(ws, cs, &ctx->last_gfx);
> +		radeon_save_cs(ws, cs, &ctx->last_gfx, true);
>   		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
>   		r600_resource_reference(&ctx->trace_buf, NULL);
>   	}
>   	/* Flush the CS. */
>   	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
>   	if (fence)
>   		ws->fence_reference(fence, ctx->b.last_gfx_fence);
>   	ctx->b.num_gfx_cs_flushes++;
>   
>   	if (ctx->is_debug) {
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 8c66cc3..c58048f 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -435,21 +435,21 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
>   		(rctx->screen->debug_flags & DBG_CHECK_VM) &&
>   		rctx->check_vm_faults;
>   
>   	if (!radeon_emitted(cs, 0)) {
>   		if (fence)
>   			rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
>   		return;
>   	}
>   
>   	if (check_vm)
> -		radeon_save_cs(rctx->ws, cs, &saved);
> +		radeon_save_cs(rctx->ws, cs, &saved, true);
>   
>   	rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence);
>   	if (fence)
>   		rctx->ws->fence_reference(fence, rctx->last_sdma_fence);
>   
>   	if (check_vm) {
>   		/* Use conservative timeout 800ms, after which we won't wait any
>   		 * longer and assume the GPU is hung.
>   		 */
>   		rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000);
> @@ -457,38 +457,41 @@ static void r600_flush_dma_ring(void *ctx, unsigned flags,
>   		rctx->check_vm_faults(rctx, &saved, RING_DMA);
>   		radeon_clear_saved_cs(&saved);
>   	}
>   }
>   
>   /**
>    * Store a linearized copy of all chunks of \p cs together with the buffer
>    * list in \p saved.
>    */
>   void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
> -		    struct radeon_saved_cs *saved)
> +		    struct radeon_saved_cs *saved, bool get_buffer_list)
>   {
>   	void *buf;
>   	unsigned i;
>   
>   	/* Save the IB chunks. */
>   	saved->num_dw = cs->prev_dw + cs->current.cdw;
>   	saved->ib = MALLOC(4 * saved->num_dw);
>   	if (!saved->ib)
>   		goto oom;
>   
>   	buf = saved->ib;
>   	for (i = 0; i < cs->num_prev; ++i) {
>   		memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);
>   		buf += cs->prev[i].cdw;
>   	}
>   	memcpy(buf, cs->current.buf, cs->current.cdw * 4);
>   
> +	if (!get_buffer_list)
> +		return;
> +
>   	/* Save the buffer list. */
>   	saved->bo_count = ws->cs_get_buffer_list(cs, NULL);
>   	saved->bo_list = CALLOC(saved->bo_count,
>   				sizeof(saved->bo_list[0]));
>   	if (!saved->bo_list) {
>   		FREE(saved->ib);
>   		goto oom;
>   	}
>   	ws->cs_get_buffer_list(cs, saved->bo_list);
>   
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 4839c76..b391cbb 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -760,21 +760,21 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
>   bool r600_extra_shader_checks(struct r600_common_screen *rscreen,
>   			      unsigned processor);
>   void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
>   			      uint64_t offset, uint64_t size, unsigned value);
>   struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
>   						  const struct pipe_resource *templ);
>   const char *r600_get_llvm_processor_name(enum radeon_family family);
>   void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw,
>   			 struct r600_resource *dst, struct r600_resource *src);
>   void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs,
> -		    struct radeon_saved_cs *saved);
> +		    struct radeon_saved_cs *saved, bool get_buffer_list);
>   void radeon_clear_saved_cs(struct radeon_saved_cs *saved);
>   bool r600_check_device_reset(struct r600_common_context *rctx);
>   
>   /* r600_gpu_load.c */
>   void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
>   uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type);
>   unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,
>   			  uint64_t begin);
>   
>   /* r600_perfcounters.c */
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
> index 7c8a0fe..5a6d391 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -213,51 +213,60 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
>   	si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
>   	si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
>   	si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
>   	si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
>   	fprintf(f, "\n");
>   }
>   
>   static void si_dump_last_ib(struct si_context *sctx, FILE *f)
>   {
>   	int last_trace_id = -1;
> +	int last_ce_trace_id = -1;
>   
>   	if (!sctx->last_gfx.ib)
>   		return;
>   
>   	if (sctx->last_trace_buf) {
>   		/* We are expecting that the ddebug pipe has already
>   		 * waited for the context, so this buffer should be idle.
>   		 * If the GPU is hung, there is no point in waiting for it.
>   		 */
>   		uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
>   						       NULL,
>   						       PIPE_TRANSFER_UNSYNCHRONIZED |
>   						       PIPE_TRANSFER_READ);
> -		if (map)
> -			last_trace_id = *map;
> +		if (map) {
> +			last_trace_id = map[0];
> +			last_ce_trace_id = map[1];
> +		}
>   	}
>   
>   	if (sctx->init_config)
>   		ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
>   			    -1, "IB2: Init config", sctx->b.chip_class,
>   			    NULL, NULL);
>   
>   	if (sctx->init_config_gs_rings)
>   		ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
>   			    sctx->init_config_gs_rings->ndw,
>   			    -1, "IB2: Init GS rings", sctx->b.chip_class,
>   			    NULL, NULL);
>   
>   	ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
>   		    last_trace_id, "IB", sctx->b.chip_class,
>   		     NULL, NULL);
> +
> +	if (sctx->last_ce.ib) {
> +		ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw,
> +			    last_ce_trace_id, "CE IB", sctx->b.chip_class,
> +			    NULL, NULL);
> +	}
>   }
>   
>   static const char *priority_to_string(enum radeon_bo_priority priority)
>   {
>   #define ITEM(x) [RADEON_PRIO_##x] = #x
>   	static const char *table[64] = {
>   		ITEM(FENCE),
>   	        ITEM(TRACE),
>   	        ITEM(SO_FILLED_SIZE),
>   	        ITEM(QUERY),
> @@ -838,20 +847,21 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
>   	}
>   
>   	if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
>   		si_dump_bo_list(sctx, &sctx->last_gfx, f);
>   		si_dump_last_ib(sctx, f);
>   
>   		fprintf(f, "Done.\n");
>   
>   		/* dump only once */
>   		radeon_clear_saved_cs(&sctx->last_gfx);
> +		radeon_clear_saved_cs(&sctx->last_ce);
>   		r600_resource_reference(&sctx->last_trace_buf, NULL);
>   	}
>   }
>   
>   static void si_dump_dma(struct si_context *sctx,
>   			struct radeon_saved_cs *saved, FILE *f)
>   {
>   	static const char ib_name[] = "sDMA IB";
>   	unsigned i;
>   
> diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
> index 92d0cc5..f2dfcc7 100644
> --- a/src/gallium/drivers/radeonsi/si_hw_context.c
> +++ b/src/gallium/drivers/radeonsi/si_hw_context.c
> @@ -138,21 +138,23 @@ void si_context_gfx_flush(void *context, unsigned flags,
>   				SI_CONTEXT_INV_VMEM_L1;
>   
>   	si_emit_cache_flush(ctx);
>   
>   	if (ctx->trace_buf)
>   		si_trace_emit(ctx);
>   
>   	if (ctx->is_debug) {
>   		/* Save the IB for debug contexts. */
>   		radeon_clear_saved_cs(&ctx->last_gfx);
> -		radeon_save_cs(ws, cs, &ctx->last_gfx);
> +		radeon_save_cs(ws, cs, &ctx->last_gfx, true);
> +		radeon_clear_saved_cs(&ctx->last_ce);
> +		radeon_save_cs(ws, ctx->ce_ib, &ctx->last_ce, false);
>   		r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
>   		r600_resource_reference(&ctx->trace_buf, NULL);
>   	}
>   
>   	/* Flush the CS. */
>   	ws->cs_flush(cs, flags, &ctx->b.last_gfx_fence);
>   	if (fence)
>   		ws->fence_reference(fence, ctx->b.last_gfx_fence);
>   	ctx->b.num_gfx_cs_flushes++;
>   
> @@ -166,30 +168,30 @@ void si_context_gfx_flush(void *context, unsigned flags,
>   		si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX);
>   	}
>   
>   	si_begin_new_cs(ctx);
>   	ctx->gfx_flush_in_progress = false;
>   }
>   
>   void si_begin_new_cs(struct si_context *ctx)
>   {
>   	if (ctx->is_debug) {
> -		uint32_t zero = 0;
> +		static const uint32_t zeros[2];
>   
>   		/* Create a buffer used for writing trace IDs and initialize it to 0. */
>   		assert(!ctx->trace_buf);
>   		ctx->trace_buf = (struct r600_resource*)
>   				 pipe_buffer_create(ctx->b.b.screen, 0,
> -						    PIPE_USAGE_STAGING, 4);
> +						    PIPE_USAGE_STAGING, 8);
>   		if (ctx->trace_buf)
>   			pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
> -						    0, sizeof(zero), &zero);
> +						    0, sizeof(zeros), zeros);
>   		ctx->trace_id = 0;
>   	}
>   
>   	if (ctx->trace_buf)
>   		si_trace_emit(ctx);
>   
>   	/* Flush read caches at the beginning of CS not flushed by the kernel. */
>   	if (ctx->b.chip_class >= CIK)
>   		ctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
>   				SI_CONTEXT_INV_ICACHE;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index f7e0486..1984299 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -404,20 +404,21 @@ struct si_context {
>   	struct si_shader	*last_ls;
>   	struct si_shader_selector *last_tcs;
>   	int			last_num_tcs_input_cp;
>   	int			last_tes_sh_base;
>   	bool			last_tess_uses_primid;
>   	unsigned		last_num_patches;
>   
>   	/* Debug state. */
>   	bool			is_debug;
>   	struct radeon_saved_cs	last_gfx;
> +	struct radeon_saved_cs	last_ce;
>   	struct r600_resource	*last_trace_buf;
>   	struct r600_resource	*trace_buf;
>   	unsigned		trace_id;
>   	uint64_t		dmesg_timestamp;
>   	unsigned		apitrace_call_number;
>   
>   	/* Other state */
>   	bool need_check_render_feedback;
>   	bool			decompression_enabled;
>   
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index dfe4236..5254645 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1402,20 +1402,35 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>   		pipe_resource_reference(&indexbuf, NULL);
>   }
>   
>   void si_trace_emit(struct si_context *sctx)
>   {
>   	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
>   
>   	sctx->trace_id++;
>   	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
>   			      RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
> +
>   	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
>   	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
>   		    S_370_WR_CONFIRM(1) |
>   		    S_370_ENGINE_SEL(V_370_ME));
>   	radeon_emit(cs, sctx->trace_buf->gpu_address);
>   	radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
>   	radeon_emit(cs, sctx->trace_id);
>   	radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
>   	radeon_emit(cs, AC_ENCODE_TRACE_POINT(sctx->trace_id));
> +
> +	if (sctx->ce_ib) {
> +		struct radeon_winsys_cs *ce = sctx->ce_ib;
> +
> +		radeon_emit(ce, PKT3(PKT3_WRITE_DATA, 3, 0));
> +		radeon_emit(ce, S_370_DST_SEL(V_370_MEM_ASYNC) |
> +			    S_370_WR_CONFIRM(1) |
> +			    S_370_ENGINE_SEL(V_370_CE));
> +		radeon_emit(ce, sctx->trace_buf->gpu_address + 4);
> +		radeon_emit(ce, (sctx->trace_buf->gpu_address + 4) >> 32);
> +		radeon_emit(ce, sctx->trace_id);
> +		radeon_emit(ce, PKT3(PKT3_NOP, 0, 0));
> +		radeon_emit(ce, AC_ENCODE_TRACE_POINT(sctx->trace_id));
> +	}
>   }
> 


More information about the mesa-dev mailing list