[Mesa-dev] [PATCH] radeonsi: add cs tracing v2

Christian König deathsimple at vodafone.de
Tue Mar 26 03:22:53 PDT 2013


Am 25.03.2013 18:15, schrieb j.glisse at gmail.com:
> From: Jerome Glisse <jglisse at redhat.com>
>
> Same as on r600, trace cs execution by writing the cs offset after each
> state; this allows pinpointing a lockup inside the command stream and
> narrowing down the scope of the lockup investigation.
>
> v2: Use WRITE_DATA packet instead of WRITE_MEM
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
> ---
>   src/gallium/drivers/radeonsi/r600_hw_context.c | 61 ++++++++++++++++++++++++++
>   src/gallium/drivers/radeonsi/radeonsi_pipe.c   | 22 ++++++++++
>   src/gallium/drivers/radeonsi/radeonsi_pipe.h   | 12 +++++
>   src/gallium/drivers/radeonsi/radeonsi_pm4.c    | 12 +++++
>   src/gallium/drivers/radeonsi/si_state_draw.c   |  7 ++-
>   src/gallium/drivers/radeonsi/sid.h             | 14 ++++++
>   6 files changed, 127 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
> index bd348f9..967f093 100644
> --- a/src/gallium/drivers/radeonsi/r600_hw_context.c
> +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
> @@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>   	/* Save 16 dwords for the fence mechanism. */
>   	num_dw += 16;
>   
> +#if R600_TRACE_CS
> +	if (ctx->screen->trace_bo) {
> +		num_dw += R600_TRACE_CS_DWORDS;
> +	}
> +#endif
> +
>   	/* Flush if there's not enough space. */
>   	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
>   		radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
> @@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
>   	/* force to keep tiling flags */
>   	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
>   
> +#if R600_TRACE_CS
> +	if (ctx->screen->trace_bo) {
> +		struct r600_screen *rscreen = ctx->screen;
> +		unsigned i;
> +
> +		for (i = 0; i < cs->cdw; i++) {
> +			fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
> +		}
> +		rscreen->cs_count++;
> +	}
> +#endif
> +
>   	/* Flush the CS. */
>   	ctx->ws->cs_flush(ctx->cs, flags);
>   
> +#if R600_TRACE_CS
> +	if (ctx->screen->trace_bo) {
> +		struct r600_screen *rscreen = ctx->screen;
> +		unsigned i;
> +
> +		for (i = 0; i < 10; i++) {
> +			usleep(5);
> +			if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
> +				break;
> +			}
> +		}
> +		if (i == 10) {
> +			fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
> +				rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
> +		} else {
> +			fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
> +		}
> +	}
> +#endif
> +
>   	ctx->pm4_dirty_cdwords = 0;
>   	ctx->flags = 0;
>   
> @@ -665,3 +703,26 @@ void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_tar
>   	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
>   
>   }
> +
> +#if R600_TRACE_CS
> +void r600_trace_emit(struct r600_context *rctx)
> +{
> +	struct r600_screen *rscreen = rctx->screen;
> +	struct radeon_winsys_cs *cs = rctx->cs;
> +	uint64_t va;
> +	uint32_t reloc;
> +
> +	va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
> +	reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
> +	cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
> +	cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
> +				PKT3_WRITE_DATA_WR_CONFIRM |
> +				PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
> +	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
> +	cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
> +	cs->buf[cs->cdw++] = cs->cdw;
> +	cs->buf[cs->cdw++] = rscreen->cs_count;
> +	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
> +	cs->buf[cs->cdw++] = reloc;

The NOP packet here is superfluous; also, I don't really like how this
is implemented after all.

May I just use this patch as the base for a cleaner implementation?

Christian.

> +}
> +#endif
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> index c5dac29..a370d7e 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> @@ -525,6 +525,14 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
>   		rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
>   		si_resource_reference(&rscreen->fences.bo, NULL);
>   	}
> +
> +#if R600_TRACE_CS
> +	if (rscreen->trace_bo) {
> +		rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
> +		pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
> +	}
> +#endif
> +
>   	pipe_mutex_destroy(rscreen->fences.mutex);
>   
>   	rscreen->ws->destroy(rscreen->ws);
> @@ -727,5 +735,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
>   	LIST_INITHEAD(&rscreen->fences.blocks);
>   	pipe_mutex_init(rscreen->fences.mutex);
>   
> +#if R600_TRACE_CS
> +	rscreen->cs_count = 0;
> +	if (rscreen->info.drm_minor >= 28) {
> +		rscreen->trace_bo = (struct si_resource*)pipe_buffer_create(&rscreen->screen,
> +										PIPE_BIND_CUSTOM,
> +										PIPE_USAGE_STAGING,
> +										4096);
> +		if (rscreen->trace_bo) {
> +			rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
> +									PIPE_TRANSFER_UNSYNCHRONIZED);
> +		}
> +	}
> +#endif
> +
>   	return &rscreen->screen;
>   }
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> index d0f04f4..7943563 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> @@ -47,6 +47,9 @@
>   #define R600_BIG_ENDIAN 0
>   #endif
>   
> +#define R600_TRACE_CS 0
> +#define R600_TRACE_CS_DWORDS		8
> +
>   struct r600_pipe_fences {
>   	struct si_resource		*bo;
>   	unsigned			*data;
> @@ -67,6 +70,11 @@ struct r600_screen {
>   	struct r600_tiling_info		tiling_info;
>   	struct util_slab_mempool	pool_buffers;
>   	struct r600_pipe_fences		fences;
> +#if R600_TRACE_CS
> +	struct si_resource		*trace_bo;
> +	uint32_t			*trace_ptr;
> +	unsigned			cs_count;
> +#endif
>   };
>   
>   struct si_pipe_sampler_view {
> @@ -226,6 +234,10 @@ void r600_translate_index_buffer(struct r600_context *r600,
>   				 struct pipe_index_buffer *ib,
>   				 unsigned count);
>   
> +#if R600_TRACE_CS
> +void r600_trace_emit(struct r600_context *rctx);
> +#endif
> +
>   /*
>    * common helpers
>    */
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.c b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
> index 79a2521..8e01738 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
> @@ -199,6 +199,12 @@ unsigned si_pm4_dirty_dw(struct r600_context *rctx)
>   			continue;
>   
>   		count += state->ndw;
> +#if R600_TRACE_CS
> +		/* for tracing each states */
> +		if (rctx->screen->trace_bo) {
> +			count += R600_TRACE_CS_DWORDS;
> +		}
> +#endif
>   	}
>   
>   	return count;
> @@ -219,6 +225,12 @@ void si_pm4_emit(struct r600_context *rctx, struct si_pm4_state *state)
>   	}
>   
>   	cs->cdw += state->ndw;
> +
> +#if R600_TRACE_CS
> +	if (rctx->screen->trace_bo) {
> +		r600_trace_emit(rctx);
> +	}
> +#endif
>   }
>   
>   void si_pm4_emit_dirty(struct r600_context *rctx)
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index a78751b..1e1d1cc 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -579,6 +579,12 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>   	si_pm4_emit_dirty(rctx);
>   	rctx->pm4_dirty_cdwords = 0;
>   
> +#if R600_TRACE_CS
> +	if (rctx->screen->trace_bo) {
> +		r600_trace_emit(rctx);
> +	}
> +#endif
> +
>   #if 0
>   	/* Enable stream out if needed. */
>   	if (rctx->streamout_start) {
> @@ -587,7 +593,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>   	}
>   #endif
>   
> -
>   	rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
>   
>   	/* Set the depth buffer as dirty. */
> diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
> index 57553a6..8528981 100644
> --- a/src/gallium/drivers/radeonsi/sid.h
> +++ b/src/gallium/drivers/radeonsi/sid.h
> @@ -77,6 +77,20 @@
>   #define PKT3_DRAW_INDEX_IMMD                   0x2E
>   #define PKT3_NUM_INSTANCES                     0x2F
>   #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
> +#define PKT3_WRITE_DATA                        0x37
> +#define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
> +#define     PKT3_WRITE_DATA_DST_SEL_REG            0
> +#define     PKT3_WRITE_DATA_DST_SEL_MEM_SYNC       1
> +#define     PKT3_WRITE_DATA_DST_SEL_TC_OR_L2       2
> +#define     PKT3_WRITE_DATA_DST_SEL_GDS            3
> +#define     PKT3_WRITE_DATA_DST_SEL_RESERVED_4     4
> +#define     PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC      5
> +#define     PKT3_WR_ONE_ADDR                       (1 << 16)
> +#define PKT3_WRITE_DATA_WR_CONFIRM                 (1 << 20)
> +#define PKT3_WRITE_DATA_ENGINE_SEL(x)              ((x) << 30)
> +#define PKT3_WRITE_DATA_ENGINE_SEL_ME              0
> +#define PKT3_WRITE_DATA_ENGINE_SEL_PFP             1
> +#define PKT3_WRITE_DATA_ENGINE_SEL_CE              2
>   #define PKT3_MEM_SEMAPHORE                     0x39
>   #define PKT3_MPEG_INDEX                        0x3A
>   #define PKT3_WAIT_REG_MEM                      0x3C



More information about the mesa-dev mailing list