[Mesa-dev] [PATCH] radeonsi: add cs tracing v2
Jerome Glisse
j.glisse at gmail.com
Tue Mar 26 06:42:40 PDT 2013
On Tue, Mar 26, 2013 at 6:22 AM, Christian König
<deathsimple at vodafone.de> wrote:
> Am 25.03.2013 18:15, schrieb j.glisse at gmail.com:
>
>> From: Jerome Glisse <jglisse at redhat.com>
>>
>> Same as on r600, trace cs execution by writting cs offset after each
>> states, this allow to pin point lockup inside command stream and
>> narrow down the scope of lockup investigation.
>>
>> v2: Use WRITE_DATA packet instead of WRITE_MEM
>>
>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>> ---
>> src/gallium/drivers/radeonsi/r600_hw_context.c | 61
>> ++++++++++++++++++++++++++
>> src/gallium/drivers/radeonsi/radeonsi_pipe.c | 22 ++++++++++
>> src/gallium/drivers/radeonsi/radeonsi_pipe.h | 12 +++++
>> src/gallium/drivers/radeonsi/radeonsi_pm4.c | 12 +++++
>> src/gallium/drivers/radeonsi/si_state_draw.c | 7 ++-
>> src/gallium/drivers/radeonsi/sid.h | 14 ++++++
>> 6 files changed, 127 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c
>> b/src/gallium/drivers/radeonsi/r600_hw_context.c
>> index bd348f9..967f093 100644
>> --- a/src/gallium/drivers/radeonsi/r600_hw_context.c
>> +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
>> @@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx,
>> unsigned num_dw,
>> /* Save 16 dwords for the fence mechanism. */
>> num_dw += 16;
>> +#if R600_TRACE_CS
>> + if (ctx->screen->trace_bo) {
>> + num_dw += R600_TRACE_CS_DWORDS;
>> + }
>> +#endif
>> +
>> /* Flush if there's not enough space. */
>> if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
>> radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
>> @@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx,
>> unsigned flags)
>> /* force to keep tiling flags */
>> flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
>> +#if R600_TRACE_CS
>> + if (ctx->screen->trace_bo) {
>> + struct r600_screen *rscreen = ctx->screen;
>> + unsigned i;
>> +
>> + for (i = 0; i < cs->cdw; i++) {
>> + fprintf(stderr, "[%4d] [%5d] 0x%08x\n",
>> rscreen->cs_count, i, cs->buf[i]);
>> + }
>> + rscreen->cs_count++;
>> + }
>> +#endif
>> +
>> /* Flush the CS. */
>> ctx->ws->cs_flush(ctx->cs, flags);
>> +#if R600_TRACE_CS
>> + if (ctx->screen->trace_bo) {
>> + struct r600_screen *rscreen = ctx->screen;
>> + unsigned i;
>> +
>> + for (i = 0; i < 10; i++) {
>> + usleep(5);
>> + if
>> (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
>> + break;
>> + }
>> + }
>> + if (i == 10) {
>> + fprintf(stderr, "timeout on cs lockup likely
>> happen at cs %d dw %d\n",
>> + rscreen->trace_ptr[1],
>> rscreen->trace_ptr[0]);
>> + } else {
>> + fprintf(stderr, "cs %d executed in %dms\n",
>> rscreen->trace_ptr[1], i * 5);
>> + }
>> + }
>> +#endif
>> +
>> ctx->pm4_dirty_cdwords = 0;
>> ctx->flags = 0;
>> @@ -665,3 +703,26 @@ void r600_context_draw_opaque_count(struct
>> r600_context *ctx, struct r600_so_tar
>> cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size,
>> RADEON_USAGE_READ);
>> }
>> +
>> +#if R600_TRACE_CS
>> +void r600_trace_emit(struct r600_context *rctx)
>> +{
>> + struct r600_screen *rscreen = rctx->screen;
>> + struct radeon_winsys_cs *cs = rctx->cs;
>> + uint64_t va;
>> + uint32_t reloc;
>> +
>> + va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
>> + reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo,
>> RADEON_USAGE_READWRITE);
>> + cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
>> + cs->buf[cs->cdw++] =
>> PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
>> + PKT3_WRITE_DATA_WR_CONFIRM |
>> +
>> PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
>> + cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
>> + cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
>> + cs->buf[cs->cdw++] = cs->cdw;
>> + cs->buf[cs->cdw++] = rscreen->cs_count;
>> + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>> + cs->buf[cs->cdw++] = reloc;
>
>
> The NOP packet here is superfluous, also I don't really like how this is
> implemented after all.
>
> May I just use this patch as base of a cleaner implementation?
>
> Christian.
Yeah nop is a left over, what don't you like ? This is a build time
debug option only that proved very useful (at least to me) on r600g
Cheers,
Jerome
>
>> +}
>> +#endif
>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>> index c5dac29..a370d7e 100644
>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>> @@ -525,6 +525,14 @@ static void r600_destroy_screen(struct pipe_screen*
>> pscreen)
>> rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
>> si_resource_reference(&rscreen->fences.bo, NULL);
>> }
>> +
>> +#if R600_TRACE_CS
>> + if (rscreen->trace_bo) {
>> + rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
>> + pipe_resource_reference((struct
>> pipe_resource**)&rscreen->trace_bo, NULL);
>> + }
>> +#endif
>> +
>> pipe_mutex_destroy(rscreen->fences.mutex);
>> rscreen->ws->destroy(rscreen->ws);
>> @@ -727,5 +735,19 @@ struct pipe_screen *radeonsi_screen_create(struct
>> radeon_winsys *ws)
>> LIST_INITHEAD(&rscreen->fences.blocks);
>> pipe_mutex_init(rscreen->fences.mutex);
>> +#if R600_TRACE_CS
>> + rscreen->cs_count = 0;
>> + if (rscreen->info.drm_minor >= 28) {
>> + rscreen->trace_bo = (struct
>> si_resource*)pipe_buffer_create(&rscreen->screen,
>> +
>> PIPE_BIND_CUSTOM,
>> +
>> PIPE_USAGE_STAGING,
>> +
>> 4096);
>> + if (rscreen->trace_bo) {
>> + rscreen->trace_ptr =
>> rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
>> +
>> PIPE_TRANSFER_UNSYNCHRONIZED);
>> + }
>> + }
>> +#endif
>> +
>> return &rscreen->screen;
>> }
>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>> index d0f04f4..7943563 100644
>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>> @@ -47,6 +47,9 @@
>> #define R600_BIG_ENDIAN 0
>> #endif
>> +#define R600_TRACE_CS 0
>> +#define R600_TRACE_CS_DWORDS 8
>> +
>> struct r600_pipe_fences {
>> struct si_resource *bo;
>> unsigned *data;
>> @@ -67,6 +70,11 @@ struct r600_screen {
>> struct r600_tiling_info tiling_info;
>> struct util_slab_mempool pool_buffers;
>> struct r600_pipe_fences fences;
>> +#if R600_TRACE_CS
>> + struct si_resource *trace_bo;
>> + uint32_t *trace_ptr;
>> + unsigned cs_count;
>> +#endif
>> };
>> struct si_pipe_sampler_view {
>> @@ -226,6 +234,10 @@ void r600_translate_index_buffer(struct r600_context
>> *r600,
>> struct pipe_index_buffer *ib,
>> unsigned count);
>> +#if R600_TRACE_CS
>> +void r600_trace_emit(struct r600_context *rctx);
>> +#endif
>> +
>> /*
>> * common helpers
>> */
>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>> b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>> index 79a2521..8e01738 100644
>> --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>> @@ -199,6 +199,12 @@ unsigned si_pm4_dirty_dw(struct r600_context *rctx)
>> continue;
>> count += state->ndw;
>> +#if R600_TRACE_CS
>> + /* for tracing each states */
>> + if (rctx->screen->trace_bo) {
>> + count += R600_TRACE_CS_DWORDS;
>> + }
>> +#endif
>> }
>> return count;
>> @@ -219,6 +225,12 @@ void si_pm4_emit(struct r600_context *rctx, struct
>> si_pm4_state *state)
>> }
>> cs->cdw += state->ndw;
>> +
>> +#if R600_TRACE_CS
>> + if (rctx->screen->trace_bo) {
>> + r600_trace_emit(rctx);
>> + }
>> +#endif
>> }
>> void si_pm4_emit_dirty(struct r600_context *rctx)
>> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>> b/src/gallium/drivers/radeonsi/si_state_draw.c
>> index a78751b..1e1d1cc 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>> @@ -579,6 +579,12 @@ void si_draw_vbo(struct pipe_context *ctx, const
>> struct pipe_draw_info *info)
>> si_pm4_emit_dirty(rctx);
>> rctx->pm4_dirty_cdwords = 0;
>> +#if R600_TRACE_CS
>> + if (rctx->screen->trace_bo) {
>> + r600_trace_emit(rctx);
>> + }
>> +#endif
>> +
>> #if 0
>> /* Enable stream out if needed. */
>> if (rctx->streamout_start) {
>> @@ -587,7 +593,6 @@ void si_draw_vbo(struct pipe_context *ctx, const
>> struct pipe_draw_info *info)
>> }
>> #endif
>> -
>> rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
>> /* Set the depth buffer as dirty. */
>> diff --git a/src/gallium/drivers/radeonsi/sid.h
>> b/src/gallium/drivers/radeonsi/sid.h
>> index 57553a6..8528981 100644
>> --- a/src/gallium/drivers/radeonsi/sid.h
>> +++ b/src/gallium/drivers/radeonsi/sid.h
>> @@ -77,6 +77,20 @@
>> #define PKT3_DRAW_INDEX_IMMD 0x2E
>> #define PKT3_NUM_INSTANCES 0x2F
>> #define PKT3_STRMOUT_BUFFER_UPDATE 0x34
>> +#define PKT3_WRITE_DATA 0x37
>> +#define PKT3_WRITE_DATA_DST_SEL(x) ((x) << 8)
>> +#define PKT3_WRITE_DATA_DST_SEL_REG 0
>> +#define PKT3_WRITE_DATA_DST_SEL_MEM_SYNC 1
>> +#define PKT3_WRITE_DATA_DST_SEL_TC_OR_L2 2
>> +#define PKT3_WRITE_DATA_DST_SEL_GDS 3
>> +#define PKT3_WRITE_DATA_DST_SEL_RESERVED_4 4
>> +#define PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC 5
>> +#define PKT3_WR_ONE_ADDR (1 << 16)
>> +#define PKT3_WRITE_DATA_WR_CONFIRM (1 << 20)
>> +#define PKT3_WRITE_DATA_ENGINE_SEL(x) ((x) << 30)
>> +#define PKT3_WRITE_DATA_ENGINE_SEL_ME 0
>> +#define PKT3_WRITE_DATA_ENGINE_SEL_PFP 1
>> +#define PKT3_WRITE_DATA_ENGINE_SEL_CE 2
>> #define PKT3_MEM_SEMAPHORE 0x39
>> #define PKT3_MPEG_INDEX 0x3A
>> #define PKT3_WAIT_REG_MEM 0x3C
>
>
More information about the mesa-dev
mailing list