[Mesa-dev] [PATCH] radeonsi: add cs tracing v2

Christian König deathsimple at vodafone.de
Tue Mar 26 07:00:37 PDT 2013


Am 26.03.2013 14:42, schrieb Jerome Glisse:
> On Tue, Mar 26, 2013 at 6:22 AM, Christian König
> <deathsimple at vodafone.de> wrote:
>> Am 25.03.2013 18:15, schrieb j.glisse at gmail.com:
>>
>>> From: Jerome Glisse <jglisse at redhat.com>
>>>
>>> Same as on r600, trace cs execution by writting cs offset after each
>>> states, this allow to pin point lockup inside command stream and
>>> narrow down the scope of lockup investigation.
>>>
>>> v2: Use WRITE_DATA packet instead of WRITE_MEM
>>>
>>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>>> ---
>>>    src/gallium/drivers/radeonsi/r600_hw_context.c | 61
>>> ++++++++++++++++++++++++++
>>>    src/gallium/drivers/radeonsi/radeonsi_pipe.c   | 22 ++++++++++
>>>    src/gallium/drivers/radeonsi/radeonsi_pipe.h   | 12 +++++
>>>    src/gallium/drivers/radeonsi/radeonsi_pm4.c    | 12 +++++
>>>    src/gallium/drivers/radeonsi/si_state_draw.c   |  7 ++-
>>>    src/gallium/drivers/radeonsi/sid.h             | 14 ++++++
>>>    6 files changed, 127 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c
>>> b/src/gallium/drivers/radeonsi/r600_hw_context.c
>>> index bd348f9..967f093 100644
>>> --- a/src/gallium/drivers/radeonsi/r600_hw_context.c
>>> +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
>>> @@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx,
>>> unsigned num_dw,
>>>          /* Save 16 dwords for the fence mechanism. */
>>>          num_dw += 16;
>>>    +#if R600_TRACE_CS
>>> +       if (ctx->screen->trace_bo) {
>>> +               num_dw += R600_TRACE_CS_DWORDS;
>>> +       }
>>> +#endif
>>> +
>>>          /* Flush if there's not enough space. */
>>>          if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
>>>                  radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
>>> @@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx,
>>> unsigned flags)
>>>          /* force to keep tiling flags */
>>>          flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
>>>    +#if R600_TRACE_CS
>>> +       if (ctx->screen->trace_bo) {
>>> +               struct r600_screen *rscreen = ctx->screen;
>>> +               unsigned i;
>>> +
>>> +               for (i = 0; i < cs->cdw; i++) {
>>> +                       fprintf(stderr, "[%4d] [%5d] 0x%08x\n",
>>> rscreen->cs_count, i, cs->buf[i]);
>>> +               }
>>> +               rscreen->cs_count++;
>>> +       }
>>> +#endif
>>> +
>>>          /* Flush the CS. */
>>>          ctx->ws->cs_flush(ctx->cs, flags);
>>>    +#if R600_TRACE_CS
>>> +       if (ctx->screen->trace_bo) {
>>> +               struct r600_screen *rscreen = ctx->screen;
>>> +               unsigned i;
>>> +
>>> +               for (i = 0; i < 10; i++) {
>>> +                       usleep(5);
>>> +                       if
>>> (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
>>> +                               break;
>>> +                       }
>>> +               }
>>> +               if (i == 10) {
>>> +                       fprintf(stderr, "timeout on cs lockup likely
>>> happen at cs %d dw %d\n",
>>> +                               rscreen->trace_ptr[1],
>>> rscreen->trace_ptr[0]);
>>> +               } else {
>>> +                       fprintf(stderr, "cs %d executed in %dms\n",
>>> rscreen->trace_ptr[1], i * 5);
>>> +               }
>>> +       }
>>> +#endif
>>> +
>>>          ctx->pm4_dirty_cdwords = 0;
>>>          ctx->flags = 0;
>>>    @@ -665,3 +703,26 @@ void r600_context_draw_opaque_count(struct
>>> r600_context *ctx, struct r600_so_tar
>>>          cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size,
>>> RADEON_USAGE_READ);
>>>      }
>>> +
>>> +#if R600_TRACE_CS
>>> +void r600_trace_emit(struct r600_context *rctx)
>>> +{
>>> +       struct r600_screen *rscreen = rctx->screen;
>>> +       struct radeon_winsys_cs *cs = rctx->cs;
>>> +       uint64_t va;
>>> +       uint32_t reloc;
>>> +
>>> +       va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
>>> +       reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo,
>>> RADEON_USAGE_READWRITE);
>>> +       cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
>>> +       cs->buf[cs->cdw++] =
>>> PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
>>> +                               PKT3_WRITE_DATA_WR_CONFIRM |
>>> +
>>> PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
>>> +       cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
>>> +       cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
>>> +       cs->buf[cs->cdw++] = cs->cdw;
>>> +       cs->buf[cs->cdw++] = rscreen->cs_count;
>>> +       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>>> +       cs->buf[cs->cdw++] = reloc;
>>
>> The NOP packet here is superfluous,  also I don't really like how this is
>> implemented after all.
>>
>> May I just use this patch as base of a cleaner implementation?
>>
>> Christian.
> Yeah nop is a left over, what don't you like ? This is a build time
> debug option only that proved very useful (at least to me) on r600g

Yeah it's definitely useful and a quite nifty idea, but as one example I 
would like to have it as a runtime option instead of a compile time option.

Additional to that on SI the stuff in r600_hw_context is more or less 
deprecated, e.g. all the commands still have the now unnecessary NOPs, 
and actually I think some of them aren't really supported any more on SI 
(or at least a bit differently). Also I hoped to move all generating of 
PKT3 to si_commands. and so separate generating the packets from the 
logic using the packets.

Well to sum it up, you just reminded me of work still on my todo list. I 
don't mind if you commit it, but I sooner or later wanted to clean that up.

Christian.

>
> Cheers,
> Jerome
>
>>> +}
>>> +#endif
>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>> index c5dac29..a370d7e 100644
>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>> @@ -525,6 +525,14 @@ static void r600_destroy_screen(struct pipe_screen*
>>> pscreen)
>>>                  rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
>>>                  si_resource_reference(&rscreen->fences.bo, NULL);
>>>          }
>>> +
>>> +#if R600_TRACE_CS
>>> +       if (rscreen->trace_bo) {
>>> +               rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
>>> +               pipe_resource_reference((struct
>>> pipe_resource**)&rscreen->trace_bo, NULL);
>>> +       }
>>> +#endif
>>> +
>>>          pipe_mutex_destroy(rscreen->fences.mutex);
>>>          rscreen->ws->destroy(rscreen->ws);
>>> @@ -727,5 +735,19 @@ struct pipe_screen *radeonsi_screen_create(struct
>>> radeon_winsys *ws)
>>>          LIST_INITHEAD(&rscreen->fences.blocks);
>>>          pipe_mutex_init(rscreen->fences.mutex);
>>>    +#if R600_TRACE_CS
>>> +       rscreen->cs_count = 0;
>>> +       if (rscreen->info.drm_minor >= 28) {
>>> +               rscreen->trace_bo = (struct
>>> si_resource*)pipe_buffer_create(&rscreen->screen,
>>> +
>>> PIPE_BIND_CUSTOM,
>>> +
>>> PIPE_USAGE_STAGING,
>>> +
>>> 4096);
>>> +               if (rscreen->trace_bo) {
>>> +                       rscreen->trace_ptr =
>>> rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
>>> +
>>> PIPE_TRANSFER_UNSYNCHRONIZED);
>>> +               }
>>> +       }
>>> +#endif
>>> +
>>>          return &rscreen->screen;
>>>    }
>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>> index d0f04f4..7943563 100644
>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>> @@ -47,6 +47,9 @@
>>>    #define R600_BIG_ENDIAN 0
>>>    #endif
>>>    +#define R600_TRACE_CS 0
>>> +#define R600_TRACE_CS_DWORDS           8
>>> +
>>>    struct r600_pipe_fences {
>>>          struct si_resource              *bo;
>>>          unsigned                        *data;
>>> @@ -67,6 +70,11 @@ struct r600_screen {
>>>          struct r600_tiling_info         tiling_info;
>>>          struct util_slab_mempool        pool_buffers;
>>>          struct r600_pipe_fences         fences;
>>> +#if R600_TRACE_CS
>>> +       struct si_resource              *trace_bo;
>>> +       uint32_t                        *trace_ptr;
>>> +       unsigned                        cs_count;
>>> +#endif
>>>    };
>>>      struct si_pipe_sampler_view {
>>> @@ -226,6 +234,10 @@ void r600_translate_index_buffer(struct r600_context
>>> *r600,
>>>                                   struct pipe_index_buffer *ib,
>>>                                   unsigned count);
>>>    +#if R600_TRACE_CS
>>> +void r600_trace_emit(struct r600_context *rctx);
>>> +#endif
>>> +
>>>    /*
>>>     * common helpers
>>>     */
>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>> b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>> index 79a2521..8e01738 100644
>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>> @@ -199,6 +199,12 @@ unsigned si_pm4_dirty_dw(struct r600_context *rctx)
>>>                          continue;
>>>                  count += state->ndw;
>>> +#if R600_TRACE_CS
>>> +               /* for tracing each states */
>>> +               if (rctx->screen->trace_bo) {
>>> +                       count += R600_TRACE_CS_DWORDS;
>>> +               }
>>> +#endif
>>>          }
>>>          return count;
>>> @@ -219,6 +225,12 @@ void si_pm4_emit(struct r600_context *rctx, struct
>>> si_pm4_state *state)
>>>          }
>>>          cs->cdw += state->ndw;
>>> +
>>> +#if R600_TRACE_CS
>>> +       if (rctx->screen->trace_bo) {
>>> +               r600_trace_emit(rctx);
>>> +       }
>>> +#endif
>>>    }
>>>      void si_pm4_emit_dirty(struct r600_context *rctx)
>>> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>>> b/src/gallium/drivers/radeonsi/si_state_draw.c
>>> index a78751b..1e1d1cc 100644
>>> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>>> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>>> @@ -579,6 +579,12 @@ void si_draw_vbo(struct pipe_context *ctx, const
>>> struct pipe_draw_info *info)
>>>          si_pm4_emit_dirty(rctx);
>>>          rctx->pm4_dirty_cdwords = 0;
>>>    +#if R600_TRACE_CS
>>> +       if (rctx->screen->trace_bo) {
>>> +               r600_trace_emit(rctx);
>>> +       }
>>> +#endif
>>> +
>>>    #if 0
>>>          /* Enable stream out if needed. */
>>>          if (rctx->streamout_start) {
>>> @@ -587,7 +593,6 @@ void si_draw_vbo(struct pipe_context *ctx, const
>>> struct pipe_draw_info *info)
>>>          }
>>>    #endif
>>>    -
>>>          rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
>>>          /* Set the depth buffer as dirty. */
>>> diff --git a/src/gallium/drivers/radeonsi/sid.h
>>> b/src/gallium/drivers/radeonsi/sid.h
>>> index 57553a6..8528981 100644
>>> --- a/src/gallium/drivers/radeonsi/sid.h
>>> +++ b/src/gallium/drivers/radeonsi/sid.h
>>> @@ -77,6 +77,20 @@
>>>    #define PKT3_DRAW_INDEX_IMMD                   0x2E
>>>    #define PKT3_NUM_INSTANCES                     0x2F
>>>    #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
>>> +#define PKT3_WRITE_DATA                        0x37
>>> +#define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
>>> +#define     PKT3_WRITE_DATA_DST_SEL_REG            0
>>> +#define     PKT3_WRITE_DATA_DST_SEL_MEM_SYNC       1
>>> +#define     PKT3_WRITE_DATA_DST_SEL_TC_OR_L2       2
>>> +#define     PKT3_WRITE_DATA_DST_SEL_GDS            3
>>> +#define     PKT3_WRITE_DATA_DST_SEL_RESERVED_4     4
>>> +#define     PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC      5
>>> +#define     PKT3_WR_ONE_ADDR                       (1 << 16)
>>> +#define PKT3_WRITE_DATA_WR_CONFIRM                 (1 << 20)
>>> +#define PKT3_WRITE_DATA_ENGINE_SEL(x)              ((x) << 30)
>>> +#define PKT3_WRITE_DATA_ENGINE_SEL_ME              0
>>> +#define PKT3_WRITE_DATA_ENGINE_SEL_PFP             1
>>> +#define PKT3_WRITE_DATA_ENGINE_SEL_CE              2
>>>    #define PKT3_MEM_SEMAPHORE                     0x39
>>>    #define PKT3_MPEG_INDEX                        0x3A
>>>    #define PKT3_WAIT_REG_MEM                      0x3C
>>



More information about the mesa-dev mailing list