[Mesa-dev] [PATCH 11/16] radeonsi: print saved CS to the log context

Marek Olšák maraeo at gmail.com
Sun Aug 20 16:08:12 UTC 2017


On Wed, Aug 16, 2017 at 1:05 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Use the auto logger facility, so that CS chunks will be interleaved
> with other log info.
> ---
>  src/gallium/drivers/radeonsi/si_debug.c      | 215 +++++++++++++++++++++------
>  src/gallium/drivers/radeonsi/si_hw_context.c |  64 +++++---
>  src/gallium/drivers/radeonsi/si_pipe.c       |   7 +-
>  src/gallium/drivers/radeonsi/si_pipe.h       |  31 +++-
>  src/gallium/drivers/radeonsi/si_state_draw.c |  28 ++--
>  5 files changed, 257 insertions(+), 88 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
> index e797f10f7f0..9a7b0908199 100644
> --- a/src/gallium/drivers/radeonsi/si_debug.c
> +++ b/src/gallium/drivers/radeonsi/si_debug.c
> @@ -34,6 +34,9 @@
>  #include "util/u_memory.h"
>  #include "ac_debug.h"
>
> +static void si_dump_bo_list(struct si_context *sctx,
> +                           const struct radeon_saved_cs *saved, FILE *f);
> +
>  DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
>
>  static void si_dump_shader(struct si_screen *sscreen,
> @@ -266,51 +269,180 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
>         fprintf(f, "\n");
>  }
>
> -static void si_dump_last_ib(struct si_context *sctx, FILE *f)
> +struct si_log_chunk_cs {
> +       struct si_context *ctx;
> +       struct si_saved_cs *cs;
> +       bool dump_bo_list;
> +       unsigned gfx_begin, gfx_end;
> +       unsigned ce_begin, ce_end;
> +};
> +
> +static void si_log_chunk_type_cs_destroy(void *data)
> +{
> +       struct si_log_chunk_cs *chunk = data;
> +       si_saved_cs_reference(&chunk->cs, NULL);
> +       free(chunk);
> +}
> +
> +static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
> +                               unsigned begin, unsigned end,
> +                               unsigned last_trace_id, const char *name,
> +                               enum chip_class chip_class)
>  {
> +       unsigned orig_end = end;
> +
> +       assert(begin <= end);
> +
> +       fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
> +               name, begin);
> +
> +       for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
> +               struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];
> +
> +               if (begin < chunk->cdw) {
> +                       ac_parse_ib_chunk(f, chunk->buf + begin,
> +                                         MIN2(end, chunk->cdw) - begin,
> +                                         last_trace_id, chip_class, NULL, NULL);
> +               }
> +
> +               if (end <= chunk->cdw)
> +                       return;
> +
> +               if (begin < chunk->cdw)
> +                       fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
> +                               name);
> +
> +               begin -= MAX2(begin, chunk->cdw) - chunk->cdw;
> +               end -= chunk->cdw;
> +       }
> +
> +       assert(end <= cs->current.cdw);
> +
> +       ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
> +                         chip_class, NULL, NULL);
> +
> +       fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
> +               name, orig_end);
> +}
> +
> +static void si_log_chunk_type_cs_print(void *data, FILE *f)
> +{
> +       struct si_log_chunk_cs *chunk = data;
> +       struct si_context *ctx = chunk->ctx;
> +       struct si_saved_cs *scs = chunk->cs;
>         int last_trace_id = -1;
>         int last_ce_trace_id = -1;
>
> -       if (!sctx->last_gfx.ib)
> -               return;
> +       /* We are expecting that the ddebug pipe has already
> +        * waited for the context, so this buffer should be idle.
> +        * If the GPU is hung, there is no point in waiting for it.
> +        */
> +       uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
> +                                             NULL,
> +                                             PIPE_TRANSFER_UNSYNCHRONIZED |
> +                                             PIPE_TRANSFER_READ);
> +       if (map) {
> +               last_trace_id = map[0];
> +               last_ce_trace_id = map[1];
> +       }
>
> -       if (sctx->last_trace_buf) {
> -               /* We are expecting that the ddebug pipe has already
> -                * waited for the context, so this buffer should be idle.
> -                * If the GPU is hung, there is no point in waiting for it.
> -                */
> -               uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
> -                                                      NULL,
> -                                                      PIPE_TRANSFER_UNSYNCHRONIZED |
> -                                                      PIPE_TRANSFER_READ);
> -               if (map) {
> -                       last_trace_id = map[0];
> -                       last_ce_trace_id = map[1];
> +       if (chunk->gfx_end != chunk->gfx_begin) {
> +               if (chunk->gfx_begin == 0) {
> +                       if (ctx->init_config)
> +                               ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
> +                                           -1, "IB2: Init config", ctx->b.chip_class,
> +                                           NULL, NULL);
> +
> +                       if (ctx->init_config_gs_rings)
> +                               ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
> +                                           ctx->init_config_gs_rings->ndw,
> +                                           -1, "IB2: Init GS rings", ctx->b.chip_class,
> +                                           NULL, NULL);
> +               }
> +
> +               if (scs->flushed) {
> +                       ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
> +                                   chunk->gfx_end - chunk->gfx_begin,
> +                                   last_trace_id, "IB", ctx->b.chip_class,
> +                                   NULL, NULL);
> +               } else {
> +                       si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
> +                                           chunk->gfx_end, last_trace_id, "IB",
> +                                           ctx->b.chip_class);
>                 }
>         }
>
> -       if (sctx->init_config)
> -               ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
> -                           -1, "IB2: Init config", sctx->b.chip_class,
> -                           NULL, NULL);
> -
> -       if (sctx->init_config_gs_rings)
> -               ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
> -                           sctx->init_config_gs_rings->ndw,
> -                           -1, "IB2: Init GS rings", sctx->b.chip_class,
> -                           NULL, NULL);
> -
> -       ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
> -                   last_trace_id, "IB", sctx->b.chip_class,
> -                    NULL, NULL);
> -
> -       if (sctx->last_ce.ib) {
> -               ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw,
> -                           last_ce_trace_id, "CE IB", sctx->b.chip_class,
> -                           NULL, NULL);
> +       if (chunk->ce_end != chunk->ce_begin) {
> +               assert(ctx->ce_ib);
> +
> +               if (scs->flushed) {
> +                       ac_parse_ib(f, scs->ce.ib + chunk->ce_begin,
> +                                   chunk->ce_end - chunk->ce_begin,
> +                                   last_ce_trace_id, "CE IB", ctx->b.chip_class,
> +                                   NULL, NULL);
> +               } else {
> +                       si_parse_current_ib(f, ctx->ce_ib, chunk->ce_begin,
> +                                           chunk->ce_end, last_ce_trace_id, "CE IB",
> +                                           ctx->b.chip_class);
> +               }
> +       }
> +
> +       if (chunk->dump_bo_list) {
> +               fprintf(f, "Flushing.\n\n");
> +               si_dump_bo_list(ctx, &scs->gfx, f);
>         }
>  }
>
> +static const struct u_log_chunk_type si_log_chunk_type_cs = {
> +       .destroy = si_log_chunk_type_cs_destroy,
> +       .print = si_log_chunk_type_cs_print,
> +};
> +
> +static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
> +                     bool dump_bo_list)
> +{
> +       assert(ctx->current_saved_cs);
> +
> +       struct si_saved_cs *scs = ctx->current_saved_cs;
> +       unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
> +       unsigned ce_cur = ctx->ce_ib->prev_dw + ctx->ce_ib->current.cdw;
> +
> +       if (!dump_bo_list &&
> +           gfx_cur == scs->gfx_last_dw &&
> +           ce_cur == scs->ce_last_dw)
> +               return;
> +
> +       struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
> +
> +       chunk->ctx = ctx;
> +       si_saved_cs_reference(&chunk->cs, scs);
> +       chunk->dump_bo_list = dump_bo_list;
> +
> +       chunk->gfx_begin = scs->gfx_last_dw;
> +       chunk->gfx_end = gfx_cur;
> +       scs->gfx_last_dw = gfx_cur;
> +
> +       chunk->ce_begin = scs->ce_last_dw;
> +       chunk->ce_end = ce_cur;
> +       scs->ce_last_dw = ce_cur;
> +
> +       u_log_chunk(log, &si_log_chunk_type_cs, chunk);
> +}
> +
> +void si_auto_log_cs(void *data, struct u_log_context *log)
> +{
> +       struct si_context *ctx = (struct si_context *)data;
> +       si_log_cs(ctx, log, false);
> +}
> +
> +void si_log_hw_flush(struct si_context *sctx)
> +{
> +       if (!sctx->b.log)
> +               return;
> +
> +       si_log_cs(sctx, sctx->b.log, true);
> +}
> +
>  static const char *priority_to_string(enum radeon_bo_priority priority)
>  {
>  #define ITEM(x) [RADEON_PRIO_##x] = #x
> @@ -920,6 +1052,9 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
>  {
>         struct si_context *sctx = (struct si_context*)ctx;
>
> +       if (sctx->b.log)
> +               u_log_flush(sctx->b.log);
> +
>         if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
>                 si_dump_debug_registers(sctx, f);
>
> @@ -957,18 +1092,6 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
>
>         u_log_new_page_print(&log, f);
>         u_log_context_destroy(&log);
> -
> -       if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
> -               si_dump_bo_list(sctx, &sctx->last_gfx, f);
> -               si_dump_last_ib(sctx, f);
> -
> -               fprintf(f, "Done.\n");
> -
> -               /* dump only once */
> -               radeon_clear_saved_cs(&sctx->last_gfx);
> -               radeon_clear_saved_cs(&sctx->last_ce);
> -               r600_resource_reference(&sctx->last_trace_buf, NULL);
> -       }
>  }
>
>  static void si_dump_dma(struct si_context *sctx,
> diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
> index 3582cd711b5..73aea70434e 100644
> --- a/src/gallium/drivers/radeonsi/si_hw_context.c
> +++ b/src/gallium/drivers/radeonsi/si_hw_context.c
> @@ -57,6 +57,14 @@ static unsigned si_ce_needed_cs_space(void)
>         return space;
>  }
>
> +void si_destroy_saved_cs(struct si_saved_cs *scs)
> +{
> +       radeon_clear_saved_cs(&scs->gfx);
> +       radeon_clear_saved_cs(&scs->ce);
> +       r600_resource_reference(&scs->trace_buf, NULL);
> +       free(scs);
> +}
> +
>  /* initialize */
>  void si_need_cs_space(struct si_context *ctx)
>  {
> @@ -139,17 +147,14 @@ void si_context_gfx_flush(void *context, unsigned flags,
>
>         si_emit_cache_flush(ctx);
>
> -       if (ctx->trace_buf)
> +       if (ctx->current_saved_cs) {
>                 si_trace_emit(ctx);
> +               si_log_hw_flush(ctx);
>
> -       if (ctx->is_debug) {
>                 /* Save the IB for debug contexts. */
> -               radeon_clear_saved_cs(&ctx->last_gfx);
> -               radeon_save_cs(ws, cs, &ctx->last_gfx, true);
> -               radeon_clear_saved_cs(&ctx->last_ce);
> -               radeon_save_cs(ws, ctx->ce_ib, &ctx->last_ce, false);
> -               r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
> -               r600_resource_reference(&ctx->trace_buf, NULL);
> +               radeon_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
> +               radeon_save_cs(ws, ctx->ce_ib, &ctx->current_saved_cs->ce, false);
> +               ctx->current_saved_cs->flushed = true;
>         }
>
>         /* Flush the CS. */
> @@ -165,31 +170,48 @@ void si_context_gfx_flush(void *context, unsigned flags,
>                  */
>                 ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000);
>
> -               si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX);
> +               si_check_vm_faults(&ctx->b, &ctx->current_saved_cs->gfx, RING_GFX);
>         }
>
> +       if (ctx->current_saved_cs)
> +               si_saved_cs_reference(&ctx->current_saved_cs, NULL);
> +
>         si_begin_new_cs(ctx);
>         ctx->gfx_flush_in_progress = false;
>  }
>
> -void si_begin_new_cs(struct si_context *ctx)
> +static void si_begin_cs_debug(struct si_context *ctx)
>  {
> -       if (ctx->is_debug) {
> -               static const uint32_t zeros[2];
> +       static const uint32_t zeros[2];
> +       assert(!ctx->current_saved_cs);
> +
> +       ctx->current_saved_cs = calloc(1, sizeof(*ctx->current_saved_cs));

The failure path for calloc is missing: ctx->current_saved_cs is dereferenced right below without a NULL check.

>
> -               /* Create a buffer used for writing trace IDs and initialize it to 0. */
> -               assert(!ctx->trace_buf);
> -               ctx->trace_buf = (struct r600_resource*)
> +       pipe_reference_init(&ctx->current_saved_cs->reference, 1);
> +
> +       ctx->current_saved_cs->trace_buf = (struct r600_resource*)
>                                  pipe_buffer_create(ctx->b.b.screen, 0,
>                                                     PIPE_USAGE_STAGING, 8);
> -               if (ctx->trace_buf)
> -                       pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
> -                                                   0, sizeof(zeros), zeros);
> -               ctx->trace_id = 0;
> +       if (!ctx->current_saved_cs) {

This should check ctx->current_saved_cs->trace_buf (the result of pipe_buffer_create), not ctx->current_saved_cs.

With those fixed and the "squash!" patch added, this is:

Reviewed-by: Marek Olšák <marek.olsak at amd.com>

Marek


More information about the mesa-dev mailing list