[Mesa-dev] [PATCH] r600g: add cs tracing infrastructure for lockup pin pointing

Jerome Glisse j.glisse at gmail.com
Wed Dec 19 09:23:23 PST 2012


On Wed, Dec 19, 2012 at 12:17 PM,  <j.glisse at gmail.com> wrote:
> From: Jerome Glisse <jglisse at redhat.com>
>
> It's a build time option you need to set R600_TRACE_CS to 1 and it
> will print to stderr all cs along as cs trace point value which
> gave last offset into a cs process by the GPU.
>
> Signed-off-by: Jerome Glisse <jglisse at redhat.com>

For information this is something i have been using for a while and i
am just getting tire of porting it over and over so i cleaned it up
into something that i believe is usefull. My rdb tools can be used to
annotate cs output given by this infrastructure: rdb_annotateib
hd2xxx.rdb dumpfile > dumpfile.readablebyhuman

I gave the the last dw before lockup. If you don't have many
application running at the same time it has proven to be accurate most
of the time.

Note you will need the kernel patch i just sent.

Cheers,
Jerome


> ---
>  src/gallium/drivers/r600/r600_hw_context.c      | 41 +++++++++++++++++++++++++
>  src/gallium/drivers/r600/r600_hw_context_priv.h |  5 +--
>  src/gallium/drivers/r600/r600_pipe.c            | 20 ++++++++++++
>  src/gallium/drivers/r600/r600_pipe.h            | 16 ++++++++++
>  src/gallium/drivers/r600/r600_state_common.c    | 26 ++++++++++++++++
>  5 files changed, 106 insertions(+), 2 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
> index cdd31a4..6c8cb9d 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -27,6 +27,7 @@
>  #include "r600d.h"
>  #include "util/u_memory.h"
>  #include <errno.h>
> +#include <unistd.h>
>
>  /* Get backends mask */
>  void r600_get_backend_mask(struct r600_context *ctx)
> @@ -369,6 +370,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>                 for (i = 0; i < R600_NUM_ATOMS; i++) {
>                         if (ctx->atoms[i] && ctx->atoms[i]->dirty) {
>                                 num_dw += ctx->atoms[i]->num_dw;
> +#if R600_TRACE_CS
> +                               if (ctx->screen->trace_bo) {
> +                                       num_dw += R600_TRACE_CS_DWORDS;
> +                               }
> +#endif
>                         }
>                 }
>
> @@ -376,6 +382,11 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>
>                 /* The upper-bound of how much space a draw command would take. */
>                 num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
> +#if R600_TRACE_CS
> +               if (ctx->screen->trace_bo) {
> +                       num_dw += R600_TRACE_CS_DWORDS;
> +               }
> +#endif
>         }
>
>         /* Count in queries_suspend. */
> @@ -717,7 +728,37 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
>         }
>
>         /* Flush the CS. */
> +#if R600_TRACE_CS
> +       if (ctx->screen->trace_bo) {
> +               struct r600_screen *rscreen = ctx->screen;
> +               unsigned i;
> +
> +               for (i = 0; i < cs->cdw; i++) {
> +                       fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
> +               }
> +               rscreen->cs_count++;
> +       }
> +#endif
>         ctx->ws->cs_flush(ctx->cs, flags);
> +#if R600_TRACE_CS
> +       if (ctx->screen->trace_bo) {
> +               struct r600_screen *rscreen = ctx->screen;
> +               unsigned i;
> +
> +               for (i = 0; i < 10; i++) {
> +                       usleep(5);
> +                       if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
> +                               break;
> +                       }
> +               }
> +               if (i == 10) {
> +                       fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
> +                               rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
> +               } else {
> +                       fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
> +               }
> +       }
> +#endif
>
>         r600_begin_new_cs(ctx);
>  }
> diff --git a/src/gallium/drivers/r600/r600_hw_context_priv.h b/src/gallium/drivers/r600/r600_hw_context_priv.h
> index 050c472..692e6ec 100644
> --- a/src/gallium/drivers/r600/r600_hw_context_priv.h
> +++ b/src/gallium/drivers/r600/r600_hw_context_priv.h
> @@ -29,8 +29,9 @@
>  #include "r600_pipe.h"
>
>  /* the number of CS dwords for flushing and drawing */
> -#define R600_MAX_FLUSH_CS_DWORDS 12
> -#define R600_MAX_DRAW_CS_DWORDS 34
> +#define R600_MAX_FLUSH_CS_DWORDS       12
> +#define R600_MAX_DRAW_CS_DWORDS                34
> +#define R600_TRACE_CS_DWORDS           7
>
>  /* these flags are used in register flags and added into block flags */
>  #define REG_FLAG_NEED_BO 1
> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
> index e497744..7990400 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -723,6 +723,12 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
>                 rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
>                 pipe_resource_reference((struct pipe_resource**)&rscreen->fences.bo, NULL);
>         }
> +#if R600_TRACE_CS
> +       if (rscreen->trace_bo) {
> +               rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
> +               pipe_resource_reference((struct pipe_resource**)&rscreen->trace_bo, NULL);
> +       }
> +#endif
>         pipe_mutex_destroy(rscreen->fences.mutex);
>
>         rscreen->ws->destroy(rscreen->ws);
> @@ -1042,5 +1048,19 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
>
>         rscreen->global_pool = compute_memory_pool_new(rscreen);
>
> +#if R600_TRACE_CS
> +       rscreen->cs_count = 0;
> +       if (rscreen->info.drm_minor >= 28) {
> +               rscreen->trace_bo = (struct r600_resource*)pipe_buffer_create(&rscreen->screen,
> +                                                                               PIPE_BIND_CUSTOM,
> +                                                                               PIPE_USAGE_STAGING,
> +                                                                               4096);
> +               if (rscreen->trace_bo) {
> +                       rscreen->trace_ptr = rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
> +                                                                       PIPE_TRANSFER_UNSYNCHRONIZED);
> +               }
> +       }
> +#endif
> +
>         return &rscreen->screen;
>  }
> diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
> index c52e0e1..2733522 100644
> --- a/src/gallium/drivers/r600/r600_pipe.h
> +++ b/src/gallium/drivers/r600/r600_pipe.h
> @@ -37,6 +37,8 @@
>
>  #define R600_NUM_ATOMS 37
>
> +#define R600_TRACE_CS 0
> +
>  #define R600_MAX_USER_CONST_BUFFERS 1
>  #define R600_MAX_DRIVER_CONST_BUFFERS 2
>  #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS)
> @@ -235,6 +237,11 @@ struct r600_screen {
>          * XXX: Not sure if this is the best place for global_pool.  Also,
>          * it's not thread safe, so it won't work with multiple contexts. */
>         struct compute_memory_pool *global_pool;
> +#if R600_TRACE_CS
> +       struct r600_resource            *trace_bo;
> +       uint32_t                        *trace_ptr;
> +       unsigned                        cs_count;
> +#endif
>  };
>
>  struct r600_pipe_sampler_view {
> @@ -533,10 +540,19 @@ static INLINE void r600_emit_command_buffer(struct radeon_winsys_cs *cs,
>         cs->cdw += cb->num_dw;
>  }
>
> +#if R600_TRACE_CS
> +void r600_trace_emit(struct r600_context *rctx);
> +#endif
> +
>  static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
>  {
>         atom->emit(rctx, atom);
>         atom->dirty = false;
> +#if R600_TRACE_CS
> +       if (rctx->screen->trace_bo) {
> +               r600_trace_emit(rctx);
> +       }
> +#endif
>  }
>
>  static INLINE void r600_set_cso_state(struct r600_cso_state *state, void *cso)
> diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
> index c69149b..f8b64cc 100644
> --- a/src/gallium/drivers/r600/r600_state_common.c
> +++ b/src/gallium/drivers/r600/r600_state_common.c
> @@ -1328,6 +1328,12 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
>                                         (info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
>         }
>
> +#if R600_TRACE_CS
> +       if (rctx->screen->trace_bo) {
> +               r600_trace_emit(rctx);
> +       }
> +#endif
> +
>         /* Set the depth buffer as dirty. */
>         if (rctx->framebuffer.state.zsbuf) {
>                 struct pipe_surface *surf = rctx->framebuffer.state.zsbuf;
> @@ -1620,3 +1626,23 @@ void r600_init_common_state_functions(struct r600_context *rctx)
>         rctx->context.set_stream_output_targets = r600_set_so_targets;
>         rctx->context.draw_vbo = r600_draw_vbo;
>  }
> +
> +#if R600_TRACE_CS
> +void r600_trace_emit(struct r600_context *rctx)
> +{
> +       struct r600_screen *rscreen = rctx->screen;
> +       struct radeon_winsys_cs *cs = rctx->cs;
> +       uint64_t va;
> +       uint32_t reloc;
> +
> +       va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
> +       reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
> +       r600_write_value(cs, PKT3(PKT3_MEM_WRITE, 3, 0));
> +       r600_write_value(cs, va & 0xFFFFFFFFUL);
> +       r600_write_value(cs, (va >> 32UL) & 0xFFUL);
> +       r600_write_value(cs, cs->cdw);
> +       r600_write_value(cs, rscreen->cs_count);
> +       r600_write_value(cs, PKT3(PKT3_NOP, 0, 0));
> +       r600_write_value(cs, reloc);
> +}
> +#endif
> --
> 1.7.11.7
>


More information about the mesa-dev mailing list