[Mesa-dev] [PATCH] radeonsi: add cs tracing v2

Christian König deathsimple at vodafone.de
Tue Mar 26 07:59:45 PDT 2013


Am 26.03.2013 15:34, schrieb Marek Olšák:
> Speaking of si_pm4_state, I think it's a horrible mechanism for
> anything other than constant state objects (create/bind/delete
> functions). For everything else (set/draw functions), you want to emit
> directly into the command stream. It's not so different from the bad
> state management which r600g used to have (which is now gone). If you
> have to call malloc or calloc in a set_* or draw_* function, you're
> doing it wrong. Are there plans to change it to something more
> efficient (e.g. how r300g and r600g emit non-CSO states right now), or
> will it be like this forever?

Actually I hoped that r600g sooner or later moves into the same 
direction some more. The fact that we currently need to malloc every 
buffer indeed sucks badly, but that is still better than mixing packet 
generation with driver logic.

Also I don't think that emitting directly into the command stream is 
such a good idea, we sooner or later want that buffer to be a buffer 
allocated in GART memory. And under this condition it is better to build 
up the commands in a (heavily cached) system memory and then memcpy then 
to the destination buffer.

Christian.

>
> Marek
>
> On Tue, Mar 26, 2013 at 3:00 PM, Christian König
> <deathsimple at vodafone.de> wrote:
>> Am 26.03.2013 14:42, schrieb Jerome Glisse:
>>
>>> On Tue, Mar 26, 2013 at 6:22 AM, Christian König
>>> <deathsimple at vodafone.de> wrote:
>>>> Am 25.03.2013 18:15, schrieb j.glisse at gmail.com:
>>>>
>>>>> From: Jerome Glisse <jglisse at redhat.com>
>>>>>
>>>>> Same as on r600, trace cs execution by writting cs offset after each
>>>>> states, this allow to pin point lockup inside command stream and
>>>>> narrow down the scope of lockup investigation.
>>>>>
>>>>> v2: Use WRITE_DATA packet instead of WRITE_MEM
>>>>>
>>>>> Signed-off-by: Jerome Glisse <jglisse at redhat.com>
>>>>> ---
>>>>>     src/gallium/drivers/radeonsi/r600_hw_context.c | 61
>>>>> ++++++++++++++++++++++++++
>>>>>     src/gallium/drivers/radeonsi/radeonsi_pipe.c   | 22 ++++++++++
>>>>>     src/gallium/drivers/radeonsi/radeonsi_pipe.h   | 12 +++++
>>>>>     src/gallium/drivers/radeonsi/radeonsi_pm4.c    | 12 +++++
>>>>>     src/gallium/drivers/radeonsi/si_state_draw.c   |  7 ++-
>>>>>     src/gallium/drivers/radeonsi/sid.h             | 14 ++++++
>>>>>     6 files changed, 127 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>>> b/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>>> index bd348f9..967f093 100644
>>>>> --- a/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>>> +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>>> @@ -142,6 +142,12 @@ void si_need_cs_space(struct r600_context *ctx,
>>>>> unsigned num_dw,
>>>>>           /* Save 16 dwords for the fence mechanism. */
>>>>>           num_dw += 16;
>>>>>     +#if R600_TRACE_CS
>>>>> +       if (ctx->screen->trace_bo) {
>>>>> +               num_dw += R600_TRACE_CS_DWORDS;
>>>>> +       }
>>>>> +#endif
>>>>> +
>>>>>           /* Flush if there's not enough space. */
>>>>>           if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
>>>>>                   radeonsi_flush(&ctx->context, NULL,
>>>>> RADEON_FLUSH_ASYNC);
>>>>> @@ -206,9 +212,41 @@ void si_context_flush(struct r600_context *ctx,
>>>>> unsigned flags)
>>>>>           /* force to keep tiling flags */
>>>>>           flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
>>>>>     +#if R600_TRACE_CS
>>>>> +       if (ctx->screen->trace_bo) {
>>>>> +               struct r600_screen *rscreen = ctx->screen;
>>>>> +               unsigned i;
>>>>> +
>>>>> +               for (i = 0; i < cs->cdw; i++) {
>>>>> +                       fprintf(stderr, "[%4d] [%5d] 0x%08x\n",
>>>>> rscreen->cs_count, i, cs->buf[i]);
>>>>> +               }
>>>>> +               rscreen->cs_count++;
>>>>> +       }
>>>>> +#endif
>>>>> +
>>>>>           /* Flush the CS. */
>>>>>           ctx->ws->cs_flush(ctx->cs, flags);
>>>>>     +#if R600_TRACE_CS
>>>>> +       if (ctx->screen->trace_bo) {
>>>>> +               struct r600_screen *rscreen = ctx->screen;
>>>>> +               unsigned i;
>>>>> +
>>>>> +               for (i = 0; i < 10; i++) {
>>>>> +                       usleep(5);
>>>>> +                       if
>>>>> (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf,
>>>>> RADEON_USAGE_READWRITE)) {
>>>>> +                               break;
>>>>> +                       }
>>>>> +               }
>>>>> +               if (i == 10) {
>>>>> +                       fprintf(stderr, "timeout on cs lockup likely
>>>>> happen at cs %d dw %d\n",
>>>>> +                               rscreen->trace_ptr[1],
>>>>> rscreen->trace_ptr[0]);
>>>>> +               } else {
>>>>> +                       fprintf(stderr, "cs %d executed in %dms\n",
>>>>> rscreen->trace_ptr[1], i * 5);
>>>>> +               }
>>>>> +       }
>>>>> +#endif
>>>>> +
>>>>>           ctx->pm4_dirty_cdwords = 0;
>>>>>           ctx->flags = 0;
>>>>>     @@ -665,3 +703,26 @@ void r600_context_draw_opaque_count(struct
>>>>> r600_context *ctx, struct r600_so_tar
>>>>>           cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size,
>>>>> RADEON_USAGE_READ);
>>>>>       }
>>>>> +
>>>>> +#if R600_TRACE_CS
>>>>> +void r600_trace_emit(struct r600_context *rctx)
>>>>> +{
>>>>> +       struct r600_screen *rscreen = rctx->screen;
>>>>> +       struct radeon_winsys_cs *cs = rctx->cs;
>>>>> +       uint64_t va;
>>>>> +       uint32_t reloc;
>>>>> +
>>>>> +       va = r600_resource_va(&rscreen->screen,
>>>>> (void*)rscreen->trace_bo);
>>>>> +       reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo,
>>>>> RADEON_USAGE_READWRITE);
>>>>> +       cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
>>>>> +       cs->buf[cs->cdw++] =
>>>>> PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
>>>>> +                               PKT3_WRITE_DATA_WR_CONFIRM |
>>>>> +
>>>>> PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
>>>>> +       cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
>>>>> +       cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
>>>>> +       cs->buf[cs->cdw++] = cs->cdw;
>>>>> +       cs->buf[cs->cdw++] = rscreen->cs_count;
>>>>> +       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
>>>>> +       cs->buf[cs->cdw++] = reloc;
>>>>
>>>> The NOP packet here is superfluous,  also I don't really like how this is
>>>> implemented after all.
>>>>
>>>> May I just use this patch as base of a cleaner implementation?
>>>>
>>>> Christian.
>>> Yeah nop is a left over, what don't you like ? This is a build time
>>> debug option only that proved very useful (at least to me) on r600g
>>
>> Yeah it's definitely useful and a quite nifty idea, but as one example I
>> would like to have it as a runtime option instead of a compile time option.
>>
>> Additional to that on SI the stuff in r600_hw_context is more or less
>> deprecated, e.g. all the commands still have the now unnecessary NOPs, and
>> actually I think some of them aren't really supported any more on SI (or at
>> least a bit differently). Also I hoped to move all generating of PKT3 to
>> si_commands. and so separate generating the packets from the logic using the
>> packets.
>>
>> Well to sum it up, you just reminded me of work still on my todo list. I
>> don't mind if you commit it, but I sooner or later wanted to clean that up.
>>
>> Christian.
>>
>>
>>> Cheers,
>>> Jerome
>>>
>>>>> +}
>>>>> +#endif
>>>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>>>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>>>> index c5dac29..a370d7e 100644
>>>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
>>>>> @@ -525,6 +525,14 @@ static void r600_destroy_screen(struct pipe_screen*
>>>>> pscreen)
>>>>>                   rscreen->ws->buffer_unmap(rscreen->fences.bo->cs_buf);
>>>>>                   si_resource_reference(&rscreen->fences.bo, NULL);
>>>>>           }
>>>>> +
>>>>> +#if R600_TRACE_CS
>>>>> +       if (rscreen->trace_bo) {
>>>>> +               rscreen->ws->buffer_unmap(rscreen->trace_bo->cs_buf);
>>>>> +               pipe_resource_reference((struct
>>>>> pipe_resource**)&rscreen->trace_bo, NULL);
>>>>> +       }
>>>>> +#endif
>>>>> +
>>>>>           pipe_mutex_destroy(rscreen->fences.mutex);
>>>>>           rscreen->ws->destroy(rscreen->ws);
>>>>> @@ -727,5 +735,19 @@ struct pipe_screen *radeonsi_screen_create(struct
>>>>> radeon_winsys *ws)
>>>>>           LIST_INITHEAD(&rscreen->fences.blocks);
>>>>>           pipe_mutex_init(rscreen->fences.mutex);
>>>>>     +#if R600_TRACE_CS
>>>>> +       rscreen->cs_count = 0;
>>>>> +       if (rscreen->info.drm_minor >= 28) {
>>>>> +               rscreen->trace_bo = (struct
>>>>> si_resource*)pipe_buffer_create(&rscreen->screen,
>>>>> +
>>>>> PIPE_BIND_CUSTOM,
>>>>> +
>>>>> PIPE_USAGE_STAGING,
>>>>> +
>>>>> 4096);
>>>>> +               if (rscreen->trace_bo) {
>>>>> +                       rscreen->trace_ptr =
>>>>> rscreen->ws->buffer_map(rscreen->trace_bo->cs_buf, NULL,
>>>>> +
>>>>> PIPE_TRANSFER_UNSYNCHRONIZED);
>>>>> +               }
>>>>> +       }
>>>>> +#endif
>>>>> +
>>>>>           return &rscreen->screen;
>>>>>     }
>>>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>>> index d0f04f4..7943563 100644
>>>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>>> @@ -47,6 +47,9 @@
>>>>>     #define R600_BIG_ENDIAN 0
>>>>>     #endif
>>>>>     +#define R600_TRACE_CS 0
>>>>> +#define R600_TRACE_CS_DWORDS           8
>>>>> +
>>>>>     struct r600_pipe_fences {
>>>>>           struct si_resource              *bo;
>>>>>           unsigned                        *data;
>>>>> @@ -67,6 +70,11 @@ struct r600_screen {
>>>>>           struct r600_tiling_info         tiling_info;
>>>>>           struct util_slab_mempool        pool_buffers;
>>>>>           struct r600_pipe_fences         fences;
>>>>> +#if R600_TRACE_CS
>>>>> +       struct si_resource              *trace_bo;
>>>>> +       uint32_t                        *trace_ptr;
>>>>> +       unsigned                        cs_count;
>>>>> +#endif
>>>>>     };
>>>>>       struct si_pipe_sampler_view {
>>>>> @@ -226,6 +234,10 @@ void r600_translate_index_buffer(struct
>>>>> r600_context
>>>>> *r600,
>>>>>                                    struct pipe_index_buffer *ib,
>>>>>                                    unsigned count);
>>>>>     +#if R600_TRACE_CS
>>>>> +void r600_trace_emit(struct r600_context *rctx);
>>>>> +#endif
>>>>> +
>>>>>     /*
>>>>>      * common helpers
>>>>>      */
>>>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>>>> b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>>>> index 79a2521..8e01738 100644
>>>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pm4.c
>>>>> @@ -199,6 +199,12 @@ unsigned si_pm4_dirty_dw(struct r600_context *rctx)
>>>>>                           continue;
>>>>>                   count += state->ndw;
>>>>> +#if R600_TRACE_CS
>>>>> +               /* for tracing each states */
>>>>> +               if (rctx->screen->trace_bo) {
>>>>> +                       count += R600_TRACE_CS_DWORDS;
>>>>> +               }
>>>>> +#endif
>>>>>           }
>>>>>           return count;
>>>>> @@ -219,6 +225,12 @@ void si_pm4_emit(struct r600_context *rctx, struct
>>>>> si_pm4_state *state)
>>>>>           }
>>>>>           cs->cdw += state->ndw;
>>>>> +
>>>>> +#if R600_TRACE_CS
>>>>> +       if (rctx->screen->trace_bo) {
>>>>> +               r600_trace_emit(rctx);
>>>>> +       }
>>>>> +#endif
>>>>>     }
>>>>>       void si_pm4_emit_dirty(struct r600_context *rctx)
>>>>> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>>>>> b/src/gallium/drivers/radeonsi/si_state_draw.c
>>>>> index a78751b..1e1d1cc 100644
>>>>> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>>>>> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>>>>> @@ -579,6 +579,12 @@ void si_draw_vbo(struct pipe_context *ctx, const
>>>>> struct pipe_draw_info *info)
>>>>>           si_pm4_emit_dirty(rctx);
>>>>>           rctx->pm4_dirty_cdwords = 0;
>>>>>     +#if R600_TRACE_CS
>>>>> +       if (rctx->screen->trace_bo) {
>>>>> +               r600_trace_emit(rctx);
>>>>> +       }
>>>>> +#endif
>>>>> +
>>>>>     #if 0
>>>>>           /* Enable stream out if needed. */
>>>>>           if (rctx->streamout_start) {
>>>>> @@ -587,7 +593,6 @@ void si_draw_vbo(struct pipe_context *ctx, const
>>>>> struct pipe_draw_info *info)
>>>>>           }
>>>>>     #endif
>>>>>     -
>>>>>           rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
>>>>>           /* Set the depth buffer as dirty. */
>>>>> diff --git a/src/gallium/drivers/radeonsi/sid.h
>>>>> b/src/gallium/drivers/radeonsi/sid.h
>>>>> index 57553a6..8528981 100644
>>>>> --- a/src/gallium/drivers/radeonsi/sid.h
>>>>> +++ b/src/gallium/drivers/radeonsi/sid.h
>>>>> @@ -77,6 +77,20 @@
>>>>>     #define PKT3_DRAW_INDEX_IMMD                   0x2E
>>>>>     #define PKT3_NUM_INSTANCES                     0x2F
>>>>>     #define PKT3_STRMOUT_BUFFER_UPDATE             0x34
>>>>> +#define PKT3_WRITE_DATA                        0x37
>>>>> +#define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
>>>>> +#define     PKT3_WRITE_DATA_DST_SEL_REG            0
>>>>> +#define     PKT3_WRITE_DATA_DST_SEL_MEM_SYNC       1
>>>>> +#define     PKT3_WRITE_DATA_DST_SEL_TC_OR_L2       2
>>>>> +#define     PKT3_WRITE_DATA_DST_SEL_GDS            3
>>>>> +#define     PKT3_WRITE_DATA_DST_SEL_RESERVED_4     4
>>>>> +#define     PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC      5
>>>>> +#define     PKT3_WR_ONE_ADDR                       (1 << 16)
>>>>> +#define PKT3_WRITE_DATA_WR_CONFIRM                 (1 << 20)
>>>>> +#define PKT3_WRITE_DATA_ENGINE_SEL(x)              ((x) << 30)
>>>>> +#define PKT3_WRITE_DATA_ENGINE_SEL_ME              0
>>>>> +#define PKT3_WRITE_DATA_ENGINE_SEL_PFP             1
>>>>> +#define PKT3_WRITE_DATA_ENGINE_SEL_CE              2
>>>>>     #define PKT3_MEM_SEMAPHORE                     0x39
>>>>>     #define PKT3_MPEG_INDEX                        0x3A
>>>>>     #define PKT3_WAIT_REG_MEM                      0x3C
>>>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list