[Mesa-dev] [PATCH 15/20] radeonsi: add basic infrastructure for atom-based states

Marek Olšák maraeo at gmail.com
Thu Aug 8 06:24:54 PDT 2013


On Thu, Aug 8, 2013 at 2:55 PM, Christian König <deathsimple at vodafone.de> wrote:
> Am 08.08.2013 14:02, schrieb Marek Olšák:
>
>> Why is the order so important? Note that cache flushes shouldn't be
>> treated like states and neither should the draw packets and any state
>> that comes from pipe_draw_info. The only things left are the register
>> updates and resource updates where the order doesn't matter, does it?
>
>
> Unfortunately it does. In theory the CP context should prevent us from
> dealing with such details and allow us to emit the resource updates and
> register writes in any order, but AFAIK in practice we have some erratas on
> this (that reminds me to remind Alex to show you the documents on that).
>
>
>> Anyway, the order is determined by the order of si_add_atom calls,
>> which should be done once in create_context.
>
>
> Then why don't you just use a constant structure for this?

Yes, it indeed looks like a good idea to do so for clarity now that I
know the order does matter.

Marek

>
> Christian.
>
>
>>
>> Marek
>>
>> On Thu, Aug 8, 2013 at 10:32 AM, Christian König
>> <deathsimple at vodafone.de> wrote:
>>>
>>> Am 08.08.2013 02:20, schrieb Marek Olšák:
>>>
>>>> It's the same as in r600g. Look how simple it is.
>>>
>>>
>>> That concept has the problem that we don't necessary know in which order
>>> the
>>> state is emitted.
>>>
>>> Why not just add an "emit" callback to si_pm4_state for the short term
>>> instead?
>>>
>>> For the long term we should probably teach the kernel interface to accept
>>> a
>>> bunch of pointers to smaller IB fragments instead of one large IB. That
>>> would allow us to not only save the copy of commands for the CSO states,
>>> but
>>> also allows us to emit optional IB fragments that are only inserted if
>>> the
>>> we had a context switch in between.
>>>
>>> Christian.
>>>
>>>
>>>> ---
>>>>    src/gallium/drivers/radeonsi/r600_hw_context.c |  8 ++++++++
>>>>    src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  9 +++++++++
>>>>    src/gallium/drivers/radeonsi/si_state.h        | 10 ++++++++++
>>>>    src/gallium/drivers/radeonsi/si_state_draw.c   |  8 +++++++-
>>>>    4 files changed, 34 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>> b/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>> index 382382b..7ed7496 100644
>>>> --- a/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>> +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
>>>> @@ -114,9 +114,17 @@ err:
>>>>    void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>>>>                          boolean count_draw_in)
>>>>    {
>>>> +       int i;
>>>> +
>>>>          /* The number of dwords we already used in the CS so far. */
>>>>          num_dw += ctx->cs->cdw;
>>>>    +     for (i = 0; i < ctx->num_atoms; i++) {
>>>> +               if (ctx->atoms[i]->dirty) {
>>>> +                       num_dw += ctx->atoms[i]->num_dw;
>>>> +               }
>>>> +       }
>>>> +
>>>>          if (count_draw_in) {
>>>>                  /* The number of dwords all the dirty states would
>>>> take.
>>>> */
>>>>                  num_dw += ctx->pm4_dirty_cdwords;
>>>> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>> b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>> index e370149..5fa9bdc 100644
>>>> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
>>>> @@ -145,6 +145,10 @@ struct r600_context {
>>>>          void                            *custom_blend_decompress;
>>>>          struct r600_screen              *screen;
>>>>          struct radeon_winsys            *ws;
>>>> +
>>>> +       struct si_atom                  *atoms[SI_MAX_ATOMS];
>>>> +       unsigned                        num_atoms;
>>>> +
>>>>          struct si_vertex_element        *vertex_elements;
>>>>          struct pipe_framebuffer_state   framebuffer;
>>>>          unsigned                        fb_log_samples;
>>>> @@ -329,4 +333,9 @@ static INLINE uint64_t r600_resource_va(struct
>>>> pipe_screen *screen, struct pipe_
>>>>          return
>>>> rscreen->ws->buffer_get_virtual_address(rresource->cs_buf);
>>>>    }
>>>>    +static INLINE void si_add_atom(struct r600_context *rctx, struct
>>>> si_atom *atom)
>>>> +{
>>>> +       rctx->atoms[rctx->num_atoms++] = atom;
>>>> +}
>>>> +
>>>>    #endif
>>>> diff --git a/src/gallium/drivers/radeonsi/si_state.h
>>>> b/src/gallium/drivers/radeonsi/si_state.h
>>>> index b01fbf2..4aabdef 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_state.h
>>>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>>>> @@ -29,6 +29,16 @@
>>>>      #include "radeonsi_pm4.h"
>>>>    +#define SI_MAX_ATOMS 2
>>>> +
>>>> +/* This encapsulates a state or an operation which can emitted into the
>>>> GPU
>>>> + * command stream. */
>>>> +struct si_atom {
>>>> +       void (*emit)(struct r600_context *ctx, struct si_atom *state);
>>>> +       unsigned                num_dw;
>>>> +       bool                    dirty;
>>>> +};
>>>> +
>>>>    struct si_state_blend {
>>>>          struct si_pm4_state     pm4;
>>>>          uint32_t                cb_target_mask;
>>>> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c
>>>> b/src/gallium/drivers/radeonsi/si_state_draw.c
>>>> index 4208fa7..bcae778 100644
>>>> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
>>>> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
>>>> @@ -664,7 +664,7 @@ void si_draw_vbo(struct pipe_context *ctx, const
>>>> struct pipe_draw_info *info)
>>>>    {
>>>>          struct r600_context *rctx = (struct r600_context *)ctx;
>>>>          struct pipe_index_buffer ib = {};
>>>> -       uint32_t cp_coher_cntl;
>>>> +       uint32_t cp_coher_cntl, i;
>>>>          if (!info->count && (info->indexed ||
>>>> !info->count_from_stream_output))
>>>>                  return;
>>>> @@ -728,6 +728,12 @@ void si_draw_vbo(struct pipe_context *ctx, const
>>>> struct pipe_draw_info *info)
>>>>          si_need_cs_space(rctx, 0, TRUE);
>>>>    +     for (i = 0; i < rctx->num_atoms; i++) {
>>>> +               if (rctx->atoms[i]->dirty) {
>>>> +                       rctx->atoms[i]->emit(rctx, rctx->atoms[i]);
>>>> +               }
>>>> +       }
>>>> +
>>>>          si_pm4_emit_dirty(rctx);
>>>>          rctx->pm4_dirty_cdwords = 0;
>>>>
>>>
>


More information about the mesa-dev mailing list