[Mesa-dev] [PATCH v2 05/12] radeonsi: Create CE IB.

Marek Olšák maraeo at gmail.com
Sun Apr 17 22:19:49 UTC 2016


On Mon, Apr 18, 2016 at 12:08 AM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> On Mon, Apr 18, 2016 at 12:04 AM, Marek Olšák <maraeo at gmail.com> wrote:
>> On Sun, Apr 17, 2016 at 1:43 AM, Bas Nieuwenhuizen
>> <bas at basnieuwenhuizen.nl> wrote:
>>> Based on work by Marek Olšák.
>>>
>>> v2: Add preamble IB.
>>>
>>> Leaves the load packet in the space calculation as the
>>> radeon winsys might not be able to support a premable.
>>>
>>> The added space calculation may look expensive, but
>>> is converted to a constant with (at least) -O2 and -O3.
>>>
>>> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>>> ---
>>>  src/gallium/drivers/radeon/r600_pipe_common.c |  1 +
>>>  src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
>>>  src/gallium/drivers/radeonsi/si_hw_context.c  | 32 ++++++++++++++++++++++++++-
>>>  src/gallium/drivers/radeonsi/si_pipe.c        | 12 ++++++++++
>>>  src/gallium/drivers/radeonsi/si_pipe.h        |  3 +++
>>>  5 files changed, 48 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
>>> index a7477ab..a8660f2 100644
>>> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
>>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
>>> @@ -402,6 +402,7 @@ static const struct debug_named_value common_debug_options[] = {
>>>         { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
>>>         { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
>>>         { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
>>> +       { "noce", DBG_NO_CE, "Disable the constant engine"},
>>>
>>>         DEBUG_NAMED_VALUE_END /* must be last */
>>>  };
>>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
>>> index b23a780..91f8d5e 100644
>>> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
>>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
>>> @@ -95,6 +95,7 @@
>>>  #define DBG_NO_RB_PLUS         (1llu << 45)
>>>  #define DBG_SI_SCHED           (1llu << 46)
>>>  #define DBG_MONOLITHIC_SHADERS (1llu << 47)
>>> +#define DBG_NO_CE              (1llu << 48)
>>>
>>>  #define R600_MAP_BUFFER_ALIGNMENT 64
>>>  #define R600_MAX_VIEWPORTS        16
>>> diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
>>> index b621b55..60f2b58 100644
>>> --- a/src/gallium/drivers/radeonsi/si_hw_context.c
>>> +++ b/src/gallium/drivers/radeonsi/si_hw_context.c
>>> @@ -26,10 +26,38 @@
>>>
>>>  #include "si_pipe.h"
>>>
>>> +static unsigned si_descriptor_list_cs_space(unsigned count, unsigned element_size)
>>> +{
>>> +       /* 5 dwords for possible load to reinitialize + 5 dwords for write to
>>> +        * L2 + 3 bytes for every range written to CE RAM.
>>> +        */
>>> +       return 5 + 5 + 3 + count * MAX2(3, element_size);
>>
>> Please make it clear in the comment that the load packet is needed in
>> the main CE IB only when the preamble is unsupported.
>>
>> Also, that MAX2 statement seems useless, because element_size is
>> always >= 4. Did you mean (3 + element_size)?
>>
>
> No, just defensive programming against element_size < 3. Can remove it
> if preferred.

I prefer assert(element_size >= 3).

>
>>> +}
>>> +
>>> +static unsigned si_ce_needed_cs_space() {
>>> +       unsigned space = 0;
>>> +
>>> +       space += si_descriptor_list_cs_space(SI_NUM_CONST_BUFFERS, 4);
>>> +       space += si_descriptor_list_cs_space(SI_NUM_RW_BUFFERS, 4);
>>> +       space += si_descriptor_list_cs_space(SI_NUM_SHADER_BUFFERS, 4);
>>> +       space += si_descriptor_list_cs_space(SI_NUM_SAMPLERS, 16);
>>> +       space += si_descriptor_list_cs_space(SI_NUM_IMAGES, 8);
>>> +
>>> +       space *= SI_NUM_SHADERS;
>>> +
>>> +       space += si_descriptor_list_cs_space(SI_NUM_VERTEX_BUFFERS, 4);
>>
>> You dropped vertex buffer support, didn't you? This looks like a leftover.
>>
>
> Indeed, will fix.
>
>>> +
>>> +       /* Increment CE counter packet */
>>> +       space += 2;
>>> +
>>> +       return space;
>>> +}
>>> +
>>>  /* initialize */
>>>  void si_need_cs_space(struct si_context *ctx)
>>>  {
>>>         struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
>>> +       struct radeon_winsys_cs *ce_ib = ctx->ce_ib;
>>>         struct radeon_winsys_cs *dma = ctx->b.dma.cs;
>>>
>>>         /* Flush the DMA IB if it's not empty. */
>>> @@ -53,7 +81,9 @@ void si_need_cs_space(struct si_context *ctx)
>>>         /* If the CS is sufficiently large, don't count the space needed
>>>          * and just flush if there is not enough space left.
>>>          */
>>> -       if (unlikely(cs->cdw > cs->max_dw - 2048))
>>> +       if (unlikely(cs->cdw > cs->max_dw - 2048 ||
>>> +                     (ce_ib && ce_ib->max_dw - ce_ib->cdw <
>>> +                      si_ce_needed_cs_space())))
>>>                 ctx->b.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
>>>  }
>>>
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
>>> index 6a990ed..ceacf37 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> @@ -142,6 +142,18 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>>>
>>>         sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
>>>                                        si_context_gfx_flush, sctx);
>>> +
>>> +       if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
>>> +               sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
>>> +               if (!sctx->ce_ib)
>>> +                       goto fail;
>>> +
>>> +               if (ws->cs_add_const_preamble_ib) {
>>> +                       sctx->ce_preamble_ib =
>>> +                                  ws->cs_add_const_preamble_ib(sctx->b.gfx.cs);
>>
>> please check for failure here
>
> Was thinking we could just use the preamble is unsupported path here
> even though creation failed.

My preference is to fail if the callback fails. It's up to you.

Marek


More information about the mesa-dev mailing list