[Mesa-dev] [PATCH] radeonsi: add flexible shader descriptor management and use it for sampler views

Alex Deucher alexdeucher at gmail.com
Thu Aug 15 06:04:50 PDT 2013


On Wed, Aug 14, 2013 at 11:25 PM, Marek Olšák <maraeo at gmail.com> wrote:
> (This should be applied before MSAA, which will need to be rebased.)
>
> It moves all sampler view descriptors to a buffer.
> It supports partial resource updates and it can also unbind resources
> (required for FMASK texturing).
>
> The buffer contains all sampler view descriptors for one shader stage,
> represented as an array. On top of that, there are N arrays in the buffer,
> which are used to emulate context registers as implemented by the previous
> ASICs (each array is a context).
>
> This uses the RCU synchronization approach to avoid read-after-write hazards
> as discussed in the thread:
> "radeonsi: add FMASK texture binding slots and resource setup"
>
> CP DMA is used to clear the descriptors at context initialization and to copy
> the descriptors from one context to the next.
>
> IMPORTANT:
>   128 resource contexts are needed, 64 doesn't work. If I set
>   SH_KCACHE_ACTION_ENA before every draw call, only 2 contexts are needed.
>   I don't have an explanation for this.
> ---
>  src/gallium/drivers/radeonsi/Makefile.sources  |   1 +
>  src/gallium/drivers/radeonsi/r600_blit.c       |  12 +-
>  src/gallium/drivers/radeonsi/r600_hw_context.c |  14 ++
>  src/gallium/drivers/radeonsi/radeonsi_pipe.c   |   7 +-
>  src/gallium/drivers/radeonsi/radeonsi_pipe.h   |  19 +-
>  src/gallium/drivers/radeonsi/si_descriptors.c  | 335 +++++++++++++++++++++++++
>  src/gallium/drivers/radeonsi/si_state.c        |  47 ++--
>  src/gallium/drivers/radeonsi/si_state.h        |  56 +++++
>  src/gallium/drivers/radeonsi/si_state_draw.c   |  18 +-
>  src/gallium/drivers/radeonsi/sid.h             |  43 ++++
>  10 files changed, 500 insertions(+), 52 deletions(-)
>  create mode 100644 src/gallium/drivers/radeonsi/si_descriptors.c
>
> diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
> index b3ffa72..68c8282 100644
> --- a/src/gallium/drivers/radeonsi/Makefile.sources
> +++ b/src/gallium/drivers/radeonsi/Makefile.sources
> @@ -10,6 +10,7 @@ C_SOURCES := \
>         r600_translate.c \
>         radeonsi_pm4.c \
>         radeonsi_compute.c \
> +       si_descriptors.c \
>         si_state.c \
>         si_state_streamout.c \
>         si_state_draw.c \
> diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c
> index bab108e..5bd1a62 100644
> --- a/src/gallium/drivers/radeonsi/r600_blit.c
> +++ b/src/gallium/drivers/radeonsi/r600_blit.c
> @@ -70,12 +70,12 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
>
>         if (op & R600_SAVE_TEXTURES) {
>                 util_blitter_save_fragment_sampler_states(
> -                       rctx->blitter, rctx->ps_samplers.n_samplers,
> -                       (void**)rctx->ps_samplers.samplers);
> +                       rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
> +                       (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers);
>
> -               util_blitter_save_fragment_sampler_views(
> -                       rctx->blitter, rctx->ps_samplers.n_views,
> -                       (struct pipe_sampler_view**)rctx->ps_samplers.views);
> +               util_blitter_save_fragment_sampler_views(rctx->blitter,
> +                       util_bitcount(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask),
> +                       rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
>         }
>
>         if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
> @@ -224,7 +224,7 @@ void si_flush_depth_textures(struct r600_context *rctx,
>                 struct pipe_sampler_view *view;
>                 struct r600_texture *tex;
>
> -               view = &textures->views[i]->base;
> +               view = textures->views.views[i];
>                 if (!view) continue;
>
>                 tex = (struct r600_texture *)view->texture;
> diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
> index 25c972b..cf43089 100644
> --- a/src/gallium/drivers/radeonsi/r600_hw_context.c
> +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
> @@ -114,9 +114,17 @@ err:
>  void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
>                         boolean count_draw_in)
>  {
> +       int i;
> +
>         /* The number of dwords we already used in the CS so far. */
>         num_dw += ctx->cs->cdw;
>
> +       for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
> +               if (ctx->atoms.array[i]->dirty) {
> +                       num_dw += ctx->atoms.array[i]->num_dw;
> +               }
> +       }
> +
>         if (count_draw_in) {
>                 /* The number of dwords all the dirty states would take. */
>                 num_dw += ctx->pm4_dirty_cdwords;
> @@ -254,6 +262,10 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
>         ctx->pm4_dirty_cdwords = 0;
>         ctx->flags = 0;
>
> +       /* The CS initialization should be emitted before everything else. */
> +       si_pm4_emit(ctx, ctx->queued.named.init);
> +       ctx->emitted.named.init = ctx->queued.named.init;
> +
>  #if 0
>         if (streamout_suspended) {
>                 ctx->streamout_start = TRUE;
> @@ -270,6 +282,8 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
>          * next draw command
>          */
>         si_pm4_reset_emitted(ctx);
> +
> +       si_all_descriptors_begin_new_cs(ctx);
>  }
>
>  void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> index b4a1ca9..9afc7f2 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> @@ -178,6 +178,8 @@ static void r600_destroy_context(struct pipe_context *context)
>  {
>         struct r600_context *rctx = (struct r600_context *)context;
>
> +       si_release_all_descriptors(rctx);
> +
>         si_resource_reference(&rctx->border_color_table, NULL);
>
>         if (rctx->dummy_pixel_shader) {
> @@ -231,12 +233,15 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
>                 rctx->context.create_video_buffer = vl_video_buffer_create;
>         }
>
> +       rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
> +
> +       si_init_all_descriptors(rctx);
> +
>         switch (rctx->chip_class) {
>         case SI:
>         case CIK:
>                 si_init_state_functions(rctx);
>                 LIST_INITHEAD(&rctx->active_query_list);
> -               rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
>                 rctx->max_db = 8;
>                 si_init_config(rctx);
>                 break;
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> index 6fbe653..674c630 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> @@ -94,11 +94,8 @@ struct si_cs_shader_state {
>         struct si_pipe_compute          *program;
>  };
>
> -/* needed for blitter save */
> -#define NUM_TEX_UNITS 16
> -
>  struct r600_textures_info {
> -       struct si_pipe_sampler_view     *views[NUM_TEX_UNITS];
> +       struct si_sampler_views         views;
>         struct si_pipe_sampler_state    *samplers[NUM_TEX_UNITS];
>         unsigned                        n_views;
>         uint32_t                        depth_texture_mask; /* which textures are depth */
> @@ -131,6 +128,9 @@ struct r600_constbuf_state
>         uint32_t                        dirty_mask;
>  };
>
> +#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0]))
> +#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1)
> +
>  struct r600_context {
>         struct pipe_context             context;
>         struct blitter_context          *blitter;
> @@ -142,6 +142,14 @@ struct r600_context {
>         void                            *custom_dsa_flush_inplace;
>         struct r600_screen              *screen;
>         struct radeon_winsys            *ws;
> +
> +       union {
> +               struct {
> +                       struct si_atom *sampler_views[SI_NUM_SHADERS];
> +               };
> +               struct si_atom *array[0];
> +       } atoms;
> +
>         struct si_vertex_element        *vertex_elements;
>         struct pipe_framebuffer_state   framebuffer;
>         unsigned                        pa_sc_line_stipple;
> @@ -161,8 +169,7 @@ struct r600_context {
>         unsigned                        sprite_coord_enable;
>         unsigned                        export_16bpc;
>         struct r600_constbuf_state      constbuf_state[PIPE_SHADER_TYPES];
> -       struct r600_textures_info       vs_samplers;
> -       struct r600_textures_info       ps_samplers;
> +       struct r600_textures_info       samplers[SI_NUM_SHADERS];
>         struct si_resource              *border_color_table;
>         unsigned                        border_color_offset;
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> new file mode 100644
> index 0000000..98cd789
> --- /dev/null
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -0,0 +1,335 @@
> +/*
> + * Copyright 2013 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * on the rights to use, copy, modify, merge, publish, distribute, sub
> + * license, and/or sell copies of the Software, and to permit persons to whom
> + * the Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors:
> + *      Marek Olšák <marek.olsak at amd.com>
> + */
> +
> +#include "radeonsi_pipe.h"
> +#include "radeonsi_resource.h"
> +#include "radeonsi_shader.h"
> +#include "r600_hw_context_priv.h"
> +
> +#include "util/u_memory.h"
> +
> +#define SI_NUM_CONTEXTS 128
> +
> +static const uint32_t null_desc[8]; /* zeros */
> +
> +/* Emit a CP DMA packet to do a copy from one buffer to another.
> + * The size must fit in bits [20:0]. Notes:
> + *
> + * 1) Set sync to true if you want the 3D engine to wait until CP DMA is done.
> + *
> + * 2) Set raw_hazard_wait to true if the source data was used as a destination
> + *    in a previous CP DMA packet. It's for preventing a read-after-write hazard
> + *    between two CP DMA packets.
> + */
> +static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx,
> +                                      uint64_t dst_va, uint64_t src_va,
> +                                      unsigned size,
> +                                      bool sync, bool raw_hazard_wait)
> +{
> +       struct radeon_winsys_cs *cs = rctx->cs;
> +       uint32_t sync_flag = sync ? PKT3_CP_DMA_CP_SYNC : 0;
> +       uint32_t raw_wait = raw_hazard_wait ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
> +
> +       assert(size);
> +       assert((size & ((1<<21)-1)) == size);
> +
> +       cs->buf[cs->cdw++] = PKT3(PKT3_CP_DMA, 4, 0);
> +       cs->buf[cs->cdw++] = src_va;                    /* SRC_ADDR_LO [31:0] */
> +       cs->buf[cs->cdw++] = sync_flag | ((src_va >> 32) & 0xff); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
> +       cs->buf[cs->cdw++] = dst_va;                    /* DST_ADDR_LO [31:0] */
> +       cs->buf[cs->cdw++] = (dst_va >> 32) & 0xff;     /* DST_ADDR_HI [7:0] */
> +       cs->buf[cs->cdw++] = size | raw_wait;           /* COMMAND [29:22] | BYTE_COUNT [20:0] */
> +}
> +
> +/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
> +static void si_emit_cp_dma_clear_buffer(struct r600_context *rctx,
> +                                       uint64_t dst_va, unsigned size,
> +                                       uint32_t clear_value,
> +                                       bool sync, bool raw_hazard_wait)
> +{
> +       struct radeon_winsys_cs *cs = rctx->cs;
> +       uint32_t sync_flag = sync ? PKT3_CP_DMA_CP_SYNC : 0;
> +       uint32_t raw_wait = raw_hazard_wait ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
> +
> +       assert(size);
> +       assert((size & ((1<<21)-1)) == size);
> +
> +       cs->buf[cs->cdw++] = PKT3(PKT3_CP_DMA, 4, 0);
> +       cs->buf[cs->cdw++] = clear_value;               /* DATA [31:0] */
> +       cs->buf[cs->cdw++] = sync_flag | PKT3_CP_DMA_SRC_SEL(2); /* CP_SYNC [31] | SRC_SEL[30:29] */
> +       cs->buf[cs->cdw++] = dst_va;                    /* DST_ADDR_LO [31:0] */
> +       cs->buf[cs->cdw++] = (dst_va >> 32) & 0xff;     /* DST_ADDR_HI [7:0] */
> +       cs->buf[cs->cdw++] = size | raw_wait;           /* COMMAND [29:22] | BYTE_COUNT [20:0] */


CIK parts use the new DMA_DATA packet.  Current bonaire ucode may
still support the old packet, but we shouldn't rely on it.

Alex


More information about the mesa-dev mailing list