[Mesa-dev] [PATCH] radeonsi: add flexible shader descriptor management and use it for sampler views
Alex Deucher
alexdeucher at gmail.com
Thu Aug 15 06:04:50 PDT 2013
On Wed, Aug 14, 2013 at 11:25 PM, Marek Olšák <maraeo at gmail.com> wrote:
> (This should be applied before MSAA, which will need to be rebased.)
>
> It moves all sampler view descriptors to a buffer.
> It supports partial resource updates and it can also unbind resources
> (required for FMASK texturing).
>
> The buffer contains all sampler view descriptors for one shader stage,
> represented as an array. On top of that, there are N arrays in the buffer,
> which are used to emulate context registers as implemented by the previous
> ASICs (each array is a context).
>
> This uses the RCU synchronization approach to avoid read-after-write hazards
> as discussed in the thread:
> "radeonsi: add FMASK texture binding slots and resource setup"
>
> CP DMA is used to clear the descriptors at context initialization and to copy
> the descriptors from one context to the next.
>
> IMPORTANT:
> 128 resource contexts are needed, 64 doesn't work. If I set
> SH_KCACHE_ACTION_ENA before every draw call, only 2 contexts are needed.
> I don't have an explanation for this.
> ---
> src/gallium/drivers/radeonsi/Makefile.sources | 1 +
> src/gallium/drivers/radeonsi/r600_blit.c | 12 +-
> src/gallium/drivers/radeonsi/r600_hw_context.c | 14 ++
> src/gallium/drivers/radeonsi/radeonsi_pipe.c | 7 +-
> src/gallium/drivers/radeonsi/radeonsi_pipe.h | 19 +-
> src/gallium/drivers/radeonsi/si_descriptors.c | 335 +++++++++++++++++++++++++
> src/gallium/drivers/radeonsi/si_state.c | 47 ++--
> src/gallium/drivers/radeonsi/si_state.h | 56 +++++
> src/gallium/drivers/radeonsi/si_state_draw.c | 18 +-
> src/gallium/drivers/radeonsi/sid.h | 43 ++++
> 10 files changed, 500 insertions(+), 52 deletions(-)
> create mode 100644 src/gallium/drivers/radeonsi/si_descriptors.c
>
> diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
> index b3ffa72..68c8282 100644
> --- a/src/gallium/drivers/radeonsi/Makefile.sources
> +++ b/src/gallium/drivers/radeonsi/Makefile.sources
> @@ -10,6 +10,7 @@ C_SOURCES := \
> r600_translate.c \
> radeonsi_pm4.c \
> radeonsi_compute.c \
> + si_descriptors.c \
> si_state.c \
> si_state_streamout.c \
> si_state_draw.c \
> diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c
> index bab108e..5bd1a62 100644
> --- a/src/gallium/drivers/radeonsi/r600_blit.c
> +++ b/src/gallium/drivers/radeonsi/r600_blit.c
> @@ -70,12 +70,12 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
>
> if (op & R600_SAVE_TEXTURES) {
> util_blitter_save_fragment_sampler_states(
> - rctx->blitter, rctx->ps_samplers.n_samplers,
> - (void**)rctx->ps_samplers.samplers);
> + rctx->blitter, rctx->samplers[PIPE_SHADER_FRAGMENT].n_samplers,
> + (void**)rctx->samplers[PIPE_SHADER_FRAGMENT].samplers);
>
> - util_blitter_save_fragment_sampler_views(
> - rctx->blitter, rctx->ps_samplers.n_views,
> - (struct pipe_sampler_view**)rctx->ps_samplers.views);
> + util_blitter_save_fragment_sampler_views(rctx->blitter,
> + util_bitcount(rctx->samplers[PIPE_SHADER_FRAGMENT].views.desc.enabled_mask),
> + rctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
> }
>
> if ((op & R600_DISABLE_RENDER_COND) && rctx->current_render_cond) {
> @@ -224,7 +224,7 @@ void si_flush_depth_textures(struct r600_context *rctx,
> struct pipe_sampler_view *view;
> struct r600_texture *tex;
>
> - view = &textures->views[i]->base;
> + view = textures->views.views[i];
> if (!view) continue;
>
> tex = (struct r600_texture *)view->texture;
> diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
> index 25c972b..cf43089 100644
> --- a/src/gallium/drivers/radeonsi/r600_hw_context.c
> +++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
> @@ -114,9 +114,17 @@ err:
> void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
> boolean count_draw_in)
> {
> + int i;
> +
> /* The number of dwords we already used in the CS so far. */
> num_dw += ctx->cs->cdw;
>
> + for (i = 0; i < SI_NUM_ATOMS(ctx); i++) {
> + if (ctx->atoms.array[i]->dirty) {
> + num_dw += ctx->atoms.array[i]->num_dw;
> + }
> + }
> +
> if (count_draw_in) {
> /* The number of dwords all the dirty states would take. */
> num_dw += ctx->pm4_dirty_cdwords;
> @@ -254,6 +262,10 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
> ctx->pm4_dirty_cdwords = 0;
> ctx->flags = 0;
>
> + /* The CS initialization should be emitted before everything else. */
> + si_pm4_emit(ctx, ctx->queued.named.init);
> + ctx->emitted.named.init = ctx->queued.named.init;
> +
> #if 0
> if (streamout_suspended) {
> ctx->streamout_start = TRUE;
> @@ -270,6 +282,8 @@ void si_context_flush(struct r600_context *ctx, unsigned flags)
> * next draw command
> */
> si_pm4_reset_emitted(ctx);
> +
> + si_all_descriptors_begin_new_cs(ctx);
> }
>
> void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> index b4a1ca9..9afc7f2 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
> @@ -178,6 +178,8 @@ static void r600_destroy_context(struct pipe_context *context)
> {
> struct r600_context *rctx = (struct r600_context *)context;
>
> + si_release_all_descriptors(rctx);
> +
> si_resource_reference(&rctx->border_color_table, NULL);
>
> if (rctx->dummy_pixel_shader) {
> @@ -231,12 +233,15 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
> rctx->context.create_video_buffer = vl_video_buffer_create;
> }
>
> + rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
> +
> + si_init_all_descriptors(rctx);
> +
> switch (rctx->chip_class) {
> case SI:
> case CIK:
> si_init_state_functions(rctx);
> LIST_INITHEAD(&rctx->active_query_list);
> - rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX, NULL);
> rctx->max_db = 8;
> si_init_config(rctx);
> break;
> diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> index 6fbe653..674c630 100644
> --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
> @@ -94,11 +94,8 @@ struct si_cs_shader_state {
> struct si_pipe_compute *program;
> };
>
> -/* needed for blitter save */
> -#define NUM_TEX_UNITS 16
> -
> struct r600_textures_info {
> - struct si_pipe_sampler_view *views[NUM_TEX_UNITS];
> + struct si_sampler_views views;
> struct si_pipe_sampler_state *samplers[NUM_TEX_UNITS];
> unsigned n_views;
> uint32_t depth_texture_mask; /* which textures are depth */
> @@ -131,6 +128,9 @@ struct r600_constbuf_state
> uint32_t dirty_mask;
> };
>
> +#define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0]))
> +#define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1)
> +
> struct r600_context {
> struct pipe_context context;
> struct blitter_context *blitter;
> @@ -142,6 +142,14 @@ struct r600_context {
> void *custom_dsa_flush_inplace;
> struct r600_screen *screen;
> struct radeon_winsys *ws;
> +
> + union {
> + struct {
> + struct si_atom *sampler_views[SI_NUM_SHADERS];
> + };
> + struct si_atom *array[0];
> + } atoms;
> +
> struct si_vertex_element *vertex_elements;
> struct pipe_framebuffer_state framebuffer;
> unsigned pa_sc_line_stipple;
> @@ -161,8 +169,7 @@ struct r600_context {
> unsigned sprite_coord_enable;
> unsigned export_16bpc;
> struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES];
> - struct r600_textures_info vs_samplers;
> - struct r600_textures_info ps_samplers;
> + struct r600_textures_info samplers[SI_NUM_SHADERS];
> struct si_resource *border_color_table;
> unsigned border_color_offset;
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> new file mode 100644
> index 0000000..98cd789
> --- /dev/null
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -0,0 +1,335 @@
> +/*
> + * Copyright 2013 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * on the rights to use, copy, modify, merge, publish, distribute, sub
> + * license, and/or sell copies of the Software, and to permit persons to whom
> + * the Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors:
> + * Marek Olšák <marek.olsak at amd.com>
> + */
> +
> +#include "radeonsi_pipe.h"
> +#include "radeonsi_resource.h"
> +#include "radeonsi_shader.h"
> +#include "r600_hw_context_priv.h"
> +
> +#include "util/u_memory.h"
> +
> +#define SI_NUM_CONTEXTS 128
> +
> +static const uint32_t null_desc[8]; /* zeros */
> +
> +/* Emit a CP DMA packet to do a copy from one buffer to another.
> + * The size must fit in bits [20:0]. Notes:
> + *
> + * 1) Set sync to true if you want the 3D engine to wait until CP DMA is done.
> + *
> + * 2) Set raw_hazard_wait to true if the source data was used as a destination
> + * in a previous CP DMA packet. It's for preventing a read-after-write hazard
> + * between two CP DMA packets.
> + */
> +static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx,
> + uint64_t dst_va, uint64_t src_va,
> + unsigned size,
> + bool sync, bool raw_hazard_wait)
> +{
> + struct radeon_winsys_cs *cs = rctx->cs;
> + uint32_t sync_flag = sync ? PKT3_CP_DMA_CP_SYNC : 0;
> + uint32_t raw_wait = raw_hazard_wait ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
> +
> + assert(size);
> + assert((size & ((1<<21)-1)) == size);
> +
> + cs->buf[cs->cdw++] = PKT3(PKT3_CP_DMA, 4, 0);
> + cs->buf[cs->cdw++] = src_va; /* SRC_ADDR_LO [31:0] */
> + cs->buf[cs->cdw++] = sync_flag | ((src_va >> 32) & 0xff); /* CP_SYNC [31] | SRC_ADDR_HI [7:0] */
> + cs->buf[cs->cdw++] = dst_va; /* DST_ADDR_LO [31:0] */
> + cs->buf[cs->cdw++] = (dst_va >> 32) & 0xff; /* DST_ADDR_HI [7:0] */
> + cs->buf[cs->cdw++] = size | raw_wait; /* COMMAND [29:22] | BYTE_COUNT [20:0] */
> +}
> +
> +/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
> +static void si_emit_cp_dma_clear_buffer(struct r600_context *rctx,
> + uint64_t dst_va, unsigned size,
> + uint32_t clear_value,
> + bool sync, bool raw_hazard_wait)
> +{
> + struct radeon_winsys_cs *cs = rctx->cs;
> + uint32_t sync_flag = sync ? PKT3_CP_DMA_CP_SYNC : 0;
> + uint32_t raw_wait = raw_hazard_wait ? PKT3_CP_DMA_CMD_RAW_WAIT : 0;
> +
> + assert(size);
> + assert((size & ((1<<21)-1)) == size);
> +
> + cs->buf[cs->cdw++] = PKT3(PKT3_CP_DMA, 4, 0);
> + cs->buf[cs->cdw++] = clear_value; /* DATA [31:0] */
> + cs->buf[cs->cdw++] = sync_flag | PKT3_CP_DMA_SRC_SEL(2); /* CP_SYNC [31] | SRC_SEL[30:29] */
> + cs->buf[cs->cdw++] = dst_va; /* DST_ADDR_LO [31:0] */
> + cs->buf[cs->cdw++] = (dst_va >> 32) & 0xff; /* DST_ADDR_HI [7:0] */
> + cs->buf[cs->cdw++] = size | raw_wait; /* COMMAND [29:22] | BYTE_COUNT [20:0] */
CIK parts use the new DMA_DATA packet. Current bonaire ucode may
still support the old packet, but we shouldn't rely on it.
Alex
More information about the mesa-dev
mailing list