[Mesa-stable] [Mesa-dev] [PATCH] radeonsi: Save and restore entire CE RAM.
Nicolai Hähnle
nhaehnle at gmail.com
Mon Jun 6 15:14:57 UTC 2016
On 06.06.2016 00:28, Bas Nieuwenhuizen wrote:
> This fixes a problem with the CE preamble and restoring only stuff in the
> preamble when needed.
>
> To illustrate suppose we have two graphics IB's 1 and 2, which are submitted in
> that order. Furthermore suppose IB 1 does not use CE ram, but IB 2 does, and we
> have a context switch at the start of IB 1, but not between IB 1 and IB 2.
>
> The old code put the CE RAM loads in the preamble of IB 2. As the preamble of
> IB 1 does not have the loads and the preamble of IB 2 does not get executed, the
> old values are not load into CE RAM.
>
> Fix this by always restoring the entire CE RAM.
Nice catch!
Have you considered restoring from the desc->buffers instead? The
double-dump seems a bit redundant. But maybe it's easier this way...
Also, do we really need to dump the entire CE RAM? After initializing
the descriptor sets, we should know exactly how much we need...
Nicolai
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> Cc: "12.0" <mesa-stable at lists.freedesktop.org>
> ---
> src/gallium/drivers/radeonsi/si_descriptors.c | 56 +++++++++++++--------------
> src/gallium/drivers/radeonsi/si_hw_context.c | 4 ++
> src/gallium/drivers/radeonsi/si_pipe.c | 7 ++++
> src/gallium/drivers/radeonsi/si_pipe.h | 1 +
> src/gallium/drivers/radeonsi/si_state.h | 7 ++--
> 5 files changed, 43 insertions(+), 32 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index baddc5f..5a1a344 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -160,30 +160,39 @@ static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned s
> return true;
> }
>
> -static void si_reinitialize_ce_ram(struct si_context *sctx,
> - struct si_descriptors *desc)
> +void si_ce_save_ram(struct si_context *sctx)
> {
> - if (desc->buffer) {
> - struct r600_resource *buffer = (struct r600_resource*)desc->buffer;
> - unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
> - uint64_t va = buffer->gpu_address + desc->buffer_offset;
> - struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
> + if (!sctx->ce_ib)
> + return;
>
> - if (!ib)
> - ib = sctx->ce_ib;
> + radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
> + radeon_emit(sctx->ce_ib, 0);
> + radeon_emit(sctx->ce_ib, SI_CE_RAM_SIZE / 4);
> + radeon_emit(sctx->ce_ib, sctx->ce_ram_data->gpu_address);
> + radeon_emit(sctx->ce_ib, sctx->ce_ram_data->gpu_address >> 32);
>
> - list_size = align(list_size, 32);
> + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->ce_ram_data,
> + RADEON_USAGE_WRITE, RADEON_PRIO_DESCRIPTORS);
> +}
>
> - radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
> - radeon_emit(ib, va);
> - radeon_emit(ib, va >> 32);
> - radeon_emit(ib, list_size / 4);
> - radeon_emit(ib, desc->ce_offset);
> +void si_ce_restore_ram(struct si_context *sctx)
> +{
> + struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
>
> - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
> - RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
> - }
> - desc->ce_ram_dirty = false;
> + if (!ib)
> + ib = sctx->ce_ib;
> +
> + if (!ib)
> + return;
> +
> + radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
> + radeon_emit(ib, sctx->ce_ram_data->gpu_address);
> + radeon_emit(ib, sctx->ce_ram_data->gpu_address >> 32);
> + radeon_emit(ib, SI_CE_RAM_SIZE / 4);
> + radeon_emit(ib, 0);
> +
> + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->ce_ram_data,
> + RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
> }
>
> void si_ce_enable_loads(struct radeon_winsys_cs *ib)
> @@ -206,9 +215,6 @@ static bool si_upload_descriptors(struct si_context *sctx,
> if (sctx->ce_ib) {
> uint32_t const* list = (uint32_t const*)desc->list;
>
> - if (desc->ce_ram_dirty)
> - si_reinitialize_ce_ram(sctx, desc);
> -
> while(desc->dirty_mask) {
> int begin, count;
> u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
> @@ -287,8 +293,6 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
> RADEON_USAGE_READ);
> }
>
> - views->desc.ce_ram_dirty = true;
> -
> if (!views->desc.buffer)
> return;
> radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, views->desc.buffer,
> @@ -489,8 +493,6 @@ si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *imag
> RADEON_USAGE_READWRITE);
> }
>
> - images->desc.ce_ram_dirty = true;
> -
> if (images->desc.buffer) {
> radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> images->desc.buffer,
> @@ -737,8 +739,6 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
> buffers->shader_usage, buffers->priority);
> }
>
> - buffers->desc.ce_ram_dirty = true;
> -
> if (!buffers->desc.buffer)
> return;
> radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
> index fa6a2cb..1088c21 100644
> --- a/src/gallium/drivers/radeonsi/si_hw_context.c
> +++ b/src/gallium/drivers/radeonsi/si_hw_context.c
> @@ -121,6 +121,8 @@ void si_context_gfx_flush(void *context, unsigned flags,
> ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 |
> SI_CONTEXT_INV_VMEM_L1;
>
> + si_ce_save_ram(ctx);
> +
> si_emit_cache_flush(ctx, NULL);
>
> /* force to keep tiling flags */
> @@ -213,6 +215,8 @@ void si_begin_new_cs(struct si_context *ctx)
> else if (ctx->ce_ib)
> si_ce_enable_loads(ctx->ce_ib);
>
> + si_ce_restore_ram(ctx);
> +
> ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
> ctx->framebuffer.dirty_zsbuf = true;
> si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 7bb3dc2..a204b9a 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -47,6 +47,7 @@ static void si_destroy_context(struct pipe_context *context)
> if (sctx->ce_suballocator)
> u_suballocator_destroy(sctx->ce_suballocator);
>
> + r600_resource_reference(&sctx->ce_ram_data, NULL);
> pipe_resource_reference(&sctx->esgs_ring, NULL);
> pipe_resource_reference(&sctx->gsvs_ring, NULL);
> pipe_resource_reference(&sctx->tf_ring, NULL);
> @@ -168,6 +169,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
> PIPE_USAGE_DEFAULT, FALSE);
> if (!sctx->ce_suballocator)
> goto fail;
> +
> + sctx->ce_ram_data = (struct r600_resource*)
> + pipe_buffer_create(screen, PIPE_BIND_CUSTOM,
> + PIPE_USAGE_DEFAULT, SI_CE_RAM_SIZE);
> + if (!sctx->ce_ram_data)
> + goto fail;
> }
>
> sctx->b.gfx.flush = si_context_gfx_flush;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index b3fe656..a62d558 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -199,6 +199,7 @@ struct si_context {
> struct radeon_winsys_cs *ce_preamble_ib;
> bool ce_need_synchronization;
> struct u_suballocator *ce_suballocator;
> + struct r600_resource *ce_ram_data;
>
> struct pipe_fence_handle *last_gfx_fence;
> struct si_shader_ctx_state fixed_func_tcs_shader;
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index e5795eb..2dd166f 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -41,6 +41,7 @@
> #define SI_NUM_SHADER_BUFFERS 16
>
> #define SI_TESS_OFFCHIP_BLOCK_SIZE (8192 * 4)
> +#define SI_CE_RAM_SIZE 32768
>
> struct si_screen;
> struct si_shader;
> @@ -205,10 +206,6 @@ struct si_descriptors {
> /* elements of the list that are changed and need to be uploaded */
> unsigned dirty_mask;
>
> - /* Whether the CE ram is dirty and needs to be reinitialized entirely
> - * before we can do partial updates. */
> - bool ce_ram_dirty;
> -
> /* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
> * array will be stored. */
> unsigned shader_userdata_offset;
> @@ -250,6 +247,8 @@ struct si_buffer_resources {
> } while(0)
>
> /* si_descriptors.c */
> +void si_ce_save_ram(struct si_context *sctx);
> +void si_ce_restore_ram(struct si_context *sctx);
> void si_ce_enable_loads(struct radeon_winsys_cs *ib);
> void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
> const struct radeon_surf_level *base_level_info,
>
More information about the mesa-stable
mailing list