[Mesa-stable] [PATCH] radeonsi: Save and restore entire CE RAM.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Sun Jun 5 22:28:17 UTC 2016


This fixes a problem with the CE preamble, where the CE RAM contents were only
restored in the preamble of an IB that needed them.

To illustrate, suppose we have two graphics IBs, 1 and 2, which are submitted in
that order. Furthermore, suppose IB 1 does not use CE RAM but IB 2 does, and we
have a context switch at the start of IB 1, but not between IB 1 and IB 2.

The old code put the CE RAM loads in the preamble of IB 2. As the preamble of
IB 1 does not have the loads and the preamble of IB 2 does not get executed, the
old values are not loaded into CE RAM.

Fix this by always restoring the entire CE RAM.

Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Cc: "12.0" <mesa-stable at lists.freedesktop.org>
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 56 +++++++++++++--------------
 src/gallium/drivers/radeonsi/si_hw_context.c  |  4 ++
 src/gallium/drivers/radeonsi/si_pipe.c        |  7 ++++
 src/gallium/drivers/radeonsi/si_pipe.h        |  1 +
 src/gallium/drivers/radeonsi/si_state.h       |  7 ++--
 5 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index baddc5f..5a1a344 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -160,30 +160,39 @@ static bool si_ce_upload(struct si_context *sctx, unsigned ce_offset, unsigned s
 	return true;
 }
 
-static void si_reinitialize_ce_ram(struct si_context *sctx,
-                            struct si_descriptors *desc)
+void si_ce_save_ram(struct si_context *sctx)
 {
-	if (desc->buffer) {
-		struct r600_resource *buffer = (struct r600_resource*)desc->buffer;
-		unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
-		uint64_t va = buffer->gpu_address + desc->buffer_offset;
-		struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
+	if (!sctx->ce_ib)
+		return;
 
-		if (!ib)
-			ib = sctx->ce_ib;
+	radeon_emit(sctx->ce_ib, PKT3(PKT3_DUMP_CONST_RAM, 3, 0));
+	radeon_emit(sctx->ce_ib, 0);
+	radeon_emit(sctx->ce_ib, SI_CE_RAM_SIZE / 4);
+	radeon_emit(sctx->ce_ib, sctx->ce_ram_data->gpu_address);
+	radeon_emit(sctx->ce_ib, sctx->ce_ram_data->gpu_address >> 32);
 
-		list_size = align(list_size, 32);
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->ce_ram_data,
+	                       RADEON_USAGE_WRITE, RADEON_PRIO_DESCRIPTORS);
+}
 
-		radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
-		radeon_emit(ib, va);
-		radeon_emit(ib, va >> 32);
-		radeon_emit(ib, list_size / 4);
-		radeon_emit(ib, desc->ce_offset);
+void si_ce_restore_ram(struct si_context *sctx)
+{
+	struct radeon_winsys_cs *ib = sctx->ce_preamble_ib;
 
-		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, desc->buffer,
-		                    RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
-	}
-	desc->ce_ram_dirty = false;
+	if (!ib)
+		ib = sctx->ce_ib;
+
+	if (!ib)
+		return;
+
+	radeon_emit(ib, PKT3(PKT3_LOAD_CONST_RAM, 3, 0));
+	radeon_emit(ib, sctx->ce_ram_data->gpu_address);
+	radeon_emit(ib, sctx->ce_ram_data->gpu_address >> 32);
+	radeon_emit(ib, SI_CE_RAM_SIZE / 4);
+	radeon_emit(ib, 0);
+
+	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->ce_ram_data,
+	                    RADEON_USAGE_READ, RADEON_PRIO_DESCRIPTORS);
 }
 
 void si_ce_enable_loads(struct radeon_winsys_cs *ib)
@@ -206,9 +215,6 @@ static bool si_upload_descriptors(struct si_context *sctx,
 	if (sctx->ce_ib) {
 		uint32_t const* list = (uint32_t const*)desc->list;
 
-		if (desc->ce_ram_dirty)
-			si_reinitialize_ce_ram(sctx, desc);
-
 		while(desc->dirty_mask) {
 			int begin, count;
 			u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
@@ -287,8 +293,6 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
 					   RADEON_USAGE_READ);
 	}
 
-	views->desc.ce_ram_dirty = true;
-
 	if (!views->desc.buffer)
 		return;
 	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, views->desc.buffer,
@@ -489,8 +493,6 @@ si_image_views_begin_new_cs(struct si_context *sctx, struct si_images_info *imag
 					   RADEON_USAGE_READWRITE);
 	}
 
-	images->desc.ce_ram_dirty = true;
-
 	if (images->desc.buffer) {
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 					  images->desc.buffer,
@@ -737,8 +739,6 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 				      buffers->shader_usage, buffers->priority);
 	}
 
-	buffers->desc.ce_ram_dirty = true;
-
 	if (!buffers->desc.buffer)
 		return;
 	radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index fa6a2cb..1088c21 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -121,6 +121,8 @@ void si_context_gfx_flush(void *context, unsigned flags,
 		ctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2 |
 				SI_CONTEXT_INV_VMEM_L1;
 
+	si_ce_save_ram(ctx);
+
 	si_emit_cache_flush(ctx, NULL);
 
 	/* force to keep tiling flags */
@@ -213,6 +215,8 @@ void si_begin_new_cs(struct si_context *ctx)
 	else if (ctx->ce_ib)
 		si_ce_enable_loads(ctx->ce_ib);
 
+	si_ce_restore_ram(ctx);
+
 	ctx->framebuffer.dirty_cbufs = (1 << 8) - 1;
 	ctx->framebuffer.dirty_zsbuf = true;
 	si_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 7bb3dc2..a204b9a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -47,6 +47,7 @@ static void si_destroy_context(struct pipe_context *context)
 	if (sctx->ce_suballocator)
 		u_suballocator_destroy(sctx->ce_suballocator);
 
+	r600_resource_reference(&sctx->ce_ram_data, NULL);
 	pipe_resource_reference(&sctx->esgs_ring, NULL);
 	pipe_resource_reference(&sctx->gsvs_ring, NULL);
 	pipe_resource_reference(&sctx->tf_ring, NULL);
@@ -168,6 +169,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 						      PIPE_USAGE_DEFAULT, FALSE);
 		if (!sctx->ce_suballocator)
 			goto fail;
+
+		sctx->ce_ram_data = (struct r600_resource*)
+		                    pipe_buffer_create(screen, PIPE_BIND_CUSTOM,
+		                    PIPE_USAGE_DEFAULT, SI_CE_RAM_SIZE);
+		if (!sctx->ce_ram_data)
+			goto fail;
 	}
 
 	sctx->b.gfx.flush = si_context_gfx_flush;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b3fe656..a62d558 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -199,6 +199,7 @@ struct si_context {
 	struct radeon_winsys_cs		*ce_preamble_ib;
 	bool				ce_need_synchronization;
 	struct u_suballocator		*ce_suballocator;
+	struct r600_resource		*ce_ram_data;
 
 	struct pipe_fence_handle	*last_gfx_fence;
 	struct si_shader_ctx_state	fixed_func_tcs_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index e5795eb..2dd166f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -41,6 +41,7 @@
 #define SI_NUM_SHADER_BUFFERS		16
 
 #define SI_TESS_OFFCHIP_BLOCK_SIZE	(8192 * 4)
+#define SI_CE_RAM_SIZE			32768
 
 struct si_screen;
 struct si_shader;
@@ -205,10 +206,6 @@ struct si_descriptors {
 	/* elements of the list that are changed and need to be uploaded */
 	unsigned dirty_mask;
 
-	/* Whether the CE ram is dirty and needs to be reinitialized entirely
-	 * before we can do partial updates. */
-	bool ce_ram_dirty;
-
 	/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
 	 * array will be stored. */
 	unsigned shader_userdata_offset;
@@ -250,6 +247,8 @@ struct si_buffer_resources {
 	} while(0)
 
 /* si_descriptors.c */
+void si_ce_save_ram(struct si_context *sctx);
+void si_ce_restore_ram(struct si_context *sctx);
 void si_ce_enable_loads(struct radeon_winsys_cs *ib);
 void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
 				    const struct radeon_surf_level *base_level_info,
-- 
2.8.3



More information about the mesa-stable mailing list