[Mesa-dev] [PATCH 7/9] radeonsi/gfx9: keep reusing the same buffer/address for the gfx9 flush fence

Marek Olšák maraeo at gmail.com
Mon Jun 19 14:11:48 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

Reuse a single per-context scratch buffer for the fence instead of using a
monotonic suballocator.

v2: initialize the memory at context creation
---
 src/gallium/drivers/radeonsi/si_pipe.c       | 18 ++++++++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.h       |  2 ++
 src/gallium/drivers/radeonsi/si_state_draw.c | 16 ++++++++--------
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 895d53f..c5cc415 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -57,20 +57,21 @@ static void si_destroy_context(struct pipe_context *context)
 	r600_resource_reference(&sctx->ce_ram_saved_buffer, NULL);
 	pipe_resource_reference(&sctx->esgs_ring, NULL);
 	pipe_resource_reference(&sctx->gsvs_ring, NULL);
 	pipe_resource_reference(&sctx->tf_ring, NULL);
 	pipe_resource_reference(&sctx->tess_offchip_ring, NULL);
 	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
 	r600_resource_reference(&sctx->border_color_buffer, NULL);
 	free(sctx->border_color_table);
 	r600_resource_reference(&sctx->scratch_buffer, NULL);
 	r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
+	r600_resource_reference(&sctx->wait_mem_scratch, NULL);
 
 	si_pm4_free_state(sctx, sctx->init_config, ~0);
 	if (sctx->init_config_gs_rings)
 		si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
 	for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
 		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
 
 	if (sctx->fixed_func_tcs_shader.cso)
 		sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
 	if (sctx->custom_dsa_flush)
@@ -259,20 +260,37 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	sctx->blitter = util_blitter_create(&sctx->b.b);
 	if (sctx->blitter == NULL)
 		goto fail;
 	sctx->blitter->draw_rectangle = r600_draw_rectangle;
 
 	sctx->sample_mask.sample_mask = 0xffff;
 
 	/* these must be last */
 	si_begin_new_cs(sctx);
 
+	if (sctx->b.chip_class >= GFX9) {
+		sctx->wait_mem_scratch = (struct r600_resource*)
+			pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
+		if (!sctx->wait_mem_scratch)
+			goto fail;
+
+		/* Initialize the memory. */
+		struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+		radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+			    S_370_WR_CONFIRM(1) |
+			    S_370_ENGINE_SEL(V_370_ME));
+		radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
+		radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
+		radeon_emit(cs, sctx->wait_mem_number);
+	}
+
 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
 	 * if NUM_RECORDS == 0). We need to use a dummy buffer instead. */
 	if (sctx->b.chip_class == CIK) {
 		sctx->null_const_buf.buffer =
 			r600_aligned_buffer_create(screen,
 						   R600_RESOURCE_FLAG_UNMAPPABLE,
 						   PIPE_USAGE_DEFAULT, 16,
 						   sctx->screen->b.info.tcc_cache_line_size);
 		if (!sctx->null_const_buf.buffer)
 			goto fail;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e734595..f6fe11b 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -258,20 +258,22 @@ struct si_context {
 	struct r600_common_context	b;
 	struct blitter_context		*blitter;
 	void				*custom_dsa_flush;
 	void				*custom_blend_resolve;
 	void				*custom_blend_fmask_decompress;
 	void				*custom_blend_eliminate_fastclear;
 	void				*custom_blend_dcc_decompress;
 	struct si_screen		*screen;
 	LLVMTargetMachineRef		tm; /* only non-threaded compilation */
 	struct si_shader_ctx_state	fixed_func_tcs_shader;
+	struct r600_resource		*wait_mem_scratch;
+	unsigned			wait_mem_number;
 
 	struct radeon_winsys_cs		*ce_ib;
 	struct radeon_winsys_cs		*ce_preamble_ib;
 	struct r600_resource		*ce_ram_saved_buffer;
 	struct u_suballocator		*ce_suballocator;
 	unsigned			ce_ram_saved_offset;
 	uint16_t			total_ce_ram_allocated;
 	bool				ce_need_synchronization:1;
 
 	bool				gfx_flush_in_progress:1;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 2b000e7..85ceaca 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -947,23 +947,22 @@ void si_emit_cache_flush(struct si_context *sctx)
 	}
 	if (rctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
 	}
 
 	/* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't
 	 * wait for idle on GFX9. We have to use a TS event.
 	 */
 	if (sctx->b.chip_class >= GFX9 && flush_cb_db) {
-		struct r600_resource *rbuf = NULL;
 		uint64_t va;
-		unsigned offset = 0, tc_flags, cb_db_event;
+		unsigned tc_flags, cb_db_event;
 
 		/* Set the CB/DB flush event. */
 		switch (flush_cb_db) {
 		case SI_CONTEXT_FLUSH_AND_INV_CB:
 			cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
 			break;
 		case SI_CONTEXT_FLUSH_AND_INV_DB:
 			cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
 			break;
 		default:
@@ -990,28 +989,29 @@ void si_emit_cache_flush(struct si_context *sctx)
 			tc_flags |= EVENT_TC_ACTION_ENA |
 				    EVENT_TCL1_ACTION_ENA;
 
 			/* Clear the flags. */
 			rctx->flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 |
 					 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
 					 SI_CONTEXT_INV_VMEM_L1);
 			sctx->b.num_L2_invalidates++;
 		}
 
-		/* Allocate memory for the fence. */
-		u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
-				     &offset, (struct pipe_resource**)&rbuf);
-		va = rbuf->gpu_address + offset;
+		/* Do the flush (enqueue the event and wait for it). */
+		va = sctx->wait_mem_scratch->gpu_address;
+		sctx->wait_mem_number++;
 
 		r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1,
-					 rbuf, va, 0, 1);
-		r600_gfx_wait_fence(rctx, va, 1, 0xffffffff);
+					 sctx->wait_mem_scratch, va,
+					 sctx->wait_mem_number - 1,
+					 sctx->wait_mem_number);
+		r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
 	}
 
 	/* Make sure ME is idle (it executes most packets) before continuing.
 	 * This prevents read-after-write hazards between PFP and ME.
 	 */
 	if (cp_coher_cntl ||
 	    (rctx->flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
 			    SI_CONTEXT_INV_VMEM_L1 |
 			    SI_CONTEXT_INV_GLOBAL_L2 |
 			    SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
-- 
2.7.4



More information about the mesa-dev mailing list