[Mesa-dev] [PATCH 7/9] radeonsi/gfx9: keep reusing the same buffer/address for the gfx9 flush fence

Nicolai Hähnle nhaehnle at gmail.com
Mon Jun 19 12:32:52 UTC 2017


On 16.06.2017 14:58, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> instead of using a monotonic suballocator
> ---
>   src/gallium/drivers/radeonsi/si_pipe.c       |  6 ++++++
>   src/gallium/drivers/radeonsi/si_pipe.h       |  2 ++
>   src/gallium/drivers/radeonsi/si_state_draw.c | 16 ++++++++--------
>   3 files changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 895d53f..11dcbe3 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -57,20 +57,21 @@ static void si_destroy_context(struct pipe_context *context)
>   	r600_resource_reference(&sctx->ce_ram_saved_buffer, NULL);
>   	pipe_resource_reference(&sctx->esgs_ring, NULL);
>   	pipe_resource_reference(&sctx->gsvs_ring, NULL);
>   	pipe_resource_reference(&sctx->tf_ring, NULL);
>   	pipe_resource_reference(&sctx->tess_offchip_ring, NULL);
>   	pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
>   	r600_resource_reference(&sctx->border_color_buffer, NULL);
>   	free(sctx->border_color_table);
>   	r600_resource_reference(&sctx->scratch_buffer, NULL);
>   	r600_resource_reference(&sctx->compute_scratch_buffer, NULL);
> +	r600_resource_reference(&sctx->wait_mem_scratch, NULL);
>   
>   	si_pm4_free_state(sctx, sctx->init_config, ~0);
>   	if (sctx->init_config_gs_rings)
>   		si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0);
>   	for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
>   		si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]);
>   
>   	if (sctx->fixed_func_tcs_shader.cso)
>   		sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso);
>   	if (sctx->custom_dsa_flush)
> @@ -236,20 +237,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>   				   sizeof(*sctx->border_color_table));
>   	if (!sctx->border_color_buffer)
>   		goto fail;
>   
>   	sctx->border_color_map =
>   		ws->buffer_map(sctx->border_color_buffer->buf,
>   			       NULL, PIPE_TRANSFER_WRITE);
>   	if (!sctx->border_color_map)
>   		goto fail;
>   
> +	sctx->wait_mem_scratch = (struct r600_resource*)
> +		pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
> +	if (!sctx->wait_mem_scratch)
> +		goto fail;

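The memory returned here is not 0-initialized, so there's a chance that 
the first fence doesn't work correctly: the first flush waits for the 
value 1 (wait_mem_number after its first increment), and if the buffer 
happens to already contain 1, the wait can pass before the event has 
actually been signaled. A possible fix would be to just add a write_data 
that clears the buffer in the first IB, or when 
sctx->wait_mem_number == 0.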

Cheers,
Nicolai


> +
>   	si_init_all_descriptors(sctx);
>   	si_init_state_functions(sctx);
>   	si_init_shader_functions(sctx);
>   	si_init_ia_multi_vgt_param_table(sctx);
>   
>   	if (sctx->b.chip_class >= CIK)
>   		cik_init_sdma_functions(sctx);
>   	else
>   		si_init_dma_functions(sctx);
>   
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index e734595..f6fe11b 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -258,20 +258,22 @@ struct si_context {
>   	struct r600_common_context	b;
>   	struct blitter_context		*blitter;
>   	void				*custom_dsa_flush;
>   	void				*custom_blend_resolve;
>   	void				*custom_blend_fmask_decompress;
>   	void				*custom_blend_eliminate_fastclear;
>   	void				*custom_blend_dcc_decompress;
>   	struct si_screen		*screen;
>   	LLVMTargetMachineRef		tm; /* only non-threaded compilation */
>   	struct si_shader_ctx_state	fixed_func_tcs_shader;
> +	struct r600_resource		*wait_mem_scratch;
> +	unsigned			wait_mem_number;
>   
>   	struct radeon_winsys_cs		*ce_ib;
>   	struct radeon_winsys_cs		*ce_preamble_ib;
>   	struct r600_resource		*ce_ram_saved_buffer;
>   	struct u_suballocator		*ce_suballocator;
>   	unsigned			ce_ram_saved_offset;
>   	uint16_t			total_ce_ram_allocated;
>   	bool				ce_need_synchronization:1;
>   
>   	bool				gfx_flush_in_progress:1;
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 2b000e7..85ceaca 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -947,23 +947,22 @@ void si_emit_cache_flush(struct si_context *sctx)
>   	}
>   	if (rctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) {
>   		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
>   		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
>   	}
>   
>   	/* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't
>   	 * wait for idle on GFX9. We have to use a TS event.
>   	 */
>   	if (sctx->b.chip_class >= GFX9 && flush_cb_db) {
> -		struct r600_resource *rbuf = NULL;
>   		uint64_t va;
> -		unsigned offset = 0, tc_flags, cb_db_event;
> +		unsigned tc_flags, cb_db_event;
>   
>   		/* Set the CB/DB flush event. */
>   		switch (flush_cb_db) {
>   		case SI_CONTEXT_FLUSH_AND_INV_CB:
>   			cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
>   			break;
>   		case SI_CONTEXT_FLUSH_AND_INV_DB:
>   			cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
>   			break;
>   		default:
> @@ -990,28 +989,29 @@ void si_emit_cache_flush(struct si_context *sctx)
>   			tc_flags |= EVENT_TC_ACTION_ENA |
>   				    EVENT_TCL1_ACTION_ENA;
>   
>   			/* Clear the flags. */
>   			rctx->flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 |
>   					 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
>   					 SI_CONTEXT_INV_VMEM_L1);
>   			sctx->b.num_L2_invalidates++;
>   		}
>   
> -		/* Allocate memory for the fence. */
> -		u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4,
> -				     &offset, (struct pipe_resource**)&rbuf);
> -		va = rbuf->gpu_address + offset;
> +		/* Do the flush (enqueue the event and wait for it). */
> +		va = sctx->wait_mem_scratch->gpu_address;
> +		sctx->wait_mem_number++;
>   
>   		r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1,
> -					 rbuf, va, 0, 1);
> -		r600_gfx_wait_fence(rctx, va, 1, 0xffffffff);
> +					 sctx->wait_mem_scratch, va,
> +					 sctx->wait_mem_number - 1,
> +					 sctx->wait_mem_number);
> +		r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
>   	}
>   
>   	/* Make sure ME is idle (it executes most packets) before continuing.
>   	 * This prevents read-after-write hazards between PFP and ME.
>   	 */
>   	if (cp_coher_cntl ||
>   	    (rctx->flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>   			    SI_CONTEXT_INV_VMEM_L1 |
>   			    SI_CONTEXT_INV_GLOBAL_L2 |
>   			    SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list