[Mesa-dev] [PATCH 4/4] radeonsi: use SDMA for uploading data through const_uploader

Nicolai Hähnle nhaehnle at gmail.com
Mon Feb 11 09:38:27 UTC 2019


On 07.02.19 02:22, Marek Olšák wrote:
> +	bool use_sdma_upload = sscreen->info.has_dedicated_vram && sctx->dma_cs && debug_get_bool_option("SDMA", true);

Could you please namespace the environment variable, e.g. RADEONSI_SDMA?

Apart from that, the series is

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


> +	sctx->b.const_uploader = u_upload_create(&sctx->b, 256 * 1024,
> +						 0, PIPE_USAGE_DEFAULT,
> +						 SI_RESOURCE_FLAG_32BIT |
> +						 (use_sdma_upload ?
> +							  SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA :
> +							  (sscreen->cpdma_prefetch_writes_memory ?
> +								   0 : SI_RESOURCE_FLAG_READ_ONLY)));
> +	if (!sctx->b.const_uploader)
> +		goto fail;
> +
> +	if (use_sdma_upload)
> +		u_upload_enable_flush_explicit(sctx->b.const_uploader);
> +
>   	si_init_buffer_functions(sctx);
>   	si_init_clear_functions(sctx);
>   	si_init_blit_functions(sctx);
>   	si_init_compute_functions(sctx);
>   	si_init_compute_blit_functions(sctx);
>   	si_init_debug_functions(sctx);
>   	si_init_msaa_functions(sctx);
>   	si_init_streamout_functions(sctx);
>   
>   	if (sscreen->info.has_hw_decode) {
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index b01d5744752..b208bdeb848 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -103,20 +103,22 @@
>   #define SI_MAX_VARIABLE_THREADS_PER_BLOCK 1024
>   
>   #define SI_RESOURCE_FLAG_TRANSFER	(PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
>   #define SI_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
>   #define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
>   #define SI_RESOURCE_FLAG_DISABLE_DCC	(PIPE_RESOURCE_FLAG_DRV_PRIV << 3)
>   #define SI_RESOURCE_FLAG_UNMAPPABLE	(PIPE_RESOURCE_FLAG_DRV_PRIV << 4)
>   #define SI_RESOURCE_FLAG_READ_ONLY	(PIPE_RESOURCE_FLAG_DRV_PRIV << 5)
>   #define SI_RESOURCE_FLAG_32BIT		(PIPE_RESOURCE_FLAG_DRV_PRIV << 6)
>   #define SI_RESOURCE_FLAG_CLEAR		(PIPE_RESOURCE_FLAG_DRV_PRIV << 7)
> +/* For const_uploader, upload data via GTT and copy to VRAM on context flush via SDMA. */
> +#define SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA  (PIPE_RESOURCE_FLAG_DRV_PRIV << 8)
>   
>   enum si_clear_code
>   {
>   	DCC_CLEAR_COLOR_0000   = 0x00000000,
>   	DCC_CLEAR_COLOR_0001   = 0x40404040,
>   	DCC_CLEAR_COLOR_1110   = 0x80808080,
>   	DCC_CLEAR_COLOR_1111   = 0xC0C0C0C0,
>   	DCC_CLEAR_COLOR_REG    = 0x20202020,
>   	DCC_UNCOMPRESSED       = 0xFFFFFFFF,
>   };
> @@ -769,20 +771,28 @@ struct si_saved_cs {
>   	struct si_context	*ctx;
>   	struct radeon_saved_cs	gfx;
>   	struct si_resource	*trace_buf;
>   	unsigned		trace_id;
>   
>   	unsigned		gfx_last_dw;
>   	bool			flushed;
>   	int64_t			time_flush;
>   };
>   
> +struct si_sdma_upload {
> +	struct si_resource	*dst;
> +	struct si_resource	*src;
> +	unsigned		src_offset;
> +	unsigned		dst_offset;
> +	unsigned		size;
> +};
> +
>   struct si_context {
>   	struct pipe_context		b; /* base class */
>   
>   	enum radeon_family		family;
>   	enum chip_class			chip_class;
>   
>   	struct radeon_winsys		*ws;
>   	struct radeon_winsys_ctx	*ctx;
>   	struct radeon_cmdbuf		*gfx_cs;
>   	struct radeon_cmdbuf		*dma_cs;
> @@ -1074,20 +1084,26 @@ struct si_context {
>   	int				num_perfect_occlusion_queries;
>   	struct list_head		active_queries;
>   	unsigned			num_cs_dw_queries_suspend;
>   
>   	/* Render condition. */
>   	struct pipe_query		*render_cond;
>   	unsigned			render_cond_mode;
>   	bool				render_cond_invert;
>   	bool				render_cond_force_off; /* for u_blitter */
>   
> +	/* For uploading data via GTT and copy to VRAM on context flush via SDMA. */
> +	bool				sdma_uploads_in_progress;
> +	struct si_sdma_upload		*sdma_uploads;
> +	unsigned			num_sdma_uploads;
> +	unsigned			max_sdma_uploads;
> +
>   	/* Statistics gathering for the DCC enablement heuristic. It can't be
>   	 * in si_texture because si_texture can be shared by multiple
>   	 * contexts. This is for back buffers only. We shouldn't get too many
>   	 * of those.
>   	 *
>   	 * X11 DRI3 rotates among a finite set of back buffers. They should
>   	 * all fit in this array. If they don't, separate DCC might never be
>   	 * enabled by DCC stat gathering.
>   	 */
>   	struct {
> @@ -1273,20 +1289,21 @@ struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
>   					  struct tc_unflushed_batch_token *tc_token);
>   
>   /* si_get.c */
>   void si_init_screen_get_functions(struct si_screen *sscreen);
>   
>   /* si_gfx_cs.c */
>   void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
>   		     struct pipe_fence_handle **fence);
>   void si_begin_new_gfx_cs(struct si_context *ctx);
>   void si_need_gfx_cs_space(struct si_context *ctx);
> +void si_unref_sdma_uploads(struct si_context *sctx);
>   
>   /* si_gpu_load.c */
>   void si_gpu_load_kill_thread(struct si_screen *sscreen);
>   uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type);
>   unsigned si_end_counter(struct si_screen *sscreen, unsigned type,
>   			uint64_t begin);
>   
>   /* si_compute.c */
>   void si_init_compute_functions(struct si_context *sctx);
>   
> 

-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list