[Mesa-dev] [PATCH] radeonsi: rework clear_buffer flags

Nicolai Hähnle nhaehnle at gmail.com
Thu Apr 28 17:53:03 UTC 2016


LGTM.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

On 28.04.2016 07:54, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
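> Changes:
> - don't flush DB (depth buffer) caches for fast color clears; only CB and
>   CB metadata are flushed
> - don't flush any caches for initial clears (e.g. HTILE, CMASK and DCC
>   initialization)
> - remove the is_framebuffer flag from si_copy_buffer; always assume shader
>   coherency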
> ---
>   src/gallium/drivers/r600/r600_blit.c          |  2 +-
>   src/gallium/drivers/radeon/r600_pipe_common.c |  4 +--
>   src/gallium/drivers/radeon/r600_pipe_common.h | 10 +++++--
>   src/gallium/drivers/radeon/r600_texture.c     | 11 ++++----
>   src/gallium/drivers/radeon/radeon_video.c     |  2 +-
>   src/gallium/drivers/radeonsi/si_blit.c        |  5 ++--
>   src/gallium/drivers/radeonsi/si_cp_dma.c      | 38 ++++++++++++++++-----------
>   src/gallium/drivers/radeonsi/si_pipe.c        |  3 ++-
>   src/gallium/drivers/radeonsi/si_pipe.h        |  3 +--
>   9 files changed, 46 insertions(+), 32 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
> index 2d30807..ed67cb8 100644
> --- a/src/gallium/drivers/r600/r600_blit.c
> +++ b/src/gallium/drivers/r600/r600_blit.c
> @@ -582,7 +582,7 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
>
>   static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
>   			      uint64_t offset, uint64_t size, unsigned value,
> -			      bool is_framebuffer)
> +			      enum r600_coherency coher)
>   {
>   	struct r600_context *rctx = (struct r600_context*)ctx;
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index 929fecb..823ba46 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -984,12 +984,12 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
>
>   void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
>   			      uint64_t offset, uint64_t size, unsigned value,
> -			      bool is_framebuffer)
> +			      enum r600_coherency coher)
>   {
>   	struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
>
>   	pipe_mutex_lock(rscreen->aux_context_lock);
> -	rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
> +	rctx->clear_buffer(&rctx->b, dst, offset, size, value, coher);
>   	rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
>   	pipe_mutex_unlock(rscreen->aux_context_lock);
>   }
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index d7478ef..74eefbb 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -100,6 +100,12 @@
>   #define R600_MAP_BUFFER_ALIGNMENT 64
>   #define R600_MAX_VIEWPORTS        16
>
> +enum r600_coherency {
> +	R600_COHERENCY_NONE, /* no cache flushes needed */
> +	R600_COHERENCY_SHADER,
> +	R600_COHERENCY_CB_META,
> +};
> +
>   #ifdef PIPE_ARCH_BIG_ENDIAN
>   #define R600_BIG_ENDIAN 1
>   #else
> @@ -513,7 +519,7 @@ struct r600_common_context {
>
>   	void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
>   			     uint64_t offset, uint64_t size, unsigned value,
> -			     bool is_framebuffer);
> +			     enum r600_coherency coher);
>
>   	void (*blit_decompress_depth)(struct pipe_context *ctx,
>   				      struct r600_texture *texture,
> @@ -584,7 +590,7 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
>   			  unsigned processor);
>   void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
>   			      uint64_t offset, uint64_t size, unsigned value,
> -			      bool is_framebuffer);
> +			      enum r600_coherency coher);
>   struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
>   						  const struct pipe_resource *templ);
>   const char *r600_get_llvm_processor_name(enum radeon_family family);
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 7e58490..41bc48a 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -717,7 +717,7 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
>   		R600_ERR("Failed to create buffer object for htile buffer.\n");
>   	} else {
>   		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
> -					 htile_size, 0, true);
> +					 htile_size, 0, R600_COHERENCY_NONE);
>   	}
>   }
>
> @@ -892,13 +892,13 @@ r600_texture_create_object(struct pipe_screen *screen,
>   		/* Initialize the cmask to 0xCC (= compressed state). */
>   		r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
>   					 rtex->cmask.offset, rtex->cmask.size,
> -					 0xCCCCCCCC, true);
> +					 0xCCCCCCCC, R600_COHERENCY_NONE);
>   	}
>   	if (rtex->dcc_offset) {
>   		r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
>   					 rtex->dcc_offset,
>   					 rtex->surface.dcc_size,
> -					 0xFFFFFFFF, true);
> +					 0xFFFFFFFF, R600_COHERENCY_NONE);
>   	}
>
>   	/* Initialize the CMASK base register value. */
> @@ -1623,7 +1623,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>
>   			rctx->clear_buffer(&rctx->b, &tex->resource.b.b,
>   					   tex->dcc_offset, tex->surface.dcc_size,
> -					   reset_value, true);
> +					   reset_value, R600_COHERENCY_CB_META);
>
>   			if (clear_words_needed)
>   				tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
> @@ -1640,7 +1640,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>
>   			/* Do the fast clear. */
>   			rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
> -					tex->cmask.offset, tex->cmask.size, 0, true);
> +					   tex->cmask.offset, tex->cmask.size, 0,
> +					   R600_COHERENCY_CB_META);
>
>   			tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>   		}
> diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
> index e2ff037..acbf790 100644
> --- a/src/gallium/drivers/radeon/radeon_video.c
> +++ b/src/gallium/drivers/radeon/radeon_video.c
> @@ -122,7 +122,7 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
>   	struct r600_common_context *rctx = (struct r600_common_context*)context;
>
>   	rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
> -			   0, false);
> +			   0, R600_COHERENCY_NONE);
>   	context->flush(context, NULL, 0);
>   }
>
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 6fa5b97..0233e10 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -630,7 +630,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
>
>   	/* Handle buffers first. */
>   	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
> -		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
> +		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
>   		return;
>   	}
>
> @@ -949,7 +949,8 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
>   		dword_value = *(uint32_t*)clear_value_ptr;
>   	}
>
> -	sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
> +	sctx->b.clear_buffer(ctx, dst, offset, size, dword_value,
> +			     R600_COHERENCY_SHADER);
>   }
>
>   void si_init_blit_functions(struct si_context *sctx)
> diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
> index bca9cc5..cbb84b0 100644
> --- a/src/gallium/drivers/radeonsi/si_cp_dma.c
> +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
> @@ -107,19 +107,26 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
>   	}
>   }
>
> -static unsigned get_flush_flags(struct si_context *sctx, bool is_framebuffer)
> +static unsigned get_flush_flags(struct si_context *sctx, enum r600_coherency coher)
>   {
> -	if (is_framebuffer)
> -		return SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
> -
> -	return SI_CONTEXT_INV_SMEM_L1 |
> -	       SI_CONTEXT_INV_VMEM_L1 |
> -	       (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
> +	switch (coher) {
> +	default:
> +	case R600_COHERENCY_NONE:
> +		return 0;
> +	case R600_COHERENCY_SHADER:
> +		return SI_CONTEXT_INV_SMEM_L1 |
> +		       SI_CONTEXT_INV_VMEM_L1 |
> +		       (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
> +	case R600_COHERENCY_CB_META:
> +		return SI_CONTEXT_FLUSH_AND_INV_CB |
> +		       SI_CONTEXT_FLUSH_AND_INV_CB_META;
> +	}
>   }
>
> -static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
> +static unsigned get_tc_l2_flag(struct si_context *sctx, enum r600_coherency coher)
>   {
> -	return is_framebuffer || sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
> +	return coher == R600_COHERENCY_SHADER &&
> +	       sctx->b.chip_class >= CIK ? CIK_CP_DMA_USE_L2 : 0;
>   }
>
>   static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst,
> @@ -159,11 +166,11 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
>
>   static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
>   			    uint64_t offset, uint64_t size, unsigned value,
> -			    bool is_framebuffer)
> +			    enum r600_coherency coher)
>   {
>   	struct si_context *sctx = (struct si_context*)ctx;
> -	unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
> -	unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
> +	unsigned tc_l2_flag = get_tc_l2_flag(sctx, coher);
> +	unsigned flush_flags = get_flush_flags(sctx, coher);
>
>   	if (!size)
>   		return;
> @@ -249,14 +256,13 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size)
>
>   void si_copy_buffer(struct si_context *sctx,
>   		    struct pipe_resource *dst, struct pipe_resource *src,
> -		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
> -		    bool is_framebuffer)
> +		    uint64_t dst_offset, uint64_t src_offset, unsigned size)
>   {
>   	uint64_t main_dst_offset, main_src_offset;
>   	unsigned skipped_size = 0;
>   	unsigned realign_size = 0;
> -	unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
> -	unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
> +	unsigned tc_l2_flag = get_tc_l2_flag(sctx, R600_COHERENCY_SHADER);
> +	unsigned flush_flags = get_flush_flags(sctx, R600_COHERENCY_SHADER);
>
>   	if (!size)
>   		return;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index ab6ea40..61d5578 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -224,7 +224,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
>
>   		/* Clear the NULL constant buffer, because loads should return zeros. */
>   		sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
> -				     sctx->null_const_buf.buffer->width0, 0, false);
> +				     sctx->null_const_buf.buffer->width0, 0,
> +				     R600_COHERENCY_SHADER);
>   	}
>
>   	/* XXX: This is the maximum value allowed.  I'm not sure how to compute
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index 13946a5..d31e9a9 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -348,8 +348,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
>   /* si_cp_dma.c */
>   void si_copy_buffer(struct si_context *sctx,
>   		    struct pipe_resource *dst, struct pipe_resource *src,
> -		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
> -		    bool is_framebuffer);
> +		    uint64_t dst_offset, uint64_t src_offset, unsigned size);
>   void si_init_cp_dma_functions(struct si_context *sctx);
>
>   /* si_debug.c */
>


More information about the mesa-dev mailing list