[Mesa-dev] [PATCH 1/5] radeonsi: remove fast color clear for single-sample buffers

Dieter Nützel Dieter at nuetzel-hh.de
Fri Mar 30 08:47:31 UTC 2018


Hello Marek,

Patches 2-3 have landed.
Is patch #1 dead after my findings? ;-)

Dieter

Am 11.03.2018 19:11, schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> This should improve the score for the GpuTest Triangle benchmark.
> Vulkan doesn't use this either.
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.h |  1 -
>  src/gallium/drivers/radeon/r600_texture.c     | 11 +-------
>  src/gallium/drivers/radeonsi/si_clear.c       | 37 ++-------------------------
>  src/gallium/drivers/radeonsi/si_state.c       |  6 -----
>  4 files changed, 3 insertions(+), 52 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 7941903..9701757 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -209,21 +209,20 @@ struct r600_cmask_info {
>  struct r600_texture {
>  	struct r600_resource		resource;
> 
>  	struct radeon_surf		surface;
>  	uint64_t			size;
>  	struct r600_texture		*flushed_depth_texture;
> 
>  	/* Colorbuffer compression and fast clear. */
>  	struct r600_fmask_info		fmask;
>  	struct r600_cmask_info		cmask;
> -	struct r600_resource		*cmask_buffer;
>  	uint64_t			dcc_offset; /* 0 = disabled */
>  	unsigned			cb_color_info; /* fast clear enable bit */
>  	unsigned			color_clear_value[2];
>  	unsigned			last_msaa_resolve_target_micro_mode;
>  	unsigned			num_level0_transfers;
> 
>  	/* Depth buffer compression and fast clear. */
>  	uint64_t			htile_offset;
>  	float				depth_clear_value;
>  	uint16_t			dirty_level_mask; /* each bit says if that mipmap is compressed */
> diff --git a/src/gallium/drivers/radeon/r600_texture.c
> b/src/gallium/drivers/radeon/r600_texture.c
> index 125e7ef..03bc955 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen 
> *sscreen,
>  {
>  	if (!rtex->cmask.size)
>  		return;
> 
>  	assert(rtex->resource.b.b.nr_samples <= 1);
> 
>  	/* Disable CMASK. */
>  	memset(&rtex->cmask, 0, sizeof(rtex->cmask));
>  	rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
>  	rtex->dirty_level_mask = 0;
> -
>  	rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
> 
> -	if (rtex->cmask_buffer != &rtex->resource)
> -	    r600_resource_reference(&rtex->cmask_buffer, NULL);
> -
>  	/* Notify all contexts about the change. */
>  	p_atomic_inc(&sscreen->dirty_tex_counter);
>  	p_atomic_inc(&sscreen->compressed_colortex_counter);
>  }
> 
>  static bool r600_can_disable_dcc(struct r600_texture *rtex)
>  {
>  	/* We can't disable DCC if it can be written by another process. */
>  	return rtex->dcc_offset &&
>  	       (!rtex->resource.b.is_shared ||
> @@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct
> pipe_screen* screen,
>  					      slice_size, whandle);
>  }
> 
>  static void r600_texture_destroy(struct pipe_screen *screen,
>  				 struct pipe_resource *ptex)
>  {
>  	struct r600_texture *rtex = (struct r600_texture*)ptex;
>  	struct r600_resource *resource = &rtex->resource;
> 
>  	r600_texture_reference(&rtex->flushed_depth_texture, NULL);
> -
> -	if (rtex->cmask_buffer != &rtex->resource) {
> -	    r600_resource_reference(&rtex->cmask_buffer, NULL);
> -	}
>  	pb_reference(&resource->buf, NULL);
>  	r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
>  	r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
>  	FREE(rtex);
>  }
> 
>  static const struct u_resource_vtbl r600_texture_vtbl;
> 
>  /* The number of samples can be specified independently of the 
> texture. */
>  void si_texture_get_fmask_info(struct si_screen *sscreen,
> @@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen 
> *screen,
>  			rtex->db_compatible = true;
> 
>  			if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
>  				r600_texture_allocate_htile(sscreen, rtex);
>  		}
>  	} else {
>  		if (base->nr_samples > 1) {
>  			if (!buf) {
>  				r600_texture_allocate_fmask(sscreen, rtex);
>  				r600_texture_allocate_cmask(sscreen, rtex);
> -				rtex->cmask_buffer = &rtex->resource;
>  			}
>  			if (!rtex->fmask.size || !rtex->cmask.size) {
>  				FREE(rtex);
>  				return NULL;
>  			}
>  		}
> 
>  		/* Shared textures must always set up DCC here.
>  		 * If it's not present, it will be disabled by
>  		 * apply_opaque_metadata later.
> @@ -1306,21 +1297,21 @@ r600_texture_create_object(struct pipe_screen 
> *screen,
>  		resource->bo_alignment = buf->alignment;
>  		resource->domains = 
> sscreen->ws->buffer_get_initial_domain(resource->buf);
>  		if (resource->domains & RADEON_DOMAIN_VRAM)
>  			resource->vram_usage = buf->size;
>  		else if (resource->domains & RADEON_DOMAIN_GTT)
>  			resource->gart_usage = buf->size;
>  	}
> 
>  	if (rtex->cmask.size) {
>  		/* Initialize the cmask to 0xCC (= compressed state). */
> -		si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b,
> +		si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
>  					 rtex->cmask.offset, rtex->cmask.size,
>  					 0xCCCCCCCC);
>  	}
>  	if (rtex->htile_offset) {
>  		uint32_t clear_value = 0;
> 
>  		if (sscreen->info.chip_class >= GFX9 || rtex->tc_compatible_htile)
>  			clear_value = 0x0000030F;
> 
>  		si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
> diff --git a/src/gallium/drivers/radeonsi/si_clear.c
> b/src/gallium/drivers/radeonsi/si_clear.c
> index 464b9d7..a940aea 100644
> --- a/src/gallium/drivers/radeonsi/si_clear.c
> +++ b/src/gallium/drivers/radeonsi/si_clear.c
> @@ -26,51 +26,20 @@
> 
>  #include "util/u_format.h"
>  #include "util/u_pack_color.h"
>  #include "util/u_surface.h"
> 
>  enum {
>  	SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
>  	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
>  };
> 
> -static void si_alloc_separate_cmask(struct si_screen *sscreen,
> -				    struct r600_texture *rtex)
> -{
> -	if (rtex->cmask_buffer)
> -                return;
> -
> -	assert(rtex->cmask.size == 0);
> -
> -	si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
> -	if (!rtex->cmask.size)
> -		return;
> -
> -	rtex->cmask_buffer = (struct r600_resource *)
> -		si_aligned_buffer_create(&sscreen->b,
> -					 R600_RESOURCE_FLAG_UNMAPPABLE,
> -					 PIPE_USAGE_DEFAULT,
> -					 rtex->cmask.size,
> -					 rtex->cmask.alignment);
> -	if (rtex->cmask_buffer == NULL) {
> -		rtex->cmask.size = 0;
> -		return;
> -	}
> -
> -	/* update colorbuffer state bits */
> -	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
> -
> -	rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
> -
> -	p_atomic_inc(&sscreen->compressed_colortex_counter);
> -}
> -
>  static void si_set_clear_color(struct r600_texture *rtex,
>  			       enum pipe_format surface_format,
>  			       const union pipe_color_union *color)
>  {
>  	union util_color uc;
> 
>  	memset(&uc, 0, sizeof(uc));
> 
>  	if (rtex->surface.bpe == 16) {
>  		/* DCC fast clear only:
> @@ -451,21 +420,21 @@ static void si_do_fast_color_clear(struct
> si_context *sctx,
> 
>  			if (clear_words_needed && too_small)
>  				continue;
> 
>  			/* DCC fast clear with MSAA should clear CMASK to 0xC. */
>  			if (tex->resource.b.b.nr_samples >= 2 && tex->cmask.size) {
>  				/* TODO: This doesn't work with MSAA. */
>  				if (clear_words_needed)
>  					continue;
> 
> -				si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
> +				si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
>  						tex->cmask.offset, tex->cmask.size,
>  						0xCCCCCCCC, R600_COHERENCY_CB_META);
>  				need_decompress_pass = true;
>  			}
> 
>  			vi_dcc_clear_level(sctx, tex, 0, reset_value);
> 
>  			if (clear_words_needed)
>  				need_decompress_pass = true;
> 
> @@ -476,28 +445,26 @@ static void si_do_fast_color_clear(struct
> si_context *sctx,
> 
>  			/* 128-bit formats are unusupported */
>  			if (tex->surface.bpe > 8) {
>  				continue;
>  			}
> 
>  			/* RB+ doesn't work with CMASK fast clear on Stoney. */
>  			if (sctx->b.family == CHIP_STONEY)
>  				continue;
> 
> -			/* ensure CMASK is enabled */
> -			si_alloc_separate_cmask(sctx->screen, tex);
>  			if (tex->cmask.size == 0) {
>  				continue;
>  			}
> 
>  			/* Do the fast clear. */
> -			si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
> +			si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
>  					tex->cmask.offset, tex->cmask.size, 0,
>  					R600_COHERENCY_CB_META);
>  			need_decompress_pass = true;
>  		}
> 
>  		if (need_decompress_pass &&
>  		    !(tex->dirty_level_mask & (1 << level))) {
>  			tex->dirty_level_mask |= 1 << level;
>  			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
>  		}
> diff --git a/src/gallium/drivers/radeonsi/si_state.c
> b/src/gallium/drivers/radeonsi/si_state.c
> index 6c82257..aae7332 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2980,26 +2980,20 @@ static void si_emit_framebuffer_state(struct
> si_context *sctx, struct r600_atom
>  			continue;
>  		}
> 
>  		tex = (struct r600_texture *)cb->base.texture;
>  		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>  				      &tex->resource, RADEON_USAGE_READWRITE,
>  				      tex->resource.b.b.nr_samples > 1 ?
>  					      RADEON_PRIO_COLOR_BUFFER_MSAA :
>  					      RADEON_PRIO_COLOR_BUFFER);
> 
> -		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
> -			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> -				tex->cmask_buffer, RADEON_USAGE_READWRITE,
> -				RADEON_PRIO_CMASK);
> -		}
> -
>  		if (tex->dcc_separate_buffer)
>  			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>  						  tex->dcc_separate_buffer,
>  						  RADEON_USAGE_READWRITE,
>  						  RADEON_PRIO_DCC);
> 
>  		/* Compute mutable surface parameters. */
>  		cb_color_base = tex->resource.gpu_address >> 8;
>  		cb_color_fmask = 0;
>  		cb_dcc_base = 0;


More information about the mesa-dev mailing list