[Mesa-dev] [PATCH 1/5] radeonsi: remove fast color clear for single-sample buffers

Dieter Nützel Dieter at nuetzel-hh.de
Thu Mar 15 07:35:03 UTC 2018


Wow,

if we compare Polaris 20 with Ryzen 3 2200G, AMD Vega 8 on Phoronix
https://www.phoronix.com/scan.php?page=article&item=ryzen3-2200g-vega8&num=3

Even TessMark against NVIDIA GeForce GTX 1080:
http://openbenchmarking.org/prospect/1606047-HA-PASCALGRA41/946288150b2d292b67300c7fea4e9a47f6bb3f4c

=> 16270 points

RX 580: 42083 !!! ;-)

it looks really nice.

Dieter

Am 15.03.2018 07:56, schrieb Dieter Nützel:
> For the series (1-3)
> 
> Tested-by: Dieter Nützel <Dieter at nuetzel-hh.de>
> 
> Are these numbers OK?
> 
> Triangle,Radeon RX 580 Series (POLARIS10 / DRM 3.25.0 /
> 4.16.0-rc1-1.g7262353-default+, LLVM 7.0.0),3.1 Mesa 18.1.0-devel
> (git-a8cc051d2e),1920,1080,YES,Off,5240,60000,0,314471
> 
> Or should I retest without this series?
> 
> Dieter
> 
> Am 11.03.2018 19:11, schrieb Marek Olšák:
>> From: Marek Olšák <marek.olsak at amd.com>
>> 
>> This should improve the score for the GpuTest Triangle benchmark.
>> Vulkan doesn't use this either.
>> ---
>>  src/gallium/drivers/radeon/r600_pipe_common.h |  1 -
>>  src/gallium/drivers/radeon/r600_texture.c     | 11 +-------
>>  src/gallium/drivers/radeonsi/si_clear.c       | 37 
>> ++-------------------------
>>  src/gallium/drivers/radeonsi/si_state.c       |  6 -----
>>  4 files changed, 3 insertions(+), 52 deletions(-)
>> 
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
>> b/src/gallium/drivers/radeon/r600_pipe_common.h
>> index 7941903..9701757 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
>> @@ -209,21 +209,20 @@ struct r600_cmask_info {
>>  struct r600_texture {
>>  	struct r600_resource		resource;
>> 
>>  	struct radeon_surf		surface;
>>  	uint64_t			size;
>>  	struct r600_texture		*flushed_depth_texture;
>> 
>>  	/* Colorbuffer compression and fast clear. */
>>  	struct r600_fmask_info		fmask;
>>  	struct r600_cmask_info		cmask;
>> -	struct r600_resource		*cmask_buffer;
>>  	uint64_t			dcc_offset; /* 0 = disabled */
>>  	unsigned			cb_color_info; /* fast clear enable bit */
>>  	unsigned			color_clear_value[2];
>>  	unsigned			last_msaa_resolve_target_micro_mode;
>>  	unsigned			num_level0_transfers;
>> 
>>  	/* Depth buffer compression and fast clear. */
>>  	uint64_t			htile_offset;
>>  	float				depth_clear_value;
>>  	uint16_t			dirty_level_mask; /* each bit says if that mipmap is 
>> compressed */
>> diff --git a/src/gallium/drivers/radeon/r600_texture.c
>> b/src/gallium/drivers/radeon/r600_texture.c
>> index 125e7ef..03bc955 100644
>> --- a/src/gallium/drivers/radeon/r600_texture.c
>> +++ b/src/gallium/drivers/radeon/r600_texture.c
>> @@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen 
>> *sscreen,
>>  {
>>  	if (!rtex->cmask.size)
>>  		return;
>> 
>>  	assert(rtex->resource.b.b.nr_samples <= 1);
>> 
>>  	/* Disable CMASK. */
>>  	memset(&rtex->cmask, 0, sizeof(rtex->cmask));
>>  	rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
>>  	rtex->dirty_level_mask = 0;
>> -
>>  	rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
>> 
>> -	if (rtex->cmask_buffer != &rtex->resource)
>> -	    r600_resource_reference(&rtex->cmask_buffer, NULL);
>> -
>>  	/* Notify all contexts about the change. */
>>  	p_atomic_inc(&sscreen->dirty_tex_counter);
>>  	p_atomic_inc(&sscreen->compressed_colortex_counter);
>>  }
>> 
>>  static bool r600_can_disable_dcc(struct r600_texture *rtex)
>>  {
>>  	/* We can't disable DCC if it can be written by another process. */
>>  	return rtex->dcc_offset &&
>>  	       (!rtex->resource.b.is_shared ||
>> @@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct
>> pipe_screen* screen,
>>  					      slice_size, whandle);
>>  }
>> 
>>  static void r600_texture_destroy(struct pipe_screen *screen,
>>  				 struct pipe_resource *ptex)
>>  {
>>  	struct r600_texture *rtex = (struct r600_texture*)ptex;
>>  	struct r600_resource *resource = &rtex->resource;
>> 
>>  	r600_texture_reference(&rtex->flushed_depth_texture, NULL);
>> -
>> -	if (rtex->cmask_buffer != &rtex->resource) {
>> -	    r600_resource_reference(&rtex->cmask_buffer, NULL);
>> -	}
>>  	pb_reference(&resource->buf, NULL);
>>  	r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
>>  	r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
>>  	FREE(rtex);
>>  }
>> 
>>  static const struct u_resource_vtbl r600_texture_vtbl;
>> 
>>  /* The number of samples can be specified independently of the 
>> texture. */
>>  void si_texture_get_fmask_info(struct si_screen *sscreen,
>> @@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen 
>> *screen,
>>  			rtex->db_compatible = true;
>> 
>>  			if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
>>  				r600_texture_allocate_htile(sscreen, rtex);
>>  		}
>>  	} else {
>>  		if (base->nr_samples > 1) {
>>  			if (!buf) {
>>  				r600_texture_allocate_fmask(sscreen, rtex);
>>  				r600_texture_allocate_cmask(sscreen, rtex);
>> -				rtex->cmask_buffer = &rtex->resource;
>>  			}
>>  			if (!rtex->fmask.size || !rtex->cmask.size) {
>>  				FREE(rtex);
>>  				return NULL;
>>  			}
>>  		}
>> 
>>  		/* Shared textures must always set up DCC here.
>>  		 * If it's not present, it will be disabled by
>>  		 * apply_opaque_metadata later.
>> @@ -1306,21 +1297,21 @@ r600_texture_create_object(struct pipe_screen 
>> *screen,
>>  		resource->bo_alignment = buf->alignment;
>>  		resource->domains = 
>> sscreen->ws->buffer_get_initial_domain(resource->buf);
>>  		if (resource->domains & RADEON_DOMAIN_VRAM)
>>  			resource->vram_usage = buf->size;
>>  		else if (resource->domains & RADEON_DOMAIN_GTT)
>>  			resource->gart_usage = buf->size;
>>  	}
>> 
>>  	if (rtex->cmask.size) {
>>  		/* Initialize the cmask to 0xCC (= compressed state). */
>> -		si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b,
>> +		si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
>>  					 rtex->cmask.offset, rtex->cmask.size,
>>  					 0xCCCCCCCC);
>>  	}
>>  	if (rtex->htile_offset) {
>>  		uint32_t clear_value = 0;
>> 
>>  		if (sscreen->info.chip_class >= GFX9 || rtex->tc_compatible_htile)
>>  			clear_value = 0x0000030F;
>> 
>>  		si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
>> diff --git a/src/gallium/drivers/radeonsi/si_clear.c
>> b/src/gallium/drivers/radeonsi/si_clear.c
>> index 464b9d7..a940aea 100644
>> --- a/src/gallium/drivers/radeonsi/si_clear.c
>> +++ b/src/gallium/drivers/radeonsi/si_clear.c
>> @@ -26,51 +26,20 @@
>> 
>>  #include "util/u_format.h"
>>  #include "util/u_pack_color.h"
>>  #include "util/u_surface.h"
>> 
>>  enum {
>>  	SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
>>  	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
>>  };
>> 
>> -static void si_alloc_separate_cmask(struct si_screen *sscreen,
>> -				    struct r600_texture *rtex)
>> -{
>> -	if (rtex->cmask_buffer)
>> -                return;
>> -
>> -	assert(rtex->cmask.size == 0);
>> -
>> -	si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
>> -	if (!rtex->cmask.size)
>> -		return;
>> -
>> -	rtex->cmask_buffer = (struct r600_resource *)
>> -		si_aligned_buffer_create(&sscreen->b,
>> -					 R600_RESOURCE_FLAG_UNMAPPABLE,
>> -					 PIPE_USAGE_DEFAULT,
>> -					 rtex->cmask.size,
>> -					 rtex->cmask.alignment);
>> -	if (rtex->cmask_buffer == NULL) {
>> -		rtex->cmask.size = 0;
>> -		return;
>> -	}
>> -
>> -	/* update colorbuffer state bits */
>> -	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
>> -
>> -	rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
>> -
>> -	p_atomic_inc(&sscreen->compressed_colortex_counter);
>> -}
>> -
>>  static void si_set_clear_color(struct r600_texture *rtex,
>>  			       enum pipe_format surface_format,
>>  			       const union pipe_color_union *color)
>>  {
>>  	union util_color uc;
>> 
>>  	memset(&uc, 0, sizeof(uc));
>> 
>>  	if (rtex->surface.bpe == 16) {
>>  		/* DCC fast clear only:
>> @@ -451,21 +420,21 @@ static void si_do_fast_color_clear(struct
>> si_context *sctx,
>> 
>>  			if (clear_words_needed && too_small)
>>  				continue;
>> 
>>  			/* DCC fast clear with MSAA should clear CMASK to 0xC. */
>>  			if (tex->resource.b.b.nr_samples >= 2 && tex->cmask.size) {
>>  				/* TODO: This doesn't work with MSAA. */
>>  				if (clear_words_needed)
>>  					continue;
>> 
>> -				si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
>> +				si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
>>  						tex->cmask.offset, tex->cmask.size,
>>  						0xCCCCCCCC, R600_COHERENCY_CB_META);
>>  				need_decompress_pass = true;
>>  			}
>> 
>>  			vi_dcc_clear_level(sctx, tex, 0, reset_value);
>> 
>>  			if (clear_words_needed)
>>  				need_decompress_pass = true;
>> 
>> @@ -476,28 +445,26 @@ static void si_do_fast_color_clear(struct
>> si_context *sctx,
>> 
>>  			/* 128-bit formats are unusupported */
>>  			if (tex->surface.bpe > 8) {
>>  				continue;
>>  			}
>> 
>>  			/* RB+ doesn't work with CMASK fast clear on Stoney. */
>>  			if (sctx->b.family == CHIP_STONEY)
>>  				continue;
>> 
>> -			/* ensure CMASK is enabled */
>> -			si_alloc_separate_cmask(sctx->screen, tex);
>>  			if (tex->cmask.size == 0) {
>>  				continue;
>>  			}
>> 
>>  			/* Do the fast clear. */
>> -			si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
>> +			si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
>>  					tex->cmask.offset, tex->cmask.size, 0,
>>  					R600_COHERENCY_CB_META);
>>  			need_decompress_pass = true;
>>  		}
>> 
>>  		if (need_decompress_pass &&
>>  		    !(tex->dirty_level_mask & (1 << level))) {
>>  			tex->dirty_level_mask |= 1 << level;
>>  			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
>>  		}
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> b/src/gallium/drivers/radeonsi/si_state.c
>> index 6c82257..aae7332 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -2980,26 +2980,20 @@ static void si_emit_framebuffer_state(struct
>> si_context *sctx, struct r600_atom
>>  			continue;
>>  		}
>> 
>>  		tex = (struct r600_texture *)cb->base.texture;
>>  		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>>  				      &tex->resource, RADEON_USAGE_READWRITE,
>>  				      tex->resource.b.b.nr_samples > 1 ?
>>  					      RADEON_PRIO_COLOR_BUFFER_MSAA :
>>  					      RADEON_PRIO_COLOR_BUFFER);
>> 
>> -		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
>> -			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>> -				tex->cmask_buffer, RADEON_USAGE_READWRITE,
>> -				RADEON_PRIO_CMASK);
>> -		}
>> -
>>  		if (tex->dcc_separate_buffer)
>>  			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>>  						  tex->dcc_separate_buffer,
>>  						  RADEON_USAGE_READWRITE,
>>  						  RADEON_PRIO_DCC);
>> 
>>  		/* Compute mutable surface parameters. */
>>  		cb_color_base = tex->resource.gpu_address >> 8;
>>  		cb_color_fmask = 0;
>>  		cb_dcc_base = 0;
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list