[Mesa-dev] [PATCH 1/5] radeonsi: remove fast color clear for single-sample buffers

Marek Olšák maraeo at gmail.com
Sun Mar 11 18:11:09 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

Removing the CMASK-based fast color clear for single-sample buffers should
improve the score for the GpuTest Triangle benchmark.
Vulkan doesn't use this fast clear path either.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 -
 src/gallium/drivers/radeon/r600_texture.c     | 11 +-------
 src/gallium/drivers/radeonsi/si_clear.c       | 37 ++-------------------------
 src/gallium/drivers/radeonsi/si_state.c       |  6 -----
 4 files changed, 3 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 7941903..9701757 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -209,21 +209,20 @@ struct r600_cmask_info {
 struct r600_texture {
 	struct r600_resource		resource;
 
 	struct radeon_surf		surface;
 	uint64_t			size;
 	struct r600_texture		*flushed_depth_texture;
 
 	/* Colorbuffer compression and fast clear. */
 	struct r600_fmask_info		fmask;
 	struct r600_cmask_info		cmask;
-	struct r600_resource		*cmask_buffer;
 	uint64_t			dcc_offset; /* 0 = disabled */
 	unsigned			cb_color_info; /* fast clear enable bit */
 	unsigned			color_clear_value[2];
 	unsigned			last_msaa_resolve_target_micro_mode;
 	unsigned			num_level0_transfers;
 
 	/* Depth buffer compression and fast clear. */
 	uint64_t			htile_offset;
 	float				depth_clear_value;
 	uint16_t			dirty_level_mask; /* each bit says if that mipmap is compressed */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 125e7ef..03bc955 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen *sscreen,
 {
 	if (!rtex->cmask.size)
 		return;
 
 	assert(rtex->resource.b.b.nr_samples <= 1);
 
 	/* Disable CMASK. */
 	memset(&rtex->cmask, 0, sizeof(rtex->cmask));
 	rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
 	rtex->dirty_level_mask = 0;
-
 	rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
 
-	if (rtex->cmask_buffer != &rtex->resource)
-	    r600_resource_reference(&rtex->cmask_buffer, NULL);
-
 	/* Notify all contexts about the change. */
 	p_atomic_inc(&sscreen->dirty_tex_counter);
 	p_atomic_inc(&sscreen->compressed_colortex_counter);
 }
 
 static bool r600_can_disable_dcc(struct r600_texture *rtex)
 {
 	/* We can't disable DCC if it can be written by another process. */
 	return rtex->dcc_offset &&
 	       (!rtex->resource.b.is_shared ||
@@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
 					      slice_size, whandle);
 }
 
 static void r600_texture_destroy(struct pipe_screen *screen,
 				 struct pipe_resource *ptex)
 {
 	struct r600_texture *rtex = (struct r600_texture*)ptex;
 	struct r600_resource *resource = &rtex->resource;
 
 	r600_texture_reference(&rtex->flushed_depth_texture, NULL);
-
-	if (rtex->cmask_buffer != &rtex->resource) {
-	    r600_resource_reference(&rtex->cmask_buffer, NULL);
-	}
 	pb_reference(&resource->buf, NULL);
 	r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
 	r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
 	FREE(rtex);
 }
 
 static const struct u_resource_vtbl r600_texture_vtbl;
 
 /* The number of samples can be specified independently of the texture. */
 void si_texture_get_fmask_info(struct si_screen *sscreen,
@@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen *screen,
 			rtex->db_compatible = true;
 
 			if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
 				r600_texture_allocate_htile(sscreen, rtex);
 		}
 	} else {
 		if (base->nr_samples > 1) {
 			if (!buf) {
 				r600_texture_allocate_fmask(sscreen, rtex);
 				r600_texture_allocate_cmask(sscreen, rtex);
-				rtex->cmask_buffer = &rtex->resource;
 			}
 			if (!rtex->fmask.size || !rtex->cmask.size) {
 				FREE(rtex);
 				return NULL;
 			}
 		}
 
 		/* Shared textures must always set up DCC here.
 		 * If it's not present, it will be disabled by
 		 * apply_opaque_metadata later.
@@ -1306,21 +1297,21 @@ r600_texture_create_object(struct pipe_screen *screen,
 		resource->bo_alignment = buf->alignment;
 		resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
 		if (resource->domains & RADEON_DOMAIN_VRAM)
 			resource->vram_usage = buf->size;
 		else if (resource->domains & RADEON_DOMAIN_GTT)
 			resource->gart_usage = buf->size;
 	}
 
 	if (rtex->cmask.size) {
 		/* Initialize the cmask to 0xCC (= compressed state). */
-		si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b,
+		si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
 					 rtex->cmask.offset, rtex->cmask.size,
 					 0xCCCCCCCC);
 	}
 	if (rtex->htile_offset) {
 		uint32_t clear_value = 0;
 
 		if (sscreen->info.chip_class >= GFX9 || rtex->tc_compatible_htile)
 			clear_value = 0x0000030F;
 
 		si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
diff --git a/src/gallium/drivers/radeonsi/si_clear.c b/src/gallium/drivers/radeonsi/si_clear.c
index 464b9d7..a940aea 100644
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -26,51 +26,20 @@
 
 #include "util/u_format.h"
 #include "util/u_pack_color.h"
 #include "util/u_surface.h"
 
 enum {
 	SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
 	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
 };
 
-static void si_alloc_separate_cmask(struct si_screen *sscreen,
-				    struct r600_texture *rtex)
-{
-	if (rtex->cmask_buffer)
-                return;
-
-	assert(rtex->cmask.size == 0);
-
-	si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
-	if (!rtex->cmask.size)
-		return;
-
-	rtex->cmask_buffer = (struct r600_resource *)
-		si_aligned_buffer_create(&sscreen->b,
-					 R600_RESOURCE_FLAG_UNMAPPABLE,
-					 PIPE_USAGE_DEFAULT,
-					 rtex->cmask.size,
-					 rtex->cmask.alignment);
-	if (rtex->cmask_buffer == NULL) {
-		rtex->cmask.size = 0;
-		return;
-	}
-
-	/* update colorbuffer state bits */
-	rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
-
-	rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
-
-	p_atomic_inc(&sscreen->compressed_colortex_counter);
-}
-
 static void si_set_clear_color(struct r600_texture *rtex,
 			       enum pipe_format surface_format,
 			       const union pipe_color_union *color)
 {
 	union util_color uc;
 
 	memset(&uc, 0, sizeof(uc));
 
 	if (rtex->surface.bpe == 16) {
 		/* DCC fast clear only:
@@ -451,21 +420,21 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 
 			if (clear_words_needed && too_small)
 				continue;
 
 			/* DCC fast clear with MSAA should clear CMASK to 0xC. */
 			if (tex->resource.b.b.nr_samples >= 2 && tex->cmask.size) {
 				/* TODO: This doesn't work with MSAA. */
 				if (clear_words_needed)
 					continue;
 
-				si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
+				si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
 						tex->cmask.offset, tex->cmask.size,
 						0xCCCCCCCC, R600_COHERENCY_CB_META);
 				need_decompress_pass = true;
 			}
 
 			vi_dcc_clear_level(sctx, tex, 0, reset_value);
 
 			if (clear_words_needed)
 				need_decompress_pass = true;
 
@@ -476,28 +445,26 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 
 			/* 128-bit formats are unusupported */
 			if (tex->surface.bpe > 8) {
 				continue;
 			}
 
 			/* RB+ doesn't work with CMASK fast clear on Stoney. */
 			if (sctx->b.family == CHIP_STONEY)
 				continue;
 
-			/* ensure CMASK is enabled */
-			si_alloc_separate_cmask(sctx->screen, tex);
 			if (tex->cmask.size == 0) {
 				continue;
 			}
 
 			/* Do the fast clear. */
-			si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
+			si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
 					tex->cmask.offset, tex->cmask.size, 0,
 					R600_COHERENCY_CB_META);
 			need_decompress_pass = true;
 		}
 
 		if (need_decompress_pass &&
 		    !(tex->dirty_level_mask & (1 << level))) {
 			tex->dirty_level_mask |= 1 << level;
 			p_atomic_inc(&sctx->screen->compressed_colortex_counter);
 		}
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6c82257..aae7332 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2980,26 +2980,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 			continue;
 		}
 
 		tex = (struct r600_texture *)cb->base.texture;
 		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 				      &tex->resource, RADEON_USAGE_READWRITE,
 				      tex->resource.b.b.nr_samples > 1 ?
 					      RADEON_PRIO_COLOR_BUFFER_MSAA :
 					      RADEON_PRIO_COLOR_BUFFER);
 
-		if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
-			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-				tex->cmask_buffer, RADEON_USAGE_READWRITE,
-				RADEON_PRIO_CMASK);
-		}
-
 		if (tex->dcc_separate_buffer)
 			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
 						  tex->dcc_separate_buffer,
 						  RADEON_USAGE_READWRITE,
 						  RADEON_PRIO_DCC);
 
 		/* Compute mutable surface parameters. */
 		cb_color_base = tex->resource.gpu_address >> 8;
 		cb_color_fmask = 0;
 		cb_dcc_base = 0;
-- 
2.7.4



More information about the mesa-dev mailing list