[Mesa-dev] [PATCH 6/7] radeonsi: use all SPI color formats

Marek Olšák maraeo at gmail.com
Tue Jan 19 08:11:25 PST 2016


From: Marek Olšák <marek.olsak at amd.com>

Use all SPI color formats, because not using SPI_SHADER_32_ABGR
doubles the fill rate.

We should also get optimal performance if alpha isn't needed or blending
isn't enabled.
---
 src/gallium/drivers/radeon/r600_pipe_common.h   |   6 +-
 src/gallium/drivers/radeonsi/si_blit.c          |   8 +
 src/gallium/drivers/radeonsi/si_pipe.h          |   4 +
 src/gallium/drivers/radeonsi/si_state.c         | 207 +++++++++++++++++-------
 src/gallium/drivers/radeonsi/si_state.h         |   5 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  23 ++-
 6 files changed, 195 insertions(+), 58 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index f3271e2..d66e74f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -236,6 +236,7 @@ struct r600_surface {
 	/* Misc. color flags. */
 	bool alphatest_bypass;
 	bool export_16bpc;
+	bool color_is_int8;
 
 	/* Color registers. */
 	unsigned cb_color_info;
@@ -252,7 +253,10 @@ struct r600_surface {
 	unsigned cb_color_fmask_slice;	/* EG and later */
 	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
 	unsigned cb_color_mask;		/* R600 only */
-	unsigned spi_shader_col_format;	/* SI+ */
+	unsigned spi_shader_col_format;		/* SI+, no blending, no alpha-to-coverage. */
+	unsigned spi_shader_col_format_alpha;	/* SI+, alpha-to-coverage */
+	unsigned spi_shader_col_format_blend;	/* SI+, blending without alpha. */
+	unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
 	unsigned sx_ps_downconvert;	/* Stoney only */
 	unsigned sx_blend_opt_epsilon;	/* Stoney only */
 	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 75a9d56..a93887e 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 	enum pipe_format format = int_to_norm_format(info->dst.format);
 	unsigned sample_mask = ~0;
 
+	/* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
+	 * the format is R16G16. Use R16A16, which does work.
+	 */
+	if (format == PIPE_FORMAT_R16G16_UNORM)
+		format = PIPE_FORMAT_R16A16_UNORM;
+	if (format == PIPE_FORMAT_R16G16_SNORM)
+		format = PIPE_FORMAT_R16A16_SNORM;
+
 	if (info->src.resource->nr_samples > 1 &&
 	    info->dst.resource->nr_samples <= 1 &&
 	    util_max_layer(info->src.resource, 0) == 0 &&
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e2009de..e2725fe 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -126,6 +126,10 @@ struct si_framebuffer {
 	unsigned			cb0_is_integer;
 	unsigned			compressed_cb_mask;
 	unsigned			spi_shader_col_format;
+	unsigned			spi_shader_col_format_alpha;
+	unsigned			spi_shader_col_format_blend;
+	unsigned			spi_shader_col_format_blend_alpha;
+	unsigned			color_is_int8; /* bitmask */
 	unsigned			dirty_cbufs;
 	bool				dirty_zsbuf;
 };
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 492d3f9..42f5291 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
 		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
 
+	if (state->alpha_to_coverage)
+		blend->need_src_alpha_4bit |= 0xf;
+
 	blend->cb_target_mask = 0;
 	for (int i = 0; i < 8; i++) {
 		/* state->rt entries > 0 only written if independent blending */
@@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
 		}
 		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
+
+		blend->blend_enable_4bit |= 0xf << (i * 4);
+
+		/* This is only important for formats without alpha. */
+		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
+			blend->need_src_alpha_4bit |= 0xf << (i * 4);
 	}
 
 	if (blend->cb_target_mask) {
@@ -1270,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
 	}
 }
 
-/* Returns the size in bits of the widest component of a CB format */
-static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
-{
-	switch(colorformat) {
-	case V_028C70_COLOR_4_4_4_4:
-		return 4;
-
-	case V_028C70_COLOR_1_5_5_5:
-	case V_028C70_COLOR_5_5_5_1:
-		return 5;
-
-	case V_028C70_COLOR_5_6_5:
-		return 6;
-
-	case V_028C70_COLOR_8:
-	case V_028C70_COLOR_8_8:
-	case V_028C70_COLOR_8_8_8_8:
-		return 8;
-
-	case V_028C70_COLOR_10_10_10_2:
-	case V_028C70_COLOR_2_10_10_10:
-		return 10;
-
-	case V_028C70_COLOR_10_11_11:
-	case V_028C70_COLOR_11_11_10:
-		return 11;
-
-	case V_028C70_COLOR_16:
-	case V_028C70_COLOR_16_16:
-	case V_028C70_COLOR_16_16_16_16:
-		return 16;
-
-	case V_028C70_COLOR_8_24:
-	case V_028C70_COLOR_24_8:
-		return 24;
-
-	case V_028C70_COLOR_32:
-	case V_028C70_COLOR_32_32:
-	case V_028C70_COLOR_32_32_32_32:
-	case V_028C70_COLOR_X24_8_32_FLOAT:
-		return 32;
-	}
-
-	assert(!"Unknown maximum component size");
-	return 0;
-}
-
 static uint32_t si_translate_dbformat(enum pipe_format format)
 {
 	switch (format) {
@@ -1886,17 +1853,119 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
 
 static void si_choose_spi_color_formats(struct r600_surface *surf,
 					unsigned format, unsigned swap,
-					unsigned ntype)
+					unsigned ntype, bool is_depth)
 {
-	unsigned max_comp_size = si_colorformat_max_comp_size(format);
+	/* Alpha is needed for alpha-to-coverage.
+	 * Blending may be with or without alpha.
+	 */
+	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
+	unsigned alpha = 0; /* exports alpha, but may not support blending */
+	unsigned blend = 0; /* supports blending, but may not export alpha */
+	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
 
-	surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
+	/* Choose the SPI color formats. These are required values for Stoney/RB+.
+	 * Other chips have multiple choices, though they are not necessarily better.
+	 */
+	switch (format) {
+	case V_028C70_COLOR_5_6_5:
+	case V_028C70_COLOR_1_5_5_5:
+	case V_028C70_COLOR_5_5_5_1:
+	case V_028C70_COLOR_4_4_4_4:
+	case V_028C70_COLOR_10_11_11:
+	case V_028C70_COLOR_11_11_10:
+	case V_028C70_COLOR_8:
+	case V_028C70_COLOR_8_8:
+	case V_028C70_COLOR_8_8_8_8:
+	case V_028C70_COLOR_10_10_10_2:
+	case V_028C70_COLOR_2_10_10_10:
+		if (ntype == V_028C70_NUMBER_UINT)
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+		else if (ntype == V_028C70_NUMBER_SINT)
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+		else
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+		break;
+
+	case V_028C70_COLOR_16:
+	case V_028C70_COLOR_16_16:
+	case V_028C70_COLOR_16_16_16_16:
+		if (ntype == V_028C70_NUMBER_UNORM ||
+		    ntype == V_028C70_NUMBER_SNORM) {
+			/* UNORM16 and SNORM16 don't support blending */
+			if (ntype == V_028C70_NUMBER_UNORM)
+				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
+			else
+				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
+
+			/* Use 32 bits per channel for blending. */
+			if (format == V_028C70_COLOR_16) {
+				if (swap == V_028C70_SWAP_STD) { /* R */
+					blend = V_028714_SPI_SHADER_32_R;
+					blend_alpha = V_028714_SPI_SHADER_32_AR;
+				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+				else
+					assert(0);
+			} else if (format == V_028C70_COLOR_16_16) {
+				if (swap == V_028C70_SWAP_STD) { /* RG */
+					blend = V_028714_SPI_SHADER_32_GR;
+					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+				} else if (swap == V_028C70_SWAP_ALT) /* RA */
+					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
+				else
+					assert(0);
+			} else /* 16_16_16_16 */
+				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+		} else if (ntype == V_028C70_NUMBER_UINT)
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
+		else if (ntype == V_028C70_NUMBER_SINT)
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
+		else if (ntype == V_028C70_NUMBER_FLOAT)
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
+		else
+			assert(0);
+		break;
 
-	if (ntype == V_028C70_NUMBER_SRGB ||
-	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
-	     max_comp_size <= 10) ||
-	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
-		surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
+	case V_028C70_COLOR_32:
+		if (swap == V_028C70_SWAP_STD) { /* R */
+			blend = normal = V_028714_SPI_SHADER_32_R;
+			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
+		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+		else
+			assert(0);
+		break;
+
+	case V_028C70_COLOR_32_32:
+		if (swap == V_028C70_SWAP_STD) { /* RG */
+			blend = normal = V_028714_SPI_SHADER_32_GR;
+			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
+		} else if (swap == V_028C70_SWAP_ALT) /* RA */
+			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
+		else
+			assert(0);
+		break;
+
+	case V_028C70_COLOR_32_32_32_32:
+	case V_028C70_COLOR_8_24:
+	case V_028C70_COLOR_24_8:
+	case V_028C70_COLOR_X24_8_32_FLOAT:
+		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+		break;
+
+	default:
+		assert(0);
+		return;
+	}
+
+	/* The DB->CB copy needs 32_ABGR. */
+	if (is_depth)
+		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
+
+	surf->spi_shader_col_format = normal;
+	surf->spi_shader_col_format_alpha = alpha;
+	surf->spi_shader_col_format_blend = blend;
+	surf->spi_shader_col_format_blend_alpha = blend_alpha;
 }
 
 static void si_initialize_color_surface(struct si_context *sctx,
@@ -1989,6 +2058,12 @@ static void si_initialize_color_surface(struct si_context *sctx,
 		blend_bypass = 1;
 	}
 
+	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
+	    (format == V_028C70_COLOR_8 ||
+	     format == V_028C70_COLOR_8_8 ||
+	     format == V_028C70_COLOR_8_8_8_8))
+		surf->color_is_int8 = true;
+
 	color_info = S_028C70_FORMAT(format) |
 		S_028C70_COMP_SWAP(swap) |
 		S_028C70_BLEND_CLAMP(blend_clamp) |
@@ -2068,7 +2143,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
 	}
 
 	/* Determine pixel shader export format */
-	si_choose_spi_color_formats(surf, format, swap, ntype);
+	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
 
 	if (sctx->b.family == CHIP_STONEY &&
 	    !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
@@ -2296,6 +2371,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
 	sctx->framebuffer.spi_shader_col_format = 0;
+	sctx->framebuffer.spi_shader_col_format_alpha = 0;
+	sctx->framebuffer.spi_shader_col_format_blend = 0;
+	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
+	sctx->framebuffer.color_is_int8 = 0;
+
 	sctx->framebuffer.compressed_cb_mask = 0;
 	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
 	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
@@ -2318,6 +2398,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
 		sctx->framebuffer.spi_shader_col_format |=
 			surf->spi_shader_col_format << (i * 4);
+		sctx->framebuffer.spi_shader_col_format_alpha |=
+			surf->spi_shader_col_format_alpha << (i * 4);
+		sctx->framebuffer.spi_shader_col_format_blend |=
+			surf->spi_shader_col_format_blend << (i * 4);
+		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+			surf->spi_shader_col_format_blend_alpha << (i * 4);
+
+		if (surf->color_is_int8)
+			sctx->framebuffer.color_is_int8 |= 1 << i;
 
 		if (rtex->fmask.size && rtex->cmask.size) {
 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
@@ -2328,6 +2417,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	if (i == 1 && surf) {
 		sctx->framebuffer.spi_shader_col_format |=
 			surf->spi_shader_col_format << (i * 4);
+		sctx->framebuffer.spi_shader_col_format_alpha |=
+			surf->spi_shader_col_format_alpha << (i * 4);
+		sctx->framebuffer.spi_shader_col_format_blend |=
+			surf->spi_shader_col_format_blend << (i * 4);
+		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
+			surf->spi_shader_col_format_blend_alpha << (i * 4);
 	}
 
 	if (state->zsbuf) {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 46ba3c4..be3488e 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -42,6 +42,11 @@ struct si_state_blend {
 	bool			alpha_to_coverage;
 	bool			alpha_to_one;
 	bool			dual_src_blend;
+	/* Set 0xf or 0x0 (4 bits) per render target if the following is
+	 * true. ANDed with spi_shader_col_format.
+	 */
+	unsigned		blend_enable_4bit;
+	unsigned		need_src_alpha_4bit;
 };
 
 struct si_state_rasterizer {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 158f1ce..80126f2 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -616,7 +616,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 		    sel->info.colors_written == 0x1)
 			key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
 
-		key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
+		if (blend) {
+			/* Select the shader color format based on whether
+			 * blending or alpha are needed.
+			 */
+			key->ps.spi_shader_col_format =
+				(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+				 sctx->framebuffer.spi_shader_col_format_blend_alpha) |
+				(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+				 sctx->framebuffer.spi_shader_col_format_blend) |
+				(~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
+				 sctx->framebuffer.spi_shader_col_format_alpha) |
+				(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
+				 sctx->framebuffer.spi_shader_col_format);
+		} else
+			key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
 
 		/* If alpha-to-coverage is enabled, we have to export alpha
 		 * even if there is no color buffer.
@@ -625,6 +639,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 		    blend && blend->alpha_to_coverage)
 			key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
 
+		/* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+		 * to the range supported by the type if a channel has less
+		 * than 16 bits and the export format is 16_ABGR.
+		 */
+		if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
+			key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
+
 		if (rs) {
 			bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
 					sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||
-- 
2.1.4



More information about the mesa-dev mailing list