[Mesa-dev] [PATCH 6/7] radeonsi: use all SPI color formats

Axel Davy axel.davy at ens.fr
Tue Jan 19 08:20:05 PST 2016


On 19/01/2016 17:11, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> because not using SPI_SHADER_32_ABGR doubles fill rate.
>
> We should also get optimal performance if alpha isn't needed or blending
> isn't enabled.
> ---
>   src/gallium/drivers/radeon/r600_pipe_common.h   |   6 +-
>   src/gallium/drivers/radeonsi/si_blit.c          |   8 +
>   src/gallium/drivers/radeonsi/si_pipe.h          |   4 +
>   src/gallium/drivers/radeonsi/si_state.c         | 207 +++++++++++++++++-------
>   src/gallium/drivers/radeonsi/si_state.h         |   5 +
>   src/gallium/drivers/radeonsi/si_state_shaders.c |  23 ++-
>   6 files changed, 195 insertions(+), 58 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index f3271e2..d66e74f 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -236,6 +236,7 @@ struct r600_surface {
>   	/* Misc. color flags. */
>   	bool alphatest_bypass;
>   	bool export_16bpc;
> +	bool color_is_int8;
>   
>   	/* Color registers. */
>   	unsigned cb_color_info;
> @@ -252,7 +253,10 @@ struct r600_surface {
>   	unsigned cb_color_fmask_slice;	/* EG and later */
>   	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
>   	unsigned cb_color_mask;		/* R600 only */
> -	unsigned spi_shader_col_format;	/* SI+ */
> +	unsigned spi_shader_col_format;		/* SI+, no blending, no alpha-to-coverage. */
> +	unsigned spi_shader_col_format_alpha;	/* SI+, alpha-to-coverage */
> +	unsigned spi_shader_col_format_blend;	/* SI+, blending without alpha. */
> +	unsigned spi_shader_col_format_blend_alpha; /* SI+, blending with alpha. */
>   	unsigned sx_ps_downconvert;	/* Stoney only */
>   	unsigned sx_blend_opt_epsilon;	/* Stoney only */
>   	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 75a9d56..a93887e 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -680,6 +680,14 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
>   	enum pipe_format format = int_to_norm_format(info->dst.format);
>   	unsigned sample_mask = ~0;
>   
> +	/* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
> +	 * the format is R16G16. Use R16A16, which does work.
> +	 */
> +	if (format == PIPE_FORMAT_R16G16_UNORM)
> +		format = PIPE_FORMAT_R16A16_UNORM;
> +	if (format == PIPE_FORMAT_R16G16_SNORM)
> +		format = PIPE_FORMAT_R16A16_SNORM;
> +
>   	if (info->src.resource->nr_samples > 1 &&
>   	    info->dst.resource->nr_samples <= 1 &&
>   	    util_max_layer(info->src.resource, 0) == 0 &&
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index e2009de..e2725fe 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -126,6 +126,10 @@ struct si_framebuffer {
>   	unsigned			cb0_is_integer;
>   	unsigned			compressed_cb_mask;
>   	unsigned			spi_shader_col_format;
> +	unsigned			spi_shader_col_format_alpha;
> +	unsigned			spi_shader_col_format_blend;
> +	unsigned			spi_shader_col_format_blend_alpha;
> +	unsigned			color_is_int8; /* bitmask */
>   	unsigned			dirty_cbufs;
>   	bool				dirty_zsbuf;
>   };
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 492d3f9..42f5291 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -420,6 +420,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
>   		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
>   		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
>   
> +	if (state->alpha_to_coverage)
> +		blend->need_src_alpha_4bit |= 0xf;
> +
>   	blend->cb_target_mask = 0;
>   	for (int i = 0; i < 8; i++) {
>   		/* state->rt entries > 0 only written if independent blending */
> @@ -457,6 +460,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
>   			blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
>   		}
>   		si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
> +
> +		blend->blend_enable_4bit |= 0xf << (i * 4);
> +
> +		/* This is only important for formats without alpha. */
> +		if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
> +		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
> +		    srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
> +		    dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
> +		    srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
> +		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
> +			blend->need_src_alpha_4bit |= 0xf << (i * 4);
>   	}
>   
>   	if (blend->cb_target_mask) {
> @@ -1270,53 +1284,6 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat)
>   	}
>   }
>   
> -/* Returns the size in bits of the widest component of a CB format */
> -static unsigned si_colorformat_max_comp_size(uint32_t colorformat)
> -{
> -	switch(colorformat) {
> -	case V_028C70_COLOR_4_4_4_4:
> -		return 4;
> -
> -	case V_028C70_COLOR_1_5_5_5:
> -	case V_028C70_COLOR_5_5_5_1:
> -		return 5;
> -
> -	case V_028C70_COLOR_5_6_5:
> -		return 6;
> -
> -	case V_028C70_COLOR_8:
> -	case V_028C70_COLOR_8_8:
> -	case V_028C70_COLOR_8_8_8_8:
> -		return 8;
> -
> -	case V_028C70_COLOR_10_10_10_2:
> -	case V_028C70_COLOR_2_10_10_10:
> -		return 10;
> -
> -	case V_028C70_COLOR_10_11_11:
> -	case V_028C70_COLOR_11_11_10:
> -		return 11;
> -
> -	case V_028C70_COLOR_16:
> -	case V_028C70_COLOR_16_16:
> -	case V_028C70_COLOR_16_16_16_16:
> -		return 16;
> -
> -	case V_028C70_COLOR_8_24:
> -	case V_028C70_COLOR_24_8:
> -		return 24;
> -
> -	case V_028C70_COLOR_32:
> -	case V_028C70_COLOR_32_32:
> -	case V_028C70_COLOR_32_32_32_32:
> -	case V_028C70_COLOR_X24_8_32_FLOAT:
> -		return 32;
> -	}
> -
> -	assert(!"Unknown maximum component size");
> -	return 0;
> -}
> -
>   static uint32_t si_translate_dbformat(enum pipe_format format)
>   {
>   	switch (format) {
> @@ -1886,17 +1853,119 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
>   
>   static void si_choose_spi_color_formats(struct r600_surface *surf,
>   					unsigned format, unsigned swap,
> -					unsigned ntype)
> +					unsigned ntype, bool is_depth)
>   {
> -	unsigned max_comp_size = si_colorformat_max_comp_size(format);
> +	/* Alpha is needed for alpha-to-coverage.
> +	 * Blending may be with or without alpha.
> +	 */
> +	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
> +	unsigned alpha = 0; /* exports alpha, but may not support blending */
> +	unsigned blend = 0; /* supports blending, but may not export alpha */
> +	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
>   
> -	surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
> +	/* Choose the SPI color formats. These are required values for Stoney/RB+.
> +	 * Other chips have multiple choices, though they are not necessarily better.
> +	 */
> +	switch (format) {
> +	case V_028C70_COLOR_5_6_5:
> +	case V_028C70_COLOR_1_5_5_5:
> +	case V_028C70_COLOR_5_5_5_1:
> +	case V_028C70_COLOR_4_4_4_4:
> +	case V_028C70_COLOR_10_11_11:
> +	case V_028C70_COLOR_11_11_10:
> +	case V_028C70_COLOR_8:
> +	case V_028C70_COLOR_8_8:
> +	case V_028C70_COLOR_8_8_8_8:
> +	case V_028C70_COLOR_10_10_10_2:
> +	case V_028C70_COLOR_2_10_10_10:
> +		if (ntype == V_028C70_NUMBER_UINT)
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;

Hi,

The documentation of the "BLEND_BYPASS" bit says it should be set if
SINT or UINT is used.
I deduce that blending is not possible with these formats, so I guess
you cannot use them here for blend and blend_alpha.

Yours,

Axel

> +		else if (ntype == V_028C70_NUMBER_SINT)
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
> +		else
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
> +		break;
> +
> +	case V_028C70_COLOR_16:
> +	case V_028C70_COLOR_16_16:
> +	case V_028C70_COLOR_16_16_16_16:
> +		if (ntype == V_028C70_NUMBER_UNORM ||
> +		    ntype == V_028C70_NUMBER_SNORM) {
> +			/* UNORM16 and SNORM16 don't support blending */
> +			if (ntype == V_028C70_NUMBER_UNORM)
> +				normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
> +			else
> +				normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
> +
> +			/* Use 32 bits per channel for blending. */
> +			if (format == V_028C70_COLOR_16) {
> +				if (swap == V_028C70_SWAP_STD) { /* R */
> +					blend = V_028714_SPI_SHADER_32_R;
> +					blend_alpha = V_028714_SPI_SHADER_32_AR;
> +				} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
> +					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
> +				else
> +					assert(0);
> +			} else if (format == V_028C70_COLOR_16_16) {
> +				if (swap == V_028C70_SWAP_STD) { /* RG */
> +					blend = V_028714_SPI_SHADER_32_GR;
> +					blend_alpha = V_028714_SPI_SHADER_32_ABGR;
> +				} else if (swap == V_028C70_SWAP_ALT) /* RA */
> +					blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
> +				else
> +					assert(0);
> +			} else /* 16_16_16_16 */
> +				blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
> +		} else if (ntype == V_028C70_NUMBER_UINT)
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
> +		else if (ntype == V_028C70_NUMBER_SINT)
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
> +		else if (ntype == V_028C70_NUMBER_FLOAT)
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
> +		else
> +			assert(0);
> +		break;
>   
> -	if (ntype == V_028C70_NUMBER_SRGB ||
> -	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
> -	     max_comp_size <= 10) ||
> -	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
> -		surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
> +	case V_028C70_COLOR_32:
> +		if (swap == V_028C70_SWAP_STD) { /* R */
> +			blend = normal = V_028714_SPI_SHADER_32_R;
> +			alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
> +		} else if (swap == V_028C70_SWAP_ALT_REV) /* A */
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
> +		else
> +			assert(0);
> +		break;
> +
> +	case V_028C70_COLOR_32_32:
> +		if (swap == V_028C70_SWAP_STD) { /* RG */
> +			blend = normal = V_028714_SPI_SHADER_32_GR;
> +			alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
> +		} else if (swap == V_028C70_SWAP_ALT) /* RA */
> +			alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
> +		else
> +			assert(0);
> +		break;
> +
> +	case V_028C70_COLOR_32_32_32_32:
> +	case V_028C70_COLOR_8_24:
> +	case V_028C70_COLOR_24_8:
> +	case V_028C70_COLOR_X24_8_32_FLOAT:
> +		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
> +		break;
> +
> +	default:
> +		assert(0);
> +		return;
> +	}
> +
> +	/* The DB->CB copy needs 32_ABGR. */
> +	if (is_depth)
> +		alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
> +
> +	surf->spi_shader_col_format = normal;
> +	surf->spi_shader_col_format_alpha = alpha;
> +	surf->spi_shader_col_format_blend = blend;
> +	surf->spi_shader_col_format_blend_alpha = blend_alpha;
>   }
>   
>   static void si_initialize_color_surface(struct si_context *sctx,
> @@ -1989,6 +2058,12 @@ static void si_initialize_color_surface(struct si_context *sctx,
>   		blend_bypass = 1;
>   	}
>   
> +	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
> +	    (format == V_028C70_COLOR_8 ||
> +	     format == V_028C70_COLOR_8_8 ||
> +	     format == V_028C70_COLOR_8_8_8_8))
> +		surf->color_is_int8 = true;
> +
>   	color_info = S_028C70_FORMAT(format) |
>   		S_028C70_COMP_SWAP(swap) |
>   		S_028C70_BLEND_CLAMP(blend_clamp) |
> @@ -2068,7 +2143,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
>   	}
>   
>   	/* Determine pixel shader export format */
> -	si_choose_spi_color_formats(surf, format, swap, ntype);
> +	si_choose_spi_color_formats(surf, format, swap, ntype, rtex->is_depth);
>   
>   	if (sctx->b.family == CHIP_STONEY &&
>   	    !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
> @@ -2296,6 +2371,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
>   
>   	sctx->framebuffer.spi_shader_col_format = 0;
> +	sctx->framebuffer.spi_shader_col_format_alpha = 0;
> +	sctx->framebuffer.spi_shader_col_format_blend = 0;
> +	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
> +	sctx->framebuffer.color_is_int8 = 0;
> +
>   	sctx->framebuffer.compressed_cb_mask = 0;
>   	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
>   	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
> @@ -2318,6 +2398,15 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   
>   		sctx->framebuffer.spi_shader_col_format |=
>   			surf->spi_shader_col_format << (i * 4);
> +		sctx->framebuffer.spi_shader_col_format_alpha |=
> +			surf->spi_shader_col_format_alpha << (i * 4);
> +		sctx->framebuffer.spi_shader_col_format_blend |=
> +			surf->spi_shader_col_format_blend << (i * 4);
> +		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
> +			surf->spi_shader_col_format_blend_alpha << (i * 4);
> +
> +		if (surf->color_is_int8)
> +			sctx->framebuffer.color_is_int8 |= 1 << i;
>   
>   		if (rtex->fmask.size && rtex->cmask.size) {
>   			sctx->framebuffer.compressed_cb_mask |= 1 << i;
> @@ -2328,6 +2417,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   	if (i == 1 && surf) {
>   		sctx->framebuffer.spi_shader_col_format |=
>   			surf->spi_shader_col_format << (i * 4);
> +		sctx->framebuffer.spi_shader_col_format_alpha |=
> +			surf->spi_shader_col_format_alpha << (i * 4);
> +		sctx->framebuffer.spi_shader_col_format_blend |=
> +			surf->spi_shader_col_format_blend << (i * 4);
> +		sctx->framebuffer.spi_shader_col_format_blend_alpha |=
> +			surf->spi_shader_col_format_blend_alpha << (i * 4);
>   	}
>   
>   	if (state->zsbuf) {
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index 46ba3c4..be3488e 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -42,6 +42,11 @@ struct si_state_blend {
>   	bool			alpha_to_coverage;
>   	bool			alpha_to_one;
>   	bool			dual_src_blend;
> +	/* Set 0xf or 0x0 (4 bits) per render target if the following is
> +	 * true. ANDed with spi_shader_col_format.
> +	 */
> +	unsigned		blend_enable_4bit;
> +	unsigned		need_src_alpha_4bit;
>   };
>   
>   struct si_state_rasterizer {
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 158f1ce..80126f2 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -616,7 +616,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
>   		    sel->info.colors_written == 0x1)
>   			key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
>   
> -		key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
> +		if (blend) {
> +			/* Select the shader color format based on whether
> +			 * blending or alpha are needed.
> +			 */
> +			key->ps.spi_shader_col_format =
> +				(blend->blend_enable_4bit & blend->need_src_alpha_4bit &
> +				 sctx->framebuffer.spi_shader_col_format_blend_alpha) |
> +				(blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
> +				 sctx->framebuffer.spi_shader_col_format_blend) |
> +				(~blend->blend_enable_4bit & blend->need_src_alpha_4bit &
> +				 sctx->framebuffer.spi_shader_col_format_alpha) |
> +				(~blend->blend_enable_4bit & ~blend->need_src_alpha_4bit &
> +				 sctx->framebuffer.spi_shader_col_format);
> +		} else
> +			key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
>   
>   		/* If alpha-to-coverage is enabled, we have to export alpha
>   		 * even if there is no color buffer.
> @@ -625,6 +639,13 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
>   		    blend && blend->alpha_to_coverage)
>   			key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
>   
> +		/* On SI and CIK except Hawaii, the CB doesn't clamp outputs
> +		 * to the range supported by the type if a channel has less
> +		 * than 16 bits and the export format is 16_ABGR.
> +		 */
> +		if (sctx->b.chip_class <= CIK && sctx->b.family != CHIP_HAWAII)
> +			key->ps.color_is_int8 = sctx->framebuffer.color_is_int8;
> +
>   		if (rs) {
>   			bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
>   					sctx->current_rast_prim <= PIPE_PRIM_POLYGON) ||



More information about the mesa-dev mailing list