[Mesa-dev] [PATCH 2/7] radeonsi: use SPI_SHADER_COL_FORMAT fields instead of export_16bpc

Nicolai Hähnle nhaehnle at gmail.com
Tue Jan 19 12:53:46 PST 2016


On 19.01.2016 11:11, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> This does change the behavior slightly:
>    If a shader writes COLOR[i] and that color buffer isn't bound,
>    the shader will export MRT_NULL instead and discard the IR tree that
>    calculates the output. The only exception is alpha-to-coverage, which
>    requires an alpha export.
> ---
>   src/gallium/drivers/radeon/r600_pipe_common.h   |  1 +
>   src/gallium/drivers/radeonsi/si_pipe.h          |  2 +-
>   src/gallium/drivers/radeonsi/si_shader.c        | 35 ++++++++++++------
>   src/gallium/drivers/radeonsi/si_shader.h        |  2 +-
>   src/gallium/drivers/radeonsi/si_state.c         | 39 +++++++++++---------
>   src/gallium/drivers/radeonsi/si_state.h         |  1 +
>   src/gallium/drivers/radeonsi/si_state_shaders.c | 47 ++++++++++++++++++++-----
>   7 files changed, 90 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 27f6e98..f3271e2 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -252,6 +252,7 @@ struct r600_surface {
>   	unsigned cb_color_fmask_slice;	/* EG and later */
>   	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
>   	unsigned cb_color_mask;		/* R600 only */
> +	unsigned spi_shader_col_format;	/* SI+ */
>   	unsigned sx_ps_downconvert;	/* Stoney only */
>   	unsigned sx_blend_opt_epsilon;	/* Stoney only */
>   	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index f83cb02..e2009de 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -125,7 +125,7 @@ struct si_framebuffer {
>   	unsigned			log_samples;
>   	unsigned			cb0_is_integer;
>   	unsigned			compressed_cb_mask;
> -	unsigned			export_16bpc;
> +	unsigned			spi_shader_col_format;
>   	unsigned			dirty_cbufs;
>   	bool				dirty_zsbuf;
>   };
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 2de7def..266ef6d 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -1265,7 +1265,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
>   	struct lp_build_context *uint =
>   				&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
>   	struct lp_build_context *base = &bld_base->base;
> -	unsigned compressed = 0;
> +	unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
>   	unsigned chan;
>
>   	/* XXX: This controls which components of the output
> @@ -1286,17 +1286,29 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
>   	args[3] = lp_build_const_int32(base->gallivm, target);
>
>   	if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
> +		unsigned col_formats =
> +			si_shader_ctx->shader->key.ps.spi_shader_col_format;
>   		int cbuf = target - V_008DFC_SQ_EXP_MRT;
>
> -		if (cbuf >= 0 && cbuf < 8)
> -			compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
> +		assert(cbuf >= 0 && cbuf < 8);
> +		spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
>   	}
>
> -	/* Set COMPR flag */
> -	args[4] = compressed ? uint->one : uint->zero;
> +	args[4] = uint->zero; /* COMPR flag */
> +	args[5] = base->undef;
> +	args[6] = base->undef;
> +	args[7] = base->undef;
> +	args[8] = base->undef;
> +
> +	switch (spi_shader_col_format) {
> +	case V_028714_SPI_SHADER_ZERO:
> +		args[0] = uint->zero; /* writemask */
> +		args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
> +		break;
> +
> +	case V_028714_SPI_SHADER_FP16_ABGR:
> +		args[4] = uint->one; /* COMPR flag */
>
> -	if (compressed) {
> -		/* Pixel shader needs to pack output values before export */
>   		for (chan = 0; chan < 2; chan++) {
>   			LLVMValueRef pack_args[2] = {
>   				values[2 * chan],
> @@ -1314,10 +1326,13 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
>   						 packed,
>   						 LLVMFloatTypeInContext(base->gallivm->context),
>   						 "");
> -			args[chan + 7] = base->undef;
>   		}
> -	} else
> +		break;
> +
> +	case V_028714_SPI_SHADER_32_ABGR:
>   		memcpy(&args[5], values, sizeof(values[0]) * 4);
> +		break;
> +	}
>   }
>
>   static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
> @@ -4031,7 +4046,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
>   		break;
>
>   	case PIPE_SHADER_FRAGMENT:
> -		fprintf(f, "  export_16bpc = 0x%X\n", key->ps.export_16bpc);
> +		fprintf(f, "  spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format);
>   		fprintf(f, "  last_cbuf = %u\n", key->ps.last_cbuf);
>   		fprintf(f, "  color_two_side = %u\n", key->ps.color_two_side);
>   		fprintf(f, "  alpha_func = %u\n", key->ps.alpha_func);
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 1635358..a9b76c6 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -232,7 +232,7 @@ struct si_shader_selector {
>
>   union si_shader_key {
>   	struct {
> -		unsigned	export_16bpc:8;
> +		unsigned	spi_shader_col_format;
>   		unsigned	last_cbuf:3;
>   		unsigned	color_two_side:1;
>   		unsigned	alpha_func:3;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index ea441ac..492d3f9 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -403,6 +403,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
>   	if (!blend)
>   		return NULL;
>
> +	blend->alpha_to_coverage = state->alpha_to_coverage;
>   	blend->alpha_to_one = state->alpha_to_one;
>   	blend->dual_src_blend = util_blend_state_is_dual(state, 0);
>
> @@ -1883,6 +1884,21 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten
>    * framebuffer handling
>    */
>
> +static void si_choose_spi_color_formats(struct r600_surface *surf,
> +					unsigned format, unsigned swap,
> +					unsigned ntype)
> +{
> +	unsigned max_comp_size = si_colorformat_max_comp_size(format);
> +
> +	surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
> +
> +	if (ntype == V_028C70_NUMBER_SRGB ||
> +	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
> +	     max_comp_size <= 10) ||
> +	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16))
> +		surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR;
> +}
> +
>   static void si_initialize_color_surface(struct si_context *sctx,
>   					struct r600_surface *surf)
>   {
> @@ -1896,7 +1912,6 @@ static void si_initialize_color_surface(struct si_context *sctx,
>   	const struct util_format_description *desc;
>   	int i;
>   	unsigned blend_clamp = 0, blend_bypass = 0;
> -	unsigned max_comp_size;
>
>   	/* Layered rendering doesn't work with LINEAR_GENERAL.
>   	 * (LINEAR_ALIGNED and others work) */
> @@ -2053,13 +2068,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
>   	}
>
>   	/* Determine pixel shader export format */
> -	max_comp_size = si_colorformat_max_comp_size(format);
> -	if (ntype == V_028C70_NUMBER_SRGB ||
> -	    ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) &&
> -	     max_comp_size <= 10) ||
> -	    (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) {
> -		surf->export_16bpc = true;
> -	}
> +	si_choose_spi_color_formats(surf, format, swap, ntype);
>
>   	if (sctx->b.family == CHIP_STONEY &&
>   	    !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
> @@ -2286,7 +2295,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>
>   	util_copy_framebuffer_state(&sctx->framebuffer.state, state);
>
> -	sctx->framebuffer.export_16bpc = 0;
> +	sctx->framebuffer.spi_shader_col_format = 0;
>   	sctx->framebuffer.compressed_cb_mask = 0;
>   	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
>   	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
> @@ -2307,9 +2316,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   			si_initialize_color_surface(sctx, surf);
>   		}
>
> -		if (surf->export_16bpc) {
> -			sctx->framebuffer.export_16bpc |= 1 << i;
> -		}
> +		sctx->framebuffer.spi_shader_col_format |=
> +			surf->spi_shader_col_format << (i * 4);
>
>   		if (rtex->fmask.size && rtex->cmask.size) {
>   			sctx->framebuffer.compressed_cb_mask |= 1 << i;
> @@ -2317,12 +2325,11 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   		r600_context_add_resource_size(ctx, surf->base.texture);
>   	}
>   	/* Set the 16BPC export for possible dual-src blending. */
> -	if (i == 1 && surf && surf->export_16bpc) {
> -		sctx->framebuffer.export_16bpc |= 1 << 1;
> +	if (i == 1 && surf) {
> +		sctx->framebuffer.spi_shader_col_format |=
> +			surf->spi_shader_col_format << (i * 4);
>   	}

Update the comment about 16BPC please.

>
> -	assert(!(sctx->framebuffer.export_16bpc & ~0xff));
> -
>   	if (state->zsbuf) {
>   		surf = (struct r600_surface*)state->zsbuf;
>
> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
> index f5ca661..46ba3c4 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -39,6 +39,7 @@ struct si_shader;
>   struct si_state_blend {
>   	struct si_pm4_state	pm4;
>   	uint32_t		cb_target_mask;
> +	bool			alpha_to_coverage;
>   	bool			alpha_to_one;
>   	bool			dual_src_blend;
>   };
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 8ff70b4..fa78179 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -382,13 +382,28 @@ static void si_shader_vs(struct si_shader *shader)
>   		si_set_tesseval_regs(shader, pm4);
>   }
>
> +static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
> +{
> +	unsigned value = shader->key.ps.spi_shader_col_format;
> +	unsigned i, num_targets = (util_last_bit(value) + 3) / 4;
> +
> +	/* If the i-th target format is set, all previous target formats must
> +	 * be non-zero to avoid hangs.
> +	 */
> +	for (i = 0; i < num_targets; i++)
> +		if (!(value & (0xf << (i * 4))))
> +			value |= V_028714_SPI_SHADER_32_R << (i * 4);
> +
> +	return value;
> +}
> +
>   static void si_shader_ps(struct si_shader *shader)
>   {
>   	struct tgsi_shader_info *info = &shader->selector->info;
>   	struct si_pm4_state *pm4;
> -	unsigned i, spi_ps_in_control;
> -	unsigned spi_shader_col_format = 0, cb_shader_mask = 0;
> -	unsigned colors_written, export_16bpc;
> +	unsigned i, spi_ps_in_control, spi_shader_col_format;
> +	unsigned cb_shader_mask = 0;
> +	unsigned colors_written;
>   	unsigned num_sgprs, num_user_sgprs;
>   	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
>   	uint64_t va;
> @@ -425,7 +440,6 @@ static void si_shader_ps(struct si_shader *shader)
>
>   	/* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */
>   	colors_written = info->colors_written;
> -	export_16bpc = shader->key.ps.export_16bpc;
>
>   	if (info->colors_written == 0x1 &&
>   	    info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) {
> @@ -434,13 +448,20 @@ static void si_shader_ps(struct si_shader *shader)
>
>   	while (colors_written) {
>   		i = u_bit_scan(&colors_written);
> -		if (export_16bpc & (1 << i))
> -			spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * i);
> -		else
> -			spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * i);
>   		cb_shader_mask |= 0xf << (4 * i);
>   	}
>
> +	spi_shader_col_format = si_get_spi_shader_col_format(shader);
> +
> +	/* This must be non-zero for alpha-test/kill to work.
> +	 * The hardware probably disables the pixel shader if no export
> +	 * memory is allocated.
> +	 */
> +	if (!spi_shader_col_format &&
> +	    (shader->selector->info.uses_kill ||
> +	     shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS))
> +		spi_shader_col_format = V_028714_SPI_SHADER_32_R;
> +

That's rather a coincidence with my patch from a moment ago, though I 
did miss the alpha function.

According to observation, discard will still work if 
spi_shader_col_format == 0 but the fragment shader writes to the MRTZ 
target, so this test could be tightened a bit.

Cheers,
Nicolai

>   	/* Set interpolation controls. */
>   	has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) ||
>   		       G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena);
> @@ -571,12 +592,20 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
>   		break;
>   	case PIPE_SHADER_FRAGMENT: {
>   		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
> +		struct si_state_blend *blend = sctx->queued.named.blend;
>
>   		if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
>   		    sel->info.colors_written == 0x1)
>   			key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1;
>
> -		key->ps.export_16bpc = sctx->framebuffer.export_16bpc;
> +		key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format;
> +
> +		/* If alpha-to-coverage is enabled, we have to export alpha
> +		 * even if there is no color buffer.
> +		 */
> +		if (!(key->ps.spi_shader_col_format & 0xf) &&
> +		    blend && blend->alpha_to_coverage)
> +			key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_FP16_ABGR;
>
>   		if (rs) {
>   			bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES &&
>


More information about the mesa-dev mailing list