[Mesa-dev] [PATCH 1/3] radeonsi/gfx9: set optimal OVERWRITE_COMBINER_WATERMARK

Samuel Pitoiset samuel.pitoiset at gmail.com
Mon Oct 29 11:06:46 UTC 2018


Are the optimal watermark values similar when the watermark is set per CB instead of globally?

On 10/27/18 4:28 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>   src/gallium/drivers/radeonsi/si_pipe.h  |  1 +
>   src/gallium/drivers/radeonsi/si_state.c | 14 +++++++++++++-
>   2 files changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
> index dc95afb7421..0807c8ddacc 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.h
> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
> @@ -610,20 +610,21 @@ struct si_framebuffer {
>   	unsigned			spi_shader_col_format_blend;
>   	unsigned			spi_shader_col_format_blend_alpha;
>   	ubyte				nr_samples:5; /* at most 16xAA */
>   	ubyte				log_samples:3; /* at most 4 = 16xAA */
>   	ubyte				nr_color_samples; /* at most 8xAA */
>   	ubyte				compressed_cb_mask;
>   	ubyte				uncompressed_cb_mask;
>   	ubyte				color_is_int8;
>   	ubyte				color_is_int10;
>   	ubyte				dirty_cbufs;
> +	ubyte				dcc_overwrite_combiner_watermark;
>   	bool				dirty_zsbuf;
>   	bool				any_dst_linear;
>   	bool				CB_has_shader_readable_metadata;
>   	bool				DB_has_shader_readable_metadata;
>   };
>   
>   enum si_quant_mode {
>   	/* This is the list we want to support. */
>   	SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH,
>   	SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH,
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 36dce381539..43d76d19916 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -106,26 +106,27 @@ static void si_emit_cb_render_state(struct si_context *sctx)
>   	if (sctx->chip_class >= VI) {
>   		/* DCC MSAA workaround for blending.
>   		 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_-
>   		 * COMBINER_DISABLE, but that would be more complicated.
>   		 */
>   		bool oc_disable = (sctx->chip_class == VI ||
>   				   sctx->chip_class == GFX9) &&
>   				  blend &&
>   				  blend->blend_enable_4bit & cb_target_mask &&
>   				  sctx->framebuffer.nr_samples >= 2;
> +		unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark;
>   
>   		radeon_opt_set_context_reg(
>   				sctx, R_028424_CB_DCC_CONTROL,
>   				SI_TRACKED_CB_DCC_CONTROL,
>   				S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
> -				S_028424_OVERWRITE_COMBINER_WATERMARK(4) |
> +				S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
>   				S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable));
>   	}
>   
>   	/* RB+ register settings. */
>   	if (sctx->screen->rbplus_allowed) {
>   		unsigned spi_shader_col_format =
>   			sctx->ps_shader.cso ?
>   			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
>   		unsigned sx_ps_downconvert = 0;
>   		unsigned sx_blend_opt_epsilon = 0;
> @@ -2848,20 +2849,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   	sctx->framebuffer.color_is_int10 = 0;
>   
>   	sctx->framebuffer.compressed_cb_mask = 0;
>   	sctx->framebuffer.uncompressed_cb_mask = 0;
>   	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
>   	sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples;
>   	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
>   	sctx->framebuffer.any_dst_linear = false;
>   	sctx->framebuffer.CB_has_shader_readable_metadata = false;
>   	sctx->framebuffer.DB_has_shader_readable_metadata = false;
> +	unsigned num_bpp64_colorbufs = 0;
>   
>   	for (i = 0; i < state->nr_cbufs; i++) {
>   		if (!state->cbufs[i])
>   			continue;
>   
>   		surf = (struct si_surface*)state->cbufs[i];
>   		tex = (struct si_texture*)surf->base.texture;
>   
>   		if (!surf->color_initialized) {
>   			si_initialize_color_surface(sctx, surf);
> @@ -2894,35 +2896,45 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>   		    tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) {
>   			sctx->framebuffer.nr_color_samples =
>   				MIN2(sctx->framebuffer.nr_color_samples,
>   				     tex->buffer.b.b.nr_storage_samples);
>   			sctx->framebuffer.nr_color_samples =
>   				MAX2(1, sctx->framebuffer.nr_color_samples);
>   		}
>   
>   		if (tex->surface.is_linear)
>   			sctx->framebuffer.any_dst_linear = true;
> +		if (tex->surface.bpe >= 8)
> +			num_bpp64_colorbufs++;
>   
>   		if (vi_dcc_enabled(tex, surf->base.u.tex.level))
>   			sctx->framebuffer.CB_has_shader_readable_metadata = true;
>   
>   		si_context_add_resource_size(sctx, surf->base.texture);
>   
>   		p_atomic_inc(&tex->framebuffers_bound);
>   
>   		if (tex->dcc_gather_statistics) {
>   			/* Dirty tracking must be enabled for DCC usage analysis. */
>   			sctx->framebuffer.compressed_cb_mask |= 1 << i;
>   			vi_separate_dcc_start_query(sctx, tex);
>   		}
>   	}
>   
> +	/* For optimal DCC performance. */
> +	if (sctx->chip_class == VI)
> +		sctx->framebuffer.dcc_overwrite_combiner_watermark = 4;
> +	else if (num_bpp64_colorbufs >= 5)
> +		sctx->framebuffer.dcc_overwrite_combiner_watermark = 8;
> +	else
> +		sctx->framebuffer.dcc_overwrite_combiner_watermark = 6;
> +
>   	struct si_texture *zstex = NULL;
>   
>   	if (state->zsbuf) {
>   		surf = (struct si_surface*)state->zsbuf;
>   		zstex = (struct si_texture*)surf->base.texture;
>   
>   		if (!surf->depth_initialized) {
>   			si_init_depth_surface(sctx, surf);
>   		}
>   
> 


More information about the mesa-dev mailing list