[Mesa-dev] [PATCH 3/3] radeonsi: try to hit direct hw MSAA resolve by changing micro mode in clear

Nicolai Hähnle nhaehnle at gmail.com
Tue Jun 14 10:41:49 UTC 2016


On 13.06.2016 18:17, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> We could also do MSAA resolve in a compute shader, as the Vulkan driver does,
> and remove these workarounds.
> ---
>   src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
>   src/gallium/drivers/radeon/r600_texture.c     | 83 +++++++++++++++++++++++++++
>   src/gallium/drivers/radeonsi/si_blit.c        | 20 ++++++-
>   3 files changed, 103 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 59962be..eb8a25a 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -252,6 +252,7 @@ struct r600_texture {
>   	uint64_t			dcc_offset; /* 0 = disabled */
>   	unsigned			cb_color_info; /* fast clear enable bit */
>   	unsigned			color_clear_value[2];
> +	unsigned			last_msaa_resolve_target_micro_mode;
>
>   	/* Depth buffer compression and fast clear. */
>   	struct r600_htile_info		htile;
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 3368dc9..31544b6 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -1015,6 +1015,8 @@ r600_texture_create_object(struct pipe_screen *screen,
>   	 * This must be done after r600_setup_surface.
>   	 * Applies to R600-Cayman. */
>   	rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
> +	/* Applies to GCN. */
> +	rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
>
>   	if (rtex->is_depth) {
>   		if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
> @@ -1821,6 +1823,79 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
>   			   clear_value, R600_COHERENCY_CB_META);
>   }
>
> +/* Set the same micro tile mode as the destination of the last MSAA resolve.
> + * This allows hitting the MSAA resolve fast path, which requires that both
> + * src and dst micro tile modes match.
> + */
> +static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
> +					   struct r600_texture *rtex)
> +{
> +	if (rtex->resource.is_shared ||
> +	    rtex->surface.nsamples <= 1 ||
> +	    rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
> +		return;
> +
> +	assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
> +	assert(rtex->surface.last_level == 0);
> +
> +	if (rscreen->chip_class >= CIK) {
> +		switch (rtex->last_msaa_resolve_target_micro_mode) {
> +		case 0: /* displayable */
> +			rtex->surface.tiling_index[0] = 10;
> +			break;
> +		case 1: /* thin */
> +			rtex->surface.tiling_index[0] = 14;
> +			break;
> +		case 3: /* rotated */
> +			rtex->surface.tiling_index[0] = 28;
> +			break;
> +		default: /* depth, thick */
> +			assert(!"unexpected micro mode");
> +			return;
> +		}
> +	} else { /* SI */
> +		switch (rtex->last_msaa_resolve_target_micro_mode) {
> +		case 0: /* displayable */
> +			switch (rtex->surface.bpe) {
> +			case 8:
> +                            rtex->surface.tiling_index[0] = 10;
> +                            break;
> +			case 16:
> +                            rtex->surface.tiling_index[0] = 11;
> +                            break;
> +			default: /* 32, 64 */
> +                            rtex->surface.tiling_index[0] = 12;
> +                            break;
> +			}
> +			break;
> +		case 1: /* thin */
> +			switch (rtex->surface.bpe) {
> +			case 8:
> +                                rtex->surface.tiling_index[0] = 14;
> +                                break;
> +			case 16:
> +                                rtex->surface.tiling_index[0] = 15;
> +                                break;
> +			case 32:
> +                                rtex->surface.tiling_index[0] = 16;
> +                                break;
> +			default: /* 64, 128 */
> +                                rtex->surface.tiling_index[0] = 17;
> +                                break;
> +			}

Are those magic numbers (the hard-coded tiling_index values) documented somewhere?

Nicolai

> +			break;
> +		default: /* depth, thick */
> +			assert(!"unexpected micro mode");
> +			return;
> +		}
> +	}
> +
> +	rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
> +
> +	p_atomic_inc(&rscreen->dirty_fb_counter);
> +	p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
> +}
> +
>   void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>   				   struct pipe_framebuffer_state *fb,
>   				   struct r600_atom *fb_state,
> @@ -1918,6 +1993,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>   			if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
>   				continue;
>
> +			/* We can change the micro tile mode before a full clear. */
> +			if (rctx->screen->chip_class >= SI)
> +				si_set_optimal_micro_tile_mode(rctx->screen, tex);
> +
>   			vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
>   			vi_dcc_clear_level(rctx, tex, 0, reset_value);
>
> @@ -1934,6 +2013,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>   				continue;
>   			}
>
> +			/* We can change the micro tile mode before a full clear. */
> +			if (rctx->screen->chip_class >= SI)
> +				si_set_optimal_micro_tile_mode(rctx->screen, tex);
> +
>   			/* Do the fast clear. */
>   			rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
>   					   tex->cmask.offset, tex->cmask.size, 0,
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 13e10de..f2f1ef5 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -22,6 +22,7 @@
>    */
>
>   #include "si_pipe.h"
> +#include "sid.h"
>   #include "util/u_format.h"
>   #include "util/u_surface.h"
>
> @@ -903,8 +904,18 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
>   	    info->src.box.height == dst_height &&
>   	    info->src.box.depth == 1 &&
>   	    dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
> -	    src->surface.micro_tile_mode == dst->surface.micro_tile_mode &&
>   	    (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
> +		/* Check the last constraint. */
> +		if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
> +			/* The next fast clear will switch to this mode to
> +			 * get direct hw resolve next time if the mode is
> +			 * different now.
> +			 */
> +			src->last_msaa_resolve_target_micro_mode =
> +				dst->surface.micro_tile_mode;
> +			goto resolve_to_temp;
> +		}
> +
>   		/* Resolving into a surface with DCC is unsupported. Since
>   		 * it's being overwritten anyway, clear it to uncompressed.
>   		 * This is still the fastest codepath even with this clear.
> @@ -929,6 +940,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
>   		return true;
>   	}
>
> +resolve_to_temp:
>   	/* Shader-based resolve is VERY SLOW. Instead, resolve into
>   	 * a temporary texture and blit.
>   	 */
> @@ -943,6 +955,12 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
>   	templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
>   		      R600_RESOURCE_FLAG_DISABLE_DCC;
>
> +	/* The src and dst microtile modes must be the same. */
> +	if (src->surface.micro_tile_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING)
> +		templ.bind = PIPE_BIND_SCANOUT;
> +	else
> +		templ.bind = 0;
> +
>   	tmp = ctx->screen->resource_create(ctx->screen, &templ);
>   	if (!tmp)
>   		return false;
>


More information about the mesa-dev mailing list