[Mesa-dev] [PATCH 3/3] radeonsi: try to hit direct hw MSAA resolve by changing micro mode in clear
Nicolai Hähnle
nhaehnle at gmail.com
Tue Jun 14 10:41:49 UTC 2016
On 13.06.2016 18:17, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> We could also do MSAA resolve in a compute shader like Vulkan and remove
> these workarounds.
> ---
> src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
> src/gallium/drivers/radeon/r600_texture.c | 83 +++++++++++++++++++++++++++
> src/gallium/drivers/radeonsi/si_blit.c | 20 ++++++-
> 3 files changed, 103 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 59962be..eb8a25a 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -252,6 +252,7 @@ struct r600_texture {
> uint64_t dcc_offset; /* 0 = disabled */
> unsigned cb_color_info; /* fast clear enable bit */
> unsigned color_clear_value[2];
> + unsigned last_msaa_resolve_target_micro_mode;
>
> /* Depth buffer compression and fast clear. */
> struct r600_htile_info htile;
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 3368dc9..31544b6 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -1015,6 +1015,8 @@ r600_texture_create_object(struct pipe_screen *screen,
> * This must be done after r600_setup_surface.
> * Applies to R600-Cayman. */
> rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
> + /* Applies to GCN. */
> + rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
>
> if (rtex->is_depth) {
> if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
> @@ -1821,6 +1823,79 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
> clear_value, R600_COHERENCY_CB_META);
> }
>
> +/* Set the same micro tile mode as the destination of the last MSAA resolve.
> + * This allows hitting the MSAA resolve fast path, which requires that both
> + * src and dst micro tile modes match.
> + */
> +static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
> + struct r600_texture *rtex)
> +{
> + if (rtex->resource.is_shared ||
> + rtex->surface.nsamples <= 1 ||
> + rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
> + return;
> +
> + assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
> + assert(rtex->surface.last_level == 0);
> +
> + if (rscreen->chip_class >= CIK) {
> + switch (rtex->last_msaa_resolve_target_micro_mode) {
> + case 0: /* displayable */
> + rtex->surface.tiling_index[0] = 10;
> + break;
> + case 1: /* thin */
> + rtex->surface.tiling_index[0] = 14;
> + break;
> + case 3: /* rotated */
> + rtex->surface.tiling_index[0] = 28;
> + break;
> + default: /* depth, thick */
> + assert(!"unexpected micro mode");
> + return;
> + }
> + } else { /* SI */
> + switch (rtex->last_msaa_resolve_target_micro_mode) {
> + case 0: /* displayable */
> + switch (rtex->surface.bpe) {
> + case 8:
> + rtex->surface.tiling_index[0] = 10;
> + break;
> + case 16:
> + rtex->surface.tiling_index[0] = 11;
> + break;
> + default: /* 32, 64 */
> + rtex->surface.tiling_index[0] = 12;
> + break;
> + }
> + break;
> + case 1: /* thin */
> + switch (rtex->surface.bpe) {
> + case 8:
> + rtex->surface.tiling_index[0] = 14;
> + break;
> + case 16:
> + rtex->surface.tiling_index[0] = 15;
> + break;
> + case 32:
> + rtex->surface.tiling_index[0] = 16;
> + break;
> + default: /* 64, 128 */
> + rtex->surface.tiling_index[0] = 17;
> + break;
> + }
Are those magic numbers (the hard-coded tiling_index values) documented somewhere?
Nicolai
> + break;
> + default: /* depth, thick */
> + assert(!"unexpected micro mode");
> + return;
> + }
> + }
> +
> + rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
> +
> + p_atomic_inc(&rscreen->dirty_fb_counter);
> + p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
> +}
> +
> void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
> struct pipe_framebuffer_state *fb,
> struct r600_atom *fb_state,
> @@ -1918,6 +1993,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
> if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
> continue;
>
> + /* We can change the micro tile mode before a full clear. */
> + if (rctx->screen->chip_class >= SI)
> + si_set_optimal_micro_tile_mode(rctx->screen, tex);
> +
> vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
> vi_dcc_clear_level(rctx, tex, 0, reset_value);
>
> @@ -1934,6 +2013,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
> continue;
> }
>
> + /* We can change the micro tile mode before a full clear. */
> + if (rctx->screen->chip_class >= SI)
> + si_set_optimal_micro_tile_mode(rctx->screen, tex);
> +
> /* Do the fast clear. */
> rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
> tex->cmask.offset, tex->cmask.size, 0,
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index 13e10de..f2f1ef5 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -22,6 +22,7 @@
> */
>
> #include "si_pipe.h"
> +#include "sid.h"
> #include "util/u_format.h"
> #include "util/u_surface.h"
>
> @@ -903,8 +904,18 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
> info->src.box.height == dst_height &&
> info->src.box.depth == 1 &&
> dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
> - src->surface.micro_tile_mode == dst->surface.micro_tile_mode &&
> (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
> + /* Check the last constraint. */
> + if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
> + /* The next fast clear will switch to this mode to
> + * get direct hw resolve next time if the mode is
> + * different now.
> + */
> + src->last_msaa_resolve_target_micro_mode =
> + dst->surface.micro_tile_mode;
> + goto resolve_to_temp;
> + }
> +
> /* Resolving into a surface with DCC is unsupported. Since
> * it's being overwritten anyway, clear it to uncompressed.
> * This is still the fastest codepath even with this clear.
> @@ -929,6 +940,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
> return true;
> }
>
> +resolve_to_temp:
> /* Shader-based resolve is VERY SLOW. Instead, resolve into
> * a temporary texture and blit.
> */
> @@ -943,6 +955,12 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
> templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
> R600_RESOURCE_FLAG_DISABLE_DCC;
>
> + /* The src and dst microtile modes must be the same. */
> + if (src->surface.micro_tile_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING)
> + templ.bind = PIPE_BIND_SCANOUT;
> + else
> + templ.bind = 0;
> +
> tmp = ctx->screen->resource_create(ctx->screen, &templ);
> if (!tmp)
> return false;
>
More information about the mesa-dev mailing list