[Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

Marek Olšák maraeo at gmail.com
Thu Oct 22 03:12:41 PDT 2015


On Wed, Oct 21, 2015 at 12:10 AM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> Uses the DCC buffer instead of the CMASK buffer. The ELIMINATE_FAST_CLEAR
> still works. Furthermore, with DCC compression we can directly clear
> to a limited set of colors such that we do not need a postprocessing step.
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>  src/gallium/drivers/radeon/r600_texture.c     | 105 +++++++++++++++++++++++---
>  src/gallium/drivers/radeonsi/si_blit.c        |   4 +-
>  src/gallium/drivers/radeonsi/si_descriptors.c |   2 +-
>  3 files changed, 97 insertions(+), 14 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 0314049..4391665 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -1239,6 +1239,79 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
>         memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
>  }
>
> +static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
> +                                        const union pipe_color_union *color,
> +                                        uint32_t* reset_value,
> +                                        bool* clear_words_needed)
> +{
> +       bool values[4] = {};
> +       bool main_value = false;
> +       int i;
> +       int extra_channel;
> +       int extra_component = 0;
> +       const struct util_format_description *desc = util_format_description(surface_format);
> +
> +       *clear_words_needed = true;
> +       *reset_value = 0x20202020U;
> +
> +       /* If we want to clear without needing a fast clear eliminate step, we can set each channel to
> +        * 0 or 1 (or 0/max for integer formats). We have two sets of flags, one for the last or first
> +        * channel and one for the rest. We decide on the last or first channel by r600_translate_colorswap.
> +        *
> +        * Note that in formats as R8G8B8X*, the X8 is the last channel, so the last channel may not correspond
> +        * to the last enabled component.
> +        */
> +
> +       /* Not sure if it is a coincidence that these are all the 3-channel color formats. */
> +       if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
> +           surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
> +           surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
> +               extra_channel = -1;
> +       } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
> +               extra_channel = (r600_translate_colorswap(surface_format) <= 1) ? desc->nr_channels - 1 : 0;
> +       } else
> +               return;
> +
> +       for (i = 0; i < 4; ++i) {
> +               int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X;
> +
> +               if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X || desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W)
> +                       continue;
> +
> +
> +               if (util_format_is_pure_sint(surface_format)) {
> +                       values[i] = color->i[i] != 0;
> +                       if (color->i[i] != 0 && color->i[i] != (1ULL << (desc->channel[index].size - 1)) - 1)
> +                               return;
> +               } else if (util_format_is_pure_uint(surface_format)) {
> +                       values[i] = color->ui[i] != 0U;
> +                       if (color->ui[i] != 0U && color->ui[i] != (1ULL << desc->channel[index].size) - 1)
> +                               return;
> +               } else {
> +                       values[i] = color->f[i] != 0.0F;
> +                       if (color->f[i] != 0.0F && color->f[i] != 1.0F)
> +                               return;
> +               }
> +
> +               if (index == extra_channel)
> +                       extra_component = i;
> +               else
> +                       main_value = values[i];
> +       }
> +
> +       for (int i = 0; i < 4; ++i)
> +               if (values[i] != main_value && desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X != extra_channel &&
> +                   desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X && desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W)
> +                       return;
> +
> +       *clear_words_needed = false;
> +       if (main_value)
> +               *reset_value |= 0x80808080U;
> +
> +       if (values[extra_component])
> +               *reset_value |= 0x40404040U;

Could you please reformat this function and rename things to be more
readable? "main" is color and "extra" is alpha, right? If so, they
should be called color and alpha. Also, please keep lines to 80
characters where possible.


> +}
> +
>  void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>                                    struct pipe_framebuffer_state *fb,
>                                    struct r600_atom *fb_state,
> @@ -1292,23 +1365,33 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>                         continue;
>                 }
>
> -               /* CMASK clear does not work for DCC compressed textures */
>                 if (tex->surface.dcc_enabled) {
> -                       continue;
> -               }
> +                       uint32_t reset_value;
> +                       bool clear_words_needed;
>
> -               /* ensure CMASK is enabled */
> -               r600_texture_alloc_cmask_separate(rctx->screen, tex);
> -               if (tex->cmask.size == 0) {
> -                       continue;
> +                       vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
> +
> +                       rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b,
> +                                       0, tex->surface.dcc_size, reset_value, true);
> +
> +                       if (clear_words_needed)
> +                               tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
> +               } else {
> +                       /* ensure CMASK is enabled */
> +                       r600_texture_alloc_cmask_separate(rctx->screen, tex);
> +                       if (tex->cmask.size == 0) {
> +                               continue;
> +                       }
> +
> +                       /* Do the fast clear. */
> +                       rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
> +                                       tex->cmask.offset, tex->cmask.size, 0, true);
> +
> +                       tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>                 }
>
> -               /* Do the fast clear. */
>                 evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
> -               rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
> -                                  tex->cmask.offset, tex->cmask.size, 0, true);
>
> -               tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>                 if (dirty_cbufs)
>                         *dirty_cbufs |= 1 << i;
>                 rctx->set_atom_dirty(rctx, fb_state, true);
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index a1af4f8..06e7e66 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
>                 assert(view);
>
>                 tex = (struct r600_texture *)view->texture;
> -               assert(tex->cmask.size || tex->fmask.size);
> +               assert(tex->cmask.size || tex->fmask.size || tex->surface.dcc_enabled);
>
>                 si_blit_decompress_color(&sctx->b.b, tex,
>                                          view->u.tex.first_level, view->u.tex.last_level,
> @@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
>                         si_blit_decompress_depth_in_place(sctx, rtex, true,
>                                                           level, level,
>                                                           first_layer, last_layer);
> -       } else if (rtex->fmask.size || rtex->cmask.size) {
> +       } else if (rtex->fmask.size || rtex->cmask.size || rtex->surface.dcc_enabled) {
>                 si_blit_decompress_color(ctx, rtex, level, level,
>                                          first_layer, last_layer);
>         }
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 5548cba3..a277fa5 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context *ctx,
>                         } else {
>                                 samplers->depth_texture_mask &= ~(1 << slot);
>                         }
> -                       if (rtex->cmask.size || rtex->fmask.size) {
> +                       if (rtex->cmask.size || rtex->fmask.size || rtex->surface.dcc_enabled) {
>                                 samplers->compressed_colortex_mask |= 1 << slot;

I'd like this flag to be set only when dirty_level_mask is non-zero.
Setting this for all textures that have DCC is quite expensive in draw
calls.

Marek


More information about the mesa-dev mailing list