[Mesa-dev] [PATCH v3 5/7] radeonsi: Implement DCC fast clear.

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Thu Oct 22 09:55:14 PDT 2015


On Thu, Oct 22, 2015 at 12:12 PM, Marek Olšák <maraeo at gmail.com> wrote:
> On Wed, Oct 21, 2015 at 12:10 AM, Bas Nieuwenhuizen
> <bas at basnieuwenhuizen.nl> wrote:
>> Uses the DCC buffer instead of the CMASK buffer. The ELIMINATE_FAST_CLEAR
>> still works. Furthermore, with DCC compression we can directly clear
>> to a limited set of colors such that we do not need a postprocessing step.
>>
>> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
>> ---
>>  src/gallium/drivers/radeon/r600_texture.c     | 105 +++++++++++++++++++++++---
>>  src/gallium/drivers/radeonsi/si_blit.c        |   4 +-
>>  src/gallium/drivers/radeonsi/si_descriptors.c |   2 +-
>>  3 files changed, 97 insertions(+), 14 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
>> index 0314049..4391665 100644
>> --- a/src/gallium/drivers/radeon/r600_texture.c
>> +++ b/src/gallium/drivers/radeon/r600_texture.c
>> @@ -1239,6 +1239,79 @@ static void evergreen_set_clear_color(struct r600_texture *rtex,
>>         memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
>>  }
>>
>> +static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
>> +                                        const union pipe_color_union *color,
>> +                                        uint32_t* reset_value,
>> +                                        bool* clear_words_needed)
>> +{
>> +       bool values[4] = {};
>> +       bool main_value = false;
>> +       int i;
>> +       int extra_channel;
>> +       int extra_component = 0;
>> +       const struct util_format_description *desc = util_format_description(surface_format);
>> +
>> +       *clear_words_needed = true;
>> +       *reset_value = 0x20202020U;
>> +
>> +       /* If we want to clear without needing a fast clear eliminate step, we can set each channel to
>> +        * 0 or 1 (or 0/max for integer formats). We have two sets of flags, one for the last or first
>> +        * channel and one for the rest. We decide on the last or first channel by r600_translate_colorswap.
>> +        *
>> +        * Note that in formats such as R8G8B8X*, the X8 is the last channel, so the last channel may not correspond
>> +        * to the last enabled component.
>> +        */
>> +
>> +       /* Not sure if it is a coincidence that these are all the 3-channel color formats. */
>> +       if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
>> +           surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
>> +           surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
>> +               extra_channel = -1;
>> +       } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
>> +               extra_channel = (r600_translate_colorswap(surface_format) <= 1) ? desc->nr_channels - 1 : 0;
>> +       } else
>> +               return;
>> +
>> +       for (i = 0; i < 4; ++i) {
>> +               int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X;
>> +
>> +               if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X || desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W)
>> +                       continue;
>> +
>> +
>> +               if (util_format_is_pure_sint(surface_format)) {
>> +                       values[i] = color->i[i] != 0;
>> +                       if (color->i[i] != 0 && color->i[i] != (1ULL << (desc->channel[index].size - 1)) - 1)
>> +                               return;
>> +               } else if (util_format_is_pure_uint(surface_format)) {
>> +                       values[i] = color->ui[i] != 0U;
>> +                       if (color->ui[i] != 0U && color->ui[i] != (1ULL << desc->channel[index].size) - 1)
>> +                               return;
>> +               } else {
>> +                       values[i] = color->f[i] != 0.0F;
>> +                       if (color->f[i] != 0.0F && color->f[i] != 1.0F)
>> +                               return;
>> +               }
>> +
>> +               if (index == extra_channel)
>> +                       extra_component = i;
>> +               else
>> +                       main_value = values[i];
>> +       }
>> +
>> +       for (int i = 0; i < 4; ++i)
>> +               if (values[i] != main_value && desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X != extra_channel &&
>> +                   desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X && desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W)
>> +                       return;
>> +
>> +       *clear_words_needed = false;
>> +       if (main_value)
>> +               *reset_value |= 0x80808080U;
>> +
>> +       if (values[extra_component])
>> +               *reset_value |= 0x40404040U;
>
> Could you please reformat this function and rename things to be more
> readable? "main" is color and "extra" is alpha, right? If yes, they
> should be called color and alpha. Also, 80 characters per line where
> possible.

Extra is not necessarily alpha. E.g., for R8G8 it is actually the green
component.

>
>> +}
>> +
>>  void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>>                                    struct pipe_framebuffer_state *fb,
>>                                    struct r600_atom *fb_state,
>> @@ -1292,23 +1365,33 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
>>                         continue;
>>                 }
>>
>> -               /* CMASK clear does not work for DCC compressed textures */
>>                 if (tex->surface.dcc_enabled) {
>> -                       continue;
>> -               }
>> +                       uint32_t reset_value;
>> +                       bool clear_words_needed;
>>
>> -               /* ensure CMASK is enabled */
>> -               r600_texture_alloc_cmask_separate(rctx->screen, tex);
>> -               if (tex->cmask.size == 0) {
>> -                       continue;
>> +                       vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
>> +
>> +                       rctx->clear_buffer(&rctx->b, &tex->dcc_buffer->b.b,
>> +                                       0, tex->surface.dcc_size, reset_value, true);
>> +
>> +                       if (clear_words_needed)
>> +                               tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>> +               } else {
>> +                       /* ensure CMASK is enabled */
>> +                       r600_texture_alloc_cmask_separate(rctx->screen, tex);
>> +                       if (tex->cmask.size == 0) {
>> +                               continue;
>> +                       }
>> +
>> +                       /* Do the fast clear. */
>> +                       rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
>> +                                       tex->cmask.offset, tex->cmask.size, 0, true);
>> +
>> +                       tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>>                 }
>>
>> -               /* Do the fast clear. */
>>                 evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
>> -               rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
>> -                                  tex->cmask.offset, tex->cmask.size, 0, true);
>>
>> -               tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
>>                 if (dirty_cbufs)
>>                         *dirty_cbufs |= 1 << i;
>>                 rctx->set_atom_dirty(rctx, fb_state, true);
>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
>> index a1af4f8..06e7e66 100644
>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> @@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
>>                 assert(view);
>>
>>                 tex = (struct r600_texture *)view->texture;
>> -               assert(tex->cmask.size || tex->fmask.size);
>> +               assert(tex->cmask.size || tex->fmask.size || tex->surface.dcc_enabled);
>>
>>                 si_blit_decompress_color(&sctx->b.b, tex,
>>                                          view->u.tex.first_level, view->u.tex.last_level,
>> @@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context *ctx,
>>                         si_blit_decompress_depth_in_place(sctx, rtex, true,
>>                                                           level, level,
>>                                                           first_layer, last_layer);
>> -       } else if (rtex->fmask.size || rtex->cmask.size) {
>> +       } else if (rtex->fmask.size || rtex->cmask.size || rtex->surface.dcc_enabled) {
>>                 si_blit_decompress_color(ctx, rtex, level, level,
>>                                          first_layer, last_layer);
>>         }
>> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
>> index 5548cba3..a277fa5 100644
>> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
>> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
>> @@ -234,7 +234,7 @@ static void si_set_sampler_views(struct pipe_context *ctx,
>>                         } else {
>>                                 samplers->depth_texture_mask &= ~(1 << slot);
>>                         }
>> -                       if (rtex->cmask.size || rtex->fmask.size) {
>> +                       if (rtex->cmask.size || rtex->fmask.size || rtex->surface.dcc_enabled) {
>>                                 samplers->compressed_colortex_mask |= 1 << slot;
>
> I'd like this flag to be set only when dirty_level_mask is non-zero.
> Setting this for all textures that have DCC is quite expensive in draw
> calls.


More information about the mesa-dev mailing list