[Mesa-dev] [PATCH 1/5] radeonsi: remove fast color clear for single-sample buffers

Marek Olšák maraeo at gmail.com
Fri Mar 30 17:03:16 UTC 2018


Yes.

Marek

On Fri, Mar 30, 2018, 4:47 AM Dieter Nützel <Dieter at nuetzel-hh.de> wrote:

> Hello Marek,
>
> 2-3 landed.
> Is #1 dead after my findings? ;-)
>
> Dieter
>
> Am 11.03.2018 19:11, schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak at amd.com>
> >
> > This should improve the score for the GpuTest Triangle benchmark.
> > Vulkan doesn't use this either.
> > ---
> >  src/gallium/drivers/radeon/r600_pipe_common.h |  1 -
> >  src/gallium/drivers/radeon/r600_texture.c     | 11 +-------
> >  src/gallium/drivers/radeonsi/si_clear.c       | 37 ++-------------------------
> >  src/gallium/drivers/radeonsi/si_state.c       |  6 -----
> >  4 files changed, 3 insertions(+), 52 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
> > b/src/gallium/drivers/radeon/r600_pipe_common.h
> > index 7941903..9701757 100644
> > --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> > +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> > @@ -209,21 +209,20 @@ struct r600_cmask_info {
> >  struct r600_texture {
> >       struct r600_resource            resource;
> >
> >       struct radeon_surf              surface;
> >       uint64_t                        size;
> >       struct r600_texture             *flushed_depth_texture;
> >
> >       /* Colorbuffer compression and fast clear. */
> >       struct r600_fmask_info          fmask;
> >       struct r600_cmask_info          cmask;
> > -     struct r600_resource            *cmask_buffer;
> >       uint64_t                        dcc_offset; /* 0 = disabled */
> >       unsigned                        cb_color_info; /* fast clear enable bit */
> >       unsigned                        color_clear_value[2];
> >       unsigned                        last_msaa_resolve_target_micro_mode;
> >       unsigned                        num_level0_transfers;
> >
> >       /* Depth buffer compression and fast clear. */
> >       uint64_t                        htile_offset;
> >       float                           depth_clear_value;
> >       uint16_t                        dirty_level_mask; /* each bit says if that mipmap is compressed */
> > diff --git a/src/gallium/drivers/radeon/r600_texture.c
> > b/src/gallium/drivers/radeon/r600_texture.c
> > index 125e7ef..03bc955 100644
> > --- a/src/gallium/drivers/radeon/r600_texture.c
> > +++ b/src/gallium/drivers/radeon/r600_texture.c
> > @@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen
> > *sscreen,
> >  {
> >       if (!rtex->cmask.size)
> >               return;
> >
> >       assert(rtex->resource.b.b.nr_samples <= 1);
> >
> >       /* Disable CMASK. */
> >       memset(&rtex->cmask, 0, sizeof(rtex->cmask));
> >       rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
> >       rtex->dirty_level_mask = 0;
> > -
> >       rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
> >
> > -     if (rtex->cmask_buffer != &rtex->resource)
> > -         r600_resource_reference(&rtex->cmask_buffer, NULL);
> > -
> >       /* Notify all contexts about the change. */
> >       p_atomic_inc(&sscreen->dirty_tex_counter);
> >       p_atomic_inc(&sscreen->compressed_colortex_counter);
> >  }
> >
> >  static bool r600_can_disable_dcc(struct r600_texture *rtex)
> >  {
> >       /* We can't disable DCC if it can be written by another process. */
> >       return rtex->dcc_offset &&
> >              (!rtex->resource.b.is_shared ||
> > @@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct
> > pipe_screen* screen,
> >                                             slice_size, whandle);
> >  }
> >
> >  static void r600_texture_destroy(struct pipe_screen *screen,
> >                                struct pipe_resource *ptex)
> >  {
> >       struct r600_texture *rtex = (struct r600_texture*)ptex;
> >       struct r600_resource *resource = &rtex->resource;
> >
> >       r600_texture_reference(&rtex->flushed_depth_texture, NULL);
> > -
> > -     if (rtex->cmask_buffer != &rtex->resource) {
> > -         r600_resource_reference(&rtex->cmask_buffer, NULL);
> > -     }
> >       pb_reference(&resource->buf, NULL);
> >       r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
> >       r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
> >       FREE(rtex);
> >  }
> >
> >  static const struct u_resource_vtbl r600_texture_vtbl;
> >
> >  /* The number of samples can be specified independently of the
> > texture. */
> >  void si_texture_get_fmask_info(struct si_screen *sscreen,
> > @@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen
> > *screen,
> >                       rtex->db_compatible = true;
> >
> >                       if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))
> >                               r600_texture_allocate_htile(sscreen, rtex);
> >               }
> >       } else {
> >               if (base->nr_samples > 1) {
> >                       if (!buf) {
> >                               r600_texture_allocate_fmask(sscreen, rtex);
> >                               r600_texture_allocate_cmask(sscreen, rtex);
> > -                             rtex->cmask_buffer = &rtex->resource;
> >                       }
> >                       if (!rtex->fmask.size || !rtex->cmask.size) {
> >                               FREE(rtex);
> >                               return NULL;
> >                       }
> >               }
> >
> >               /* Shared textures must always set up DCC here.
> >                * If it's not present, it will be disabled by
> >                * apply_opaque_metadata later.
> > @@ -1306,21 +1297,21 @@ r600_texture_create_object(struct pipe_screen
> > *screen,
> >               resource->bo_alignment = buf->alignment;
> >               resource->domains =
> > sscreen->ws->buffer_get_initial_domain(resource->buf);
> >               if (resource->domains & RADEON_DOMAIN_VRAM)
> >                       resource->vram_usage = buf->size;
> >               else if (resource->domains & RADEON_DOMAIN_GTT)
> >                       resource->gart_usage = buf->size;
> >       }
> >
> >       if (rtex->cmask.size) {
> >               /* Initialize the cmask to 0xCC (= compressed state). */
> > -             si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b,
> > +             si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
> >                                        rtex->cmask.offset, rtex->cmask.size,
> >                                        0xCCCCCCCC);
> >       }
> >       if (rtex->htile_offset) {
> >               uint32_t clear_value = 0;
> >
> >               if (sscreen->info.chip_class >= GFX9 || rtex->tc_compatible_htile)
> >                       clear_value = 0x0000030F;
> >
> >               si_screen_clear_buffer(sscreen, &rtex->resource.b.b,
> > diff --git a/src/gallium/drivers/radeonsi/si_clear.c
> > b/src/gallium/drivers/radeonsi/si_clear.c
> > index 464b9d7..a940aea 100644
> > --- a/src/gallium/drivers/radeonsi/si_clear.c
> > +++ b/src/gallium/drivers/radeonsi/si_clear.c
> > @@ -26,51 +26,20 @@
> >
> >  #include "util/u_format.h"
> >  #include "util/u_pack_color.h"
> >  #include "util/u_surface.h"
> >
> >  enum {
> >       SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,
> >       SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
> >  };
> >
> > -static void si_alloc_separate_cmask(struct si_screen *sscreen,
> > -                                 struct r600_texture *rtex)
> > -{
> > -     if (rtex->cmask_buffer)
> > -                return;
> > -
> > -     assert(rtex->cmask.size == 0);
> > -
> > -     si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);
> > -     if (!rtex->cmask.size)
> > -             return;
> > -
> > -     rtex->cmask_buffer = (struct r600_resource *)
> > -             si_aligned_buffer_create(&sscreen->b,
> > -                                      R600_RESOURCE_FLAG_UNMAPPABLE,
> > -                                      PIPE_USAGE_DEFAULT,
> > -                                      rtex->cmask.size,
> > -                                      rtex->cmask.alignment);
> > -     if (rtex->cmask_buffer == NULL) {
> > -             rtex->cmask.size = 0;
> > -             return;
> > -     }
> > -
> > -     /* update colorbuffer state bits */
> > -     rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
> > -
> > -     rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);
> > -
> > -     p_atomic_inc(&sscreen->compressed_colortex_counter);
> > -}
> > -
> >  static void si_set_clear_color(struct r600_texture *rtex,
> >                              enum pipe_format surface_format,
> >                              const union pipe_color_union *color)
> >  {
> >       union util_color uc;
> >
> >       memset(&uc, 0, sizeof(uc));
> >
> >       if (rtex->surface.bpe == 16) {
> >               /* DCC fast clear only:
> > @@ -451,21 +420,21 @@ static void si_do_fast_color_clear(struct
> > si_context *sctx,
> >
> >                       if (clear_words_needed && too_small)
> >                               continue;
> >
> >                       /* DCC fast clear with MSAA should clear CMASK to 0xC. */
> >                       if (tex->resource.b.b.nr_samples >= 2 && tex->cmask.size) {
> >                               /* TODO: This doesn't work with MSAA. */
> >                               if (clear_words_needed)
> >                                       continue;
> >
> > -                             si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
> > +                             si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
> >                                               tex->cmask.offset, tex->cmask.size,
> >                                               0xCCCCCCCC, R600_COHERENCY_CB_META);
> >                               need_decompress_pass = true;
> >                       }
> >
> >                       vi_dcc_clear_level(sctx, tex, 0, reset_value);
> >
> >                       if (clear_words_needed)
> >                               need_decompress_pass = true;
> >
> > @@ -476,28 +445,26 @@ static void si_do_fast_color_clear(struct
> > si_context *sctx,
> >
> >                       /* 128-bit formats are unusupported */
> >                       if (tex->surface.bpe > 8) {
> >                               continue;
> >                       }
> >
> >                       /* RB+ doesn't work with CMASK fast clear on Stoney. */
> >                       if (sctx->b.family == CHIP_STONEY)
> >                               continue;
> >
> > -                     /* ensure CMASK is enabled */
> > -                     si_alloc_separate_cmask(sctx->screen, tex);
> >                       if (tex->cmask.size == 0) {
> >                               continue;
> >                       }
> >
> >                       /* Do the fast clear. */
> > -                     si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,
> > +                     si_clear_buffer(&sctx->b.b, &tex->resource.b.b,
> >                                       tex->cmask.offset, tex->cmask.size, 0,
> >                                       R600_COHERENCY_CB_META);
> >                       need_decompress_pass = true;
> >               }
> >
> >               if (need_decompress_pass &&
> >                   !(tex->dirty_level_mask & (1 << level))) {
> >                       tex->dirty_level_mask |= 1 << level;
> >                       p_atomic_inc(&sctx->screen->compressed_colortex_counter);
> >               }
> > diff --git a/src/gallium/drivers/radeonsi/si_state.c
> > b/src/gallium/drivers/radeonsi/si_state.c
> > index 6c82257..aae7332 100644
> > --- a/src/gallium/drivers/radeonsi/si_state.c
> > +++ b/src/gallium/drivers/radeonsi/si_state.c
> > @@ -2980,26 +2980,20 @@ static void si_emit_framebuffer_state(struct
> > si_context *sctx, struct r600_atom
> >                       continue;
> >               }
> >
> >               tex = (struct r600_texture *)cb->base.texture;
> >               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> >                                     &tex->resource, RADEON_USAGE_READWRITE,
> >                                     tex->resource.b.b.nr_samples > 1 ?
> >                                             RADEON_PRIO_COLOR_BUFFER_MSAA :
> >                                             RADEON_PRIO_COLOR_BUFFER);
> >
> > -             if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {
> > -                     radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> > -                             tex->cmask_buffer, RADEON_USAGE_READWRITE,
> > -                             RADEON_PRIO_CMASK);
> > -             }
> > -
> >               if (tex->dcc_separate_buffer)
> >                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> >                                                 tex->dcc_separate_buffer,
> >                                                 RADEON_USAGE_READWRITE,
> >                                                 RADEON_PRIO_DCC);
> >
> >               /* Compute mutable surface parameters. */
> >               cb_color_base = tex->resource.gpu_address >> 8;
> >               cb_color_fmask = 0;
> >               cb_dcc_base = 0;
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180330/001ff2d5/attachment-0001.html>


More information about the mesa-dev mailing list