<div dir="auto">Yes.<div dir="auto"><br></div><div dir="auto">Marek</div></div><br><div class="gmail_quote"><div dir="ltr">On Fri, Mar 30, 2018, 4:47 AM Dieter Nützel &lt;<a href="mailto:Dieter@nuetzel-hh.de">Dieter@nuetzel-hh.de</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hello Marek,<br>
<br>
2-3 landed.<br>
Is #1 dead after my findings? ;-)<br>
<br>
Dieter<br>
<br>
Am 11.03.2018 19:11, schrieb Marek Olšák:<br>
> From: Marek Olšák &lt;<a href="mailto:marek.olsak@amd.com" target="_blank" rel="noreferrer">marek.olsak@amd.com</a>&gt;<br>
><br>
> This should improve the score for the GpuTest Triangle benchmark.<br>
> Vulkan doesn't use this either.<br>
> ---<br>
>  src/gallium/drivers/radeon/r600_pipe_common.h |  1 -<br>
>  src/gallium/drivers/radeon/r600_texture.c     | 11 +-------<br>
>  src/gallium/drivers/radeonsi/si_clear.c       | 37<br>
> ++-------------------------<br>
>  src/gallium/drivers/radeonsi/si_state.c       |  6 -----<br>
>  4 files changed, 3 insertions(+), 52 deletions(-)<br>
><br>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h<br>
> b/src/gallium/drivers/radeon/r600_pipe_common.h<br>
> index 7941903..9701757 100644<br>
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h<br>
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h<br>
> @@ -209,21 +209,20 @@ struct r600_cmask_info {<br>
>  struct r600_texture {<br>
>       struct r600_resource            resource;<br>
><br>
>       struct radeon_surf              surface;<br>
>       uint64_t                        size;<br>
>       struct r600_texture             *flushed_depth_texture;<br>
><br>
>       /* Colorbuffer compression and fast clear. */<br>
>       struct r600_fmask_info          fmask;<br>
>       struct r600_cmask_info          cmask;<br>
> -     struct r600_resource            *cmask_buffer;<br>
>       uint64_t                        dcc_offset; /* 0 = disabled */<br>
>       unsigned                        cb_color_info; /* fast clear enable bit */<br>
>       unsigned                        color_clear_value[2];<br>
>       unsigned                        last_msaa_resolve_target_micro_mode;<br>
>       unsigned                        num_level0_transfers;<br>
><br>
>       /* Depth buffer compression and fast clear. */<br>
>       uint64_t                        htile_offset;<br>
>       float                           depth_clear_value;<br>
>       uint16_t                        dirty_level_mask; /* each bit says if that mipmap is<br>
> compressed */<br>
> diff --git a/src/gallium/drivers/radeon/r600_texture.c<br>
> b/src/gallium/drivers/radeon/r600_texture.c<br>
> index 125e7ef..03bc955 100644<br>
> --- a/src/gallium/drivers/radeon/r600_texture.c<br>
> +++ b/src/gallium/drivers/radeon/r600_texture.c<br>
> @@ -405,26 +405,22 @@ void si_texture_discard_cmask(struct si_screen<br>
> *sscreen,<br>
>  {<br>
>       if (!rtex->cmask.size)<br>
>               return;<br>
><br>
>       assert(rtex->resource.b.b.nr_samples <= 1);<br>
><br>
>       /* Disable CMASK. */<br>
>       memset(&rtex->cmask, 0, sizeof(rtex->cmask));<br>
>       rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;<br>
>       rtex->dirty_level_mask = 0;<br>
> -<br>
>       rtex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);<br>
><br>
> -     if (rtex->cmask_buffer != &rtex->resource)<br>
> -         r600_resource_reference(&rtex->cmask_buffer, NULL);<br>
> -<br>
>       /* Notify all contexts about the change. */<br>
>       p_atomic_inc(&sscreen->dirty_tex_counter);<br>
>       p_atomic_inc(&sscreen->compressed_colortex_counter);<br>
>  }<br>
><br>
>  static bool r600_can_disable_dcc(struct r600_texture *rtex)<br>
>  {<br>
>       /* We can't disable DCC if it can be written by another process. */<br>
>       return rtex->dcc_offset &&<br>
>              (!rtex->resource.b.is_shared ||<br>
> @@ -813,24 +809,20 @@ static boolean r600_texture_get_handle(struct<br>
> pipe_screen* screen,<br>
>                                             slice_size, whandle);<br>
>  }<br>
><br>
>  static void r600_texture_destroy(struct pipe_screen *screen,<br>
>                                struct pipe_resource *ptex)<br>
>  {<br>
>       struct r600_texture *rtex = (struct r600_texture*)ptex;<br>
>       struct r600_resource *resource = &rtex->resource;<br>
><br>
>       r600_texture_reference(&rtex->flushed_depth_texture, NULL);<br>
> -<br>
> -     if (rtex->cmask_buffer != &rtex->resource) {<br>
> -         r600_resource_reference(&rtex->cmask_buffer, NULL);<br>
> -     }<br>
>       pb_reference(&resource->buf, NULL);<br>
>       r600_resource_reference(&rtex->dcc_separate_buffer, NULL);<br>
>       r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);<br>
>       FREE(rtex);<br>
>  }<br>
><br>
>  static const struct u_resource_vtbl r600_texture_vtbl;<br>
><br>
>  /* The number of samples can be specified independently of the<br>
> texture. */<br>
>  void si_texture_get_fmask_info(struct si_screen *sscreen,<br>
> @@ -1262,21 +1254,20 @@ r600_texture_create_object(struct pipe_screen<br>
> *screen,<br>
>                       rtex->db_compatible = true;<br>
><br>
>                       if (!(sscreen->debug_flags & DBG(NO_HYPERZ)))<br>
>                               r600_texture_allocate_htile(sscreen, rtex);<br>
>               }<br>
>       } else {<br>
>               if (base->nr_samples > 1) {<br>
>                       if (!buf) {<br>
>                               r600_texture_allocate_fmask(sscreen, rtex);<br>
>                               r600_texture_allocate_cmask(sscreen, rtex);<br>
> -                             rtex->cmask_buffer = &rtex->resource;<br>
>                       }<br>
>                       if (!rtex->fmask.size || !rtex->cmask.size) {<br>
>                               FREE(rtex);<br>
>                               return NULL;<br>
>                       }<br>
>               }<br>
><br>
>               /* Shared textures must always set up DCC here.<br>
>                * If it's not present, it will be disabled by<br>
>                * apply_opaque_metadata later.<br>
> @@ -1306,21 +1297,21 @@ r600_texture_create_object(struct pipe_screen<br>
> *screen,<br>
>               resource->bo_alignment = buf->alignment;<br>
>               resource->domains =<br>
> sscreen->ws->buffer_get_initial_domain(resource->buf);<br>
>               if (resource->domains & RADEON_DOMAIN_VRAM)<br>
>                       resource->vram_usage = buf->size;<br>
>               else if (resource->domains & RADEON_DOMAIN_GTT)<br>
>                       resource->gart_usage = buf->size;<br>
>       }<br>
><br>
>       if (rtex->cmask.size) {<br>
>               /* Initialize the cmask to 0xCC (= compressed state). */<br>
> -             si_screen_clear_buffer(sscreen, &rtex->cmask_buffer->b.b,<br>
> +             si_screen_clear_buffer(sscreen, &rtex->resource.b.b,<br>
>                                        rtex->cmask.offset, rtex->cmask.size,<br>
>                                        0xCCCCCCCC);<br>
>       }<br>
>       if (rtex->htile_offset) {<br>
>               uint32_t clear_value = 0;<br>
><br>
>               if (sscreen->info.chip_class >= GFX9 || rtex->tc_compatible_htile)<br>
>                       clear_value = 0x0000030F;<br>
><br>
>               si_screen_clear_buffer(sscreen, &rtex->resource.b.b,<br>
> diff --git a/src/gallium/drivers/radeonsi/si_clear.c<br>
> b/src/gallium/drivers/radeonsi/si_clear.c<br>
> index 464b9d7..a940aea 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_clear.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_clear.c<br>
> @@ -26,51 +26,20 @@<br>
><br>
>  #include "util/u_format.h"<br>
>  #include "util/u_pack_color.h"<br>
>  #include "util/u_surface.h"<br>
><br>
>  enum {<br>
>       SI_CLEAR         = SI_SAVE_FRAGMENT_STATE,<br>
>       SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,<br>
>  };<br>
><br>
> -static void si_alloc_separate_cmask(struct si_screen *sscreen,<br>
> -                                 struct r600_texture *rtex)<br>
> -{<br>
> -     if (rtex->cmask_buffer)<br>
> -                return;<br>
> -<br>
> -     assert(rtex->cmask.size == 0);<br>
> -<br>
> -     si_texture_get_cmask_info(sscreen, rtex, &rtex->cmask);<br>
> -     if (!rtex->cmask.size)<br>
> -             return;<br>
> -<br>
> -     rtex->cmask_buffer = (struct r600_resource *)<br>
> -             si_aligned_buffer_create(&sscreen->b,<br>
> -                                      R600_RESOURCE_FLAG_UNMAPPABLE,<br>
> -                                      PIPE_USAGE_DEFAULT,<br>
> -                                      rtex->cmask.size,<br>
> -                                      rtex->cmask.alignment);<br>
> -     if (rtex->cmask_buffer == NULL) {<br>
> -             rtex->cmask.size = 0;<br>
> -             return;<br>
> -     }<br>
> -<br>
> -     /* update colorbuffer state bits */<br>
> -     rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;<br>
> -<br>
> -     rtex->cb_color_info |= S_028C70_FAST_CLEAR(1);<br>
> -<br>
> -     p_atomic_inc(&sscreen->compressed_colortex_counter);<br>
> -}<br>
> -<br>
>  static void si_set_clear_color(struct r600_texture *rtex,<br>
>                              enum pipe_format surface_format,<br>
>                              const union pipe_color_union *color)<br>
>  {<br>
>       union util_color uc;<br>
><br>
>       memset(&uc, 0, sizeof(uc));<br>
><br>
>       if (rtex->surface.bpe == 16) {<br>
>               /* DCC fast clear only:<br>
> @@ -451,21 +420,21 @@ static void si_do_fast_color_clear(struct<br>
> si_context *sctx,<br>
><br>
>                       if (clear_words_needed && too_small)<br>
>                               continue;<br>
><br>
>                       /* DCC fast clear with MSAA should clear CMASK to 0xC. */<br>
>                       if (tex->resource.b.b.nr_samples >= 2 && tex->cmask.size) {<br>
>                               /* TODO: This doesn't work with MSAA. */<br>
>                               if (clear_words_needed)<br>
>                                       continue;<br>
><br>
> -                             si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,<br>
> +                             si_clear_buffer(&sctx->b.b, &tex->resource.b.b,<br>
>                                               tex->cmask.offset, tex->cmask.size,<br>
>                                               0xCCCCCCCC, R600_COHERENCY_CB_META);<br>
>                               need_decompress_pass = true;<br>
>                       }<br>
><br>
>                       vi_dcc_clear_level(sctx, tex, 0, reset_value);<br>
><br>
>                       if (clear_words_needed)<br>
>                               need_decompress_pass = true;<br>
><br>
> @@ -476,28 +445,26 @@ static void si_do_fast_color_clear(struct<br>
> si_context *sctx,<br>
><br>
>                       /* 128-bit formats are unusupported */<br>
>                       if (tex->surface.bpe > 8) {<br>
>                               continue;<br>
>                       }<br>
><br>
>                       /* RB+ doesn't work with CMASK fast clear on Stoney. */<br>
>                       if (sctx->b.family == CHIP_STONEY)<br>
>                               continue;<br>
><br>
> -                     /* ensure CMASK is enabled */<br>
> -                     si_alloc_separate_cmask(sctx->screen, tex);<br>
>                       if (tex->cmask.size == 0) {<br>
>                               continue;<br>
>                       }<br>
><br>
>                       /* Do the fast clear. */<br>
> -                     si_clear_buffer(&sctx->b.b, &tex->cmask_buffer->b.b,<br>
> +                     si_clear_buffer(&sctx->b.b, &tex->resource.b.b,<br>
>                                       tex->cmask.offset, tex->cmask.size, 0,<br>
>                                       R600_COHERENCY_CB_META);<br>
>                       need_decompress_pass = true;<br>
>               }<br>
><br>
>               if (need_decompress_pass &&<br>
>                   !(tex->dirty_level_mask & (1 << level))) {<br>
>                       tex->dirty_level_mask |= 1 << level;<br>
>                       p_atomic_inc(&sctx->screen->compressed_colortex_counter);<br>
>               }<br>
> diff --git a/src/gallium/drivers/radeonsi/si_state.c<br>
> b/src/gallium/drivers/radeonsi/si_state.c<br>
> index 6c82257..aae7332 100644<br>
> --- a/src/gallium/drivers/radeonsi/si_state.c<br>
> +++ b/src/gallium/drivers/radeonsi/si_state.c<br>
> @@ -2980,26 +2980,20 @@ static void si_emit_framebuffer_state(struct<br>
> si_context *sctx, struct r600_atom<br>
>                       continue;<br>
>               }<br>
><br>
>               tex = (struct r600_texture *)cb->base.texture;<br>
>               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,<br>
>                                     &tex->resource, RADEON_USAGE_READWRITE,<br>
>                                     tex->resource.b.b.nr_samples > 1 ?<br>
>                                             RADEON_PRIO_COLOR_BUFFER_MSAA :<br>
>                                             RADEON_PRIO_COLOR_BUFFER);<br>
><br>
> -             if (tex->cmask_buffer && tex->cmask_buffer != &tex->resource) {<br>
> -                     radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,<br>
> -                             tex->cmask_buffer, RADEON_USAGE_READWRITE,<br>
> -                             RADEON_PRIO_CMASK);<br>
> -             }<br>
> -<br>
>               if (tex->dcc_separate_buffer)<br>
>                       radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,<br>
>                                                 tex->dcc_separate_buffer,<br>
>                                                 RADEON_USAGE_READWRITE,<br>
>                                                 RADEON_PRIO_DCC);<br>
><br>
>               /* Compute mutable surface parameters. */<br>
>               cb_color_base = tex->resource.gpu_address >> 8;<br>
>               cb_color_fmask = 0;<br>
>               cb_dcc_base = 0;<br>
</blockquote></div>