[Mesa-stable] [Mesa-dev] [PATCH 2/6] i965: Track format and aux usage in the render cache

Kenneth Graunke kenneth at whitecape.org
Tue Jan 16 23:54:38 UTC 2018


On Wednesday, January 10, 2018 11:22:36 AM PST Jason Ekstrand wrote:
> This lets us perform render cache flushes whenever a surface goes from
> being used with one aux+format to a different aux+format.
> 
> This is the "proper" fix for https://bugs.freedesktop.org/102435.
> ee57b15ec764736e2d5360beaef9fb2045ed0f68 which was really just a partial
> revert of 3e57e9494c2279580ad6a83ab8c065d01e7e634e was just a hack to
> get rid of a hang in a bunch of Valve games.  This solves the actual
> problem responsible for the hang and lets us enable CCS_E once again.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102435
> Cc: "17.3" <mesa-stable at lists.freedesktop.org>
> ---
>  src/mesa/drivers/dri/i965/brw_context.h     |  2 +-
>  src/mesa/drivers/dri/i965/brw_draw.c        | 12 +++++-
>  src/mesa/drivers/dri/i965/genX_blorp_exec.c | 14 +++++--
>  src/mesa/drivers/dri/i965/intel_fbo.c       | 65 ++++++++++++++++++++++-------
>  src/mesa/drivers/dri/i965/intel_fbo.h       |  8 +++-
>  5 files changed, 78 insertions(+), 23 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 0f0aad8..d041032 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -730,7 +730,7 @@ struct brw_context
>      * and would need flushing before being used from another cache domain that
>      * isn't coherent with it (i.e. the sampler).
>      */
> -   struct set *render_cache;
> +   struct hash_table *render_cache;
>  
>     /**
>      * Set of struct brw_bo * that have been used as a depth buffer within this
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 1f86378..4945dec 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -503,13 +503,17 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw)
>        mesa_format mesa_format =
>           _mesa_get_render_format(ctx, intel_rb_format(irb));
>        enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
> +      enum isl_aux_usage aux_usage =
> +         intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
> +                                        ctx->Color.BlendEnabled & (1 << i));
>  
>        intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
>                                     irb->mt_layer, irb->layer_count,
>                                     isl_format,
>                                     ctx->Color.BlendEnabled & (1 << i));
>  
> -      brw_cache_flush_for_render(brw, irb->mt->bo);
> +      brw_cache_flush_for_render(brw, irb->mt->bo,
> +                                 isl_format, aux_usage);
>     }
>  }
>  
> @@ -575,8 +579,12 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
>        mesa_format mesa_format =
>           _mesa_get_render_format(ctx, intel_rb_format(irb));
>        enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
> +      enum isl_aux_usage aux_usage =
> +         intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
> +                                        ctx->Color.BlendEnabled & (1 << i));
> +
> +      brw_render_cache_add_bo(brw, irb->mt->bo, isl_format, aux_usage);
>  
> -      brw_render_cache_add_bo(brw, irb->mt->bo);
>        intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
>                                    irb->mt_layer, irb->layer_count,
>                                    isl_format,
> diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> index e8bc52e..062171a 100644
> --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> @@ -239,8 +239,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
>      */
>     if (params->src.enabled)
>        brw_cache_flush_for_read(brw, params->src.addr.buffer);
> -   if (params->dst.enabled)
> -      brw_cache_flush_for_render(brw, params->dst.addr.buffer);
> +   if (params->dst.enabled) {
> +      brw_cache_flush_for_render(brw, params->dst.addr.buffer,
> +                                 params->dst.view.format,
> +                                 params->dst.aux_usage);
> +   }
>     if (params->depth.enabled)
>        brw_cache_flush_for_depth(brw, params->depth.addr.buffer);
>     if (params->stencil.enabled)
> @@ -310,8 +313,11 @@ retry:
>                                !params->stencil.enabled;
>     brw->ib.index_size = -1;
>  
> -   if (params->dst.enabled)
> -      brw_render_cache_add_bo(brw, params->dst.addr.buffer);
> +   if (params->dst.enabled) {
> +      brw_render_cache_add_bo(brw, params->dst.addr.buffer,
> +                              params->dst.view.format,
> +                              params->dst.aux_usage);
> +   }
>     if (params->depth.enabled)
>        brw_depth_cache_add_bo(brw, params->depth.addr.buffer);
>     if (params->stencil.enabled)
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
> index 75c85ec..ea17577 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -972,14 +972,13 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
>  void
>  brw_cache_sets_clear(struct brw_context *brw)
>  {
> -   struct set_entry *entry;
> +   struct hash_entry *render_entry;
> +   hash_table_foreach(brw->render_cache, render_entry)
> +      _mesa_hash_table_remove(brw->render_cache, render_entry);
>  
> -   set_foreach(brw->render_cache, entry) {
> -      _mesa_set_remove(brw->render_cache, entry);
> -   }
> -
> -   set_foreach(brw->depth_cache, entry)
> -      _mesa_set_remove(brw->depth_cache, entry);
> +   struct set_entry *depth_entry;
> +   set_foreach(brw->depth_cache, depth_entry)
> +      _mesa_set_remove(brw->depth_cache, depth_entry);
>  }
>  
>  /**
> @@ -1018,28 +1017,66 @@ flush_depth_and_render_caches(struct brw_context *brw, struct brw_bo *bo)
>  void
>  brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
>  {
> -   if (_mesa_set_search(brw->render_cache, bo) ||
> +   if (_mesa_hash_table_search(brw->render_cache, bo) ||
>         _mesa_set_search(brw->depth_cache, bo))
>        flush_depth_and_render_caches(brw, bo);
>  }
>  
> +static void *
> +format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
> +{
> +   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
> +}
> +
>  void
> -brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo)
> +brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo *bo,
> +                           enum isl_format format,
> +                           enum isl_aux_usage aux_usage)
>  {
>     if (_mesa_set_search(brw->depth_cache, bo))
>        flush_depth_and_render_caches(brw, bo);
> +
> +   /* Check to see if this bo has been used by a previous rendering operation
> +    * but with a different format or aux usage.  If it has, flush the render
> +    * cache so we ensure that it's only in there with one format or aux usage
> +    * at a time.
> +    *
> +    * Even though it's not obvious, this can easily happen in practice.
> +    * Suppose a client is blending on a surface with sRGB encode enabled on
> +    * gen9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
> +    * then disables sRGB decode and continues blending we will flip on
> +    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
> +    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
> +    * that we have fragments in-flight which are rendering with UNORM+CCS_E
> +    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
> +    * same time and the pixel scoreboard and color blender are trying to sort
> +    * it all out.  This ends badly (i.e. GPU hangs).

It would be nice to mention that we've observed hangs with aux format
changing, but we're uncertain whether format is an issue, and are being
conservative here.  It seems very plausible that it could be a problem,
so I'm fine with keeping it.  Better to avoid any potential GPU hangs,
and if it's a performance bottleneck, try and relax it later.

> +    */
> +   struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
> +   if (entry && entry->data != format_aux_tuple(format, aux_usage))
> +      flush_depth_and_render_caches(brw, bo);
>  }
>  
>  void
> -brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo)
> +brw_render_cache_add_bo(struct brw_context *brw, struct brw_bo *bo,
> +                        enum isl_format format,
> +                        enum isl_aux_usage aux_usage)
>  {
> -   _mesa_set_add(brw->render_cache, bo);

This will do an unnecessary hash table search in release builds, and
introduce an unused variable warning, so let's do:

#ifndef NDEBUG

> +   struct hash_entry *entry = _mesa_hash_table_search(brw->render_cache, bo);
> +   if (entry) {
> +      /* Otherwise, someone didn't do a flush_for_render and that would be
> +       * very bad indeed.
> +       */
> +      assert(entry->data == format_aux_tuple(format, aux_usage));
> +   }

#endif

> +   _mesa_hash_table_insert(brw->render_cache, bo,
> +                           format_aux_tuple(format, aux_usage));
>  }

With those and Iago's suggestions, this patch is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-stable/attachments/20180116/ab7975ee/attachment.sig>


More information about the mesa-stable mailing list