[Mesa-dev] [PATCH 2/6] i965: Track format and aux usage in the render cache
Iago Toral
itoral at igalia.com
Thu Jan 11 08:45:40 UTC 2018
On Wed, 2018-01-10 at 11:22 -0800, Jason Ekstrand wrote:
> This lets us perform render cache flushes whenever a surface goes
> from
> being used with one aux+format to a different aux+format.
>
> This is the "proper" fix for https://bugs.freedesktop.org/102435.
> ee57b15ec764736e2d5360beaef9fb2045ed0f68 which was really just a
> partial
> revert of 3e57e9494c2279580ad6a83ab8c065d01e7e634e was just a hack to
> get rid of a hang in a bunch of Valve games. This solves the actual
> problem responsible for the hang and lets us enable CCS_E once again.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102435
> Cc: "17.3" <mesa-stable at lists.freedesktop.org>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 2 +-
> src/mesa/drivers/dri/i965/brw_draw.c | 12 +++++-
> src/mesa/drivers/dri/i965/genX_blorp_exec.c | 14 +++++--
> src/mesa/drivers/dri/i965/intel_fbo.c | 65
> ++++++++++++++++++++++-------
> src/mesa/drivers/dri/i965/intel_fbo.h | 8 +++-
> 5 files changed, 78 insertions(+), 23 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 0f0aad8..d041032 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -730,7 +730,7 @@ struct brw_context
> * and would need flushing before being used from another cache
> domain that
> * isn't coherent with it (i.e. the sampler).
> */
> - struct set *render_cache;
> + struct hash_table *render_cache;
>
> /**
> * Set of struct brw_bo * that have been used as a depth buffer
> within this
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index 1f86378..4945dec 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -503,13 +503,17 @@ brw_predraw_resolve_framebuffer(struct
> brw_context *brw)
> mesa_format mesa_format =
> _mesa_get_render_format(ctx, intel_rb_format(irb));
> enum isl_format isl_format =
> brw_isl_format_for_mesa_format(mesa_format);
bool blend_enabled = ctx->Color.BlendEnabled & (1 << i);
and then pass blend_enabled to both intel_miptree_render_aux_usage and
intel_miptree_prepare_render below sintead of recomputing it.
> + enum isl_aux_usage aux_usage =
> + intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
> + ctx->Color.BlendEnabled & (1
> << i));
>
> intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
> irb->mt_layer, irb->layer_count,
> isl_format,
> ctx->Color.BlendEnabled & (1 <<
> i));
>
> - brw_cache_flush_for_render(brw, irb->mt->bo);
> + brw_cache_flush_for_render(brw, irb->mt->bo,
> + isl_format, aux_usage);
> }
> }
>
> @@ -575,8 +579,12 @@ brw_postdraw_set_buffers_need_resolve(struct
> brw_context *brw)
> mesa_format mesa_format =
> _mesa_get_render_format(ctx, intel_rb_format(irb));
> enum isl_format isl_format =
> brw_isl_format_for_mesa_format(mesa_format);
Same here for the intel_miptree_render_aux_usage and
intel_miptree_render_aux_usage calls below.
> + enum isl_aux_usage aux_usage =
> + intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
> + ctx->Color.BlendEnabled & (1
> << i));
> +
> + brw_render_cache_add_bo(brw, irb->mt->bo, isl_format,
> aux_usage);
>
> - brw_render_cache_add_bo(brw, irb->mt->bo);
> intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
> irb->mt_layer, irb->layer_count,
> isl_format,
> diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> index e8bc52e..062171a 100644
> --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
> @@ -239,8 +239,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
> */
> if (params->src.enabled)
> brw_cache_flush_for_read(brw, params->src.addr.buffer);
> - if (params->dst.enabled)
> - brw_cache_flush_for_render(brw, params->dst.addr.buffer);
> + if (params->dst.enabled) {
> + brw_cache_flush_for_render(brw, params->dst.addr.buffer,
> + params->dst.view.format,
> + params->dst.aux_usage);
> + }
> if (params->depth.enabled)
> brw_cache_flush_for_depth(brw, params->depth.addr.buffer);
> if (params->stencil.enabled)
> @@ -310,8 +313,11 @@ retry:
> !params->stencil.enabled;
> brw->ib.index_size = -1;
>
> - if (params->dst.enabled)
> - brw_render_cache_add_bo(brw, params->dst.addr.buffer);
> + if (params->dst.enabled) {
> + brw_render_cache_add_bo(brw, params->dst.addr.buffer,
> + params->dst.view.format,
> + params->dst.aux_usage);
> + }
> if (params->depth.enabled)
> brw_depth_cache_add_bo(brw, params->depth.addr.buffer);
> if (params->stencil.enabled)
> diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c
> b/src/mesa/drivers/dri/i965/intel_fbo.c
> index 75c85ec..ea17577 100644
> --- a/src/mesa/drivers/dri/i965/intel_fbo.c
> +++ b/src/mesa/drivers/dri/i965/intel_fbo.c
> @@ -972,14 +972,13 @@ intel_renderbuffer_move_to_temp(struct
> brw_context *brw,
> void
> brw_cache_sets_clear(struct brw_context *brw)
> {
> - struct set_entry *entry;
> + struct hash_entry *render_entry;
> + hash_table_foreach(brw->render_cache, render_entry)
> + _mesa_hash_table_remove(brw->render_cache, render_entry);
>
> - set_foreach(brw->render_cache, entry) {
> - _mesa_set_remove(brw->render_cache, entry);
> - }
> -
> - set_foreach(brw->depth_cache, entry)
> - _mesa_set_remove(brw->depth_cache, entry);
> + struct set_entry *depth_entry;
> + set_foreach(brw->depth_cache, depth_entry)
> + _mesa_set_remove(brw->depth_cache, depth_entry);
> }
>
> /**
> @@ -1018,28 +1017,66 @@ flush_depth_and_render_caches(struct
> brw_context *brw, struct brw_bo *bo)
> void
> brw_cache_flush_for_read(struct brw_context *brw, struct brw_bo *bo)
> {
> - if (_mesa_set_search(brw->render_cache, bo) ||
> + if (_mesa_hash_table_search(brw->render_cache, bo) ||
> _mesa_set_search(brw->depth_cache, bo))
> flush_depth_and_render_caches(brw, bo);
> }
>
> +static void *
> +format_aux_tuple(enum isl_format format, enum isl_aux_usage
> aux_usage)
> +{
> + return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
> +}
> +
> void
> -brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo
> *bo)
> +brw_cache_flush_for_render(struct brw_context *brw, struct brw_bo
> *bo,
> + enum isl_format format,
> + enum isl_aux_usage aux_usage)
> {
> if (_mesa_set_search(brw->depth_cache, bo))
> flush_depth_and_render_caches(brw, bo);
> +
> + /* Check to see if this bo has been used by a previous rendering
> operation
> + * but with a different format or aux usage. If it has, flush
> the render
> + * cache so we ensure that it's only in there with one format or
> aux usage
> + * at a time.
> + *
> + * Even though it's not obvious, this can easily happen in
> practice.
> + * Suppose a client is blending on a surface with sRGB encode
> enabled on
> + * gen9. This implies that you get AUX_USAGE_CCS_D at best. If
> the client
> + * then disables sRGB decode and continues blending we will flip
> on
> + * AUX_USAGE_CCS_E without doing any sort of resolve in-between
> (this is
> + * perfectly valid since CCS_E is a subset of CCS_D). However,
> this means
> + * that we have fragments in-flight which are rendering with
> UNORM+CCS_E
> + * and other fragments in-flight with SRGB+CCS_D on the same
> surface at the
> + * same time and the pixel scoreboard and color blender are
> trying to sort
> + * it all out. This ends badly (i.e. GPU hangs).
> + */
Wow, good finding, I would have expected this to cause rendering
corruption but not a hang :-/
> + struct hash_entry *entry = _mesa_hash_table_search(brw-
> >render_cache, bo);
> + if (entry && entry->data != format_aux_tuple(format, aux_usage))
> + flush_depth_and_render_caches(brw, bo);
> }
More information about the mesa-dev
mailing list