[Mesa-dev] [PATCH] i965: Disable L3 cache allocation for external buffers
Daniel Vetter
daniel at ffwll.ch
Wed Nov 1 15:24:21 UTC 2017
On Tue, Oct 24, 2017 at 05:06:33PM +0100, Chris Wilson wrote:
> Through the use of mocs, we can define the cache usage for any surface
> used by the GPU. In particular, we can request that L3 cache be
> allocated for either a read/write miss so that subsequent reads can be
> fetched from cache rather than memory. A consequence of this is that if
> we allocate a L3/LLC cacheline for a read and the object is changed in
> main memory (e.g. a PCIe write bypassing the CPU) then the next read
> will be serviced from the stale cache and not from the new data in
> memory. This is an issue for external PRIME buffers where we may miss
> the updates entirely if the image is small enough to fit within our
> cache.
>
> Currently, we have a single bit to mark all external buffers so use that
> to tell us when it is unsafe to use a cache override in mocs and
> fallback to the PTE value instead (which should be set to the correct
> cache level to be coherent amongst all active parties: PRIME, scanout and
> render). This may be refined in future to limit the override to buffers
> outside the control of mesa; as buffers being shared between mesa
> clients should be able to coordinate themselves without resolves.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101691
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Jason Ekstrand <jason at jlekstrand.net>
> Cc: Lyude Paul <lyude at redhat.com>
> Cc: Timo Aalton <tjaalton at ubuntu.com>
> Cc: Ben Widawsky <ben at bwidawsk.net>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
> src/intel/blorp/blorp.c | 1 +
> src/intel/blorp/blorp.h | 1 +
> src/intel/blorp/blorp_genX_exec.h | 2 +-
> src/intel/blorp/blorp_priv.h | 1 +
> src/mesa/drivers/dri/i965/brw_blorp.c | 1 +
> src/mesa/drivers/dri/i965/brw_state.h | 3 ++-
> src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 16 +++++++++++-----
> 7 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
> index 7cc6335f2f..459ad66652 100644
> --- a/src/intel/blorp/blorp.c
> +++ b/src/intel/blorp/blorp.c
> @@ -71,6 +71,7 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
> surf->surf->logical_level0_px.array_len));
>
> info->enabled = true;
> + info->external = surf->external;
>
> if (format == ISL_FORMAT_UNSUPPORTED)
> format = surf->surf->format;
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index 9716c66302..af056c9d52 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -106,6 +106,7 @@ struct blorp_surf
> enum isl_aux_usage aux_usage;
>
> union isl_color_value clear_color;
> + bool external;
> };
>
> void
> diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
> index 5389262098..18715788ff 100644
> --- a/src/intel/blorp/blorp_genX_exec.h
> +++ b/src/intel/blorp/blorp_genX_exec.h
> @@ -1328,7 +1328,7 @@ blorp_emit_surface_states(struct blorp_batch *batch,
> blorp_emit_surface_state(batch, ¶ms->src,
> surface_maps[BLORP_TEXTURE_BT_INDEX],
> surface_offsets[BLORP_TEXTURE_BT_INDEX],
> - NULL, false);
> + NULL, params->src.external);
> }
> }
>
> diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h
> index c7d5d308da..f841aa7cdc 100644
> --- a/src/intel/blorp/blorp_priv.h
> +++ b/src/intel/blorp/blorp_priv.h
> @@ -47,6 +47,7 @@ enum {
> struct brw_blorp_surface_info
> {
> bool enabled;
> + bool external;
>
> struct isl_surf surf;
> struct blorp_address addr;
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
> index ed4f9870f2..563d13a037 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -160,6 +160,7 @@ blorp_surf_for_miptree(struct brw_context *brw,
> .offset = mt->offset,
> .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
> };
> + surf->external = mt->bo->external;
>
> surf->aux_usage = aux_usage;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index 8db354cf23..01c0cd12cb 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -342,6 +342,7 @@ void gen10_init_atoms(struct brw_context *brw);
> * may still respect that.
> */
> #define GEN7_MOCS_L3 1
> +#define GEN7_MOCS_PTE 0
I think we want to keep the current bitfield (it obviously works with
uncached scanout buffers), but with a big comment:
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 8db354cf232d..4e418f5e8ea5 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -342,6 +342,12 @@ void gen10_init_atoms(struct brw_context *brw);
* may still respect that.
*/
#define GEN7_MOCS_L3 1
+/* Even when we obey the kernel's cacheability setting from the PTE we still
+ * want to explicitly enable L3 caching. L3$ seems to be automatically disabled
+ * when the PTE request uncached mode - the kernel never explicitly flushed L3
+ * on gen7.
+ */
+#define GEN7_MOCS_PTE GEN7_MOCS_L3
/* Ivybridge only: cache in LLC.
* Specifying zero here means to use the PTE values set by the kernel;
>
> /* Ivybridge only: cache in LLC.
> * Specifying zero here means to use the PTE values set by the kernel;
> @@ -367,7 +368,7 @@ void gen10_init_atoms(struct brw_context *brw);
> */
> #define BDW_MOCS_WB 0x78
> #define BDW_MOCS_WT 0x58
> -#define BDW_MOCS_PTE 0x18
> +#define BDW_MOCS_PTE 0x08
Checking bsepc 0x18 == "Use PAT for cache placement control", and PAT is
the field in the PTE, so exactly what we want. Cache control (i.e.
uc/wc/wt/wb) is selected by bits 6:5, and there 0 == "Use PTE". So I think
the current value is the right one, and we don't want this change.
With the above two changes:
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
>
> /* Skylake: MOCS is now an index into an array of 62 different caching
> * configurations programmed by the kernel.
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index f4e9cf48c6..50a3682efc 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -63,12 +63,17 @@ uint32_t tex_mocs[] = {
> };
>
> uint32_t rb_mocs[] = {
> - [7] = GEN7_MOCS_L3,
> + [7] = GEN7_MOCS_PTE,
> [8] = BDW_MOCS_PTE,
> [9] = SKL_MOCS_PTE,
> [10] = CNL_MOCS_PTE,
> };
>
> +static inline uint32_t get_tex_mocs(struct brw_bo *bo, unsigned int gen)
> +{
> + return (bo && bo->external ? rb_mocs : tex_mocs)[gen];
> +}
> +
> static void
> get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
> GLenum target, struct isl_view *view,
> @@ -585,7 +590,7 @@ brw_update_texture_surface(struct gl_context *ctx,
> aux_usage = ISL_AUX_USAGE_NONE;
>
> brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
> - tex_mocs[devinfo->gen],
> + get_tex_mocs(mt->bo, devinfo->gen),
> surf_offset, surf_index,
> 0);
> }
> @@ -616,7 +621,7 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
> .size = buffer_size,
> .format = surface_format,
> .stride = pitch,
> - .mocs = tex_mocs[devinfo->gen]);
> + .mocs = get_tex_mocs(bo, devinfo->gen));
> }
>
> void
> @@ -1106,7 +1111,7 @@ update_renderbuffer_read_surfaces(struct brw_context *brw)
> aux_usage = ISL_AUX_USAGE_NONE;
>
> brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
> - tex_mocs[devinfo->gen],
> + get_tex_mocs(irb->mt->bo, devinfo->gen),
> surf_offset, surf_index,
> 0);
>
> @@ -1598,7 +1603,8 @@ update_image_surface(struct brw_context *brw,
> view.base_array_layer,
> view.array_len));
> brw_emit_surface_state(brw, mt, mt->target, view,
> - ISL_AUX_USAGE_NONE, tex_mocs[devinfo->gen],
> + ISL_AUX_USAGE_NONE,
> + get_tex_mocs(mt->bo, devinfo->gen),
> surf_offset, surf_index,
> access == GL_READ_ONLY ? 0 : RELOC_WRITE);
> }
> --
> 2.15.0.rc1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the mesa-dev
mailing list