[Mesa-dev] [PATCH] i965: Disable L3 cache allocation for external buffers

Daniel Vetter daniel at ffwll.ch
Wed Nov 1 15:24:21 UTC 2017


On Tue, Oct 24, 2017 at 05:06:33PM +0100, Chris Wilson wrote:
> Through the use of mocs, we can define the cache usage for any surface
> used by the GPU. In particular, we can request that L3 cache be
> allocated for either a read/write miss so that subsequent reads can be
> fetched from cache rather than memory. A consequence of this is that if
> we allocate a L3/LLC cacheline for a read and the object is changed in
> main memory (e.g. a PCIe write bypassing the CPU) then the next read
> will be serviced from the stale cache and not from the new data in
> memory. This is an issue for external PRIME buffers where we may miss
> the updates entirely if the image is small enough to fit within our
> cache.
> 
> Currently, we have a single bit to mark all external buffers so use that
> to tell us when it is unsafe to use a cache override in mocs and
> fallback to the PTE value instead (which should be set to the correct
> cache level to be coherent amongst all active parties: PRIME, scanout and
> render). This may be refined in future to limit the override to buffers
> outside the control of mesa; as buffers being shared between mesa
> clients should be able to coordinate themselves without resolves.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101691
> Cc: Kenneth Graunke <kenneth at whitecape.org>
> Cc: Jason Ekstrand <jason at jlekstrand.net>
> Cc: Lyude Paul <lyude at redhat.com>
> Cc: Timo Aalton <tjaalton at ubuntu.com>
> Cc: Ben Widawsky <ben at bwidawsk.net>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> ---
>  src/intel/blorp/blorp.c                          |  1 +
>  src/intel/blorp/blorp.h                          |  1 +
>  src/intel/blorp/blorp_genX_exec.h                |  2 +-
>  src/intel/blorp/blorp_priv.h                     |  1 +
>  src/mesa/drivers/dri/i965/brw_blorp.c            |  1 +
>  src/mesa/drivers/dri/i965/brw_state.h            |  3 ++-
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 16 +++++++++++-----
>  7 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c
> index 7cc6335f2f..459ad66652 100644
> --- a/src/intel/blorp/blorp.c
> +++ b/src/intel/blorp/blorp.c
> @@ -71,6 +71,7 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
>                         surf->surf->logical_level0_px.array_len));
>  
>     info->enabled = true;
> +   info->external = surf->external;
>  
>     if (format == ISL_FORMAT_UNSUPPORTED)
>        format = surf->surf->format;
> diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h
> index 9716c66302..af056c9d52 100644
> --- a/src/intel/blorp/blorp.h
> +++ b/src/intel/blorp/blorp.h
> @@ -106,6 +106,7 @@ struct blorp_surf
>     enum isl_aux_usage aux_usage;
>  
>     union isl_color_value clear_color;
> +   bool external;
>  };
>  
>  void
> diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
> index 5389262098..18715788ff 100644
> --- a/src/intel/blorp/blorp_genX_exec.h
> +++ b/src/intel/blorp/blorp_genX_exec.h
> @@ -1328,7 +1328,7 @@ blorp_emit_surface_states(struct blorp_batch *batch,
>           blorp_emit_surface_state(batch, &params->src,
>                                    surface_maps[BLORP_TEXTURE_BT_INDEX],
>                                    surface_offsets[BLORP_TEXTURE_BT_INDEX],
> -                                  NULL, false);
> +                                  NULL, params->src.external);
>        }
>     }
>  
> diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h
> index c7d5d308da..f841aa7cdc 100644
> --- a/src/intel/blorp/blorp_priv.h
> +++ b/src/intel/blorp/blorp_priv.h
> @@ -47,6 +47,7 @@ enum {
>  struct brw_blorp_surface_info
>  {
>     bool enabled;
> +   bool external;
>  
>     struct isl_surf surf;
>     struct blorp_address addr;
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
> index ed4f9870f2..563d13a037 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -160,6 +160,7 @@ blorp_surf_for_miptree(struct brw_context *brw,
>        .offset = mt->offset,
>        .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
>     };
> +   surf->external = mt->bo->external;
>  
>     surf->aux_usage = aux_usage;
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index 8db354cf23..01c0cd12cb 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -342,6 +342,7 @@ void gen10_init_atoms(struct brw_context *brw);
>   * may still respect that.
>   */
>  #define GEN7_MOCS_L3                    1
> +#define GEN7_MOCS_PTE                   0

I think we want to keep the current bitfield (it obviously works with
uncached scanout buffers), but with a big comment:


diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 8db354cf232d..4e418f5e8ea5 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -342,6 +342,12 @@ void gen10_init_atoms(struct brw_context *brw);
  * may still respect that.
  */
 #define GEN7_MOCS_L3                    1
+/* Even when we obey the kernel's cacheability setting from the PTE we still
+ * want to explicitly enable L3 caching. L3$ seems to be automatically disabled
+ * when the PTE request uncached mode - the kernel never explicitly flushed L3
+ * on gen7.
+ */
+#define GEN7_MOCS_PTE                   GEN7_MOCS_L3
 
 /* Ivybridge only: cache in LLC.
  * Specifying zero here means to use the PTE values set by the kernel;

>  
>  /* Ivybridge only: cache in LLC.
>   * Specifying zero here means to use the PTE values set by the kernel;
> @@ -367,7 +368,7 @@ void gen10_init_atoms(struct brw_context *brw);
>   */
>  #define BDW_MOCS_WB  0x78
>  #define BDW_MOCS_WT  0x58
> -#define BDW_MOCS_PTE 0x18
> +#define BDW_MOCS_PTE 0x08

Checking bsepc 0x18 == "Use PAT for cache placement control", and PAT is
the field in the PTE, so exactly what we want. Cache control (i.e.
uc/wc/wt/wb) is selected by bits 6:5, and there 0 == "Use PTE". So I think
the current value is the right one, and we don't want this change.

With the above two changes:

Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>

>  
>  /* Skylake: MOCS is now an index into an array of 62 different caching
>   * configurations programmed by the kernel.
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index f4e9cf48c6..50a3682efc 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -63,12 +63,17 @@ uint32_t tex_mocs[] = {
>  };
>  
>  uint32_t rb_mocs[] = {
> -   [7] = GEN7_MOCS_L3,
> +   [7] = GEN7_MOCS_PTE,
>     [8] = BDW_MOCS_PTE,
>     [9] = SKL_MOCS_PTE,
>     [10] = CNL_MOCS_PTE,
>  };
>  
> +static inline uint32_t get_tex_mocs(struct brw_bo *bo, unsigned int gen)
> +{
> +	return (bo && bo->external ? rb_mocs : tex_mocs)[gen];
> +}
> +
>  static void
>  get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
>               GLenum target, struct isl_view *view,
> @@ -585,7 +590,7 @@ brw_update_texture_surface(struct gl_context *ctx,
>           aux_usage = ISL_AUX_USAGE_NONE;
>  
>        brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,
> -                             tex_mocs[devinfo->gen],
> +                             get_tex_mocs(mt->bo, devinfo->gen),
>                               surf_offset, surf_index,
>                               0);
>     }
> @@ -616,7 +621,7 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
>                           .size = buffer_size,
>                           .format = surface_format,
>                           .stride = pitch,
> -                         .mocs = tex_mocs[devinfo->gen]);
> +                         .mocs = get_tex_mocs(bo, devinfo->gen));
>  }
>  
>  void
> @@ -1106,7 +1111,7 @@ update_renderbuffer_read_surfaces(struct brw_context *brw)
>                 aux_usage = ISL_AUX_USAGE_NONE;
>  
>              brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,
> -                                   tex_mocs[devinfo->gen],
> +                                   get_tex_mocs(irb->mt->bo, devinfo->gen),
>                                     surf_offset, surf_index,
>                                     0);
>  
> @@ -1598,7 +1603,8 @@ update_image_surface(struct brw_context *brw,
>                                                         view.base_array_layer,
>                                                         view.array_len));
>              brw_emit_surface_state(brw, mt, mt->target, view,
> -                                   ISL_AUX_USAGE_NONE, tex_mocs[devinfo->gen],
> +                                   ISL_AUX_USAGE_NONE,
> +                                   get_tex_mocs(mt->bo, devinfo->gen),
>                                     surf_offset, surf_index,
>                                     access == GL_READ_ONLY ? 0 : RELOC_WRITE);
>           }
> -- 
> 2.15.0.rc1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the mesa-dev mailing list