<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Tue, Oct 24, 2017 at 9:06 AM, Chris Wilson <span dir="ltr"><<a href="mailto:chris@chris-wilson.co.uk" target="_blank">chris@chris-wilson.co.uk</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Through the use of mocs, we can define the cache usage for any surface<br>
used by the GPU. In particular, we can request that L3 cache be<br>
allocated for either a read/write miss so that subsequent reads can be<br>
fetched from cache rather than memory. A consequence of this is that if<br>
we allocate a L3/LLC cacheline for a read and the object is changed in<br>
main memory (e.g. a PCIe write bypassing the CPU) then the next read<br>
will be serviced from the stale cache and not from the new data in<br>
memory. This is an issue for external PRIME buffers where we may miss<br>
the updates entirely if the image is small enough to fit within our<br>
cache.<br>
<br>
Currently, we have a single bit to mark all external buffers so use that<br>
to tell us when it is unsafe to use a cache override in mocs and<br>
fallback to the PTE value instead (which should be set to the correct<br>
cache level to be coherent amongst all active parties: PRIME, scanout and<br>
render). This may be refined in future to limit the override to buffers<br>
outside the control of mesa; as buffers being shared between mesa<br>
clients should be able to coordinate themselves without resolves.<br>
<br>
Bugzilla: <a href="https://bugs.freedesktop.org/show_bug.cgi?id=101691" rel="noreferrer" target="_blank">https://bugs.freedesktop.org/<wbr>show_bug.cgi?id=101691</a><br>
Cc: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>><br>
Cc: Jason Ekstrand <<a href="mailto:jason@jlekstrand.net">jason@jlekstrand.net</a>><br>
Cc: Lyude Paul <<a href="mailto:lyude@redhat.com">lyude@redhat.com</a>><br>
Cc: Timo Aalton <<a href="mailto:tjaalton@ubuntu.com">tjaalton@ubuntu.com</a>><br>
Cc: Ben Widawsky <<a href="mailto:ben@bwidawsk.net">ben@bwidawsk.net</a>><br>
Cc: Daniel Vetter <<a href="mailto:daniel.vetter@ffwll.ch">daniel.vetter@ffwll.ch</a>><br>
---<br>
src/intel/blorp/blorp.c | 1 +<br>
src/intel/blorp/blorp.h | 1 +<br>
src/intel/blorp/blorp_genX_<wbr>exec.h | 2 +-<br>
src/intel/blorp/blorp_priv.h | 1 +<br>
src/mesa/drivers/dri/i965/brw_<wbr>blorp.c | 1 +<br>
src/mesa/drivers/dri/i965/brw_<wbr>state.h | 3 ++-<br>
src/mesa/drivers/dri/i965/brw_<wbr>wm_surface_state.c | 16 +++++++++++-----<br>
7 files changed, 18 insertions(+), 7 deletions(-)<br>
<br>
diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c<br>
index 7cc6335f2f..459ad66652 100644<br>
--- a/src/intel/blorp/blorp.c<br>
+++ b/src/intel/blorp/blorp.c<br>
@@ -71,6 +71,7 @@ brw_blorp_surface_info_init(<wbr>struct blorp_context *blorp,<br>
surf->surf->logical_level0_px.<wbr>array_len));<br>
<br>
info->enabled = true;<br>
+ info->external = surf->external;<br>
<br>
if (format == ISL_FORMAT_UNSUPPORTED)<br>
format = surf->surf->format;<br>
diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h<br>
index 9716c66302..af056c9d52 100644<br>
--- a/src/intel/blorp/blorp.h<br>
+++ b/src/intel/blorp/blorp.h<br>
@@ -106,6 +106,7 @@ struct blorp_surf<br>
enum isl_aux_usage aux_usage;<br>
<br>
union isl_color_value clear_color;<br>
+ bool external;<br>
};<br>
<br>
void<br>
diff --git a/src/intel/blorp/blorp_genX_<wbr>exec.h b/src/intel/blorp/blorp_genX_<wbr>exec.h<br>
index 5389262098..18715788ff 100644<br>
--- a/src/intel/blorp/blorp_genX_<wbr>exec.h<br>
+++ b/src/intel/blorp/blorp_genX_<wbr>exec.h<br>
@@ -1328,7 +1328,7 @@ blorp_emit_surface_states(<wbr>struct blorp_batch *batch,<br>
blorp_emit_surface_state(<wbr>batch, ¶ms->src,<br>
surface_maps[BLORP_TEXTURE_BT_<wbr>INDEX],<br>
surface_offsets[BLORP_TEXTURE_<wbr>BT_INDEX],<br>
- NULL, false);<br>
+ NULL, params->src.external);<br>
}<br>
}<br>
<br>
diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h<br>
index c7d5d308da..f841aa7cdc 100644<br>
--- a/src/intel/blorp/blorp_priv.h<br>
+++ b/src/intel/blorp/blorp_priv.h<br>
@@ -47,6 +47,7 @@ enum {<br>
struct brw_blorp_surface_info<br>
{<br>
bool enabled;<br>
+ bool external;<br>
<br>
struct isl_surf surf;<br>
struct blorp_address addr;<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_blorp.c b/src/mesa/drivers/dri/i965/<wbr>brw_blorp.c<br>
index ed4f9870f2..563d13a037 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_blorp.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_blorp.c<br>
@@ -160,6 +160,7 @@ blorp_surf_for_miptree(struct brw_context *brw,<br>
.offset = mt->offset,<br>
.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,<br>
};<br>
+ surf->external = mt->bo->external;<br>
<br>
surf->aux_usage = aux_usage;<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_state.h b/src/mesa/drivers/dri/i965/<wbr>brw_state.h<br>
index 8db354cf23..01c0cd12cb 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_state.h<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_state.h<br>
@@ -342,6 +342,7 @@ void gen10_init_atoms(struct brw_context *brw);<br>
* may still respect that.<br>
*/<br>
#define GEN7_MOCS_L3 1<br>
+#define GEN7_MOCS_PTE 0<br>
<br>
/* Ivybridge only: cache in LLC.<br>
* Specifying zero here means to use the PTE values set by the kernel;<br>
@@ -367,7 +368,7 @@ void gen10_init_atoms(struct brw_context *brw);<br>
*/<br>
#define BDW_MOCS_WB 0x78<br>
#define BDW_MOCS_WT 0x58<br>
-#define BDW_MOCS_PTE 0x18<br>
+#define BDW_MOCS_PTE 0x08<br></blockquote><div><br></div><div>Why are we throwing away L3 caching here and in gen7 above? We should be able to handle L3 caching with manual texture/render cache invalidates/flushes. Our performance is already going to be not great with linear buffers with no LLC, we shouldn't kill it completely.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<br>
/* Skylake: MOCS is now an index into an array of 62 different caching<br>
* configurations programmed by the kernel.<br>
diff --git a/src/mesa/drivers/dri/i965/<wbr>brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/<wbr>brw_wm_surface_state.c<br>
index f4e9cf48c6..50a3682efc 100644<br>
--- a/src/mesa/drivers/dri/i965/<wbr>brw_wm_surface_state.c<br>
+++ b/src/mesa/drivers/dri/i965/<wbr>brw_wm_surface_state.c<br>
@@ -63,12 +63,17 @@ uint32_t tex_mocs[] = {<br>
};<br>
<br>
uint32_t rb_mocs[] = {<br>
- [7] = GEN7_MOCS_L3,<br>
+ [7] = GEN7_MOCS_PTE,<br>
[8] = BDW_MOCS_PTE,<br>
[9] = SKL_MOCS_PTE,<br>
[10] = CNL_MOCS_PTE,<br>
};<br>
<br>
+static inline uint32_t get_tex_mocs(struct brw_bo *bo, unsigned int gen)<br>
+{<br>
+ return (bo && bo->external ? rb_mocs : tex_mocs)[gen];<br>
+}<br>
+<br>
static void<br>
get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,<br>
GLenum target, struct isl_view *view,<br>
@@ -585,7 +590,7 @@ brw_update_texture_surface(<wbr>struct gl_context *ctx,<br>
aux_usage = ISL_AUX_USAGE_NONE;<br>
<br>
brw_emit_surface_state(brw, mt, mt->target, view, aux_usage,<br>
- tex_mocs[devinfo->gen],<br>
+ get_tex_mocs(mt->bo, devinfo->gen),<br>
surf_offset, surf_index,<br>
0);<br>
}<br>
@@ -616,7 +621,7 @@ brw_emit_buffer_surface_state(<wbr>struct brw_context *brw,<br>
.size = buffer_size,<br>
.format = surface_format,<br>
.stride = pitch,<br>
- .mocs = tex_mocs[devinfo->gen]);<br>
+ .mocs = get_tex_mocs(bo, devinfo->gen));<br>
}<br>
<br>
void<br>
@@ -1106,7 +1111,7 @@ update_renderbuffer_read_<wbr>surfaces(struct brw_context *brw)<br>
aux_usage = ISL_AUX_USAGE_NONE;<br>
<br>
brw_emit_surface_state(brw, irb->mt, target, view, aux_usage,<br>
- tex_mocs[devinfo->gen],<br>
+ get_tex_mocs(irb->mt->bo, devinfo->gen),<br>
surf_offset, surf_index,<br>
0);<br>
<br>
@@ -1598,7 +1603,8 @@ update_image_surface(struct brw_context *brw,<br>
view.base_array_layer,<br>
view.array_len));<br>
brw_emit_surface_state(brw, mt, mt->target, view,<br>
- ISL_AUX_USAGE_NONE, tex_mocs[devinfo->gen],<br>
+ ISL_AUX_USAGE_NONE,<br>
+ get_tex_mocs(mt->bo, devinfo->gen),<br>
surf_offset, surf_index,<br>
access == GL_READ_ONLY ? 0 : RELOC_WRITE);<br>
}<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.15.0.rc1<br>
<br>
</font></span></blockquote></div><br></div></div>