[Intel-gfx] [PATCH] drm/i915/ringbuffer: Delay after invalidating gen6+ xcs
Chris Wilson
chris at chris-wilson.co.uk
Tue Aug 28 17:04:29 UTC 2018
During stress testing of full-ppgtt (on Baytrail at least), we found
that the invalidation around a context/mm switch was insufficient (writes
would go astray). Adding a second MI_FLUSH_DW barrier prevents this, but
it is unclear as to whether this is merely a delaying tactic or if it is
truly serialising with the TLB invalidation. Either way, it is
empirically required.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107715
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Matthew Auld <matthew.william.auld at gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/intel_ringbuffer.c | 101 +++++++++++-------------
1 file changed, 47 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d40f55a8dc34..952b6269bab0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1944,40 +1944,62 @@ static void gen6_bsd_submit_request(struct i915_request *request)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
-static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+static int emit_mi_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
{
u32 cmd, *cs;
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
+ do {
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
- cmd = MI_FLUSH_DW;
+ cmd = MI_FLUSH_DW;
- /* We always require a command barrier so that subsequent
- * commands, such as breadcrumb interrupts, are strictly ordered
- * wrt the contents of the write cache being flushed to memory
- * (and thus being coherent from the CPU).
- */
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+ /*
+ * We always require a command barrier so that subsequent
+ * commands, such as breadcrumb interrupts, are strictly ordered
+ * wrt the contents of the write cache being flushed to memory
+ * (and thus being coherent from the CPU).
+ */
+ cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
- /*
- * Bspec vol 1c.5 - video engine command streamer:
- * "If ENABLED, all TLBs will be invalidated once the flush
- * operation is complete. This bit is only valid when the
- * Post-Sync Operation field is a value of 1h or 3h."
- */
- if (mode & EMIT_INVALIDATE)
- cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+ /*
+ * Bspec vol 1c.3 - blitter engine command streamer:
+ * "If ENABLED, all TLBs will be invalidated once the flush
+ * operation is complete. This bit is only valid when the
+ * Post-Sync Operation field is a value of 1h or 3h."
+ */
+ if (mode & EMIT_INVALIDATE)
+ cmd |= invflags;
+ *cs++ = cmd;
+ *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ /*
+ * Not only do we need a full barrier (post-sync write) after
+ * invalidating the TLBs, but we need to wait a little bit
+ * longer. Whether this is merely delaying us, or the
+ * subsequent flush is a key part of serialising with the
+ * post-sync op, this extra pass appears vital before a
+ * mm switch!
+ */
+ if (!(mode & EMIT_INVALIDATE))
+ break;
+
+ mode &= ~EMIT_INVALIDATE;
+ } while (1);
- *cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
return 0;
}
+static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+{
+ return emit_mi_flush_dw(rq, mode,
+ MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
+}
+
static int
hsw_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
@@ -2022,36 +2044,7 @@ gen6_emit_bb_start(struct i915_request *rq,
static int gen6_ring_flush(struct i915_request *rq, u32 mode)
{
- u32 cmd, *cs;
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- cmd = MI_FLUSH_DW;
-
- /* We always require a command barrier so that subsequent
- * commands, such as breadcrumb interrupts, are strictly ordered
- * wrt the contents of the write cache being flushed to memory
- * (and thus being coherent from the CPU).
- */
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
- /*
- * Bspec vol 1c.3 - blitter engine command streamer:
- * "If ENABLED, all TLBs will be invalidated once the flush
- * operation is complete. This bit is only valid when the
- * Post-Sync Operation field is a value of 1h or 3h."
- */
- if (mode & EMIT_INVALIDATE)
- cmd |= MI_INVALIDATE_TLB;
- *cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
- *cs++ = 0;
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
-
- return 0;
+ return emit_mi_flush_dw(rq, mode, MI_INVALIDATE_TLB);
}
static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
--
2.18.0
More information about the Intel-gfx
mailing list