[PATCH 78/80] drm/i915/gt: Stall around xcs invalidations on tgl
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 7 23:23:29 UTC 2020
Whether this is an arbitrary stall or a vital ingredient, nevertheless the
impact is noticeable. If we do not have the stall around the xcs
invalidation before a request, writes within that request sometimes go
astray.
v2: Split between flush/invalidate, as it seems we can then beat the
mysterious incoherency at a fraction of the cost, but this only reduces
the risk [substantially]
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2169
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Acked-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 57 ++++++++++++++++++++---------
1 file changed, 39 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 4b46b112cd42..961c73ff0d82 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -4158,6 +4158,24 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
}
+static u32 *emit_mi_flush(u32 *cs, u32 flags)
+{
+ *cs++ = (MI_FLUSH_DW + 1) | flags;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+ *cs++ = 0; /* upper addr */
+ *cs++ = 0; /* value */
+
+ return cs;
+}
+
+static u32 *emit_xcs_invalidate(u32 *cs)
+{
+ return emit_mi_flush(cs,
+ MI_FLUSH_DW_STORE_INDEX |
+ MI_FLUSH_DW_OP_STOREDW |
+ MI_INVALIDATE_TLB);
+}
+
static int gen8_emit_flush(struct i915_request *request, u32 mode)
{
u32 cmd, *cs;
@@ -4166,14 +4184,13 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
if (IS_ERR(cs))
return PTR_ERR(cs);
- cmd = MI_FLUSH_DW + 1;
-
- /* We always require a command barrier so that subsequent
+ /*
+ * We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+ cmd = MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_INVALIDATE_TLB;
@@ -4181,10 +4198,8 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
cmd |= MI_INVALIDATE_BSD;
}
- *cs++ = cmd;
- *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
- *cs++ = 0; /* upper addr */
- *cs++ = 0; /* value */
+ cs = emit_mi_flush(cs, cmd);
+
intel_ring_advance(request, cs);
return 0;
@@ -4423,10 +4438,12 @@ static int gen12_emit_flush_render(struct i915_request *request,
static int gen12_emit_flush(struct i915_request *request, u32 mode)
{
+#define WA_CNT 2 /* Magic delay? */
intel_engine_mask_t aux_inv = 0;
u32 cmd, *cs;
+ int n;
- cmd = 4;
+ cmd = 4 * WA_CNT;
if (mode & EMIT_INVALIDATE)
cmd += 2;
if (mode & EMIT_INVALIDATE)
@@ -4441,25 +4458,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(true);
- cmd = MI_FLUSH_DW + 1;
-
- /* We always require a command barrier so that subsequent
+ /*
+ * We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
+ cmd = MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_INVALIDATE_TLB;
if (request->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
}
- *cs++ = cmd;
- *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
- *cs++ = 0; /* upper addr */
- *cs++ = 0; /* value */
+ for (n = 0; n < WA_CNT; n++)
+ cs = emit_mi_flush(cs, cmd);
if (aux_inv) { /* hsdes: 1809175790 */
struct intel_engine_cs *engine;
@@ -4480,6 +4493,7 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
intel_ring_advance(request, cs);
return 0;
+#undef WA_CNT
}
static void assert_request_valid(struct i915_request *rq)
@@ -4633,7 +4647,14 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
+#define WA_CNT 2
+ int i;
+
+ for (i = 0; i < WA_CNT; i++)
+ cs = emit_xcs_invalidate(cs);
+
return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
+#undef WA_CNT
}
static u32 *
--
2.20.1
More information about the Intel-gfx-trybot
mailing list