[PATCH 4/4] drm/i915/gt: Stall around xcs invalidations on tgl
Chris Wilson
chris at chris-wilson.co.uk
Tue Jul 28 11:18:36 UTC 2020
Whether this is an arbitrary stall or a vital ingredient, the impact is
nevertheless noticeable. If we do not have the stall around the xcs
invalidation before a request, writes within that request sometimes go
astray.
v2: Split between flush/invalidate, as it seems we can beat the
incoherency at a fraction of the cost.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/2169
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Acked-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 61 ++++++++++++++++++++---------
1 file changed, 42 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 353b1717fe84..82890d408742 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -4496,6 +4496,24 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine)
ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
}
+static u32 *emit_mi_flush(u32 *cs, u32 flags)
+{
+ *cs++ = (MI_FLUSH_DW + 1) | flags;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
+ *cs++ = 0; /* upper addr */
+ *cs++ = 0; /* value */
+
+ return cs;
+}
+
+static u32 *emit_xcs_invalidate(u32 *cs)
+{
+ return emit_mi_flush(cs,
+ MI_FLUSH_DW_STORE_INDEX |
+ MI_FLUSH_DW_OP_STOREDW |
+ MI_INVALIDATE_TLB);
+}
+
static int gen8_emit_flush(struct i915_request *request, u32 mode)
{
u32 cmd, *cs;
@@ -4504,14 +4522,13 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
if (IS_ERR(cs))
return PTR_ERR(cs);
- cmd = MI_FLUSH_DW + 1;
-
- /* We always require a command barrier so that subsequent
+ /*
+ * We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+ cmd = MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_INVALIDATE_TLB;
@@ -4519,10 +4536,8 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
cmd |= MI_INVALIDATE_BSD;
}
- *cs++ = cmd;
- *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
- *cs++ = 0; /* upper addr */
- *cs++ = 0; /* value */
+ cs = emit_mi_flush(cs, cmd);
+
intel_ring_advance(request, cs);
return 0;
@@ -4761,10 +4776,12 @@ static int gen12_emit_flush_render(struct i915_request *request,
static int gen12_emit_flush(struct i915_request *request, u32 mode)
{
+#define WA_CNT 2 /* Magic delay? */
intel_engine_mask_t aux_inv = 0;
u32 cmd, *cs;
+ int n;
- cmd = 4;
+ cmd = 4 * WA_CNT;
if (mode & EMIT_INVALIDATE)
cmd += 2;
if (mode & EMIT_INVALIDATE)
@@ -4779,25 +4796,21 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(true);
- cmd = MI_FLUSH_DW + 1;
-
- /* We always require a command barrier so that subsequent
+ /*
+ * We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
- cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
+ cmd = MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_INVALIDATE_TLB;
if (request->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
}
- *cs++ = cmd;
- *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
- *cs++ = 0; /* upper addr */
- *cs++ = 0; /* value */
+ for (n = 0; n < WA_CNT; n++)
+ cs = emit_mi_flush(cs, cmd);
if (aux_inv) { /* hsdes: 1809175790 */
struct intel_engine_cs *engine;
@@ -4818,6 +4831,7 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
intel_ring_advance(request, cs);
return 0;
+#undef WA_CNT
}
static void assert_request_valid(struct i915_request *rq)
@@ -4971,7 +4985,16 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
{
- return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
+ cs = emit_xcs_invalidate(cs);
+ cs = emit_xcs_invalidate(cs);
+
+ cs = emit_xcs_breadcrumb(rq, cs);
+ cs = emit_xcs_breadcrumb(rq, cs);
+
+ cs = emit_xcs_invalidate(cs);
+ cs = emit_xcs_invalidate(cs);
+
+ return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
static u32 *
--
2.20.1
More information about the Intel-gfx-trybot
mailing list