[PATCH 01/34] drm/i915: Always update the RING registers on resetting a context

Chris Wilson chris at chris-wilson.co.uk
Sun Apr 7 22:22:46 UTC 2019


If the context was innocent upon reset, and the context image looked
intact, we left the context alone before replaying. (Trying to avoid
inflicting damage of our own across the reset.) However, it appears that
the RING_HEAD is not always updated by the context save prior to the
reset meaning that we would replay earlier requests, which can cause
momentarily confusion as we see older requests being run that we already
have retired:

<0> [412.390350]   <idle>-0       3d.s2 408373352us : __i915_request_submit: rcs0 fence 1e95b:3640 -> current 3638
<0> [412.390350]   <idle>-0       3d.s2 408373353us : __i915_request_submit: rcs0 fence 1e95b:3642 -> current 3638
<0> [412.390350]   <idle>-0       3d.s2 408373354us : __i915_request_submit: rcs0 fence 1e95b:3644 -> current 3638
<0> [412.390350]   <idle>-0       3d.s2 408373354us : __i915_request_submit: rcs0 fence 1e95b:3646 -> current 3638
<0> [412.390350]   <idle>-0       3d.s2 408373356us : __execlists_submission_tasklet: rcs0 in[0]:  ctx=2.1, fence 1e95b:3646 (current 3638), prio=4
<0> [412.390350] i915_sel-4613    0.... 408373374us : __i915_request_commit: rcs0 fence 1e95b:3648
<0> [412.390350] i915_sel-4613    0d..1 408373377us : process_csb: rcs0 cs-irq head=2, tail=3
<0> [412.390350] i915_sel-4613    0d..1 408373377us : process_csb: rcs0 csb[3]: status=0x00000001:0x00000000, active=0x1
<0> [412.390350] i915_sel-4613    0d..1 408373378us : __i915_request_submit: rcs0 fence 1e95b:3648 -> current 3638
<0> [412.390350]   <idle>-0       3..s1 408373378us : execlists_submission_tasklet: rcs0 awake?=1, active=5
<0> [412.390350] i915_sel-4613    0d..1 408373379us : __execlists_submission_tasklet: rcs0 in[0]:  ctx=2.2, fence 1e95b:3648 (current 3638), prio=4
<0> [412.390350] i915_sel-4613    0.... 408373381us : i915_reset_engine: rcs0 flags=4
<0> [412.390350] i915_sel-4613    0.... 408373382us : execlists_reset_prepare: rcs0: depth<-0
<0> [412.390350]   <idle>-0       3d.s2 408373390us : process_csb: rcs0 cs-irq head=3, tail=4
<0> [412.390350]   <idle>-0       3d.s2 408373390us : process_csb: rcs0 csb[4]: status=0x00008002:0x00000002, active=0x1
<0> [412.390350]   <idle>-0       3d.s2 408373390us : process_csb: rcs0 out[0]: ctx=2.2, fence 1e95b:3648 (current 3640), prio=4
<0> [412.390350] i915_sel-4613    0.... 408373401us : intel_engine_stop_cs: rcs0
<0> [412.390350] i915_sel-4613    0d..1 408373402us : process_csb: rcs0 cs-irq head=4, tail=4
<0> [412.390350] i915_sel-4613    0.... 408373403us : intel_gpu_reset: engine_mask=1
<0> [412.390350] i915_sel-4613    0d..1 408373408us : execlists_cancel_port_requests: rcs0:port0 fence 1e95b:3648, (current 3648)
<0> [412.390350] i915_sel-4613    0.... 408373442us : intel_engine_cancel_stop_cs: rcs0
<0> [412.390350] i915_sel-4613    0.... 408373442us : execlists_reset_finish: rcs0: depth->0
<0> [412.390350] ksoftirq-26      3..s. 408373442us : execlists_submission_tasklet: rcs0 awake?=1, active=0
<0> [412.390350] ksoftirq-26      3d.s1 408373443us : process_csb: rcs0 cs-irq head=5, tail=5
<0> [412.390350] i915_sel-4613    0.... 408373475us : i915_request_retire: rcs0 fence 1e95b:3640, current 3648
<0> [412.390350] i915_sel-4613    0.... 408373476us : i915_request_retire: __retire_engine_request(rcs0) fence 1e95b:3640, current 3648
<0> [412.390350] i915_sel-4613    0.... 408373494us : __i915_request_commit: rcs0 fence 1e95b:3650
<0> [412.390350] i915_sel-4613    0d..1 408373496us : process_csb: rcs0 cs-irq head=5, tail=5
<0> [412.390350] i915_sel-4613    0d..1 408373496us : __i915_request_submit: rcs0 fence 1e95b:3650 -> current 3648
<0> [412.390350] i915_sel-4613    0d..1 408373498us : __execlists_submission_tasklet: rcs0 in[0]:  ctx=2.1, fence 1e95b:3650 (current 3648), prio=6
<0> [412.390350] i915_sel-4613    0.... 408373500us : i915_request_retire_upto: rcs0 fence 1e95b:3648, current 3648
<0> [412.390350] i915_sel-4613    0.... 408373500us : i915_request_retire: rcs0 fence 1e95b:3642, current 3648
<0> [412.390350] i915_sel-4613    0.... 408373501us : i915_request_retire: __retire_engine_request(rcs0) fence 1e95b:3642, current 3648
<0> [412.390350] i915_sel-4613    0.... 408373514us : i915_request_retire: rcs0 fence 1e95b:3644, current 3648
<0> [412.390350] i915_sel-4613    0.... 408373515us : i915_request_retire: __retire_engine_request(rcs0) fence 1e95b:3644, current 3648
<0> [412.390350] i915_sel-4613    0.... 408373527us : i915_request_retire: rcs0 fence 1e95b:3646, current 3640
<0> [412.390350]   <idle>-0       3..s1 408373569us : execlists_submission_tasklet: rcs0 awake?=1, active=1
<0> [412.390350]   <idle>-0       3d.s2 408373569us : process_csb: rcs0 cs-irq head=5, tail=1
<0> [412.390350]   <idle>-0       3d.s2 408373570us : process_csb: rcs0 csb[0]: status=0x00000001:0x00000000, active=0x1
<0> [412.390350]   <idle>-0       3d.s2 408373570us : process_csb: rcs0 csb[1]: status=0x00000018:0x00000002, active=0x5
<0> [412.390350]   <idle>-0       3d.s2 408373570us : process_csb: rcs0 out[0]: ctx=2.1, fence 1e95b:3650 (current 3650), prio=6
<0> [412.390350]   <idle>-0       3d.s2 408373571us : process_csb: rcs0 completed ctx=2
<0> [412.390350] i915_sel-4613    0.... 408373621us : i915_request_retire: i915_request_retire:253 GEM_BUG_ON(!i915_request_completed(request))

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6931dbb2888c..e3302d03ec46 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1967,7 +1967,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 * perfectly and we do not need to flag the result as being erroneous.
 	 */
 	if (!i915_request_started(rq) && lrc_regs_ok(rq))
-		goto out_unlock;
+		goto out_replay;
 
 	/*
 	 * If the request was innocent, we leave the request in the ELSP
@@ -1982,7 +1982,7 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 	 */
 	i915_reset_request(rq, stalled);
 	if (!stalled && lrc_regs_ok(rq))
-		goto out_unlock;
+		goto out_replay;
 
 	/*
 	 * We want a simple context + ring to execute the breadcrumb update.
@@ -1998,12 +1998,14 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
 		       engine->context_size - PAGE_SIZE);
 	}
+	execlists_init_reg_state(regs, rq->hw_context, engine, rq->ring);
 
+out_replay:
+	GEM_TRACE("%s: replaying from start of %llx:%lld\n",
+		  engine->name, rq->fence.context, rq->fence.seqno);
 	/* Rerun the request; its payload has been neutered (if guilty). */
 	rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
 	intel_ring_update_space(rq->ring);
-
-	execlists_init_reg_state(regs, rq->hw_context, engine, rq->ring);
 	__execlists_update_reg_state(rq->hw_context, engine);
 
 out_unlock:
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list