[PATCH 02/10] drm/i915/gt: Don't declare hangs if engine is stalled

Chris Wilson chris at chris-wilson.co.uk
Wed May 27 23:23:09 UTC 2020


If the ring submission is stalled on an external request, nothing can be
submitted, not even the heartbeat in the kernel context. Since nothing
is running, resetting the engine/device does not unblock the system and
is pointless. We can see if the heartbeat is supposed to be running
before declaring foul.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 .../gpu/drm/i915/gt/intel_engine_heartbeat.c   | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5136c8bf112d..a68052c1c850 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -48,8 +48,10 @@ static void show_heartbeat(const struct i915_request *rq,
 	struct drm_printer p = drm_debug_printer("heartbeat");
 
 	intel_engine_dump(engine, &p,
-			  "%s heartbeat {prio:%d} not ticking\n",
+			  "%s heartbeat {seqno:%llx:%lld, prio:%d} not ticking\n",
 			  engine->name,
+			  rq->fence.context,
+			  rq->fence.seqno,
 			  rq->sched.attr.priority);
 }
 
@@ -64,6 +66,7 @@ static void heartbeat(struct work_struct *wrk)
 	struct i915_request *rq;
 
 	rq = engine->heartbeat.systole;
+
 	if (rq && i915_request_completed(rq)) {
 		i915_request_put(rq);
 		engine->heartbeat.systole = NULL;
@@ -76,7 +79,18 @@ static void heartbeat(struct work_struct *wrk)
 		goto out;
 
 	if (engine->heartbeat.systole) {
-		if (engine->schedule &&
+		if (!i915_sw_fence_signaled(&rq->submit)) {
+			/*
+			 * Not yet submitted, system is stalled.
+			 *
+			 * This more often happens for ring submission,
+			 * where all contexts are funnelled into a common
+			 * ringbuffer. If one context is blocked on an
+			 * external fence, not only is it not submitted,
+			 * but all other contexts, including the kernel
+			 * context are stuck waiting for the signal.
+			 */
+		} else if (engine->schedule &&
 		    rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
 			/*
 			 * Gradually raise the priority of the heartbeat to
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list