[PATCH 35/81] drm/i915: Clean up GPU hang message

Chris Wilson chris at chris-wilson.co.uk
Sat Apr 16 12:50:30 UTC 2016


Remove some redundant kernel messages as we deduce a hung GPU and
capture the error state.

v2: Fix "hang" vs "no progress" message whilst I was there

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_irq.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 17e321a0806b..706410747eb9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3127,9 +3127,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 			     gpu_error.hangcheck_work.work);
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int busy_count = 0, rings_hung = 0;
-	bool stuck[I915_NUM_ENGINES] = { 0 };
+	unsigned hung = 0, stuck = 0;
+	int busy_count = 0;
 #define BUSY 1
 #define KICK 5
 #define HUNG 20
@@ -3147,7 +3146,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	 */
 	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
 
-	for_each_engine_id(engine, dev_priv, id) {
+	for_each_engine(engine, dev_priv) {
 		bool busy = intel_engine_has_waiter(engine);
 		u64 acthd;
 		u32 seqno;
@@ -3210,10 +3209,15 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 					break;
 				case HANGCHECK_HUNG:
 					engine->hangcheck.score += HUNG;
-					stuck[id] = true;
 					break;
 				}
 			}
+
+			if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) {
+				hung |= intel_engine_flag(engine);
+				if (engine->hangcheck.action != HANGCHECK_HUNG)
+					stuck |= intel_engine_flag(engine);
+			}
 		} else {
 			engine->hangcheck.action = HANGCHECK_ACTIVE;
 
@@ -3238,17 +3242,24 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		busy_count += busy;
 	}
 
-	for_each_engine_id(engine, dev_priv, id) {
-		if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) {
-			DRM_INFO("%s on %s\n",
-				 stuck[id] ? "stuck" : "no progress",
-				 engine->name);
-			rings_hung |= intel_engine_flag(engine);
-		}
-	}
+	if (hung) {
+		char msg[80];
+		int len;
 
-	if (rings_hung)
-		i915_handle_error(dev, rings_hung, "Engine(s) hung");
+		/* If some rings hung but others were still busy, only
+		 * blame the hanging rings in the synopsis.
+		 */
+		if (stuck != hung)
+			hung &= ~stuck;
+		len = snprintf(msg, sizeof(msg),
+			       "%s on ", stuck == hung ? "No progress" : "Hang");
+		for_each_engine_masked(engine, dev_priv, hung)
+			len += snprintf(msg + len, sizeof(msg) - len,
+					"%s, ", engine->name);
+		msg[len-2] = '\0';
+
+		return i915_handle_error(dev, hung, msg);
+	}
 
 	/* Reset timer in case GPU hangs without another request being added */
 	if (busy_count)
-- 
2.8.0.rc3



More information about the Intel-gfx-trybot mailing list