[Intel-gfx] [PATCH 4/4] drm/i915: Detect small loops in hangcheck

Mika Kuoppala mika.kuoppala at linux.intel.com
Mon Nov 30 08:53:09 PST 2015


If there is a very small loop in a batch, the chances are quite high
that we sample the same head value twice in a row, causing the
hangcheck score to be incremented with the hung engine status instead
of the active loop status, which would have been more correct.

Try to resample the actual head a few times to detect small loops
instead of jumping to conclusions.

Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c1d1400..7c1168b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2914,12 +2914,8 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 }
 
 static enum intel_ring_hangcheck_action
-head_stuck(struct intel_engine_cs *ring, u64 acthd)
+head_action(struct intel_engine_cs *ring, u64 acthd)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 head;
-
 	if (acthd != ring->hangcheck.acthd) {
 		if (acthd > ring->hangcheck.max_acthd) {
 			ring->hangcheck.max_acthd = acthd;
@@ -2929,6 +2925,21 @@ head_stuck(struct intel_engine_cs *ring, u64 acthd)
 		return HANGCHECK_ACTIVE_LOOP;
 	}
 
+	return HANGCHECK_HUNG;
+}
+
+static enum intel_ring_hangcheck_action
+head_stuck(struct intel_engine_cs *ring, u64 acthd)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	static enum intel_ring_hangcheck_action ha;
+	u32 head, retries = 5;
+
+	ha = head_action(ring, acthd);
+	if (ha != HANGCHECK_HUNG)
+		return ha;
+
 	head = I915_READ_HEAD(ring) & HEAD_ADDR;
 
 	/* Some operations, like pipe flush, can take a long time.
@@ -2938,6 +2949,17 @@ head_stuck(struct intel_engine_cs *ring, u64 acthd)
 	if (lower_32_bits(acthd) == head)
 		return HANGCHECK_ACTIVE_LOOP;
 
+	do {
+		msleep(20);
+
+		ring->hangcheck.acthd = acthd;
+		acthd = intel_ring_get_active_head(ring);
+
+		ha = head_action(ring, acthd);
+		if (ha != HANGCHECK_HUNG)
+			return ha;
+	} while (retries--);
+
 	return HANGCHECK_HUNG;
 }
 
-- 
2.5.0



More information about the Intel-gfx mailing list