[Intel-gfx] [PATCH 031/190] drm/i915: Harden detection of missed interrupts

Chris Wilson chris at chris-wilson.co.uk
Mon Jan 11 01:16:42 PST 2016


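Only declare a missed interrupt if we find that the GPU is idle with
waiters and a full hangcheck interval has passed in which no new user
interrupts have been raised.

To detect this, keep a per-ring user_interrupts counter that
notify_ring() increments whenever intel_engine_wakeup() returns true.
Hangcheck samples the counter on every run and only flags the ring in
missed_irq_rings when the value is unchanged since the previous sample.
Both the current count and the last hangcheck sample are also reported
through debugfs.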

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  6 ++++++
 drivers/gpu/drm/i915/i915_irq.c         | 10 ++++++++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5a706c700684..567f8db4c70a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -735,6 +735,9 @@ static void i915_ring_seqno_info(struct seq_file *m,
 	seq_printf(m, "Current sequence (%s): %x\n",
 		   ring->name, intel_ring_get_seqno(ring));
 
+	seq_printf(m, "Current user interrupts (%s): %x\n",
+		   ring->name, READ_ONCE(ring->user_interrupts));
+
 	spin_lock(&ring->breadcrumbs.lock);
 	for (rb = rb_first(&ring->breadcrumbs.waiters);
 	     rb != NULL;
@@ -1372,6 +1375,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
 		seq_printf(m, "\tseqno = %x [current %x], waiters? %d\n",
 			   ring->hangcheck.seqno, seqno[i],
 			   intel_engine_has_waiter(ring));
+		seq_printf(m, "\tuser interrupts = %x [current %x]\n",
+			   ring->hangcheck.user_interrupts,
+			   ring->user_interrupts);
 		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
 			   (long long)ring->hangcheck.acthd,
 			   (long long)acthd[i]);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bf48fa63127a..b3942dec7de4 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -997,8 +997,10 @@ static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 static void notify_ring(struct intel_engine_cs *ring)
 {
 	ring->irq_posted = true; /* paired with mb() in wake_up_process() */
-	if (intel_engine_wakeup(ring))
+	if (intel_engine_wakeup(ring)) {
 		trace_i915_gem_request_notify(ring);
+		ring->user_interrupts++;
+	}
 }
 
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -3061,12 +3063,14 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	for_each_ring(ring, dev_priv, i) {
 		u64 acthd;
 		u32 seqno;
+		unsigned user_interrupts;
 		bool busy = true;
 
 		semaphore_clear_deadlocks(dev_priv);
 
 		acthd = intel_ring_get_active_head(ring);
 		seqno = intel_ring_get_seqno(ring);
+		user_interrupts = READ_ONCE(ring->user_interrupts);
 
 		if (ring->hangcheck.seqno == seqno) {
 			if (ring_idle(ring, seqno)) {
@@ -3074,7 +3078,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 				if (intel_engine_has_waiter(ring)) {
 					/* Issue a wake-up to catch stuck h/w. */
-					if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
+					if (ring->hangcheck.user_interrupts == user_interrupts &&
+					    !test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
 						if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings))
 							DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
 								  ring->name);
@@ -3142,6 +3147,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
 		ring->hangcheck.seqno = seqno;
 		ring->hangcheck.acthd = acthd;
+		ring->hangcheck.user_interrupts = user_interrupts;
 		busy_count += busy;
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 3364bcebd456..73da75fa47c1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -90,6 +90,7 @@ struct intel_ring_hangcheck {
 	u64 acthd;
 	u64 max_acthd;
 	u32 seqno;
+	unsigned user_interrupts;
 	int score;
 	enum intel_ring_hangcheck_action action;
 	int deadlock;
@@ -328,6 +329,7 @@ struct  intel_engine_cs {
 	 * inspecting request list.
 	 */
 	u32 last_submitted_seqno;
+	unsigned user_interrupts;
 
 	bool gpu_caches_dirty;
 
-- 
2.7.0.rc3



More information about the Intel-gfx mailing list