[Intel-gfx] [PATCH 06/13] drm/i915: detect hang using per ring hangcheck_score
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Feb 26 12:05:09 CET 2013
Add a per-ring score for identifying the possible culprit of a GPU
hang. If a ring is busy and not waiting, it will accumulate the highest
score across calls to i915_hangcheck_elapsed. This way we are
most likely to find the ring that caused the hang among
the waiting ones.
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_irq.c | 65 +++++++++++++++++--------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
2 files changed, 36 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b828807..4da8691 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -356,7 +356,6 @@ static void notify_ring(struct drm_device *dev,
wake_up_all(&ring->irq_queue);
if (i915_enable_hangcheck) {
- dev_priv->gpu_error.hangcheck_count = 0;
mod_timer(&dev_priv->gpu_error.hangcheck_timer,
round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
}
@@ -1818,52 +1817,58 @@ void i915_hangcheck_elapsed(unsigned long data)
struct drm_device *dev = (struct drm_device *)data;
drm_i915_private_t *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
- bool err = false, idle;
int i;
- u32 seqno[I915_NUM_RINGS];
- bool work_done;
+ int busy_count = 0, rings_hung = 0;
if (!i915_enable_hangcheck)
return;
- idle = true;
for_each_ring(ring, dev_priv, i) {
- seqno[i] = ring->get_seqno(ring, false);
- idle &= i915_hangcheck_ring_idle(ring, seqno[i], &err);
- }
+ u32 seqno;
+ bool idle, err = false;
+
+ seqno = ring->get_seqno(ring, false);
+ idle = i915_hangcheck_ring_idle(ring, seqno, &err);
- /* If all work is done then ACTHD clearly hasn't advanced. */
- if (idle) {
- if (err) {
- if (i915_hangcheck_hung(dev))
- return;
+ if (idle) {
+ if (err)
+ ring->hangcheck_score++;
+ else
+ ring->hangcheck_score = 0;
+ } else {
+ busy_count++;
- goto repeat;
+ if (ring->hangcheck_seqno == seqno) {
+ ring->hangcheck_score++;
+
+ /* If the ring is not waiting, raise
+ the score further */
+ if (i915_hangcheck_ring_hung(dev, ring))
+ ring->hangcheck_score++;
+ } else {
+ ring->hangcheck_score = 0;
+ }
}
- dev_priv->gpu_error.hangcheck_count = 0;
- return;
+ ring->hangcheck_seqno = seqno;
}
- work_done = false;
for_each_ring(ring, dev_priv, i) {
- if (ring->hangcheck_seqno != seqno[i]) {
- work_done = true;
- ring->hangcheck_seqno = seqno[i];
+ if (ring->hangcheck_score > 2) {
+ rings_hung++;
+ DRM_ERROR("%s seems hung\n", ring->name);
}
}
- if (!work_done) {
- if (i915_hangcheck_hung(dev))
- return;
- } else {
- dev_priv->gpu_error.hangcheck_count = 0;
- }
+ if (rings_hung)
+ return i915_handle_error(dev, true);
-repeat:
- /* Reset timer case chip hangs without another request being added */
- mod_timer(&dev_priv->gpu_error.hangcheck_timer,
- round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
+ if (busy_count)
+ /* Reset timer in case chip hangs without another request
+ * being added */
+ mod_timer(&dev_priv->gpu_error.hangcheck_timer,
+ round_jiffies_up(jiffies +
+ DRM_I915_HANGCHECK_JIFFIES));
}
/* drm_dma.h hooks
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9599c56..97b8f37 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -138,6 +138,7 @@ struct intel_ring_buffer {
struct drm_i915_gem_object *last_context_obj;
u32 hangcheck_seqno;
+ int hangcheck_score;
void *private;
};
--
1.7.9.5
More information about the Intel-gfx
mailing list