[Intel-gfx] [PATCH] drm/i915: Inspect subunit states on hangcheck
Arun Siluvery
arun.siluvery at linux.intel.com
Tue Dec 1 04:56:55 PST 2015
On 01/12/2015 12:17, Mika Kuoppala wrote:
> If head seems stuck and engine in question is rcs,
> inspect subunit state transitions before deciding that
> this really is a hang instead of limited progress.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=93029
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon at intel.com>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
> drivers/gpu/drm/i915/i915_irq.c | 49 +++++++++++++++++++++++++++++----
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 2 files changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e88d692..e6ae54f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
> ring->hangcheck.deadlock = 0;
> }
>
> -static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +static bool subunits_stuck(struct intel_engine_cs *ring)
> {
> - struct drm_device *dev = ring->dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - u32 tmp;
> + int i;
> + u32 instdone[I915_NUM_INSTDONE_REG];
> + bool stuck;
> +
> + if (ring->id != RCS)
> + return true;
> +
> + i915_get_extra_instdone(ring->dev, instdone);
>
> + stuck = true;
> + for (i = 0; i < I915_NUM_INSTDONE_REG; i++) {
> + if (instdone[i] != ring->hangcheck.instdone[i])
> + stuck = false;
This instdone comparison may not be completely reliable. In his TDR tests,
Tomas Elf observed that instdone kept changing even when the CS was hung and
otherwise in a stable state.
Regards,
Arun
> +
> + ring->hangcheck.instdone[i] = instdone[i];
> + }
> +
> + return stuck;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +head_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
> if (acthd != ring->hangcheck.acthd) {
> if (acthd > ring->hangcheck.max_acthd) {
> ring->hangcheck.max_acthd = acthd;
> @@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> return HANGCHECK_ACTIVE_LOOP;
> }
>
> + if (!subunits_stuck(ring))
> + return HANGCHECK_ACTIVE_LOOP;
> +
> + return HANGCHECK_HUNG;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
> + struct drm_device *dev = ring->dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + enum intel_ring_hangcheck_action ha;
> + u32 tmp;
> +
> + ha = head_stuck(ring, acthd);
> + if (ha != HANGCHECK_HUNG)
> + return ha;
> +
> if (IS_GEN2(dev))
> return HANGCHECK_HUNG;
>
> @@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
> ring->hangcheck.score--;
>
> ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
> +
> + memset(ring->hangcheck.instdone, 0,
> + sizeof(ring->hangcheck.instdone));
> }
>
> ring->hangcheck.seqno = seqno;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5d1eb20..b8fe92e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -93,6 +93,7 @@ struct intel_ring_hangcheck {
> int score;
> enum intel_ring_hangcheck_action action;
> int deadlock;
> + u32 instdone[I915_NUM_INSTDONE_REG];
> };
>
> struct intel_ringbuffer {
>
More information about the Intel-gfx
mailing list