[Intel-gfx] [PATCH] drm/i915: Inspect subunit states on hangcheck

Mika Kuoppala mika.kuoppala at linux.intel.com
Tue Dec 1 04:58:05 PST 2015


Mika Kuoppala <mika.kuoppala at linux.intel.com> writes:

> If head seems stuck and engine in question is rcs,
> inspect subunit state transitions before deciding that
> this really is a hang instead of limited progress.
>

One thing to add to this description is that, since
we now always have one extra hangcheck step even in
truly stuck cases, this makes the hang declaration
one tick longer, 7.5s.

If this becomes a problem, we can sample the instdone
state on the first check to converge back to the old behaviour.

One real, but hard to achieve, improvement in determinism
would be to normalize the amount of 'work' each gen does in
a given tick. This way the shader progressions would be
more or less equal and same amount of shader work would
cause same hangcheck behaviour regardless of actual
speed of computation.

-Mika

> References: https://bugs.freedesktop.org/show_bug.cgi?id=93029
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon at intel.com>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_irq.c         | 49 +++++++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>  2 files changed, 45 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e88d692..e6ae54f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2913,13 +2913,31 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
>  		ring->hangcheck.deadlock = 0;
>  }
>  
> -static enum intel_ring_hangcheck_action
> -ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +static bool subunits_stuck(struct intel_engine_cs *ring)
>  {
> -	struct drm_device *dev = ring->dev;
> -	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 tmp;
> +	int i;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
> +	bool stuck;
> +
> +	if (ring->id != RCS)
> +		return true;
> +
> +	i915_get_extra_instdone(ring->dev, instdone);
>  
> +	stuck = true;
> +	for (i = 0; i < I915_NUM_INSTDONE_REG; i++) {
> +		if (instdone[i] != ring->hangcheck.instdone[i])
> +			stuck = false;
> +
> +		ring->hangcheck.instdone[i] = instdone[i];
> +	}
> +
> +	return stuck;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +head_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
>  	if (acthd != ring->hangcheck.acthd) {
>  		if (acthd > ring->hangcheck.max_acthd) {
>  			ring->hangcheck.max_acthd = acthd;
> @@ -2929,6 +2947,24 @@ ring_stuck(struct intel_engine_cs *ring, u64 acthd)
>  		return HANGCHECK_ACTIVE_LOOP;
>  	}
>  
> +	if (!subunits_stuck(ring))
> +		return HANGCHECK_ACTIVE_LOOP;
> +
> +	return HANGCHECK_HUNG;
> +}
> +
> +static enum intel_ring_hangcheck_action
> +ring_stuck(struct intel_engine_cs *ring, u64 acthd)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	enum intel_ring_hangcheck_action ha;
> +	u32 tmp;
> +
> +	ha = head_stuck(ring, acthd);
> +	if (ha != HANGCHECK_HUNG)
> +		return ha;
> +
>  	if (IS_GEN2(dev))
>  		return HANGCHECK_HUNG;
>  
> @@ -3064,6 +3100,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  				ring->hangcheck.score--;
>  
>  			ring->hangcheck.acthd = ring->hangcheck.max_acthd = 0;
> +
> +			memset(ring->hangcheck.instdone, 0,
> +			       sizeof(ring->hangcheck.instdone));
>  		}
>  
>  		ring->hangcheck.seqno = seqno;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5d1eb20..b8fe92e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -93,6 +93,7 @@ struct intel_ring_hangcheck {
>  	int score;
>  	enum intel_ring_hangcheck_action action;
>  	int deadlock;
> +	u32 instdone[I915_NUM_INSTDONE_REG];
>  };
>  
>  struct intel_ringbuffer {
> -- 
> 2.5.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list