[Intel-gfx] [PATCH 2/3] drm/i915: check acthd for all rings

Ben Widawsky ben at bwidawsk.net
Mon Oct 3 18:59:12 CEST 2011


On Sat, Oct 01, 2011 at 07:15:18PM -0700, Ben Widawsky wrote:
> On Gen6+ we have other rings which may be in use. We haven't hung if the
> blit or media ring is still making progress.
> 
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |    6 +-
>  drivers/gpu/drm/i915/i915_drv.h     |    9 +--
>  drivers/gpu/drm/i915/i915_irq.c     |  146 ++++++++++++++++++++++++-----------
>  3 files changed, 107 insertions(+), 54 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 1f02971..c00dee5 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -755,20 +755,20 @@ static int i915_error_state(struct seq_file *m, void *unused)
>  		seq_printf(m, "Blitter Page Fault: 0x%08x\n", error->page_fault[BCS]);
>  		seq_printf(m, "ERROR: 0x%08x\n", error->error);
>  		seq_printf(m, "Blitter command stream:\n");
> -		seq_printf(m, "  ACTHD:    0x%08x\n", error->bcs_acthd);
> +		seq_printf(m, "  ACTHD:    0x%08x\n", error->acthd[BCS]);
>  		seq_printf(m, "  IPEIR:    0x%08x\n", error->bcs_ipeir);
>  		seq_printf(m, "  IPEHR:    0x%08x\n", error->bcs_ipehr);
>  		seq_printf(m, "  INSTDONE: 0x%08x\n", error->bcs_instdone);
>  		seq_printf(m, "  seqno:    0x%08x\n", error->bcs_seqno);
>  		seq_printf(m, "Video (BSD) command stream:\n");
> -		seq_printf(m, "  ACTHD:    0x%08x\n", error->vcs_acthd);
> +		seq_printf(m, "  ACTHD:    0x%08x\n", error->acthd[VCS]);
>  		seq_printf(m, "  IPEIR:    0x%08x\n", error->vcs_ipeir);
>  		seq_printf(m, "  IPEHR:    0x%08x\n", error->vcs_ipehr);
>  		seq_printf(m, "  INSTDONE: 0x%08x\n", error->vcs_instdone);
>  		seq_printf(m, "  seqno:    0x%08x\n", error->vcs_seqno);
>  	}
>  	seq_printf(m, "Render command stream:\n");
> -	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd);
> +	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd[RCS]);
>  	seq_printf(m, "  IPEIR: 0x%08x\n", error->ipeir);
>  	seq_printf(m, "  IPEHR: 0x%08x\n", error->ipehr);
>  	seq_printf(m, "  INSTDONE: 0x%08x\n", error->instdone);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 279560e..d4e8d42 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -152,15 +152,13 @@ struct drm_i915_error_state {
>  	u32 ipeir;
>  	u32 ipehr;
>  	u32 instdone;
> -	u32 acthd;
> +	u32 acthd[I915_NUM_RINGS];
>  	u32 page_fault[I915_NUM_RINGS];
>  	u32 error; /* gen6+ */
> -	u32 bcs_acthd; /* gen6+ blt engine */
>  	u32 bcs_ipehr;
>  	u32 bcs_ipeir;
>  	u32 bcs_instdone;
>  	u32 bcs_seqno;
> -	u32 vcs_acthd; /* gen6+ bsd engine */
>  	u32 vcs_ipehr;
>  	u32 vcs_ipeir;
>  	u32 vcs_instdone;
> @@ -332,9 +330,8 @@ typedef struct drm_i915_private {
>  #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
>  	struct timer_list hangcheck_timer;
>  	int hangcheck_count;
> -	uint32_t last_acthd;
> -	uint32_t last_instdone;
> -	uint32_t last_instdone1;
> +	uint32_t last_acthd[I915_NUM_RINGS];
> +	uint64_t last_instdone[I915_NUM_RINGS];
>  
>  	unsigned long cfb_size;
>  	unsigned int cfb_fb;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 990abda..7f228ec 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -919,7 +919,7 @@ static void i915_capture_error_state(struct drm_device *dev)
>  		error->page_fault[BCS] = I915_READ(GEN6_BLT_FAULT);
>  		error->error = I915_READ(ERROR_GEN6);
>  
> -		error->bcs_acthd = I915_READ(BCS_ACTHD);
> +		error->acthd[BCS] = I915_READ(BCS_ACTHD);
>  		error->bcs_ipehr = I915_READ(BCS_IPEHR);
>  		error->bcs_ipeir = I915_READ(BCS_IPEIR);
>  		error->bcs_instdone = I915_READ(BCS_INSTDONE);
> @@ -927,7 +927,7 @@ static void i915_capture_error_state(struct drm_device *dev)
>  		if (dev_priv->ring[BCS].get_seqno)
>  			error->bcs_seqno = dev_priv->ring[BCS].get_seqno(&dev_priv->ring[BCS]);
>  
> -		error->vcs_acthd = I915_READ(VCS_ACTHD);
> +		error->acthd[VCS] = I915_READ(VCS_ACTHD);
>  		error->vcs_ipehr = I915_READ(VCS_IPEHR);
>  		error->vcs_ipeir = I915_READ(VCS_IPEIR);
>  		error->vcs_instdone = I915_READ(VCS_INSTDONE);
> @@ -941,13 +941,13 @@ static void i915_capture_error_state(struct drm_device *dev)
>  		error->instdone = I915_READ(INSTDONE_I965);
>  		error->instps = I915_READ(INSTPS);
>  		error->instdone1 = I915_READ(INSTDONE1);
> -		error->acthd = I915_READ(ACTHD_I965);
> +		error->acthd[RCS] = I915_READ(ACTHD_I965);
>  		error->bbaddr = I915_READ64(BB_ADDR);
>  	} else {
>  		error->ipeir = I915_READ(IPEIR);
>  		error->ipehr = I915_READ(IPEHR);
>  		error->instdone = I915_READ(INSTDONE);
> -		error->acthd = I915_READ(ACTHD);
> +		error->acthd[RCS] = I915_READ(ACTHD);
>  		error->bbaddr = 0;
>  	}
>  	i915_gem_record_fences(dev, error);
> @@ -1659,6 +1659,83 @@ static bool kick_ring(struct intel_ring_buffer *ring)
>  	return false;
>  }
>  
> +static bool
> +instdone_stuck(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	uint64_t instdone = 0, instdone1 = 0, vcs_instdone = 0, bcs_instdone = 0;
> +	bool stuck;
> +
> +	switch (INTEL_INFO(dev)->gen) {
> +	case 7:
> +	case 6:
> +		bcs_instdone = I915_READ(BCS_INSTDONE);
> +	case 5:
> +		vcs_instdone = I915_READ(VCS_INSTDONE);
> +	case 4:
> +		instdone = I915_READ(INSTDONE_I965);
> +		instdone1 = I915_READ(INSTDONE1);
> +		break;
> +	case 3:
> +	case 2:
> +		instdone = I915_READ(INSTDONE);
> +		break;
> +	}
> +
> +	stuck =
> +	    (dev_priv->last_instdone[RCS] == ((instdone << 32) | instdone1)) &&
> +	    (dev_priv->last_instdone[VCS] == vcs_instdone) &&
> +	    (dev_priv->last_instdone[BCS] == bcs_instdone);
> +
> +	dev_priv->last_instdone[RCS] = (instdone << 32) | instdone1;
> +	dev_priv->last_instdone[VCS] = vcs_instdone;
> +	dev_priv->last_instdone[BCS] = bcs_instdone;
> +
> +	return stuck;
> +}
> +
> +static bool
> +acthd_stuck(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	uint32_t acthd = 0, vcs_acthd = 0, bcs_acthd = 0;
> +	bool stuck = false;
> +
> +	switch (INTEL_INFO(dev)->gen) {
> +	case 7:
> +	case 6:
> +		bcs_acthd = intel_ring_get_active_head(&dev_priv->ring[BCS]);
> +	case 5:
> +		vcs_acthd = intel_ring_get_active_head(&dev_priv->ring[VCS]);
> +	case 4:
> +	case 3:
> +	case 2:
> +		acthd = intel_ring_get_active_head(&dev_priv->ring[RCS]);
> +		break;
> +	}
> +
> +	stuck =
> +	    (dev_priv->last_acthd[RCS] == acthd) &&
> +	    (dev_priv->last_acthd[VCS] == vcs_acthd) &&
> +	    (dev_priv->last_acthd[BCS] == bcs_acthd);
> +
> +	dev_priv->last_acthd[RCS] = acthd;
> +	dev_priv->last_acthd[VCS] = vcs_acthd;
> +	dev_priv->last_acthd[BCS] = bcs_acthd;
> +
> +	return stuck;
> +}
> +
> +static bool gpu_stuck(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	if (dev_priv->hangcheck_count++ == 0)
> +		return false;
> +
> +	return acthd_stuck(dev) || instdone_stuck(dev);
> +}
> +

This should be (we have only hung if neither ACTHD nor INSTDONE is advancing):
	return acthd_stuck(dev) && instdone_stuck(dev);



More information about the Intel-gfx mailing list