[Intel-gfx] [PATCH] drm/i915: collect more per ring error state

Chris Wilson chris at chris-wilson.co.uk
Sun Oct 30 19:39:03 CET 2011


On Tue, 11 Oct 2011 21:20:21 +0200, Daniel Vetter <daniel.vetter at ffwll.ch> wrote:
> Based on a patch by Ben Widawsky, but with different colors
> for the bikeshed.
> 
> In contrast to Ben's patch this one doesn't add the fault regs.
> Afaics they're for the optional page fault support which
> - we're not enabling
> - and which seems to be unsupported by the hw team. Recent bspec
>   lacks tons of information about this that the public docs released
>   half a year back still contain.
> 
> Also dump ring HEAD/TAIL registers - I've recently seen a few
> error_state where just guessing these is not good enough.
> 
> v2: Also dump INSTPM for every ring.
> 
> v3: Fix a few really silly goof-ups spotted by Chris Wilson.
> 
> Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>

This is independent of the "hangcheck robustification" (which I feel is
false advertising ;-) and adds some mildly useful debug information.
The patch itself is fine and worthy of a reviewed-by, there is just one
nearby bug that may as well be squashed in with this cleanup...

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |   16 ++++++++++------
>  drivers/gpu/drm/i915/i915_drv.h     |    7 +++++--
>  drivers/gpu/drm/i915/i915_irq.c     |    9 +++++++--
>  drivers/gpu/drm/i915/i915_reg.h     |    3 +++
>  4 files changed, 25 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d618f78..5f5eb5b 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -734,17 +734,21 @@ static void i915_ring_error_state(struct seq_file *m,
>  				  unsigned ring)
>  {
>  	seq_printf(m, "%s command stream:\n", ring_str(ring));
> +	seq_printf(m, "  HEAD: 0x%08x\n", error->head[ring]);
> +	seq_printf(m, "  TAIL: 0x%08x\n", error->tail[ring]);
>  	seq_printf(m, "  ACTHD: 0x%08x\n", error->acthd[ring]);
>  	seq_printf(m, "  IPEIR: 0x%08x\n", error->ipeir[ring]);
>  	seq_printf(m, "  IPEHR: 0x%08x\n", error->ipehr[ring]);
>  	seq_printf(m, "  INSTDONE: 0x%08x\n", error->instdone[ring]);
> -	if (ring == RCS) {
> -		if (INTEL_INFO(dev)->gen >= 4) {
> -			seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
> -			seq_printf(m, "  INSTPS: 0x%08x\n", error->instps);
> -		}
> -		seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm);
> +	if (ring == RCS && INTEL_INFO(dev)->gen >= 4) {
> +		seq_printf(m, "  INSTDONE1: 0x%08x\n", error->instdone1);
> +		seq_printf(m, "  BBADDR: 0x%08llx\n", error->bbaddr);
>  	}
> +	if (INTEL_INFO(dev)->gen >= 4)
> +		seq_printf(m, "  INSTPS: 0x%08x\n", error->instps[ring]);
> +	seq_printf(m, "  INSTPM: 0x%08x\n", error->instpm[ring]);
> +	if (INTEL_INFO(dev)->gen >= 6)
> +		seq_printf(m, "  FADDR: 0x%08x\n", error->faddr[ring]);
>  	seq_printf(m, "  seqno: 0x%08x\n", error->seqno[ring]);
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5f3ff9d..3701369 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -149,16 +149,19 @@ struct drm_i915_error_state {
>  	u32 eir;
>  	u32 pgtbl_er;
>  	u32 pipestat[I915_MAX_PIPES];
> +	u32 tail[I915_NUM_RINGS];
> +	u32 head[I915_NUM_RINGS];
>  	u32 ipeir[I915_NUM_RINGS];
>  	u32 ipehr[I915_NUM_RINGS];
>  	u32 instdone[I915_NUM_RINGS];
>  	u32 acthd[I915_NUM_RINGS];
>  	u32 error; /* gen6+ */
> -	u32 instpm;
> -	u32 instps;
> +	u32 instpm[I915_NUM_RINGS];
> +	u32 instps[I915_NUM_RINGS];
>  	u32 instdone1;
>  	u32 seqno[I915_NUM_RINGS];
>  	u64 bbaddr;
> +	u32 faddr[I915_NUM_RINGS];
>  	u64 fence[16];
>  	struct timeval time;
>  	struct drm_i915_error_object {
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 79aba7f..ea1721f 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -878,12 +878,15 @@ static void i915_record_ring_state(struct drm_device *dev,
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> +	if (INTEL_INFO(dev)->gen >= 6) 
> +		error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base));
> +
>  	if (INTEL_INFO(dev)->gen >= 4) {
>  		error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base));
>  		error->ipehr[ring->id] = I915_READ(RING_IPEHR(ring->mmio_base));
>  		error->instdone[ring->id] = I915_READ(RING_INSTDONE(ring->mmio_base));
> +		error->instps[ring->id] = I915_READ(RING_INSTPS(ring->mmio_base));
>  		if (ring->id == RCS) {
> -			error->instps = I915_READ(INSTPS);
>  			error->instdone1 = I915_READ(INSTDONE1);
>  			error->bbaddr = I915_READ64(BB_ADDR);
>  		}
> @@ -894,8 +897,11 @@ static void i915_record_ring_state(struct drm_device *dev,
>  		error->bbaddr = 0;
>  	}
>  
> +	error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
>  	error->seqno[ring->id] = dev_priv->ring->get_seqno(ring);
Whilst you are here, you may as well fix this ^.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre



More information about the Intel-gfx mailing list