[Intel-gfx] [RFC 05/13] drm/i915: Per-engine recovery

Mon Dec 16 18:50:25 CET 2013

On Mon, Dec 16, 2013 at 04:02:55PM +0000, Lister, Ian wrote:
> From de7ad64c6b112e2950b31fc6daa52dc6247f11cb Mon Sep 17 00:00:00 2001
> Message-Id: <de7ad64c6b112e2950b31fc6daa52dc6247f11cb.1387201899.git.ian.lister at intel.com>
> In-Reply-To: <cover.1387201899.git.ian.lister at intel.com>
> References: <cover.1387201899.git.ian.lister at intel.com>
> From: ian-lister <ian.lister at intel.com>
> Date: Mon, 9 Dec 2013 13:59:53 +0000
> Subject: [RFC 05/13] drm/i915: Per-engine recovery
> 
> Added i915_handle_hung_ring function. This function will be called when
> a hang is detected on an individual engine. It disables the ring, saves
> the ring state and then resets the hung engine. After reset it restores
> the ring state so that the ring will continue executing at the *next*
> command after the command which caused the hang. This means that only
> the bad command is thrown away - all other work already submitted to
> the ring remains valid and the GPU will continue executing it.
> 
> The request list is scanned to determine if a particular batch
> submission was responsible for the hang and to update the context stats
> appropriately.
> 
> The display code has been modified to insert a tracking marker in the
> hardware status page so that i915_handle_hung_ring can detect if the ring
> hung on a page flip command. If so then we need to manually complete the
> flip.
> 
> Signed-off-by: ian-lister <ian.lister at intel.com>

As usual, comments inline below.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  10 +-
>  drivers/gpu/drm/i915/i915_gem.c         |   6 +-
>  drivers/gpu/drm/i915/i915_irq.c         |  57 +++++++++---
>  drivers/gpu/drm/i915/intel_display.c    |  30 +++++-
>  drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
>  drivers/gpu/drm/i915/intel_uncore.c     | 156 ++++++++++++++++++++++++++++++++
>  6 files changed, 242 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b9c4876..0d8805e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1142,6 +1142,10 @@ struct i915_gpu_error {
>  #define I915_RESET_IN_PROGRESS_FLAG	1
>  #define I915_WEDGED			0xffffffff
>  
> +	/* The global reset score. This is used to decide when to fall back
> +	* to global reset in case per-engine reset happens too frequently */
> +	u32 score;
> +
>  	/**
>  	 * Waitqueue to signal when the reset has completed. Used by clients
>  	 * that wait for dev_priv->mm.wedged to settle.
> @@ -1941,7 +1945,7 @@ extern void intel_console_resume(struct work_struct *work);
>  
>  /* i915_irq.c */
>  void i915_queue_hangcheck(struct drm_device *dev);
> -void i915_handle_error(struct drm_device *dev, bool wedged);
> +void i915_handle_error(struct drm_device *dev, u32 mask);
>  
>  extern void intel_irq_init(struct drm_device *dev);
>  extern void intel_pm_init(struct drm_device *dev);
> @@ -2111,6 +2115,10 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
>  	return atomic_read(&error->reset_counter) == I915_WEDGED;
>  }
>  
> +void i915_set_reset_status(struct intel_ring_buffer *ring,
> +                          struct drm_i915_gem_request *request,
> +                          u32 acthd);
> +
>  void i915_gem_reset(struct drm_device *dev);
>  bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
>  int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 976b9d8..a29b355 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2278,9 +2278,9 @@ static bool i915_context_is_banned(const struct i915_ctx_hang_stats *hs)
>  	return false;
>  }
>  
> -static void i915_set_reset_status(struct intel_ring_buffer *ring,
> -				  struct drm_i915_gem_request *request,
> -				  u32 acthd)
> +void i915_set_reset_status(struct intel_ring_buffer *ring,
> +			  struct drm_i915_gem_request *request,
> +			  u32 acthd)
>  {
>  	struct i915_ctx_hang_stats *hs = NULL;
>  	bool inside, guilty;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index c2ecea1..0131423 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1322,7 +1322,7 @@ static void snb_gt_irq_handler(struct drm_device *dev,
>  		      GT_BSD_CS_ERROR_INTERRUPT |
>  		      GT_RENDER_CS_MASTER_ERROR_INTERRUPT)) {
>  		DRM_ERROR("GT error interrupt 0x%08x\n", gt_iir);
> -		i915_handle_error(dev, false);
> +		i915_handle_error(dev, 0);
>  	}
>  
>  	if (gt_iir & GT_PARITY_ERROR(dev))
> @@ -1576,7 +1576,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>  
>  		if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT) {
>  			DRM_ERROR("VEBOX CS error interrupt 0x%08x\n", pm_iir);
> -			i915_handle_error(dev_priv->dev, false);
> +			i915_handle_error(dev_priv->dev, 0);
>  		}
>  	}
>  }
> @@ -2362,20 +2362,28 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
>   * i915_handle_error - handle an error interrupt
>   * @dev: drm device
>   *
> - * Do some basic checking of regsiter state at error interrupt time and
> + * Do some basic checking of register state at error interrupt time and
>   * dump it to the syslog.  Also call i915_capture_error_state() to make
>   * sure we get a record and make it available in debugfs.  Fire a uevent
>   * so userspace knows something bad happened (should trigger collection
>   * of a ring dump etc.).
> + *
> + * mask = mask of rings to reset.
> + *   0x0000000 = (Send error uevent, no rings hung)
> + *   0x0000001 = Request RCS reset
> + *   0x0000002 = Request BSD reset
> + *   0x0000004 = Request BLT reset
> + *   ...
> + *   0x8000000 = Global reset request

This comment would look much better ans an enum ... And then we can ditch
the comment. Since this is only used in i915_irq.c we can even avoid the
I915/intel prefix.

With an enum all the magic paramters and tests in this patch become much
more readable. Note that gcc is happy to treat enums as bitmasks, so you
can still or them together (and gdb will actually decode it, in case you
have kgdb wired up or are looking at a dump).

>   */
> -void i915_handle_error(struct drm_device *dev, bool wedged)
> +void i915_handle_error(struct drm_device *dev, u32 mask)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
>  	i915_capture_error_state(dev);
>  	i915_report_and_clear_eir(dev);
>  
> -	if (wedged) {
> +	if (mask > 0) {
>  		atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG,
>  				&dev_priv->gpu_error.reset_counter);
>  
> @@ -2707,7 +2715,7 @@ ring_stuck(struct intel_ring_buffer *ring)
>  		case 1:
>  			DRM_ERROR("Kicking stuck semaphore on %s\n",
>  				  ring->name);
> -			i915_handle_error(dev, false);
> +			i915_handle_error(dev, 0);
>  			I915_WRITE_CTL(ring, tmp);
>  			return HANGCHECK_KICK;
>  		case 0:
> @@ -2856,9 +2864,10 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  	drm_i915_private_t *dev_priv = dev->dev_private;
>  	struct intel_ring_buffer *ring;
>  	int i;
> -	int busy_count = 0, rings_hung = 0;
> +	int busy_count = 0, hang_mask = 0;
>  	u32 tmp;
>  #define FIRE 50
> +#define GLOBAL_RESET_REQUEST 0x80000000
>  
>  	/* Clear the active flag *before* assessing the ring state
>          * in case new work is added just after we sample the rings.
> @@ -2891,19 +2900,39 @@ static void i915_hangcheck_elapsed(unsigned long data)
>  				 * that if we fail to break the hang it will be
>  				 * recovered by a reset next time round.
>  				 */
> -				i915_handle_error(dev, false);
> +				i915_handle_error(dev, 0);
>  				I915_WRITE_CTL(ring, tmp);
>  				ring->hangcheck.action = HANGCHECK_KICK;
>  				ring->hangcheck.score = FIRE;
>  			} else {
> -				rings_hung++;
> +				hang_mask |= (0x1 << ring->id);
>  				ring->hangcheck.score = 0;
>  			}
>  		}
>  	}
>  
> -	if (rings_hung)
> -		return i915_handle_error(dev, true);
> +	if (hang_mask) {
> +		/* One or more rings have hung */
> +		dev_priv->gpu_error.score += 5;
> +
> +		DRM_DEBUG_TDR("Hangcheck score: %d vs %d\n",
> +			dev_priv->gpu_error.score, FIRE);
> +
> +		if (INTEL_INFO(dev)->gen < 7) {
> +			/* Per-engine reset not supported */
> +			dev_priv->gpu_error.score = 0;
> +			hang_mask = GLOBAL_RESET_REQUEST;
> +		} else if (dev_priv->gpu_error.score > FIRE) {
> +			/* Per-engine reset occuring too frequently. Fall back
> +			* to a global reset to try and resolve the hang */
> +			dev_priv->gpu_error.score = 0;
> +			hang_mask = GLOBAL_RESET_REQUEST;
> +			DRM_DEBUG_TDR("Rings hanging too frequently\n");
> +		}
> +
> +		i915_handle_error(dev, hang_mask);
> +	} else if (dev_priv->gpu_error.score > 0)
> +		dev_priv->gpu_error.score--;
>  
>  	if (busy_count)
>  		/* Restart the timer in case the chip hangs without another request
> @@ -3695,7 +3724,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
>  		 */
>  		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
>  		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
> -			i915_handle_error(dev, false);
> +			i915_handle_error(dev, 0);
>  
>  		for_each_pipe(pipe) {
>  			int reg = PIPESTAT(pipe);
> @@ -3877,7 +3906,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
>  		 */
>  		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
>  		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
> -			i915_handle_error(dev, false);
> +			i915_handle_error(dev, 0);
>  
>  		for_each_pipe(pipe) {
>  			int reg = PIPESTAT(pipe);
> @@ -4122,7 +4151,7 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
>  		 */
>  		spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
>  		if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
> -			i915_handle_error(dev, false);
> +			i915_handle_error(dev, 0);
>  
>  		for_each_pipe(pipe) {
>  			int reg = PIPESTAT(pipe);
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 76ae034..38eb2e4 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -8536,6 +8536,29 @@ inline static void intel_mark_page_flip_active(struct intel_crtc *intel_crtc)
>  	smp_wmb();
>  }
>  
> +static void insert_pgflip_start_marker(struct intel_ring_buffer *ring, int pipe)
> +{
> +	/* WARNING: Caller must reserve an extra 4 spaces in the ring buffer
> +	* before calling this function. Sets the page flip tracking marker
> +	* in the hardware status page.*/
> +
> +	/* Write pipe + 1 to the status page (0 = not in progress) */
> +	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
> +        intel_ring_emit(ring, I915_GEM_PGFLIP_INDEX <<
> +                                MI_STORE_DWORD_INDEX_SHIFT);
> +        intel_ring_emit(ring, pipe + 1);
> +        intel_ring_emit(ring, MI_NOOP);
> +}
> +
> +static void insert_pgflip_end_marker(struct intel_ring_buffer *ring)
> +{
> +	/* WARNING: Caller must reserve an extra 4 spaces in the ring buffer
> +	* before calling this function. Clears the page flip tracking marker
> +	* in the hardware status page.*/
> +	insert_pgflip_start_marker(ring, -1);
> +}

Nice trick, but we already have a request structure to track all kinds of
things. Also adding a real request per pageflip will allow rid us of the
need to queue the hangcheck from ring_advance, which is imo too tricky.

So I think we should go with a real request and throw a bit more
information into that one to keep track of flips. Long term we want that
anyway so that we can e.g. also track outstanding pageflips in the
eviction code and wait for those in a last-ditch (to be able to unpin old
buffers) in a last-ditch effort to free up some space. Jon Bloomfield and
his troubles with SIGBUS from gtt mmaps will appreciate this ;-)

> +
> +
>  static int intel_gen2_queue_flip(struct drm_device *dev,
>  				 struct drm_crtc *crtc,
>  				 struct drm_framebuffer *fb,
> @@ -8757,10 +8780,12 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	if (ring->id == RCS)
>  		len += 6;
>  
> -	ret = intel_ring_begin(ring, len);
> +	ret = intel_ring_begin(ring, len + 8);
>  	if (ret)
>  		goto err_unpin;
>  
> +	insert_pgflip_start_marker(ring, intel_crtc->pipe);
> +
>  	/* Unmask the flip-done completion message. Note that the bspec says that
>  	 * we should do this for both the BCS and RCS, and that we must not unmask
>  	 * more than one flip event at any time (or ensure that one flip message
> @@ -8788,6 +8813,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
>  	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
>  	intel_ring_emit(ring, (MI_NOOP));
>  
> +	insert_pgflip_end_marker(ring);
> +
>  	intel_mark_page_flip_active(intel_crtc);
>  	__intel_ring_advance(ring);
>  	return 0;
> @@ -8798,6 +8825,7 @@ err:
>  	return ret;
>  }
>  
> +
>  static int intel_default_queue_flip(struct drm_device *dev,
>  				    struct drm_crtc *crtc,
>  				    struct drm_framebuffer *fb,
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 6417de1..b9125dc 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -66,6 +66,7 @@ struct intel_ring_hangcheck {
>  	int score;
>  	enum intel_ring_hangcheck_action action;
>  	u32 resets; /* Total resets applied to this ring/engine*/
> +	u32 last_head; /* Head value recorded at last hang */
>  };
>  
>  struct  intel_ring_buffer {
> @@ -260,6 +261,7 @@ intel_write_status_page(struct intel_ring_buffer *ring,
>   */
>  #define I915_GEM_HWS_INDEX		0x20
>  #define I915_GEM_HWS_SCRATCH_INDEX	0x30
> +#define I915_GEM_PGFLIP_INDEX           0x35
>  #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>  
>  void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 764e2af..c51e389 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1096,6 +1096,162 @@ int intel_gpu_engine_reset(struct drm_device *dev, enum intel_ring_id engine)
>  	return ret;
>  }
>  
> +int i915_handle_hung_ring(struct intel_ring_buffer *ring)
> +{
> +	/*
> +	 *  WARNING: This function should only be called whilst holding
> +	 *           dev->struct_mutex.

Just enforce this with a WARN_ON(!mutex_is_locked) and kill the comment.
Executable checks never get stale, code comments always do.

> +	 *
> +	 *  Call this function if a hang is detected on one of the rings.
> +	 *  The ring is disabled and the ring state is saved. The engine
> +	 *  is then reset and the ring state is restored. This restores the
> +	 *  ring head pointer at the start of the *next* instruction in the
> +	 *  ring after the instruction that caused the hang. If the command
> +	 *  parser was processing a batch buffer at the time, then control
> +	 *  returns to the next instruction in the ring following the
> +	 *  MI_BATCH_BUFFER_START command.
> +	 *
> +	 *  From the perspective of the driver everything continues as if the
> +	 *  hardware did not hang and no work is lost, except the failing
> +	 *  command/batch buffer.
> +	 */
> +	struct drm_device *dev = ring->dev;
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_request *request;
> +	struct intel_unpin_work *unpin_work;
> +	struct drm_crtc *crtc;
> +	struct intel_crtc *intel_crtc;
> +	uint32_t ring_flags = 0;
> +	uint32_t head;
> +	int ret = 0;
> +	u32 completed_seqno;
> +	u32 acthd;
> +	int pipe;
> +
> +	acthd = intel_ring_get_active_head(ring);
> +	completed_seqno = ring->get_seqno(ring, false);
> +
> +	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> +
> +	DRM_ERROR("Stuck ring %d (GPU Hang)\n", ring->id);
> +
> +	/* Take wake lock to prevent power saving mode */
> +	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
> +
> +	/* Search the request list to see which batch buffer caused
> +	 * the hang. Only check requests that haven't yet completed*/
> +	list_for_each_entry(request, &ring->request_list, list) {
> +		if (request && (request->seqno > completed_seqno))
> +			i915_set_reset_status(ring, request, acthd);
> +	}
> +
> +	/* Check if the ring has hung on an MI_DISPLAY_FLIP command.
> +	 * The pipe value will be stored in the HWS page if it has.*/
> +	pipe = intel_read_status_page(ring, I915_GEM_PGFLIP_INDEX);
> +	if (pipe) {
> +		/* Clear it to avoid responding to it twice */
> +		intel_write_status_page(ring, I915_GEM_PGFLIP_INDEX, 0);
> +	}
> +
> +	/* Clear simulated hang flags */
> +	if (dev_priv->gpu_error.stop_rings) {
> +		DRM_DEBUG_TDR("Simulated gpu hang, rst stop_rings bits %08x\n",
> +			(0x1 << ring->id));
> +		dev_priv->gpu_error.stop_rings &= ~(0x1 << ring->id);
> +	}
> +
> +	ret = intel_ring_disable(ring);
> +
> +	if (ret != 0) {
> +		DRM_ERROR("Failed to disable ring %d\n", ring->id);
> +		goto handle_hung_ring_error;
> +	}
> +
> +	/* Sample the current ring head position */
> +	head = I915_READ(RING_HEAD(ring->mmio_base)) & HEAD_ADDR;
> +	DRM_DEBUG_TDR("head 0x%08X, last_head 0x%08X\n",
> +		head, ring->hangcheck.last_head);
> +	if (head == ring->hangcheck.last_head) {
> +		/* In most cases the ring head pointer will
> +		 * automatically advance to the next instruction
> +		 * as soon as it has read the current instruction,
> +		 * without waiting for it to complete. This seems
> +		 * to be the default behaviour, however an MBOX
> +		 * wait inserted directly to the VCS/BCS rings
> +		 * does not behave in the same way, instead the
> +		 * head pointer will still be pointing at the MBOX
> +		 * instruction until it completes.
> +		 * So, if the ring has not advanced since the last
> +		 * time it hung then force it to advance to the next
> +		 * QWORD*/
> +		ring_flags = FORCE_ADVANCE;
> +		DRM_DEBUG_TDR("Force ring head to advance\n");
> +	}
> +
> +	ring->hangcheck.last_head = head;
> +
> +	ret = intel_ring_save(ring, ring_flags);
> +
> +	if (ret != 0) {
> +		DRM_ERROR("Failed to save ring state\n");
> +		goto handle_hung_ring_error;
> +	}
> +
> +	ret = intel_gpu_engine_reset(dev, ring->id);
> +
> +	if (ret != 0) {
> +		DRM_ERROR("Failed to reset engine\n");
> +		goto handle_hung_ring_error;
> +	}
> +
> +	ret = intel_ring_restore(ring);
> +
> +	if (ret != 0) {
> +		DRM_ERROR("Failed to restore ring state\n");
> +		goto handle_hung_ring_error;
> +	}
> +
> +	/* Correct driver state */
> +	intel_ring_resample(ring);
> +
> +	ret = intel_ring_enable(ring);
> +
> +	if (ret != 0) {
> +		DRM_ERROR("Failed to re-enable ring\n");
> +		goto handle_hung_ring_error;
> +	}
> +
> +	/* Wake up anything waiting on this rings queue */
> +	wake_up_all(&ring->irq_queue);
> +
> +	if (pipe
> +	&& ((pipe - 1) < ARRAY_SIZE(dev_priv->pipe_to_crtc_mapping))) {
> +		/* The pipe value in the status page is offset by 1 */
> +		pipe -= 1;
> +
> +		/* The ring hung on a page flip command so we must
> +		 * manually release the pending flip queue as we do not
> +		 * expect to get an interrupt.
> +		 */
> +		crtc = dev_priv->pipe_to_crtc_mapping[pipe];
> +                intel_crtc = to_intel_crtc(crtc);
> +                unpin_work = intel_crtc->unpin_work;
> +
> +		if (unpin_work && unpin_work->pending_flip_obj) {
> +			intel_prepare_page_flip(dev, intel_crtc->pipe);
> +			intel_finish_page_flip(dev, intel_crtc->pipe);
> +			DRM_DEBUG_TDR("Released stuck page flip for pipe %d\n",
> +				pipe);
> +		}
> +	}
> +
> +handle_hung_ring_error:
> +	/* Release power lock */
> +	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> +
> +	return ret;
> +}

tbh I didn't read this precisely, I think we first need to work through a
few of the infrastructure issues I've raised in earlier patches.

> +
>  
>  void intel_uncore_clear_errors(struct drm_device *dev)
>  {
> -- 
> 1.8.5.1
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch