[Intel-gfx] [PATCH 3/3] drm/i915: Prevent leaking of -EIO from i915_wait_request()

Daniel Vetter daniel at ffwll.ch
Thu Dec 3 01:14:54 PST 2015


On Tue, Dec 01, 2015 at 11:05:35AM +0000, Chris Wilson wrote:
> Reporting -EIO from i915_wait_request() has proven very troublematic
> over the years, with numerous hard-to-reproduce bugs cropping up in the
> corner case of where a reset occurs and the code wasn't expecting such
> an error.
> 
> If the we reset the GPU or have detected a hang and wish to reset the
> GPU, the request is forcibly complete and the wait broken. Currently, we
> report either -EAGAIN or -EIO in order for the caller to retreat and
> restart the wait (if appropriate) after dropping and then reacquiring
> the struct_mutex (essential to allow the GPU reset to proceed). However,
> if we take the view that the request is complete (no further work will
> be done on it by the GPU because it is dead and soon to be reset), then
> we can proceed with the task at hand and then drop the struct_mutex
> allowing the reset to occur. This transfers the burden of checking
> whether it is safe to proceed to the caller, which in all but one
> instance it is safe - completely eliminating the source of all spurious
> -EIO.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
>  drivers/gpu/drm/i915/i915_drv.c         | 32 +++++++++++++----------
>  drivers/gpu/drm/i915/i915_drv.h         |  5 +---
>  drivers/gpu/drm/i915/i915_gem.c         | 45 +++++++++++++--------------------
>  drivers/gpu/drm/i915/i915_irq.c         | 21 ++-------------
>  drivers/gpu/drm/i915/intel_display.c    | 32 +++++++----------------
>  drivers/gpu/drm/i915/intel_lrc.c        |  8 ++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  8 ++++++
>  8 files changed, 65 insertions(+), 88 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 864b3ae9419c..0583eda642d7 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4656,7 +4656,7 @@ i915_wedged_get(void *data, u64 *val)
>  	struct drm_device *dev = data;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	*val = i915_reset_counter(&dev_priv->gpu_error);
> +	*val = i915_terminally_wedged(&dev_priv->gpu_error);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 90faa8e03fca..eb893a5e00b1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -923,23 +923,31 @@ int i915_resume_switcheroo(struct drm_device *dev)
>  int i915_reset(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct i915_gpu_error *error = &dev_priv->gpu_error;
> +	unsigned reset_counter;
>  	bool simulated;
>  	int ret;
>  
>  	intel_reset_gt_powersave(dev);
>  
>  	mutex_lock(&dev->struct_mutex);
> +	atomic_andnot(I915_WEDGED, &error->reset_counter);
> +	reset_counter = atomic_inc_return(&error->reset_counter);
> +	if (WARN_ON(__i915_reset_in_progress(reset_counter))) {
> +		ret = -EIO;
> +		goto error;
> +	}
>  
>  	i915_gem_reset(dev);
>  
> -	simulated = dev_priv->gpu_error.stop_rings != 0;
> +	simulated = error->stop_rings != 0;
>  
>  	ret = intel_gpu_reset(dev);
>  
>  	/* Also reset the gpu hangman. */
>  	if (simulated) {
>  		DRM_INFO("Simulated gpu hang, resetting stop_rings\n");
> -		dev_priv->gpu_error.stop_rings = 0;
> +		error->stop_rings = 0;
>  		if (ret == -ENODEV) {
>  			DRM_INFO("Reset not implemented, but ignoring "
>  				 "error for simulated gpu hangs\n");
> @@ -952,8 +960,7 @@ int i915_reset(struct drm_device *dev)
>  
>  	if (ret) {
>  		DRM_ERROR("Failed to reset chip: %i\n", ret);
> -		mutex_unlock(&dev->struct_mutex);
> -		return ret;
> +		goto error;
>  	}
>  
>  	intel_overlay_reset(dev_priv);
> @@ -972,20 +979,14 @@ int i915_reset(struct drm_device *dev)
>  	 * was running at the time of the reset (i.e. we weren't VT
>  	 * switched away).
>  	 */
> -
> -	/* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset */
> -	dev_priv->gpu_error.reload_in_reset = true;
> -
>  	ret = i915_gem_init_hw(dev);
> -
> -	dev_priv->gpu_error.reload_in_reset = false;
> -
> -	mutex_unlock(&dev->struct_mutex);
>  	if (ret) {
>  		DRM_ERROR("Failed hw init on reset %d\n", ret);
> -		return ret;
> +		goto error;
>  	}
>  
> +	mutex_unlock(&dev->struct_mutex);
> +
>  	/*
>  	 * rps/rc6 re-init is necessary to restore state lost after the
>  	 * reset and the re-install of gt irqs. Skip for ironlake per
> @@ -996,6 +997,11 @@ int i915_reset(struct drm_device *dev)
>  		intel_enable_gt_powersave(dev);
>  
>  	return 0;
> +
> +error:
> +	atomic_or(I915_WEDGED, &error->reset_counter);
> +	mutex_unlock(&dev->struct_mutex);
> +	return ret;

I've warmed up to your approach to removing reload_in_reset, but I still
maintain that it should be a separate patch and not mixed in with the -EIO
ABI change.

>  }
>  
>  static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ee7677343056..c24c23d8a0c0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1384,9 +1384,6 @@ struct i915_gpu_error {
>  
>  	/* For missed irq/seqno simulation. */
>  	unsigned int test_irq_rings;
> -
> -	/* Used to prevent gem_check_wedged returning -EAGAIN during gpu reset   */
> -	bool reload_in_reset;
>  };
>  
>  enum modeset_restore {
> @@ -2977,7 +2974,7 @@ i915_gem_find_active_request(struct intel_engine_cs *ring);
>  
>  bool i915_gem_retire_requests(struct drm_device *dev);
>  void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
> -int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
> +int __must_check i915_gem_check_wedge(unsigned reset_counter,
>  				      bool interruptible);
>  
>  static inline u32 i915_reset_counter(struct i915_gpu_error *error)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 68d4bab93fd8..ce4b89f28d38 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -85,9 +85,7 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
>  {
>  	int ret;
>  
> -#define EXIT_COND (!i915_reset_in_progress(error) || \
> -		   i915_terminally_wedged(error))
> -	if (EXIT_COND)
> +	if (!i915_reset_in_progress(error))

This is one of the cases I think should be moved to patch 1. Iirc there's
1 more somewhere where we really don't need to check for terminally
wedged.

>  		return 0;
>  
>  	/*
> @@ -96,17 +94,16 @@ i915_gem_wait_for_error(struct i915_gpu_error *error)
>  	 * we should simply try to bail out and fail as gracefully as possible.
>  	 */
>  	ret = wait_event_interruptible_timeout(error->reset_queue,
> -					       EXIT_COND,
> +					       !i915_reset_in_progress(error),
>  					       10*HZ);
>  	if (ret == 0) {
>  		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
>  		return -EIO;
>  	} else if (ret < 0) {
>  		return ret;
> +	} else {
> +		return 0;
>  	}
> -#undef EXIT_COND
> -
> -	return 0;
>  }
>  
>  int i915_mutex_lock_interruptible(struct drm_device *dev)
> @@ -1110,26 +1107,19 @@ put_rpm:
>  }
>  
>  int
> -i915_gem_check_wedge(struct i915_gpu_error *error,
> -		     bool interruptible)
> +i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
>  {
> -	if (i915_reset_in_progress(error)) {
> +	if (__i915_reset_in_progress_or_wedged(reset_counter)) {
>  		/* Non-interruptible callers can't handle -EAGAIN, hence return
>  		 * -EIO unconditionally for these. */
>  		if (!interruptible)
>  			return -EIO;
>  
>  		/* Recovery complete, but the reset failed ... */
> -		if (i915_terminally_wedged(error))
> +		if (__i915_terminally_wedged(reset_counter))
>  			return -EIO;
>  
> -		/*
> -		 * Check if GPU Reset is in progress - we need intel_ring_begin
> -		 * to work properly to reinit the hw state while the gpu is
> -		 * still marked as reset-in-progress. Handle this with a flag.
> -		 */
> -		if (!error->reload_in_reset)
> -			return -EAGAIN;
> +		return -EAGAIN;
>  	}
>  
>  	return 0;
> @@ -1296,11 +1286,11 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
>  		/* We need to check whether any gpu reset happened in between
>  		 * the caller grabbing the seqno and now ... */
>  		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
> -			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
> -			 * is truely gone. */
> -			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
> -			if (ret == 0)
> -				ret = -EAGAIN;
> +			/* As we do not requeue the request over a GPU reset,
> +			 * if one does occur we know that the request is
> +			 * effectively complete.
> +			 */
> +			ret = 0;
>  			break;
>  		}
>  
> @@ -2687,8 +2677,7 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
>  
>  	*req_out = NULL;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
> -				   dev_priv->mm.interruptible);
> +	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
>  	if (ret)
>  		return ret;
>  
> @@ -4078,9 +4067,9 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  	if (ret)
>  		return ret;
>  
> -	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
> -	if (ret)
> -		return ret;
> +	/* ABI: return -EIO if already wedged */
> +	if (i915_terminally_wedged(&dev_priv->gpu_error))

Commit message says there's only one case, but I count two:
- throttle here
- waiting for ring space in case the gpu died meanwhile

Please amend the commit message with that.

> +		return -EIO;
>  
>  	spin_lock(&file_priv->mm.lock);
>  	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index e88d692583a5..3b2a8fbe0392 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2434,7 +2434,6 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
>  static void i915_reset_and_wakeup(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
> -	struct i915_gpu_error *error = &dev_priv->gpu_error;
>  	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
>  	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
>  	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
> @@ -2452,7 +2451,7 @@ static void i915_reset_and_wakeup(struct drm_device *dev)
>  	 * the reset in-progress bit is only ever set by code outside of this
>  	 * work we don't need to worry about any other races.
>  	 */
> -	if (i915_reset_in_progress(error) && !i915_terminally_wedged(error)) {
> +	if (i915_reset_in_progress(&dev_priv->gpu_error)) {

Imo we should take this path here iff wedged == true in i915_handle_error.
Otherwise control flow just becomes inconsistent I think. Rechecking the
reset counter just seems fragile.

>  		DRM_DEBUG_DRIVER("resetting chip\n");
>  		kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE,
>  				   reset_event);
> @@ -2480,25 +2479,9 @@ static void i915_reset_and_wakeup(struct drm_device *dev)
>  
>  		intel_runtime_pm_put(dev_priv);
>  
> -		if (ret == 0) {
> -			/*
> -			 * After all the gem state is reset, increment the reset
> -			 * counter and wake up everyone waiting for the reset to
> -			 * complete.
> -			 *
> -			 * Since unlock operations are a one-sided barrier only,
> -			 * we need to insert a barrier here to order any seqno
> -			 * updates before
> -			 * the counter increment.
> -			 */
> -			smp_mb__before_atomic();
> -			atomic_inc(&dev_priv->gpu_error.reset_counter);
> -
> +		if (ret == 0)
>  			kobject_uevent_env(&dev->primary->kdev->kobj,
>  					   KOBJ_CHANGE, reset_done_event);
> -		} else {
> -			atomic_or(I915_WEDGED, &error->reset_counter);
> -		}
>  
>  		/*
>  		 * Note: The wake_up also serves as a memory barrier so that
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 4447e73b54db..73c61b94f7fd 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -3287,12 +3287,9 @@ static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
>  	struct drm_device *dev = crtc->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
> -	unsigned reset_counter;
>  	bool pending;
>  
> -	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> -	if (intel_crtc->reset_counter != reset_counter ||
> -	    __i915_reset_in_progress(reset_counter))
> +	if (intel_crtc->reset_counter != i915_reset_counter(&dev_priv->gpu_error))
>  		return false;
>  
>  	spin_lock_irq(&dev->event_lock);
> @@ -10867,11 +10864,8 @@ static bool page_flip_finished(struct intel_crtc *crtc)
>  {
>  	struct drm_device *dev = crtc->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	unsigned reset_counter;
>  
> -	reset_counter = i915_reset_counter(&dev_priv->gpu_error);
> -	if (crtc->reset_counter != reset_counter ||
> -	    __i915_reset_in_progress(reset_counter))
> +	if (crtc->reset_counter != i915_reset_counter(&dev_priv->gpu_error))
>  		return true;
>  
>  	/*
> @@ -13303,9 +13297,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>  		return ret;
>  
>  	ret = drm_atomic_helper_prepare_planes(dev, state);
> -	if (!ret && !async && !i915_reset_in_progress(&dev_priv->gpu_error)) {
> -		mutex_unlock(&dev->struct_mutex);
> +	mutex_unlock(&dev->struct_mutex);
>  
> +	if (!ret && !async) {
>  		for_each_plane_in_state(state, plane, plane_state, i) {
>  			struct intel_plane_state *intel_plane_state =
>  				to_intel_plane_state(plane_state);
> @@ -13315,23 +13309,15 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
>  
>  			ret = __i915_wait_request(intel_plane_state->wait_req,
>  						  true, NULL, NULL);
> -
> -			/* Swallow -EIO errors to allow updates during hw lockup. */
> -			if (ret == -EIO)
> -				ret = 0;
> -
> -			if (ret)
> +			if (ret) {
> +				mutex_lock(&dev->struct_mutex);
> +				drm_atomic_helper_cleanup_planes(dev, state);
> +				mutex_unlock(&dev->struct_mutex);
>  				break;
> +			}
>  		}
> -
> -		if (!ret)
> -			return 0;
> -
> -		mutex_lock(&dev->struct_mutex);
> -		drm_atomic_helper_cleanup_planes(dev, state);
>  	}
>  
> -	mutex_unlock(&dev->struct_mutex);

Sneaking in lockless waits! Separate patch please.

>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index fe71c768dbc4..08b982a6ae81 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -711,6 +711,14 @@ static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
>  	if (ret)
>  		return ret;
>  
> +	/* If the request was completed due to a GPU hang, we want to
> +	 * error out before we continue to emit more commands to the GPU.
> +	 */
> +	ret = i915_gem_check_wedge(i915_reset_counter(&req->i915->gpu_error),
> +				   req->i915->mm.interruptible);
> +	if (ret)
> +		return ret;

Do we really need to recheck here? We check when creating the request, and
at most we can transition to wedged. But without dropping struct_mutex the
reset handler can't sneak in and declare the gpu terminal. And even if
that changes worst case we'll restart the entire ordeal and then notice at
the start of execbuf that it's really dead and return the -EIO to
userspace.

I think we can drop these two parts, or did I miss something?

Besides the nitpicks and 2 patches that should be split out lgtm.
-Daniel

> +
>  	ringbuf->space = space;
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 913526c0264f..511efe556d73 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2254,6 +2254,14 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
>  	if (ret)
>  		return ret;
>  
> +	/* If the request was completed due to a GPU hang, we want to
> +	 * error out before we continue to emit more commands to the GPU.
> +	 */
> +	ret = i915_gem_check_wedge(i915_reset_counter(&to_i915(ring->dev)->gpu_error),
> +				   to_i915(ring->dev)->mm.interruptible);
> +	if (ret)
> +		return ret;
> +
>  	ringbuf->space = space;
>  	return 0;
>  }
> -- 
> 2.6.2
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the Intel-gfx mailing list