[Intel-gfx] [PATCH 10/34] drm/i915: Remove GPU reset dependence on struct_mutex

Thu Jan 24 12:06:01 UTC 2019

Chris Wilson <chris at chris-wilson.co.uk> writes:

> Now that the submission backends are controlled via their own spinlocks,
> with a wave of a magic wand we can lift the struct_mutex requirement
> around GPU reset. That is we allow the submission frontend (userspace)
> to keep on submitting while we process the GPU reset as we can suspend
> the backend independently.
>
> The major change is around the backoff/handoff strategy for performing
> the reset. With no mutex deadlock, we no longer have to coordinate with
> any waiter, and just perform the reset immediately.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c           |  38 +-
>  drivers/gpu/drm/i915/i915_drv.h               |   5 -
>  drivers/gpu/drm/i915/i915_gem.c               |  18 +-
>  drivers/gpu/drm/i915/i915_gem_fence_reg.h     |   1 -
>  drivers/gpu/drm/i915/i915_gem_gtt.h           |   1 +
>  drivers/gpu/drm/i915/i915_gpu_error.c         | 104 +++--
>  drivers/gpu/drm/i915/i915_gpu_error.h         |  28 +-
>  drivers/gpu/drm/i915/i915_request.c           |  47 ---
>  drivers/gpu/drm/i915/i915_reset.c             | 397 ++++++++----------
>  drivers/gpu/drm/i915/i915_reset.h             |   3 +
>  drivers/gpu/drm/i915/intel_engine_cs.c        |   6 +-
>  drivers/gpu/drm/i915/intel_guc_submission.c   |   5 +-
>  drivers/gpu/drm/i915/intel_hangcheck.c        |  28 +-
>  drivers/gpu/drm/i915/intel_lrc.c              |  92 ++--
>  drivers/gpu/drm/i915/intel_overlay.c          |   2 -
>  drivers/gpu/drm/i915/intel_ringbuffer.c       |  91 ++--
>  drivers/gpu/drm/i915/intel_ringbuffer.h       |  17 +-
>  .../gpu/drm/i915/selftests/intel_hangcheck.c  |  57 +--
>  .../drm/i915/selftests/intel_workarounds.c    |   3 -
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |   4 +-
>  20 files changed, 393 insertions(+), 554 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 24d6d4ce14ef..3ec369980d40 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -1284,8 +1284,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		seq_puts(m, "Wedged\n");
>  	if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
>  		seq_puts(m, "Reset in progress: struct_mutex backoff\n");
> -	if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
> -		seq_puts(m, "Reset in progress: reset handoff to waiter\n");
>  	if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
>  		seq_puts(m, "Waiter holding struct mutex\n");
>  	if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
> @@ -1321,15 +1319,15 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		struct rb_node *rb;
>  
>  		seq_printf(m, "%s:\n", engine->name);
> -		seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
> +		seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
>  			   engine->hangcheck.seqno, seqno[id],
> -			   intel_engine_last_submit(engine));
> -		seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n",
> +			   intel_engine_last_submit(engine),
> +			   jiffies_to_msecs(jiffies -
> +					    engine->hangcheck.action_timestamp));
> +		seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
>  			   yesno(intel_engine_has_waiter(engine)),
>  			   yesno(test_bit(engine->id,
> -					  &dev_priv->gpu_error.missed_irq_rings)),
> -			   yesno(engine->hangcheck.stalled),
> -			   yesno(engine->hangcheck.wedged));
> +					  &dev_priv->gpu_error.missed_irq_rings)));
>  
>  		spin_lock_irq(&b->rb_lock);
>  		for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
> @@ -1343,11 +1341,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>  		seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
>  			   (long long)engine->hangcheck.acthd,
>  			   (long long)acthd[id]);
> -		seq_printf(m, "\taction = %s(%d) %d ms ago\n",
> -			   hangcheck_action_to_str(engine->hangcheck.action),
> -			   engine->hangcheck.action,
> -			   jiffies_to_msecs(jiffies -
> -					    engine->hangcheck.action_timestamp));

Yeah it is a time for sample and most decision are on top of
seqno. Welcomed compression.

>  
>  		if (engine->id == RCS) {
>  			seq_puts(m, "\tinstdone read =\n");
> @@ -3886,8 +3879,6 @@ static int
>  i915_wedged_set(void *data, u64 val)

*hones his axe*

>  {
>  	struct drm_i915_private *i915 = data;
> -	struct intel_engine_cs *engine;
> -	unsigned int tmp;
>  
>  	/*
>  	 * There is no safeguard against this debugfs entry colliding
> @@ -3900,18 +3891,8 @@ i915_wedged_set(void *data, u64 val)
>  	if (i915_reset_backoff(&i915->gpu_error))
>  		return -EAGAIN;
>  
> -	for_each_engine_masked(engine, i915, val, tmp) {
> -		engine->hangcheck.seqno = intel_engine_get_seqno(engine);
> -		engine->hangcheck.stalled = true;
> -	}
> -
>  	i915_handle_error(i915, val, I915_ERROR_CAPTURE,
>  			  "Manually set wedged engine mask = %llx", val);
> -
> -	wait_on_bit(&i915->gpu_error.flags,
> -		    I915_RESET_HANDOFF,
> -		    TASK_UNINTERRUPTIBLE);
> -
>  	return 0;
>  }
>  
> @@ -4066,13 +4047,8 @@ i915_drop_caches_set(void *data, u64 val)
>  		mutex_unlock(&i915->drm.struct_mutex);
>  	}
>  
> -	if (val & DROP_RESET_ACTIVE &&
> -	    i915_terminally_wedged(&i915->gpu_error)) {
> +	if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))
>  		i915_handle_error(i915, ALL_ENGINES, 0, NULL);
> -		wait_on_bit(&i915->gpu_error.flags,
> -			    I915_RESET_HANDOFF,
> -			    TASK_UNINTERRUPTIBLE);
> -	}
>  
>  	fs_reclaim_acquire(GFP_KERNEL);
>  	if (val & DROP_BOUND)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 03db011caa8e..59a7e90113d7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3001,11 +3001,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)
>  	return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
>  }
>  
> -static inline bool i915_reset_handoff(struct i915_gpu_error *error)
> -{
> -	return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
> -}
> -
>  static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
>  {
>  	return unlikely(test_bit(I915_WEDGED, &error->flags));
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b359390ba22c..d20b42386c3c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -657,11 +657,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
>  		     struct intel_rps_client *rps_client)
>  {
>  	might_sleep();
> -#if IS_ENABLED(CONFIG_LOCKDEP)
> -	GEM_BUG_ON(debug_locks &&
> -		   !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
> -		   !!(flags & I915_WAIT_LOCKED));
> -#endif
>  	GEM_BUG_ON(timeout < 0);
>  
>  	timeout = i915_gem_object_wait_reservation(obj->resv,
> @@ -4493,8 +4488,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  
>  	GEM_TRACE("\n");
>  
> -	mutex_lock(&i915->drm.struct_mutex);
> -
>  	wakeref = intel_runtime_pm_get(i915);
>  	intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
>  
> @@ -4520,6 +4513,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
>  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
>  	intel_runtime_pm_put(i915, wakeref);
>

unset_wedged looks ok, I should have faith
as I reviewed the patch. In retrospect
READ_ONCE on gt.scratch might have been
good at rising suspicion, even tho
superfluous.

Looks like that engines we are saved by the
timeline lock. Andw we have layed some GEM_BUG_ON mines
there so we will hear the explosions if any.

> +	mutex_lock(&i915->drm.struct_mutex);
>  	i915_gem_contexts_lost(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  }
> @@ -4534,6 +4528,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>  	wakeref = intel_runtime_pm_get(i915);
>  	intel_suspend_gt_powersave(i915);
>  
> +	flush_workqueue(i915->wq);

I don't know what is happening here. Why
don't we need the i915_gem_drain_workqueue in here?

> +
>  	mutex_lock(&i915->drm.struct_mutex);
>  
>  	/*
> @@ -4563,11 +4559,9 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>  	i915_retire_requests(i915); /* ensure we flush after wedging */
>  
>  	mutex_unlock(&i915->drm.struct_mutex);
> +	i915_reset_flush(i915);
>  
> -	intel_uc_suspend(i915);
> -
> -	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
> -	cancel_delayed_work_sync(&i915->gt.retire_work);
> +	drain_delayed_work(&i915->gt.retire_work);

Hangcheck is inside reset flush but why the change
for retire?

>  
>  	/*
>  	 * As the idle_work is rearming if it detects a race, play safe and
> @@ -4575,6 +4569,8 @@ int i915_gem_suspend(struct drm_i915_private *i915)
>  	 */
>  	drain_delayed_work(&i915->gt.idle_work);
>  
> +	intel_uc_suspend(i915);
> +
>  	/*
>  	 * Assert that we successfully flushed all the work and
>  	 * reset the GPU back to its idle, low power state.
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> index 99a31ded4dfd..09dcaf14121b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h
> @@ -50,4 +50,3 @@ struct drm_i915_fence_reg {
>  };
>  
>  #endif
> -
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 9229b03d629b..a0039ea97cdc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -39,6 +39,7 @@
>  #include <linux/pagevec.h>
>  
>  #include "i915_request.h"
> +#include "i915_reset.h"
>  #include "i915_selftest.h"
>  #include "i915_timeline.h"
>  
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 1f8e80e31b49..4eef0462489c 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -533,10 +533,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
>  	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
>  	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
>  	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
> -	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
> -	err_printf(m, "  hangcheck action: %s\n",
> -		   hangcheck_action_to_str(ee->hangcheck_action));
> -	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n",
> +	err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
>  		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
>  		   ee->hangcheck_timestamp,
>  		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
> @@ -684,15 +681,15 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
>  		   jiffies_to_msecs(error->capture - error->epoch));
>  
>  	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
> -		if (error->engine[i].hangcheck_stalled &&
> -		    error->engine[i].context.pid) {
> -			err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
> -				   engine_name(m->i915, i),
> -				   error->engine[i].context.comm,
> -				   error->engine[i].context.pid,
> -				   error->engine[i].context.ban_score,
> -				   bannable(&error->engine[i].context));
> -		}
> +		if (!error->engine[i].context.pid)
> +			continue;
> +
> +		err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
> +			   engine_name(m->i915, i),
> +			   error->engine[i].context.comm,
> +			   error->engine[i].context.pid,
> +			   error->engine[i].context.ban_score,
> +			   bannable(&error->engine[i].context));
>  	}
>  	err_printf(m, "Reset count: %u\n", error->reset_count);
>  	err_printf(m, "Suspend count: %u\n", error->suspend_count);
> @@ -1144,7 +1141,8 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
>  	return i;
>  }
>  
> -/* Generate a semi-unique error code. The code is not meant to have meaning, The
> +/*
> + * Generate a semi-unique error code. The code is not meant to have meaning, The
>   * code's only purpose is to try to prevent false duplicated bug reports by
>   * grossly estimating a GPU error state.
>   *
> @@ -1153,29 +1151,23 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err,
>   *
>   * It's only a small step better than a random number in its current form.
>   */
> -static u32 i915_error_generate_code(struct drm_i915_private *dev_priv,
> -				    struct i915_gpu_state *error,
> -				    int *engine_id)
> +static u32 i915_error_generate_code(struct i915_gpu_state *error,
> +				    unsigned long engine_mask)
>  {
> -	u32 error_code = 0;
> -	int i;
> -
> -	/* IPEHR would be an ideal way to detect errors, as it's the gross
> +	/*
> +	 * IPEHR would be an ideal way to detect errors, as it's the gross
>  	 * measure of "the command that hung." However, has some very common
>  	 * synchronization commands which almost always appear in the case
>  	 * strictly a client bug. Use instdone to differentiate those some.
>  	 */
> -	for (i = 0; i < I915_NUM_ENGINES; i++) {
> -		if (error->engine[i].hangcheck_stalled) {
> -			if (engine_id)
> -				*engine_id = i;
> +	if (engine_mask) {
> +		struct drm_i915_error_engine *ee =
> +			&error->engine[ffs(engine_mask)];
>  
> -			return error->engine[i].ipehr ^
> -			       error->engine[i].instdone.instdone;
> -		}
> +		return ee->ipehr ^ ee->instdone.instdone;
>  	}
>  
> -	return error_code;
> +	return 0;
>  }
>  
>  static void gem_record_fences(struct i915_gpu_state *error)
> @@ -1338,9 +1330,8 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
>  	}
>  
>  	ee->idle = intel_engine_is_idle(engine);
> -	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
> -	ee->hangcheck_action = engine->hangcheck.action;
> -	ee->hangcheck_stalled = engine->hangcheck.stalled;
> +	if (!ee->idle)
> +		ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
>  	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
>  						  engine);
>  
> @@ -1783,31 +1774,35 @@ static void capture_reg_state(struct i915_gpu_state *error)
>  	error->pgtbl_er = I915_READ(PGTBL_ER);
>  }
>  
> -static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
> -				   struct i915_gpu_state *error,
> -				   u32 engine_mask,
> -				   const char *error_msg)
> +static const char *
> +error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg)
>  {
> -	u32 ecode;
> -	int engine_id = -1, len;
> +	int len;
> +	int i;
>  
> -	ecode = i915_error_generate_code(dev_priv, error, &engine_id);
> +	for (i = 0; i < ARRAY_SIZE(error->engine); i++)
> +		if (!error->engine[i].context.pid)
> +			engines &= ~BIT(i);

No more grouping for driver internal hangs...?

>  
>  	len = scnprintf(error->error_msg, sizeof(error->error_msg),
> -			"GPU HANG: ecode %d:%d:0x%08x",
> -			INTEL_GEN(dev_priv), engine_id, ecode);
> -
> -	if (engine_id != -1 && error->engine[engine_id].context.pid)
> +			"GPU HANG: ecode %d:%lx:0x%08x",
> +			INTEL_GEN(error->i915), engines,
> +			i915_error_generate_code(error, engines));
> +	if (engines) {
> +		/* Just show the first executing process, more is confusing */
> +		i = ffs(engines);

then why not just make the ecode accepting single engine and move it here.

>  		len += scnprintf(error->error_msg + len,
>  				 sizeof(error->error_msg) - len,
>  				 ", in %s [%d]",
> -				 error->engine[engine_id].context.comm,
> -				 error->engine[engine_id].context.pid);
> +				 error->engine[i].context.comm,
> +				 error->engine[i].context.pid);
> +	}
> +	if (msg)
> +		len += scnprintf(error->error_msg + len,
> +				 sizeof(error->error_msg) - len,
> +				 ", %s", msg);
>  
> -	scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
> -		  ", reason: %s, action: %s",
> -		  error_msg,
> -		  engine_mask ? "reset" : "continue");
> +	return error->error_msg;
>  }
>  
>  static void capture_gen_state(struct i915_gpu_state *error)
> @@ -1847,7 +1842,7 @@ static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
>  	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
>  		const struct drm_i915_error_engine *ee = &error->engine[i];
>  
> -		if (ee->hangcheck_stalled &&
> +		if (ee->hangcheck_timestamp &&
>  		    time_before(ee->hangcheck_timestamp, epoch))
>  			epoch = ee->hangcheck_timestamp;
>  	}
> @@ -1921,7 +1916,7 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
>   * i915_capture_error_state - capture an error record for later analysis
>   * @i915: i915 device
>   * @engine_mask: the mask of engines triggering the hang
> - * @error_msg: a message to insert into the error capture header
> + * @msg: a message to insert into the error capture header
>   *
>   * Should be called when an error is detected (either a hang or an error
>   * interrupt) to capture error state from the time of the error.  Fills
> @@ -1929,8 +1924,8 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
>   * to pick up.
>   */
>  void i915_capture_error_state(struct drm_i915_private *i915,
> -			      u32 engine_mask,
> -			      const char *error_msg)
> +			      unsigned long engine_mask,
> +			      const char *msg)
>  {
>  	static bool warned;
>  	struct i915_gpu_state *error;
> @@ -1946,8 +1941,7 @@ void i915_capture_error_state(struct drm_i915_private *i915,
>  	if (IS_ERR(error))
>  		return;
>  
> -	i915_error_capture_msg(i915, error, engine_mask, error_msg);
> -	DRM_INFO("%s\n", error->error_msg);
> +	dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));
>  
>  	if (!error->simulated) {
>  		spin_lock_irqsave(&i915->gpu_error.lock, flags);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index 604291f7762d..231173786eae 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -85,8 +85,6 @@ struct i915_gpu_state {
>  		bool waiting;
>  		int num_waiters;
>  		unsigned long hangcheck_timestamp;
> -		bool hangcheck_stalled;
> -		enum intel_engine_hangcheck_action hangcheck_action;
>  		struct i915_address_space *vm;
>  		int num_requests;
>  		u32 reset_count;
> @@ -197,6 +195,8 @@ struct i915_gpu_state {
>  	struct scatterlist *sgl, *fit;
>  };
>  
> +struct i915_gpu_restart;
> +
>  struct i915_gpu_error {
>  	/* For hangcheck timer */
>  #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
> @@ -247,15 +247,6 @@ struct i915_gpu_error {
>  	 * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
>  	 * secondary role in preventing two concurrent global reset attempts.
>  	 *
> -	 * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
> -	 * struct_mutex. We try to acquire the struct_mutex in the reset worker,
> -	 * but it may be held by some long running waiter (that we cannot
> -	 * interrupt without causing trouble). Once we are ready to do the GPU
> -	 * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
> -	 * they already hold the struct_mutex and want to participate they can
> -	 * inspect the bit and do the reset directly, otherwise the worker
> -	 * waits for the struct_mutex.
> -	 *
>  	 * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
>  	 * acquire the struct_mutex to reset an engine, we need an explicit
>  	 * flag to prevent two concurrent reset attempts in the same engine.
> @@ -269,20 +260,13 @@ struct i915_gpu_error {
>  	 */
>  	unsigned long flags;
>  #define I915_RESET_BACKOFF	0
> -#define I915_RESET_HANDOFF	1
> -#define I915_RESET_MODESET	2
> -#define I915_RESET_ENGINE	3
> +#define I915_RESET_MODESET	1
> +#define I915_RESET_ENGINE	2
>  #define I915_WEDGED		(BITS_PER_LONG - 1)
>  
>  	/** Number of times an engine has been reset */
>  	u32 reset_engine_count[I915_NUM_ENGINES];
>  
> -	/** Set of stalled engines with guilty requests, in the current reset */
> -	u32 stalled_mask;
> -
> -	/** Reason for the current *global* reset */
> -	const char *reason;
> -
>  	struct mutex wedge_mutex; /* serialises wedging/unwedging */
>  
>  	/**
> @@ -299,6 +283,8 @@ struct i915_gpu_error {
>  
>  	/* For missed irq/seqno simulation. */
>  	unsigned long test_irq_rings;
> +
> +	struct i915_gpu_restart *restart;
>  };
>  
>  struct drm_i915_error_state_buf {
> @@ -320,7 +306,7 @@ void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
>  
>  struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
>  void i915_capture_error_state(struct drm_i915_private *dev_priv,
> -			      u32 engine_mask,
> +			      unsigned long engine_mask,
>  			      const char *error_msg);
>  
>  static inline struct i915_gpu_state *
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 5e178f5ac18b..80232de8e2be 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1083,18 +1083,6 @@ static bool __i915_spin_request(const struct i915_request *rq,
>  	return false;
>  }
>  
> -static bool __i915_wait_request_check_and_reset(struct i915_request *request)
> -{
> -	struct i915_gpu_error *error = &request->i915->gpu_error;
> -
> -	if (likely(!i915_reset_handoff(error)))
> -		return false;
> -
> -	__set_current_state(TASK_RUNNING);
> -	i915_reset(request->i915, error->stalled_mask, error->reason);
> -	return true;
> -}
> -
>  /**
>   * i915_request_wait - wait until execution of request has finished
>   * @rq: the request to wait upon
> @@ -1120,17 +1108,10 @@ long i915_request_wait(struct i915_request *rq,
>  {
>  	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
>  		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
> -	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
> -	DEFINE_WAIT_FUNC(reset, default_wake_function);
>  	DEFINE_WAIT_FUNC(exec, default_wake_function);
>  	struct intel_wait wait;
>  
>  	might_sleep();
> -#if IS_ENABLED(CONFIG_LOCKDEP)
> -	GEM_BUG_ON(debug_locks &&
> -		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
> -		   !!(flags & I915_WAIT_LOCKED));
> -#endif
>  	GEM_BUG_ON(timeout < 0);
>  
>  	if (i915_request_completed(rq))
> @@ -1140,11 +1121,7 @@ long i915_request_wait(struct i915_request *rq,
>  		return -ETIME;
>  
>  	trace_i915_request_wait_begin(rq, flags);
> -
>  	add_wait_queue(&rq->execute, &exec);
> -	if (flags & I915_WAIT_LOCKED)
> -		add_wait_queue(errq, &reset);
> -
>  	intel_wait_init(&wait);
>  	if (flags & I915_WAIT_PRIORITY)
>  		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
> @@ -1155,10 +1132,6 @@ long i915_request_wait(struct i915_request *rq,
>  		if (intel_wait_update_request(&wait, rq))
>  			break;
>  
> -		if (flags & I915_WAIT_LOCKED &&
> -		    __i915_wait_request_check_and_reset(rq))
> -			continue;
> -
>  		if (signal_pending_state(state, current)) {
>  			timeout = -ERESTARTSYS;
>  			goto complete;
> @@ -1188,9 +1161,6 @@ long i915_request_wait(struct i915_request *rq,
>  		 */
>  		goto wakeup;
>  
> -	if (flags & I915_WAIT_LOCKED)
> -		__i915_wait_request_check_and_reset(rq);
> -
>  	for (;;) {
>  		if (signal_pending_state(state, current)) {
>  			timeout = -ERESTARTSYS;
> @@ -1214,21 +1184,6 @@ long i915_request_wait(struct i915_request *rq,
>  		if (i915_request_completed(rq))
>  			break;
>  
> -		/*
> -		 * If the GPU is hung, and we hold the lock, reset the GPU
> -		 * and then check for completion. On a full reset, the engine's
> -		 * HW seqno will be advanced passed us and we are complete.
> -		 * If we do a partial reset, we have to wait for the GPU to
> -		 * resume and update the breadcrumb.
> -		 *
> -		 * If we don't hold the mutex, we can just wait for the worker
> -		 * to come along and update the breadcrumb (either directly
> -		 * itself, or indirectly by recovering the GPU).
> -		 */
> -		if (flags & I915_WAIT_LOCKED &&
> -		    __i915_wait_request_check_and_reset(rq))
> -			continue;
> -
>  		/* Only spin if we know the GPU is processing this request */
>  		if (__i915_spin_request(rq, wait.seqno, state, 2))
>  			break;
> @@ -1242,8 +1197,6 @@ long i915_request_wait(struct i915_request *rq,
>  	intel_engine_remove_wait(rq->engine, &wait);
>  complete:
>  	__set_current_state(TASK_RUNNING);
> -	if (flags & I915_WAIT_LOCKED)
> -		remove_wait_queue(errq, &reset);
>  	remove_wait_queue(&rq->execute, &exec);
>  	trace_i915_request_wait_end(rq);
>  
> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
> index 2961c21d9420..064fc6da1512 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -5,6 +5,7 @@
>   */
>  
>  #include <linux/sched/mm.h>
> +#include <linux/stop_machine.h>
>  
>  #include "i915_drv.h"
>  #include "i915_gpu_error.h"
> @@ -17,22 +18,23 @@ static void engine_skip_context(struct i915_request *rq)
>  	struct intel_engine_cs *engine = rq->engine;
>  	struct i915_gem_context *hung_ctx = rq->gem_context;
>  	struct i915_timeline *timeline = rq->timeline;
> -	unsigned long flags;
>  
> +	lockdep_assert_held(&engine->timeline.lock);
>  	GEM_BUG_ON(timeline == &engine->timeline);
>  
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
>  	spin_lock(&timeline->lock);
>  
> -	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
> -		if (rq->gem_context == hung_ctx)
> -			i915_request_skip(rq, -EIO);
> +	if (rq->global_seqno) {
> +		list_for_each_entry_continue(rq,
> +					     &engine->timeline.requests, link)
> +			if (rq->gem_context == hung_ctx)
> +				i915_request_skip(rq, -EIO);
> +	}
>  
>  	list_for_each_entry(rq, &timeline->requests, link)
>  		i915_request_skip(rq, -EIO);
>  
>  	spin_unlock(&timeline->lock);
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>  }
>  
>  static void client_mark_guilty(struct drm_i915_file_private *file_priv,
> @@ -59,7 +61,7 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
>  	}
>  }
>  
> -static void context_mark_guilty(struct i915_gem_context *ctx)
> +static bool context_mark_guilty(struct i915_gem_context *ctx)
>  {
>  	unsigned int score;
>  	bool banned, bannable;
> @@ -72,7 +74,7 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
>  
>  	/* Cool contexts don't accumulate client ban score */
>  	if (!bannable)
> -		return;
> +		return false;
>  
>  	if (banned) {
>  		DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
> @@ -83,6 +85,8 @@ static void context_mark_guilty(struct i915_gem_context *ctx)
>  
>  	if (!IS_ERR_OR_NULL(ctx->file_priv))
>  		client_mark_guilty(ctx->file_priv, ctx);
> +
> +	return banned;
>  }
>  
>  static void context_mark_innocent(struct i915_gem_context *ctx)
> @@ -90,6 +94,21 @@ static void context_mark_innocent(struct i915_gem_context *ctx)
>  	atomic_inc(&ctx->active_count);
>  }
>  
> +void i915_reset_request(struct i915_request *rq, bool guilty)
> +{
> +	lockdep_assert_held(&rq->engine->timeline.lock);
> +	GEM_BUG_ON(i915_request_completed(rq));
> +
> +	if (guilty) {
> +		i915_request_skip(rq, -EIO);
> +		if (context_mark_guilty(rq->gem_context))
> +			engine_skip_context(rq);
> +	} else {
> +		dma_fence_set_error(&rq->fence, -EAGAIN);
> +		context_mark_innocent(rq->gem_context);
> +	}
> +}
> +
>  static void gen3_stop_engine(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> @@ -533,22 +552,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
>  	int retry;
>  	int ret;
>  
> -	/*
> -	 * We want to perform per-engine reset from atomic context (e.g.
> -	 * softirq), which imposes the constraint that we cannot sleep.
> -	 * However, experience suggests that spending a bit of time waiting
> -	 * for a reset helps in various cases, so for a full-device reset
> -	 * we apply the opposite rule and wait if we want to. As we should
> -	 * always follow up a failed per-engine reset with a full device reset,
> -	 * being a little faster, stricter and more error prone for the
> -	 * atomic case seems an acceptable compromise.
> -	 *
> -	 * Unfortunately this leads to a bimodal routine, when the goal was
> -	 * to have a single reset function that worked for resetting any
> -	 * number of engines simultaneously.
> -	 */
> -	might_sleep_if(engine_mask == ALL_ENGINES);

Oh here it is. I was after this on atomic resets.

> -
>  	/*
>  	 * If the power well sleeps during the reset, the reset
>  	 * request may be dropped and never completes (causing -EIO).
> @@ -580,8 +583,6 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask)
>  		}
>  		if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES)
>  			break;
> -
> -		cond_resched();
>  	}
>  	intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
>  
> @@ -620,11 +621,8 @@ int intel_reset_guc(struct drm_i915_private *i915)
>   * Ensure irq handler finishes, and not run again.
>   * Also return the active request so that we only search for it once.
>   */
> -static struct i915_request *
> -reset_prepare_engine(struct intel_engine_cs *engine)
> +static void reset_prepare_engine(struct intel_engine_cs *engine)
>  {
> -	struct i915_request *rq;
> -
>  	/*
>  	 * During the reset sequence, we must prevent the engine from
>  	 * entering RC6. As the context state is undefined until we restart
> @@ -633,162 +631,86 @@ reset_prepare_engine(struct intel_engine_cs *engine)
>  	 * GPU state upon resume, i.e. fail to restart after a reset.
>  	 */
>  	intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
> -
> -	rq = engine->reset.prepare(engine);
> -	if (rq && rq->fence.error == -EIO)
> -		rq = ERR_PTR(-EIO); /* Previous reset failed! */
> -
> -	return rq;
> +	engine->reset.prepare(engine);
>  }
>  
> -static int reset_prepare(struct drm_i915_private *i915)
> +static void reset_prepare(struct drm_i915_private *i915)
>  {
>  	struct intel_engine_cs *engine;
> -	struct i915_request *rq;
>  	enum intel_engine_id id;
> -	int err = 0;
>  
> -	for_each_engine(engine, i915, id) {
> -		rq = reset_prepare_engine(engine);
> -		if (IS_ERR(rq)) {
> -			err = PTR_ERR(rq);
> -			continue;
> -		}
> -
> -		engine->hangcheck.active_request = rq;
> -	}
> +	for_each_engine(engine, i915, id)
> +		reset_prepare_engine(engine);
>  
> -	i915_gem_revoke_fences(i915);
>  	intel_uc_sanitize(i915);
> -
> -	return err;
>  }
>  
> -/* Returns the request if it was guilty of the hang */
> -static struct i915_request *
> -reset_request(struct intel_engine_cs *engine,
> -	      struct i915_request *rq,
> -	      bool stalled)
> +static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
>  {
> +	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
> +	int err;
> +
>  	/*
> -	 * The guilty request will get skipped on a hung engine.
> -	 *
> -	 * Users of client default contexts do not rely on logical
> -	 * state preserved between batches so it is safe to execute
> -	 * queued requests following the hang. Non default contexts
> -	 * rely on preserved state, so skipping a batch loses the
> -	 * evolution of the state and it needs to be considered corrupted.
> -	 * Executing more queued batches on top of corrupted state is
> -	 * risky. But we take the risk by trying to advance through
> -	 * the queued requests in order to make the client behaviour
> -	 * more predictable around resets, by not throwing away random
> -	 * amount of batches it has prepared for execution. Sophisticated
> -	 * clients can use gem_reset_stats_ioctl and dma fence status
> -	 * (exported via sync_file info ioctl on explicit fences) to observe
> -	 * when it loses the context state and should rebuild accordingly.
> -	 *
> -	 * The context ban, and ultimately the client ban, mechanism are safety
> -	 * valves if client submission ends up resulting in nothing more than
> -	 * subsequent hangs.
> +	 * Everything depends on having the GTT running, so we need to start
> +	 * there.
>  	 */
> +	err = i915_ggtt_enable_hw(i915);
> +	if (err)
> +		return err;
>  
> -	if (i915_request_completed(rq)) {
> -		GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n",
> -			  engine->name, rq->global_seqno,
> -			  rq->fence.context, rq->fence.seqno,
> -			  intel_engine_get_seqno(engine));
> -		stalled = false;
> -	}
> -
> -	if (stalled) {
> -		context_mark_guilty(rq->gem_context);
> -		i915_request_skip(rq, -EIO);
> +	for_each_engine(engine, i915, id)
> +		intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id));
>  
> -		/* If this context is now banned, skip all pending requests. */
> -		if (i915_gem_context_is_banned(rq->gem_context))
> -			engine_skip_context(rq);
> -	} else {
> -		/*
> -		 * Since this is not the hung engine, it may have advanced
> -		 * since the hang declaration. Double check by refinding
> -		 * the active request at the time of the reset.
> -		 */
> -		rq = i915_gem_find_active_request(engine);
> -		if (rq) {
> -			unsigned long flags;
> -
> -			context_mark_innocent(rq->gem_context);
> -			dma_fence_set_error(&rq->fence, -EAGAIN);
> -
> -			/* Rewind the engine to replay the incomplete rq */
> -			spin_lock_irqsave(&engine->timeline.lock, flags);
> -			rq = list_prev_entry(rq, link);
> -			if (&rq->link == &engine->timeline.requests)
> -				rq = NULL;
> -			spin_unlock_irqrestore(&engine->timeline.lock, flags);
> -		}
> -	}
> +	i915_gem_restore_fences(i915);
>  
> -	return rq;
> +	return err;
>  }
>  
> -static void reset_engine(struct intel_engine_cs *engine,
> -			 struct i915_request *rq,
> -			 bool stalled)
> +static void reset_finish_engine(struct intel_engine_cs *engine)
>  {
> -	if (rq)
> -		rq = reset_request(engine, rq, stalled);
> -
> -	/* Setup the CS to resume from the breadcrumb of the hung request */
> -	engine->reset.reset(engine, rq);
> +	engine->reset.finish(engine);
> +	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
>  }
>  
> -static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
> +struct i915_gpu_restart {
> +	struct work_struct work;
> +	struct drm_i915_private *i915;
> +};
> +
> +static void restart_work(struct work_struct *work)
>  {
> +	struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
> +	struct drm_i915_private *i915 = arg->i915;
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
> +	intel_wakeref_t wakeref;
>  
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> +	wakeref = intel_runtime_pm_get(i915);
> +	mutex_lock(&i915->drm.struct_mutex);
>  
> -	i915_retire_requests(i915);

Can't do this anymore yes. What will be the effect
of delaying this and the other explicit retirements?
Are we more prone to starvation?

> +	smp_store_mb(i915->gpu_error.restart, NULL);

Checkpatch might want a comment for the mb.

>  
>  	for_each_engine(engine, i915, id) {
> -		struct intel_context *ce;
> -
> -		reset_engine(engine,
> -			     engine->hangcheck.active_request,
> -			     stalled_mask & ENGINE_MASK(id));
> -		ce = fetch_and_zero(&engine->last_retired_context);
> -		if (ce)
> -			intel_context_unpin(ce);
> +		struct i915_request *rq;
>  
>  		/*
>  		 * Ostensibily, we always want a context loaded for powersaving,
>  		 * so if the engine is idle after the reset, send a request
>  		 * to load our scratch kernel_context.
> -		 *
> -		 * More mysteriously, if we leave the engine idle after a reset,
> -		 * the next userspace batch may hang, with what appears to be
> -		 * an incoherent read by the CS (presumably stale TLB). An
> -		 * empty request appears sufficient to paper over the glitch.
>  		 */
> -		if (intel_engine_is_idle(engine)) {
> -			struct i915_request *rq;
> +		if (!intel_engine_is_idle(engine))
> +			continue;

Why did you remove the comment on needing a empty request?

Also if the request causing nonidle could be troublesome one,
from troublesome context, why not just skip the idle check and
always add one for kernel ctx?

>  
> -			rq = i915_request_alloc(engine, i915->kernel_context);
> -			if (!IS_ERR(rq))
> -				i915_request_add(rq);
> -		}
> +		rq = i915_request_alloc(engine, i915->kernel_context);
> +		if (!IS_ERR(rq))
> +			i915_request_add(rq);
>  	}
>  
> -	i915_gem_restore_fences(i915);
> -}
> -
> -static void reset_finish_engine(struct intel_engine_cs *engine)
> -{
> -	engine->reset.finish(engine);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	intel_runtime_pm_put(i915, wakeref);
>  
> -	intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
> +	kfree(arg);
>  }
>  
>  static void reset_finish(struct drm_i915_private *i915)
> @@ -796,11 +718,30 @@ static void reset_finish(struct drm_i915_private *i915)
>  	struct intel_engine_cs *engine;
>  	enum intel_engine_id id;
>  
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -
> -	for_each_engine(engine, i915, id) {
> -		engine->hangcheck.active_request = NULL;
> +	for_each_engine(engine, i915, id)
>  		reset_finish_engine(engine);
> +}
> +
> +static void reset_restart(struct drm_i915_private *i915)
> +{
> +	struct i915_gpu_restart *arg;
> +
> +	/*
> +	 * Following the reset, ensure that we always reload context for
> +	 * powersaving, and to correct engine->last_retired_context. Since
> +	 * this requires us to submit a request, queue a worker to do that
> +	 * task for us to evade any locking here.
> +	 */

Nice, this was/will be helpful!

> +	if (READ_ONCE(i915->gpu_error.restart))
> +		return;
> +
> +	arg = kmalloc(sizeof(*arg), GFP_KERNEL);
> +	if (arg) {
> +		arg->i915 = i915;
> +		INIT_WORK(&arg->work, restart_work);
> +
> +		WRITE_ONCE(i915->gpu_error.restart, arg);
> +		queue_work(i915->wq, &arg->work);
>  	}
>  }
>  
> @@ -889,8 +830,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	struct i915_timeline *tl;
>  	bool ret = false;
>  
> -	lockdep_assert_held(&i915->drm.struct_mutex);
> -
>  	if (!test_bit(I915_WEDGED, &error->flags))
>  		return true;
>  
> @@ -913,9 +852,9 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	 */
>  	list_for_each_entry(tl, &i915->gt.timelines, link) {
>  		struct i915_request *rq;
> +		long timeout;
>  
> -		rq = i915_gem_active_peek(&tl->last_request,
> -					  &i915->drm.struct_mutex);
> +		rq = i915_gem_active_get_unlocked(&tl->last_request);
>  		if (!rq)
>  			continue;
>  
> @@ -930,12 +869,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  		 * and when the seqno passes the fence, the signaler
>  		 * then signals the fence waking us up).
>  		 */
> -		if (dma_fence_default_wait(&rq->fence, true,
> -					   MAX_SCHEDULE_TIMEOUT) < 0)
> +		timeout = dma_fence_default_wait(&rq->fence, true,
> +						 MAX_SCHEDULE_TIMEOUT);
> +		i915_request_put(rq);
> +		if (timeout < 0)
>  			goto unlock;
>  	}
> -	i915_retire_requests(i915);
> -	GEM_BUG_ON(i915->gt.active_requests);
>  
>  	intel_engines_sanitize(i915, false);
>  
> @@ -949,7 +888,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	 * context and do not require stop_machine().
>  	 */
>  	intel_engines_reset_default_submission(i915);
> -	i915_gem_contexts_lost(i915);
>  
>  	GEM_TRACE("end\n");
>  
> @@ -962,6 +900,43 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
>  	return ret;
>  }
>  
> +struct __i915_reset {
> +	struct drm_i915_private *i915;
> +	unsigned int stalled_mask;
> +};
> +
> +static int __i915_reset__BKL(void *data)
> +{
> +	struct __i915_reset *arg = data;
> +	int err;
> +
> +	err = intel_gpu_reset(arg->i915, ALL_ENGINES);
> +	if (err)
> +		return err;
> +
> +	return gt_reset(arg->i915, arg->stalled_mask);
> +}
> +
> +#if 0
> +#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)

Lets remove the machinery to select reset stop_machine and the #include.

> +#else
> +#define __do_reset(fn, arg) fn(arg)
> +#endif
> +
> +static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
> +{
> +	struct __i915_reset arg = { i915, stalled_mask };
> +	int err, i;
> +
> +	err = __do_reset(__i915_reset__BKL, &arg);
> +	for (i = 0; err && i < 3; i++) {
> +		msleep(100);
> +		err = __do_reset(__i915_reset__BKL, &arg);
> +	}
> +
> +	return err;
> +}
> +
>  /**
>   * i915_reset - reset chip after a hang
>   * @i915: #drm_i915_private to reset
> @@ -987,31 +962,22 @@ void i915_reset(struct drm_i915_private *i915,
>  {
>  	struct i915_gpu_error *error = &i915->gpu_error;
>  	int ret;
> -	int i;
>  
>  	GEM_TRACE("flags=%lx\n", error->flags);
>  
>  	might_sleep();

What will? I didn't spot anything in execlists_reset_prepare.

> -	lockdep_assert_held(&i915->drm.struct_mutex);
>  	assert_rpm_wakelock_held(i915);
>  	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
>  
> -	if (!test_bit(I915_RESET_HANDOFF, &error->flags))
> -		return;
> -
>  	/* Clear any previous failed attempts at recovery. Time to try again. */
>  	if (!i915_gem_unset_wedged(i915))
> -		goto wakeup;
> +		return;
>  
>  	if (reason)
>  		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
>  	error->reset_count++;
>  
> -	ret = reset_prepare(i915);
> -	if (ret) {
> -		dev_err(i915->drm.dev, "GPU recovery failed\n");
> -		goto taint;
> -	}
> +	reset_prepare(i915);
>  
>  	if (!intel_has_gpu_reset(i915)) {
>  		if (i915_modparams.reset)
> @@ -1021,32 +987,11 @@ void i915_reset(struct drm_i915_private *i915,
>  		goto error;
>  	}
>  
> -	for (i = 0; i < 3; i++) {
> -		ret = intel_gpu_reset(i915, ALL_ENGINES);
> -		if (ret == 0)
> -			break;
> -
> -		msleep(100);
> -	}
> -	if (ret) {
> +	if (do_reset(i915, stalled_mask)) {
>  		dev_err(i915->drm.dev, "Failed to reset chip\n");
>  		goto taint;
>  	}
>  
> -	/* Ok, now get things going again... */
> -
> -	/*
> -	 * Everything depends on having the GTT running, so we need to start
> -	 * there.
> -	 */
> -	ret = i915_ggtt_enable_hw(i915);
> -	if (ret) {
> -		DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
> -			  ret);
> -		goto error;
> -	}
> -
> -	gt_reset(i915, stalled_mask);
>  	intel_overlay_reset(i915);
>  
>  	/*
> @@ -1068,9 +1013,8 @@ void i915_reset(struct drm_i915_private *i915,
>  
>  finish:
>  	reset_finish(i915);
> -wakeup:
> -	clear_bit(I915_RESET_HANDOFF, &error->flags);
> -	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
> +	if (!i915_terminally_wedged(error))
> +		reset_restart(i915);
>  	return;
>  
>  taint:
> @@ -1089,7 +1033,6 @@ void i915_reset(struct drm_i915_private *i915,
>  	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
>  error:
>  	i915_gem_set_wedged(i915);
> -	i915_retire_requests(i915);
>  	goto finish;
>  }
>  
> @@ -1115,18 +1058,16 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
>  int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
>  {
>  	struct i915_gpu_error *error = &engine->i915->gpu_error;
> -	struct i915_request *active_request;
>  	int ret;
>  
>  	GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
>  	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
>  
> -	active_request = reset_prepare_engine(engine);
> -	if (IS_ERR_OR_NULL(active_request)) {
> -		/* Either the previous reset failed, or we pardon the reset. */
> -		ret = PTR_ERR(active_request);
> -		goto out;
> -	}
> +	if (i915_seqno_passed(intel_engine_get_seqno(engine),
> +			      intel_engine_last_submit(engine)))
> +		return 0;

You seem to have a patch to remove this shortly after so
squash?

I need to restock on coffee at this point.
-Mika

> +
> +	reset_prepare_engine(engine);
>  
>  	if (msg)
>  		dev_notice(engine->i915->drm.dev,
> @@ -1150,7 +1091,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
>  	 * active request and can drop it, adjust head to skip the offending
>  	 * request to resume executing remaining requests in the queue.
>  	 */
> -	reset_engine(engine, active_request, true);
> +	intel_engine_reset(engine, true);
>  
>  	/*
>  	 * The engine and its registers (and workarounds in case of render)
> @@ -1187,30 +1128,7 @@ static void i915_reset_device(struct drm_i915_private *i915,
>  	i915_wedge_on_timeout(&w, i915, 5 * HZ) {
>  		intel_prepare_reset(i915);
>  
> -		error->reason = reason;
> -		error->stalled_mask = engine_mask;
> -
> -		/* Signal that locked waiters should reset the GPU */
> -		smp_mb__before_atomic();
> -		set_bit(I915_RESET_HANDOFF, &error->flags);
> -		wake_up_all(&error->wait_queue);
> -
> -		/*
> -		 * Wait for anyone holding the lock to wakeup, without
> -		 * blocking indefinitely on struct_mutex.
> -		 */
> -		do {
> -			if (mutex_trylock(&i915->drm.struct_mutex)) {
> -				i915_reset(i915, engine_mask, reason);
> -				mutex_unlock(&i915->drm.struct_mutex);
> -			}
> -		} while (wait_on_bit_timeout(&error->flags,
> -					     I915_RESET_HANDOFF,
> -					     TASK_UNINTERRUPTIBLE,
> -					     1));
> -
> -		error->stalled_mask = 0;
> -		error->reason = NULL;
> +		i915_reset(i915, engine_mask, reason);
>  
>  		intel_finish_reset(i915);
>  	}
> @@ -1366,6 +1284,25 @@ void i915_handle_error(struct drm_i915_private *i915,
>  	intel_runtime_pm_put(i915, wakeref);
>  }
>  
> +bool i915_reset_flush(struct drm_i915_private *i915)
> +{
> +	int err;
> +
> +	cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
> +
> +	flush_workqueue(i915->wq);
> +	GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +	err = i915_gem_wait_for_idle(i915,
> +				     I915_WAIT_LOCKED |
> +				     I915_WAIT_FOR_IDLE_BOOST,
> +				     MAX_SCHEDULE_TIMEOUT);
> +	mutex_unlock(&i915->drm.struct_mutex);
> +
> +	return !err;
> +}
> +
>  static void i915_wedge_me(struct work_struct *work)
>  {
>  	struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
> diff --git a/drivers/gpu/drm/i915/i915_reset.h b/drivers/gpu/drm/i915/i915_reset.h
> index b6a519bde67d..f2d347f319df 100644
> --- a/drivers/gpu/drm/i915/i915_reset.h
> +++ b/drivers/gpu/drm/i915/i915_reset.h
> @@ -29,6 +29,9 @@ void i915_reset(struct drm_i915_private *i915,
>  int i915_reset_engine(struct intel_engine_cs *engine,
>  		      const char *reason);
>  
> +void i915_reset_request(struct i915_request *rq, bool guilty);
> +bool i915_reset_flush(struct drm_i915_private *i915);
> +
>  bool intel_has_gpu_reset(struct drm_i915_private *i915);
>  bool intel_has_reset_engine(struct drm_i915_private *i915);
>  
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 2f3c71f6d313..fc52737751e7 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1071,10 +1071,8 @@ void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
>  	if (!reset_engines(i915) && !force)
>  		return;
>  
> -	for_each_engine(engine, i915, id) {
> -		if (engine->reset.reset)
> -			engine->reset.reset(engine, NULL);
> -	}
> +	for_each_engine(engine, i915, id)
> +		intel_engine_reset(engine, false);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index ab1c49b106f2..7217c7e3ee8d 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -834,8 +834,7 @@ static void guc_submission_tasklet(unsigned long data)
>  	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>  }
>  
> -static struct i915_request *
> -guc_reset_prepare(struct intel_engine_cs *engine)
> +static void guc_reset_prepare(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
>  
> @@ -861,8 +860,6 @@ guc_reset_prepare(struct intel_engine_cs *engine)
>  	 */
>  	if (engine->i915->guc.preempt_wq)
>  		flush_workqueue(engine->i915->guc.preempt_wq);
> -
> -	return i915_gem_find_active_request(engine);
>  }
>  
>  /*
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index 741441daae32..5662d6fed523 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -25,6 +25,17 @@
>  #include "i915_drv.h"
>  #include "i915_reset.h"
>  
> +struct hangcheck {
> +	u64 acthd;
> +	u32 seqno;
> +	enum intel_engine_hangcheck_action action;
> +	unsigned long action_timestamp;
> +	int deadlock;
> +	struct intel_instdone instdone;
> +	bool wedged:1;
> +	bool stalled:1;
> +};
> +
>  static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
>  {
>  	u32 tmp = current_instdone | *old_instdone;
> @@ -119,25 +130,22 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
>  }
>  
>  static void hangcheck_load_sample(struct intel_engine_cs *engine,
> -				  struct intel_engine_hangcheck *hc)
> +				  struct hangcheck *hc)
>  {
>  	hc->acthd = intel_engine_get_active_head(engine);
>  	hc->seqno = intel_engine_get_seqno(engine);
>  }
>  
>  static void hangcheck_store_sample(struct intel_engine_cs *engine,
> -				   const struct intel_engine_hangcheck *hc)
> +				   const struct hangcheck *hc)
>  {
>  	engine->hangcheck.acthd = hc->acthd;
>  	engine->hangcheck.seqno = hc->seqno;
> -	engine->hangcheck.action = hc->action;
> -	engine->hangcheck.stalled = hc->stalled;
> -	engine->hangcheck.wedged = hc->wedged;
>  }
>  
>  static enum intel_engine_hangcheck_action
>  hangcheck_get_action(struct intel_engine_cs *engine,
> -		     const struct intel_engine_hangcheck *hc)
> +		     const struct hangcheck *hc)
>  {
>  	if (engine->hangcheck.seqno != hc->seqno)
>  		return ENGINE_ACTIVE_SEQNO;
> @@ -149,7 +157,7 @@ hangcheck_get_action(struct intel_engine_cs *engine,
>  }
>  
>  static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
> -					struct intel_engine_hangcheck *hc)
> +					struct hangcheck *hc)
>  {
>  	unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
>  
> @@ -265,19 +273,19 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
>  	intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
>  
>  	for_each_engine(engine, dev_priv, id) {
> -		struct intel_engine_hangcheck hc;
> +		struct hangcheck hc;
>  
>  		hangcheck_load_sample(engine, &hc);
>  		hangcheck_accumulate_sample(engine, &hc);
>  		hangcheck_store_sample(engine, &hc);
>  
> -		if (engine->hangcheck.stalled) {
> +		if (hc.stalled) {
>  			hung |= intel_engine_flag(engine);
>  			if (hc.action != ENGINE_DEAD)
>  				stuck |= intel_engine_flag(engine);
>  		}
>  
> -		if (engine->hangcheck.wedged)
> +		if (hc.wedged)
>  			wedged |= intel_engine_flag(engine);
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 28d183439952..c11cbf34258d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -136,6 +136,7 @@
>  #include <drm/i915_drm.h>
>  #include "i915_drv.h"
>  #include "i915_gem_render_state.h"
> +#include "i915_reset.h"
>  #include "i915_vgpu.h"
>  #include "intel_lrc_reg.h"
>  #include "intel_mocs.h"
> @@ -288,7 +289,8 @@ static void unwind_wa_tail(struct i915_request *rq)
>  	assert_ring_tail_valid(rq->ring, rq->tail);
>  }
>  
> -static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
> +static struct i915_request *
> +__unwind_incomplete_requests(struct intel_engine_cs *engine)
>  {
>  	struct i915_request *rq, *rn, *active = NULL;
>  	struct list_head *uninitialized_var(pl);
> @@ -330,6 +332,8 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
>  		list_move_tail(&active->sched.link,
>  			       i915_sched_lookup_priolist(engine, prio));
>  	}
> +
> +	return active;
>  }
>  
>  void
> @@ -1743,11 +1747,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
>  	return 0;
>  }
>  
> -static struct i915_request *
> -execlists_reset_prepare(struct intel_engine_cs *engine)
> +static void execlists_reset_prepare(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	struct i915_request *request, *active;
>  	unsigned long flags;
>  
>  	GEM_TRACE("%s: depth<-%d\n", engine->name,
> @@ -1763,59 +1765,21 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>  	 * prevents the race.
>  	 */
>  	__tasklet_disable_sync_once(&execlists->tasklet);
> +	GEM_BUG_ON(!reset_in_progress(execlists));
>  
> +	/* And flush any current direct submission. */
>  	spin_lock_irqsave(&engine->timeline.lock, flags);
> -
> -	/*
> -	 * We want to flush the pending context switches, having disabled
> -	 * the tasklet above, we can assume exclusive access to the execlists.
> -	 * For this allows us to catch up with an inflight preemption event,
> -	 * and avoid blaming an innocent request if the stall was due to the
> -	 * preemption itself.
> -	 */
> -	process_csb(engine);
> -
> -	/*
> -	 * The last active request can then be no later than the last request
> -	 * now in ELSP[0]. So search backwards from there, so that if the GPU
> -	 * has advanced beyond the last CSB update, it will be pardoned.
> -	 */
> -	active = NULL;
> -	request = port_request(execlists->port);
> -	if (request) {
> -		/*
> -		 * Prevent the breadcrumb from advancing before we decide
> -		 * which request is currently active.
> -		 */
> -		intel_engine_stop_cs(engine);
> -
> -		list_for_each_entry_from_reverse(request,
> -						 &engine->timeline.requests,
> -						 link) {
> -			if (__i915_request_completed(request,
> -						     request->global_seqno))
> -				break;
> -
> -			active = request;
> -		}
> -	}
> -
> +	process_csb(engine); /* drain preemption events */
>  	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> -
> -	return active;
>  }
>  
> -static void execlists_reset(struct intel_engine_cs *engine,
> -			    struct i915_request *request)
> +static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> +	struct i915_request *rq;
>  	unsigned long flags;
>  	u32 *regs;
>  
> -	GEM_TRACE("%s request global=%d, current=%d\n",
> -		  engine->name, request ? request->global_seqno : 0,
> -		  intel_engine_get_seqno(engine));
> -
>  	spin_lock_irqsave(&engine->timeline.lock, flags);
>  
>  	/*
> @@ -1830,12 +1794,18 @@ static void execlists_reset(struct intel_engine_cs *engine,
>  	execlists_cancel_port_requests(execlists);
>  
>  	/* Push back any incomplete requests for replay after the reset. */
> -	__unwind_incomplete_requests(engine);
> +	rq = __unwind_incomplete_requests(engine);
>  
>  	/* Following the reset, we need to reload the CSB read/write pointers */
>  	reset_csb_pointers(&engine->execlists);
>  
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
> +		  engine->name,
> +		  rq ? rq->global_seqno : 0,
> +		  intel_engine_get_seqno(engine),
> +		  yesno(stalled));
> +	if (!rq)
> +		goto out_unlock;
>  
>  	/*
>  	 * If the request was innocent, we leave the request in the ELSP
> @@ -1848,8 +1818,9 @@ static void execlists_reset(struct intel_engine_cs *engine,
>  	 * and have to at least restore the RING register in the context
>  	 * image back to the expected values to skip over the guilty request.
>  	 */
> -	if (!request || request->fence.error != -EIO)
> -		return;
> +	i915_reset_request(rq, stalled);
> +	if (!stalled)
> +		goto out_unlock;
>  
>  	/*
>  	 * We want a simple context + ring to execute the breadcrumb update.
> @@ -1859,25 +1830,23 @@ static void execlists_reset(struct intel_engine_cs *engine,
>  	 * future request will be after userspace has had the opportunity
>  	 * to recreate its own state.
>  	 */
> -	regs = request->hw_context->lrc_reg_state;
> +	regs = rq->hw_context->lrc_reg_state;
>  	if (engine->pinned_default_state) {
>  		memcpy(regs, /* skip restoring the vanilla PPHWSP */
>  		       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
>  		       engine->context_size - PAGE_SIZE);
>  	}
> -	execlists_init_reg_state(regs,
> -				 request->gem_context, engine, request->ring);
> +	execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
>  
>  	/* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
> -	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma);
> -
> -	request->ring->head = intel_ring_wrap(request->ring, request->postfix);
> -	regs[CTX_RING_HEAD + 1] = request->ring->head;
> +	regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(rq->ring->vma);
>  
> -	intel_ring_update_space(request->ring);
> +	rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
> +	regs[CTX_RING_HEAD + 1] = rq->ring->head;
> +	intel_ring_update_space(rq->ring);
>  
> -	/* Reset WaIdleLiteRestore:bdw,skl as well */
> -	unwind_wa_tail(request);
> +out_unlock:
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>  }
>  
>  static void execlists_reset_finish(struct intel_engine_cs *engine)
> @@ -1890,6 +1859,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
>  	 * to sleep before we restart and reload a context.
>  	 *
>  	 */
> +	GEM_BUG_ON(!reset_in_progress(execlists));
>  	if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
>  		execlists->tasklet.func(execlists->tasklet.data);
>  
> diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
> index c81db81e4416..f68c7975006c 100644
> --- a/drivers/gpu/drm/i915/intel_overlay.c
> +++ b/drivers/gpu/drm/i915/intel_overlay.c
> @@ -478,8 +478,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)
>  	if (!overlay)
>  		return;
>  
> -	intel_overlay_release_old_vid(overlay);
> -

How to compensate for this?

>  	overlay->old_xscale = 0;
>  	overlay->old_yscale = 0;
>  	overlay->crtc = NULL;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 26b7274a2d43..662907e1a286 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -33,6 +33,7 @@
>  
>  #include "i915_drv.h"
>  #include "i915_gem_render_state.h"
> +#include "i915_reset.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include "intel_workarounds.h"
> @@ -707,52 +708,80 @@ static int init_ring_common(struct intel_engine_cs *engine)
>  	return ret;
>  }
>  
> -static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
> +static void reset_prepare(struct intel_engine_cs *engine)
>  {
>  	intel_engine_stop_cs(engine);
> -	return i915_gem_find_active_request(engine);
>  }
>  
> -static void skip_request(struct i915_request *rq)
> +static void reset_ring(struct intel_engine_cs *engine, bool stalled)
>  {
> -	void *vaddr = rq->ring->vaddr;
> +	struct i915_timeline *tl = &engine->timeline;
> +	struct i915_request *pos, *rq;
> +	unsigned long flags;
>  	u32 head;
>  
> -	head = rq->infix;
> -	if (rq->postfix < head) {
> -		memset32(vaddr + head, MI_NOOP,
> -			 (rq->ring->size - head) / sizeof(u32));
> -		head = 0;
> +	rq = NULL;
> +	spin_lock_irqsave(&tl->lock, flags);
> +	list_for_each_entry(pos, &tl->requests, link) {
> +		if (!__i915_request_completed(pos, pos->global_seqno)) {
> +			rq = pos;
> +			break;
> +		}
>  	}
> -	memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
> -}
> -
> -static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
> -{
> -	GEM_TRACE("%s request global=%d, current=%d\n",
> -		  engine->name, rq ? rq->global_seqno : 0,
> -		  intel_engine_get_seqno(engine));
>  
> +	GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
> +		  engine->name,
> +		  rq ? rq->global_seqno : 0,
> +		  intel_engine_get_seqno(engine),
> +		  yesno(stalled));
>  	/*
> -	 * Try to restore the logical GPU state to match the continuation
> -	 * of the request queue. If we skip the context/PD restore, then
> -	 * the next request may try to execute assuming that its context
> -	 * is valid and loaded on the GPU and so may try to access invalid
> -	 * memory, prompting repeated GPU hangs.
> +	 * The guilty request will get skipped on a hung engine.
>  	 *
> -	 * If the request was guilty, we still restore the logical state
> -	 * in case the next request requires it (e.g. the aliasing ppgtt),
> -	 * but skip over the hung batch.
> +	 * Users of client default contexts do not rely on logical
> +	 * state preserved between batches so it is safe to execute
> +	 * queued requests following the hang. Non default contexts
> +	 * rely on preserved state, so skipping a batch loses the
> +	 * evolution of the state and it needs to be considered corrupted.
> +	 * Executing more queued batches on top of corrupted state is
> +	 * risky. But we take the risk by trying to advance through
> +	 * the queued requests in order to make the client behaviour
> +	 * more predictable around resets, by not throwing away random
> +	 * amount of batches it has prepared for execution. Sophisticated
> +	 * clients can use gem_reset_stats_ioctl and dma fence status
> +	 * (exported via sync_file info ioctl on explicit fences) to observe
> +	 * when it loses the context state and should rebuild accordingly.
>  	 *
> -	 * If the request was innocent, we try to replay the request with
> -	 * the restored context.
> +	 * The context ban, and ultimately the client ban, mechanism are safety
> +	 * valves if client submission ends up resulting in nothing more than
> +	 * subsequent hangs.
>  	 */
> +
>  	if (rq) {
> -		/* If the rq hung, jump to its breadcrumb and skip the batch */
> -		rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
> -		if (rq->fence.error == -EIO)
> -			skip_request(rq);
> +		/*
> +		 * Try to restore the logical GPU state to match the
> +		 * continuation of the request queue. If we skip the
> +		 * context/PD restore, then the next request may try to execute
> +		 * assuming that its context is valid and loaded on the GPU and
> +		 * so may try to access invalid memory, prompting repeated GPU
> +		 * hangs.
> +		 *
> +		 * If the request was guilty, we still restore the logical
> +		 * state in case the next request requires it (e.g. the
> +		 * aliasing ppgtt), but skip over the hung batch.
> +		 *
> +		 * If the request was innocent, we try to replay the request
> +		 * with the restored context.
> +		 */
> +		i915_reset_request(rq, stalled);
> +
> +		GEM_BUG_ON(rq->ring != engine->buffer);
> +		head = rq->head;
> +	} else {
> +		head = engine->buffer->tail;
>  	}
> +	engine->buffer->head = intel_ring_wrap(engine->buffer, head);
> +
> +	spin_unlock_irqrestore(&tl->lock, flags);
>  }
>  
>  static void reset_finish(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c3ef0f9bf321..32ed44196c1a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -120,13 +120,8 @@ struct intel_instdone {
>  struct intel_engine_hangcheck {
>  	u64 acthd;
>  	u32 seqno;
> -	enum intel_engine_hangcheck_action action;
>  	unsigned long action_timestamp;
> -	int deadlock;
>  	struct intel_instdone instdone;
> -	struct i915_request *active_request;
> -	bool stalled:1;
> -	bool wedged:1;
>  };
>  
>  struct intel_ring {
> @@ -444,9 +439,8 @@ struct intel_engine_cs {
>  	int		(*init_hw)(struct intel_engine_cs *engine);
>  
>  	struct {
> -		struct i915_request *(*prepare)(struct intel_engine_cs *engine);
> -		void (*reset)(struct intel_engine_cs *engine,
> -			      struct i915_request *rq);
> +		void (*prepare)(struct intel_engine_cs *engine);
> +		void (*reset)(struct intel_engine_cs *engine, bool stalled);
>  		void (*finish)(struct intel_engine_cs *engine);
>  	} reset;
>  
> @@ -1018,6 +1012,13 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
>  	return cs;
>  }
>  
> +static inline void intel_engine_reset(struct intel_engine_cs *engine,
> +				      bool stalled)
> +{
> +	if (engine->reset.reset)
> +		engine->reset.reset(engine, stalled);
> +}
> +
>  void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
>  
>  bool intel_engine_is_idle(struct intel_engine_cs *engine);
> diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> index 12550b55c42f..67431355cd6e 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
> @@ -363,9 +363,7 @@ static int igt_global_reset(void *arg)
>  	/* Check that we can issue a global GPU reset */
>  
>  	igt_global_reset_lock(i915);
> -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
>  
> -	mutex_lock(&i915->drm.struct_mutex);
>  	reset_count = i915_reset_count(&i915->gpu_error);
>  
>  	i915_reset(i915, ALL_ENGINES, NULL);
> @@ -374,9 +372,7 @@ static int igt_global_reset(void *arg)
>  		pr_err("No GPU reset recorded!\n");
>  		err = -EINVAL;
>  	}
> -	mutex_unlock(&i915->drm.struct_mutex);
>  
> -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
>  	igt_global_reset_unlock(i915);
>  
>  	if (i915_terminally_wedged(&i915->gpu_error))
> @@ -399,9 +395,7 @@ static int igt_wedged_reset(void *arg)
>  	i915_gem_set_wedged(i915);
>  	GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
>  
> -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
>  	i915_reset(i915, ALL_ENGINES, NULL);
> -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
>  
>  	intel_runtime_pm_put(i915, wakeref);
>  	mutex_unlock(&i915->drm.struct_mutex);
> @@ -511,7 +505,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
>  				break;
>  			}
>  
> -			if (!wait_for_idle(engine)) {
> +			if (!i915_reset_flush(i915)) {
>  				struct drm_printer p =
>  					drm_info_printer(i915->drm.dev);
>  
> @@ -903,20 +897,13 @@ static int igt_reset_engines(void *arg)
>  	return 0;
>  }
>  
> -static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
> +static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)
>  {
> -	struct i915_gpu_error *error = &rq->i915->gpu_error;
> -	u32 reset_count = i915_reset_count(error);
> -
> -	error->stalled_mask = mask;
> -
> -	/* set_bit() must be after we have setup the backchannel (mask) */
> -	smp_mb__before_atomic();
> -	set_bit(I915_RESET_HANDOFF, &error->flags);
> +	u32 count = i915_reset_count(&i915->gpu_error);
>  
> -	wake_up_all(&error->wait_queue);
> +	i915_reset(i915, mask, NULL);
>  
> -	return reset_count;
> +	return count;
>  }
>  
>  static int igt_reset_wait(void *arg)
> @@ -962,7 +949,7 @@ static int igt_reset_wait(void *arg)
>  		goto out_rq;
>  	}
>  
> -	reset_count = fake_hangcheck(rq, ALL_ENGINES);
> +	reset_count = fake_hangcheck(i915, ALL_ENGINES);
>  
>  	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
>  	if (timeout < 0) {
> @@ -972,7 +959,6 @@ static int igt_reset_wait(void *arg)
>  		goto out_rq;
>  	}
>  
> -	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
>  	if (i915_reset_count(&i915->gpu_error) == reset_count) {
>  		pr_err("No GPU reset recorded!\n");
>  		err = -EINVAL;
> @@ -1162,7 +1148,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
>  	}
>  
>  out_reset:
> -	fake_hangcheck(rq, intel_engine_flag(rq->engine));
> +	fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
>  
>  	if (tsk) {
>  		struct igt_wedge_me w;
> @@ -1341,12 +1327,7 @@ static int igt_reset_queue(void *arg)
>  				goto fini;
>  			}
>  
> -			reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
> -
> -			i915_reset(i915, ENGINE_MASK(id), NULL);
> -
> -			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
> -					    &i915->gpu_error.flags));
> +			reset_count = fake_hangcheck(i915, ENGINE_MASK(id));
>  
>  			if (prev->fence.error != -EIO) {
>  				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
> @@ -1565,6 +1546,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
>  		pr_err("%s(%s): Failed to start request %llx, at %x\n",
>  		       __func__, engine->name,
>  		       rq->fence.seqno, hws_seqno(&h, rq));
> +		i915_gem_set_wedged(i915);
>  		err = -EIO;
>  	}
>  
> @@ -1588,7 +1570,6 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
>  static void force_reset(struct drm_i915_private *i915)
>  {
>  	i915_gem_set_wedged(i915);
> -	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
>  	i915_reset(i915, 0, NULL);
>  }
>  
> @@ -1618,6 +1599,26 @@ static int igt_atomic_reset(void *arg)
>  	if (i915_terminally_wedged(&i915->gpu_error))
>  		goto unlock;
>  
> +	if (intel_has_gpu_reset(i915)) {
> +		const typeof(*phases) *p;
> +
> +		for (p = phases; p->name; p++) {
> +			GEM_TRACE("intel_gpu_reset under %s\n", p->name);
> +
> +			p->critical_section_begin();
> +			err = intel_gpu_reset(i915, ALL_ENGINES);
> +			p->critical_section_end();
> +
> +			if (err) {
> +				pr_err("intel_gpu_reset failed under %s\n",
> +				       p->name);
> +				goto out;
> +			}
> +		}
> +
> +		force_reset(i915);
> +	}
> +
>  	if (intel_has_reset_engine(i915)) {
>  		struct intel_engine_cs *engine;
>  		enum intel_engine_id id;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> index a8cac56be835..b15c4f26c593 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c
> @@ -214,7 +214,6 @@ static int check_whitelist(struct i915_gem_context *ctx,
>  
>  static int do_device_reset(struct intel_engine_cs *engine)
>  {
> -	set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags);
>  	i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds");
>  	return 0;
>  }
> @@ -394,7 +393,6 @@ static int
>  live_gpu_reset_gt_engine_workarounds(void *arg)
>  {
>  	struct drm_i915_private *i915 = arg;
> -	struct i915_gpu_error *error = &i915->gpu_error;
>  	intel_wakeref_t wakeref;
>  	struct wa_lists lists;
>  	bool ok;
> @@ -413,7 +411,6 @@ live_gpu_reset_gt_engine_workarounds(void *arg)
>  	if (!ok)
>  		goto out;
>  
> -	set_bit(I915_RESET_HANDOFF, &error->flags);
>  	i915_reset(i915, ALL_ENGINES, "live_workarounds");
>  
>  	ok = verify_gt_engine_wa(i915, &lists, "after reset");
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> index 5477ad4a7e7d..8ab5a2688a0c 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
> @@ -58,8 +58,8 @@ static void mock_device_release(struct drm_device *dev)
>  	i915_gem_contexts_lost(i915);
>  	mutex_unlock(&i915->drm.struct_mutex);
>  
> -	cancel_delayed_work_sync(&i915->gt.retire_work);
> -	cancel_delayed_work_sync(&i915->gt.idle_work);
> +	drain_delayed_work(&i915->gt.retire_work);
> +	drain_delayed_work(&i915->gt.idle_work);
>  	i915_gem_drain_workqueue(i915);
>  
>  	mutex_lock(&i915->drm.struct_mutex);
> -- 
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx