[PATCH 4/6] reset-mutexless
Chris Wilson
chris at chris-wilson.co.uk
Thu Jun 21 16:50:08 UTC 2018
---
drivers/gpu/drm/i915/i915_debugfs.c | 7 ----
drivers/gpu/drm/i915/i915_drv.c | 11 +----
drivers/gpu/drm/i915/i915_drv.h | 5 ---
drivers/gpu/drm/i915/i915_gem.c | 29 ++++---------
drivers/gpu/drm/i915/i915_gpu_error.h | 18 +-------
drivers/gpu/drm/i915/i915_irq.c | 24 +----------
drivers/gpu/drm/i915/i915_request.c | 42 -------------------
drivers/gpu/drm/i915/intel_overlay.c | 2 -
.../gpu/drm/i915/selftests/intel_hangcheck.c | 19 +--------
9 files changed, 13 insertions(+), 144 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1511d6db626d..57c43e2f3c04 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1317,8 +1317,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
seq_puts(m, "Wedged\n");
if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
seq_puts(m, "Reset in progress: struct_mutex backoff\n");
- if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
- seq_puts(m, "Reset in progress: reset handoff to waiter\n");
if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
seq_puts(m, "Waiter holding struct mutex\n");
if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
@@ -4061,11 +4059,6 @@ i915_wedged_set(void *data, u64 val)
i915_handle_error(i915, val, I915_ERROR_CAPTURE,
"Manually set wedged engine mask = %llx", val);
-
- wait_on_bit(&i915->gpu_error.flags,
- I915_RESET_HANDOFF,
- TASK_UNINTERRUPTIBLE);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5c030c96b6ce..2e3f3b208181 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1899,15 +1899,11 @@ void i915_reset(struct drm_i915_private *i915,
GEM_TRACE("flags=%lx\n", error->flags);
might_sleep();
- lockdep_assert_held(&i915->drm.struct_mutex);
GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
- if (!test_bit(I915_RESET_HANDOFF, &error->flags))
- return;
-
/* Clear any previous failed attempts at recovery. Time to try again. */
if (!i915_gem_unset_wedged(i915))
- goto wakeup;
+ return;
if (reason)
dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
@@ -1976,10 +1972,6 @@ void i915_reset(struct drm_i915_private *i915,
finish:
i915_gem_reset_finish(i915);
enable_irq(i915->drm.irq);
-
-wakeup:
- clear_bit(I915_RESET_HANDOFF, &error->flags);
- wake_up_bit(&error->flags, I915_RESET_HANDOFF);
return;
taint:
@@ -1998,7 +1990,6 @@ void i915_reset(struct drm_i915_private *i915,
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
error:
i915_gem_set_wedged(i915);
- i915_retire_requests(i915);
goto finish;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c2a30936f58b..7dcdc41ac3b6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3129,11 +3129,6 @@ static inline bool i915_reset_backoff(struct i915_gpu_error *error)
return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
}
-static inline bool i915_reset_handoff(struct i915_gpu_error *error)
-{
- return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
-}
-
static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
{
return unlikely(test_bit(I915_WEDGED, &error->flags));
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 07615ec67d59..d5d68238daef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3202,19 +3202,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine;
enum intel_engine_id id;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
- i915_retire_requests(dev_priv);
-
for_each_engine(engine, dev_priv, id) {
- struct intel_context *ce;
-
i915_gem_reset_engine(engine,
engine->hangcheck.active_request,
stalled_mask & ENGINE_MASK(id));
- ce = fetch_and_zero(&engine->last_retired_context);
- if (ce)
- intel_context_unpin(ce);
+
+ //__retire_engine_upto(engine->hangcheck.active_request);
/*
* Ostensibily, we always want a context loaded for powersaving,
@@ -3226,7 +3219,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv,
* an incoherent read by the CS (presumably stale TLB). An
* empty request appears sufficient to paper over the glitch.
*/
- if (intel_engine_is_idle(engine)) {
+ if (intel_engine_is_idle(engine) && 0) {
struct i915_request *rq;
rq = i915_request_alloc(engine,
@@ -3251,8 +3244,6 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
struct intel_engine_cs *engine;
enum intel_engine_id id;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
for_each_engine(engine, dev_priv, id) {
engine->hangcheck.active_request = NULL;
i915_gem_reset_finish_engine(engine);
@@ -3367,7 +3358,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
{
struct i915_timeline *tl;
- lockdep_assert_held(&i915->drm.struct_mutex);
if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
return true;
@@ -3385,9 +3375,9 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
*/
list_for_each_entry(tl, &i915->gt.timelines, link) {
struct i915_request *rq;
+ long timeout;
- rq = i915_gem_active_peek(&tl->last_request,
- &i915->drm.struct_mutex);
+ rq = i915_gem_active_get_unlocked(&tl->last_request);
if (!rq)
continue;
@@ -3402,12 +3392,12 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
* and when the seqno passes the fence, the signaler
* then signals the fence waking us up).
*/
- if (dma_fence_default_wait(&rq->fence, true,
- MAX_SCHEDULE_TIMEOUT) < 0)
+ timeout = dma_fence_default_wait(&rq->fence, true,
+ MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ if (timeout < 0)
return false;
}
- i915_retire_requests(i915);
- GEM_BUG_ON(i915->gt.active_requests);
/*
* Undo nop_submit_request. We prevent all new i915 requests from
@@ -3419,7 +3409,6 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
* context and do not require stop_machine().
*/
intel_engines_reset_default_submission(i915);
- i915_gem_contexts_lost(i915);
GEM_TRACE("end\n");
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 58910f1dc67c..3c7502f39859 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -243,15 +243,6 @@ struct i915_gpu_error {
* i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
* secondary role in preventing two concurrent global reset attempts.
*
- * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
- * struct_mutex. We try to acquire the struct_mutex in the reset worker,
- * but it may be held by some long running waiter (that we cannot
- * interrupt without causing trouble). Once we are ready to do the GPU
- * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
- * they already hold the struct_mutex and want to participate they can
- * inspect the bit and do the reset directly, otherwise the worker
- * waits for the struct_mutex.
- *
* #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
* acquire the struct_mutex to reset an engine, we need an explicit
* flag to prevent two concurrent reset attempts in the same engine.
@@ -265,20 +256,13 @@ struct i915_gpu_error {
*/
unsigned long flags;
#define I915_RESET_BACKOFF 0
-#define I915_RESET_HANDOFF 1
-#define I915_RESET_MODESET 2
+#define I915_RESET_MODESET 1
#define I915_WEDGED (BITS_PER_LONG - 1)
#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
/** Number of times an engine has been reset */
u32 reset_engine_count[I915_NUM_ENGINES];
- /** Set of stalled engines with guilty requests, in the current reset */
- u32 stalled_mask;
-
- /** Reason for the current *global* reset */
- const char *reason;
-
/**
* Waitqueue to signal when a hang is detected. Used to for waiters
* to release the struct_mutex for the reset to procede.
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 46aaef5c1851..03564959dd3d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -3118,29 +3118,7 @@ static void i915_reset_device(struct drm_i915_private *dev_priv,
i915_wedge_on_timeout(&w, dev_priv, 5*HZ) {
intel_prepare_reset(dev_priv);
- error->reason = reason;
- error->stalled_mask = engine_mask;
-
- /* Signal that locked waiters should reset the GPU */
- smp_mb__before_atomic();
- set_bit(I915_RESET_HANDOFF, &error->flags);
- wake_up_all(&error->wait_queue);
-
- /* Wait for anyone holding the lock to wakeup, without
- * blocking indefinitely on struct_mutex.
- */
- do {
- if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
- i915_reset(dev_priv, engine_mask, reason);
- mutex_unlock(&dev_priv->drm.struct_mutex);
- }
- } while (wait_on_bit_timeout(&error->flags,
- I915_RESET_HANDOFF,
- TASK_UNINTERRUPTIBLE,
- 1));
-
- error->stalled_mask = 0;
- error->reason = NULL;
+ i915_reset(dev_priv, engine_mask, reason);
intel_finish_reset(dev_priv);
}
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e1dbb544046f..69712088c662 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1223,18 +1223,6 @@ static bool __i915_spin_request(const struct i915_request *rq,
return false;
}
-static bool __i915_wait_request_check_and_reset(struct i915_request *request)
-{
- struct i915_gpu_error *error = &request->i915->gpu_error;
-
- if (likely(!i915_reset_handoff(error)))
- return false;
-
- __set_current_state(TASK_RUNNING);
- i915_reset(request->i915, error->stalled_mask, error->reason);
- return true;
-}
-
/**
* i915_request_wait - wait until execution of request has finished
* @rq: the request to wait upon
@@ -1260,8 +1248,6 @@ long i915_request_wait(struct i915_request *rq,
{
const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
- wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
- DEFINE_WAIT_FUNC(reset, default_wake_function);
DEFINE_WAIT_FUNC(exec, default_wake_function);
struct intel_wait wait;
@@ -1280,11 +1266,7 @@ long i915_request_wait(struct i915_request *rq,
return -ETIME;
trace_i915_request_wait_begin(rq, flags);
-
add_wait_queue(&rq->execute, &exec);
- if (flags & I915_WAIT_LOCKED)
- add_wait_queue(errq, &reset);
-
intel_wait_init(&wait, rq);
restart:
@@ -1293,10 +1275,6 @@ long i915_request_wait(struct i915_request *rq,
if (intel_wait_update_request(&wait, rq))
break;
- if (flags & I915_WAIT_LOCKED &&
- __i915_wait_request_check_and_reset(rq))
- continue;
-
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
goto complete;
@@ -1326,9 +1304,6 @@ long i915_request_wait(struct i915_request *rq,
*/
goto wakeup;
- if (flags & I915_WAIT_LOCKED)
- __i915_wait_request_check_and_reset(rq);
-
for (;;) {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
@@ -1358,21 +1333,6 @@ long i915_request_wait(struct i915_request *rq,
if (__i915_request_irq_complete(rq))
break;
- /*
- * If the GPU is hung, and we hold the lock, reset the GPU
- * and then check for completion. On a full reset, the engine's
- * HW seqno will be advanced passed us and we are complete.
- * If we do a partial reset, we have to wait for the GPU to
- * resume and update the breadcrumb.
- *
- * If we don't hold the mutex, we can just wait for the worker
- * to come along and update the breadcrumb (either directly
- * itself, or indirectly by recovering the GPU).
- */
- if (flags & I915_WAIT_LOCKED &&
- __i915_wait_request_check_and_reset(rq))
- continue;
-
/* Only spin if we know the GPU is processing this request */
if (__i915_spin_request(rq, wait.seqno, state, 2))
break;
@@ -1386,8 +1346,6 @@ long i915_request_wait(struct i915_request *rq,
intel_engine_remove_wait(rq->engine, &wait);
complete:
__set_current_state(TASK_RUNNING);
- if (flags & I915_WAIT_LOCKED)
- remove_wait_queue(errq, &reset);
remove_wait_queue(&rq->execute, &exec);
trace_i915_request_wait_end(rq);
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index c2f10d899329..371eb3dbedc0 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -501,8 +501,6 @@ void intel_overlay_reset(struct drm_i915_private *dev_priv)
if (!overlay)
return;
- intel_overlay_release_old_vid(overlay);
-
overlay->old_xscale = 0;
overlay->old_yscale = 0;
overlay->crtc = NULL;
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index fe7d3190ebfe..f4a89d19f5de 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -383,7 +383,6 @@ static int igt_global_reset(void *arg)
/* Check that we can issue a global GPU reset */
global_reset_lock(i915);
- set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
mutex_lock(&i915->drm.struct_mutex);
reset_count = i915_reset_count(&i915->gpu_error);
@@ -396,7 +395,6 @@ static int igt_global_reset(void *arg)
}
mutex_unlock(&i915->drm.struct_mutex);
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
global_reset_unlock(i915);
if (i915_terminally_wedged(&i915->gpu_error))
@@ -900,18 +898,7 @@ static int igt_reset_engines(void *arg)
static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
{
- struct i915_gpu_error *error = &rq->i915->gpu_error;
- u32 reset_count = i915_reset_count(error);
-
- error->stalled_mask = mask;
-
- /* set_bit() must be after we have setup the backchannel (mask) */
- smp_mb__before_atomic();
- set_bit(I915_RESET_HANDOFF, &error->flags);
-
- wake_up_all(&error->wait_queue);
-
- return reset_count;
+ return i915_reset_count(&rq->i915->gpu_error);
}
static int igt_wait_reset(void *arg)
@@ -967,7 +954,6 @@ static int igt_wait_reset(void *arg)
goto out_rq;
}
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
if (i915_reset_count(&i915->gpu_error) == reset_count) {
pr_err("No GPU reset recorded!\n");
err = -EINVAL;
@@ -1097,9 +1083,6 @@ static int igt_reset_queue(void *arg)
i915_reset(i915, ENGINE_MASK(id), NULL);
- GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
- &i915->gpu_error.flags));
-
if (prev->fence.error != -EIO) {
pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
prev->fence.error);
--
2.18.0.rc2
More information about the Intel-gfx-trybot
mailing list