[PATCH 20/20] TEST: drm/i915: mutex-less reset engine
Michel Thierry
michel.thierry at intel.com
Thu Apr 6 16:43:07 UTC 2017
It can't be that simple...
Sometimes we already got the mutex and there's no reset-engine support.
Signed-off-by: Michel Thierry <michel.thierry at intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 71 +++++++++++++++++++++++++++++----
drivers/gpu/drm/i915/i915_drv.h | 2 +
drivers/gpu/drm/i915/i915_gem.c | 6 ++-
drivers/gpu/drm/i915/i915_gem_request.c | 4 +-
drivers/gpu/drm/i915/i915_irq.c | 20 +---------
5 files changed, 75 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c24aa5cdf72e..84a45eaef64a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1871,8 +1871,6 @@ void i915_reset_chip(struct drm_i915_private *dev_priv)
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
*
- * Caller must hold the struct_mutex.
- *
* Procedure is:
* - identifies the request that caused the hang and it is dropped
* - force engine to idle: this is done by issuing a reset request
@@ -1885,7 +1883,6 @@ int i915_reset_engine(struct intel_engine_cs *engine)
struct drm_i915_private *dev_priv = engine->i915;
struct i915_gpu_error *error = &dev_priv->gpu_error;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
if (!test_and_clear_bit(I915_RESET_HANDOFF, &error->flags))
@@ -1893,6 +1890,9 @@ int i915_reset_engine(struct intel_engine_cs *engine)
DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
+ /* let the other functions know that we are not full gpu reset */
+ set_bit(I915_RESET_ENGINE, &error->flags);
+
/*
* We need to first idle the engine by issuing a reset request,
* then perform soft reset and re-initialize hw state, for all of
@@ -1947,8 +1947,6 @@ int i915_reset_engine(struct intel_engine_cs *engine)
}
}
- /* i915_gem_reset_prepare revoked the fences */
- i915_gem_restore_fences(dev_priv);
i915_gem_reset_finish(dev_priv, intel_engine_flag(engine));
/* replay remaining requests in the queue */
@@ -1963,6 +1961,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
error->reset_engine_count[engine->id]++;
wakeup:
+ clear_bit(I915_RESET_ENGINE, &error->flags);
enable_irq(dev_priv->drm.irq);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
wake_up_bit(&error->flags, I915_RESET_HANDOFF);
@@ -1975,6 +1974,51 @@ int i915_reset_engine(struct intel_engine_cs *engine)
}
/**
+ * i915_reset__unlocked - start either engine or full GPU reset to recover from a hang
+ * @dev_priv: device private
+ * @engine_mask: mask representing engines that are hung
+ *
+ * Wrapper function to initiate a GPU reset. When platform supports it, attempt
+ * to reset the hung engine only. If engine reset fails (or is not supported),
+ * reset the full GPU. If more than one engine is hung, the speed gains of
+ * reset_engine are negligible, thus promote to full reset.
+ *
+ */
+void i915_reset__unlocked(struct drm_i915_private *dev_priv, u32 engine_mask)
+{
+ /* try engine reset first; look mom, no mutex! */
+ if (intel_has_reset_engine(dev_priv) &&
+ !(engine_mask & (engine_mask - 1))) {
+ struct intel_engine_cs *engine =
+ dev_priv->engine[intel_engineid_from_flag(engine_mask)];
+
+ if (i915_reset_engine(engine))
+ goto reset_chip;
+
+ return;
+ }
+
+reset_chip:
+ do {
+ /*
+ * All state reset _must_ be completed before we update the
+ * reset counter, for otherwise waiters might miss the reset
+ * pending state and not properly drop locks, resulting in
+ * deadlocks with the reset work.
+ */
+ if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+ i915_reset_chip(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+ }
+
+ /* We need to wait for anyone holding the lock to wakeup */
+ } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+ I915_RESET_HANDOFF,
+ TASK_UNINTERRUPTIBLE,
+ HZ));
+}
+
+/**
* i915_reset - start either engine or full GPU reset to recover from a hang
* @dev_priv: device private
* @engine_mask: mask representing engines that are hung
@@ -1984,7 +2028,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
* reset the full GPU. If more than one engine is hung, the speed gains of
* reset_engine are negligible, thus promote to full reset.
*
- * Caller must hold the struct_mutex.
+ * Caller must already hold the struct_mutex.
*/
void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask)
{
@@ -2001,7 +2045,20 @@ void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask)
}
reset_chip:
- i915_reset_chip(dev_priv);
+ do {
+ /*
+ * All state reset _must_ be completed before we update the
+ * reset counter, for otherwise waiters might miss the reset
+ * pending state and not properly drop locks, resulting in
+ * deadlocks with the reset work.
+ */
+ i915_reset_chip(dev_priv);
+
+ /* We need to wait for anyone holding the lock to wakeup */
+ } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+ I915_RESET_HANDOFF,
+ TASK_UNINTERRUPTIBLE,
+ HZ));
}
static int i915_pm_suspend(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cf8d038244f6..3b38540497c7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1624,6 +1624,7 @@ struct i915_gpu_error {
#define I915_RESET_BACKOFF 0
#define I915_RESET_HANDOFF 1
#define I915_RESET_WATCHDOG 2
+#define I915_RESET_ENGINE 3
#define I915_WEDGED (BITS_PER_LONG - 1)
/* if available, engine-specific reset is tried before full gpu reset */
@@ -3033,6 +3034,7 @@ extern void i915_driver_unload(struct drm_device *dev);
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
+extern void i915_reset__unlocked(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
extern int intel_reset_engine_start(struct intel_engine_cs *engine);
extern void intel_reset_engine_cancel(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e934fd6ebe7c..e1d22aa32107 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2784,7 +2784,8 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv,
}
}
- i915_gem_revoke_fences(dev_priv);
+ if (!test_bit(I915_RESET_ENGINE, &dev_priv->gpu_error.flags))
+ i915_gem_revoke_fences(dev_priv);
return err;
}
@@ -2921,7 +2922,8 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine;
unsigned int tmp;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ if (!test_bit(I915_RESET_ENGINE, &dev_priv->gpu_error.flags))
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
tasklet_enable(&engine->irq_tasklet);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8303c4bef836..ef054832d375 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -276,7 +276,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
struct intel_engine_cs *engine = request->engine;
struct i915_gem_active *active, *next;
- lockdep_assert_held(&request->i915->drm.struct_mutex);
+ if (!test_bit(I915_RESET_ENGINE, &request->i915->gpu_error.flags))
+ lockdep_assert_held(&request->i915->drm.struct_mutex);
+
GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
GEM_BUG_ON(!i915_gem_request_completed(request));
GEM_BUG_ON(!request->i915->gt.active_requests);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6543e77e0621..a3acff267685 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2668,24 +2668,8 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags);
wake_up_all(&dev_priv->gpu_error.wait_queue);
- do {
- /*
- * All state reset _must_ be completed before we update the
- * reset counter, for otherwise waiters might miss the reset
- * pending state and not properly drop locks, resulting in
- * deadlocks with the reset work.
- */
- if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
- i915_reset(dev_priv,
- dev_priv->gpu_error.reset_engine_mask);
- mutex_unlock(&dev_priv->drm.struct_mutex);
- }
-
- /* We need to wait for anyone holding the lock to wakeup */
- } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
- I915_RESET_HANDOFF,
- TASK_UNINTERRUPTIBLE,
- HZ));
+ i915_reset__unlocked(dev_priv,
+ dev_priv->gpu_error.reset_engine_mask);
intel_finish_reset(dev_priv);
--
2.11.0
More information about the Intel-gfx-trybot
mailing list