[PATCH 20/20] TEST: drm/i915: mutex-less reset engine

Michel Thierry michel.thierry at intel.com
Thu Apr 6 14:04:56 UTC 2017


It can't be that simple.

Signed-off-by: Michel Thierry <michel.thierry at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c         | 27 ++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_gem.c         |  6 ++++--
 drivers/gpu/drm/i915/i915_gem_request.c |  4 +++-
 drivers/gpu/drm/i915/i915_irq.c         | 20 ++------------------
 5 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c24aa5cdf72e..ff5e275502dd 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1885,7 +1885,6 @@ int i915_reset_engine(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct i915_gpu_error *error = &dev_priv->gpu_error;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 
 	if (!test_and_clear_bit(I915_RESET_HANDOFF, &error->flags))
@@ -1893,6 +1892,9 @@ int i915_reset_engine(struct intel_engine_cs *engine)
 
 	DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
 
+	/* let the other functions know that we are not full gpu reset */
+	set_bit(I915_RESET_ENGINE, &error->flags);
+
 	/*
 	 * We need to first idle the engine by issuing a reset request,
 	 * then perform soft reset and re-initialize hw state, for all of
@@ -1947,8 +1949,6 @@ int i915_reset_engine(struct intel_engine_cs *engine)
 		}
 	}
 
-	/* i915_gem_reset_prepare revoked the fences */
-	i915_gem_restore_fences(dev_priv);
 	i915_gem_reset_finish(dev_priv, intel_engine_flag(engine));
 
 	/* replay remaining requests in the queue */
@@ -1963,6 +1963,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
 	error->reset_engine_count[engine->id]++;
 
 wakeup:
+	clear_bit(I915_RESET_ENGINE, &error->flags);
 	enable_irq(dev_priv->drm.irq);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	wake_up_bit(&error->flags, I915_RESET_HANDOFF);
@@ -1988,7 +1989,7 @@ int i915_reset_engine(struct intel_engine_cs *engine)
  */
 void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask)
 {
-	/* try engine reset first */
+	/* try engine reset first; look mom, no mutex! */
 	if (intel_has_reset_engine(dev_priv) &&
 	    !(engine_mask & (engine_mask - 1))) {
 		struct intel_engine_cs *engine =
@@ -2001,7 +2002,23 @@ void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask)
 	}
 
 reset_chip:
-	i915_reset_chip(dev_priv);
+        do {
+                /*
+                 * All state reset _must_ be completed before we update the
+                 * reset counter, for otherwise waiters might miss the reset
+                 * pending state and not properly drop locks, resulting in
+                 * deadlocks with the reset work.
+                 */
+                if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+                        i915_reset_chip(dev_priv);
+                        mutex_unlock(&dev_priv->drm.struct_mutex);
+                }
+
+                /* We need to wait for anyone holding the lock to wakeup */
+        } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+                                     I915_RESET_HANDOFF,
+                                     TASK_UNINTERRUPTIBLE,
+                                     HZ));
 }
 
 static int i915_pm_suspend(struct device *kdev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cf8d038244f6..0b53ae4d916a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1624,6 +1624,7 @@ struct i915_gpu_error {
 #define I915_RESET_BACKOFF	0
 #define I915_RESET_HANDOFF	1
 #define I915_RESET_WATCHDOG	2
+#define I915_RESET_ENGINE	3
 #define I915_WEDGED		(BITS_PER_LONG - 1)
 
 	/* if available, engine-specific reset is tried before full gpu reset */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e934fd6ebe7c..e1d22aa32107 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2784,7 +2784,8 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv,
 		}
 	}
 
-	i915_gem_revoke_fences(dev_priv);
+	if (!test_bit(I915_RESET_ENGINE, &dev_priv->gpu_error.flags))
+		i915_gem_revoke_fences(dev_priv);
 
 	return err;
 }
@@ -2921,7 +2922,8 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv,
 	struct intel_engine_cs *engine;
 	unsigned int tmp;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+	if (!test_bit(I915_RESET_ENGINE, &dev_priv->gpu_error.flags))
+		lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
 		tasklet_enable(&engine->irq_tasklet);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8303c4bef836..ef054832d375 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -276,7 +276,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	struct i915_gem_active *active, *next;
 
-	lockdep_assert_held(&request->i915->drm.struct_mutex);
+	if (!test_bit(I915_RESET_ENGINE, &request->i915->gpu_error.flags))
+		lockdep_assert_held(&request->i915->drm.struct_mutex);
+
 	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
 	GEM_BUG_ON(!i915_gem_request_completed(request));
 	GEM_BUG_ON(!request->i915->gt.active_requests);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6543e77e0621..cee7cddff25f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2668,24 +2668,8 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
 	set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags);
 	wake_up_all(&dev_priv->gpu_error.wait_queue);
 
-	do {
-		/*
-		 * All state reset _must_ be completed before we update the
-		 * reset counter, for otherwise waiters might miss the reset
-		 * pending state and not properly drop locks, resulting in
-		 * deadlocks with the reset work.
-		 */
-		if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
-			i915_reset(dev_priv,
-				   dev_priv->gpu_error.reset_engine_mask);
-			mutex_unlock(&dev_priv->drm.struct_mutex);
-		}
-
-		/* We need to wait for anyone holding the lock to wakeup */
-	} while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
-				     I915_RESET_HANDOFF,
-				     TASK_UNINTERRUPTIBLE,
-				     HZ));
+	i915_reset(dev_priv,
+		   dev_priv->gpu_error.reset_engine_mask);
 
 	intel_finish_reset(dev_priv);
 
-- 
2.11.0



More information about the Intel-gfx-trybot mailing list