[Intel-gfx] [PATCH] drm/i915: Prevent lock-cycles between GPU waits and GPU resets

Chris Wilson chris at chris-wilson.co.uk
Tue Jun 11 14:27:47 UTC 2019


We cannot allow ourselves to wait on the GPU while holding any lock we
may need to reset the GPU. While there is not an explicit lock between
the two operations, lockdep cannot detect the dependency. So let's tell
lockdep about the wait/reset dependency with an explicit lockmap.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gt/intel_reset.c | 5 ++++-
 drivers/gpu/drm/i915/i915_drv.h       | 8 ++++++++
 drivers/gpu/drm/i915/i915_gem.c       | 3 +++
 drivers/gpu/drm/i915/i915_request.c   | 2 ++
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 60d24110af80..6368b37f26d1 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -978,10 +978,11 @@ void i915_reset(struct drm_i915_private *i915,
 
 	might_sleep();
 	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+	lock_map_acquire(&i915->gt.reset_lockmap);
 
 	/* Clear any previous failed attempts at recovery. Time to try again. */
 	if (!__i915_gem_unset_wedged(i915))
-		return;
+		goto unlock;
 
 	if (reason)
 		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
@@ -1029,6 +1030,8 @@ void i915_reset(struct drm_i915_private *i915,
 
 finish:
 	reset_finish(i915);
+unlock:
+	lock_map_release(&i915->gt.reset_lockmap);
 	return;
 
 taint:
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0ea7f78ae227..9cfa9500fcc4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1919,6 +1919,14 @@ struct drm_i915_private {
 		ktime_t last_init_time;
 
 		struct i915_vma *scratch;
+
+		/*
+		 * We must never wait on the GPU while holding a lock we may
+		 * need to perform a GPU reset. So while we don't need to
+		 * serialise wait/reset with an explicit lock, we do want
+		 * lockdep to detect potential dependency cycles.
+		 */
+		struct lockdep_map reset_lockmap;
 	} gt;
 
 	struct {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e980c1ee3dcf..24f0f3db1bfb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1782,6 +1782,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
 
 int i915_gem_init_early(struct drm_i915_private *dev_priv)
 {
+	static struct lock_class_key reset_key;
 	int err;
 
 	intel_gt_pm_init(dev_priv);
@@ -1789,6 +1790,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
 	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
 	spin_lock_init(&dev_priv->gt.closed_lock);
+	lockdep_init_map(&dev_priv->gt.reset_lockmap,
+			 "i915.reset", &reset_key, 0);
 
 	i915_gem_init__mm(dev_priv);
 	i915_gem_init__pm(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index e9b59eea4f10..1cbc3ef4fc27 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1444,6 +1444,7 @@ long i915_request_wait(struct i915_request *rq,
 		return -ETIME;
 
 	trace_i915_request_wait_begin(rq, flags);
+	lock_map_acquire(&rq->i915->gt.reset_lockmap);
 
 	/*
 	 * Optimistic spin before touching IRQs.
@@ -1517,6 +1518,7 @@ long i915_request_wait(struct i915_request *rq,
 	dma_fence_remove_callback(&rq->fence, &wait.cb);
 
 out:
+	lock_map_release(&rq->i915->gt.reset_lockmap);
 	trace_i915_request_wait_end(rq);
 	return timeout;
 }
-- 
2.20.1



More information about the Intel-gfx mailing list