[PATCH 1/2] remove pin_mutex try4, extra bruteforce

Mon Jun 14 09:32:19 UTC 2021

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
 drivers/gpu/drm/i915/gt/intel_context.c       | 45 ++++---------
 drivers/gpu/drm/i915/gt/intel_context.h       | 13 ++--
 drivers/gpu/drm/i915/gt/intel_context_param.c | 26 +++-----
 drivers/gpu/drm/i915/gt/intel_context_types.h |  3 +-
 .../drm/i915/gt/intel_execlists_submission.c  | 12 ++--
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 63 ++++++++++++-------
 drivers/gpu/drm/i915/gt/intel_lrc.h           |  4 +-
 .../gpu/drm/i915/gt/intel_ring_submission.c   | 27 ++++++--
 drivers/gpu/drm/i915/gt/mock_engine.c         |  4 +-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 +-
 10 files changed, 106 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 4033184f13b9..705c781f4375 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -51,26 +51,18 @@ intel_context_create(struct intel_engine_cs *engine)
 
 int intel_context_alloc_state(struct intel_context *ce)
 {
-	int err = 0;
-
-	if (mutex_lock_interruptible(&ce->pin_mutex))
-		return -EINTR;
+	int err;
 
-	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
-		if (intel_context_is_banned(ce)) {
-			err = -EIO;
-			goto unlock;
-		}
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		return 0;
 
-		err = ce->ops->alloc(ce);
-		if (unlikely(err))
-			goto unlock;
+	if (intel_context_is_banned(ce))
+		return -EIO;
 
-		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
-	}
+	err = ce->ops->alloc_state(ce);
+	if (!err)
+		GEM_WARN_ON(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags));
 
-unlock:
-	mutex_unlock(&ce->pin_mutex);
 	return err;
 }
 
@@ -213,12 +205,6 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
 			return err;
 	}
 
-	/*
-	 * We always pin the context/ring/timeline here, to ensure a pin
-	 * refcount for __intel_context_active(), which prevent a lock
-	 * inversion of ce->pin_mutex vs dma_resv_lock().
-	 */
-
 	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
 	if (!err && ce->ring->vma->obj)
 		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
@@ -237,24 +223,20 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
 	if (err)
 		goto err_release;
 
-	err = mutex_lock_interruptible(&ce->pin_mutex);
-	if (err)
-		goto err_post_unpin;
-
 	if (unlikely(intel_context_is_closed(ce))) {
 		err = -ENOENT;
-		goto err_unlock;
+		goto err_post_unpin;
 	}
 
 	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
 		err = intel_context_active_acquire(ce);
 		if (unlikely(err))
-			goto err_unlock;
+			goto err_post_unpin;
 
 		err = ce->ops->pin(ce, vaddr);
 		if (err) {
 			intel_context_active_release(ce);
-			goto err_unlock;
+			goto err_post_unpin;
 		}
 
 		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
@@ -268,8 +250,6 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
 
 	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
 
-err_unlock:
-	mutex_unlock(&ce->pin_mutex);
 err_post_unpin:
 	if (!handoff)
 		ce->ops->post_unpin(ce);
@@ -381,8 +361,6 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
 	spin_lock_init(&ce->signal_lock);
 	INIT_LIST_HEAD(&ce->signals);
 
-	mutex_init(&ce->pin_mutex);
-
 	i915_active_init(&ce->active,
 			 __intel_context_active, __intel_context_retire, 0);
 }
@@ -393,7 +371,6 @@ void intel_context_fini(struct intel_context *ce)
 		intel_timeline_put(ce->timeline);
 	i915_vm_put(ce->vm);
 
-	mutex_destroy(&ce->pin_mutex);
 	i915_active_fini(&ce->active);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f83a73a2b39f..0de56c50e828 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -49,9 +49,14 @@ int intel_context_reconfigure_sseu(struct intel_context *ce,
  * intel_context_is_pinned() remains stable.
  */
 static inline int intel_context_lock_pinned(struct intel_context *ce)
-	__acquires(ce->pin_mutex)
+	__acquires(&ce->state->obj->base.resv->lock)
 {
-	return mutex_lock_interruptible(&ce->pin_mutex);
+	int ret = intel_context_alloc_state(ce);
+
+	if (ret)
+		return ret;
+
+	return i915_gem_object_lock_interruptible(ce->state->obj, NULL);
 }
 
 /**
@@ -76,9 +81,9 @@ intel_context_is_pinned(struct intel_context *ce)
  * Releases the lock earlier acquired by intel_context_unlock_pinned().
  */
 static inline void intel_context_unlock_pinned(struct intel_context *ce)
-	__releases(ce->pin_mutex)
+	__releases(&ce->state->obj->base.resv->lock)
 {
-	mutex_unlock(&ce->pin_mutex);
+	i915_gem_object_unlock(ce->state->obj);
 }
 
 int __intel_context_do_pin(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c
index 65dcd090245d..36819baa3377 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_param.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.c
@@ -10,6 +10,7 @@
 
 int intel_context_set_ring_size(struct intel_context *ce, long sz)
 {
+	struct intel_ring *ring;
 	int err;
 
 	if (intel_context_lock_pinned(ce))
@@ -24,24 +25,17 @@ int intel_context_set_ring_size(struct intel_context *ce, long sz)
 		goto unlock;
 	}
 
-	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
-		struct intel_ring *ring;
-
-		/* Replace the existing ringbuffer */
-		ring = intel_engine_create_ring(ce->engine, sz);
-		if (IS_ERR(ring)) {
-			err = PTR_ERR(ring);
-			goto unlock;
-		}
-
-		intel_ring_put(ce->ring);
-		ce->ring = ring;
-
-		/* Context image will be updated on next pin */
-	} else {
-		ce->ring = __intel_context_ring_size(sz);
+	/* Replace the existing ringbuffer */
+	ring = intel_engine_create_ring(ce->engine, sz);
+	if (IS_ERR(ring)) {
+		err = PTR_ERR(ring);
+		goto unlock;
 	}
 
+	intel_ring_put(ce->ring);
+	ce->ring = ring;
+	/* Context image will be updated on next pin */
+
 unlock:
 	intel_context_unlock_pinned(ce);
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ed8c447a7346..e6e57566adf0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -33,7 +33,8 @@ struct intel_context_ops {
 #define COPS_HAS_INFLIGHT_BIT 0
 #define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)
 
-	int (*alloc)(struct intel_context *ce);
+	/* alloc_state() may be called multiple times without lock held, so have to careful here */
+	int (*alloc_state)(struct intel_context *ce);
 
 	int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
 	int (*pin)(struct intel_context *ce, void *vaddr);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index fc77592d88a9..4515985e6474 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2528,15 +2528,15 @@ static int execlists_context_pin(struct intel_context *ce, void *vaddr)
 	return lrc_pin(ce, ce->engine, vaddr);
 }
 
-static int execlists_context_alloc(struct intel_context *ce)
+static int execlists_context_alloc_state(struct intel_context *ce)
 {
-	return lrc_alloc(ce, ce->engine);
+	return lrc_alloc_state(ce, ce->engine);
 }
 
 static const struct intel_context_ops execlists_context_ops = {
 	.flags = COPS_HAS_INFLIGHT,
 
-	.alloc = execlists_context_alloc,
+	.alloc_state = execlists_context_alloc_state,
 
 	.pre_pin = execlists_context_pre_pin,
 	.pin = execlists_context_pin,
@@ -3394,11 +3394,11 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
 		swap(ve->siblings[swp], ve->siblings[0]);
 }
 
-static int virtual_context_alloc(struct intel_context *ce)
+static int virtual_context_alloc_state(struct intel_context *ce)
 {
 	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
 
-	return lrc_alloc(ce, ve->siblings[0]);
+	return lrc_alloc_state(ce, ve->siblings[0]);
 }
 
 static int virtual_context_pre_pin(struct intel_context *ce,
@@ -3443,7 +3443,7 @@ static void virtual_context_exit(struct intel_context *ce)
 static const struct intel_context_ops virtual_context_ops = {
 	.flags = COPS_HAS_INFLIGHT,
 
-	.alloc = virtual_context_alloc,
+	.alloc_state = virtual_context_alloc_state,
 
 	.pre_pin = virtual_context_pre_pin,
 	.pin = virtual_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a27bac0a4bfb..c94a193bc412 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -806,7 +806,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
 		context_size += I915_GTT_PAGE_SIZE; /* for redzone */
 
 	if (GRAPHICS_VER(engine->i915) == 12) {
-		ce->wa_bb_page = context_size / PAGE_SIZE;
+		/* harmless to repeatedly write, since this is always the same */
+		WRITE_ONCE(ce->wa_bb_page, context_size / PAGE_SIZE);
 		context_size += PAGE_SIZE;
 	}
 
@@ -833,23 +834,35 @@ pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
 	return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
 }
 
-int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
+int lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
 {
 	struct intel_ring *ring;
-	struct i915_vma *vma;
+	struct i915_vma *vma = NULL, *old_vma;
 	int err;
 
-	GEM_BUG_ON(ce->state);
+	vma = READ_ONCE(ce->state);
+	if (likely(!vma)) {
+		vma = __lrc_alloc_state(ce, engine);
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
+
+		old_vma = cmpxchg(&ce->state, NULL, vma);
+		if (old_vma) {
+			i915_vma_put(vma);
+			vma = old_vma;
+		}
+	}
+
+	err = i915_gem_object_lock_interruptible(vma->obj, NULL);
+	if (err)
+		return err;
 
-	vma = __lrc_alloc_state(ce, engine);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		goto err_unlock;
 
 	ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
-	if (IS_ERR(ring)) {
-		err = PTR_ERR(ring);
-		goto err_vma;
-	}
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	if (!page_mask_bits(ce->timeline)) {
 		struct intel_timeline *tl;
@@ -862,23 +875,23 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
 			tl = pinned_timeline(ce, engine);
 		else
 			tl = intel_timeline_create(engine->gt);
-		if (IS_ERR(tl)) {
-			err = PTR_ERR(tl);
-			goto err_ring;
-		}
 
-		ce->timeline = tl;
+		if (IS_ERR(tl))
+			err = PTR_ERR(tl);
+		else
+			ce->timeline = tl;
 	}
 
-	ce->ring = ring;
-	ce->state = vma;
+	if (!err)
+		ce->ring = ring;
+	else
+		intel_ring_put(ring);
 
-	return 0;
+	smp_mb__before_atomic();
+	set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
 
-err_ring:
-	intel_ring_put(ring);
-err_vma:
-	i915_vma_put(vma);
+err_unlock:
+	i915_gem_object_unlock(vma->obj);
 	return err;
 }
 
@@ -941,8 +954,10 @@ void lrc_fini(struct intel_context *ce)
 	if (!ce->state)
 		return;
 
-	intel_ring_put(fetch_and_zero(&ce->ring));
 	i915_vma_put(fetch_and_zero(&ce->state));
+
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		intel_ring_put(fetch_and_zero(&ce->ring));
 }
 
 void lrc_destroy(struct kref *kref)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 7f697845c4cf..60d5e1113987 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -29,8 +29,8 @@ struct intel_ring;
 void lrc_init_wa_ctx(struct intel_engine_cs *engine);
 void lrc_fini_wa_ctx(struct intel_engine_cs *engine);
 
-int lrc_alloc(struct intel_context *ce,
-	      struct intel_engine_cs *engine);
+int lrc_alloc_state(struct intel_context *ce,
+		    struct intel_engine_cs *engine);
 void lrc_reset(struct intel_context *ce);
 void lrc_fini(struct intel_context *ce);
 void lrc_destroy(struct kref *kref);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 0c423f096e2b..0a7e52bf7169 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -549,12 +549,22 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	return ERR_PTR(err);
 }
 
-static int ring_context_alloc(struct intel_context *ce)
+static int ring_context_alloc_state(struct intel_context *ce)
 {
 	struct intel_engine_cs *engine = ce->engine;
+	int err;
+
+	err = i915_gem_object_lock(engine->legacy.timeline->hwsp_ggtt->obj, NULL);
+	if (err)
+		return err;
 
 	/* One ringbuffer to rule them all */
 	GEM_BUG_ON(!engine->legacy.ring);
+
+	/* only run once */
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		goto err_unlock;
+
 	ce->ring = engine->legacy.ring;
 	ce->timeline = intel_timeline_get(engine->legacy.timeline);
 
@@ -563,13 +573,20 @@ static int ring_context_alloc(struct intel_context *ce)
 		struct i915_vma *vma;
 
 		vma = alloc_context_vma(engine);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto err_unlock;
+		}
 
 		ce->state = vma;
 	}
 
-	return 0;
+	smp_mb__before_atomic();
+	set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+err_unlock:
+	i915_gem_object_unlock(engine->legacy.timeline->hwsp_ggtt->obj);
+	return err;
 }
 
 static int ring_context_pin(struct intel_context *ce, void *unused)
@@ -584,7 +601,7 @@ static void ring_context_reset(struct intel_context *ce)
 }
 
 static const struct intel_context_ops ring_context_ops = {
-	.alloc = ring_context_alloc,
+	.alloc_state = ring_context_alloc_state,
 
 	.pre_pin = ring_context_pre_pin,
 	.pin = ring_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 32589c6625e1..cf77c3197bb3 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -142,7 +142,7 @@ static void mock_context_destroy(struct kref *ref)
 	intel_context_free(ce);
 }
 
-static int mock_context_alloc(struct intel_context *ce)
+static int mock_context_alloc_state(struct intel_context *ce)
 {
 	int err;
 
@@ -182,7 +182,7 @@ static void mock_context_reset(struct intel_context *ce)
 }
 
 static const struct intel_context_ops mock_context_ops = {
-	.alloc = mock_context_alloc,
+	.alloc_state = mock_context_alloc_state,
 
 	.pre_pin = mock_context_pre_pin,
 	.pin = mock_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 7c8ff9792f7b..e585d108140a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -432,9 +432,9 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 	}
 }
 
-static int guc_context_alloc(struct intel_context *ce)
+static int guc_context_alloc_state(struct intel_context *ce)
 {
-	return lrc_alloc(ce, ce->engine);
+	return lrc_alloc_state(ce, ce->engine);
 }
 
 static int guc_context_pre_pin(struct intel_context *ce,
@@ -450,7 +450,7 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr)
 }
 
 static const struct intel_context_ops guc_context_ops = {
-	.alloc = guc_context_alloc,
+	.alloc_state = guc_context_alloc_state,
 
 	.pre_pin = guc_context_pre_pin,
 	.pin = guc_context_pin,
-- 
2.31.0