[PATCH 1/2] remove pin_mutex try4, extra bruteforce
Maarten Lankhorst
maarten.lankhorst at linux.intel.com
Mon Jun 14 09:32:19 UTC 2021
Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_context.c | 45 ++++---------
drivers/gpu/drm/i915/gt/intel_context.h | 13 ++--
drivers/gpu/drm/i915/gt/intel_context_param.c | 26 +++-----
drivers/gpu/drm/i915/gt/intel_context_types.h | 3 +-
.../drm/i915/gt/intel_execlists_submission.c | 12 ++--
drivers/gpu/drm/i915/gt/intel_lrc.c | 63 ++++++++++++-------
drivers/gpu/drm/i915/gt/intel_lrc.h | 4 +-
.../gpu/drm/i915/gt/intel_ring_submission.c | 27 ++++++--
drivers/gpu/drm/i915/gt/mock_engine.c | 4 +-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 +-
10 files changed, 106 insertions(+), 97 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 4033184f13b9..705c781f4375 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -51,26 +51,18 @@ intel_context_create(struct intel_engine_cs *engine)
int intel_context_alloc_state(struct intel_context *ce)
{
- int err = 0;
-
- if (mutex_lock_interruptible(&ce->pin_mutex))
- return -EINTR;
+ int err;
- if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
- if (intel_context_is_banned(ce)) {
- err = -EIO;
- goto unlock;
- }
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+ return 0;
- err = ce->ops->alloc(ce);
- if (unlikely(err))
- goto unlock;
+ if (intel_context_is_banned(ce))
+ return -EIO;
- set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
- }
+ err = ce->ops->alloc_state(ce);
+ if (!err)
+ GEM_WARN_ON(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags));
-unlock:
- mutex_unlock(&ce->pin_mutex);
return err;
}
@@ -213,12 +205,6 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
return err;
}
- /*
- * We always pin the context/ring/timeline here, to ensure a pin
- * refcount for __intel_context_active(), which prevent a lock
- * inversion of ce->pin_mutex vs dma_resv_lock().
- */
-
err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
if (!err && ce->ring->vma->obj)
err = i915_gem_object_lock(ce->ring->vma->obj, ww);
@@ -237,24 +223,20 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
if (err)
goto err_release;
- err = mutex_lock_interruptible(&ce->pin_mutex);
- if (err)
- goto err_post_unpin;
-
if (unlikely(intel_context_is_closed(ce))) {
err = -ENOENT;
- goto err_unlock;
+ goto err_post_unpin;
}
if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
err = intel_context_active_acquire(ce);
if (unlikely(err))
- goto err_unlock;
+ goto err_post_unpin;
err = ce->ops->pin(ce, vaddr);
if (err) {
intel_context_active_release(ce);
- goto err_unlock;
+ goto err_post_unpin;
}
CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
@@ -268,8 +250,6 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
-err_unlock:
- mutex_unlock(&ce->pin_mutex);
err_post_unpin:
if (!handoff)
ce->ops->post_unpin(ce);
@@ -381,8 +361,6 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
spin_lock_init(&ce->signal_lock);
INIT_LIST_HEAD(&ce->signals);
- mutex_init(&ce->pin_mutex);
-
i915_active_init(&ce->active,
__intel_context_active, __intel_context_retire, 0);
}
@@ -393,7 +371,6 @@ void intel_context_fini(struct intel_context *ce)
intel_timeline_put(ce->timeline);
i915_vm_put(ce->vm);
- mutex_destroy(&ce->pin_mutex);
i915_active_fini(&ce->active);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index f83a73a2b39f..0de56c50e828 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -49,9 +49,14 @@ int intel_context_reconfigure_sseu(struct intel_context *ce,
* intel_context_is_pinned() remains stable.
*/
static inline int intel_context_lock_pinned(struct intel_context *ce)
- __acquires(ce->pin_mutex)
+ __acquires(&ce->state->obj->base.resv->lock)
{
- return mutex_lock_interruptible(&ce->pin_mutex);
+ int ret = intel_context_alloc_state(ce);
+
+ if (ret)
+ return ret;
+
+ return i915_gem_object_lock_interruptible(ce->state->obj, NULL);
}
/**
@@ -76,9 +81,9 @@ intel_context_is_pinned(struct intel_context *ce)
* Releases the lock earlier acquired by intel_context_unlock_pinned().
*/
static inline void intel_context_unlock_pinned(struct intel_context *ce)
- __releases(ce->pin_mutex)
+ __releases(&ce->state->obj->base.resv->lock)
{
- mutex_unlock(&ce->pin_mutex);
+ i915_gem_object_unlock(ce->state->obj);
}
int __intel_context_do_pin(struct intel_context *ce);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c
index 65dcd090245d..36819baa3377 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_param.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.c
@@ -10,6 +10,7 @@
int intel_context_set_ring_size(struct intel_context *ce, long sz)
{
+ struct intel_ring *ring;
int err;
if (intel_context_lock_pinned(ce))
@@ -24,24 +25,17 @@ int intel_context_set_ring_size(struct intel_context *ce, long sz)
goto unlock;
}
- if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
- struct intel_ring *ring;
-
- /* Replace the existing ringbuffer */
- ring = intel_engine_create_ring(ce->engine, sz);
- if (IS_ERR(ring)) {
- err = PTR_ERR(ring);
- goto unlock;
- }
-
- intel_ring_put(ce->ring);
- ce->ring = ring;
-
- /* Context image will be updated on next pin */
- } else {
- ce->ring = __intel_context_ring_size(sz);
+ /* Replace the existing ringbuffer */
+ ring = intel_engine_create_ring(ce->engine, sz);
+ if (IS_ERR(ring)) {
+ err = PTR_ERR(ring);
+ goto unlock;
}
+ intel_ring_put(ce->ring);
+ ce->ring = ring;
+ /* Context image will be updated on next pin */
+
unlock:
intel_context_unlock_pinned(ce);
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index ed8c447a7346..e6e57566adf0 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -33,7 +33,8 @@ struct intel_context_ops {
#define COPS_HAS_INFLIGHT_BIT 0
#define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)
- int (*alloc)(struct intel_context *ce);
+ /* alloc_state() may be called multiple times without lock held, so have to careful here */
+ int (*alloc_state)(struct intel_context *ce);
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
int (*pin)(struct intel_context *ce, void *vaddr);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index fc77592d88a9..4515985e6474 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2528,15 +2528,15 @@ static int execlists_context_pin(struct intel_context *ce, void *vaddr)
return lrc_pin(ce, ce->engine, vaddr);
}
-static int execlists_context_alloc(struct intel_context *ce)
+static int execlists_context_alloc_state(struct intel_context *ce)
{
- return lrc_alloc(ce, ce->engine);
+ return lrc_alloc_state(ce, ce->engine);
}
static const struct intel_context_ops execlists_context_ops = {
.flags = COPS_HAS_INFLIGHT,
- .alloc = execlists_context_alloc,
+ .alloc_state = execlists_context_alloc_state,
.pre_pin = execlists_context_pre_pin,
.pin = execlists_context_pin,
@@ -3394,11 +3394,11 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
swap(ve->siblings[swp], ve->siblings[0]);
}
-static int virtual_context_alloc(struct intel_context *ce)
+static int virtual_context_alloc_state(struct intel_context *ce)
{
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
- return lrc_alloc(ce, ve->siblings[0]);
+ return lrc_alloc_state(ce, ve->siblings[0]);
}
static int virtual_context_pre_pin(struct intel_context *ce,
@@ -3443,7 +3443,7 @@ static void virtual_context_exit(struct intel_context *ce)
static const struct intel_context_ops virtual_context_ops = {
.flags = COPS_HAS_INFLIGHT,
- .alloc = virtual_context_alloc,
+ .alloc_state = virtual_context_alloc_state,
.pre_pin = virtual_context_pre_pin,
.pin = virtual_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a27bac0a4bfb..c94a193bc412 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -806,7 +806,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
if (GRAPHICS_VER(engine->i915) == 12) {
- ce->wa_bb_page = context_size / PAGE_SIZE;
+ /* harmless to repeatedly write, since this is always the same */
+ WRITE_ONCE(ce->wa_bb_page, context_size / PAGE_SIZE);
context_size += PAGE_SIZE;
}
@@ -833,23 +834,35 @@ pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}
-int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
+int lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
struct intel_ring *ring;
- struct i915_vma *vma;
+ struct i915_vma *vma = NULL, *old_vma;
int err;
- GEM_BUG_ON(ce->state);
+ vma = READ_ONCE(ce->state);
+ if (likely(!vma)) {
+ vma = __lrc_alloc_state(ce, engine);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ old_vma = cmpxchg(&ce->state, NULL, vma);
+ if (old_vma) {
+ i915_vma_put(vma);
+ vma = old_vma;
+ }
+ }
+
+ err = i915_gem_object_lock_interruptible(vma->obj, NULL);
+ if (err)
+ return err;
- vma = __lrc_alloc_state(ce, engine);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+ goto err_unlock;
ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
- if (IS_ERR(ring)) {
- err = PTR_ERR(ring);
- goto err_vma;
- }
+ if (IS_ERR(ring))
+ return PTR_ERR(ring);
if (!page_mask_bits(ce->timeline)) {
struct intel_timeline *tl;
@@ -862,23 +875,23 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
tl = pinned_timeline(ce, engine);
else
tl = intel_timeline_create(engine->gt);
- if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto err_ring;
- }
- ce->timeline = tl;
+ if (IS_ERR(tl))
+ err = PTR_ERR(tl);
+ else
+ ce->timeline = tl;
}
- ce->ring = ring;
- ce->state = vma;
+ if (!err)
+ ce->ring = ring;
+ else
+ intel_ring_put(ring);
- return 0;
+ smp_mb__before_atomic();
+ set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
-err_ring:
- intel_ring_put(ring);
-err_vma:
- i915_vma_put(vma);
+err_unlock:
+ i915_gem_object_unlock(vma->obj);
return err;
}
@@ -941,8 +954,10 @@ void lrc_fini(struct intel_context *ce)
if (!ce->state)
return;
- intel_ring_put(fetch_and_zero(&ce->ring));
i915_vma_put(fetch_and_zero(&ce->state));
+
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+ intel_ring_put(fetch_and_zero(&ce->ring));
}
void lrc_destroy(struct kref *kref)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 7f697845c4cf..60d5e1113987 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -29,8 +29,8 @@ struct intel_ring;
void lrc_init_wa_ctx(struct intel_engine_cs *engine);
void lrc_fini_wa_ctx(struct intel_engine_cs *engine);
-int lrc_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine);
+int lrc_alloc_state(struct intel_context *ce,
+ struct intel_engine_cs *engine);
void lrc_reset(struct intel_context *ce);
void lrc_fini(struct intel_context *ce);
void lrc_destroy(struct kref *kref);
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 0c423f096e2b..0a7e52bf7169 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -549,12 +549,22 @@ alloc_context_vma(struct intel_engine_cs *engine)
return ERR_PTR(err);
}
-static int ring_context_alloc(struct intel_context *ce)
+static int ring_context_alloc_state(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
+ int err;
+
+ err = i915_gem_object_lock(engine->legacy.timeline->hwsp_ggtt->obj, NULL);
+ if (err)
+ return err;
/* One ringbuffer to rule them all */
GEM_BUG_ON(!engine->legacy.ring);
+
+ /* only run once */
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+ goto err_unlock;
+
ce->ring = engine->legacy.ring;
ce->timeline = intel_timeline_get(engine->legacy.timeline);
@@ -563,13 +573,20 @@ static int ring_context_alloc(struct intel_context *ce)
struct i915_vma *vma;
vma = alloc_context_vma(engine);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_unlock;
+ }
ce->state = vma;
}
- return 0;
+ smp_mb__before_atomic();
+ set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+err_unlock:
+ i915_gem_object_unlock(engine->legacy.timeline->hwsp_ggtt->obj);
+ return err;
}
static int ring_context_pin(struct intel_context *ce, void *unused)
@@ -584,7 +601,7 @@ static void ring_context_reset(struct intel_context *ce)
}
static const struct intel_context_ops ring_context_ops = {
- .alloc = ring_context_alloc,
+ .alloc_state = ring_context_alloc_state,
.pre_pin = ring_context_pre_pin,
.pin = ring_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 32589c6625e1..cf77c3197bb3 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -142,7 +142,7 @@ static void mock_context_destroy(struct kref *ref)
intel_context_free(ce);
}
-static int mock_context_alloc(struct intel_context *ce)
+static int mock_context_alloc_state(struct intel_context *ce)
{
int err;
@@ -182,7 +182,7 @@ static void mock_context_reset(struct intel_context *ce)
}
static const struct intel_context_ops mock_context_ops = {
- .alloc = mock_context_alloc,
+ .alloc_state = mock_context_alloc_state,
.pre_pin = mock_context_pre_pin,
.pin = mock_context_pin,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 7c8ff9792f7b..e585d108140a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -432,9 +432,9 @@ void intel_guc_submission_fini(struct intel_guc *guc)
}
}
-static int guc_context_alloc(struct intel_context *ce)
+static int guc_context_alloc_state(struct intel_context *ce)
{
- return lrc_alloc(ce, ce->engine);
+ return lrc_alloc_state(ce, ce->engine);
}
static int guc_context_pre_pin(struct intel_context *ce,
@@ -450,7 +450,7 @@ static int guc_context_pin(struct intel_context *ce, void *vaddr)
}
static const struct intel_context_ops guc_context_ops = {
- .alloc = guc_context_alloc,
+ .alloc_state = guc_context_alloc_state,
.pre_pin = guc_context_pre_pin,
.pin = guc_context_pin,
--
2.31.0
More information about the Intel-gfx-trybot
mailing list