[Intel-gfx] [PATCH 3/5] drm/i915: Remove requirement for holding i915_request.lock for breadcrumbs
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jul 16 15:09:50 UTC 2020
On 16/07/2020 12:33, Chris Wilson wrote:
> Since the breadcrumb enabling/cancelling itself is serialised by the
> breadcrumbs.irq_lock, with a bit of care we can remove the outer
> serialisation with i915_request.lock for concurrent
> dma_fence_enable_signaling(). This has the important side-effect of
> eliminating the nested i915_request.lock within request submission.
>
> The challenge in serialisation is around the unsubmission where we take
> an active request that wants a breadcrumb on the signaling engine and
> put it to sleep. We do not want a concurrent
> dma_fence_enable_signaling() to attach a breadcrumb as we unsubmit, so
> we must mark the request as no longer active before serialising with the
> concurrent enable-signaling.
>
> On retire, we serialise with the concurrent enable-signaling, but
> instead of clearing ACTIVE, we mark it as SIGNALED.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 130 +++++++++++++-------
> drivers/gpu/drm/i915/gt/intel_lrc.c | 14 ---
> drivers/gpu/drm/i915/i915_request.c | 39 +++---
> 3 files changed, 100 insertions(+), 83 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index 91786310c114..a0f52417238c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -220,17 +220,17 @@ static void signal_irq_work(struct irq_work *work)
> }
> }
>
> -static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
> +static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
> {
> struct intel_engine_cs *engine =
> container_of(b, struct intel_engine_cs, breadcrumbs);
>
> lockdep_assert_held(&b->irq_lock);
> if (b->irq_armed)
> - return true;
> + return;
>
> if (!intel_gt_pm_get_if_awake(engine->gt))
> - return false;
> + return;
>
> /*
> * The breadcrumb irq will be disarmed on the interrupt after the
> @@ -250,8 +250,6 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
>
> if (!b->irq_enabled++)
> irq_enable(engine);
> -
> - return true;
> }
>
> void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
> @@ -310,57 +308,99 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
> {
> }
>
> -bool i915_request_enable_breadcrumb(struct i915_request *rq)
> +static void insert_breadcrumb(struct i915_request *rq,
> + struct intel_breadcrumbs *b)
> {
> - lockdep_assert_held(&rq->lock);
> + struct intel_context *ce = rq->context;
> + struct list_head *pos;
>
> - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> - return true;
> + if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> + return;
>
> - if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
> - struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
> - struct intel_context *ce = rq->context;
> - struct list_head *pos;
> + __intel_breadcrumbs_arm_irq(b);
>
> - spin_lock(&b->irq_lock);
> + /*
> + * We keep the seqno in retirement order, so we can break
> + * inside intel_engine_signal_breadcrumbs as soon as we've
> + * passed the last completed request (or seen a request that
> + * hasn't event started). We could walk the timeline->requests,
> + * but keeping a separate signalers_list has the advantage of
> + * hopefully being much smaller than the full list and so
> + * provides faster iteration and detection when there are no
> + * more interrupts required for this context.
> + *
> + * We typically expect to add new signalers in order, so we
> + * start looking for our insertion point from the tail of
> + * the list.
> + */
> + list_for_each_prev(pos, &ce->signals) {
> + struct i915_request *it =
> + list_entry(pos, typeof(*it), signal_link);
>
> - if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
> - goto unlock;
> + if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
> + break;
> + }
> + list_add(&rq->signal_link, pos);
> + if (pos == &ce->signals) /* catch transitions from empty list */
> + list_move_tail(&ce->signal_link, &b->signalers);
> + GEM_BUG_ON(!check_signal_order(ce, rq));
>
> - if (!__intel_breadcrumbs_arm_irq(b))
> - goto unlock;
> + set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> +}
>
> - /*
> - * We keep the seqno in retirement order, so we can break
> - * inside intel_engine_signal_breadcrumbs as soon as we've
> - * passed the last completed request (or seen a request that
> - * hasn't event started). We could walk the timeline->requests,
> - * but keeping a separate signalers_list has the advantage of
> - * hopefully being much smaller than the full list and so
> - * provides faster iteration and detection when there are no
> - * more interrupts required for this context.
> - *
> - * We typically expect to add new signalers in order, so we
> - * start looking for our insertion point from the tail of
> - * the list.
> - */
> - list_for_each_prev(pos, &ce->signals) {
> - struct i915_request *it =
> - list_entry(pos, typeof(*it), signal_link);
> +bool i915_request_enable_breadcrumb(struct i915_request *rq)
> +{
> + struct intel_breadcrumbs *b;
>
> - if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
> - break;
> - }
> - list_add(&rq->signal_link, pos);
> - if (pos == &ce->signals) /* catch transitions from empty list */
> - list_move_tail(&ce->signal_link, &b->signalers);
> - GEM_BUG_ON(!check_signal_order(ce, rq));
> + /* Seralises with i915_request_retire() using rq->lock */
Serialises
> + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> + return true;
>
> - set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
> -unlock:
> + /*
> + * Peek at i915_request_submit()/i915_request_unsubmit() status.
> + *
> + * If the request is not yet active (and not signaled), we will
> + * attach the breadcrumb later.
> + */
> + if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
> + return true;
> +
> + /*
> + * rq->engine is locked by rq->engine->active.lock. That however
> + * is not known until after rq->engine has been dereferenced and
> + * the lock acquired. Hence we acquire the lock and then validate
> + * that rq->engine still matches the lock we hold for it.
> + *
> + * Here, we are using the breadcrumb lock as a proxy for the
> + * rq->engine->active.lock, and we know that since the breadcrumb
> + * will be serialised within i915_request_submit/i915_request_unsubmit,
> + * the engine cannot change while active as long as we hold the
> + * breadcrumb lock on that engine.
> + *
> + * From the dma_fence_enable_signaling() path, we are outside of the
> + * request submit/unsubmit path, and so we must be more careful to
> + * acquire the right lock.
> + */
> + b = &READ_ONCE(rq->engine)->breadcrumbs;
> + spin_lock(&b->irq_lock);
> + while (unlikely(b != &READ_ONCE(rq->engine)->breadcrumbs)) {
> spin_unlock(&b->irq_lock);
> + b = &READ_ONCE(rq->engine)->breadcrumbs;
> + spin_lock(&b->irq_lock);
> }
>
> + /*
> + * Now that we are finally serialised with request submit/unsubmit,
> + * [with b->irq_lock] and with i915_request_reitre() [via checking
retire
> + * SIGNALED with rq->lock] confirm the request is indeed active. If
> + * it is no longer active, the breadcrumb will be attached upon
> + * i915_request_submit().
> + */
> + if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
> + insert_breadcrumb(rq, b);
> +
> + spin_unlock(&b->irq_lock);
> +
> return !__request_completed(rq);
> }
>
> @@ -368,8 +408,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
> {
> struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
>
> - lockdep_assert_held(&rq->lock);
> -
> /*
> * We must wait for b->irq_lock so that we know the interrupt handler
> * has released its reference to the intel_context and has completed
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 29c0fde8b4df..21c16e31c4fe 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1148,20 +1148,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
> } else {
> struct intel_engine_cs *owner = rq->context->engine;
>
> - /*
> - * Decouple the virtual breadcrumb before moving it
> - * back to the virtual engine -- we don't want the
> - * request to complete in the background and try
> - * and cancel the breadcrumb on the virtual engine
> - * (instead of the old engine where it is linked)!
> - */
> - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> - &rq->fence.flags)) {
> - spin_lock_nested(&rq->lock,
> - SINGLE_DEPTH_NESTING);
> - i915_request_cancel_breadcrumb(rq);
> - spin_unlock(&rq->lock);
> - }
> WRITE_ONCE(rq->engine, owner);
> owner->submit_request(rq);
> active = NULL;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 2ef17b11ca4b..8c345ead04a6 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -320,11 +320,12 @@ bool i915_request_retire(struct i915_request *rq)
> dma_fence_signal_locked(&rq->fence);
> if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
> i915_request_cancel_breadcrumb(rq);
> + spin_unlock_irq(&rq->lock);
> +
> if (i915_request_has_waitboost(rq)) {
> GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
> atomic_dec(&rq->engine->gt->rps.num_waiters);
> }
> - spin_unlock_irq(&rq->lock);
>
> /*
> * We only loosely track inflight requests across preemption,
> @@ -608,17 +609,9 @@ bool __i915_request_submit(struct i915_request *request)
> */
> __notify_execute_cb_irq(request);
>
> - /* We may be recursing from the signal callback of another i915 fence */
> - if (!i915_request_signaled(request)) {
> - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> -
> - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
> - &request->fence.flags) &&
> - !i915_request_enable_breadcrumb(request))
> - intel_engine_signal_breadcrumbs(engine);
> -
> - spin_unlock(&request->lock);
> - }
> + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
> + !i915_request_enable_breadcrumb(request))
> + intel_engine_signal_breadcrumbs(engine);
>
> return result;
> }
> @@ -640,27 +633,27 @@ void __i915_request_unsubmit(struct i915_request *request)
> {
> struct intel_engine_cs *engine = request->engine;
>
> + /*
> + * Only unwind in reverse order, required so that the per-context list
> + * is kept in seqno/ring order.
> + */
> RQ_TRACE(request, "\n");
>
> GEM_BUG_ON(!irqs_disabled());
> lockdep_assert_held(&engine->active.lock);
>
> /*
> - * Only unwind in reverse order, required so that the per-context list
> - * is kept in seqno/ring order.
> + * Before we remove this breadcrumb from the signal list, we have
> + * to ensure that a concurrent dma_fence_enable_signaling() does not
> + * attach itself. We first mark the request as no longer active and
> + * make sure that is visible to other cores, and then remove the
> + * breadcrumb if attached.
> */
> -
> - /* We may be recursing from the signal callback of another i915 fence */
> - spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> -
> + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
> + clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
> if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
> i915_request_cancel_breadcrumb(request);
>
> - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
> - clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
> -
> - spin_unlock(&request->lock);
> -
> /* We've already spun, don't charge on resubmitting. */
> if (request->sched.semaphores && i915_request_started(request))
> request->sched.semaphores = 0;
>
I did not find a hole (race) after quite a bit of straining the grey
matter so.. lets see..
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list