[PATCH v2] drm/i915/gt: Ensure irqs' status does not change with spin_unlock
Krzysztof Karas
krzysztof.karas at intel.com
Tue Jan 14 11:54:29 UTC 2025
Hi Tvrtko,
> On 14/01/2025 09:00, Krzysztof Karas wrote:
> > spin_unlock() function enables irqs regardless of their state
>
> It doesn't, you confuse spin_unlock with spin_unlock_irq.
>
> > before spin_lock() was called. This might result in an interrupt
> > while holding a lock further down in the execution, as seen in
> > GitLab issue #13399.
> >
> > Try to remedy the problem by saving irq state before spin lock
> > acquisition.
>
> Please check guc_lrc_desc_unpin(). It gets called from the
> destroyed_worker_func as hinted by lockdep in 13399. There is a plain
> spin_lock() in there (in contradiction with itself). Fixing that one may be
> all that is needed to fix this correctly.
>
> If that turns out right then also:
>
> Fixes: 2f2cc53b5fe7 ("drm/i915/guc: Close deregister-context race against CT-loss")
> Cc: <stable at vger.kernel.org> # v6.9+
Thanks for explaining. I'll try that out then.
Krzysztof
>
> Regards,
>
> Tvrtko
>
> >
> > v2: add irqs' state save/restore calls to all locks/unlocks in
> > signal_irq_work() execution (Maciej)
> >
> > Signed-off-by: Krzysztof Karas <krzysztof.karas at intel.com>
> > ---
> > This issue is hit rarely on CI and I was not able to reproduce
> > it locally. There might be more places where we should save and
> > restore irq state, so I am not adding "Closes" label for the
> > issue yet.
> >
> > drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 21 ++++++++++++-------
> > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 +++--
> > 2 files changed, 16 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > index cc866773ba6f..dd5542726b41 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> > @@ -53,13 +53,15 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
> > static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
> > {
> > + unsigned long flags;
> > +
> > if (!b->irq_engine)
> > return;
> > - spin_lock(&b->irq_lock);
> > + spin_lock_irqsave(&b->irq_lock, flags);
> > if (!b->irq_armed)
> > __intel_breadcrumbs_arm_irq(b);
> > - spin_unlock(&b->irq_lock);
> > + spin_unlock_irqrestore(&b->irq_lock, flags);
> > }
> > static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> > @@ -76,10 +78,12 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> > static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> > {
> > - spin_lock(&b->irq_lock);
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&b->irq_lock, flags);
> > if (b->irq_armed)
> > __intel_breadcrumbs_disarm_irq(b);
> > - spin_unlock(&b->irq_lock);
> > + spin_unlock_irqrestore(&b->irq_lock, flags);
> > }
> > static void add_signaling_context(struct intel_breadcrumbs *b,
> > @@ -173,6 +177,7 @@ static void signal_irq_work(struct irq_work *work)
> > const ktime_t timestamp = ktime_get();
> > struct llist_node *signal, *sn;
> > struct intel_context *ce;
> > + unsigned long flags;
> > signal = NULL;
> > if (unlikely(!llist_empty(&b->signaled_requests)))
> > @@ -226,10 +231,10 @@ static void signal_irq_work(struct irq_work *work)
> > * spinlock as the callback chain may end up adding
> > * more signalers to the same context or engine.
> > */
> > - spin_lock(&ce->signal_lock);
> > + spin_lock_irqsave(&ce->signal_lock, flags);
> > list_del_rcu(&rq->signal_link);
> > release = remove_signaling_context(b, ce);
> > - spin_unlock(&ce->signal_lock);
> > + spin_unlock_irqrestore(&ce->signal_lock, flags);
> > if (release) {
> > if (intel_timeline_is_last(ce->timeline, rq))
> > add_retire(b, ce->timeline);
> > @@ -254,11 +259,11 @@ static void signal_irq_work(struct irq_work *work)
> > if (rq->engine->sched_engine->retire_inflight_request_prio)
> > rq->engine->sched_engine->retire_inflight_request_prio(rq);
> > - spin_lock(&rq->lock);
> > + spin_lock_irqsave(&rq->lock, flags);
> > list_replace(&rq->fence.cb_list, &cb_list);
> > __dma_fence_signal__timestamp(&rq->fence, timestamp);
> > __dma_fence_signal__notify(&rq->fence, &cb_list);
> > - spin_unlock(&rq->lock);
> > + spin_unlock_irqrestore(&rq->lock, flags);
> > i915_request_put(rq);
> > }
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 12f1ba7ca9c1..e9102f7246f5 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -4338,10 +4338,11 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq,
> > static void guc_retire_inflight_request_prio(struct i915_request *rq)
> > {
> > struct intel_context *ce = request_to_scheduling_context(rq);
> > + unsigned long flags;
> > - spin_lock(&ce->guc_state.lock);
> > + spin_lock_irqsave(&ce->guc_state.lock, flags);
> > guc_prio_fini(rq, ce);
> > - spin_unlock(&ce->guc_state.lock);
> > + spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> > }
> > static void sanitize_hwsp(struct intel_engine_cs *engine)
More information about the dri-devel mailing list