[Intel-gfx] [PATCH 1/2] drm/i915: Remove i915_request.lock requirement for execution callbacks

Wed Jul 15 12:48:41 UTC 2020

Quoting Tvrtko Ursulin (2020-07-15 13:39:56)
> 
> On 14/07/2020 10:47, Chris Wilson wrote:
> > We are using the i915_request.lock to serialise adding an execution
> > callback with __i915_request_submit. However, if we use an atomic
> > llist_add to serialise multiple waiters and then check to see if the
> > request is already executing, we can remove the irq-spinlock.
> > 
> > Fixes: 1d9221e9d395 ("drm/i915: Skip signaling a signaled request")
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_request.c | 38 +++++++----------------------
> >   1 file changed, 9 insertions(+), 29 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 0b2fe55e6194..c59315def07d 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -190,13 +190,11 @@ static void __notify_execute_cb(struct i915_request *rq)
> >   {
> >       struct execute_cb *cb, *cn;
> >   
> > -     lockdep_assert_held(&rq->lock);
> > -
> > -     GEM_BUG_ON(!i915_request_is_active(rq));
> >       if (llist_empty(&rq->execute_cb))
> >               return;
> >   
> > -     llist_for_each_entry_safe(cb, cn, rq->execute_cb.first, work.llnode)
> > +     llist_for_each_entry_safe(cb, cn,
> > +                               llist_del_all(&rq->execute_cb), work.llnode)
> >               irq_work_queue(&cb->work);
> >   
> >       /*
> > @@ -209,7 +207,6 @@ static void __notify_execute_cb(struct i915_request *rq)
> >        * preempt-to-idle cycle on the target engine, all the while the
> >        * master execute_cb may refire.
> >        */
> > -     init_llist_head(&rq->execute_cb);
> >   }
> >   
> >   static inline void
> > @@ -276,6 +273,7 @@ static void remove_from_engine(struct i915_request *rq)
> >       list_del_init(&rq->sched.link);
> >       clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
> >       clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
> > +     set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> >       spin_unlock_irq(&locked->active.lock);
> >   }
> >   
> > @@ -323,12 +321,8 @@ bool i915_request_retire(struct i915_request *rq)
> >               GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
> >               atomic_dec(&rq->engine->gt->rps.num_waiters);
> >       }
> > -     if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
> > -             set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
> > -             __notify_execute_cb(rq);
> > -     }
> > -     GEM_BUG_ON(!llist_empty(&rq->execute_cb));
> >       spin_unlock_irq(&rq->lock);
> > +     __notify_execute_cb(rq);
> >   
> >       remove_from_client(rq);
> >       __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
> > @@ -357,12 +351,6 @@ void i915_request_retire_upto(struct i915_request *rq)
> >       } while (i915_request_retire(tmp) && tmp != rq);
> >   }
> >   
> > -static void __llist_add(struct llist_node *node, struct llist_head *head)
> > -{
> > -     node->next = head->first;
> > -     head->first = node;
> > -}
> > -
> >   static struct i915_request * const *
> >   __engine_active(struct intel_engine_cs *engine)
> >   {
> > @@ -439,18 +427,11 @@ __await_execution(struct i915_request *rq,
> >               cb->work.func = irq_execute_cb_hook;
> >       }
> >   
> > -     spin_lock_irq(&signal->lock);
> > -     if (i915_request_is_active(signal) || __request_in_flight(signal)) {
> > -             if (hook) {
> > -                     hook(rq, &signal->fence);
> > -                     i915_request_put(signal);
> > -             }
> > -             i915_sw_fence_complete(cb->fence);
> > -             kmem_cache_free(global.slab_execute_cbs, cb);
> > -     } else {
> > -             __llist_add(&cb->work.llnode, &signal->execute_cb);
> > +     if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
> > +             if (i915_request_is_active(signal) ||
> > +                 __request_in_flight(signal))
> > +                     __notify_execute_cb(signal);
> 
> Any reason why the hook couldn't be called straight away but needs to 
> always go through the worker now?
> 
> Maybe it would be easier to figure out if it is race free that way..
> 
> if (llist_add(..)) {
>         llist_for_each_entry_safe(.., llist_del_all(..), .)

Then you would tell me off for open coding __notify_execute_cb for the
benefit of not going through the irq_work. Something like

@@ -186,7 +186,7 @@ static void irq_execute_cb_hook(struct irq_work *wrk)
        irq_execute_cb(wrk);
 }

-static void __notify_execute_cb(struct i915_request *rq)
+static void __execute_cb_irq(struct i915_request *rq)
 {
        struct execute_cb *cb, *cn;

@@ -209,6 +209,15 @@ static void __notify_execute_cb(struct i915_request *rq)
         */
 }

+static void __execute_cb_imm(struct i915_request *rq)
+{
+       struct execute_cb *cb, *cn;
+
+       llist_for_each_entry_safe(cb, cn,
+                                 llist_del_all(&rq->execute_cb), work.llnode)
+               cb->work.func(&cb->work);
+}
+
 static inline void
 remove_from_client(struct i915_request *request)
 {
@@ -323,7 +332,7 @@ bool i915_request_retire(struct i915_request *rq)
                atomic_dec(&rq->engine->gt->rps.num_waiters);
        }
        spin_unlock_irq(&rq->lock);
-       __notify_execute_cb(rq);
+       __execute_cb_imm(rq);

        remove_from_client(rq);
        __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */
@@ -431,7 +440,7 @@ __await_execution(struct i915_request *rq,
        if (llist_add(&cb->work.llnode, &signal->execute_cb)) {
                if (i915_request_is_active(signal) ||
                    __request_in_flight(signal))
-                       __notify_execute_cb(signal);
+                       __execute_cb_imm(signal);
        }

        return 0;
@@ -547,7 +556,7 @@ bool __i915_request_submit(struct i915_request *request)
                list_move_tail(&request->sched.link, &engine->active.requests);
                clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
        }
-       __notify_execute_cb(request);
+       __execute_cb_irq(request);

        /* We may be recursing from the signal callback of another i915 fence */
        if (!i915_request_signaled(request)) {

-Chris