[Intel-gfx] [PATCH] drm/i915/breadcrumbs: Drop request reference for the signaler thread

Wed Jan 31 17:30:46 UTC 2018

Quoting Tvrtko Ursulin (2018-01-31 17:18:55)
> 
> On 22/01/2018 15:41, Chris Wilson wrote:
> > +static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
> > +                                      struct drm_i915_gem_request *request)
> > +{
> > +     struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > +
> > +     lockdep_assert_held(&b->rb_lock);
> > +
> > +     /*
> > +      * Wake up all other completed waiters and select the
> > +      * next bottom-half for the next user interrupt.
> > +      */
> > +     __intel_engine_remove_wait(engine, &request->signaling.wait);
> > +
> > +     /*
> > +      * Find the next oldest signal. Note that as we have
> > +      * not been holding the lock, another client may
> 
> Which lock, request-lock? b->first_signal seems to always be updated 
> under the b->rb_lock, so I am not sure of the 
> request->signaling.wait.seqno check.
> 
> wait.seqno is also updated without the request->lock below, so maybe you 
> were talking about the rb_lock after all?

What it means here is that since we took b->first_signal without
holding the rb_lock, by the time we get to here (and have taken the
rb_lock) b->first_signal may have changed and indeed someone else may
have already retired the request and canceled the signaling. Or inserted
a new signal into the tree. (iirc, this comment is unmodified.)

> How does wait.seqno becomes 0 outside the rb_lock? Is it due RCU 
> business this patch adds? Like it is retired before the signaler runs?

Not outside of the rb_lock, just before we notice.

> > +      * have installed an even older signal than the one
> > +      * we just completed - so double check we are still
> > +      * the oldest before picking the next one.
> > +      */
> > +     if (request->signaling.wait.seqno) {
> > +             if (request == rcu_access_pointer(b->first_signal)) {
> > +                     struct rb_node *rb = rb_next(&request->signaling.node);
> > +                     rcu_assign_pointer(b->first_signal,
> > +                                        rb ? to_signaler(rb) : NULL);
> > +             }
> > +
> > +             rb_erase(&request->signaling.node, &b->signals);
> > +             request->signaling.wait.seqno = 0;
> > +     }
> > +}
> > +
> > +static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
> > +{
> > +     /*
> > +      * See the big warnings for i915_gem_active_get_rcu() and similarly
> > +      * for dma_fence_get_rcu_safe() that explain the intricacies involved
> > +      * here with defeating CPU/compiler speculation and enforcing
> > +      * the required memory barriers.
> > +      */
> > +     do {
> > +             struct drm_i915_gem_request *request;
> > +
> > +             request = rcu_dereference(b->first_signal);
> > +             if (request)
> > +                     request = i915_gem_request_get_rcu(request);
> > +
> > +             barrier();
> > +
> > +             if (!request || request == rcu_access_pointer(b->first_signal))
> > +                     return rcu_pointer_handoff(request);
> > +
> > +             i915_gem_request_put(request);
> > +     } while (1);
> > +}
> > +
> >   static int intel_breadcrumbs_signaler(void *arg)
> >   {
> >       struct intel_engine_cs *engine = arg;
> > @@ -667,41 +715,21 @@ static int intel_breadcrumbs_signaler(void *arg)
> >                * a new client.
> >                */
> >               rcu_read_lock();
> > -             request = rcu_dereference(b->first_signal);
> > -             if (request)
> > -                     request = i915_gem_request_get_rcu(request);
> > +             request = first_signal(b);
> 
> get_ prefix would be good to signify a get vs peek. Maybe even _rcu suffix.

Sold.

> >               rcu_read_unlock();
> >               if (signal_complete(request)) {
> > -                     local_bh_disable();
> > -                     dma_fence_signal(&request->fence);
> > -                     local_bh_enable(); /* kick start the tasklets */
> > -
> > -                     spin_lock_irq(&b->rb_lock);
> > -
> > -                     /* Wake up all other completed waiters and select the
> > -                      * next bottom-half for the next user interrupt.
> > -                      */
> > -                     __intel_engine_remove_wait(engine,
> > -                                                &request->signaling.wait);
> > -
> > -                     /* Find the next oldest signal. Note that as we have
> > -                      * not been holding the lock, another client may
> > -                      * have installed an even older signal than the one
> > -                      * we just completed - so double check we are still
> > -                      * the oldest before picking the next one.
> > -                      */
> > -                     if (request == rcu_access_pointer(b->first_signal)) {
> > -                             struct rb_node *rb =
> > -                                     rb_next(&request->signaling.node);
> > -                             rcu_assign_pointer(b->first_signal,
> > -                                                rb ? to_signaler(rb) : NULL);
> > +                     if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
> > +                                   &request->fence.flags)) {
> > +                             local_bh_disable();
> > +                             dma_fence_signal(&request->fence);
> > +                             local_bh_enable(); /* kick start the tasklets */
> >                       }
> > -                     rb_erase(&request->signaling.node, &b->signals);
> > -                     RB_CLEAR_NODE(&request->signaling.node);
> > -
> > -                     spin_unlock_irq(&b->rb_lock);
> >   
> > -                     i915_gem_request_put(request);
> > +                     if (request->signaling.wait.seqno) {
> > +                             spin_lock_irq(&b->rb_lock);
> > +                             __intel_engine_remove_signal(engine, request);
> > +                             spin_unlock_irq(&b->rb_lock);
> 
> This is safe against double invocation on a single request (race between 
> retire and signaler) because of RB_EMPTY_NODE guard in 
> __intel_engine_remove_wait?

Yes, and wait.seqno==0 guard for the signaler tree.

> > @@ -809,32 +835,17 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
> >   
> >   void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> >   {
> > -     struct intel_engine_cs *engine = request->engine;
> > -     struct intel_breadcrumbs *b = &engine->breadcrumbs;
> > -
> >       GEM_BUG_ON(!irqs_disabled());
> >       lockdep_assert_held(&request->lock);
> > -     GEM_BUG_ON(!request->signaling.wait.seqno);
> >   
> > -     spin_lock(&b->rb_lock);
> > +     if (request->signaling.wait.seqno) {
> 
> Do you need some special annotation like READ_ONCE on wait.seqno every 
> time it is used for decisions like this one?

For humans, yes ;)	
-Chris