[Intel-gfx] [PATCH v2] drm/i915: Copy across scheduler behaviour flags across submit fences
Chris Wilson
chris at chris-wilson.co.uk
Wed Dec 4 14:15:27 UTC 2019
Quoting Tvrtko Ursulin (2019-12-04 14:12:42)
>
> On 04/12/2019 11:26, Chris Wilson wrote:
> > We want the bonded request to have the same scheduler properties as its
> > master so that it is placed at the same depth in the queue. For example,
> > consider we have requests A, B and B', where B & B' are a bonded pair to
> > run in parallel on two engines.
> >
> > A -> B
> >       \- B'
> >
> > B will run after A and so may be scheduled on an idle engine and wait on
> > A using a semaphore. B' sees B being executed and so enters the queue on
> > the same engine as A. As B' did not inherit the semaphore-chain from B,
> > it may have higher precedence than A and so preempts execution. However,
> > B' then sits on a semaphore waiting for B, who is waiting for A, who is
> > blocked by B', and so we deadlock.
> >
> > Ergo B' needs to inherit the scheduler properties from B (i.e. the
> > semaphore chain) so that it is scheduled with the same priority as B and
> > will not be executed ahead of B's dependencies.
> >
> > Furthermore, to prevent the priorities changing via the exposed fence
> > on B', we need to couple in the dependencies for priority inheritance
> > (PI). This requires us to
> > relax our sanity-checks that dependencies are strictly in order.
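To make the topology above concrete, userspace builds the bonded pair
with submit fences, roughly as below. This is an igt-style sketch only:
buffer setup, the virtual-engine bonding configuration and error
handling are elided, and submit_bonded_pair() and the engine selectors
are illustrative names, not the testcase code.

  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  static void submit_bonded_pair(int fd,
                                 struct drm_i915_gem_execbuffer2 *eb,
                                 unsigned int engine_B,
                                 unsigned int engine_Bprime)
  {
        /* Submit B; it already depends on A via its buffers/in-fence.
         * Ask for an out-fence so we can hand it to B'.
         */
        eb->flags = engine_B | I915_EXEC_FENCE_OUT;
        ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, eb);

        /* B' must start alongside B: B's out-fence (upper 32 bits of
         * rsvd2) becomes the submit fence for B' (lower 32 bits).
         */
        eb->rsvd2 >>= 32;
        eb->flags = engine_Bprime | I915_EXEC_FENCE_SUBMIT;
        ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb);
  }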
> >
> > Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding")
> > Testcase: igt/gem_exec_balancer/bonded-chain
> > Testcase: igt/gem_exec_balancer/bonded-semaphore
> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> > ---
> > Transfer any semaphore dependencies as well.
> > ---
> > drivers/gpu/drm/i915/i915_request.c | 115 ++++++++++++++++++++------
> > drivers/gpu/drm/i915/i915_scheduler.c | 1 -
> > 2 files changed, 90 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> > index 3109b8a79b9f..e0d24977aa9b 100644
> > --- a/drivers/gpu/drm/i915/i915_request.c
> > +++ b/drivers/gpu/drm/i915/i915_request.c
> > @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
> > }
> >
> > static int
> > -__i915_request_await_execution(struct i915_request *rq,
> > - struct i915_request *signal,
> > - void (*hook)(struct i915_request *rq,
> > - struct dma_fence *signal),
> > - gfp_t gfp)
> > +__await_execution(struct i915_request *rq,
> > + struct i915_request *signal,
> > + void (*hook)(struct i915_request *rq,
> > + struct dma_fence *signal),
> > + gfp_t gfp)
> > {
> > struct execute_cb *cb;
> >
> > @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
> > }
> > spin_unlock_irq(&signal->lock);
> >
> > + /* Copy across semaphore status as we need the same behaviour */
> > + rq->sched.flags |= signal->sched.flags;
> > return 0;
> > }
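(This is the line that propagates I915_SCHED_HAS_SEMAPHORE_CHAIN, along
with the other scheduler flags, from the master to the bonded request,
i.e. the inheritance described in the commit message.)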
> >
> > @@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq)
> > }
> >
> > static int
> > -emit_semaphore_wait(struct i915_request *to,
> > - struct i915_request *from,
> > - gfp_t gfp)
> > +__emit_semaphore_wait(struct i915_request *to,
> > + struct i915_request *from,
> > + u32 seqno)
> > {
> > const int has_token = INTEL_GEN(to->i915) >= 12;
> > u32 hwsp_offset;
> > - int len;
> > + int len, err;
> > u32 *cs;
> >
> > GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
> >
> > - /* Just emit the first semaphore we see as request space is limited. */
> > - if (already_busywaiting(to) & from->engine->mask)
> > - goto await_fence;
> > -
> > - if (i915_request_await_start(to, from) < 0)
> > - goto await_fence;
> > -
> > - /* Only submit our spinner after the signaler is running! */
> > - if (__i915_request_await_execution(to, from, NULL, gfp))
> > - goto await_fence;
> > -
> > /* We need to pin the signaler's HWSP until we are finished reading. */
> > - if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
> > - goto await_fence;
> > + err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
> > + if (err)
> > + return err;
> >
> > len = 4;
> > if (has_token)
> > @@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to,
> > MI_SEMAPHORE_POLL |
> > MI_SEMAPHORE_SAD_GTE_SDD) +
> > has_token;
> > - *cs++ = from->fence.seqno;
> > + *cs++ = seqno;
> > *cs++ = hwsp_offset;
> > *cs++ = 0;
> > if (has_token) {
> > @@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to,
> > }
> >
> > intel_ring_advance(to, cs);
> > + return 0;
> > +}
> > +
> > +static int
> > +emit_semaphore_wait(struct i915_request *to,
> > + struct i915_request *from,
> > + gfp_t gfp)
> > +{
> > + /* Just emit the first semaphore we see as request space is limited. */
> > + if (already_busywaiting(to) & from->engine->mask)
> > + goto await_fence;
> > +
> > + if (i915_request_await_start(to, from) < 0)
> > + goto await_fence;
> > +
> > + /* Only submit our spinner after the signaler is running! */
> > + if (__await_execution(to, from, NULL, gfp))
> > + goto await_fence;
> > +
> > + if (__emit_semaphore_wait(to, from, from->fence.seqno))
> > + goto await_fence;
> > +
> > to->sched.semaphores |= from->engine->mask;
> > to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
> > return 0;
> > @@ -993,6 +1007,58 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
> > return 0;
> > }
> >
> > +static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
> > + struct dma_fence *fence)
> > +{
> > + return __intel_timeline_sync_is_later(tl,
> > + fence->context,
> > + fence->seqno - 1);
> > +}
> > +
> > +static int intel_timeline_sync_set_start(struct intel_timeline *tl,
> > + const struct dma_fence *fence)
> > +{
> > + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
> > +}
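(Note the seqno - 1 here: on from's timeline, the HWSP reaching
from->fence.seqno - 1 means everything queued ahead of from has
completed, i.e. from itself is about to start. These helpers record and
query that point so repeated awaits on the same timeline are squashed,
and __emit_semaphore_wait() polls for it, e.g. *hwsp >= 41 for
from->fence.seqno == 42.)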
> > +
> > +static int
> > +__i915_request_await_execution(struct i915_request *to,
> > + struct i915_request *from,
> > + void (*hook)(struct i915_request *rq,
> > + struct dma_fence *signal))
> > +{
> > + bool has_sync;
> > + int err;
> > +
> > + /* Submit both requests at the same time */
> > + err = __await_execution(to, from, hook, I915_FENCE_GFP);
> > + if (err)
> > + return err;
> > +
> > + if (!to->engine->schedule)
> > + return 0;
> > +
> > + /* Squash repeated dependencies on the same timelines */
> > + if (intel_timeline_sync_has_start(i915_request_timeline(to),
> > + &from->fence))
> > + return 0;
> > +
> > + /* Ensure both start together after all semaphores in signal */
> > + if (from->sched.semaphores && !has_sync) {
> > + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
>
> Forgot to git add something? has_sync is uninitialized.
has_sync was the result of intel_timeline_sync_has_start(); after
rearranging the code, its assignment died.
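The sync_has_start() early return above already covers that case, so the
stale !has_sync test can simply go. Something along these lines for v3
(a sketch, not the final hunk):

	/* Ensure both start together after all semaphores in signal */
	if (from->sched.semaphores) {
		err = __emit_semaphore_wait(to, from,
					    from->fence.seqno - 1);
		if (err < 0)
			return err;
	}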
-Chris