[Intel-gfx] [PATCH 08/46] drm/i915/execlists: Suppress mere WAIT preemption
Matthew Auld
matthew.william.auld at gmail.com
Tue Feb 19 10:22:57 UTC 2019
On Wed, 6 Feb 2019 at 13:05, Chris Wilson <chris at chris-wilson.co.uk> wrote:
>
> WAIT is occasionally suppressed by virtue of preempted requests being
> promoted to NEWCLIENT if they have not already received that boost.
> Make this consistent for all WAIT boosts that they are not allowed to
> preempt executing contexts and are merely granted the right to be at the
> front of the queue for the next execution slot. This is in keeping with
> the desire that the WAIT boost be a minor tweak that does not give
> excessive promotion to its user and open ourselves to trivial abuse.
>
> The problem with the inconsistent WAIT preemption becomes more apparent
> as the preemption is propagated across the engines, where one engine may
> preempt and the other not, and we would be relying on the exact execution
> order being consistent across engines (e.g. using HW semaphores to
> coordinate parallel execution).
>
> v2: Also protect GuC submission from false preemption loops.
> v3: Build bug safeguards and better debug messages for st.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_request.c | 12 ++
> drivers/gpu/drm/i915/i915_scheduler.h | 2 +
> drivers/gpu/drm/i915/intel_lrc.c | 9 +-
> drivers/gpu/drm/i915/selftests/intel_lrc.c | 161 +++++++++++++++++++++
> 4 files changed, 183 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index c2a5c48c7541..35acef74b93a 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -372,12 +372,24 @@ void __i915_request_submit(struct i915_request *request)
>
> /* We may be recursing from the signal callback of another i915 fence */
> spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
> +
> GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
> set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
> +
> request->global_seqno = seqno;
> if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
> !i915_request_enable_breadcrumb(request))
> intel_engine_queue_breadcrumbs(engine);
> +
> + /*
> + * As we do not allow WAIT to preempt inflight requests,
> + * once we have executed a request, along with triggering
> + * any execution callbacks, we must preserve its ordering
> + * within the non-preemptible FIFO.
> + */
> + BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
> + request->sched.attr.priority |= __NO_PREEMPTION;
> +
> spin_unlock(&request->lock);
>
> engine->emit_fini_breadcrumb(request,
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index dbe9cb7ecd82..54bd6c89817e 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -33,6 +33,8 @@ enum {
> #define I915_PRIORITY_WAIT ((u8)BIT(0))
> #define I915_PRIORITY_NEWCLIENT ((u8)BIT(1))
>
> +#define __NO_PREEMPTION (I915_PRIORITY_WAIT)
> +
> struct i915_sched_attr {
> /**
> * @priority: execution and service priority
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5d5ce91a5dfa..afd05e25f911 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -188,6 +188,12 @@ static inline int rq_prio(const struct i915_request *rq)
> return rq->sched.attr.priority;
> }
>
> +static int effective_prio(const struct i915_request *rq)
> +{
> + /* Restrict mere WAIT boosts from triggering preemption */
> + return rq_prio(rq) | __NO_PREEMPTION;
> +}
> +
> static int queue_prio(const struct intel_engine_execlists *execlists)
> {
> struct i915_priolist *p;
> @@ -208,7 +214,7 @@ static int queue_prio(const struct intel_engine_execlists *execlists)
> static inline bool need_preempt(const struct intel_engine_cs *engine,
> const struct i915_request *rq)
> {
> - const int last_prio = rq_prio(rq);
> + int last_prio;
>
> if (!intel_engine_has_preemption(engine))
> return false;
> @@ -228,6 +234,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> * preempt. If that hint is stale or we may be trying to preempt
> * ourselves, ignore the request.
> */
> + last_prio = effective_prio(rq);
> if (!__execlists_need_preempt(engine->execlists.queue_priority_hint,
> last_prio))
> return false;
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 58144e024751..263afd2f1596 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -407,6 +407,166 @@ static int live_suppress_self_preempt(void *arg)
> goto err_client_b;
> }
>
> +static int __i915_sw_fence_call
> +dummy_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> +{
> + return NOTIFY_DONE;
> +}
> +
> +static struct i915_request *dummy_request(struct intel_engine_cs *engine)
> +{
> + struct i915_request *rq;
> +
> + rq = kmalloc(sizeof(*rq), GFP_KERNEL | __GFP_ZERO);
> + if (!rq)
> + return NULL;
> +
> + INIT_LIST_HEAD(&rq->active_list);
> + rq->engine = engine;
> +
> + i915_sched_node_init(&rq->sched);
> +
> + /* mark this request as permanently incomplete */
> + rq->fence.seqno = 1;
> + BUILD_BUG_ON(sizeof(rq->fence.seqno) != 8); /* upper 32b == 0 */
> + rq->hwsp_seqno = (u32 *)&rq->fence.seqno + 1;
> + GEM_BUG_ON(i915_request_completed(rq));
> +
> + i915_sw_fence_init(&rq->submit, dummy_notify);
> + i915_sw_fence_commit(&rq->submit);
> +
> + return rq;
> +}
> +
> +static void dummy_request_free(struct i915_request *dummy)
> +{
> + i915_request_mark_complete(dummy);
> + i915_sched_node_fini(dummy->engine->i915, &dummy->sched);
Do we need i915_sw_fence_fini() in here somewhere?
While looking at something unrelated I hit something like:
ODEBUG: init destroyed (active state 0) object type: i915_sw_fence
hint: (null)
More information about the Intel-gfx
mailing list