[Intel-gfx] [PATCH 10/24] drm/i915: Drop no-semaphore boosting
Mika Kuoppala
mika.kuoppala at linux.intel.com
Wed May 13 17:04:08 UTC 2020
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Now that we have fast timeslicing on semaphores, we no longer need to
> prioritise non-semaphore work as we will yield any work blocked on a
> sempahore to the next in the queue. Previously with no timeslicing,
s/sempahore/semaphore/ -- the typo is back at blocking again :)
> blocking on the semaphore caused extremely bad scheduling with multiple
> clients utilising multiple rings. Now, there is no impact and we can
> remove the complication.
Not a small feat to accomplish.
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 15 -------
> drivers/gpu/drm/i915/gt/intel_lrc.c | 9 -----
> drivers/gpu/drm/i915/gt/selftest_context.c | 1 +
> drivers/gpu/drm/i915/i915_priolist_types.h | 4 +-
> drivers/gpu/drm/i915/i915_request.c | 40 ++-----------------
> drivers/gpu/drm/i915/i915_request.h | 1 -
> drivers/gpu/drm/i915/i915_scheduler.c | 12 +++---
> drivers/gpu/drm/i915/i915_scheduler_types.h | 3 +-
> 8 files changed, 12 insertions(+), 73 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 2067557e277b..0a4606faf966 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -2603,21 +2603,6 @@ static void eb_request_add(struct i915_execbuffer *eb)
> /* Check that the context wasn't destroyed before submission */
> if (likely(!intel_context_is_closed(eb->context))) {
> attr = eb->gem_context->sched;
> -
> - /*
> - * Boost actual workloads past semaphores!
> - *
> - * With semaphores we spin on one engine waiting for another,
> - * simply to reduce the latency of starting our work when
> - * the signaler completes. However, if there is any other
> - * work that we could be doing on this engine instead, that
> - * is better utilisation and will reduce the overall duration
> - * of the current work. To avoid PI boosting a semaphore
> - * far in the distance past over useful work, we keep a history
> - * of any semaphore use along our dependency chain.
> - */
> - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
> - attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> } else {
> /* Serialise with context_close via the add_to_timeline */
> i915_request_set_error_once(rq, -ENOENT);
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 9141b3afa2c5..c7d7438b5d55 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -429,15 +429,6 @@ static int effective_prio(const struct i915_request *rq)
> if (i915_request_has_nopreempt(rq))
> prio = I915_PRIORITY_UNPREEMPTABLE;
>
> - /*
> - * On unwinding the active request, we give it a priority bump
> - * if it has completed waiting on any semaphore. If we know that
> - * the request has already started, we can prevent an unwanted
> - * preempt-to-idle cycle by taking that into account now.
> - */
> - if (__i915_request_has_started(rq))
> - prio |= I915_PRIORITY_NOSEMAPHORE;
> -
> return prio;
> }
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
> index a56dff3b157a..52af1cee9a94 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_context.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_context.c
> @@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq)
>
> /* Opencode i915_request_add() so we can keep the timeline locked. */
> __i915_request_commit(rq);
> + rq->sched.attr.priority = I915_PRIORITY_BARRIER;
> __i915_request_queue(rq, NULL);
>
> timeout = i915_request_wait(rq, 0, HZ / 10);
> diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h
> index e18723d8df86..5003a71113cb 100644
> --- a/drivers/gpu/drm/i915/i915_priolist_types.h
> +++ b/drivers/gpu/drm/i915/i915_priolist_types.h
> @@ -24,14 +24,12 @@ enum {
> I915_PRIORITY_DISPLAY,
> };
>
> -#define I915_USER_PRIORITY_SHIFT 1
> +#define I915_USER_PRIORITY_SHIFT 0
> #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT)
>
> #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT)
> #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1)
>
> -#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(0))
> -
> /* Smallest priority value that cannot be bumped. */
> #define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK)
>
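With the NOSEMAPHORE bit gone, the userspace priority no longer has to be
shifted up to leave room for an internal bonus bit. A throwaway sketch of
the before/after encoding, using my own OLD_/NEW_ macro names rather than
the driver's:

/* Standalone illustration (not driver code): dropping the internal
 * NOSEMAPHORE bonus bit collapses the priority encoding. With the old
 * shift of 1, bit 0 was reserved for the boost; with a shift of 0 the
 * user priority is stored as-is.
 */
#include <stdio.h>

#define BIT(n)				(1 << (n))

#define OLD_USER_PRIORITY_SHIFT		1
#define OLD_USER_PRIORITY(x)		((x) << OLD_USER_PRIORITY_SHIFT)
#define OLD_PRIORITY_NOSEMAPHORE	BIT(0)

#define NEW_USER_PRIORITY_SHIFT		0
#define NEW_USER_PRIORITY(x)		((x) << NEW_USER_PRIORITY_SHIFT)

int main(void)
{
	int user = 2; /* priority requested by userspace */

	/* Old scheme: user value shifted up, bit 0 reserved for the boost. */
	printf("old: base=%d boosted=%d\n",
	       OLD_USER_PRIORITY(user),
	       OLD_USER_PRIORITY(user) | OLD_PRIORITY_NOSEMAPHORE);

	/* New scheme: no internal bits left, the priority is the user value. */
	printf("new: %d\n", NEW_USER_PRIORITY(user));
	return 0;
}
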
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 9ad1e6761492..9738dab5a9f6 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -369,8 +369,6 @@ __await_execution(struct i915_request *rq,
> }
> spin_unlock_irq(&signal->lock);
>
> - /* Copy across semaphore status as we need the same behaviour */
> - rq->sched.flags |= signal->sched.flags;
> return 0;
> }
>
> @@ -539,10 +537,8 @@ void __i915_request_unsubmit(struct i915_request *request)
> spin_unlock(&request->lock);
>
> /* We've already spun, don't charge on resubmitting. */
> - if (request->sched.semaphores && i915_request_started(request)) {
> - request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> + if (request->sched.semaphores && i915_request_started(request))
> request->sched.semaphores = 0;
> - }
>
> /*
> * We don't need to wake_up any waiters on request->execute, they
> @@ -600,15 +596,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> return NOTIFY_DONE;
> }
>
> -static void irq_semaphore_cb(struct irq_work *wrk)
> -{
> - struct i915_request *rq =
> - container_of(wrk, typeof(*rq), semaphore_work);
> -
> - i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE);
> - i915_request_put(rq);
> -}
> -
> static int __i915_sw_fence_call
> semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
> {
> @@ -616,11 +603,6 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
>
> switch (state) {
> case FENCE_COMPLETE:
> - if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) {
> - i915_request_get(rq);
> - init_irq_work(&rq->semaphore_work, irq_semaphore_cb);
> - irq_work_queue(&rq->semaphore_work);
> - }
> break;
>
> case FENCE_FREE:
> @@ -999,6 +981,7 @@ emit_semaphore_wait(struct i915_request *to,
> gfp_t gfp)
> {
> const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
> + struct i915_sw_fence *wait = &to->submit;
>
> if (!intel_context_use_semaphores(to->context))
> goto await_fence;
> @@ -1033,11 +1016,10 @@ emit_semaphore_wait(struct i915_request *to,
> goto await_fence;
>
> to->sched.semaphores |= mask;
> - to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
> - return 0;
> + wait = &to->semaphore;
>
> await_fence:
> - return i915_sw_fence_await_dma_fence(&to->submit,
> + return i915_sw_fence_await_dma_fence(wait,
> &from->fence, 0,
> I915_FENCE_GFP);
> }
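The restructuring above boils down to picking the wait point up front and
funnelling both paths through the single await at the end, instead of
returning early from the semaphore path. A minimal standalone sketch of
that shape, with simplified stand-in types rather than the real
i915_sw_fence machinery:

/* Sketch only (illustrative names): choose which fence the dependency is
 * attached to, then make one await call for both paths.
 */
#include <stdbool.h>
#include <stdio.h>

struct fence { const char *name; };

struct request {
	struct fence submit;    /* blocks submission until signaled */
	struct fence semaphore; /* used when a hardware semaphore busywait
				 * can stand in for blocking submission */
};

static int await(struct fence *wait, const struct fence *signal)
{
	/* Stand-in for chaining @wait onto @signal's completion. */
	printf("%s waits on %s\n", wait->name, signal->name);
	return 0;
}

static int emit_wait(struct request *to, const struct request *from,
		     bool can_use_semaphore)
{
	struct fence *wait = &to->submit; /* default: block at submission */

	if (can_use_semaphore)
		wait = &to->semaphore; /* semaphore path picks the other fence */

	return await(wait, &from->submit);
}

int main(void)
{
	struct request to = { { "to.submit" }, { "to.semaphore" } };
	struct request from = { { "from.submit" }, { "from.semaphore" } };

	emit_wait(&to, &from, false); /* -> to.submit waits */
	emit_wait(&to, &from, true);  /* -> to.semaphore waits */
	return 0;
}
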
> @@ -1072,17 +1054,6 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
> if (ret < 0)
> return ret;
>
> - if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) {
> - ret = i915_sw_fence_await_dma_fence(&to->semaphore,
> - &from->fence, 0,
> - I915_FENCE_GFP);
> - if (ret < 0)
> - return ret;
> - }
> -
> - if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
> - to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
> -
> return 0;
> }
>
> @@ -1706,9 +1677,6 @@ void i915_request_add(struct i915_request *rq)
> attr = ctx->sched;
> rcu_read_unlock();
>
> - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
> - attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> -
> __i915_request_queue(rq, &attr);
>
> mutex_unlock(&tl->mutex);
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 98ae2dc82371..8ec7ee4dbadc 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -216,7 +216,6 @@ struct i915_request {
> };
> struct list_head execute_cb;
> struct i915_sw_fence semaphore;
> - struct irq_work semaphore_work;
>
> /*
> * A list of everyone we wait upon, and everyone who waits upon us.
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index f8e797a7eee9..56defe78ae54 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -51,11 +51,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists)
> GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
> rb_first(&execlists->queue.rb_root));
>
> - last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
> + last_prio = INT_MAX;
> for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> const struct i915_priolist *p = to_priolist(rb);
>
> - GEM_BUG_ON(p->priority >= last_prio);
> + GEM_BUG_ON(p->priority > last_prio);
> last_prio = p->priority;
>
> GEM_BUG_ON(!p->used);
> @@ -434,15 +434,13 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
> dep->waiter = node;
> dep->flags = flags;
>
> - /* Keep track of whether anyone on this chain has a semaphore */
> - if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
> - !node_started(signal))
> - node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
> -
> /* All set, now publish. Beware the lockless walkers. */
> list_add_rcu(&dep->signal_link, &node->signalers_list);
> list_add_rcu(&dep->wait_link, &signal->waiters_list);
>
> + /* Propagate the chains */
> + node->flags |= signal->flags;
> +
> ret = true;
> }
>
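The blanket node->flags |= signal->flags keeps the remaining chain bit
(I915_SCHED_HAS_EXTERNAL_CHAIN) propagating from signaler to waiter without
the special-casing. A rough illustration, with made-up node types, of how
such a flag rides down the dependency chain:

/* Sketch (not the scheduler code): a chain-wide property set on a signaler
 * is inherited by every node that ends up waiting on it, directly or
 * transitively, because each new edge ORs the signaler's flags in.
 */
#include <stdio.h>

#define HAS_EXTERNAL_CHAIN	(1u << 0)

struct node {
	const char *name;
	unsigned int flags;
};

static void add_dependency(struct node *waiter, const struct node *signaler)
{
	/* Inherit any chain flags from the request we now depend on. */
	waiter->flags |= signaler->flags;
}

int main(void)
{
	struct node a = { "a", HAS_EXTERNAL_CHAIN }; /* waits on an external fence */
	struct node b = { "b", 0 };
	struct node c = { "c", 0 };

	add_dependency(&b, &a); /* b -> a */
	add_dependency(&c, &b); /* c -> b -> a */

	printf("c inherits external-chain flag: %u\n",
	       c.flags & HAS_EXTERNAL_CHAIN);
	return 0;
}
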
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index 6ab2c5289bed..f72e6c397b08 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -65,8 +65,7 @@ struct i915_sched_node {
> struct list_head link;
> struct i915_sched_attr attr;
> unsigned int flags;
> -#define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0)
> -#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(1)
> +#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0)
> intel_engine_mask_t semaphores;
> };
>
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx