[Intel-gfx] [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Sep 25 10:14:32 UTC 2018
On 25/09/2018 09:31, Chris Wilson wrote:
> If the request is currently on the HW (in port 0), then we do not need
> to kick the submission tasklet to evaluate whether we should be
> preempting itself in order to execute it again.
>
> In the case that was annoying me:
>
> execlists_schedule: rq(18:211173).prio=0 -> 2
> need_preempt: last(18:211174).prio=0, queue.prio=2
>
> We are bumping the priority of the first of a pair of requests running
> in the current context. Then when evaluating preempt, we would see that
> that our priority request is higher than the last executing request in
> ELSP0 and so trigger preemption, not realising that our intended request
> was already executing.
>
> v2: As we assume state of the execlists->port[] that is only valid while
> we hold the timeline lock we have to repeat some earlier tests that on
> the validity of the node.
> v3: Wrap guc submission under the timeline.lock as is now the way of all
> things.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/intel_guc_submission.c | 18 +++------
> drivers/gpu/drm/i915/intel_lrc.c | 41 +++++++++++++++------
> 2 files changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index a81f04d46e87..4874a212754c 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -791,19 +791,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>
> static void guc_dequeue(struct intel_engine_cs *engine)
> {
> - unsigned long flags;
> - bool submit;
> -
> - local_irq_save(flags);
> -
> - spin_lock(&engine->timeline.lock);
> - submit = __guc_dequeue(engine);
> - spin_unlock(&engine->timeline.lock);
> -
> - if (submit)
> + if (__guc_dequeue(engine))
> guc_submit(engine);
> -
> - local_irq_restore(flags);
> }
>
> static void guc_submission_tasklet(unsigned long data)
> @@ -812,6 +801,9 @@ static void guc_submission_tasklet(unsigned long data)
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct execlist_port *port = execlists->port;
> struct i915_request *rq;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&engine->timeline.lock, flags);
>
> rq = port_request(port);
> while (rq && i915_request_completed(rq)) {
> @@ -835,6 +827,8 @@ static void guc_submission_tasklet(unsigned long data)
>
> if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
> guc_dequeue(engine);
> +
> + spin_unlock_irqrestore(&engine->timeline.lock, flags);
> }
>
> static struct i915_request *
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b58c10bc600..e8de250c3413 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -356,13 +356,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
> {
> struct intel_engine_cs *engine =
> container_of(execlists, typeof(*engine), execlists);
> - unsigned long flags;
> -
> - spin_lock_irqsave(&engine->timeline.lock, flags);
>
> __unwind_incomplete_requests(engine);
> -
> - spin_unlock_irqrestore(&engine->timeline.lock, flags);
> }
Could move to header as static inline since it is a trivial wrapper now.
>
> static inline void
> @@ -1234,9 +1229,13 @@ static void execlists_schedule(struct i915_request *request,
>
> engine = sched_lock_engine(node, engine);
>
> + /* Recheck after acquiring the engine->timeline.lock */
> if (prio <= node->attr.priority)
> continue;
>
> + if (i915_sched_node_signaled(node))
> + continue;
> +
> node->attr.priority = prio;
> if (!list_empty(&node->link)) {
> if (last != engine) {
> @@ -1245,14 +1244,34 @@ static void execlists_schedule(struct i915_request *request,
> }
> GEM_BUG_ON(pl->priority != prio);
> list_move_tail(&node->link, &pl->requests);
> + } else {
> + /*
> + * If the request is not in the priolist queue because
> + * it is not yet runnable, then it doesn't contribute
> + * to our preemption decisions. On the other hand,
> + * if the request is on the HW, it too is not in the
> + * queue; but in that case we may still need to reorder
> + * the inflight requests.
> + */
> + if (!i915_sw_fence_done(&sched_to_request(node)->submit))
> + continue;
> }
>
> - if (prio > engine->execlists.queue_priority &&
> - i915_sw_fence_done(&sched_to_request(node)->submit)) {
> - /* defer submission until after all of our updates */
> - __update_queue(engine, prio);
> - tasklet_hi_schedule(&engine->execlists.tasklet);
> - }
> + if (prio <= engine->execlists.queue_priority)
> + continue;
> +
> + /*
> + * If we are already the currently executing context, don't
> + * bother evaluating if we should preempt ourselves.
> + */
> + if (sched_to_request(node)->global_seqno &&
> + i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> + sched_to_request(node)->global_seqno))
> + continue;
> +
> + /* Defer (tasklet) submission until after all of our updates. */
> + __update_queue(engine, prio);
> + tasklet_hi_schedule(&engine->execlists.tasklet);
> }
>
> spin_unlock_irq(&engine->timeline.lock);
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list