[Intel-gfx] [PATCH 2/7] drm/i915/execlists: Avoid kicking priority on the current context

Tue Sep 25 10:14:32 UTC 2018

On 25/09/2018 09:31, Chris Wilson wrote:
> If the request is currently on the HW (in port 0), then we do not need
> to kick the submission tasklet to evaluate whether we should be
> preempting itself in order to execute it again.
> 
> In the case that was annoying me:
> 
>     execlists_schedule: rq(18:211173).prio=0 -> 2
>     need_preempt: last(18:211174).prio=0, queue.prio=2
> 
> We are bumping the priority of the first of a pair of requests running
> in the current context. Then when evaluating preempt, we would see that
> that our priority request is higher than the last executing request in
> ELSP0 and so trigger preemption, not realising that our intended request
> was already executing.
> 
> v2: As we assume state of the execlists->port[] that is only valid while
> we hold the timeline lock we have to repeat some earlier tests that on
> the validity of the node.
> v3: Wrap guc submission under the timeline.lock as is now the way of all
> things.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/intel_guc_submission.c | 18 +++------
>   drivers/gpu/drm/i915/intel_lrc.c            | 41 +++++++++++++++------
>   2 files changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index a81f04d46e87..4874a212754c 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -791,19 +791,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>   
>   static void guc_dequeue(struct intel_engine_cs *engine)
>   {
> -	unsigned long flags;
> -	bool submit;
> -
> -	local_irq_save(flags);
> -
> -	spin_lock(&engine->timeline.lock);
> -	submit = __guc_dequeue(engine);
> -	spin_unlock(&engine->timeline.lock);
> -
> -	if (submit)
> +	if (__guc_dequeue(engine))
>   		guc_submit(engine);
> -
> -	local_irq_restore(flags);
>   }
>   
>   static void guc_submission_tasklet(unsigned long data)
> @@ -812,6 +801,9 @@ static void guc_submission_tasklet(unsigned long data)
>   	struct intel_engine_execlists * const execlists = &engine->execlists;
>   	struct execlist_port *port = execlists->port;
>   	struct i915_request *rq;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	rq = port_request(port);
>   	while (rq && i915_request_completed(rq)) {
> @@ -835,6 +827,8 @@ static void guc_submission_tasklet(unsigned long data)
>   
>   	if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
>   		guc_dequeue(engine);
> +
> +	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }
>   
>   static struct i915_request *
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 5b58c10bc600..e8de250c3413 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -356,13 +356,8 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists)
>   {
>   	struct intel_engine_cs *engine =
>   		container_of(execlists, typeof(*engine), execlists);
> -	unsigned long flags;
> -
> -	spin_lock_irqsave(&engine->timeline.lock, flags);
>   
>   	__unwind_incomplete_requests(engine);
> -
> -	spin_unlock_irqrestore(&engine->timeline.lock, flags);
>   }

Could move to header as static inline since it is a trivial wrapper now.

>   
>   static inline void
> @@ -1234,9 +1229,13 @@ static void execlists_schedule(struct i915_request *request,
>   
>   		engine = sched_lock_engine(node, engine);
>   
> +		/* Recheck after acquiring the engine->timeline.lock */
>   		if (prio <= node->attr.priority)
>   			continue;
>   
> +		if (i915_sched_node_signaled(node))
> +			continue;
> +
>   		node->attr.priority = prio;
>   		if (!list_empty(&node->link)) {
>   			if (last != engine) {
> @@ -1245,14 +1244,34 @@ static void execlists_schedule(struct i915_request *request,
>   			}
>   			GEM_BUG_ON(pl->priority != prio);
>   			list_move_tail(&node->link, &pl->requests);
> +		} else {
> +			/*
> +			 * If the request is not in the priolist queue because
> +			 * it is not yet runnable, then it doesn't contribute
> +			 * to our preemption decisions. On the other hand,
> +			 * if the request is on the HW, it too is not in the
> +			 * queue; but in that case we may still need to reorder
> +			 * the inflight requests.
> +			 */
> +			if (!i915_sw_fence_done(&sched_to_request(node)->submit))
> +				continue;
>   		}
>   
> -		if (prio > engine->execlists.queue_priority &&
> -		    i915_sw_fence_done(&sched_to_request(node)->submit)) {
> -			/* defer submission until after all of our updates */
> -			__update_queue(engine, prio);
> -			tasklet_hi_schedule(&engine->execlists.tasklet);
> -		}
> +		if (prio <= engine->execlists.queue_priority)
> +			continue;
> +
> +		/*
> +		 * If we are already the currently executing context, don't
> +		 * bother evaluating if we should preempt ourselves.
> +		 */
> +		if (sched_to_request(node)->global_seqno &&
> +		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> +				      sched_to_request(node)->global_seqno))
> +			continue;
> +
> +		/* Defer (tasklet) submission until after all of our updates. */
> +		__update_queue(engine, prio);
> +		tasklet_hi_schedule(&engine->execlists.tasklet);
>   	}
>   
>   	spin_unlock_irq(&engine->timeline.lock);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko