[Intel-gfx] [PATCH 2/3] drm/i915/execlists: Suppress preempting self

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Jan 23 14:08:56 UTC 2019


On 23/01/2019 12:36, Chris Wilson wrote:
> In order to avoid preempting ourselves, we currently refuse to schedule
> the tasklet if we reschedule an inflight context. However, this glosses
> over a few issues, such as what happens after a CS completion event when
> we may preempt the newly executing context with itself, or when something
> else causes a tasklet_schedule that triggers the same evaluation and
> preempts the active context with itself.
> 
> To avoid the extra complications, after deciding that we have
> potentially queued a request with higher priority than the currently
> executing request, inspect the head of the queue to see if it is indeed
> a higher priority request from another context.
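
So if the queue_priority hint claims to be above the priority of the
currently executing request, it is no longer trusted on its own but
double-checked against the actual contents of the timeline and queue,
roughly like this (a sketch only; the helper name here is made up):

	if (!__execlists_need_preempt(q_prio, rq_prio(active)))
		return false; /* the hint says no preemption is required */

	/* The hint may be stale, or may point back at ourselves: verify. */
	return queue_has_higher_prio_from_other_ctx(engine, active);

where the imaginary helper stands in for the need_preempt() rework below.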
> 
> References: a2bf92e8cc16 ("drm/i915/execlists: Avoid kicking priority on the current context")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_scheduler.c | 20 ++++++--
>   drivers/gpu/drm/i915/intel_lrc.c      | 67 ++++++++++++++++++++++++---
>   2 files changed, 76 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 340faea6c08a..fb5d953430e5 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -239,6 +239,18 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
>   	return engine;
>   }
>   
> +static bool inflight(const struct i915_request *rq,
> +		     const struct intel_engine_cs *engine)
> +{
> +	const struct i915_request *active;
> +
> +	if (!rq->global_seqno)
> +		return false;
> +
> +	active = port_request(engine->execlists.port);
> +	return active->hw_context == rq->hw_context;
> +}
> +
>   static void __i915_schedule(struct i915_request *rq,
>   			    const struct i915_sched_attr *attr)
>   {
> @@ -328,6 +340,7 @@ static void __i915_schedule(struct i915_request *rq,
>   		INIT_LIST_HEAD(&dep->dfs_link);
>   
>   		engine = sched_lock_engine(node, engine);
> +		lockdep_assert_held(&engine->timeline.lock);
>   
>   		/* Recheck after acquiring the engine->timeline.lock */
>   		if (prio <= node->attr.priority || node_signaled(node))
> @@ -356,17 +369,16 @@ static void __i915_schedule(struct i915_request *rq,
>   		if (prio <= engine->execlists.queue_priority)
>   			continue;
>   
> +		engine->execlists.queue_priority = prio;
> +
>   		/*
>   		 * If we are already the currently executing context, don't
>   		 * bother evaluating if we should preempt ourselves.
>   		 */
> -		if (node_to_request(node)->global_seqno &&
> -		    i915_seqno_passed(port_request(engine->execlists.port)->global_seqno,
> -				      node_to_request(node)->global_seqno))
> +		if (inflight(node_to_request(node), engine))
>   			continue;
>   
>   		/* Defer (tasklet) submission until after all of our updates. */
> -		engine->execlists.queue_priority = prio;
>   		tasklet_hi_schedule(&engine->execlists.tasklet);
>   	}
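
As an aside, the net effect of the reordering above is that the
queue_priority hint is now refreshed even when we skip kicking the
tasklet, i.e. (sketch of the resulting flow):

	engine->execlists.queue_priority = prio; /* always update the hint */

	/* Already executing on this engine: don't kick a self-preemption. */
	if (inflight(node_to_request(node), engine))
		continue;

	/* Defer (tasklet) submission until after all of our updates. */
	tasklet_hi_schedule(&engine->execlists.tasklet);

whereas before, the hint was only updated when the tasklet was scheduled.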
>   
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8aa8a4862543..d9d744f6ab2c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -182,12 +182,64 @@ static inline int rq_prio(const struct i915_request *rq)
>   }
>   
>   static inline bool need_preempt(const struct intel_engine_cs *engine,
> -				const struct i915_request *last,
> -				int prio)
> +				const struct i915_request *rq,
> +				int q_prio)
>   {
> -	return (intel_engine_has_preemption(engine) &&
> -		__execlists_need_preempt(prio, rq_prio(last)) &&
> -		!i915_request_completed(last));
> +	const struct intel_context *ctx = rq->hw_context;
> +	const int last_prio = rq_prio(rq);
> +	struct rb_node *rb;
> +	int idx;
> +
> +	if (!intel_engine_has_preemption(engine))
> +		return false;
> +
> +	if (i915_request_completed(rq))
> +		return false;
> +
> +	if (!__execlists_need_preempt(q_prio, last_prio))
> +		return false;
> +
> +	/*
> +	 * The queue_priority is a mere hint that we may need to preempt.
> +	 * If that hint is stale or we may be trying to preempt ourselves,
> +	 * ignore the request.
> +	 */
> +
> +	list_for_each_entry_continue(rq, &engine->timeline.requests, link) {
> +		GEM_BUG_ON(rq->hw_context == ctx);

Why would there be no more requests belonging to the same context on the
engine timeline after the first one? Did you mean "if (rq->hw_context ==
ctx) continue;"?
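
That is, a sketch of what I have in mind, so that later requests from
the same context are skipped rather than tripping the assert:

	list_for_each_entry_continue(rq, &engine->timeline.requests, link) {
		/* A later submission from the same context cannot preempt it. */
		if (rq->hw_context == ctx)
			continue;

		/* Another context with higher priority is already on the timeline. */
		if (rq_prio(rq) > last_prio)
			return true;
	}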

> +		if (rq_prio(rq) > last_prio)
> +			return true;
> +	}
> +
> +	rb = rb_first_cached(&engine->execlists.queue);
> +	if (!rb)
> +		return false;
> +
> +	priolist_for_each_request(rq, to_priolist(rb), idx)
> +		return rq->hw_context != ctx && rq_prio(rq) > last_prio;

Isn't this equivalent to the top of the queue priority
(engine->execlists.queue_priority), apart from the different ctx check?
I guess this is easier than storing a new engine->execlists.queue_top_ctx
and wondering about the validity of that pointer.
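
In other words, assuming the first request in the queue always matches
engine->execlists.queue_priority, the check above could be written as
something like:

	/* rq is the first request of the topmost priolist. */
	return rq->hw_context != ctx &&
	       engine->execlists.queue_priority > last_prio;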

Regards,

Tvrtko

> +
> +	return false;
> +}
> +
> +__maybe_unused static inline bool
> +assert_priority_queue(const struct intel_engine_execlists *execlists,
> +		      const struct i915_request *prev,
> +		      const struct i915_request *next)
> +{
> +	if (!prev)
> +		return true;
> +
> +	/*
> +	 * Without preemption, the prev may refer to the still active element
> +	 * which we refuse to let go.
> +	 *
> +	 * Even with preemption, there are times when we think it is better not
> +	 * to preempt and leave an ostensibly lower priority request in flight.
> +	 */
> +	if (port_request(execlists->port) == prev)
> +		return true;
> +
> +	return rq_prio(prev) >= rq_prio(next);
>   }
>   
>   /*
> @@ -626,8 +678,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		int i;
>   
>   		priolist_for_each_request_consume(rq, rn, p, i) {
> -			GEM_BUG_ON(last &&
> -				   need_preempt(engine, last, rq_prio(rq)));
> +			GEM_BUG_ON(!assert_priority_queue(execlists, last, rq));
>   
>   			/*
>   			 * Can we combine this request with the current port?
> @@ -872,6 +923,8 @@ static void process_csb(struct intel_engine_cs *engine)
>   	const u32 * const buf = execlists->csb_status;
>   	u8 head, tail;
>   
> +	lockdep_assert_held(&engine->timeline.lock);
> +
>   	/*
>   	 * Note that csb_write, csb_status may be either in HWSP or mmio.
>   	 * When reading from the csb_write mmio register, we have to be
> 

