[Intel-gfx] [PATCH 7/8] drm/i915/gt: Decouple inflight virtual engines

Mon May 18 12:53:29 UTC 2020

On 18/05/2020 09:14, Chris Wilson wrote:
> Once a virtual engine has been bound to a sibling, it will remain bound
> until we finally schedule out the last active request. We can not rebind
> the context to a new sibling while it is inflight as the context save
> will conflict, hence we wait. As we cannot then use any other sibliing
> while the context is inflight, only kick the bound sibling while it
> inflight and upon scheduling out the kick the rest (so that we can swap
> engines on timeslicing if the previously bound engine becomes
> oversubscribed).
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.c | 30 +++++++++++++----------------
>   1 file changed, 13 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 7a5ac3375225..fe8f3518d6b8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1398,9 +1398,8 @@ execlists_schedule_in(struct i915_request *rq, int idx)
>   static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
>   {
>   	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
> -	struct i915_request *next = READ_ONCE(ve->request);
>   
> -	if (next == rq || (next && next->execution_mask & ~rq->execution_mask))
> +	if (READ_ONCE(ve->request))
>   		tasklet_hi_schedule(&ve->base.execlists.tasklet);
>   }
>   
> @@ -1821,18 +1820,14 @@ first_virtual_engine(struct intel_engine_cs *engine)
>   			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
>   		struct i915_request *rq = READ_ONCE(ve->request);
>   
> -		if (!rq) { /* lazily cleanup after another engine handled rq */
> +		/* lazily cleanup after another engine handled rq */
> +		if (!rq || !virtual_matches(ve, rq, engine)) {
>   			rb_erase_cached(rb, &el->virtual);
>   			RB_CLEAR_NODE(rb);
>   			rb = rb_first_cached(&el->virtual);
>   			continue;
>   		}
>   
> -		if (!virtual_matches(ve, rq, engine)) {
> -			rb = rb_next(rb);
> -			continue;
> -		}
> -
>   		return ve;
>   	}
>   
> @@ -5478,7 +5473,6 @@ static void virtual_submission_tasklet(unsigned long data)
>   	if (unlikely(!mask))
>   		return;
>   
> -	local_irq_disable();
>   	for (n = 0; n < ve->num_siblings; n++) {
>   		struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]);
>   		struct ve_node * const node = &ve->nodes[sibling->id];
> @@ -5488,20 +5482,19 @@ static void virtual_submission_tasklet(unsigned long data)
>   		if (!READ_ONCE(ve->request))
>   			break; /* already handled by a sibling's tasklet */
>   
> +		spin_lock_irq(&sibling->active.lock);
> +
>   		if (unlikely(!(mask & sibling->mask))) {
>   			if (!RB_EMPTY_NODE(&node->rb)) {
> -				spin_lock(&sibling->active.lock);
>   				rb_erase_cached(&node->rb,
>   						&sibling->execlists.virtual);
>   				RB_CLEAR_NODE(&node->rb);
> -				spin_unlock(&sibling->active.lock);
>   			}
> -			continue;
> -		}
>   
> -		spin_lock(&sibling->active.lock);
> +			goto unlock_engine;
> +		}
>   
> -		if (!RB_EMPTY_NODE(&node->rb)) {
> +		if (unlikely(!RB_EMPTY_NODE(&node->rb))) {
>   			/*
>   			 * Cheat and avoid rebalancing the tree if we can
>   			 * reuse this node in situ.
> @@ -5541,9 +5534,12 @@ static void virtual_submission_tasklet(unsigned long data)
>   		if (first && prio >= sibling->execlists.queue_priority_hint)
>   			tasklet_hi_schedule(&sibling->execlists.tasklet);
>   
> -		spin_unlock(&sibling->active.lock);
> +unlock_engine:
> +		spin_unlock_irq(&sibling->active.lock);
> +
> +		if (intel_context_inflight(&ve->context))
> +			break;

So virtual request may not be added to all siblings any longer. Will it 
still be able to schedule it on any if time slicing kicks in under these 
conditions?

This is equivalent to the hunk in first_virtual_engine which also 
removes it from all other siblings.

I guess it's inline with what the commit messages says - that new 
sibling will be picked upon time slicing. I just don't quite see the 
path which would do it. Only path which shuffles the siblings array 
around is in dequeue, and dequeue on other that the engine which first 
picked it will not happen any more. I must be missing something..

Regards,

Tvrtko

>   	}
> -	local_irq_enable();
>   }
>   
>   static void virtual_submit_request(struct i915_request *rq)
>