[Intel-gfx] [PATCH 07/10] drm/i915: Reduce spinlock hold time during notify_ring() interrupt

Wed Jan 17 10:45:16 UTC 2018

On 15/01/2018 21:24, Chris Wilson wrote:
> By taking advantage of the RCU protection of the task struct, we can find
> the appropriate signaler under the spinlock and then release the spinlock
> before waking the task and signaling the fence.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_irq.c | 29 +++++++++++++++++++----------
>   1 file changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3517c6548e2c..0b272501b738 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1065,21 +1065,24 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>   
>   static void notify_ring(struct intel_engine_cs *engine)
>   {
> +	const u32 seqno = intel_engine_get_seqno(engine);
>   	struct drm_i915_gem_request *rq = NULL;
> +	struct task_struct *tsk = NULL;
>   	struct intel_wait *wait;
>   
> -	if (!engine->breadcrumbs.irq_armed)
> +	if (unlikely(!engine->breadcrumbs.irq_armed))
>   		return;

Note that this isn't unlikely in GuC mode, just saying...

>   
>   	atomic_inc(&engine->irq_count);
>   	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>   
> +	rcu_read_lock();
> +
>   	spin_lock(&engine->breadcrumbs.irq_lock);
>   	wait = engine->breadcrumbs.irq_wait;
>   	if (wait) {
> -		bool wakeup = engine->irq_seqno_barrier;
> -
> -		/* We use a callback from the dma-fence to submit
> +		/*
> +		 * We use a callback from the dma-fence to submit
>   		 * requests after waiting on our own requests. To
>   		 * ensure minimum delay in queuing the next request to
>   		 * hardware, signal the fence now rather than wait for
> @@ -1090,19 +1093,20 @@ static void notify_ring(struct intel_engine_cs *engine)
>   		 * and to handle coalescing of multiple seqno updates
>   		 * and many waiters.
>   		 */
> -		if (i915_seqno_passed(intel_engine_get_seqno(engine),
> -				      wait->seqno)) {
> +		if (i915_seqno_passed(seqno, wait->seqno)) {
>   			struct drm_i915_gem_request *waiter = wait->request;
>   
> -			wakeup = true;
>   			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
>   				      &waiter->fence.flags) &&
>   			    intel_wait_check_request(wait, waiter))
>   				rq = i915_gem_request_get(waiter);
> -		}
>   
> -		if (wakeup)
> -			wake_up_process(wait->tsk);
> +			tsk = wait->tsk;
> +		} else {
> +			if (engine->irq_seqno_barrier &&
> +			    i915_seqno_passed(seqno, wait->seqno - 1))

Hm, what is this about? Why -1 on platforms with coherency issues and not
some other number? It needs a comment at a minimum, but it is still a
behaviour change, and I could not immediately figure out how it fits with
the commit message. If it is some additional optimization, it needs to be
split out into a separate patch.

> +				tsk = wait->tsk;
> +		}
>   	} else {
>   		if (engine->breadcrumbs.irq_armed)
>   			__intel_engine_disarm_breadcrumbs(engine);
> @@ -1114,6 +1118,11 @@ static void notify_ring(struct intel_engine_cs *engine)
>   		i915_gem_request_put(rq);
>   	}
>   
> +	if (tsk && tsk->state != TASK_RUNNING)
> +		wake_up_process(tsk);
> +
> +	rcu_read_unlock();
> +
>   	trace_intel_engine_notify(engine, wait);
>   }
>   
> 

Regards,

Tvrtko