[Intel-gfx] [PATCH 3/7] drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Aug 7 11:21:53 UTC 2020
On 07/08/2020 09:32, Chris Wilson wrote:
> Move the slow register write and readback out of the critical path
> for execlists submission and delay it until the following worker,
> shaving off around 200us. Note that the same signal_irq_work() is
> allowed to run concurrently on each CPU (it will only be queued once,
> but once running it can be requeued and re-executed), so we have to
> remember to lock the global interactions, as we cannot rely on
> signal_irq_work() itself providing the serialisation (in contrast to
> a tasklet).
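As a point of reference, a minimal, self-contained sketch of the irq_work behaviour described above, with purely illustrative names (nothing below is from the patch): irq_work_queue() is a no-op while the work is still pending, but the pending bit is cleared before the handler runs, so the work can be requeued and a second instance may start on another CPU while the first is still executing; any shared state therefore needs its own lock.

#include <linux/irq_work.h>
#include <linux/list.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);    /* hypothetical lock for shared state */
static LIST_HEAD(example_signalers);     /* stand-in for b->signalers */
static struct irq_work example_work;

static void example_irq_work(struct irq_work *work)
{
        /*
         * The pending bit was cleared before this ran, so another CPU may
         * already be executing a second instance of this handler: unlike a
         * tasklet, the handler is not a serialisation point by itself, and
         * the shared lists must be protected by their own lock.
         */
        spin_lock(&example_lock);
        /* walk example_signalers, as signal_irq_work() walks b->signalers */
        spin_unlock(&example_lock);
}

static void example_init(void)
{
        init_irq_work(&example_work, example_irq_work);
        irq_work_queue(&example_work);   /* returns false if already pending */
}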
>
> By pushing the arm/disarm into the central signaling worker we can close
> the race for disarming the interrupt (and dropping its associated
> GT wakeref) on parking the engine. If we lose the race, that GT wakeref
> may be held indefinitely, preventing the machine from sleeping while
> the GPU is ostensibly idle.
>
> v2: Move the self-arming parking of the signal_irq_work to a flush of
> the irq-work from intel_breadcrumbs_park().
>
> Fixes: dfeba1ae34c8 ("drm/i915/gt: Hold context/request reference while breadcrumbs are active")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 106 +++++++++++++-------
> 1 file changed, 67 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> index d8b206e53660..6c321419441f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
> @@ -30,18 +30,21 @@
> #include "i915_trace.h"
> #include "intel_breadcrumbs.h"
> #include "intel_context.h"
> +#include "intel_engine_pm.h"
> #include "intel_gt_pm.h"
> #include "intel_gt_requests.h"
>
> -static void irq_enable(struct intel_engine_cs *engine)
> +static bool irq_enable(struct intel_engine_cs *engine)
> {
> if (!engine->irq_enable)
> - return;
> + return false;
>
> /* Caller disables interrupts */
> spin_lock(&engine->gt->irq_lock);
> engine->irq_enable(engine);
> spin_unlock(&engine->gt->irq_lock);
> +
> + return true;
> }
>
> static void irq_disable(struct intel_engine_cs *engine)
> @@ -57,12 +60,11 @@ static void irq_disable(struct intel_engine_cs *engine)
>
> static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
> {
> - lockdep_assert_held(&b->irq_lock);
> -
> - if (!b->irq_engine || b->irq_armed)
> - return;
> -
> - if (!intel_gt_pm_get_if_awake(b->irq_engine->gt))
> + /*
> + * Since we are waiting on a request, the GPU should be busy
> + * and should have its own rpm reference.
> + */
> + if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
> return;
>
> /*
> @@ -73,25 +75,24 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
> */
> WRITE_ONCE(b->irq_armed, true);
>
> - /*
> - * Since we are waiting on a request, the GPU should be busy
> - * and should have its own rpm reference. This is tracked
> - * by i915->gt.awake, we can forgo holding our own wakref
> - * for the interrupt as before i915->gt.awake is released (when
> - * the driver is idle) we disarm the breadcrumbs.
> - */
> -
> - if (!b->irq_enabled++)
> - irq_enable(b->irq_engine);
> + /* Requests may have completed before we could enable the interrupt. */
> + if (!b->irq_enabled++ && irq_enable(b->irq_engine))
> + irq_work_queue(&b->irq_work);
> }
>
> -static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> +static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
> {
> - lockdep_assert_held(&b->irq_lock);
> -
> - if (!b->irq_engine || !b->irq_armed)
> + if (!b->irq_engine)
> return;
>
> + spin_lock(&b->irq_lock);
> + if (!b->irq_armed)
> + __intel_breadcrumbs_arm_irq(b);
> + spin_unlock(&b->irq_lock);
> +}
> +
> +static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
> +{
> GEM_BUG_ON(!b->irq_enabled);
> if (!--b->irq_enabled)
> irq_disable(b->irq_engine);
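The enable/disable counting idiom above is unchanged by the patch; as a standalone illustration of the pattern (names and hardware hooks below are hypothetical), the first user arms the hardware interrupt and the last user disarms it:

static unsigned int counted_irq_enabled;         /* serialised by the caller */

static void counted_hw_irq_enable(void) { /* slow MMIO write + readback */ }
static void counted_hw_irq_disable(void) { /* slow MMIO write + readback */ }

static void counted_irq_get(void)
{
        if (!counted_irq_enabled++)              /* 0 -> 1: first user enables */
                counted_hw_irq_enable();
}

static void counted_irq_put(void)
{
        if (!--counted_irq_enabled)              /* 1 -> 0: last user disables */
                counted_hw_irq_disable();
}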
> @@ -105,8 +106,6 @@ static void add_signaling_context(struct intel_breadcrumbs *b,
> {
> intel_context_get(ce);
> list_add_tail(&ce->signal_link, &b->signalers);
> - if (list_is_first(&ce->signal_link, &b->signalers))
> - __intel_breadcrumbs_arm_irq(b);
> }
>
> static void remove_signaling_context(struct intel_breadcrumbs *b,
> @@ -197,7 +196,32 @@ static void signal_irq_work(struct irq_work *work)
>
> spin_lock(&b->irq_lock);
>
> - if (list_empty(&b->signalers))
> + /*
> + * Keep the irq armed until the interrupt after all listeners are gone.
> + *
> + * Enabling/disabling the interrupt is rather costly, roughly a couple
> + * of hundred microseconds. If we are proactive and enable/disable
> + * the interrupt around every request that wants a breadcrumb, we
> + * quickly drown in the extra orders of magnitude of latency imposed
> + * on request submission.
> + *
> + * So we try to be lazy, and keep the interrupts enabled until no
> + * more listeners appear within a breadcrumb interrupt interval (that
> + * is until a request completes that no one cares about). The
> + * observation is that listeners come in batches, and will often
> + * listen to a bunch of requests in succession. Though note on icl+,
> + * interrupts are always enabled due to concerns with rc6 being
> + * dysfunctional with per-engine interrupt masking.
> + *
> + * We also try to avoid raising too many interrupts, as they may
> + * be generated by userspace batches and it is unfortunately rather
> + * too easy to drown the CPU under a flood of GPU interrupts. Thus
> + * whenever no one appears to be listening, we turn off the interrupts.
> + * Fewer interrupts should conserve power -- at the very least, fewer
> + * interrupts draw less ire from other users of the system and tools
> + * like powertop.
> + */
> + if (b->irq_armed && list_empty(&b->signalers))
> __intel_breadcrumbs_disarm_irq(b);
>
> list_splice_init(&b->signaled_requests, &signal);
> @@ -251,6 +275,9 @@ static void signal_irq_work(struct irq_work *work)
>
> i915_request_put(rq);
> }
> +
> + if (!b->irq_armed && !list_empty(&b->signalers))
> + intel_breadcrumbs_arm_irq(b);
> }
>
> struct intel_breadcrumbs *
> @@ -292,21 +319,19 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
>
> void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
> {
> - unsigned long flags;
> -
> - if (!READ_ONCE(b->irq_armed))
> - return;
> -
> - spin_lock_irqsave(&b->irq_lock, flags);
> - __intel_breadcrumbs_disarm_irq(b);
> - spin_unlock_irqrestore(&b->irq_lock, flags);
> -
> - if (!list_empty(&b->signalers))
> - irq_work_queue(&b->irq_work);
> + /* Kick the work once more to drain the signalers */
> + irq_work_sync(&b->irq_work);
> + while (unlikely(READ_ONCE(b->irq_armed))) {
> + local_irq_disable();
> + signal_irq_work(&b->irq_work);
> + local_irq_enable();
> + cond_resched();
> + }
> }
>
> void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
> {
> + irq_work_sync(&b->irq_work);
> kfree(b);
> }
>
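A sketch of the new teardown ordering used by park/free above, with illustrative names rather than the driver's own: irq_work_sync() waits for a pending or still-running instance of the handler to finish, and the caller then drains any remaining armed state by running the handler directly with local interrupts disabled, mimicking the hard-irq context it normally runs in.

#include <linux/irq_work.h>
#include <linux/irqflags.h>
#include <linux/kernel.h>
#include <linux/sched.h>

struct park_demo {                       /* illustrative stand-in */
        struct irq_work irq_work;
        bool irq_armed;
};

static void park_demo_handler(struct irq_work *work)
{
        struct park_demo *b = container_of(work, typeof(*b), irq_work);

        /* a real handler would signal waiters; here we only disarm */
        WRITE_ONCE(b->irq_armed, false);
}

static void park_demo_flush(struct park_demo *b)
{
        /* Wait for any pending or still-running instance of the handler. */
        irq_work_sync(&b->irq_work);

        /* The last run may have left the interrupt armed; drain it by hand. */
        while (READ_ONCE(b->irq_armed)) {
                local_irq_disable();     /* handler expects hard-irq context */
                park_demo_handler(&b->irq_work);
                local_irq_enable();
                cond_resched();
        }
}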
> @@ -362,9 +387,12 @@ static void insert_breadcrumb(struct i915_request *rq,
> GEM_BUG_ON(!check_signal_order(ce, rq));
> set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
>
> - /* Check after attaching to irq, interrupt may have already fired. */
> - if (__request_completed(rq))
> - irq_work_queue(&b->irq_work);
> + /*
> + * Defer enabling the interrupt to after HW submission and recheck
> + * the request as it may have completed and raised the interrupt as
> + * we were attaching it into the lists.
> + */
> + irq_work_queue(&b->irq_work);
> }
>
> bool i915_request_enable_breadcrumb(struct i915_request *rq)
>
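The net effect on the submission side, sketched with hypothetical names: the submit-time path does nothing more expensive than irq_work_queue(), while the slow interrupt-enable write and readback happens later in the worker, once the request is already on its way to the hardware.

#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/list.h>

struct submit_demo {                             /* illustrative stand-in */
        struct irq_work irq_work;
        struct list_head signalers;
        bool irq_armed;
};

static void submit_demo_arm(struct submit_demo *b)
{
        /* slow register write + readback lives here, off the submit path */
        b->irq_armed = true;
}

/* Fast path: called while inserting the breadcrumb during submission. */
static void submit_demo_insert(struct submit_demo *b)
{
        irq_work_queue(&b->irq_work);            /* cheap: just mark pending */
}

/* Deferred path: the worker arms the interrupt only if someone still waits. */
static void submit_demo_worker(struct irq_work *work)
{
        struct submit_demo *b = container_of(work, typeof(*b), irq_work);

        if (!b->irq_armed && !list_empty(&b->signalers))
                submit_demo_arm(b);
}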
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko