[Intel-gfx] [PATCH v5 4/4] drm/i915: Delay disabling the user interrupt for breadcrumbs

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Feb 27 13:57:35 UTC 2017


On 27/02/2017 13:24, Chris Wilson wrote:
> A significant cost in setting up a wait is the overhead of enabling the
> interrupt. As we disable the interrupt whenever the queue of waiters is
> empty, if we are frequently waiting on alternating batches, we end up
> re-enabling the interrupt on a frequent basis. We do want to disable the
> interrupt during normal operations as under high load it may add several
> thousand interrupts/s - we have been known in the past to occupy whole
> cores with our interrupt handler after accidentally leaving user
> interrupts enabled. As a compromise, leave the interrupt enabled until
> the next IRQ, or the system is idle. This gives a small window for a
> waiter to keep the interrupt active and not be delayed by having to
> re-enable the interrupt.
>
> v2: Restore hangcheck/missed-irq detection for continuations
> v3: Be more careful restoring the hangcheck timer after reset
> v4: Be more careful restoring the fake irq after reset (if required!)
> v5: Redo changes to intel_engine_wakeup()
> v6: Factor out __intel_engine_wakeup()
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem.c          |   4 +-
>  drivers/gpu/drm/i915/i915_irq.c          |   2 +
>  drivers/gpu/drm/i915/intel_breadcrumbs.c | 148 ++++++++++++++++++++-----------
>  drivers/gpu/drm/i915/intel_ringbuffer.h  |   7 +-
>  4 files changed, 105 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 01dbba3813c7..0ad080984877 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2988,8 +2988,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
>  	if (wait_for(intel_execlists_idle(dev_priv), 10))
>  		DRM_ERROR("Timeout waiting for engines to idle\n");
>
> -	for_each_engine(engine, dev_priv, id)
> +	for_each_engine(engine, dev_priv, id) {
> +		intel_engine_disarm_breadcrumbs(engine);
>  		i915_gem_batch_pool_fini(&engine->batch_pool);
> +	}
>
>  	GEM_BUG_ON(!dev_priv->gt.awake);
>  	dev_priv->gt.awake = false;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index bc520ee8d6fe..ca8c7b22748e 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1060,6 +1060,8 @@ static void notify_ring(struct intel_engine_cs *engine)
>  			rq = wait->request;
>
>  		wake_up_process(wait->tsk);
> +	} else {
> +		__intel_engine_disarm_breadcrumbs(engine);
>  	}
>  	spin_unlock(&engine->breadcrumbs.lock);
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 3855c8c39b35..ebb7bc0be9fb 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -26,19 +26,28 @@
>
>  #include "i915_drv.h"
>
> -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
> +static unsigned int __intel_engine_wakeup(struct intel_engine_cs *engine)
>  {
>  	struct intel_wait *wait;
> -	unsigned long flags;
>  	unsigned int result = 0;
>
> -	spin_lock_irqsave(&engine->breadcrumbs.lock, flags);
>  	wait = engine->breadcrumbs.first_wait;
>  	if (wait) {
>  		result = ENGINE_WAKEUP_WAITER;
> -		if (!wake_up_process(wait->tsk))
> -			result |= ENGINE_WAKEUP_ACTIVE;
> +		if (wake_up_process(wait->tsk))
> +			result |= ENGINE_WAKEUP_ASLEEP;
>  	}
> +
> +	return result;
> +}
> +
> +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
> +{
> +	unsigned long flags;
> +	unsigned int result = 0;

Can drop the init now.

> +
> +	spin_lock_irqsave(&engine->breadcrumbs.lock, flags);
> +	result = __intel_engine_wakeup(engine);
>  	spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags);
>
>  	return result;
> @@ -54,7 +63,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>  	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
>  	struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> -	if (!b->irq_enabled)
> +	if (!b->irq_armed)
>  		return;
>
>  	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
> @@ -67,7 +76,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>  	 * to process the pending interrupt (e.g, low priority task on a loaded
>  	 * system) and wait until it sleeps before declaring a missed interrupt.
>  	 */
> -	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ACTIVE) {
> +	if (!(intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP)) {

I did not get the explanation from the previous round on why you had to 
reverse the active to asleep. Here it even looks wrong now, because I 
thought you don't want to re-queue the hangcheck when there are no waiters?

>  		mod_timer(&b->hangcheck, wait_timeout());
>  		return;
>  	}
> @@ -80,6 +89,8 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
>  static void intel_breadcrumbs_fake_irq(unsigned long data)
>  {
>  	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	unsigned long flags;
>
>  	/*
>  	 * The timer persists in case we cannot enable interrupts,
> @@ -88,10 +99,15 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
>  	 * every jiffie in order to kick the oldest waiter to do the
>  	 * coherent seqno check.
>  	 */
> -	if (!intel_engine_wakeup(engine))
> +
> +	spin_lock_irqsave(&b->lock, flags);
> +	if (!__intel_engine_wakeup(engine))
> +		__intel_engine_disarm_breadcrumbs(engine);
> +	spin_unlock_irqrestore(&b->lock, flags);
> +	if (!b->irq_armed)
>  		return;
>
> -	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> +	mod_timer(&b->fake_irq, jiffies + 1);
>
>  	/* Ensure that even if the GPU hangs, we get woken up.
>  	 *
> @@ -127,6 +143,38 @@ static void irq_disable(struct intel_engine_cs *engine)
>  	spin_unlock(&engine->i915->irq_lock);
>  }
>
> +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> +	assert_spin_locked(&b->lock);
> +
> +	if (b->irq_enabled) {
> +		irq_disable(engine);
> +		b->irq_enabled = false;
> +	}
> +
> +	b->irq_armed = false;
> +}
> +
> +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> +{
> +	struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +	unsigned long flags;
> +
> +	if (!b->irq_armed)
> +		return;
> +
> +	spin_lock_irqsave(&b->lock, flags);
> +
> +	if (__intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP)
> +		set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);

Could be worthy of a comment here, just to help future readers of the code.

> +
> +	__intel_engine_disarm_breadcrumbs(engine);
> +
> +	spin_unlock_irqrestore(&b->lock, flags);
> +}
> +
>  static bool use_fake_irq(const struct intel_breadcrumbs *b)
>  {
>  	const struct intel_engine_cs *engine =
> @@ -144,6 +192,15 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b)
>  	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
>  }
>
> +static void enable_fake_irq(struct intel_breadcrumbs *b)
> +{
> +	/* Ensure we never sleep indefinitely */
> +	if (!b->irq_enabled || use_fake_irq(b))
> +		mod_timer(&b->fake_irq, jiffies + 1);
> +	else
> +		mod_timer(&b->hangcheck, wait_timeout());
> +}
> +
>  static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  {
>  	struct intel_engine_cs *engine =
> @@ -151,9 +208,17 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  	struct drm_i915_private *i915 = engine->i915;
>
>  	assert_spin_locked(&b->lock);
> -	if (b->rpm_wakelock)
> +	if (b->irq_armed)
>  		return;
>
> +	/* The breadcrumb irq will be disarmed on the interrupt after the
> +	 * waiters are signaled. This gives us a single interrupt window in
> +	 * which we can add a new waiter and avoid the cost of re-enabling
> +	 * the irq.
> +	 */
> +	b->irq_armed = true;
> +	GEM_BUG_ON(b->irq_enabled);
> +
>  	if (I915_SELFTEST_ONLY(b->mock)) {
>  		/* For our mock objects we want to avoid interaction
>  		 * with the real hardware (which is not set up). So
> @@ -162,17 +227,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  		 * implementation to call intel_engine_wakeup()
>  		 * itself when it wants to simulate a user interrupt,
>  		 */
> -		b->rpm_wakelock = true;
>  		return;
>  	}
>
>  	/* Since we are waiting on a request, the GPU should be busy
> -	 * and should have its own rpm reference. For completeness,
> -	 * record an rpm reference for ourselves to cover the
> -	 * interrupt we unmask.
> +	 * and should have its own rpm reference. This is tracked
> +	 * by i915->gt.awake, we can forgo holding our own wakref
> +	 * for the interrupt as before i915->gt.awake is released (when
> +	 * the driver is idle) we disarm the breadcrumbs.
>  	 */
> -	intel_runtime_pm_get_noresume(i915);
> -	b->rpm_wakelock = true;
>
>  	/* No interrupts? Kick the waiter every jiffie! */
>  	if (intel_irqs_enabled(i915)) {
> @@ -181,34 +244,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>  		b->irq_enabled = true;
>  	}
>
> -	/* Ensure we never sleep indefinitely */
> -	if (!b->irq_enabled || use_fake_irq(b))
> -		mod_timer(&b->fake_irq, jiffies + 1);
> -	else
> -		mod_timer(&b->hangcheck, wait_timeout());
> -}
> -
> -static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
> -{
> -	struct intel_engine_cs *engine =
> -		container_of(b, struct intel_engine_cs, breadcrumbs);
> -
> -	assert_spin_locked(&b->lock);
> -	if (!b->rpm_wakelock)
> -		return;
> -
> -	if (I915_SELFTEST_ONLY(b->mock)) {
> -		b->rpm_wakelock = false;
> -		return;
> -	}
> -
> -	if (b->irq_enabled) {
> -		irq_disable(engine);
> -		b->irq_enabled = false;
> -	}
> -
> -	intel_runtime_pm_put(engine->i915);
> -	b->rpm_wakelock = false;
> +	enable_fake_irq(b);
>  }
>
>  static inline struct intel_wait *to_wait(struct rb_node *node)
> @@ -430,7 +466,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
>  			wake_up_process(b->first_wait->tsk);
>  		} else {
>  			b->first_wait = NULL;
> -			__intel_breadcrumbs_disable_irq(b);
>  		}
>  	} else {
>  		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
> @@ -722,15 +757,22 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
>  	cancel_fake_irq(engine);
>  	spin_lock_irq(&b->lock);
>
> -	__intel_breadcrumbs_disable_irq(b);
> -	if (intel_engine_has_waiter(engine)) {
> -		__intel_breadcrumbs_enable_irq(b);
> -		if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
> -			wake_up_process(b->first_wait->tsk);
> -	} else {
> -		/* sanitize the IMR and unmask any auxiliary interrupts */
> +	if (b->irq_enabled)
> +		irq_enable(engine);
> +	else
>  		irq_disable(engine);
> -	}
> +
> +	/* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
> +	 * GPU is active and may have already executed the MI_USER_INTERRUPT
> +	 * before the CPU is ready to receive. However, the engine is currently
> +	 * idle (we haven't started it yet), there is no possibility for a
> +	 * missed interrupt as we enabled the irq and so we can clear the
> +	 * immediate wakeup (until a real interrupt arrives for the waiter).
> +	 */
> +	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> +
> +	if (b->irq_armed)
> +		enable_fake_irq(b);
>
>  	spin_unlock_irq(&b->lock);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 974ec11d2b1d..3dd6eee4a08b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -246,8 +246,8 @@ struct intel_engine_cs {
>
>  		unsigned int hangcheck_interrupts;
>
> +		bool irq_armed : 1;
>  		bool irq_enabled : 1;
> -		bool rpm_wakelock : 1;
>  		I915_SELFTEST_DECLARE(bool mock : 1);
>  	} breadcrumbs;
>
> @@ -644,7 +644,10 @@ static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
>
>  unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
>  #define ENGINE_WAKEUP_WAITER BIT(0)
> -#define ENGINE_WAKEUP_ACTIVE BIT(1)
> +#define ENGINE_WAKEUP_ASLEEP BIT(1)
> +
> +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
> +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
>
>  void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
>  void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
>

Regards,

Tvrtko



More information about the Intel-gfx mailing list