[Intel-gfx] [PATCH 2/2] drm/i915: Delay disabling the user interrupt for breadcrumbs
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Feb 22 17:26:34 UTC 2017
On 17/02/2017 15:51, Chris Wilson wrote:
> A significant cost in setting up a wait is the overhead of enabling the
> interrupt. As we disable the interrupt whenever the queue of waiters is
> empty, if we are frequently waiting on alternating batches, we end up
> re-enabling the interrupt on a frequent basis. We do want to disable the
> interrupt during normal operations as under high load it may add several
> thousand interrupts/s - we have been known in the past to occupy whole
> cores with our interrupt handler after accidentally leaving user
> interrupts enabled. As a compromise, leave the interrupt enabled until
> the next IRQ, or the system is idle. This gives a small window for a
> waiter to keep the interrupt active and not be delayed by having to
> re-enable the interrupt.
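To condense the commit message: arm and enable the interrupt on the first
waiter, then disarm lazily, either from a user interrupt that finds no
waiters or from the idle worker. A simplified sketch of that lifecycle (my
reading, not the literal patch code; the helpers are the ones the patch
adds below):

/* Simplified sketch of the arm/disarm lifecycle, not literal patch code. */

static void sketch_enable_wait(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* The first waiter pays the enable cost; later waiters find
	 * the interrupt already armed.
	 */
	if (!b->irq_armed) {
		b->irq_armed = true;
		b->irq_enabled = true;
		irq_enable(engine);
	}
}

static void sketch_notify_ring(struct intel_engine_cs *engine)
{
	/* Disarm lazily: only a user interrupt that finds no waiters
	 * turns the interrupt off again, so back-to-back waits get to
	 * reuse it without re-enabling.
	 */
	if (!intel_engine_has_waiter(engine))
		__intel_engine_disarm_breadcrumbs(engine);
}

static void sketch_idle_worker(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* The idle worker sweeps up whatever is still armed. */
	for_each_engine(engine, dev_priv, id)
		intel_engine_disarm_breadcrumbs(engine);
}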
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 4 +-
> drivers/gpu/drm/i915/i915_irq.c | 5 +-
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 95 +++++++++++++++++++-------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +-
> 4 files changed, 65 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6745dcbf3799..9c87aacce43b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3003,8 +3003,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
> if (wait_for(intel_execlists_idle(dev_priv), 10))
> DRM_ERROR("Timeout waiting for engines to idle\n");
>
> - for_each_engine(engine, dev_priv, id)
> + for_each_engine(engine, dev_priv, id) {
> + intel_engine_disarm_breadcrumbs(engine);
> i915_gem_batch_pool_fini(&engine->batch_pool);
> + }
>
> GEM_BUG_ON(!dev_priv->gt.awake);
> dev_priv->gt.awake = false;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 0c370c687c2a..fa597a29bc1d 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1036,9 +1036,6 @@ static void notify_ring(struct intel_engine_cs *engine)
> struct drm_i915_gem_request *rq = NULL;
> struct intel_wait *wait;
>
> - if (!intel_engine_has_waiter(engine))
> - return;
> -
> trace_i915_gem_request_notify(engine);
> atomic_inc(&engine->irq_count);
> set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> @@ -1064,6 +1061,8 @@ static void notify_ring(struct intel_engine_cs *engine)
> rq = wait->request;
>
> wake_up_process(wait->tsk);
> + } else {
> + __intel_engine_disarm_breadcrumbs(engine);
So we disarm on the irq following the last waiter exiting. But the overall
effect then depends on when the woken waiter thread gets scheduled in,
combined with the batch duration, which I am not sure I quite like.
> }
> spin_unlock(&engine->breadcrumbs.lock);
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 860372653a59..94c6ce9c0a6f 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -35,8 +35,10 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
> {
> struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> + struct intel_wait *wait;
> + unsigned long flags;
>
> - if (!b->irq_enabled)
> + if (!intel_engine_has_waiter(engine))
> return;
>
> if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
> @@ -49,10 +51,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
> * to process the pending interrupt (e.g., low priority task on a loaded
> * system) and wait until it sleeps before declaring a missed interrupt.
> */
> - if (!intel_engine_wakeup(engine)) {
> + spin_lock_irqsave(&b->lock, flags);
> + wait = b->first_wait;
> + if (wait && !wake_up_process(wait->tsk)) {
> mod_timer(&b->hangcheck, wait_timeout());
> - return;
> + wait = NULL;
> }
> + spin_unlock_irqrestore(&b->lock, flags);
> + if (!wait)
> + return;
>
> DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
> set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
> @@ -107,6 +114,34 @@ static void irq_disable(struct intel_engine_cs *engine)
> spin_unlock(&engine->i915->irq_lock);
> }
>
> +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + assert_spin_locked(&b->lock);
> +
> + if (b->irq_enabled) {
> + irq_disable(engine);
> + b->irq_enabled = false;
> + }
> +
> + b->irq_armed = false;
Do we need the two-level, armed & enabled scheme? I mean, why not just
have irq_enabled and disable it from the timer callback? It is a bit
challenging to figure out how the two-level thing works.

Would a simpler scheme work where we would just bump an irq_enabled
counter to N on every waiter entering, decrement it by one on each user
interrupt, and finally turn the interrupt off on reaching zero? Something
like the sketch below. Maybe it would be too costly in terms of useless
wake-ups. Hm, not sure.
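To make that alternative concrete, here is an untested sketch. It assumes
a hypothetical atomic_t irq_refs in struct intel_breadcrumbs and an
arbitrary IRQ_GRACE_COUNT budget, neither of which exists in the patch:

/* Hypothetical counter-based scheme, untested sketch only. */

#define IRQ_GRACE_COUNT 8 /* arbitrary: interrupts to ride out with no waiter */

static void counter_waiter_enter(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Every new waiter refreshes the countdown; enable on 0 -> N. */
	if (atomic_xchg(&b->irq_refs, IRQ_GRACE_COUNT) == 0)
		irq_enable(engine);
}

static void counter_notify_ring(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Called from notify_ring() under b->lock, so the read/dec
	 * pair does not race with itself; disable once the budget
	 * runs out.
	 */
	if (atomic_read(&b->irq_refs) &&
	    atomic_dec_return(&b->irq_refs) == 0)
		irq_disable(engine);
}

The upside would be a single counter instead of the irq_armed/irq_enabled
pair; the downside, as said, is up to N spurious interrupts and wake-ups
after the last waiter has gone.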
> +}
> +
> +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> + unsigned long flags;
> +
> + if (!b->irq_armed)
> + return;
> +
> + spin_lock_irqsave(&b->lock, flags);
> + if (!intel_engine_has_waiter(engine))
> + __intel_engine_disarm_breadcrumbs(engine);
> + spin_unlock_irqrestore(&b->lock, flags);
> +}
> +
> static bool use_fake_irq(const struct intel_breadcrumbs *b)
> {
> const struct intel_engine_cs *engine =
> @@ -131,9 +166,17 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> struct drm_i915_private *i915 = engine->i915;
>
> assert_spin_locked(&b->lock);
> - if (b->rpm_wakelock)
> + if (b->irq_armed)
> return;
>
> + /* The breadcrumb irq will be disarmed on the interrupt after the
> + * waiters are signaled. This gives us a single interrupt window in
> + * which we can add a new waiter and avoid the cost of re-enabling
> + * the irq.
> + */
> + b->irq_armed = true;
> + GEM_BUG_ON(b->irq_enabled);
> +
> if (I915_SELFTEST_ONLY(b->mock)) {
> /* For our mock objects we want to avoid interaction
> * with the real hardware (which is not set up). So
> @@ -142,17 +185,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> * implementation to call intel_engine_wakeup()
> * itself when it wants to simulate a user interrupt,
> */
> - b->rpm_wakelock = true;
> return;
> }
>
> /* Since we are waiting on a request, the GPU should be busy
> - * and should have its own rpm reference. For completeness,
> - * record an rpm reference for ourselves to cover the
> - * interrupt we unmask.
> + * and should have its own rpm reference. This is tracked
> + * by i915->gt.awake, we can forgo holding our own wakref
> + * for the interrupt as before i915->gt.awake is released (when
> + * the driver is idle) we disarm the breadcrumbs.
> */
> - intel_runtime_pm_get_noresume(i915);
> - b->rpm_wakelock = true;
>
> /* No interrupts? Kick the waiter every jiffie! */
> if (intel_irqs_enabled(i915)) {
> @@ -171,29 +212,6 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> }
> }
>
> -static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
> -{
> - struct intel_engine_cs *engine =
> - container_of(b, struct intel_engine_cs, breadcrumbs);
> -
> - assert_spin_locked(&b->lock);
> - if (!b->rpm_wakelock)
> - return;
> -
> - if (I915_SELFTEST_ONLY(b->mock)) {
> - b->rpm_wakelock = false;
> - return;
> - }
> -
> - if (b->irq_enabled) {
> - irq_disable(engine);
> - b->irq_enabled = false;
> - }
> -
> - intel_runtime_pm_put(engine->i915);
> - b->rpm_wakelock = false;
> -}
> -
> static inline struct intel_wait *to_wait(struct rb_node *node)
> {
> return rb_entry(node, struct intel_wait, node);
> @@ -413,7 +431,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
> wake_up_process(b->first_wait->tsk);
> } else {
> b->first_wait = NULL;
> - __intel_breadcrumbs_disable_irq(b);
> }
> } else {
> GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
> @@ -707,14 +724,16 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
> cancel_fake_irq(engine);
> spin_lock_irq(&b->lock);
>
> - __intel_breadcrumbs_disable_irq(b);
> + if (b->irq_enabled)
> + irq_enable(engine);
Can't we hit that GEM_BUG_ON(b->irq_enabled) in
__intel_breadcrumbs_enable_irq from here?
> + else
> + irq_disable(engine);
> +
> if (intel_engine_has_waiter(engine)) {
> - __intel_breadcrumbs_enable_irq(b);
> if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
> wake_up_process(b->first_wait->tsk);
> - } else {
> + mod_timer(&b->hangcheck, wait_timeout());
If there are no waiters, why schedule the timer? It won't do anything
anyway, AFAICS.
I need to give this some more thought tomorrow.
Regards,
Tvrtko
> /* sanitize the IMR and unmask any auxiliary interrupts */
> - irq_disable(engine);
> }
>
> spin_unlock_irq(&b->lock);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 1271b6ebdd4d..1362169ef2b4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -246,8 +246,8 @@ struct intel_engine_cs {
>
> unsigned int hangcheck_interrupts;
>
> + bool irq_armed : 1;
> bool irq_enabled : 1;
> - bool rpm_wakelock : 1;
> I915_SELFTEST_DECLARE(bool mock : 1);
> } breadcrumbs;
>
> @@ -621,6 +621,8 @@ static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
> return wait;
> }
>
> +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
> +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
> void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
> void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
> bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
>