[Intel-gfx] [PATCH 14/21] drm/i915: Only apply one barrier after a breadcrumb interrupt is posted
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jun 6 15:34:27 UTC 2016
On 03/06/16 17:08, Chris Wilson wrote:
> If we flag the seqno as potentially stale upon receiving an interrupt,
> we can use that information to reduce the frequency that we apply the
> heavyweight coherent seqno read (i.e. if we wake up a chain of waiters).
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 15 ++++++++++++++-
> drivers/gpu/drm/i915/i915_irq.c | 1 +
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 16 ++++++++++------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 4 files changed, 26 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4ddb9ff319cb..a71d08199d57 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3935,7 +3935,20 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
> * but it is easier and safer to do it every time the waiter
> * is woken.
> */
> - if (engine->irq_seqno_barrier) {
> + if (engine->irq_seqno_barrier && READ_ONCE(engine->irq_posted)) {
> + /* The ordering of irq_posted versus applying the barrier
> + * is crucial. The clearing of the current irq_posted must
> + * be visible before we perform the barrier operation,
> + * such that if a subsequent interrupt arrives, irq_posted
> + * is reasserted and our task rewoken (which causes us to
> + * do another __i915_request_irq_complete() immediately
> + * and reapply the barrier). Conversely, if the clear
> + * occurs after the barrier, then an interrupt that arrived
> + * whilst we waited on the barrier would not trigger a
> + * barrier on the next pass, and the read may not see the
> + * seqno update.
> + */
> + WRITE_ONCE(engine->irq_posted, false);
Why is this not smp_store_mb()?
> engine->irq_seqno_barrier(engine);
> if (i915_gem_request_completed(req))
> return true;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index c14eb57b5807..14b3d65bb604 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -976,6 +976,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>
> static void notify_ring(struct intel_engine_cs *engine)
> {
> + smp_store_mb(engine->irq_posted, true);
> if (intel_engine_wakeup(engine)) {
> trace_i915_gem_request_notify(engine);
> engine->user_interrupts++;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 44346de39794..0f5fe114c204 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -43,12 +43,18 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
>
> static void irq_enable(struct intel_engine_cs *engine)
> {
> + /* Enabling the IRQ may miss the generation of the interrupt, but
> + * we still need to force the barrier before reading the seqno,
> + * just in case.
> + */
> + engine->irq_posted = true;
Should it be smp_store_mb here as well?
> WARN_ON(!engine->irq_get(engine));
> }
>
> static void irq_disable(struct intel_engine_cs *engine)
> {
> engine->irq_put(engine);
> + engine->irq_posted = false;
> }
>
> static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> @@ -56,7 +62,6 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> struct intel_engine_cs *engine =
> container_of(b, struct intel_engine_cs, breadcrumbs);
> struct drm_i915_private *i915 = engine->i915;
> - bool irq_posted = false;
>
> assert_spin_locked(&b->lock);
> if (b->rpm_wakelock)
> @@ -72,10 +77,8 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
>
> /* No interrupts? Kick the waiter every jiffie! */
> if (intel_irqs_enabled(i915)) {
> - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) {
> + if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
> irq_enable(engine);
> - irq_posted = true;
> - }
> b->irq_enabled = true;
> }
>
> @@ -83,7 +86,7 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
> mod_timer(&b->fake_irq, jiffies + 1);
>
> - return irq_posted;
> + return READ_ONCE(engine->irq_posted);
> }
>
> static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
> @@ -197,7 +200,8 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
> * in case the seqno passed.
> */
> __intel_breadcrumbs_enable_irq(b);
> - wake_up_process(to_wait(next)->task);
> + if (READ_ONCE(engine->irq_posted))
Should this be if (__intel_breadcrumbs_enable_irq(b)) instead, using its return value directly?
> + wake_up_process(to_wait(next)->task);
> }
>
> do {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index cb599a54931a..324f85e8d540 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -197,6 +197,7 @@ struct intel_engine_cs {
> struct i915_ctx_workarounds wa_ctx;
>
> unsigned irq_refcount; /* protected by dev_priv->irq_lock */
> + bool irq_posted;
> u32 irq_enable_mask; /* bitmask to enable ring interrupt */
> struct drm_i915_gem_request *trace_irq_req;
> bool __must_check (*irq_get)(struct intel_engine_cs *ring);
>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list