[Intel-gfx] [PATCH 1/2] drm/i915: Push the wakeref->count deferral to the backend
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Aug 13 14:47:41 UTC 2019
Chris Wilson <chris at chris-wilson.co.uk> writes:
> If the backend wishes to defer the wakeref parking, make it responsible
> for unlocking the wakeref (i.e. bumping the counter). This allows it to
> time the unlock much more carefully in case it happens to needs the
> wakeref to be active during its deferral.
>
> For instance, during engine parking we may choose to emit an idle
> barrier (a request). To do so, we borrow the engine->kernel_context
> timeline and to ensure exclusive access we keep the
> engine->wakeref.count as 0. However, to submit that request to HW may
> require a intel_engine_pm_get() (e.g. to keep the submission tasklet
> alive) and before we allow that we have to rewake our wakeref to avoid a
> recursive deadlock.
>
> <4> [257.742916] IRQs not enabled as expected
> <4> [257.742930] WARNING: CPU: 0 PID: 0 at kernel/softirq.c:169 __local_bh_enable_ip+0xa9/0x100
> <4> [257.742936] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 btusb btrtl btbcm btintel snd_hda_intel snd_intel_nhlt bluetooth snd_hda_codec coretemp snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul ecdh_generic ecc ghash_clmulni_intel snd_pcm r8169 realtek lpc_ich prime_numbers i2c_hid
> <4> [257.742991] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U W 5.3.0-rc3-g5d0a06cd532c-drmtip_340+ #1
> <4> [257.742998] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015
> <4> [257.743008] RIP: 0010:__local_bh_enable_ip+0xa9/0x100
> <4> [257.743017] Code: 37 5b 5d c3 8b 80 50 08 00 00 85 c0 75 a9 80 3d 0b be 25 01 00 75 a0 48 c7 c7 f3 0c 06 ac c6 05 fb bd 25 01 01 e8 77 84 ff ff <0f> 0b eb 89 48 89 ef e8 3b 41 06 00 eb 98 e8 e4 5c f4 ff 5b 5d c3
> <4> [257.743025] RSP: 0018:ffffa78600003cb8 EFLAGS: 00010086
> <4> [257.743035] RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000010302
> <4> [257.743042] RDX: 0000000080010302 RSI: 0000000000000000 RDI: 00000000ffffffff
> <4> [257.743050] RBP: ffffffffc0494bb3 R08: 0000000000000000 R09: 0000000000000001
> <4> [257.743058] R10: 0000000014c8f0e9 R11: 00000000fee2ff8e R12: ffffa23ba8c38008
> <4> [257.743065] R13: ffffa23bacc579c0 R14: ffffa23bb7db0f60 R15: ffffa23b9cc8c430
> <4> [257.743074] FS: 0000000000000000(0000) GS:ffffa23bbba00000(0000) knlGS:0000000000000000
> <4> [257.743082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4> [257.743089] CR2: 00007fe477b20778 CR3: 000000011f72a000 CR4: 00000000001006f0
> <4> [257.743096] Call Trace:
> <4> [257.743104] <IRQ>
> <4> [257.743265] __i915_request_commit+0x240/0x5d0 [i915]
> <4> [257.743427] ? __i915_request_create+0x228/0x4c0 [i915]
> <4> [257.743584] __engine_park+0x64/0x250 [i915]
> <4> [257.743730] ____intel_wakeref_put_last+0x1c/0x70 [i915]
> <4> [257.743878] i915_sample+0x2ee/0x310 [i915]
> <4> [257.744030] ? i915_pmu_cpu_offline+0xb0/0xb0 [i915]
> <4> [257.744040] __hrtimer_run_queues+0x11e/0x4b0
> <4> [257.744068] hrtimer_interrupt+0xea/0x250
> <4> [257.744079] ? lockdep_hardirqs_off+0x79/0xd0
> <4> [257.744101] smp_apic_timer_interrupt+0x96/0x280
> <4> [257.744114] apic_timer_interrupt+0xf/0x20
> <4> [257.744125] RIP: 0010:__do_softirq+0xb3/0x4ae
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111378
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 ++-
> drivers/gpu/drm/i915/i915_request.c | 66 ++++++++++++-----------
> drivers/gpu/drm/i915/i915_request.h | 2 +
> drivers/gpu/drm/i915/i915_scheduler.c | 3 +-
> drivers/gpu/drm/i915/intel_wakeref.c | 4 +-
> drivers/gpu/drm/i915/intel_wakeref.h | 11 ++++
> 6 files changed, 56 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> index 6b15e3335dd6..ad37c9808c1f 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
> @@ -68,9 +68,13 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
>
> /* Check again on the next retirement. */
> engine->wakeref_serial = engine->serial + 1;
> -
> i915_request_add_active_barriers(rq);
> +
> + rq->sched.attr.priority = INT_MAX; /* Preemption barrier */
> +
> __i915_request_commit(rq);
> + __intel_wakeref_defer_park(&engine->wakeref);
> + __i915_request_queue(rq, NULL);
>
> return false;
> }
> @@ -98,7 +102,7 @@ static int __engine_park(struct intel_wakeref *wf)
> intel_engine_pool_park(&engine->pool);
>
> /* Must be reset upon idling, or we may miss the busy wakeup. */
> - GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
> + engine->execlists.queue_priority_hint = INT_MIN;
To speed up the turnaround, we went through this on irc and this
was the only thing that was not agreed upon so with it removed,
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>
> if (engine->park)
> engine->park(engine);
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 0ea1136d453e..4021334dd2c5 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1186,6 +1186,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
> list_add(&ring->active_link, &rq->i915->gt.active_rings);
> rq->emitted_jiffies = jiffies;
>
> + return prev;
> +}
> +
> +void __i915_request_queue(struct i915_request *rq,
> + const struct i915_sched_attr *attr)
> +{
> /*
> * Let the backend know a new request has arrived that may need
> * to adjust the existing execution schedule due to a high priority
> @@ -1199,43 +1205,15 @@ struct i915_request *__i915_request_commit(struct i915_request *rq)
> */
> local_bh_disable();
> i915_sw_fence_commit(&rq->semaphore);
> - if (engine->schedule) {
> - struct i915_sched_attr attr = rq->gem_context->sched;
> -
> - /*
> - * Boost actual workloads past semaphores!
> - *
> - * With semaphores we spin on one engine waiting for another,
> - * simply to reduce the latency of starting our work when
> - * the signaler completes. However, if there is any other
> - * work that we could be doing on this engine instead, that
> - * is better utilisation and will reduce the overall duration
> - * of the current work. To avoid PI boosting a semaphore
> - * far in the distance past over useful work, we keep a history
> - * of any semaphore use along our dependency chain.
> - */
> - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
> - attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> -
> - /*
> - * Boost priorities to new clients (new request flows).
> - *
> - * Allow interactive/synchronous clients to jump ahead of
> - * the bulk clients. (FQ_CODEL)
> - */
> - if (list_empty(&rq->sched.signalers_list))
> - attr.priority |= I915_PRIORITY_WAIT;
> -
> - engine->schedule(rq, &attr);
> - }
> + if (attr && rq->engine->schedule)
> + rq->engine->schedule(rq, attr);
> i915_sw_fence_commit(&rq->submit);
> local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
> -
> - return prev;
> }
>
> void i915_request_add(struct i915_request *rq)
> {
> + struct i915_sched_attr attr = rq->gem_context->sched;
> struct i915_request *prev;
>
> lockdep_assert_held(&rq->timeline->mutex);
> @@ -1245,6 +1223,32 @@ void i915_request_add(struct i915_request *rq)
>
> prev = __i915_request_commit(rq);
>
> + /*
> + * Boost actual workloads past semaphores!
> + *
> + * With semaphores we spin on one engine waiting for another,
> + * simply to reduce the latency of starting our work when
> + * the signaler completes. However, if there is any other
> + * work that we could be doing on this engine instead, that
> + * is better utilisation and will reduce the overall duration
> + * of the current work. To avoid PI boosting a semaphore
> + * far in the distance past over useful work, we keep a history
> + * of any semaphore use along our dependency chain.
> + */
> + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN))
> + attr.priority |= I915_PRIORITY_NOSEMAPHORE;
> +
> + /*
> + * Boost priorities to new clients (new request flows).
> + *
> + * Allow interactive/synchronous clients to jump ahead of
> + * the bulk clients. (FQ_CODEL)
> + */
> + if (list_empty(&rq->sched.signalers_list))
> + attr.priority |= I915_PRIORITY_WAIT;
> +
> + __i915_request_queue(rq, &attr);
> +
> /*
> * In typical scenarios, we do not expect the previous request on
> * the timeline to be still tracked by timeline->last_request if it
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 313df3c37158..fec1d5f17c94 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -251,6 +251,8 @@ struct i915_request * __must_check
> i915_request_create(struct intel_context *ce);
>
> struct i915_request *__i915_request_commit(struct i915_request *request);
> +void __i915_request_queue(struct i915_request *rq,
> + const struct i915_sched_attr *attr);
>
> void i915_request_retire_upto(struct i915_request *rq);
>
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> index 0bd452e851d8..7b84ebca2901 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -349,8 +349,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
> unsigned long flags;
>
> GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
> -
> - if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
> + if (READ_ONCE(rq->sched.attr.priority) & bump)
> return;
>
> spin_lock_irqsave(&schedule_lock, flags);
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
> index d4443e81c1c8..868cc78048d0 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.c
> +++ b/drivers/gpu/drm/i915/intel_wakeref.c
> @@ -57,12 +57,10 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
> if (!atomic_dec_and_test(&wf->count))
> goto unlock;
>
> + /* ops->put() must reschedule its own release on error/deferral */
> if (likely(!wf->ops->put(wf))) {
> rpm_put(wf);
> wake_up_var(&wf->wakeref);
> - } else {
> - /* ops->put() must schedule its own release on deferral */
> - atomic_set_release(&wf->count, 1);
> }
>
> unlock:
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
> index 535a3a12864b..5f0c972a80fb 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -163,6 +163,17 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
> return READ_ONCE(wf->wakeref);
> }
>
> +/**
> + * __intel_wakeref_defer_park: Defer the current park callback
> + * @wf: the wakeref
> + */
> +static inline void
> +__intel_wakeref_defer_park(struct intel_wakeref *wf)
> +{
> + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
> + atomic_set_release(&wf->count, 1);
> +}
> +
> /**
> * intel_wakeref_wait_for_idle: Wait until the wakeref is idle
> * @wf: the wakeref
> --
> 2.23.0.rc1
More information about the Intel-gfx
mailing list