[Intel-gfx] [PATCH] drm/i915/execlists: Enable coarse preemption boundaries for gen8
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Apr 5 09:46:19 UTC 2019
On 29/03/2019 13:40, Chris Wilson wrote:
> When we introduced preemption, we chose to keep it disabled for gen8 as
> supporting preemption inside GPGPU user batches required various w/a in
> userspace. Since then, the desire to preempt long queues of requests
> between batches (e.g. within busywaiting semaphores) has grown. So allow
> arbitration within the busywaits and between requests, but disable
> arbitration within user batches so that we can preempt between requests
> and not risk breaking GPGPU.
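To visualise the coarse boundary this creates, the per-request command
stream on gen8 then looks roughly like this (a sketch of the ordering
only, not the exact dwords emitted):

    MI_ARB_ON_OFF | MI_ARB_DISABLE    /* emit_bb_start: close arbitration */
    MI_BATCH_BUFFER_START_GEN8 ...    /* user batch runs without preemption */
    MI_ARB_ON_OFF | MI_ARB_ENABLE     /* emit_fini_breadcrumb: reopen it */
    <breadcrumb, semaphore busywaits> /* preemptible between requests */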
>
> However, since this preemption is much coarser and doesn't interfere
> with userspace, we decline to include it amongst the scheduler
> capabilities. (This is also required for us to skip over the preemption
> selftests that expect to be able to preempt user batches.)
>
> Testcase: igt/gem_exec_schedule/semaphore-user
> References: beecec901790 ("drm/i915/execlists: Preemption!")
> Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michal Winiarski <michal.winiarski at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
> drivers/gpu/drm/i915/intel_lrc.c | 49 ++++--
> drivers/gpu/drm/i915/selftests/intel_lrc.c | 180 +++++++++++++++++++++
> 3 files changed, 217 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 662da485e15f..92993db38aad 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -562,7 +562,7 @@ static void init_contexts(struct drm_i915_private *i915)
>
> static bool needs_preempt_context(struct drm_i915_private *i915)
> {
> - return HAS_LOGICAL_RING_PREEMPTION(i915);
> + return HAS_EXECLISTS(i915);
> }
>
> int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index bec232acc8d7..b380688a0c1c 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -233,7 +233,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> {
> int last_prio;
>
> - if (!intel_engine_has_preemption(engine))
> + if (!engine->preempt_context)
> return false;
>
> if (i915_request_completed(rq))
> @@ -2035,7 +2035,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
> {
> u32 *cs;
>
> - cs = intel_ring_begin(rq, 6);
> + cs = intel_ring_begin(rq, 4);
> if (IS_ERR(cs))
> return PTR_ERR(cs);
>
> @@ -2046,16 +2046,35 @@ static int gen8_emit_bb_start(struct i915_request *rq,
> * particular all the gen that do not need the w/a at all!), if we
> * took care to make sure that on every switch into this context
> * (both ordinary and for preemption) that arbitration was enabled
> - * we would be fine. However, there doesn't seem to be a downside to
> - * being paranoid and making sure it is set before each batch and
> - * every context-switch.
> - *
> - * Note that if we fail to enable arbitration before the request
> - * is complete, then we do not see the context-switch interrupt and
> - * the engine hangs (with RING_HEAD == RING_TAIL).
> - *
> - * That satisfies both the GPGPU w/a and our heavy-handed paranoia.
> + * we would be fine. However, for gen8 there is another w/a that
> + * requires us to not preempt inside GPGPU execution, so we keep
> + * arbitration disabled for gen8 batches. Arbitration will be
> + * re-enabled before we close the request
> + * (engine->emit_fini_breadcrumb).
> */
> + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
> +
> + /* FIXME(BDW): Address space and security selectors. */
> + *cs++ = MI_BATCH_BUFFER_START_GEN8 |
> + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
> + *cs++ = lower_32_bits(offset);
> + *cs++ = upper_32_bits(offset);
> +
> + intel_ring_advance(rq, cs);
> +
> + return 0;
> +}
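For completeness, the matching re-enable mentioned in the comment is not
in the quoted hunks; presumably emit_fini_breadcrumb gains (or already
carries) a single dword along the lines of:

    *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; /* reopen arbitration before the breadcrumb */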
> +
> +static int gen9_emit_bb_start(struct i915_request *rq,
> + u64 offset, u32 len,
> + const unsigned int flags)
> +{
> + u32 *cs;
> +
> + cs = intel_ring_begin(rq, 6);
> + if (IS_ERR(cs))
> + return PTR_ERR(cs);
> +
> *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
>
> /* FIXME(BDW): Address space and security selectors. */
This comment can now be removed.
> @@ -2316,7 +2335,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
> engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> if (!intel_vgpu_active(engine->i915))
> engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> - if (engine->preempt_context)
> + if (engine->preempt_context &&
> + HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> }
>
> @@ -2350,7 +2370,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
> * until a more refined solution exists.
> */
> }
> - engine->emit_bb_start = gen8_emit_bb_start;
> + if (IS_GEN(engine->i915, 8))
> + engine->emit_bb_start = gen8_emit_bb_start;
> + else
> + engine->emit_bb_start = gen9_emit_bb_start;
> }
>
> static inline void
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index 0d3cae564db8..8bcd4e1d58ee 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -76,6 +76,185 @@ static int live_sanitycheck(void *arg)
> return err;
> }
>
> +static int live_busywait_preempt(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct i915_gem_context *ctx_hi, *ctx_lo;
> + struct intel_engine_cs *engine;
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *vma;
> + enum intel_engine_id id;
> + intel_wakeref_t wakeref;
> + int err = -ENOMEM;
> + u32 *map;
> +
> + /*
> + * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
> + * preempt the busywaits used to synchronise between rings.
> + */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(i915);
> +
> + ctx_hi = kernel_context(i915);
> + if (!ctx_hi)
> + goto err_unlock;
> + ctx_hi->sched.priority = INT_MAX;
> +
> + ctx_lo = kernel_context(i915);
> + if (!ctx_lo)
> + goto err_ctx_hi;
> + ctx_lo->sched.priority = INT_MIN;
> +
> + obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
> + if (IS_ERR(obj)) {
> + err = PTR_ERR(obj);
> + goto err_ctx_lo;
> + }
> +
> + map = i915_gem_object_pin_map(obj, I915_MAP_WC);
> + if (IS_ERR(map)) {
> + err = PTR_ERR(map);
> + goto err_obj;
> + }
> +
> + vma = i915_vma_instance(obj, &i915->ggtt.vm, 0);
> + if (IS_ERR(vma)) {
> + err = PTR_ERR(vma);
> + goto err_map;
> + }
> +
> + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
> + if (err)
> + goto err_map;
> +
> + for_each_engine(engine, i915, id) {
> + struct i915_request *lo, *hi;
> + struct igt_live_test t;
> + u32 *cs;
> +
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
> + err = -EIO;
> + goto err_vma;
> + }
> +
> + /*
> + * We create two requests. The low priority request
> + * busywaits on a semaphore (inside the ringbuffer where
> + * it should be preemptible) and the high priority request
> + * uses a MI_STORE_DWORD_IMM to update the semaphore value
> + * allowing the first request to complete. If preemption
> + * fails, we hang instead.
> + */
> +
> + lo = i915_request_alloc(engine, ctx_lo);
> + if (IS_ERR(lo)) {
> + err = PTR_ERR(lo);
> + goto err_vma;
> + }
> +
> + cs = intel_ring_begin(lo, 8);
> + if (IS_ERR(cs)) {
> + err = PTR_ERR(cs);
> + i915_request_add(lo);
> + goto err_vma;
> + }
> +
> + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> + *cs++ = i915_ggtt_offset(vma);
> + *cs++ = 0;
> + *cs++ = 1;
> +
> + /* XXX Do we need a flush + invalidate here? */
> +
> + *cs++ = MI_SEMAPHORE_WAIT |
> + MI_SEMAPHORE_GLOBAL_GTT |
> + MI_SEMAPHORE_POLL |
> + MI_SEMAPHORE_SAD_EQ_SDD;
> + *cs++ = 0;
> + *cs++ = i915_ggtt_offset(vma);
> + *cs++ = 0;
> +
> + intel_ring_advance(lo, cs);
> + i915_request_add(lo);
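Decoded, the low priority ring thus ends up containing:

    store *(ggtt(vma) + 0) := 1      /* tell the CPU the busywait has started */
    poll until *(ggtt(vma) + 0) == 0 /* MI_SEMAPHORE_SAD_EQ_SDD, data dword 0 */

so the wait_for() below observes the store, and only a store of 0 (from
the high priority request, or the CPU) releases the poll.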
> +
> + if (wait_for(READ_ONCE(*map), 10)) {
> + err = -ETIMEDOUT;
> + goto err_vma;
> + }
> +
> + /* Low priority request should be busywaiting now */
> + if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) {
> + pr_err("%s: Busywaiting request did not!\n",
> + engine->name);
> + err = -EIO;
> + goto err_vma;
> + }
> +
> + hi = i915_request_alloc(engine, ctx_hi);
> + if (IS_ERR(hi)) {
> + err = PTR_ERR(hi);
> + goto err_vma;
We could release the spinner with a CPU write from here onwards, but I
guess it doesn't matter.
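For reference, that release is just a plain store of the value the
SAD_EQ_SDD poll is waiting for, through the WC map pinned above
(untested sketch):

    WRITE_ONCE(*map, 0); /* releases the semaphore busywait */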
> + }
> +
> + cs = intel_ring_begin(hi, 4);
> + if (IS_ERR(cs)) {
> + err = PTR_ERR(cs);
> + i915_request_add(hi);
> + goto err_vma;
> + }
> +
> + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> + *cs++ = i915_ggtt_offset(vma);
> + *cs++ = 0;
> + *cs++ = 0;
> +
> + intel_ring_advance(hi, cs);
> + i915_request_add(hi);
> +
> + if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) {
> + struct drm_printer p = drm_info_printer(i915->drm.dev);
> +
> + pr_err("%s: Failed to preempt semaphore busywait!\n",
> + engine->name);
> +
> + intel_engine_dump(engine, &p, "%s\n", engine->name);
> + GEM_TRACE_DUMP();
> +
> + i915_gem_set_wedged(i915);
> + err = -EIO;
> + goto err_vma;
> + }
> + GEM_BUG_ON(READ_ONCE(*map));
> +
> + if (igt_live_test_end(&t)) {
> + err = -EIO;
> + goto err_vma;
> + }
> + }
> +
> + err = 0;
> +err_vma:
> + i915_vma_unpin(vma);
> +err_map:
> + i915_gem_object_unpin_map(obj);
> +err_obj:
> + i915_gem_object_put(obj);
> +err_ctx_lo:
> + kernel_context_close(ctx_lo);
> +err_ctx_hi:
> + kernel_context_close(ctx_hi);
> +err_unlock:
> + if (igt_flush_test(i915, I915_WAIT_LOCKED))
> + err = -EIO;
> + intel_runtime_pm_put(i915, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> static int live_preempt(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> @@ -1127,6 +1306,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> SUBTEST(live_sanitycheck),
> + SUBTEST(live_busywait_preempt),
> SUBTEST(live_preempt),
> SUBTEST(live_late_preempt),
> SUBTEST(live_suppress_self_preempt),
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko