[Intel-gfx] [PATCH] drm/i915/selftests: Exercise potential false lite-restore
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Oct 1 12:16:19 UTC 2019
On 01/10/2019 10:51, Chris Wilson wrote:
> If execlists's lite-restore is based on the common GEM context tag
> rather than the per-intel_context LRCA, then a context switch between
> two intel_contexts on the same engine derived from the same GEM context
> will perform a lite-restore instead of a full context switch. We can
> exploit this by poisoning the ringbuffer of the first context and trying
> to trick a simple RING_TAIL update (i.e. lite-restore)
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> Then switch back to ce[0] for fun.
> ---
> drivers/gpu/drm/i915/gt/selftest_lrc.c | 132 +++++++++++++++++++++++++
> 1 file changed, 132 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index 93f2fcdc49bf..b90b970a44b9 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -79,6 +79,137 @@ static int live_sanitycheck(void *arg)
> return err;
> }
>
> +static int live_unlite_restore(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct intel_engine_cs *engine;
> + struct i915_gem_context *ctx;
> + enum intel_engine_id id;
> + intel_wakeref_t wakeref;
> + struct igt_spinner spin;
> + int err = -ENOMEM;
> +
> + /*
> + * Check that we can correctly context switch between 2 instances
> + * on the same engine from the same parent context.
> + */
> +
> + mutex_lock(&i915->drm.struct_mutex);
> + wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> +
> + if (igt_spinner_init(&spin, &i915->gt))
> + goto err_unlock;
> +
> + ctx = kernel_context(i915);
> + if (!ctx)
> + goto err_spin;
> +
> + for_each_engine(engine, i915, id) {
> + struct intel_context *ce[2] = {};
> + struct i915_request *rq[3];
> + struct igt_live_test t;
> + int n;
> +
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
> + err = -EIO;
> + break;
> + }
> +
> + for (n = 0; n < ARRAY_SIZE(ce); n++) {
> + ce[n] = intel_context_create(ctx, engine);
> + if (IS_ERR(ce[n])) {
> + err = PTR_ERR(ce[n]);
> + goto err_ce;
> + }
> +
> + err = intel_context_pin(ce[n]);
> + if (err)
> + goto err_ce;
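If pinning fails, the err_ce path will underflow the unpin: ce[n] has
already been stored by then, so the cleanup loop unpins a context that
was never pinned. Perhaps you need to only store in ce[] when both steps
have passed, and keep it in a local until then.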
> +
> + /*
> + * Setup the pair of contexts such that if we
> + * lite-restore using the RING_TAIL from ce[1] it
> + * will execute garbage from ce[0]->ring.
> + */
> + memset(ce[n]->ring->vaddr,
> + POISON_INUSE,
> + ce[n]->ring->vma->size);
> + }
> + intel_ring_reset(ce[1]->ring, ce[1]->ring->vma->size / 2);
> + __execlists_update_reg_state(ce[1], engine);
> +
> + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
> + if (IS_ERR(rq[0])) {
> + err = PTR_ERR(rq[0]);
> + goto err_ce;
> + }
> +
> + GEM_BUG_ON(rq[0]->tail > ce[1]->ring->emit);
> + i915_request_get(rq[0]);
> + i915_request_add(rq[0]);
> +
> + if (!igt_wait_for_spinner(&spin, rq[0])) {
> + i915_request_put(rq[0]);
> + goto err_ce;
> + }
> +
> + rq[1] = i915_request_create(ce[1]);
> + if (IS_ERR(rq[1])) {
> + err = PTR_ERR(rq[1]);
> + i915_request_put(rq[0]);
> + goto err_ce;
> + }
> + GEM_BUG_ON(rq[1]->tail <= rq[0]->tail);
> +
> + /* Ensure we do a completion switch from ce[0] to ce[1] */
> + i915_request_await_dma_fence(rq[1], &rq[0]->fence);
What do you mean by "completion switch"? You are setting up a dependency,
so rq[1] (and rq[2]) won't be put into the ELSP until the spinner has
ended, so it may not even be a context switch. Wouldn't you actually need
the opposite? I was expecting that you would let the spinner run, make
sure rq[1] is in the ELSP, and then count on timeslicing to trigger a
context switch.
Regards,
Tvrtko
> + i915_request_put(rq[0]);
> +
> + i915_request_get(rq[1]);
> + i915_request_add(rq[1]);
> +
> + /* And switch back to ce[0] for good measure */
> + rq[2] = i915_request_create(ce[0]);
> + if (IS_ERR(rq[2])) {
> + err = PTR_ERR(rq[2]);
> + i915_request_put(rq[1]);
> + goto err_ce;
> + }
> + GEM_BUG_ON(rq[2]->tail > rq[1]->tail);
> +
> + i915_request_await_dma_fence(rq[2], &rq[1]->fence);
> + i915_request_put(rq[1]);
> +
> + i915_request_add(rq[2]);
> +
> +err_ce:
> + igt_spinner_end(&spin);
> + for (n = 0; n < ARRAY_SIZE(ce); n++) {
> + if (IS_ERR_OR_NULL(ce[n]))
> + break;
> +
> + intel_context_unpin(ce[n]);
> + intel_context_put(ce[n]);
> + }
> +
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + if (err)
> + break;
> + }
> +
> + kernel_context_close(ctx);
> +err_spin:
> + igt_spinner_fini(&spin);
> +err_unlock:
> + intel_runtime_pm_put(&i915->runtime_pm, wakeref);
> + mutex_unlock(&i915->drm.struct_mutex);
> + return err;
> +}
> +
> static int
> emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
> {
> @@ -2178,6 +2309,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
> {
> static const struct i915_subtest tests[] = {
> SUBTEST(live_sanitycheck),
> + SUBTEST(live_unlite_restore),
> SUBTEST(live_timeslice_preempt),
> SUBTEST(live_busywait_preempt),
> SUBTEST(live_preempt),
>
More information about the Intel-gfx
mailing list