[Intel-gfx] [PATCH] drm/i915/gt: Incrementally check for rewinding

Chang, Bruce yu.bruce.chang at intel.com
Wed Jun 10 04:25:39 UTC 2020


On 6/9/2020 8:17 AM, Chris Wilson wrote:
> In commit 5ba32c7be81e ("drm/i915/execlists: Always force a context
> reload when rewinding RING_TAIL"), we placed the check for rewinding a
> context on actually submitting the next request in that context. This
> was so that we only had to check once, and could do so with precision
> avoiding as many forced restores as possible. For example, to ensure
> that we can resubmit the same request a couple of times, we include a
> small wa_tail such that on the next submission, the ring->tail will
> appear to move forwards when resubmitting the same request. This is very
> common as it will happen for every lite-restore to fill the second port
> after a context switch.
>
> However, intel_ring_direction() is limited in precision to movements of
> up to half the ring size. The consequence being that if we tried to
> unwind many requests, we could exceed half the ring and flip the sense
> of the direction, so missing a force restore. As no request can be
> greater than half the ring (i.e. 2048 bytes in the smallest case), we
> can check for rollback incrementally. As we check against the tail that
> would be submitted, we do not lose any sensitivity and allow lite
> restores for the simple case. We still need to double check upon
> submitting the context, to allow for multiple preemptions and
> resubmissions.
>
> Fixes: 5ba32c7be81e ("drm/i915/execlists: Always force a context reload when rewinding RING_TAIL")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: <stable at vger.kernel.org> # v5.4+

Verified that this has fixed the reported issue: the GPU hang with an
incomplete error state.
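
To make the half-ring limitation concrete, here is a standalone sketch of
my reading of the direction check (the real helper is
intel_ring_direction() in intel_ring.h and is not part of this diff; the
shift mirrors the ring->wrap computation in mock_ring() below, and the
ring size, offsets and the ring_direction()/RING_WRAP names are invented
for illustration only):

#include <stdio.h>
#include <stdint.h>

#define RING_SIZE 4096u         /* smallest ring, per the commit message */
#define RING_WRAP (32 - 12)     /* BITS_PER_TYPE(u32) - ilog2(RING_SIZE) */

/*
 * Hypothetical standalone model, not the i915 helper itself: shift the
 * offset delta so that the half-ring boundary lands on the sign bit
 * (relies on two's-complement conversion, as the kernel code does).
 */
static int ring_direction(uint32_t next, uint32_t prev)
{
        return (int32_t)((next - prev) << RING_WRAP);
}

int main(void)
{
        /* Rewinding by a quarter of the ring is seen as a rewind... */
        printf("rewind 1024: %s\n",
               ring_direction(0, 1024) < 0 ? "backwards" : "forwards");

        /*
         * ...but rewinding by three quarters of the ring flips the sign
         * and is misread as forward motion -- the missed force-restore
         * case this patch guards against.
         */
        printf("rewind 3072: %s\n",
               ring_direction(0, 3072) < 0 ? "backwards" : "forwards");

        return 0;
}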

Reviewed-by: Bruce Chang <yu.bruce.chang at intel.com>

> ---
>   drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   4 +-
>   drivers/gpu/drm/i915/gt/intel_lrc.c           |  21 +++-
>   drivers/gpu/drm/i915/gt/intel_ring.c          |   4 +
>   drivers/gpu/drm/i915/gt/selftest_mocs.c       |  18 ++-
>   drivers/gpu/drm/i915/gt/selftest_ring.c       | 110 ++++++++++++++++++
>   .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
>   6 files changed, 154 insertions(+), 4 deletions(-)
>   create mode 100644 drivers/gpu/drm/i915/gt/selftest_ring.c
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index e5141a897786..0a05301e00fb 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -646,7 +646,7 @@ static int engine_setup_common(struct intel_engine_cs *engine)
>   struct measure_breadcrumb {
>   	struct i915_request rq;
>   	struct intel_ring ring;
> -	u32 cs[1024];
> +	u32 cs[2048];
>   };
>   
>   static int measure_breadcrumb_dw(struct intel_context *ce)
> @@ -667,6 +667,8 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
>   
>   	frame->ring.vaddr = frame->cs;
>   	frame->ring.size = sizeof(frame->cs);
> +	frame->ring.wrap =
> +		BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);


Not sure if this frame->ring.wrap is being used anywhere.


>   	frame->ring.effective_size = frame->ring.size;
>   	intel_ring_update_space(&frame->ring);
>   	frame->rq.ring = &frame->ring;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index a057f7a2a521..5f33342c15e2 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -1137,6 +1137,13 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
>   			list_move(&rq->sched.link, pl);
>   			set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
>   
> +			/* Check in case rollback so far, we wrap [size/2] */
> +			if (intel_ring_direction(rq->ring,
> +						 intel_ring_wrap(rq->ring,
> +								 rq->tail),
> +						 rq->ring->tail) > 0)
> +				rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE;
> +


Minor: maybe it's just me, but "so far" -> "too far"?
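
Either way, the condition itself reads right to me: once the true
rollback exceeds half the ring, the wrapped rq->tail is misread as being
ahead of ring->tail, which is exactly what the "> 0" test catches so the
force restore is not missed. A rough standalone model of that condition,
under the same sign-bit assumption as my earlier sketch (ring_wrap(),
ring_direction(), needs_force_restore() and the constants are invented
names, not the i915 helpers):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define RING_SIZE 4096u
#define RING_MASK (RING_SIZE - 1)
#define RING_WRAP (32 - 12)

/* Hypothetical stand-ins for intel_ring_wrap()/intel_ring_direction(). */
static uint32_t ring_wrap(uint32_t pos)
{
        return pos & RING_MASK;
}

static int ring_direction(uint32_t next, uint32_t prev)
{
        return (int32_t)((next - prev) << RING_WRAP);
}

/*
 * The unwind-time test above, as I read it: flag a forced restore when
 * the rollback is so large that the unwound tail already looks like
 * forward motion to the sign-based check.
 */
static bool needs_force_restore(uint32_t rq_tail, uint32_t ring_tail)
{
        return ring_direction(ring_wrap(rq_tail), ring_tail) > 0;
}

int main(void)
{
        /* Rollback of 1024 (< size/2): left to the submit-time check. */
        printf("%d\n", needs_force_restore(2048, 3072));        /* 0 */

        /*
         * Rollback of 2560 (> size/2): flagged here, since the
         * submit-time comparison alone would see forward motion and
         * skip the restore.
         */
        printf("%d\n", needs_force_restore(512, 3072));         /* 1 */

        return 0;
}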


>   			active = rq;
>   		} else {
>   			struct intel_engine_cs *owner = rq->context->engine;
> @@ -1505,8 +1512,9 @@ static u64 execlists_update_context(struct i915_request *rq)
>   	 * HW has a tendency to ignore us rewinding the TAIL to the end of
>   	 * an earlier request.
>   	 */
> +	GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail);
> +	prev = rq->ring->tail;
>   	tail = intel_ring_set_tail(rq->ring, rq->tail);
> -	prev = ce->lrc_reg_state[CTX_RING_TAIL];
>   	if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0))
>   		desc |= CTX_DESC_FORCE_RESTORE;
>   	ce->lrc_reg_state[CTX_RING_TAIL] = tail;
> @@ -4758,6 +4766,14 @@ static int gen12_emit_flush(struct i915_request *request, u32 mode)
>   	return 0;
>   }
>   
> +static void assert_request_valid(struct i915_request *rq)
> +{
> +	struct intel_ring *ring __maybe_unused = rq->ring;
> +
> +	/* Can we unwind this request without appearing to go forwards? */
> +	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
> +}
> +
>   /*
>    * Reserve space for 2 NOOPs at the end of each request to be
>    * used as a workaround for not being allowed to do lite
> @@ -4770,6 +4786,9 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
>   	*cs++ = MI_NOOP;
>   	request->wa_tail = intel_ring_offset(request, cs);
>   
> +	/* Check that entire request is less than half the ring */
> +	assert_request_valid(request);
> +
>   	return cs;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
> index 8cda1b7e17ba..bdb324167ef3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring.c
> @@ -315,3 +315,7 @@ int intel_ring_cacheline_align(struct i915_request *rq)
>   	GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
>   	return 0;
>   }
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
> +#include "selftest_ring.c"
> +#endif
> diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> index 7bae64018ad9..b25eba50c88e 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
> @@ -18,6 +18,20 @@ struct live_mocs {
>   	void *vaddr;
>   };
>   
> +static struct intel_context *mocs_context_create(struct intel_engine_cs *engine)
> +{
> +	struct intel_context *ce;
> +
> +	ce = intel_context_create(engine);
> +	if (IS_ERR(ce))
> +		return ce;
> +
> +	/* We build large requests to read the registers from the ring */
> +	ce->ring = __intel_context_ring_size(SZ_16K);
> +
> +	return ce;
> +}
> +
>   static int request_add_sync(struct i915_request *rq, int err)
>   {
>   	i915_request_get(rq);
> @@ -301,7 +315,7 @@ static int live_mocs_clean(void *arg)
>   	for_each_engine(engine, gt, id) {
>   		struct intel_context *ce;
>   
> -		ce = intel_context_create(engine);
> +		ce = mocs_context_create(engine);
>   		if (IS_ERR(ce)) {
>   			err = PTR_ERR(ce);
>   			break;
> @@ -395,7 +409,7 @@ static int live_mocs_reset(void *arg)
>   	for_each_engine(engine, gt, id) {
>   		struct intel_context *ce;
>   
> -		ce = intel_context_create(engine);
> +		ce = mocs_context_create(engine);
>   		if (IS_ERR(ce)) {
>   			err = PTR_ERR(ce);
>   			break;
> diff --git a/drivers/gpu/drm/i915/gt/selftest_ring.c b/drivers/gpu/drm/i915/gt/selftest_ring.c
> new file mode 100644
> index 000000000000..2a8c534dc125
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/gt/selftest_ring.c
> @@ -0,0 +1,110 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright © 2020 Intel Corporation
> + */
> +
> +static struct intel_ring *mock_ring(unsigned long sz)
> +{
> +	struct intel_ring *ring;
> +
> +	ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
> +	if (!ring)
> +		return NULL;
> +
> +	kref_init(&ring->ref);
> +	ring->size = sz;
> +	ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(sz);
> +	ring->effective_size = sz;
> +	ring->vaddr = (void *)(ring + 1);
> +	atomic_set(&ring->pin_count, 1);
> +
> +	intel_ring_update_space(ring);
> +
> +	return ring;
> +}
> +
> +static void mock_ring_free(struct intel_ring *ring)
> +{
> +	kfree(ring);
> +}
> +
> +static int check_ring_direction(struct intel_ring *ring,
> +				u32 next, u32 prev,
> +				int expected)
> +{
> +	int result;
> +
> +	result = intel_ring_direction(ring, next, prev);
> +	if (result < 0)
> +		result = -1;
> +	else if (result > 0)
> +		result = 1;
> +
> +	if (result != expected) {
> +		pr_err("intel_ring_direction(%u, %u):%d != %d\n",
> +		       next, prev, result, expected);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int check_ring_step(struct intel_ring *ring, u32 x, u32 step)
> +{
> +	u32 prev = x, next = intel_ring_wrap(ring, x + step);
> +	int err = 0;
> +
> +	err |= check_ring_direction(ring, next, next,  0);
> +	err |= check_ring_direction(ring, prev, prev,  0);
> +	err |= check_ring_direction(ring, next, prev,  1);
> +	err |= check_ring_direction(ring, prev, next, -1);
> +
> +	return err;
> +}
> +
> +static int check_ring_offset(struct intel_ring *ring, u32 x, u32 step)
> +{
> +	int err = 0;
> +
> +	err |= check_ring_step(ring, x, step);
> +	err |= check_ring_step(ring, intel_ring_wrap(ring, x + 1), step);
> +	err |= check_ring_step(ring, intel_ring_wrap(ring, x - 1), step);
> +
> +	return err;
> +}
> +
> +static int igt_ring_direction(void *dummy)
> +{
> +	struct intel_ring *ring;
> +	unsigned int half = 2048;
> +	int step, err = 0;
> +
> +	ring = mock_ring(2 * half);
> +	if (!ring)
> +		return -ENOMEM;
> +
> +	GEM_BUG_ON(ring->size != 2 * half);
> +
> +	/* Precision of wrap detection is limited to ring->size / 2 */
> +	for (step = 1; step < half; step <<= 1) {
> +		err |= check_ring_offset(ring, 0, step);
> +		err |= check_ring_offset(ring, half, step);
> +	}
> +	err |= check_ring_step(ring, 0, half - 64);
> +
> +	/* And check unwrapped handling for good measure */
> +	err |= check_ring_offset(ring, 0, 2 * half + 64);
> +	err |= check_ring_offset(ring, 3 * half, 1);
> +
> +	mock_ring_free(ring);
> +	return err;
> +}
> +
> +int intel_ring_mock_selftests(void)
> +{
> +	static const struct i915_subtest tests[] = {
> +		SUBTEST(igt_ring_direction),
> +	};
> +
> +	return i915_subtests(tests, NULL);
> +}
> diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> index 1929feba4e8e..3db34d3eea58 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
> @@ -21,6 +21,7 @@ selftest(fence, i915_sw_fence_mock_selftests)
>   selftest(scatterlist, scatterlist_mock_selftests)
>   selftest(syncmap, i915_syncmap_mock_selftests)
>   selftest(uncore, intel_uncore_mock_selftests)
> +selftest(ring, intel_ring_mock_selftests)
>   selftest(engine, intel_engine_cs_mock_selftests)
>   selftest(timelines, intel_timeline_mock_selftests)
>   selftest(requests, i915_request_mock_selftests)

