[Intel-gfx] [PATCH] drm/i915/execlists: Disable preemption under GVT

Zhang, Xiaolin xiaolin.zhang at intel.com
Wed Jul 10 10:04:06 UTC 2019


On 07/09/2019 05:12 PM, Chris Wilson wrote:
> Preempt-to-busy uses a GPU semaphore to enforce an idle-barrier across
> preemption, but mediated gvt does not fully support semaphores.
>
> v2: Fiddle around with the flags and settle on using has-semaphores for
> the core bits so that we retain the ability to preempt our own
> semaphores.
Chris,
With this patch, the vgpu guest boots successfully and the BAT tests pass.
But I want to point out that another GPU hang issue shows up once the vgpu
guest boot issue is addressed. I am not sure whether it is related or not.
It is easy to trigger with glxgears with vblank_mode=0; the time until the
GPU hang is random, and the call trace is below (the guest kernel log
is attached in case it is useful):
[ 1192.680497] Asynchronous wait on fence i915:compiz[1866]:b30 timed
out (hint:intel_atomic_commit_ready+0x0/0x50 [i915])
[ 1193.512989] hangcheck rcs0
[ 1193.513650] hangcheck     Awake? 4
[ 1193.514299] hangcheck     Hangcheck: 9986 ms ago
[ 1193.515071] hangcheck     Reset count: 0 (global 0)
[ 1193.515854] hangcheck     Requests:
[ 1193.516410] hangcheck     RING_START: 0x00000000
[ 1193.517138] hangcheck     RING_HEAD:  0x00003198
[ 1193.517876] hangcheck     RING_TAIL:  0x00003198
[ 1193.518611] hangcheck     RING_CTL:   0x00000000
[ 1193.519380] hangcheck     RING_MODE:  0x00000200 [idle]
[ 1193.520149] hangcheck     RING_IMR: fffffefe
[ 1193.520799] hangcheck     ACTHD:  0x00000000_000a6650
[ 1193.521545] hangcheck     BBADDR: 0x00000000_00000000
[ 1193.522321] hangcheck     DMA_FADDR: 0x00000000_00000000
[ 1193.523392] hangcheck     IPEIR: 0x00000000
[ 1193.524171] hangcheck     IPEHR: 0x00000000
[ 1193.525050] hangcheck     Execlist status: 0x00040012 00000003, entries 6
[ 1193.526049] hangcheck     Execlist CSB read 5, write 5, tasklet
queued? no (enabled)
[ 1193.527154] hangcheck         Active[0: ring:{start:dff03000,
hwsp:dff661c0, seqno:00012175}, rq:  1b:12178-  prio=4097 @ 11649ms:
glxgears[2160]
[ 1193.528852] hangcheck         Pending[0] ring:{start:dff03000,
hwsp:dff661c0, seqno:00012175}, rq:  1b:12178-  prio=4097 @ 11649ms:
glxgears[2160]
[ 1193.532515] hangcheck         Pending[1] ring:{start:dff39000,
hwsp:dff66140, seqno:004f7b5e}, rq:  14:4f7b60  prio=4097 @ 11655ms:
Xorg[865]
[ 1193.536009] hangcheck         E  1b:12178-  prio=4097 @ 11658ms:
glxgears[2160]
[ 1193.537187] hangcheck         E  14:4f7b60  prio=4097 @ 11658ms:
Xorg[865]
[ 1193.538192] hangcheck         Queue priority hint: 4097
[ 1193.538894] hangcheck         Q  1a:b30-  prio=4097 @ 11650ms:
compiz[1866]
[ 1193.539810] hangcheck         Q  1b:1217a  prio=2 @ 11660ms:
glxgears[2160]
[ 1193.542485] hangcheck HWSP:
[ 1193.543703] hangcheck [0000] 00000000 00000000 00000000 00000000
00000000 00000000 00000000 00000000
[ 1193.546729] hangcheck *
[ 1193.547230] hangcheck [0040] 00000014 00000003 00008002 00000001
00000014 00000001 00000018 00000003
[ 1193.550607] hangcheck [0060] 00000001 00000000 00000014 00000001
00000000 00000000 00000000 00000005
[ 1193.552274] hangcheck [0080] 00000000 00000000 00000000 00000000
00000000 00000000 00000000 00000000
[ 1193.553937] hangcheck *
[ 1193.554381] hangcheck Idle? no
[ 1193.554902] hangcheck Signals:
[ 1193.555419] hangcheck     [1b:12178] @ 11678ms
[ 1193.864797] i915 0000:00:04.0: GPU HANG: ecode 9:0:0x00000000, hang
on rcs0
[ 1193.869234] [drm] GPU hangs can indicate a bug anywhere in the entire
gfx stack, including userspace.
[ 1193.871096] [drm] Please file a _new_ bug report on
bugs.freedesktop.org against DRI -> DRM/Intel
[ 1193.872483] [drm] drm/i915 developers can then reassign to the right
component if it's not a kernel issue.
[ 1193.873927] [drm] The gpu crash dump is required to analyze gpu
hangs, so please always attach it.
[ 1193.875395] [drm] GPU crash dump saved to /sys/class/drm/card0/error

BRs, Xiaolin
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Zhenyu Wang <zhenyuw at linux.intel.com>
> Cc: Xiaolin Zhang <xiaolin.zhang at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  4 ++--
>  drivers/gpu/drm/i915/gt/intel_lrc.c       | 24 +++++++++++++++++------
>  drivers/gpu/drm/i915/gt/selftest_lrc.c    |  6 ++++++
>  3 files changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 56310812da21..614ed8c488ef 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -825,6 +825,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>  	struct drm_i915_private *i915 = engine->i915;
>  	int ret;
>  
> +	engine->set_default_submission(engine);
> +
>  	/* We may need to do things with the shrinker which
>  	 * require us to immediately switch back to the default
>  	 * context. This can cause a problem as pinning the
> @@ -852,8 +854,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
>  
>  	engine->emit_fini_breadcrumb_dw = ret;
>  
> -	engine->set_default_submission(engine);
> -
>  	return 0;
>  
>  err_unpin:
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 558a5850de3c..ef36f4b5e212 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -295,6 +295,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>  {
>  	int last_prio;
>  
> +	if (!intel_engine_has_semaphores(engine))
> +		return false;
> +
>  	/*
>  	 * Check if the current priority hint merits a preemption attempt.
>  	 *
> @@ -893,6 +896,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
>  {
>  	int hint;
>  
> +	if (!intel_engine_has_semaphores(engine))
> +		return false;
> +
>  	if (list_is_last(&rq->sched.link, &engine->active.requests))
>  		return false;
>  
> @@ -2634,7 +2640,8 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>  	*cs++ = MI_USER_INTERRUPT;
>  
>  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> -	cs = emit_preempt_busywait(request, cs);
> +	if (intel_engine_has_semaphores(request->engine))
> +		cs = emit_preempt_busywait(request, cs);
>  
>  	request->tail = intel_ring_offset(request, cs);
>  	assert_ring_tail_valid(request->ring, request->tail);
> @@ -2658,7 +2665,8 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>  	*cs++ = MI_USER_INTERRUPT;
>  
>  	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> -	cs = emit_preempt_busywait(request, cs);
> +	if (intel_engine_has_semaphores(request->engine))
> +		cs = emit_preempt_busywait(request, cs);
>  
>  	request->tail = intel_ring_offset(request, cs);
>  	assert_ring_tail_valid(request->ring, request->tail);
> @@ -2706,10 +2714,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
>  	engine->unpark = NULL;
>  
>  	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> -	if (!intel_vgpu_active(engine->i915))
> +	if (!intel_vgpu_active(engine->i915)) {
>  		engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
> -	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> -		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> +		if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
> +			engine->flags |= I915_ENGINE_HAS_PREEMPTION;
> +	}
>  }
>  
>  static void execlists_destroy(struct intel_engine_cs *engine)
> @@ -3399,7 +3408,6 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>  	ve->base.class = OTHER_CLASS;
>  	ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
>  	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
> -	ve->base.flags = I915_ENGINE_IS_VIRTUAL;
>  
>  	/*
>  	 * The decision on whether to submit a request using semaphores
> @@ -3496,8 +3504,12 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
>  		ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
>  		ve->base.emit_fini_breadcrumb_dw =
>  			sibling->emit_fini_breadcrumb_dw;
> +
> +		ve->base.flags = sibling->flags;
>  	}
>  
> +	ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
> +
>  	return &ve->context;
>  
>  err_put:
> diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> index fe4e15f9ba9d..a13f06ba984b 100644
> --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
> @@ -269,6 +269,9 @@ static int live_timeslice_preempt(void *arg)
>  		enum intel_engine_id id;
>  
>  		for_each_engine(engine, i915, id) {
> +			if (!intel_engine_has_preemption(engine))
> +				continue;
> +
>  			memset(vaddr, 0, PAGE_SIZE);
>  
>  			err = slice_semaphore_queue(engine, vma, count);
> @@ -354,6 +357,9 @@ static int live_busywait_preempt(void *arg)
>  		struct igt_live_test t;
>  		u32 *cs;
>  
> +		if (!intel_engine_has_preemption(engine))
> +			continue;
> +
>  		if (!intel_engine_can_store_dword(engine))
>  			continue;
>  
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: gpu-hang.txt
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20190710/430fbe07/attachment-0001.txt>


More information about the Intel-gfx mailing list