[Intel-gfx] [PATCH 26/34] drm/i915: Identify active requests

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Jan 22 15:34:07 UTC 2019


On 21/01/2019 22:21, Chris Wilson wrote:
> To allow requests to forgo a common execution timeline, one question we
> need to be able to answer is "is this request running?". To track
> whether a request has started on HW, we can emit a breadcrumb at the
> beginning of the request and check its timeline's HWSP to see if the
> breadcrumb has advanced past the start of this request. (This is in
> contrast to the global timeline where we need only ask if we are on the
> global timeline and if the timeline has advanced past the end of the
> previous request.)
> 
> There is still confusion from a preempted request, which has already
> started but relinquished the HW to a high priority request. For the
> common case, this discrepancy should be negligible. However, for
> identification of hung requests, knowing which one was running at the
> time of the hang will be much more important.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  6 +++
>   drivers/gpu/drm/i915/i915_request.c          |  9 ++--
>   drivers/gpu/drm/i915/i915_request.h          |  1 +
>   drivers/gpu/drm/i915/i915_timeline.c         |  1 +
>   drivers/gpu/drm/i915/i915_timeline.h         |  2 +
>   drivers/gpu/drm/i915/intel_engine_cs.c       |  4 +-
>   drivers/gpu/drm/i915/intel_lrc.c             | 47 ++++++++++++++++----
>   drivers/gpu/drm/i915/intel_ringbuffer.c      | 43 ++++++++++--------
>   drivers/gpu/drm/i915/intel_ringbuffer.h      |  6 ++-
>   drivers/gpu/drm/i915/selftests/mock_engine.c |  2 +-
>   10 files changed, 86 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index f250109e1f66..defe7d60bb88 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1976,6 +1976,12 @@ static int eb_submit(struct i915_execbuffer *eb)
>   			return err;
>   	}
>   
> +	if (eb->engine->emit_init_breadcrumb) {
> +		err = eb->engine->emit_init_breadcrumb(eb->request);
> +		if (err)
> +			return err;
> +	}
> +
>   	err = eb->engine->emit_bb_start(eb->request,
>   					eb->batch->node.start +
>   					eb->batch_start_offset,
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index bb2885f1dc1e..0a8a2a1bf55d 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -333,6 +333,7 @@ void i915_request_retire_upto(struct i915_request *rq)
>   
>   static u32 timeline_get_seqno(struct i915_timeline *tl)
>   {
> +	tl->seqno += tl->has_initial_breadcrumb;
>   	return ++tl->seqno;

return tl->seqno += 1 + tl->has_initial_breadcrumb?

Not sure if it would make any difference in the code.

>   }
>   
> @@ -382,8 +383,8 @@ void __i915_request_submit(struct i915_request *request)
>   		intel_engine_enable_signaling(request, false);
>   	spin_unlock(&request->lock);
>   
> -	engine->emit_breadcrumb(request,
> -				request->ring->vaddr + request->postfix);
> +	engine->emit_fini_breadcrumb(request,
> +				     request->ring->vaddr + request->postfix);
>   
>   	/* Transfer from per-context onto the global per-engine timeline */
>   	move_to_timeline(request, &engine->timeline);
> @@ -657,7 +658,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	 * around inside i915_request_add() there is sufficient space at
>   	 * the beginning of the ring as well.
>   	 */
> -	rq->reserved_space = 2 * engine->emit_breadcrumb_sz * sizeof(u32);
> +	rq->reserved_space = 2 * engine->emit_fini_breadcrumb_sz * sizeof(u32);

Logic being fini breadcrumb is at least as big as the init one? I can't 
think of any easy asserts to verify that.

Also, a little bit of ring space wastage but I guess we don't care.

>   
>   	/*
>   	 * Record the position of the start of the request so that
> @@ -908,7 +909,7 @@ void i915_request_add(struct i915_request *request)
>   	 * GPU processing the request, we never over-estimate the
>   	 * position of the ring's HEAD.
>   	 */
> -	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
> +	cs = intel_ring_begin(request, engine->emit_fini_breadcrumb_sz);
>   	GEM_BUG_ON(IS_ERR(cs));
>   	request->postfix = intel_ring_offset(request, cs);
>   
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index 96c586d6ff4d..340d6216791c 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -344,6 +344,7 @@ static inline bool i915_request_started(const struct i915_request *rq)
>   	if (i915_request_signaled(rq))
>   		return true;
>   
> +	/* Remember: started but may have since been preempted! */
>   	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
> index 007348b1b469..7bc9164733bc 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.c
> +++ b/drivers/gpu/drm/i915/i915_timeline.c
> @@ -132,6 +132,7 @@ int i915_timeline_init(struct drm_i915_private *i915,
>   	timeline->i915 = i915;
>   	timeline->name = name;
>   	timeline->pin_count = 0;
> +	timeline->has_initial_breadcrumb = !hwsp;
>   
>   	timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
>   	if (!hwsp) {
> diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h
> index ab736e2e5707..8caeb66d1cd5 100644
> --- a/drivers/gpu/drm/i915/i915_timeline.h
> +++ b/drivers/gpu/drm/i915/i915_timeline.h
> @@ -48,6 +48,8 @@ struct i915_timeline {
>   	struct i915_vma *hwsp_ggtt;
>   	u32 hwsp_offset;
>   
> +	bool has_initial_breadcrumb;
> +
>   	/**
>   	 * List of breadcrumbs associated with GPU requests currently
>   	 * outstanding.
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index e532b4b27239..2a4c547240a1 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1239,7 +1239,9 @@ static void print_request(struct drm_printer *m,
>   	drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
>   		   prefix,
>   		   rq->global_seqno,
> -		   i915_request_completed(rq) ? "!" : "",
> +		   i915_request_completed(rq) ? "!" :
> +		   i915_request_started(rq) ? "*" :
> +		   "",
>   		   rq->fence.context, rq->fence.seqno,
>   		   buf,
>   		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 1bf178ca3e00..0a2d53f19625 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -649,7 +649,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>   		 * WaIdleLiteRestore:bdw,skl
>   		 * Apply the wa NOOPs to prevent
>   		 * ring:HEAD == rq:TAIL as we resubmit the
> -		 * request. See gen8_emit_breadcrumb() for
> +		 * request. See gen8_emit_fini_breadcrumb() for
>   		 * where we prepare the padding after the
>   		 * end of the request.
>   		 */
> @@ -1294,6 +1294,34 @@ execlists_context_pin(struct intel_engine_cs *engine,
>   	return __execlists_context_pin(engine, ctx, ce);
>   }
>   
> +static int gen8_emit_init_breadcrumb(struct i915_request *rq)
> +{
> +	u32 *cs;
> +
> +	GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
> +
> +	cs = intel_ring_begin(rq, 6);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	/*
> +	 * Check if we have been preempted before we even get started.
> +	 *
> +	 * After this point i915_request_started() reports true, even if
> +	 * we get preempted and so are no longer running.
> +	 */
> +	*cs++ = MI_ARB_CHECK;
> +	*cs++ = MI_NOOP;
> +
> +	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
> +	*cs++ = i915_timeline_seqno_address(rq->timeline);
> +	*cs++ = 0;
> +	*cs++ = rq->fence.seqno - 1;
> +
> +	intel_ring_advance(rq, cs);
> +	return 0;
> +}
> +
>   static int emit_pdps(struct i915_request *rq)
>   {
>   	const struct intel_engine_cs * const engine = rq->engine;
> @@ -2049,7 +2077,7 @@ static void gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
>   	request->wa_tail = intel_ring_offset(request, cs);
>   }
>   
> -static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
> +static void gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
>   {
>   	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
>   	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
> @@ -2070,9 +2098,9 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs)
>   
>   	gen8_emit_wa_tail(request, cs);
>   }
> -static const int gen8_emit_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
> +static const int gen8_emit_fini_breadcrumb_sz = 10 + WA_TAIL_DWORDS;
>   
> -static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
> +static void gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   {
>   	cs = gen8_emit_ggtt_write_rcs(cs,
>   				      request->fence.seqno,
> @@ -2096,7 +2124,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
>   
>   	gen8_emit_wa_tail(request, cs);
>   }
> -static const int gen8_emit_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS;
> +static const int gen8_emit_fini_breadcrumb_rcs_sz = 14 + WA_TAIL_DWORDS;
>   
>   static int gen8_init_rcs_context(struct i915_request *rq)
>   {
> @@ -2188,8 +2216,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>   	engine->request_alloc = execlists_request_alloc;
>   
>   	engine->emit_flush = gen8_emit_flush;
> -	engine->emit_breadcrumb = gen8_emit_breadcrumb;
> -	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
> +	engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
> +	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
> +	engine->emit_fini_breadcrumb_sz = gen8_emit_fini_breadcrumb_sz;
>   
>   	engine->set_default_submission = intel_execlists_set_default_submission;
>   
> @@ -2302,8 +2331,8 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
>   	/* Override some for render ring. */
>   	engine->init_context = gen8_init_rcs_context;
>   	engine->emit_flush = gen8_emit_flush_render;
> -	engine->emit_breadcrumb = gen8_emit_breadcrumb_rcs;
> -	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_rcs_sz;
> +	engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
> +	engine->emit_fini_breadcrumb_sz = gen8_emit_fini_breadcrumb_rcs_sz;
>   
>   	ret = logical_ring_init(engine);
>   	if (ret)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 751bd4e7da42..f6b30eb46263 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
>   		err = PTR_ERR(timeline);
>   		goto err;
>   	}
> +	GEM_BUG_ON(timeline->has_initial_breadcrumb);
>   
>   	ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
>   	i915_timeline_put(timeline);
> @@ -1947,6 +1948,7 @@ static int ring_request_alloc(struct i915_request *request)
>   	int ret;
>   
>   	GEM_BUG_ON(!request->hw_context->pin_count);
> +	GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
>   
>   	/*
>   	 * Flush enough space to reduce the likelihood of waiting after
> @@ -2283,11 +2285,16 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>   	engine->context_pin = intel_ring_context_pin;
>   	engine->request_alloc = ring_request_alloc;
>   
> -	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
> -	engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
> +	/*
> +	 * Using a global execution timeline; the previous final breadcrumb is
> +	 * equivalent to our next initial bread so we can elide
> +	 * engine->emit_init_breadcrumb().
> +	 */
> +	engine->emit_fini_breadcrumb = i9xx_emit_breadcrumb;
> +	engine->emit_fini_breadcrumb_sz = i9xx_emit_breadcrumb_sz;
>   	if (IS_GEN(dev_priv, 5)) {
> -		engine->emit_breadcrumb = gen5_emit_breadcrumb;
> -		engine->emit_breadcrumb_sz = gen5_emit_breadcrumb_sz;
> +		engine->emit_fini_breadcrumb = gen5_emit_breadcrumb;
> +		engine->emit_fini_breadcrumb_sz = gen5_emit_breadcrumb_sz;
>   	}
>   
>   	engine->set_default_submission = i9xx_set_default_submission;
> @@ -2317,13 +2324,13 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>   	if (INTEL_GEN(dev_priv) >= 7) {
>   		engine->init_context = intel_rcs_ctx_init;
>   		engine->emit_flush = gen7_render_ring_flush;
> -		engine->emit_breadcrumb = gen7_rcs_emit_breadcrumb;
> -		engine->emit_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
> +		engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
> +		engine->emit_fini_breadcrumb_sz = gen7_rcs_emit_breadcrumb_sz;
>   	} else if (IS_GEN(dev_priv, 6)) {
>   		engine->init_context = intel_rcs_ctx_init;
>   		engine->emit_flush = gen6_render_ring_flush;
> -		engine->emit_breadcrumb = gen6_rcs_emit_breadcrumb;
> -		engine->emit_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
> +		engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
> +		engine->emit_fini_breadcrumb_sz = gen6_rcs_emit_breadcrumb_sz;
>   	} else if (IS_GEN(dev_priv, 5)) {
>   		engine->emit_flush = gen4_render_ring_flush;
>   	} else {
> @@ -2360,11 +2367,11 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine)
>   		engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
>   
>   		if (IS_GEN(dev_priv, 6)) {
> -			engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> -			engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> +			engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
> +			engine->emit_fini_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
>   		} else {
> -			engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
> -			engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
> +			engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
> +			engine->emit_fini_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
>   		}
>   	} else {
>   		engine->emit_flush = bsd_ring_flush;
> @@ -2389,11 +2396,11 @@ int intel_init_blt_ring_buffer(struct intel_engine_cs *engine)
>   	engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
>   
>   	if (IS_GEN(dev_priv, 6)) {
> -		engine->emit_breadcrumb = gen6_xcs_emit_breadcrumb;
> -		engine->emit_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
> +		engine->emit_fini_breadcrumb = gen6_xcs_emit_breadcrumb;
> +		engine->emit_fini_breadcrumb_sz = gen6_xcs_emit_breadcrumb_sz;
>   	} else {
> -		engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
> -		engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
> +		engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
> +		engine->emit_fini_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
>   	}
>   
>   	return intel_init_ring_buffer(engine);
> @@ -2412,8 +2419,8 @@ int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine)
>   	engine->irq_enable = hsw_vebox_irq_enable;
>   	engine->irq_disable = hsw_vebox_irq_disable;
>   
> -	engine->emit_breadcrumb = gen7_xcs_emit_breadcrumb;
> -	engine->emit_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
> +	engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
> +	engine->emit_fini_breadcrumb_sz = gen7_xcs_emit_breadcrumb_sz;
>   
>   	return intel_init_ring_buffer(engine);
>   }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a792bacf2930..d3d4f3667afb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -463,8 +463,10 @@ struct intel_engine_cs {
>   					 unsigned int dispatch_flags);
>   #define I915_DISPATCH_SECURE BIT(0)
>   #define I915_DISPATCH_PINNED BIT(1)
> -	void		(*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
> -	int		emit_breadcrumb_sz;
> +	int		(*emit_init_breadcrumb)(struct i915_request *rq);
> +	void		(*emit_fini_breadcrumb)(struct i915_request *rq,
> +						u32 *cs);
> +	unsigned int	emit_fini_breadcrumb_sz;
>   
>   	/* Pass the request to the hardware queue (e.g. directly into
>   	 * the legacy ringbuffer or to the end of an execlist).
> diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
> index c0a408828415..2515cffb4490 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_engine.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
> @@ -227,7 +227,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
>   	engine->base.context_pin = mock_context_pin;
>   	engine->base.request_alloc = mock_request_alloc;
>   	engine->base.emit_flush = mock_emit_flush;
> -	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
> +	engine->base.emit_fini_breadcrumb = mock_emit_breadcrumb;
>   	engine->base.submit_request = mock_submit_request;
>   
>   	if (i915_timeline_init(i915,
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko


More information about the Intel-gfx mailing list