[Intel-gfx] [PATCH 20/34] drm/i915: Introduce concept of per-timeline (context) HWSP

Wed Jan 23 01:35:35 UTC 2019

On 1/21/2019 14:21, Chris Wilson wrote:
> Supplement the per-engine HWSP with a per-timeline HWSP. That is a
> per-request pointer through which we can check a local seqno,
> abstracting away the presumption of a global seqno. In this first step,
> we point each request back into the engine's HWSP so everything
> continues to work with the global timeline.
>
> v2: s/i915_request_hwsp/hwsp_seqno/ to emphasis that this is the current
> HW value and that we are accessing it via i915_request merely as a
> convenience.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_request.c | 16 ++++++----
>   drivers/gpu/drm/i915/i915_request.h | 45 ++++++++++++++++++++++++-----
>   drivers/gpu/drm/i915/intel_lrc.c    |  9 ++++--
>   3 files changed, 55 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index 2721a356368f..d61e86c6a1d1 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -182,10 +182,11 @@ static void free_capture_list(struct i915_request *request)
>   static void __retire_engine_request(struct intel_engine_cs *engine,
>   				    struct i915_request *rq)
>   {
> -	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d\n",
> +	GEM_TRACE("%s(%s) fence %llx:%lld, global=%d, current %d:%d\n",
>   		  __func__, engine->name,
>   		  rq->fence.context, rq->fence.seqno,
>   		  rq->global_seqno,
> +		  hwsp_seqno(rq),
>   		  intel_engine_get_seqno(engine));
>   
>   	GEM_BUG_ON(!i915_request_completed(rq));
> @@ -244,10 +245,11 @@ static void i915_request_retire(struct i915_request *request)
>   {
>   	struct i915_gem_active *active, *next;
>   
> -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
>   		  request->engine->name,
>   		  request->fence.context, request->fence.seqno,
>   		  request->global_seqno,
> +		  hwsp_seqno(request),
>   		  intel_engine_get_seqno(request->engine));
>   
>   	lockdep_assert_held(&request->i915->drm.struct_mutex);
> @@ -307,10 +309,11 @@ void i915_request_retire_upto(struct i915_request *rq)
>   	struct intel_ring *ring = rq->ring;
>   	struct i915_request *tmp;
>   
> -	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
>   		  rq->engine->name,
>   		  rq->fence.context, rq->fence.seqno,
>   		  rq->global_seqno,
> +		  hwsp_seqno(rq),
>   		  intel_engine_get_seqno(rq->engine));
>   
>   	lockdep_assert_held(&rq->i915->drm.struct_mutex);
> @@ -355,10 +358,11 @@ void __i915_request_submit(struct i915_request *request)
>   	struct intel_engine_cs *engine = request->engine;
>   	u32 seqno;
>   
> -	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld -> global=%d, current %d:%d\n",
>   		  engine->name,
>   		  request->fence.context, request->fence.seqno,
>   		  engine->timeline.seqno + 1,
> +		  hwsp_seqno(request),
>   		  intel_engine_get_seqno(engine));
>   
>   	GEM_BUG_ON(!irqs_disabled());
> @@ -405,10 +409,11 @@ void __i915_request_unsubmit(struct i915_request *request)
>   {
>   	struct intel_engine_cs *engine = request->engine;
>   
> -	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d\n",
> +	GEM_TRACE("%s fence %llx:%lld <- global=%d, current %d:%d\n",
>   		  engine->name,
>   		  request->fence.context, request->fence.seqno,
>   		  request->global_seqno,
> +		  hwsp_seqno(request),
>   		  intel_engine_get_seqno(engine));
>   
>   	GEM_BUG_ON(!irqs_disabled());
> @@ -616,6 +621,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
>   	rq->ring = ce->ring;
>   	rq->timeline = ce->ring->timeline;
>   	GEM_BUG_ON(rq->timeline == &engine->timeline);
> +	rq->hwsp_seqno = &engine->status_page.addr[I915_GEM_HWS_INDEX];
>   
>   	spin_lock_init(&rq->lock);
>   	dma_fence_init(&rq->fence,
> diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
> index c0f084ca4f29..ade010fe6e26 100644
> --- a/drivers/gpu/drm/i915/i915_request.h
> +++ b/drivers/gpu/drm/i915/i915_request.h
> @@ -130,6 +130,13 @@ struct i915_request {
>   	struct i915_sched_node sched;
>   	struct i915_dependency dep;
>   
> +	/*
> +	 * A convenience pointer to the current breadcrumb value stored in
> +	 * the HW status page (or our timeline's local equivalent). The full
> +	 * path would be rq->hw_context->ring->timeline->hwsp_seqno.
> +	 */
> +	const u32 *hwsp_seqno;
> +
>   	/**
>   	 * GEM sequence number associated with this request on the
>   	 * global execution timeline. It is zero when the request is not
> @@ -285,11 +292,6 @@ static inline bool i915_request_signaled(const struct i915_request *rq)
>   	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
>   }
>   
> -static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
> -					    u32 seqno);
> -static inline bool intel_engine_has_completed(struct intel_engine_cs *engine,
> -					      u32 seqno);
> -
>   /**
>    * Returns true if seq1 is later than seq2.
>    */
> @@ -298,6 +300,35 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
>   	return (s32)(seq1 - seq2) >= 0;
>   }
>   
> +static inline u32 __hwsp_seqno(const struct i915_request *rq)
> +{
> +	return READ_ONCE(*rq->hwsp_seqno);
> +}
> +
> +/**
> + * hwsp_seqno - the current breadcrumb value in the HW status page
> + * @rq: the request, to chase the relevant HW status page
> + *
> + * The emphasis in naming here is that hwsp_seqno() is not a property of the
> + * request, but an indication of the current HW state (associated with this
> + * request). Its value will change as the GPU executes more requests.
> + *
> + * Returns the current breadcrumb value in the associated HW status page (or
> + * the local timeline's equivalent) for this request. The request itself
> + * has the associated breadcrumb value of rq->fence.seqno, when the HW
> + * status page has that breadcrumb or later, this request is complete.
> + */
> +static inline u32 hwsp_seqno(const struct i915_request *rq)
> +{
> +	u32 seqno;
> +
> +	rcu_read_lock(); /* the HWSP may be freed at runtime */
> +	seqno = __hwsp_seqno(rq);
> +	rcu_read_unlock();
> +
> +	return seqno;
> +}
> +

Much better name and good comments too :).

Reviewed-by: John Harrison <John.C.Harrison at Intel.com>

>   /**
>    * i915_request_started - check if the request has begun being executed
>    * @rq: the request
> @@ -315,14 +346,14 @@ static inline bool i915_request_started(const struct i915_request *rq)
>   	if (!seqno) /* not yet submitted to HW */
>   		return false;
>   
> -	return intel_engine_has_started(rq->engine, seqno);
> +	return i915_seqno_passed(hwsp_seqno(rq), seqno - 1);
>   }
>   
>   static inline bool
>   __i915_request_completed(const struct i915_request *rq, u32 seqno)
>   {
>   	GEM_BUG_ON(!seqno);
> -	return intel_engine_has_completed(rq->engine, seqno) &&
> +	return i915_seqno_passed(hwsp_seqno(rq), seqno) &&
>   		seqno == i915_request_global_seqno(rq);
>   }
>   
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 464dd309fa99..e45c4e29c435 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -470,11 +470,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>   			desc = execlists_update_context(rq);
>   			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
>   
> -			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
> +			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
>   				  engine->name, n,
>   				  port[n].context_id, count,
>   				  rq->global_seqno,
>   				  rq->fence.context, rq->fence.seqno,
> +				  hwsp_seqno(rq),
>   				  intel_engine_get_seqno(engine),
>   				  rq_prio(rq));
>   		} else {
> @@ -767,11 +768,12 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
>   	while (num_ports-- && port_isset(port)) {
>   		struct i915_request *rq = port_request(port);
>   
> -		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d)\n",
> +		GEM_TRACE("%s:port%u global=%d (fence %llx:%lld), (current %d:%d)\n",
>   			  rq->engine->name,
>   			  (unsigned int)(port - execlists->port),
>   			  rq->global_seqno,
>   			  rq->fence.context, rq->fence.seqno,
> +			  hwsp_seqno(rq),
>   			  intel_engine_get_seqno(rq->engine));
>   
>   		GEM_BUG_ON(!execlists->active);
> @@ -997,12 +999,13 @@ static void process_csb(struct intel_engine_cs *engine)
>   						EXECLISTS_ACTIVE_USER));
>   
>   		rq = port_unpack(port, &count);
> -		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d), prio=%d\n",
> +		GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%lld) (current %d:%d), prio=%d\n",
>   			  engine->name,
>   			  port->context_id, count,
>   			  rq ? rq->global_seqno : 0,
>   			  rq ? rq->fence.context : 0,
>   			  rq ? rq->fence.seqno : 0,
> +			  rq ? hwsp_seqno(rq) : 0,
>   			  intel_engine_get_seqno(engine),
>   			  rq ? rq_prio(rq) : 0);
>