[Intel-gfx] [PATCH 12/18] drm/i915: Unify request submission

Wed Jul 27 17:51:35 UTC 2016

On 22/07/16 09:03, Joonas Lahtinen wrote:
> On ke, 2016-07-20 at 14:12 +0100, Chris Wilson wrote:
>> Move request submission from emit_request into its own common vfunc
>> from i915_add_request().
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_request.c    |  7 +++----
>>  drivers/gpu/drm/i915/i915_guc_submission.c |  9 ++++++---
>>  drivers/gpu/drm/i915/intel_guc.h           |  1 -
>>  drivers/gpu/drm/i915/intel_lrc.c           | 10 +++-------
>>  drivers/gpu/drm/i915/intel_ringbuffer.c    | 26 ++++++++++----------------
>>  drivers/gpu/drm/i915/intel_ringbuffer.h    | 23 +++++++++++------------
>>  6 files changed, 33 insertions(+), 43 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
>> index 408f390a4c98..3e633b47213c 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_request.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
>> @@ -469,12 +469,10 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>>  	 */
>>  	request->postfix = intel_ring_get_tail(ring);
>>
>> -	if (i915.enable_execlists)
>> -		ret = engine->emit_request(request);
>> -	else
>> -		ret = engine->add_request(request);
>>  	/* Not allowed to fail! */
>> +	ret = engine->emit_request(request);
>>  	WARN(ret, "emit|add_request failed: %d!\n", ret);
>
> You should fix the message too; s/|add//
>
>> +
>>  	/* Sanity check that the reserved size was large enough. */
>>  	ret = intel_ring_get_tail(ring) - request_start;
>>  	if (ret < 0)
>> @@ -485,6 +483,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>>  		  reserved_tail, ret);
>>
>>  	i915_gem_mark_busy(engine);
>> +	engine->submit_request(request);
>>  }
>>
>>  static unsigned long local_clock_us(unsigned int *cpu)
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index eccd34832fe6..32d0e1890950 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -585,7 +585,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
>>   * The only error here arises if the doorbell hardware isn't functioning
>>   * as expected, which really shouln't happen.
>>   */
>> -int i915_guc_submit(struct drm_i915_gem_request *rq)
>> +static void i915_guc_submit(struct drm_i915_gem_request *rq)
>>  {
>>  	unsigned int engine_id = rq->engine->id;
>>  	struct intel_guc *guc = &rq->i915->guc;
>> @@ -602,8 +602,6 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
>>
>>  	guc->submissions[engine_id] += 1;
>>  	guc->last_seqno[engine_id] = rq->fence.seqno;
>> -
>> -	return b_ret;
>
> Maybe we should have WARN(b_ret, "sumthing")? Although I see the return
> value was not handled previously. CC'ing Dave to comment on too.

If submission were to fail we'll probably(!) get a hang later; or else 
it might recover(?), though I'm not sure how. We're counting the number 
of times this goes wrong, and it's supposed to always be zero. We should 
make sure this is all captured in the error state; then if we start 
seeing that sometimes it *does* fail then I guess we'll want more 
detail. But for now we can just assume it will work.

>>  }
>>
>>  /*
>> @@ -992,6 +990,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>>  {
>>  	struct intel_guc *guc = &dev_priv->guc;
>>  	struct i915_guc_client *client;
>> +	struct intel_engine_cs *engine;
>>
>>  	/* client for execbuf submission */
>>  	client = guc_client_alloc(dev_priv,
>> @@ -1006,6 +1005,10 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>>  	host2guc_sample_forcewake(guc, client);
>>  	guc_init_doorbell_hw(guc);
>>
>> +	/* Take over from manual control of ELSP (execlists) */
>> +	for_each_engine(engine, dev_priv)
>> +		engine->submit_request = i915_guc_submit;

This doesn't get undone in i915_guc_submission_disable().
That will prevent the runtime fallback from working.

.Dave.

>> +
>>  	return 0;
>>  }
>>
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
>> index 3e3e743740c0..623cf26cd784 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -160,7 +160,6 @@ extern int intel_guc_resume(struct drm_device *dev);
>>  int i915_guc_submission_init(struct drm_i915_private *dev_priv);
>>  int i915_guc_submission_enable(struct drm_i915_private *dev_priv);
>>  int i915_guc_wq_check_space(struct drm_i915_gem_request *rq);
>> -int i915_guc_submit(struct drm_i915_gem_request *rq);
>>  void i915_guc_submission_disable(struct drm_i915_private *dev_priv);
>>  void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
>>
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index d17a193e8eaf..52edbcc9bca0 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -773,12 +773,6 @@ intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
>>  	 */
>>  	request->previous_context = engine->last_context;
>>  	engine->last_context = request->ctx;
>> -
>> -	if (i915.enable_guc_submission)
>> -		i915_guc_submit(request);
>> -	else
>> -		execlists_context_queue(request);
>> -
>
> Function name is still advance_and_submit, and now the call to submit
> is moved to add_request, I'm confused.
>
>>  	return 0;
>>  }
>>
>> @@ -1904,8 +1898,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
>>  {
>>  	/* Default vfuncs which can be overriden by each engine. */
>>  	engine->init_hw = gen8_init_common_ring;
>> -	engine->emit_request = gen8_emit_request;
>>  	engine->emit_flush = gen8_emit_flush;
>> +	engine->emit_request = gen8_emit_request;
>> +	engine->submit_request = execlists_context_queue;
>
> execlists_context_queue name could be changed too, just defined and one
> calling site.
>
>> +
>>  	engine->irq_enable = gen8_logical_ring_enable_irq;
>>  	engine->irq_disable = gen8_logical_ring_disable_irq;
>>  	engine->emit_bb_start = gen8_emit_bb_start;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 43dfa4be1cfd..907d933d62aa 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -1427,15 +1427,14 @@ static int gen6_signal(struct drm_i915_gem_request *signaller_req,
>>  }
>>
>>  /**
>> - * gen6_add_request - Update the semaphore mailbox registers
>> + * gen6_emit_request - Update the semaphore mailbox registers
>>   *
>>   * @request - request to write to the ring
>>   *
>>   * Update the mailbox registers in the *other* rings with the current seqno.
>>   * This acts like a signal in the canonical semaphore.
>>   */
>> -static int
>> -gen6_add_request(struct drm_i915_gem_request *req)
>> +static int gen6_emit_request(struct drm_i915_gem_request *req)
>>  {
>>  	struct intel_ring *ring = req->ring;
>>  	int ret;
>> @@ -1456,13 +1455,10 @@ gen6_add_request(struct drm_i915_gem_request *req)
>>
>>  	req->tail = intel_ring_get_tail(ring);
>>
>> -	req->engine->submit_request(req);
>> -
>>  	return 0;
>>  }
>>
>> -static int
>> -gen8_render_add_request(struct drm_i915_gem_request *req)
>> +static int gen8_render_emit_request(struct drm_i915_gem_request *req)
>>  {
>>  	struct intel_engine_cs *engine = req->engine;
>>  	struct intel_ring *ring = req->ring;
>> @@ -1486,8 +1482,9 @@ gen8_render_add_request(struct drm_i915_gem_request *req)
>>  	intel_ring_emit(ring, 0);
>>  	intel_ring_emit(ring, MI_USER_INTERRUPT);
>>  	intel_ring_emit(ring, MI_NOOP);
>> +	intel_ring_advance(ring);
>>
>> -	req->engine->submit_request(req);
>> +	req->tail = intel_ring_get_tail(ring);
>
> Ditto req->tail = ring->tail;
>
>>
>>  	return 0;
>>  }
>> @@ -1692,8 +1689,7 @@ bsd_ring_flush(struct drm_i915_gem_request *req,
>>  	return 0;
>>  }
>>
>> -static int
>> -i9xx_add_request(struct drm_i915_gem_request *req)
>> +static int i9xx_emit_request(struct drm_i915_gem_request *req)
>>  {
>>  	struct intel_ring *ring = req->ring;
>>  	int ret;
>> @@ -1710,8 +1706,6 @@ i9xx_add_request(struct drm_i915_gem_request *req)
>>
>>  	req->tail = intel_ring_get_tail(ring);
>>
>> -	req->engine->submit_request(req);
>> -
>>  	return 0;
>>  }
>>
>> @@ -2813,11 +2807,11 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>>  				      struct intel_engine_cs *engine)
>>  {
>>  	engine->init_hw = init_ring_common;
>> -	engine->submit_request = i9xx_submit_request;
>>
>> -	engine->add_request = i9xx_add_request;
>> +	engine->emit_request = i9xx_emit_request;
>>  	if (INTEL_GEN(dev_priv) >= 6)
>> -		engine->add_request = gen6_add_request;
>> +		engine->emit_request = gen6_emit_request;
>> +	engine->submit_request = i9xx_submit_request;
>>
>>  	if (INTEL_GEN(dev_priv) >= 8)
>>  		engine->emit_bb_start = gen8_emit_bb_start;
>> @@ -2846,7 +2840,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>>
>>  	if (INTEL_GEN(dev_priv) >= 8) {
>>  		engine->init_context = intel_rcs_ctx_init;
>> -		engine->add_request = gen8_render_add_request;
>> +		engine->emit_request = gen8_render_emit_request;
>>  		engine->emit_flush = gen8_render_ring_flush;
>>  		if (i915.semaphores)
>>  			engine->semaphore.signal = gen8_rcs_signal;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 1a38c383327e..856b732ddbbd 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -204,7 +204,17 @@ struct intel_engine_cs {
>>
>>  	int		(*init_context)(struct drm_i915_gem_request *req);
>>
>> -	int		(*add_request)(struct drm_i915_gem_request *req);
>> +	int		(*emit_flush)(struct drm_i915_gem_request *request,
>> +				      u32 invalidate_domains,
>> +				      u32 flush_domains);
>> +	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
>> +					 u64 offset, u32 length,
>> +					 unsigned int dispatch_flags);
>> +#define I915_DISPATCH_SECURE 0x1
>> +#define I915_DISPATCH_PINNED 0x2
>> +#define I915_DISPATCH_RS     0x4
>
> Same here, maybe BIT(0) etc?
>
> Really like how the code looks more consistent now!
>
> Regards, Joonas
>
>> +	int		(*emit_request)(struct drm_i915_gem_request *req);
>> +	void		(*submit_request)(struct drm_i915_gem_request *req);
>>  	/* Some chipsets are not quite as coherent as advertised and need
>>  	 * an expensive kick to force a true read of the up-to-date seqno.
>>  	 * However, the up-to-date seqno is not always required and the last
>> @@ -282,17 +292,6 @@ struct intel_engine_cs {
>>  	unsigned int idle_lite_restore_wa;
>>  	bool disable_lite_restore_wa;
>>  	u32 ctx_desc_template;
>> -	int		(*emit_request)(struct drm_i915_gem_request *request);
>> -	int		(*emit_flush)(struct drm_i915_gem_request *request,
>> -				      u32 invalidate_domains,
>> -				      u32 flush_domains);
>> -	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
>> -					 u64 offset, u32 length,
>> -					 unsigned int dispatch_flags);
>> -#define I915_DISPATCH_SECURE 0x1
>> -#define I915_DISPATCH_PINNED 0x2
>> -#define I915_DISPATCH_RS     0x4
>> -	void		(*submit_request)(struct drm_i915_gem_request *req);
>>
>>  	/**
>>  	 * List of objects currently involved in rendering from the