[Intel-gfx] [PATCH 10/18] drm/i915: Unify legacy/execlists emission of MI_BATCHBUFFER_START

Thu Jul 21 13:39:58 UTC 2016

On Wed, 2016-07-20 at 14:12 +0100, Chris Wilson wrote:
> Both the ->dispatch_execbuffer and ->emit_bb_start callbacks do exactly
> the same thing, add MI_BATCHBUFFER_START to the request's ringbuffer -
> we need only one vfunc.
> 

Some ranting below.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  6 ++--
>  drivers/gpu/drm/i915/i915_gem_render_state.c | 16 +++++-----
>  drivers/gpu/drm/i915/intel_lrc.c             | 15 ++++++---
>  drivers/gpu/drm/i915/intel_ringbuffer.c      | 48 ++++++++++++++--------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h      | 12 +++----
>  5 files changed, 50 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 5cea95c6f98b..2d9f1f4bc058 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1326,9 +1326,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>  	if (exec_len == 0)
>  		exec_len = params->batch_obj->base.size;
>  
> -	ret = params->engine->dispatch_execbuffer(params->request,
> -						  exec_start, exec_len,
> -						  params->dispatch_flags);
> +	ret = params->engine->emit_bb_start(params->request,
> +					    exec_start, exec_len,
> +					    params->dispatch_flags);
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index b2be4676a5cf..2ba759f3ab6f 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -234,18 +234,18 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>  	if (so.rodata == NULL)
>  		return 0;
>  
> -	ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
> -					     so.rodata->batch_items * 4,
> -					     I915_DISPATCH_SECURE);
> +	ret = req->engine->emit_bb_start(req, so.ggtt_offset,
> +					 so.rodata->batch_items * 4,
> +					 I915_DISPATCH_SECURE);
>  	if (ret)
>  		goto out;
>  
>  	if (so.aux_batch_size > 8) {
> -		ret = req->engine->dispatch_execbuffer(req,
> -						     (so.ggtt_offset +
> -						      so.aux_batch_offset),
> -						     so.aux_batch_size,
> -						     I915_DISPATCH_SECURE);
> +		ret = req->engine->emit_bb_start(req,
> +						 (so.ggtt_offset +
> +						  so.aux_batch_offset),
> +						 so.aux_batch_size,
> +						 I915_DISPATCH_SECURE);
>  		if (ret)
>  			goto out;
>  	}

The code above this line is the exact reason why I don't like chained
a->b->c dereferences (especially when they are repeated). But it's not
new to this patch, so I guess it'll do. Some future work to shorten
these a little might not hurt.

> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 6cd0e24ed50c..d17a193e8eaf 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -859,7 +859,9 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>  	exec_start = params->batch_obj_vm_offset +
>  		     args->batch_start_offset;
>  
> -	ret = engine->emit_bb_start(params->request, exec_start, params->dispatch_flags);
> +	ret = engine->emit_bb_start(params->request,
> +				    exec_start, args->batch_len,
> +				    params->dispatch_flags);
>  	if (ret)
>  		return ret;
>  
> @@ -1535,7 +1537,8 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
>  }
>  
>  static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
> -			      u64 offset, unsigned dispatch_flags)
> +			      u64 offset, u32 len,
> +			      unsigned int dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
>  	bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
> @@ -1811,13 +1814,15 @@ static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
>  		return 0;
>  
>  	ret = req->engine->emit_bb_start(req, so.ggtt_offset,
> -				       I915_DISPATCH_SECURE);
> +					 so.rodata->batch_items * 4,
> +					 I915_DISPATCH_SECURE);
>  	if (ret)
>  		goto out;
>  
>  	ret = req->engine->emit_bb_start(req,
> -				       (so.ggtt_offset + so.aux_batch_offset),
> -				       I915_DISPATCH_SECURE);
> +					 (so.ggtt_offset + so.aux_batch_offset),
> +					 so.aux_batch_size,
> +					 I915_DISPATCH_SECURE);
>  	if (ret)
>  		goto out;
>  
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 6aa1657bbc9d..4488db485fa4 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1779,9 +1779,9 @@ gen8_irq_disable(struct intel_engine_cs *engine)
>  }
>  
>  static int
> -i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
> -			 u64 offset, u32 length,
> -			 unsigned dispatch_flags)
> +i965_emit_bb_start(struct drm_i915_gem_request *req,
> +		   u64 offset, u32 length,
> +		   unsigned int dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
>  	int ret;
> @@ -1806,9 +1806,9 @@ i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  #define I830_TLB_ENTRIES (2)
>  #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
>  static int
> -i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
> -			 u64 offset, u32 len,
> -			 unsigned dispatch_flags)
> +i830_emit_bb_start(struct drm_i915_gem_request *req,
> +		   u64 offset, u32 len,
> +		   unsigned int dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
>  	u32 cs_offset = req->engine->scratch.gtt_offset;
> @@ -1868,9 +1868,9 @@ i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  }
>  
>  static int
> -i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
> -			 u64 offset, u32 len,
> -			 unsigned dispatch_flags)
> +i915_emit_bb_start(struct drm_i915_gem_request *req,
> +		   u64 offset, u32 len,
> +		   unsigned int dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
>  	int ret;
> @@ -2563,9 +2563,9 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
>  }
>  
>  static int
> -gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
> -			      u64 offset, u32 len,
> -			      unsigned dispatch_flags)
> +gen8_emit_bb_start(struct drm_i915_gem_request *req,
> +		   u64 offset, u32 len,
> +		   unsigned int dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
>  	bool ppgtt = USES_PPGTT(req->i915) &&
> @@ -2589,9 +2589,9 @@ gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  }
>  
>  static int
> -hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
> -			     u64 offset, u32 len,
> -			     unsigned dispatch_flags)
> +hsw_emit_bb_start(struct drm_i915_gem_request *req,
> +		  u64 offset, u32 len,
> +		  unsigned int dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
>  	int ret;
> @@ -2614,9 +2614,9 @@ hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
>  }
>  
>  static int
> -gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
> -			      u64 offset, u32 len,
> -			      unsigned dispatch_flags)
> +gen6_emit_bb_start(struct drm_i915_gem_request *req,
> +		   u64 offset, u32 len,
> +		   unsigned int dispatch_flags)
>  {
>  	struct intel_ring *ring = req->ring;
>  	int ret;
> @@ -2820,15 +2820,15 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv,
>  		engine->add_request = gen6_add_request;
>  
>  	if (INTEL_GEN(dev_priv) >= 8)
> -		engine->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
> +		engine->emit_bb_start = gen8_emit_bb_start;
>  	else if (INTEL_GEN(dev_priv) >= 6)
> -		engine->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
> +		engine->emit_bb_start = gen6_emit_bb_start;
>  	else if (INTEL_GEN(dev_priv) >= 4)
> -		engine->dispatch_execbuffer = i965_dispatch_execbuffer;
> +		engine->emit_bb_start = i965_emit_bb_start;
>  	else if (IS_I830(dev_priv) || IS_845G(dev_priv))
> -		engine->dispatch_execbuffer = i830_dispatch_execbuffer;
> +		engine->emit_bb_start = i830_emit_bb_start;
>  	else
> -		engine->dispatch_execbuffer = i915_dispatch_execbuffer;
> +		engine->emit_bb_start = i915_emit_bb_start;
>  
>  	intel_ring_init_irq(dev_priv, engine);
>  	intel_ring_init_semaphores(dev_priv, engine);
> @@ -2866,7 +2866,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
>  	}
>  
>  	if (IS_HASWELL(dev_priv))
> -		engine->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
> +		engine->emit_bb_start = hsw_emit_bb_start;
>  
>  	engine->init_hw = init_render_ring;
>  	engine->cleanup = render_ring_cleanup;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 49500cead7a5..85d6a70554b9 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -214,12 +214,6 @@ struct intel_engine_cs {
>  	 * monotonic, even if not coherent.
>  	 */
>  	void		(*irq_seqno_barrier)(struct intel_engine_cs *ring);
> -	int		(*dispatch_execbuffer)(struct drm_i915_gem_request *req,
> -					       u64 offset, u32 length,
> -					       unsigned dispatch_flags);
> -#define I915_DISPATCH_SECURE 0x1
> -#define I915_DISPATCH_PINNED 0x2
> -#define I915_DISPATCH_RS     0x4
>  	void		(*cleanup)(struct intel_engine_cs *ring);
>  
>  	/* GEN8 signal/wait table - never trust comments!
> @@ -295,7 +289,11 @@ struct intel_engine_cs {
>  				      u32 invalidate_domains,
>  				      u32 flush_domains);
>  	int		(*emit_bb_start)(struct drm_i915_gem_request *req,
> -					 u64 offset, unsigned dispatch_flags);
> +					 u64 offset, u32 length,
> +					 unsigned int dispatch_flags);
> +#define I915_DISPATCH_SECURE 0x1
> +#define I915_DISPATCH_PINNED 0x2
> +#define I915_DISPATCH_RS     0x4

Convert the I915_DISPATCH_* flags to BIT(0), BIT(1), etc. while touching them?

Regards, Joonas

>  
>  	/**
>  	 * List of objects currently involved in rendering from the
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation