[Intel-gfx] [PATCH 16/59] drm/i915: Add flag to i915_add_request() to skip the cache flush

Tomas Elf tomas.elf at intel.com
Tue Mar 31 09:32:07 PDT 2015


On 19/03/2015 12:30, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> In order to explicitly track all GPU work (and completely remove the outstanding
> lazy request), it is necessary to add extra i915_add_request() calls to various
> places. Some of these do not need the implicit cache flush done as part of the
> standard batch buffer submission process.
>
> This patch adds a flag to _add_request() to specify whether the flush is
> required or not.
>
> For: VIZ-5115
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h              |    7 +++++--
>   drivers/gpu/drm/i915/i915_gem.c              |   17 ++++++++++-------
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c   |    2 +-
>   drivers/gpu/drm/i915/i915_gem_render_state.c |    2 +-
>   drivers/gpu/drm/i915/intel_lrc.c             |    2 +-
>   5 files changed, 18 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d3b718e..4bcb43f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2751,9 +2751,12 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
>   int __must_check i915_gem_suspend(struct drm_device *dev);
>   void __i915_add_request(struct intel_engine_cs *ring,
>   			struct drm_file *file,
> -			struct drm_i915_gem_object *batch_obj);
> +			struct drm_i915_gem_object *batch_obj,
> +			bool flush_caches);
>   #define i915_add_request(ring) \
> -	__i915_add_request(ring, NULL, NULL)
> +	__i915_add_request(ring, NULL, NULL, true)
> +#define i915_add_request_no_flush(ring) \
> +	__i915_add_request(ring, NULL, NULL, false)
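
Side note for anyone following the series: the pairing above keeps the
implicit flush for every existing caller and only skips it at call sites
that are explicitly converted. A hypothetical converted call site would
then look like this (the engine pointer name is purely illustrative):

	/* normal submission path: keep the implicit cache flush */
	i915_add_request(ring);

	/* request for work that does not need its own flush */
	i915_add_request_no_flush(ring);
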
>   int __i915_wait_request(struct drm_i915_gem_request *req,
>   			unsigned reset_counter,
>   			bool interruptible,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9a335d5..f143d15 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2329,7 +2329,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>    */
>   void __i915_add_request(struct intel_engine_cs *ring,
>   			struct drm_file *file,
> -			struct drm_i915_gem_object *obj)
> +			struct drm_i915_gem_object *obj,
> +			bool flush_caches)
>   {
>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>   	struct drm_i915_gem_request *request;
> @@ -2361,12 +2362,14 @@ void __i915_add_request(struct intel_engine_cs *ring,
>   	 * is that the flush _must_ happen before the next request, no matter
>   	 * what.
>   	 */
> -	if (i915.enable_execlists)
> -		ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
> -	else
> -		ret = intel_ring_flush_all_caches(ring);
> -	/* Not allowed to fail! */
> -	WARN_ON(ret);
> +	if (flush_caches) {
> +		if (i915.enable_execlists)
> +			ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
> +		else
> +			ret = intel_ring_flush_all_caches(ring);
> +		/* Not allowed to fail! */
> +		WARN_ON(ret);

There has been some discussion about whether it's OK to use 
WARN_ON(<variable/constant>), since it doesn't add any useful information 
about the actual failure to the kernel log. By that logic you would have 
to add a WARN_ON to each individual _flush_all_caches call above, but I 
would say that is slightly redundant: if you get a WARN stack dump in the 
kernel log at this point, you pretty much know where it came from.

However, if you want to be able to rely entirely on the kernel log and 
not have to read the source code to figure out which function call 
failed, then you would have to either add a WARN_ON to each individual 
call or replace WARN_ON(ret) with something like 
WARN(ret, "flush_all_caches returned %d", ret).

Does anyone else have an opinion on how we should handle this? What 
pattern should we be following here?
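
To be concrete, I mean something along these lines, mirroring the hunk
above (the message text is only an example, not a suggestion for the
exact wording):

	if (flush_caches) {
		if (i915.enable_execlists)
			ret = logical_ring_flush_all_caches(ringbuf, request->ctx);
		else
			ret = intel_ring_flush_all_caches(ring);
		/* Not allowed to fail, but log which flush path failed */
		WARN(ret, "%s flush_all_caches failed: %d\n",
		     i915.enable_execlists ? "execlists" : "legacy ring", ret);
	}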

> +	}
>
>   	/* Record the position of the start of the request so that
>   	 * should we detect the updated seqno part-way through the
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 3173550..c0be7d7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1060,7 +1060,7 @@ i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
>   	params->ring->gpu_caches_dirty = true;
>
>   	/* Add a breadcrumb for the completion of the batch buffer */
> -	__i915_add_request(params->ring, params->file, params->batch_obj);
> +	__i915_add_request(params->ring, params->file, params->batch_obj, true);
>   }
>
>   static int
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index ce4788f..4418616 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -173,7 +173,7 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
>
>   	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
>
> -	__i915_add_request(ring, NULL, so.obj);
> +	__i915_add_request(ring, NULL, so.obj, true);
>   	/* __i915_add_request moves object to inactive if it fails */
>   out:
>   	i915_gem_render_state_fini(&so);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8c69f88..4922725 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1350,7 +1350,7 @@ static int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
>
>   	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
>
> -	__i915_add_request(ring, file, so.obj);
> +	__i915_add_request(ring, file, so.obj, true);
>   	/* intel_logical_ring_add_request moves object to inactive if it
>   	 * fails */
>   out:
>
