[Intel-gfx] [PATCH v4 03/38] drm/i915: Prelude to splitting i915_gem_do_execbuffer in two
Jesse Barnes
jbarnes at virtuousgeek.org
Thu Feb 4 17:01:23 UTC 2016
On 01/11/2016 10:42 AM, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> The scheduler decouples the submission of batch buffers to the driver
> from their submission to the hardware. This basically means splitting
> the execbuffer() function in half. This change rearranges some code
> ready for the split to occur.
>
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++++++++++++++++++------------
> drivers/gpu/drm/i915/intel_lrc.c | 18 ++++++---
> 2 files changed, 51 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index bfc4c17..0eca2b6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -933,10 +933,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
> if (flush_domains & I915_GEM_DOMAIN_GTT)
> wmb();
>
> - /* Unconditionally invalidate gpu caches and ensure that we do flush
> - * any residual writes from the previous batch.
> - */
> - return intel_ring_invalidate_all_caches(req);
> + return 0;
> }
>
> static bool
> @@ -1189,17 +1186,6 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
> u32 instp_mask;
> int ret;
>
> - ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
> - if (ret)
> - return ret;
> -
> - ret = i915_switch_context(params->request);
> - if (ret)
> - return ret;
> -
> - WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
> - "%s didn't clear reload\n", ring->name);
> -
> instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
> instp_mask = I915_EXEC_CONSTANTS_MASK;
> switch (instp_mode) {
> @@ -1233,11 +1219,37 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
> return -EINVAL;
> }
>
> + ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
> + if (ret)
> + return ret;
> +
> + i915_gem_execbuffer_move_to_active(vmas, params->request);
> +
> + /* To be split into two functions here... */
> +
> + intel_runtime_pm_get(dev_priv);
> +
> + /*
> + * Unconditionally invalidate gpu caches and ensure that we do flush
> + * any residual writes from the previous batch.
> + */
> + ret = intel_ring_invalidate_all_caches(params->request);
> + if (ret)
> + goto error;
> +
> + /* Switch to the correct context for the batch */
> + ret = i915_switch_context(params->request);
> + if (ret)
> + goto error;
> +
> + WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
> + "%s didn't clear reload\n", ring->name);
> +
> if (ring == &dev_priv->ring[RCS] &&
> instp_mode != dev_priv->relative_constants_mode) {
> ret = intel_ring_begin(params->request, 4);
> if (ret)
> - return ret;
> + goto error;
>
> intel_ring_emit(ring, MI_NOOP);
> intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> @@ -1251,7 +1263,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
> if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
> ret = i915_reset_gen7_sol_offsets(dev, params->request);
> if (ret)
> - return ret;
> + goto error;
> }
>
> exec_len = args->batch_len;
> @@ -1262,14 +1274,20 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
> exec_start, exec_len,
> params->dispatch_flags);
> if (ret)
> - return ret;
> + goto error;
>
> trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>
> - i915_gem_execbuffer_move_to_active(vmas, params->request);
> i915_gem_execbuffer_retire_commands(params);
>
> - return 0;
> +error:
> + /*
> + * intel_gpu_busy should also get a ref, so it will free when the device
> + * is really idle.
> + */
> + intel_runtime_pm_put(dev_priv);
> +
> + return ret;
> }
>
> /**
> @@ -1424,8 +1442,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> dispatch_flags |= I915_DISPATCH_RS;
> }
>
> - intel_runtime_pm_get(dev_priv);
> -
> ret = i915_mutex_lock_interruptible(dev);
> if (ret)
> goto pre_mutex_err;
> @@ -1599,9 +1615,6 @@ err:
> mutex_unlock(&dev->struct_mutex);
>
> pre_mutex_err:
> - /* intel_gpu_busy should also get a ref, so it will free when the device
> - * is really idle. */
> - intel_runtime_pm_put(dev_priv);
> return ret;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index e510730..4bf0ee6 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -647,10 +647,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
> if (flush_domains & I915_GEM_DOMAIN_GTT)
> wmb();
>
> - /* Unconditionally invalidate gpu caches and ensure that we do flush
> - * any residual writes from the previous batch.
> - */
> - return logical_ring_invalidate_all_caches(req);
> + return 0;
> }
>
> int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
> @@ -913,6 +910,18 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
> if (ret)
> return ret;
>
> + i915_gem_execbuffer_move_to_active(vmas, params->request);
> +
> + /* To be split into two functions here... */
> +
> + /*
> + * Unconditionally invalidate gpu caches and ensure that we do flush
> + * any residual writes from the previous batch.
> + */
> + ret = logical_ring_invalidate_all_caches(params->request);
> + if (ret)
> + return ret;
> +
> if (ring == &dev_priv->ring[RCS] &&
> instp_mode != dev_priv->relative_constants_mode) {
> ret = intel_logical_ring_begin(params->request, 4);
> @@ -937,7 +946,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>
> trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>
> - i915_gem_execbuffer_move_to_active(vmas, params->request);
> i915_gem_execbuffer_retire_commands(params);
>
> return 0;
>
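To make sure I'm reading the rearrangement right: after this patch the
ringbuffer submission path boils down to the split sketched below. This is
only a minimal, compilable illustration with stand-in stubs; move_to_gpu(),
invalidate_caches() and friends are placeholders here, not the real i915
symbols.

/*
 * Stand-alone sketch of the reordered flow in i915_gem_ringbuffer_submission().
 * The stubs only mirror the shape of the driver calls; this is not i915 code.
 */
#include <stdio.h>

static int  move_to_gpu(void)       { return 0; } /* flush CPU write domains; no cache invalidate here any more */
static void move_to_active(void)    { }           /* bookkeeping: objects go on the active list */
static void runtime_pm_get(void)    { }
static void runtime_pm_put(void)    { }
static int  invalidate_caches(void) { return 0; } /* first step that actually touches the hardware */
static int  switch_context(void)    { return 0; }
static int  emit_and_dispatch(void) { return 0; } /* constants mode, SOL reset, batch start, trace, retire */

static int submission(void)
{
        int ret;

        /* Front half: runs at execbuffer() time, before the scheduler
         * decides when the request actually reaches the hardware. */
        ret = move_to_gpu();
        if (ret)
                return ret;

        move_to_active();

        /* To be split into two functions here... */

        /* Back half: all hardware access, bracketed by a runtime PM
         * reference and funnelled through a single error label. */
        runtime_pm_get();

        ret = invalidate_caches();
        if (ret)
                goto error;

        ret = switch_context();
        if (ret)
                goto error;

        ret = emit_and_dispatch();

error:
        /* Nothing on this path takes the objects back off the active
         * list if one of the back-half steps fails. */
        runtime_pm_put();
        return ret;
}

int main(void)
{
        printf("submission returned %d\n", submission());
        return 0;
}

The execlists path in intel_lrc.c gets the same move_to_active()/invalidate
reordering around the same split point.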
Do we need to do anything if the cache invalidation fails, like move the buffers back off the active list? The order changed here, so I'm wondering.
If that's not a problem, then:
Reviewed-by: Jesse Barnes <jbarnes at virtuousgeek.org>