[Intel-gfx] [PATCH v4 03/38] drm/i915: Prelude to splitting i915_gem_do_execbuffer in two

John Harrison John.C.Harrison at Intel.com
Fri Feb 12 16:18:29 UTC 2016


On 04/02/2016 17:01, Jesse Barnes wrote:
> On 01/11/2016 10:42 AM, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> The scheduler decouples the submission of batch buffers to the driver
>> with their submission to the hardware. This basically means splitting
>> the execbuffer() function in half. This change rearranges some code
>> ready for the split to occur.
>>
>> For: VIZ-1587
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 63 ++++++++++++++++++------------
>>   drivers/gpu/drm/i915/intel_lrc.c           | 18 ++++++---
>>   2 files changed, 51 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index bfc4c17..0eca2b6 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -933,10 +933,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
>>   	if (flush_domains & I915_GEM_DOMAIN_GTT)
>>   		wmb();
>>   
>> -	/* Unconditionally invalidate gpu caches and ensure that we do flush
>> -	 * any residual writes from the previous batch.
>> -	 */
>> -	return intel_ring_invalidate_all_caches(req);
>> +	return 0;
>>   }
>>   
>>   static bool
>> @@ -1189,17 +1186,6 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>>   	u32 instp_mask;
>>   	int ret;
>>   
>> -	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
>> -	if (ret)
>> -		return ret;
>> -
>> -	ret = i915_switch_context(params->request);
>> -	if (ret)
>> -		return ret;
>> -
>> -	WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
>> -	     "%s didn't clear reload\n", ring->name);
>> -
>>   	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
>>   	instp_mask = I915_EXEC_CONSTANTS_MASK;
>>   	switch (instp_mode) {
>> @@ -1233,11 +1219,37 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>>   		return -EINVAL;
>>   	}
>>   
>> +	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
>> +	if (ret)
>> +		return ret;
>> +
>> +	i915_gem_execbuffer_move_to_active(vmas, params->request);
>> +
>> +	/* To be split into two functions here... */
>> +
>> +	intel_runtime_pm_get(dev_priv);
>> +
>> +	/*
>> +	 * Unconditionally invalidate gpu caches and ensure that we do flush
>> +	 * any residual writes from the previous batch.
>> +	 */
>> +	ret = intel_ring_invalidate_all_caches(params->request);
>> +	if (ret)
>> +		goto error;
>> +
>> +	/* Switch to the correct context for the batch */
>> +	ret = i915_switch_context(params->request);
>> +	if (ret)
>> +		goto error;
>> +
>> +	WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
>> +	     "%s didn't clear reload\n", ring->name);
>> +
>>   	if (ring == &dev_priv->ring[RCS] &&
>>   	    instp_mode != dev_priv->relative_constants_mode) {
>>   		ret = intel_ring_begin(params->request, 4);
>>   		if (ret)
>> -			return ret;
>> +			goto error;
>>   
>>   		intel_ring_emit(ring, MI_NOOP);
>>   		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> @@ -1251,7 +1263,7 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>>   	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
>>   		ret = i915_reset_gen7_sol_offsets(dev, params->request);
>>   		if (ret)
>> -			return ret;
>> +			goto error;
>>   	}
>>   
>>   	exec_len   = args->batch_len;
>> @@ -1262,14 +1274,20 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
>>   					exec_start, exec_len,
>>   					params->dispatch_flags);
>>   	if (ret)
>> -		return ret;
>> +		goto error;
>>   
>>   	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>>   
>> -	i915_gem_execbuffer_move_to_active(vmas, params->request);
>>   	i915_gem_execbuffer_retire_commands(params);
>>   
>> -	return 0;
>> +error:
>> +	/*
>> +	 * intel_gpu_busy should also get a ref, so it will free when the device
>> +	 * is really idle.
>> +	 */
>> +	intel_runtime_pm_put(dev_priv);
>> +
>> +	return ret;
>>   }
>>   
>>   /**
>> @@ -1424,8 +1442,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>   		dispatch_flags |= I915_DISPATCH_RS;
>>   	}
>>   
>> -	intel_runtime_pm_get(dev_priv);
>> -
>>   	ret = i915_mutex_lock_interruptible(dev);
>>   	if (ret)
>>   		goto pre_mutex_err;
>> @@ -1599,9 +1615,6 @@ err:
>>   	mutex_unlock(&dev->struct_mutex);
>>   
>>   pre_mutex_err:
>> -	/* intel_gpu_busy should also get a ref, so it will free when the device
>> -	 * is really idle. */
>> -	intel_runtime_pm_put(dev_priv);
>>   	return ret;
>>   }
>>   
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index e510730..4bf0ee6 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -647,10 +647,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
>>   	if (flush_domains & I915_GEM_DOMAIN_GTT)
>>   		wmb();
>>   
>> -	/* Unconditionally invalidate gpu caches and ensure that we do flush
>> -	 * any residual writes from the previous batch.
>> -	 */
>> -	return logical_ring_invalidate_all_caches(req);
>> +	return 0;
>>   }
>>   
>>   int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
>> @@ -913,6 +910,18 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>>   	if (ret)
>>   		return ret;
>>   
>> +	i915_gem_execbuffer_move_to_active(vmas, params->request);
>> +
>> +	/* To be split into two functions here... */
>> +
>> +	/*
>> +	 * Unconditionally invalidate gpu caches and ensure that we do flush
>> +	 * any residual writes from the previous batch.
>> +	 */
>> +	ret = logical_ring_invalidate_all_caches(params->request);
>> +	if (ret)
>> +		return ret;
>> +
>>   	if (ring == &dev_priv->ring[RCS] &&
>>   	    instp_mode != dev_priv->relative_constants_mode) {
>>   		ret = intel_logical_ring_begin(params->request, 4);
>> @@ -937,7 +946,6 @@ int intel_execlists_submission(struct i915_execbuffer_params *params,
>>   
>>   	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
>>   
>> -	i915_gem_execbuffer_move_to_active(vmas, params->request);
>>   	i915_gem_execbuffer_retire_commands(params);
>>   
>>   	return 0;
>>
> Do we need to do anything if the cache invalidation fails, like moving the buffers back off the active list?  The order changed here, so I'm wondering.

I don't believe so. The objects are all tracked by request. The request 
itself is cancelled rather than added to the request list. So the next 
run of i915_gem_retire_requests_ring() will do all the necessary
cleanup. Potentially we could do something explicit here to speed the process
up. However, this is only an intermediate step and there is no nice and 
simple move_to_inactive function to call. Once the scheduler arrives, a 
failed backend submission would be retried by the scheduler. So objects 
must not be cleaned up as that would break the re-submission.

> If that's not a problem, then:
> Reviewed-by: Jesse Barnes <jbarnes at virtuousgeek.org>



More information about the Intel-gfx mailing list