[Intel-gfx] [PATCH 06/26] drm/i915: Parse command buffer earlier in eb_relocate(slow)

Fri Jun 26 14:41:20 UTC 2020

On 6/23/20 4:28 PM, Maarten Lankhorst wrote:
> We want to introduce backoff logic, but we need to lock the
> pool object as well for command parsing. Because of this, we
> will need backoff logic for the engine pool obj, move the batch
> validation up slightly to eb_lookup_vmas, and the actual command
> parsing in a separate function which can get called from execbuf
> relocation fast and slowpath.
>
> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 66 ++++++++++---------
>   1 file changed, 36 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index f896b1a4b38a..7cb44915cfc7 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -290,6 +290,8 @@ struct i915_execbuffer {
>   	struct eb_vma_array *array;
>   };
>   
> +static int eb_parse(struct i915_execbuffer *eb);
> +
>   static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
>   {
>   	return intel_engine_requires_cmd_parser(eb->engine) ||
> @@ -873,6 +875,7 @@ static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
>   
>   static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   {
> +	struct drm_i915_private *i915 = eb->i915;
>   	unsigned int batch = eb_batch_index(eb);
>   	unsigned int i;
>   	int err = 0;
> @@ -886,18 +889,37 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   		vma = eb_lookup_vma(eb, eb->exec[i].handle);
>   		if (IS_ERR(vma)) {
>   			err = PTR_ERR(vma);
> -			break;
> +			goto err;
>   		}
>   
>   		err = eb_validate_vma(eb, &eb->exec[i], vma);
>   		if (unlikely(err)) {
>   			i915_vma_put(vma);
> -			break;
> +			goto err;
>   		}
>   
>   		eb_add_vma(eb, i, batch, vma);
>   	}
>   
> +	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
> +		drm_dbg(&i915->drm,
> +			"Attempting to use self-modifying batch buffer\n");
> +		return -EINVAL;
> +	}
> +
> +	if (range_overflows_t(u64,
> +			      eb->batch_start_offset, eb->batch_len,
> +			      eb->batch->vma->size)) {
> +		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
> +		return -EINVAL;
> +	}
> +
> +	if (eb->batch_len == 0)
> +		eb->batch_len = eb->batch->vma->size - eb->batch_start_offset;
> +
> +	return 0;
> +
> +err:
>   	eb->vma[i].vma = NULL;
>   	return err;
>   }
> @@ -1809,7 +1831,7 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
>   	return 0;
>   }
>   
> -static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> +static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
>   {
>   	bool have_copy = false;
>   	struct eb_vma *ev;
> @@ -1872,6 +1894,11 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	if (err)
>   		goto err;
>   
> +	/* as last step, parse the command buffer */
> +	err = eb_parse(eb);
> +	if (err)
> +		goto err;
> +
>   	/*
>   	 * Leave the user relocations as are, this is the painfully slow path,
>   	 * and we want to avoid the complication of dropping the lock whilst
> @@ -1904,7 +1931,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	return err;
>   }
>   
> -static int eb_relocate(struct i915_execbuffer *eb)
> +static int eb_relocate_parse(struct i915_execbuffer *eb)
>   {
>   	int err;
>   
> @@ -1932,7 +1959,7 @@ static int eb_relocate(struct i915_execbuffer *eb)
>   			return eb_relocate_slow(eb);
>   	}
>   
> -	return 0;
> +	return eb_parse(eb);
>   }
>   
>   static int eb_move_to_gpu(struct i915_execbuffer *eb)
> @@ -2870,7 +2897,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (unlikely(err))
>   		goto err_context;
>   
> -	err = eb_relocate(&eb);
> +	err = eb_relocate_parse(&eb);
>   	if (err) {
>   		/*
>   		 * If the user expects the execobject.offset and
> @@ -2883,33 +2910,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		goto err_vma;
>   	}
>   
> -	if (unlikely(eb.batch->flags & EXEC_OBJECT_WRITE)) {
> -		drm_dbg(&i915->drm,
> -			"Attempting to use self-modifying batch buffer\n");
> -		err = -EINVAL;
> -		goto err_vma;
> -	}
> -
> -	if (range_overflows_t(u64,
> -			      eb.batch_start_offset, eb.batch_len,
> -			      eb.batch->vma->size)) {
> -		drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
> -		err = -EINVAL;
> -		goto err_vma;
> -	}
> -
> -	if (eb.batch_len == 0)
> -		eb.batch_len = eb.batch->vma->size - eb.batch_start_offset;
> -
> -	err = eb_parse(&eb);
> -	if (err)
> -		goto err_vma;
> -
>   	/*
>   	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>   	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>   	 * hsw should have this fixed, but bdw mucks it up again. */
> -	batch = eb.batch->vma;
>   	if (eb.batch_flags & I915_DISPATCH_SECURE) {
>   		struct i915_vma *vma;
>   
> @@ -2923,13 +2927,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   		 *   fitting due to fragmentation.
>   		 * So this is actually safe.
>   		 */
> -		vma = i915_gem_object_ggtt_pin(batch->obj, NULL, 0, 0, 0);
> +		vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
>   		if (IS_ERR(vma)) {
>   			err = PTR_ERR(vma);
>   			goto err_parse;
>   		}
>   
>   		batch = vma;
> +	} else {
> +		batch = eb.batch->vma;
>   	}
>   

Hmm, it's late friday afternoon so that might be the cause, but I fail 
to see what the above hunk is trying to achieve?

>   	/* All GPU relocation batches must be submitted prior to the user rq */