[Intel-gfx] [PATCH 2/3] drm/i915: Simplify eb_lookup_vmas()

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Jun 20 10:11:56 UTC 2017


On 16/06/2017 17:02, Chris Wilson wrote:
> Since the introduction of being able to perform a lockless lookup of an
> object (i915_gem_object_get_rcu() in fbbd37b36fa5 ("drm/i915: Move object
> release to a freelist + worker")) we no longer need to split the
> object/vma lookup into 3 phases and so combine them into a much simpler
> single loop.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 140 +++++++++--------------------
>   1 file changed, 42 insertions(+), 98 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 9da0d209dd38..18d6581bc6c5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -47,16 +47,16 @@ enum {
>   #define DBG_FORCE_RELOC 0 /* choose one of the above! */
>   };
>   
> -#define __EXEC_OBJECT_HAS_REF		BIT(31)
> -#define __EXEC_OBJECT_HAS_PIN		BIT(30)
> -#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
> -#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
> -#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
> -#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
> +#define __EXEC_OBJECT_VALIDATED		BIT(31)
> +#define __EXEC_OBJECT_HAS_REF		BIT(30)
> +#define __EXEC_OBJECT_HAS_PIN		BIT(29)
> +#define __EXEC_OBJECT_HAS_FENCE		BIT(28)
> +#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
> +#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
> +#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above */
>   #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
>   
>   #define __EXEC_HAS_RELOC	BIT(31)
> -#define __EXEC_VALIDATED	BIT(30)
>   #define UPDATE			PIN_OFFSET_FIXED
>   
>   #define BATCH_OFFSET_BIAS (256*1024)
> @@ -429,14 +429,16 @@ eb_validate_vma(struct i915_execbuffer *eb,
>   }
>   
>   static int
> -eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
> +eb_add_vma(struct i915_execbuffer *eb,
> +	   unsigned int i, struct i915_vma *vma,
> +	   unsigned int flags)
>   {
>   	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
>   	int err;
>   
>   	GEM_BUG_ON(i915_vma_is_closed(vma));
>   
> -	if (!(eb->args->flags & __EXEC_VALIDATED)) {
> +	if (!(flags & __EXEC_OBJECT_VALIDATED)) {
>   		err = eb_validate_vma(eb, entry, vma);
>   		if (unlikely(err))
>   			return err;
> @@ -471,7 +473,7 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
>   	 * to find the right target VMA when doing relocations.
>   	 */
>   	eb->vma[i] = vma;
> -	eb->flags[i] = entry->flags;
> +	eb->flags[i] = entry->flags | flags;
>   	vma->exec_flags = &eb->flags[i];
>   
>   	err = 0;
> @@ -680,15 +682,11 @@ static int eb_select_context(struct i915_execbuffer *eb)
>   	return 0;
>   }
>   
> -static int eb_lookup_vmas(struct i915_execbuffer *eb)
> +static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
>   {
> -#define INTERMEDIATE BIT(0)
> -	const unsigned int count = eb->buffer_count;
>   	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
> -	struct i915_vma *vma;
> -	struct idr *idr;
> +	struct drm_i915_gem_object *uninitialized_var(obj);
>   	unsigned int i;
> -	int slow_pass = -1;
>   	int err;
>   
>   	INIT_LIST_HEAD(&eb->relocs);
> @@ -698,85 +696,39 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
>   		flush_work(&lut->resize);
>   	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
>   
> -	for (i = 0; i < count; i++) {
> -		hlist_for_each_entry(vma,
> -				     ht_head(lut, eb->exec[i].handle),
> -				     ctx_node) {
> -			if (vma->ctx_handle != eb->exec[i].handle)
> -				continue;
> -
> -			err = eb_add_vma(eb, i, vma);
> -			if (unlikely(err))
> -				return err;
> -			goto next_vma;
> -		}
> -
> -		if (slow_pass < 0)
> -			slow_pass = i;
> -
> -		eb->vma[i] = NULL;
> -
> -next_vma: ;
> -	}
> +	for (i = 0; i < eb->buffer_count; i++) {
> +		u32 handle = eb->exec[i].handle;
> +		struct hlist_head *hl = ht_head(lut, handle);
> +		struct i915_vma *vma;
>   
> -	if (slow_pass < 0)
> -		goto out;
> +		hlist_for_each_entry(vma, hl, ctx_node) {
> +			GEM_BUG_ON(vma->ctx != eb->ctx);
>   
> -	spin_lock(&eb->file->table_lock);
> -	/*
> -	 * Grab a reference to the object and release the lock so we can lookup
> -	 * or create the VMA without using GFP_ATOMIC
> -	 */
> -	idr = &eb->file->object_idr;
> -	for (i = slow_pass; i < count; i++) {
> -		struct drm_i915_gem_object *obj;
> +			if (vma->ctx_handle != handle)
> +				continue;
>   
> -		if (eb->vma[i])
> -			continue;
> +			goto add_vma;
> +		}
>   
> -		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
> +		obj = i915_gem_object_lookup(eb->file, handle);
>   		if (unlikely(!obj)) {
> -			spin_unlock(&eb->file->table_lock);
> -			DRM_DEBUG("Invalid object handle %d at index %d\n",
> -				  eb->exec[i].handle, i);
>   			err = -ENOENT;
> -			goto err;
> +			goto err_vma;
>   		}
>   
> -		eb->vma[i] =
> -			(struct i915_vma *)ptr_pack_bits(obj, INTERMEDIATE, 1);
> -	}
> -	spin_unlock(&eb->file->table_lock);
> -
> -	for (i = slow_pass; i < count; i++) {
> -		struct drm_i915_gem_object *obj;
> -		unsigned int is_obj;
> -
> -		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
> -		if (!is_obj)
> -			continue;
> +		flags |= __EXEC_OBJECT_HAS_REF;
>   
> -		/*
> -		 * NOTE: We can leak any vmas created here when something fails
> -		 * later on. But that's no issue since vma_unbind can deal with
> -		 * vmas which are not actually bound. And since only
> -		 * lookup_or_create exists as an interface to get at the vma
> -		 * from the (obj, vm) we don't run the risk of creating
> -		 * duplicated vmas for the same vm.
> -		 */
>   		vma = i915_vma_instance(obj, eb->vm, NULL);
>   		if (unlikely(IS_ERR(vma))) {
> -			DRM_DEBUG("Failed to lookup VMA\n");
>   			err = PTR_ERR(vma);
> -			goto err;
> +			goto err_obj;
>   		}
>   
>   		/* First come, first served */
>   		if (!vma->ctx) {
>   			vma->ctx = eb->ctx;
> -			vma->ctx_handle = eb->exec[i].handle;
> -			hlist_add_head(&vma->ctx_node,
> -				       ht_head(lut, eb->exec[i].handle));
> +			vma->ctx_handle = handle;
> +			hlist_add_head(&vma->ctx_node, hl);
>   			lut->ht_count++;
>   			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
>   			if (i915_vma_is_ggtt(vma)) {
> @@ -784,18 +736,14 @@ next_vma: ;
>   				obj->vma_hashed = vma;
>   			}
>   
> -			i915_vma_get(vma);
> +			/* transfer ref to ctx */
> +			flags &= ~__EXEC_OBJECT_HAS_REF;
>   		}
>   
> -		err = eb_add_vma(eb, i, vma);
> +add_vma:
> +		err = eb_add_vma(eb, i, vma, flags);
>   		if (unlikely(err))
> -			goto err;
> -
> -		/* Only after we validated the user didn't use our bits */
> -		if (vma->ctx != eb->ctx) {
> -			i915_vma_get(vma);
> -			eb->flags[i] |= __EXEC_OBJECT_HAS_REF;
> -		}
> +			goto err_vma;
>   	}
>   
>   	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
> @@ -805,7 +753,6 @@ next_vma: ;
>   			lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
>   	}
>   
> -out:
>   	/* take note of the batch buffer before we might reorder the lists */
>   	i = eb_batch_index(eb);
>   	eb->batch = eb->vma[i];
> @@ -825,17 +772,14 @@ next_vma: ;
>   	if (eb->reloc_cache.has_fence)
>   		eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
>   
> -	eb->args->flags |= __EXEC_VALIDATED;
>   	return eb_reserve(eb);
>   
> -err:
> -	for (i = slow_pass; i < count; i++) {
> -		if (ptr_unmask_bits(eb->vma[i], 1))
> -			eb->vma[i] = NULL;
> -	}
> +err_obj:
> +	i915_gem_object_put(obj);
> +err_vma:
> +	eb->vma[i] = NULL;
>   	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
>   	return err;
> -#undef INTERMEDIATE
>   }
>   
>   static struct i915_vma *
> @@ -868,7 +812,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
>   		unsigned int flags = eb->flags[i];
>   
>   		if (!vma)
> -			continue;
> +			break;
>   
>   		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
>   		vma->exec_flags = NULL;
> @@ -1714,7 +1658,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   	}
>   
>   	/* reacquire the objects */
> -	err = eb_lookup_vmas(eb);
> +	err = eb_lookup_vmas(eb, __EXEC_OBJECT_VALIDATED);
>   	if (err)
>   		goto err;
>   
> @@ -1768,7 +1712,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>   
>   static int eb_relocate(struct i915_execbuffer *eb)
>   {
> -	if (eb_lookup_vmas(eb))
> +	if (eb_lookup_vmas(eb, 0))
>   		goto slow;
>   
>   	/* The objects are in their final locations, apply the relocations. */
> 

Couldn't parse the diff mentally, but looked at the resulting code and 
that looked simple enough to me.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko




More information about the Intel-gfx mailing list