[Intel-gfx] [PATCH 27/39] drm/i915: Move vma lookup to its own lock

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Jan 2 15:07:09 UTC 2019


On 02/01/2019 09:41, Chris Wilson wrote:
> Remove the struct_mutex requirement for looking up the vma for an
> object.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c       |  6 +--
>   drivers/gpu/drm/i915/i915_gem.c           | 33 +++++++------
>   drivers/gpu/drm/i915/i915_gem_object.h    | 45 ++++++++++-------
>   drivers/gpu/drm/i915/i915_vma.c           | 60 +++++++++++++++--------
>   drivers/gpu/drm/i915/i915_vma.h           |  2 +-
>   drivers/gpu/drm/i915/selftests/i915_vma.c |  4 +-
>   6 files changed, 92 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5e2d5c8d7e02..8059f6dd3886 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -159,14 +159,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
>   		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
>   	if (obj->base.name)
>   		seq_printf(m, " (name: %d)", obj->base.name);
> -	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +	list_for_each_entry(vma, &obj->vma.list, obj_link) {
>   		if (i915_vma_is_pinned(vma))
>   			pin_count++;
>   	}
>   	seq_printf(m, " (pinned x %d)", pin_count);
>   	if (obj->pin_global)
>   		seq_printf(m, " (global)");
> -	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +	list_for_each_entry(vma, &obj->vma.list, obj_link) {
>   		if (!drm_mm_node_allocated(&vma->node))
>   			continue;
>   
> @@ -322,7 +322,7 @@ static int per_file_stats(int id, void *ptr, void *data)
>   	if (obj->base.name || obj->base.dma_buf)
>   		stats->shared += obj->base.size;
>   
> -	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +	list_for_each_entry(vma, &obj->vma.list, obj_link) {
>   		if (!drm_mm_node_allocated(&vma->node))
>   			continue;
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a954e15c0315..e42ad20d6328 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -438,15 +438,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
>   	if (ret)
>   		return ret;
>   
> -	while ((vma = list_first_entry_or_null(&obj->vma_list,
> -					       struct i915_vma,
> -					       obj_link))) {
> +	spin_lock(&obj->vma.lock);
> +	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
> +						       struct i915_vma,
> +						       obj_link))) {
>   		list_move_tail(&vma->obj_link, &still_in_list);
> +		spin_unlock(&obj->vma.lock);
> +
>   		ret = i915_vma_unbind(vma);
> -		if (ret)
> -			break;
> +
> +		spin_lock(&obj->vma.lock);
>   	}
> -	list_splice(&still_in_list, &obj->vma_list);
> +	list_splice(&still_in_list, &obj->vma.list);
> +	spin_unlock(&obj->vma.lock);
>   
>   	return ret;
>   }
> @@ -3640,7 +3644,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   	 * reading an invalid PTE on older architectures.
>   	 */
>   restart:
> -	list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +	list_for_each_entry(vma, &obj->vma.list, obj_link) {
>   		if (!drm_mm_node_allocated(&vma->node))
>   			continue;
>   
> @@ -3718,7 +3722,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   			 */
>   		}
>   
> -		list_for_each_entry(vma, &obj->vma_list, obj_link) {
> +		list_for_each_entry(vma, &obj->vma.list, obj_link) {
>   			if (!drm_mm_node_allocated(&vma->node))
>   				continue;
>   
> @@ -3728,7 +3732,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>   		}
>   	}
>   
> -	list_for_each_entry(vma, &obj->vma_list, obj_link)
> +	list_for_each_entry(vma, &obj->vma.list, obj_link)
>   		vma->node.color = cache_level;
>   	i915_gem_object_set_cache_coherency(obj, cache_level);
>   	obj->cache_dirty = true; /* Always invalidate stale cachelines */
> @@ -4304,7 +4308,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>   {
>   	mutex_init(&obj->mm.lock);
>   
> -	INIT_LIST_HEAD(&obj->vma_list);
> +	spin_lock_init(&obj->vma.lock);
> +	INIT_LIST_HEAD(&obj->vma.list);
> +
>   	INIT_LIST_HEAD(&obj->lut_list);
>   	INIT_LIST_HEAD(&obj->batch_pool_link);
>   
> @@ -4470,14 +4476,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>   		mutex_lock(&i915->drm.struct_mutex);
>   
>   		GEM_BUG_ON(i915_gem_object_is_active(obj));
> -		list_for_each_entry_safe(vma, vn,
> -					 &obj->vma_list, obj_link) {
> +		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
>   			GEM_BUG_ON(i915_vma_is_active(vma));
>   			vma->flags &= ~I915_VMA_PIN_MASK;
>   			i915_vma_destroy(vma);
>   		}
> -		GEM_BUG_ON(!list_empty(&obj->vma_list));
> -		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
> +		GEM_BUG_ON(!list_empty(&obj->vma.list));
> +		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
>   
>   		/* This serializes freeing with the shrinker. Since the free
>   		 * is delayed, first by RCU then by the workqueue, we want the
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 49ce797173b5..151453f0f951 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -86,24 +86,33 @@ struct drm_i915_gem_object {
>   
>   	const struct drm_i915_gem_object_ops *ops;
>   
> -	/**
> -	 * @vma_list: List of VMAs backed by this object
> -	 *
> -	 * The VMA on this list are ordered by type, all GGTT vma are placed
> -	 * at the head and all ppGTT vma are placed at the tail. The different
> -	 * types of GGTT vma are unordered between themselves, use the
> -	 * @vma_tree (which has a defined order between all VMA) to find an
> -	 * exact match.
> -	 */
> -	struct list_head vma_list;
> -	/**
> -	 * @vma_tree: Ordered tree of VMAs backed by this object
> -	 *
> -	 * All VMA created for this object are placed in the @vma_tree for
> -	 * fast retrieval via a binary search in i915_vma_instance().
> -	 * They are also added to @vma_list for easy iteration.
> -	 */
> -	struct rb_root vma_tree;
> +	struct {
> +		/**
> +		 * @vma.lock: protect the list/tre of vmas

tree

> +		 */
> +		struct spinlock lock;
> +
> +		/**
> +		 * @vma.list: List of VMAs backed by this object
> +		 *
> +		 * The VMA on this list are ordered by type, all GGTT vma are
> +		 * placed at the head and all ppGTT vma are placed at the tail.
> +		 * The different types of GGTT vma are unordered between
> +		 * themselves, use the @vma.tree (which has a defined order
> +		 * between all VMA) to quickly find an exact match.
> +		 */
> +		struct list_head list;
> +
> +		/**
> +		 * @vma.tree: Ordered tree of VMAs backed by this object
> +		 *
> +		 * All VMA created for this object are placed in the @vma.tree
> +		 * for fast retrieval via a binary search in
> +		 * i915_vma_instance(). They are also added to @vma.list for
> +		 * easy iteration.
> +		 */
> +		struct rb_root tree;
> +	} vma;
>   
>   	/**
>   	 * @lut_list: List of vma lookup entries in use for this object.
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index ad76a3309830..55cabb162fe3 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -187,32 +187,47 @@ vma_create(struct drm_i915_gem_object *obj,
>   								i915_gem_object_get_stride(obj));
>   		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
>   
> -		/*
> -		 * We put the GGTT vma at the start of the vma-list, followed
> -		 * by the ppGGTT vma. This allows us to break early when
> -		 * iterating over only the GGTT vma for an object, see
> -		 * for_each_ggtt_vma()
> -		 */
>   		vma->flags |= I915_VMA_GGTT;
> -		list_add(&vma->obj_link, &obj->vma_list);
> -	} else {
> -		list_add_tail(&vma->obj_link, &obj->vma_list);
>   	}
>   
> +	spin_lock(&obj->vma.lock);
> +
>   	rb = NULL;
> -	p = &obj->vma_tree.rb_node;
> +	p = &obj->vma.tree.rb_node;
>   	while (*p) {
>   		struct i915_vma *pos;
> +		long cmp;
>   
>   		rb = *p;
>   		pos = rb_entry(rb, struct i915_vma, obj_node);
> -		if (i915_vma_compare(pos, vm, view) < 0)
> +
> +		cmp = i915_vma_compare(pos, vm, view);
> +		if (cmp == 0) {
> +			spin_unlock(&obj->vma.lock);
> +			kmem_cache_free(vm->i915->vmas, vma);
> +			return pos;

Single unlock & free would be possible with a goto here but perhaps this 
is clearer.

> +		}
> +
> +		if (cmp < 0)

else if?

>   			p = &rb->rb_right;
>   		else
>   			p = &rb->rb_left;
>   	}
>   	rb_link_node(&vma->obj_node, rb, p);
> -	rb_insert_color(&vma->obj_node, &obj->vma_tree);
> +	rb_insert_color(&vma->obj_node, &obj->vma.tree);
> +
> +	if (i915_vma_is_ggtt(vma))
> +		/*
> +		 * We put the GGTT vma at the start of the vma-list, followed
> +		 * by the ppGGTT vma. This allows us to break early when
> +		 * iterating over only the GGTT vma for an object, see
> +		 * for_each_ggtt_vma()
> +		 */
> +		list_add(&vma->obj_link, &obj->vma.list);
> +	else
> +		list_add_tail(&vma->obj_link, &obj->vma.list);
> +
> +	spin_unlock(&obj->vma.lock);
>   
>   	mutex_lock(&vm->mutex);
>   	list_add_tail(&vma->vm_link, &vm->vma_list);
> @@ -232,7 +247,7 @@ vma_lookup(struct drm_i915_gem_object *obj,
>   {
>   	struct rb_node *rb;
>

Assert vma.lock held would be good here.


> -	rb = obj->vma_tree.rb_node;
> +	rb = obj->vma.tree.rb_node;
>   	while (rb) {
>   		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
>   		long cmp;
> @@ -272,16 +287,17 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
>   {
>   	struct i915_vma *vma;
>   
> -	lockdep_assert_held(&obj->base.dev->struct_mutex);
>   	GEM_BUG_ON(view && !i915_is_ggtt(vm));
>   	GEM_BUG_ON(vm->closed);
>   
> +	spin_lock(&obj->vma.lock);
>   	vma = vma_lookup(obj, vm, view);
> -	if (!vma)
> +	spin_unlock(&obj->vma.lock);
> +
> +	if (unlikely(!vma))
>   		vma = vma_create(obj, vm, view);

lookup -> drop lock -> create is racy. Needs a comment why that's okay, 
if it is okay. Otherwise a bit more refactoring to make the sequence atomic.

>   
>   	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
> -	GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);
>   	return vma;
>   }
>   
> @@ -803,14 +819,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>   
>   	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>   
> -	list_del(&vma->obj_link);
> -
>   	mutex_lock(&vma->vm->mutex);
>   	list_del(&vma->vm_link);
>   	mutex_unlock(&vma->vm->mutex);
>   
> -	if (vma->obj)
> -		rb_erase(&vma->obj_node, &vma->obj->vma_tree);
> +	if (vma->obj) {

Perplexing.. we have vma->obj == NULL somewhere or at some point?

> +		struct drm_i915_gem_object *obj = vma->obj;
> +
> +		spin_lock(&obj->vma.lock);
> +		list_del(&vma->obj_link);
> +		rb_erase(&vma->obj_node, &vma->obj->vma.tree);
> +		spin_unlock(&obj->vma.lock);
> +	}
>   
>   	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
>   		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 4f7c1c7599f4..7252abc73d3e 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915);
>    * or the list is empty ofc.
>    */
>   #define for_each_ggtt_vma(V, OBJ) \
> -	list_for_each_entry(V, &(OBJ)->vma_list, obj_link)		\
> +	list_for_each_entry(V, &(OBJ)->vma.list, obj_link)		\
>   		for_each_until(!i915_vma_is_ggtt(V))
>   
>   #endif
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index ffa74290e054..f1008b07dfd2 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -670,7 +670,7 @@ static int igt_vma_partial(void *arg)
>   		}
>   
>   		count = 0;
> -		list_for_each_entry(vma, &obj->vma_list, obj_link)
> +		list_for_each_entry(vma, &obj->vma.list, obj_link)
>   			count++;
>   		if (count != nvma) {
>   			pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n",
> @@ -699,7 +699,7 @@ static int igt_vma_partial(void *arg)
>   		i915_vma_unpin(vma);
>   
>   		count = 0;
> -		list_for_each_entry(vma, &obj->vma_list, obj_link)
> +		list_for_each_entry(vma, &obj->vma.list, obj_link)
>   			count++;
>   		if (count != nvma) {
>   			pr_err("(%s) allocated an extra full vma!\n", p->name);
> 

At which point does a patch come along which actually stops taking 
struct mutex on some path covered by this new lock?

Regards,

Tvrtko


More information about the Intel-gfx mailing list