[Intel-gfx] [PATCH 27/39] drm/i915: Move vma lookup to its own lock
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jan 2 15:07:09 UTC 2019
On 02/01/2019 09:41, Chris Wilson wrote:
> Remove the struct_mutex requirement for looking up the vma for an
> object.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 6 +--
> drivers/gpu/drm/i915/i915_gem.c | 33 +++++++------
> drivers/gpu/drm/i915/i915_gem_object.h | 45 ++++++++++-------
> drivers/gpu/drm/i915/i915_vma.c | 60 +++++++++++++++--------
> drivers/gpu/drm/i915/i915_vma.h | 2 +-
> drivers/gpu/drm/i915/selftests/i915_vma.c | 4 +-
> 6 files changed, 92 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 5e2d5c8d7e02..8059f6dd3886 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -159,14 +159,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
> if (obj->base.name)
> seq_printf(m, " (name: %d)", obj->base.name);
> - list_for_each_entry(vma, &obj->vma_list, obj_link) {
> + list_for_each_entry(vma, &obj->vma.list, obj_link) {
> if (i915_vma_is_pinned(vma))
> pin_count++;
> }
> seq_printf(m, " (pinned x %d)", pin_count);
> if (obj->pin_global)
> seq_printf(m, " (global)");
> - list_for_each_entry(vma, &obj->vma_list, obj_link) {
> + list_for_each_entry(vma, &obj->vma.list, obj_link) {
> if (!drm_mm_node_allocated(&vma->node))
> continue;
>
> @@ -322,7 +322,7 @@ static int per_file_stats(int id, void *ptr, void *data)
> if (obj->base.name || obj->base.dma_buf)
> stats->shared += obj->base.size;
>
> - list_for_each_entry(vma, &obj->vma_list, obj_link) {
> + list_for_each_entry(vma, &obj->vma.list, obj_link) {
> if (!drm_mm_node_allocated(&vma->node))
> continue;
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a954e15c0315..e42ad20d6328 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -438,15 +438,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
> if (ret)
> return ret;
>
> - while ((vma = list_first_entry_or_null(&obj->vma_list,
> - struct i915_vma,
> - obj_link))) {
> + spin_lock(&obj->vma.lock);
> + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
> + struct i915_vma,
> + obj_link))) {
> list_move_tail(&vma->obj_link, &still_in_list);
> + spin_unlock(&obj->vma.lock);
> +
> ret = i915_vma_unbind(vma);
> - if (ret)
> - break;
> +
> + spin_lock(&obj->vma.lock);
> }
> - list_splice(&still_in_list, &obj->vma_list);
> + list_splice(&still_in_list, &obj->vma.list);
> + spin_unlock(&obj->vma.lock);
>
> return ret;
> }
> @@ -3640,7 +3644,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> * reading an invalid PTE on older architectures.
> */
> restart:
> - list_for_each_entry(vma, &obj->vma_list, obj_link) {
> + list_for_each_entry(vma, &obj->vma.list, obj_link) {
> if (!drm_mm_node_allocated(&vma->node))
> continue;
>
> @@ -3718,7 +3722,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> */
> }
>
> - list_for_each_entry(vma, &obj->vma_list, obj_link) {
> + list_for_each_entry(vma, &obj->vma.list, obj_link) {
> if (!drm_mm_node_allocated(&vma->node))
> continue;
>
> @@ -3728,7 +3732,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> }
> }
>
> - list_for_each_entry(vma, &obj->vma_list, obj_link)
> + list_for_each_entry(vma, &obj->vma.list, obj_link)
> vma->node.color = cache_level;
> i915_gem_object_set_cache_coherency(obj, cache_level);
> obj->cache_dirty = true; /* Always invalidate stale cachelines */
> @@ -4304,7 +4308,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
> {
> mutex_init(&obj->mm.lock);
>
> - INIT_LIST_HEAD(&obj->vma_list);
> + spin_lock_init(&obj->vma.lock);
> + INIT_LIST_HEAD(&obj->vma.list);
> +
> INIT_LIST_HEAD(&obj->lut_list);
> INIT_LIST_HEAD(&obj->batch_pool_link);
>
> @@ -4470,14 +4476,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
> mutex_lock(&i915->drm.struct_mutex);
>
> GEM_BUG_ON(i915_gem_object_is_active(obj));
> - list_for_each_entry_safe(vma, vn,
> - &obj->vma_list, obj_link) {
> + list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
> GEM_BUG_ON(i915_vma_is_active(vma));
> vma->flags &= ~I915_VMA_PIN_MASK;
> i915_vma_destroy(vma);
> }
> - GEM_BUG_ON(!list_empty(&obj->vma_list));
> - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
> + GEM_BUG_ON(!list_empty(&obj->vma.list));
> + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
>
> /* This serializes freeing with the shrinker. Since the free
> * is delayed, first by RCU then by the workqueue, we want the
> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> index 49ce797173b5..151453f0f951 100644
> --- a/drivers/gpu/drm/i915/i915_gem_object.h
> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> @@ -86,24 +86,33 @@ struct drm_i915_gem_object {
>
> const struct drm_i915_gem_object_ops *ops;
>
> - /**
> - * @vma_list: List of VMAs backed by this object
> - *
> - * The VMA on this list are ordered by type, all GGTT vma are placed
> - * at the head and all ppGTT vma are placed at the tail. The different
> - * types of GGTT vma are unordered between themselves, use the
> - * @vma_tree (which has a defined order between all VMA) to find an
> - * exact match.
> - */
> - struct list_head vma_list;
> - /**
> - * @vma_tree: Ordered tree of VMAs backed by this object
> - *
> - * All VMA created for this object are placed in the @vma_tree for
> - * fast retrieval via a binary search in i915_vma_instance().
> - * They are also added to @vma_list for easy iteration.
> - */
> - struct rb_root vma_tree;
> + struct {
> + /**
> + * @vma.lock: protect the list/tre of vmas
Typo in the comment above: s/tre/tree/.
> + */
> + struct spinlock lock;
> +
> + /**
> + * @vma.list: List of VMAs backed by this object
> + *
> + * The VMA on this list are ordered by type, all GGTT vma are
> + * placed at the head and all ppGTT vma are placed at the tail.
> + * The different types of GGTT vma are unordered between
> + * themselves, use the @vma.tree (which has a defined order
> + * between all VMA) to quickly find an exact match.
> + */
> + struct list_head list;
> +
> + /**
> + * @vma.tree: Ordered tree of VMAs backed by this object
> + *
> + * All VMA created for this object are placed in the @vma.tree
> + * for fast retrieval via a binary search in
> + * i915_vma_instance(). They are also added to @vma.list for
> + * easy iteration.
> + */
> + struct rb_root tree;
> + } vma;
>
> /**
> * @lut_list: List of vma lookup entries in use for this object.
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index ad76a3309830..55cabb162fe3 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -187,32 +187,47 @@ vma_create(struct drm_i915_gem_object *obj,
> i915_gem_object_get_stride(obj));
> GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
>
> - /*
> - * We put the GGTT vma at the start of the vma-list, followed
> - * by the ppGGTT vma. This allows us to break early when
> - * iterating over only the GGTT vma for an object, see
> - * for_each_ggtt_vma()
> - */
> vma->flags |= I915_VMA_GGTT;
> - list_add(&vma->obj_link, &obj->vma_list);
> - } else {
> - list_add_tail(&vma->obj_link, &obj->vma_list);
> }
>
> + spin_lock(&obj->vma.lock);
> +
> rb = NULL;
> - p = &obj->vma_tree.rb_node;
> + p = &obj->vma.tree.rb_node;
> while (*p) {
> struct i915_vma *pos;
> + long cmp;
>
> rb = *p;
> pos = rb_entry(rb, struct i915_vma, obj_node);
> - if (i915_vma_compare(pos, vm, view) < 0)
> +
> + cmp = i915_vma_compare(pos, vm, view);
> + if (cmp == 0) {
> + spin_unlock(&obj->vma.lock);
> + kmem_cache_free(vm->i915->vmas, vma);
> + return pos;
Single unlock & free would be possible with a goto here but perhaps this
is clearer.
> + }
> +
> + if (cmp < 0)
else if?
> p = &rb->rb_right;
> else
> p = &rb->rb_left;
> }
> rb_link_node(&vma->obj_node, rb, p);
> - rb_insert_color(&vma->obj_node, &obj->vma_tree);
> + rb_insert_color(&vma->obj_node, &obj->vma.tree);
> +
> + if (i915_vma_is_ggtt(vma))
> + /*
> + * We put the GGTT vma at the start of the vma-list, followed
> + * by the ppGGTT vma. This allows us to break early when
> + * iterating over only the GGTT vma for an object, see
> + * for_each_ggtt_vma()
> + */
> + list_add(&vma->obj_link, &obj->vma.list);
> + else
> + list_add_tail(&vma->obj_link, &obj->vma.list);
> +
> + spin_unlock(&obj->vma.lock);
>
> mutex_lock(&vm->mutex);
> list_add_tail(&vma->vm_link, &vm->vma_list);
> @@ -232,7 +247,7 @@ vma_lookup(struct drm_i915_gem_object *obj,
> {
> struct rb_node *rb;
>
An assert that vma.lock is held would be good here.
> - rb = obj->vma_tree.rb_node;
> + rb = obj->vma.tree.rb_node;
> while (rb) {
> struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
> long cmp;
> @@ -272,16 +287,17 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
> {
> struct i915_vma *vma;
>
> - lockdep_assert_held(&obj->base.dev->struct_mutex);
> GEM_BUG_ON(view && !i915_is_ggtt(vm));
> GEM_BUG_ON(vm->closed);
>
> + spin_lock(&obj->vma.lock);
> vma = vma_lookup(obj, vm, view);
> - if (!vma)
> + spin_unlock(&obj->vma.lock);
> +
> + if (unlikely(!vma))
> vma = vma_create(obj, vm, view);
lookup -> drop lock -> create is racy. Needs a comment why that's okay,
if it is okay. Otherwise a bit more refactoring to make the sequence atomic.
>
> GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
> - GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);
> return vma;
> }
>
> @@ -803,14 +819,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>
> GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
>
> - list_del(&vma->obj_link);
> -
> mutex_lock(&vma->vm->mutex);
> list_del(&vma->vm_link);
> mutex_unlock(&vma->vm->mutex);
>
> - if (vma->obj)
> - rb_erase(&vma->obj_node, &vma->obj->vma_tree);
> + if (vma->obj) {
Perplexing... do we have vma->obj == NULL somewhere, or at some point?
> + struct drm_i915_gem_object *obj = vma->obj;
> +
> + spin_lock(&obj->vma.lock);
> + list_del(&vma->obj_link);
> + rb_erase(&vma->obj_node, &vma->obj->vma.tree);
> + spin_unlock(&obj->vma.lock);
> + }
>
> rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
> GEM_BUG_ON(i915_gem_active_isset(&iter->base));
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 4f7c1c7599f4..7252abc73d3e 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915);
> * or the list is empty ofc.
> */
> #define for_each_ggtt_vma(V, OBJ) \
> - list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \
> + list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \
> for_each_until(!i915_vma_is_ggtt(V))
>
> #endif
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index ffa74290e054..f1008b07dfd2 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -670,7 +670,7 @@ static int igt_vma_partial(void *arg)
> }
>
> count = 0;
> - list_for_each_entry(vma, &obj->vma_list, obj_link)
> + list_for_each_entry(vma, &obj->vma.list, obj_link)
> count++;
> if (count != nvma) {
> pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n",
> @@ -699,7 +699,7 @@ static int igt_vma_partial(void *arg)
> i915_vma_unpin(vma);
>
> count = 0;
> - list_for_each_entry(vma, &obj->vma_list, obj_link)
> + list_for_each_entry(vma, &obj->vma.list, obj_link)
> count++;
> if (count != nvma) {
> pr_err("(%s) allocated an extra full vma!\n", p->name);
>
At which point does a patch come along which actually stops taking
struct_mutex on some path covered by this new lock?
Regards,
Tvrtko
More information about the Intel-gfx
mailing list