[Intel-gfx] [PATCH 6/6] drm/i915: Store the vma in an rbtree under the object

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Nov 1 09:43:53 UTC 2016


On 31/10/2016 10:26, Chris Wilson wrote:
> With full-ppgtt one of the main bottlenecks is the lookup of the VMA
> underneath the object. For execbuf there is merit in having a very fast
> direct lookup of ctx:handle to the vma using a hashtree, but that still
> leaves a large number of other lookups. One way to speed up the lookup
> would be to use a rhashtable, but that requires extra allocations and
> may exhibit poor worse case behaviour. An alternative is to use an
> embedded rbtree, i.e. no extra allocations and deterministic behaviour,
> but at the slight cost of O(lgN) lookups (instead of O(1) for
> rhashtable). The major of such tree will be very shallow and so not much
> slower, and still scales much, much better than the current unsorted
> list.
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=87726
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h     |  1 +
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 80 +++++++++++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
>  3 files changed, 57 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7a18bf66f797..e923d6596cac 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2230,6 +2230,7 @@ struct drm_i915_gem_object {
>
>  	/** List of VMAs backed by this object */
>  	struct list_head vma_list;
> +	struct rb_root vma_tree;
>
>  	/** Stolen memory for this object, instead of being backed by shmem. */
>  	struct drm_mm_node *stolen;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index e7afad585929..aa2d21c41091 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3399,6 +3399,7 @@ void i915_vma_destroy(struct i915_vma *vma)
>  	GEM_BUG_ON(!i915_vma_is_closed(vma));
>  	GEM_BUG_ON(vma->fence);
>
> +	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
>  	list_del(&vma->vm_link);
>  	if (!i915_vma_is_ggtt(vma))
>  		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
> @@ -3416,12 +3417,33 @@ void i915_vma_close(struct i915_vma *vma)
>  		WARN_ON(i915_vma_unbind(vma));
>  }
>
> +static inline int vma_compare(struct i915_vma *vma,
> +			      struct i915_address_space *vm,
> +			      const struct i915_ggtt_view *view)
> +{
> +	GEM_BUG_ON(view && !i915_vma_is_ggtt(vma));
> +
> +	if (vma->vm != vm)
> +		return vma->vm - vm;

This can theoretically overflow so I think long should be the return type.

> +
> +	if (!view)
> +		return vma->ggtt_view.type;
> +
> +	if (vma->ggtt_view.type != view->type)
> +		return vma->ggtt_view.type - view->type;
> +
> +	return memcmp(&vma->ggtt_view.params,
> +		      &view->params,
> +		      sizeof(view->params));
> +}
> +
>  static struct i915_vma *
>  __i915_vma_create(struct drm_i915_gem_object *obj,
>  		  struct i915_address_space *vm,
>  		  const struct i915_ggtt_view *view)
>  {
>  	struct i915_vma *vma;
> +	struct rb_node *rb, **p;
>  	int i;
>
>  	GEM_BUG_ON(vm->closed);
> @@ -3455,33 +3477,28 @@ __i915_vma_create(struct drm_i915_gem_object *obj,
>
>  	if (i915_is_ggtt(vm)) {
>  		vma->flags |= I915_VMA_GGTT;
> +		list_add(&vma->obj_link, &obj->vma_list);
>  	} else {
>  		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
> +		list_add_tail(&vma->obj_link, &obj->vma_list);
>  	}

Will this make a difference to anything since it is not used for 
lookups? I suppose it makes a nicer debugfs output if nothing else so 
not complaining.

>
> -	list_add_tail(&vma->obj_link, &obj->vma_list);
> -	return vma;
> -}
> +	rb = NULL;
> +	p = &obj->vma_tree.rb_node;
> +	while (*p) {
> +		struct i915_vma *pos;
>
> -static inline bool vma_matches(struct i915_vma *vma,
> -			       struct i915_address_space *vm,
> -			       const struct i915_ggtt_view *view)
> -{
> -	if (vma->vm != vm)
> -		return false;
> -
> -	if (!i915_vma_is_ggtt(vma))
> -		return true;
> -
> -	if (!view)
> -		return vma->ggtt_view.type == 0;
> -
> -	if (vma->ggtt_view.type != view->type)
> -		return false;
> +		rb = *p;
> +		pos = rb_entry(rb, struct i915_vma, obj_node);
> +		if (vma_compare(pos, vm, view) < 0)
> +			p = &rb->rb_right;
> +		else
> +			p = &rb->rb_left;
> +	}
> +	rb_link_node(&vma->obj_node, rb, p);
> +	rb_insert_color(&vma->obj_node, &obj->vma_tree);
>
> -	return memcmp(&vma->ggtt_view.params,
> -		      &view->params,
> -		      sizeof(view->params)) == 0;
> +	return vma;
>  }
>
>  struct i915_vma *
> @@ -3501,11 +3518,22 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
>  		    const struct i915_ggtt_view *view)
>  {
> -	struct i915_vma *vma;
> +	struct rb_node *rb;
> +
> +	rb = obj->vma_tree.rb_node;
> +	while (rb) {
> +		struct i915_vma *vma;
> +		int cmp;
>
> -	list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
> -		if (vma_matches(vma, vm, view))
> +		vma = rb_entry(rb, struct i915_vma, obj_node);
> +		cmp = vma_compare(vma, vm, view);
> +		if (cmp == 0)
>  			return vma;
> +		else if (cmp < 0)
> +			rb = rb->rb_right;
> +		else
> +			rb = rb->rb_left;
> +	}
>
>  	return NULL;
>  }
> @@ -3521,8 +3549,10 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
>  	GEM_BUG_ON(view && !i915_is_ggtt(vm));
>
>  	vma = i915_gem_obj_to_vma(obj, vm, view);
> -	if (!vma)
> +	if (!vma) {
>  		vma = __i915_vma_create(obj, vm, view);
> +		GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view));

Nice touch which makes the review so much easier! :)

> +	}
>
>  	GEM_BUG_ON(i915_vma_is_closed(vma));
>  	return vma;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 518e75b64290..c23ef9db1f53 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -227,6 +227,7 @@ struct i915_vma {
>  	struct list_head vm_link;
>
>  	struct list_head obj_link; /* Link in the object's VMA list */
> +	struct rb_node obj_node;
>
>  	/** This vma's place in the batchbuffer or on the eviction list */
>  	struct list_head exec_list;
>

LGTM in general.

Just the comparison return type (and accompanying locals) and then:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko


More information about the Intel-gfx mailing list