[Intel-gfx] [PATCH 1/2] drm/i915: Allow overlapping userptr objects

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Jul 23 15:23:53 CEST 2014


On 07/21/2014 01:21 PM, Chris Wilson wrote:
> Whilst I strongly advise against doing so for the implicit coherency
> issues between the multiple buffer objects accessing the same backing
> store, it nevertheless is a valid use case, akin to mmaping the same
> file multiple times.
>
> The reason why we forbade it earlier was that our use of the interval
> tree for fast invalidation upon vma changes excluded overlapping
> objects. So in the case where the user wishes to create such pairs of
> overlapping objects, we degrade the range invalidation to walkin the
> linear list of objects associated with the mm.
>
> A situation where overlapping objects could arise is the lax implementation
> of MIT-SHM Pixmaps in the xserver. A second situation is where the user
> wishes to have different access modes to a region of memory (e.g. access
> through a read-only userptr buffer and through a normal userptr buffer).
>
> v2: Compile for mmu-notifiers after tweaking
> v3: Rename is_linear/has_linear
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: "Li, Victor Y" <victor.y.li at intel.com>
> Cc: "Kelley, Sean V" <sean.v.kelley at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Cc: "Gong, Zhipeng" <zhipeng.gong at intel.com>
> Cc: Akash Goel <akash.goel at intel.com>
> Cc: "Volkin, Bradley D" <bradley.d.volkin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_userptr.c | 142 ++++++++++++++++++++++++--------
>   1 file changed, 106 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index b41614d..74c45da 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -40,19 +40,87 @@ struct i915_mmu_notifier {
>   	struct hlist_node node;
>   	struct mmu_notifier mn;
>   	struct rb_root objects;
> +	struct list_head linear;
>   	struct drm_device *dev;
>   	struct mm_struct *mm;
>   	struct work_struct work;
>   	unsigned long count;
>   	unsigned long serial;
> +	bool has_linear;
>   };
>
>   struct i915_mmu_object {
>   	struct i915_mmu_notifier *mmu;
>   	struct interval_tree_node it;
> +	struct list_head link;
>   	struct drm_i915_gem_object *obj;
> +	bool is_linear;
>   };
>
> +static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	unsigned long end;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	/* Cancel any active worker and force us to re-evaluate gup */
> +	obj->userptr.work = NULL;
> +
> +	if (obj->pages != NULL) {
> +		struct drm_i915_private *dev_priv = to_i915(dev);
> +		struct i915_vma *vma, *tmp;
> +		bool was_interruptible;
> +
> +		was_interruptible = dev_priv->mm.interruptible;
> +		dev_priv->mm.interruptible = false;
> +
> +		list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
> +			int ret = i915_vma_unbind(vma);
> +			WARN_ON(ret && ret != -EIO);
> +		}
> +		WARN_ON(i915_gem_object_put_pages(obj));
> +
> +		dev_priv->mm.interruptible = was_interruptible;
> +	}
> +
> +	end = obj->userptr.ptr + obj->base.size;
> +
> +	drm_gem_object_unreference(&obj->base);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return end;
> +}
> +
> +static void invalidate_range__linear(struct i915_mmu_notifier *mn,
> +				     struct mm_struct *mm,
> +				     unsigned long start,
> +				     unsigned long end)
> +{
> +	struct i915_mmu_object *mmu;
> +	unsigned long serial;
> +
> +restart:
> +	serial = mn->serial;
> +	list_for_each_entry(mmu, &mn->linear, link) {
> +		struct drm_i915_gem_object *obj;
> +
> +		if (mmu->it.last < start || mmu->it.start > end)
> +			continue;
> +
> +		obj = mmu->obj;
> +		drm_gem_object_reference(&obj->base);
> +		spin_unlock(&mn->lock);
> +
> +		cancel_userptr(obj);
> +
> +		spin_lock(&mn->lock);
> +		if (serial != mn->serial)
> +			goto restart;
> +	}
> +
> +	spin_unlock(&mn->lock);
> +}
> +
>   static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   						       struct mm_struct *mm,
>   						       unsigned long start,
> @@ -60,16 +128,19 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   {
>   	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
>   	struct interval_tree_node *it = NULL;
> +	unsigned long next = start;
>   	unsigned long serial = 0;
>
>   	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> -	while (start < end) {
> +	while (next < end) {
>   		struct drm_i915_gem_object *obj;
>
>   		obj = NULL;
>   		spin_lock(&mn->lock);
> +		if (mn->has_linear)
> +			return invalidate_range__linear(mn, mm, start, end);
>   		if (serial == mn->serial)
> -			it = interval_tree_iter_next(it, start, end);
> +			it = interval_tree_iter_next(it, next, end);
>   		else
>   			it = interval_tree_iter_first(&mn->objects, start, end);
>   		if (it != NULL) {
> @@ -81,31 +152,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>   		if (obj == NULL)
>   			return;
>
> -		mutex_lock(&mn->dev->struct_mutex);
> -		/* Cancel any active worker and force us to re-evaluate gup */
> -		obj->userptr.work = NULL;
> -
> -		if (obj->pages != NULL) {
> -			struct drm_i915_private *dev_priv = to_i915(mn->dev);
> -			struct i915_vma *vma, *tmp;
> -			bool was_interruptible;
> -
> -			was_interruptible = dev_priv->mm.interruptible;
> -			dev_priv->mm.interruptible = false;
> -
> -			list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
> -				int ret = i915_vma_unbind(vma);
> -				WARN_ON(ret && ret != -EIO);
> -			}
> -			WARN_ON(i915_gem_object_put_pages(obj));
> -
> -			dev_priv->mm.interruptible = was_interruptible;
> -		}
> -
> -		start = obj->userptr.ptr + obj->base.size;
> -
> -		drm_gem_object_unreference(&obj->base);
> -		mutex_unlock(&mn->dev->struct_mutex);
> +		next = cancel_userptr(obj);
>   	}
>   }
>
> @@ -151,6 +198,8 @@ i915_mmu_notifier_get(struct drm_device *dev, struct mm_struct *mm)
>   	mmu->objects = RB_ROOT;
>   	mmu->count = 0;
>   	mmu->serial = 1;
> +	INIT_LIST_HEAD(&mmu->linear);
> +	mmu->has_linear = false;
>
>   	/* Protected by mmap_sem (write-lock) */
>   	ret = __mmu_notifier_register(&mmu->mn, mm);
> @@ -197,6 +246,17 @@ static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
>   		mmu->serial = 1;
>   }
>
> +static bool i915_mmu_notifier_has_linear(struct i915_mmu_notifier *mmu)
> +{
> +	struct i915_mmu_object *mn;
> +
> +	list_for_each_entry(mn, &mmu->linear, link)
> +		if (mn->is_linear)
> +			return true;
> +
> +	return false;
> +}
> +
>   static void
>   i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
>   		      struct i915_mmu_object *mn)
> @@ -204,7 +264,11 @@ i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
>   	lockdep_assert_held(&mmu->dev->struct_mutex);
>
>   	spin_lock(&mmu->lock);
> -	interval_tree_remove(&mn->it, &mmu->objects);
> +	list_del(&mn->link);
> +	if (mn->is_linear)
> +		mmu->has_linear = i915_mmu_notifier_has_linear(mmu);
> +	else
> +		interval_tree_remove(&mn->it, &mmu->objects);
>   	__i915_mmu_notifier_update_serial(mmu);
>   	spin_unlock(&mmu->lock);
>
> @@ -230,7 +294,6 @@ i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
>   	 */
>   	i915_gem_retire_requests(mmu->dev);
>
> -	/* Disallow overlapping userptr objects */
>   	spin_lock(&mmu->lock);
>   	it = interval_tree_iter_first(&mmu->objects,
>   				      mn->it.start, mn->it.last);
> @@ -243,14 +306,22 @@ i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
>   		 * to flush their object references upon which the object will
>   		 * be removed from the interval-tree, or the the range is
>   		 * still in use by another client and the overlap is invalid.
> +		 *
> +		 * If we do have an overlap, we cannot use the interval tree
> +		 * for fast range invalidation.
>   		 */
>
>   		obj = container_of(it, struct i915_mmu_object, it)->obj;
> -		ret = obj->userptr.workers ? -EAGAIN : -EINVAL;
> -	} else {
> +		if (!obj->userptr.workers)
> +			mmu->has_linear = mn->is_linear = true;
> +		else
> +			ret = -EAGAIN;
> +	} else
>   		interval_tree_insert(&mn->it, &mmu->objects);
> +
> +	if (ret == 0) {
> +		list_add(&mn->link, &mmu->linear);
>   		__i915_mmu_notifier_update_serial(mmu);
> -		ret = 0;
>   	}
>   	spin_unlock(&mmu->lock);
>   	mutex_unlock(&mmu->dev->struct_mutex);
> @@ -611,12 +682,11 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
>    * We impose several restrictions upon the memory being mapped
>    * into the GPU.
>    * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
> - * 2. It cannot overlap any other userptr object in the same address space.
> - * 3. It must be normal system memory, not a pointer into another map of IO
> + * 2. It must be normal system memory, not a pointer into another map of IO
>    *    space (e.g. it must not be a GTT mmapping of another object).
> - * 4. We only allow a bo as large as we could in theory map into the GTT,
> + * 3. We only allow a bo as large as we could in theory map into the GTT,
>    *    that is we limit the size to the total size of the GTT.
> - * 5. The bo is marked as being snoopable. The backing pages are left
> + * 4. The bo is marked as being snoopable. The backing pages are left
>    *    accessible directly by the CPU, but reads and writes by the GPU may
>    *    incur the cost of a snoop (unless you have an LLC architecture).
>    *

Looks fine. Performance impact is potentially big as we discussed but I 
suppose we can leave that for later if an issue. So:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

I think it would be good to add some more tests to cover the tracking 
"handover" between the interval tree and linear list to ensure 
invalidation still works correctly in non-trivial cases. Code looks 
correct in that respect but just in case. It is not a top priority so 
not sure when I'll find time to actually do it.

Tvrtko



More information about the Intel-gfx mailing list