[Intel-gfx] [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl

Volkin, Bradley D bradley.d.volkin at intel.com
Mon Jan 27 18:56:12 CET 2014


Hi Chris,

A few questions/comments throughout. I may be off the mark on some. Please
bear with me as I try to get more familiar with the gem code.

Thanks,
Brad

[ snip ]

On Fri, Jan 24, 2014 at 01:00:19AM -0800, Chris Wilson wrote:
> +static void
> +__i915_mmu_notifier_destroy_worker(struct work_struct *work)
> +{
> +	struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
> +	mmu_notifier_unregister(&mmu->mn, mmu->mm);
> +	kfree(mmu);
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
> +{
> +	hash_del(&mmu->node);
> +	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
> +	schedule_work(&mmu->work);

The commit message mentions a potential lock inversion as the reason for using a wq.
A comment with the details might be helpful.
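
Something along these lines is what I had in mind (the wording is my guess at the
inversion from the commit message, so please correct it):

static void
__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
{
	hash_del(&mmu->node);

	/* XXX wording guessed from the commit message:
	 * mmu_notifier_unregister() acquires mm locks that nest the other
	 * way around to the locks held on this path, so defer the
	 * unregister to a worker to avoid the potential lock inversion.
	 */
	INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
	schedule_work(&mmu->work);
}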

> +}
> +
> +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
> +{
> +	if (++mmu->serial == 0)
> +		mmu->serial = 1;
> +}
> +
> +static void
> +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	bool destroy;
> +
> +	spin_lock(&mmu->lock);
> +	interval_tree_remove(&mn->it, &mmu->objects);
> +	destroy = --mmu->count == 0;
> +	__i915_mmu_notifier_update_serial(mmu);
> +	spin_unlock(&mmu->lock);
> +
> +	if (destroy) /* protected against _add() by struct_mutex */
> +		__i915_mmu_notifier_destroy(mmu);

I see that we should hold struct_mutex when this function is called,
but I don't see that we try to get the mutex anywhere on the _add() path.
Have I missed something?
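
If the expectation is that struct_mutex serialises _add()/_del(), an assertion
would both document that and catch the _add() path if it really is unlocked.
Untested sketch:

static void
i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
		      struct i915_mmu_object *mn)
{
	bool destroy;

	/* make the "protected by struct_mutex" claim checkable */
	lockdep_assert_held(&mn->obj->base.dev->struct_mutex);

	spin_lock(&mmu->lock);
	interval_tree_remove(&mn->it, &mmu->objects);
	destroy = --mmu->count == 0;
	__i915_mmu_notifier_update_serial(mmu);
	spin_unlock(&mmu->lock);

	if (destroy) /* protected against _add() by struct_mutex */
		__i915_mmu_notifier_destroy(mmu);
}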

> +}
> +
> +static int
> +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> +		      struct i915_mmu_object *mn)
> +{
> +	int ret = -EINVAL;
> +
> +	spin_lock(&mmu->lock);
> +	/* Disallow overlapping userptr objects */
> +	if (!interval_tree_iter_first(&mmu->objects,
> +				      mn->it.start, mn->it.last)) {
> +		interval_tree_insert(&mn->it, &mmu->objects);
> +		mmu->count++;
> +		__i915_mmu_notifier_update_serial(mmu);
> +		ret = 0;
> +	}
> +	spin_unlock(&mmu->lock);
> +
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_mmu_object *mn;
> +
> +	mn = obj->userptr.mn;
> +	if (mn == NULL)
> +		return;
> +
> +	i915_mmu_notifier_del(mn->mmu, mn);
> +	obj->userptr.mn = NULL;
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	struct i915_mmu_notifier *mmu;
> +	struct i915_mmu_object *mn;
> +	int ret;
> +
> +	if (flags & I915_USERPTR_UNSYNCHRONIZED)
> +		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> +	mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
> +	if (IS_ERR(mmu))
> +		return PTR_ERR(mmu);
> +
> +	mn = kzalloc(sizeof(*mn), GFP_KERNEL);
> +	if (mn == NULL) {
> +		ret = -ENOMEM;
> +		goto destroy_mmu;
> +	}
> +
> +	mn->mmu = mmu;
> +	mn->it.start = obj->userptr.ptr;
> +	mn->it.last = mn->it.start + obj->base.size - 1;
> +	mn->obj = obj;
> +
> +	ret = i915_mmu_notifier_add(mmu, mn);
> +	if (ret)
> +		goto free_mn;
> +
> +	obj->userptr.mn = mn;
> +	return 0;
> +
> +free_mn:
> +	kfree(mn);
> +destroy_mmu:
> +	if (mmu->count == 0)
> +		__i915_mmu_notifier_destroy(mmu);

Other accesses to mmu->count are protected by mmu->lock. Again, I may have missed
something, but I don't immediately see why the lock isn't required here as well.
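
If the lock does turn out to be needed on this error path, a locked read (with a
bool destroy declared at the top of the function) could look like:

free_mn:
	kfree(mn);
destroy_mmu:
	spin_lock(&mmu->lock);
	destroy = mmu->count == 0;
	spin_unlock(&mmu->lock);
	if (destroy)
		__i915_mmu_notifier_destroy(mmu);
	return ret;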

> +	return ret;
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> +				    unsigned flags)
> +{
> +	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> +		return -ENODEV;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return 0;
> +}
> +#endif
> +
> +struct get_pages_work {
> +	struct work_struct work;
> +	struct drm_i915_gem_object *obj;
> +	struct task_struct *task;
> +};
> +
> +static void
> +__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> +{
> +	struct get_pages_work *work = container_of(_work, typeof(*work), work);
> +	struct drm_i915_gem_object *obj = work->obj;
> +	struct drm_device *dev = obj->base.dev;
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	ret = -ENOMEM;
> +	pinned = 0;
> +
> +	pvec = kmalloc(num_pages*sizeof(struct page *),
> +		       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +	if (pvec == NULL)
> +		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +	if (pvec != NULL) {
> +		struct mm_struct *mm = obj->userptr.mm;
> +
> +		use_mm(mm);
> +		down_read(&mm->mmap_sem);
> +		while (pinned < num_pages) {
> +			ret = get_user_pages(work->task, mm,
> +					     obj->userptr.ptr + pinned * PAGE_SIZE,
> +					     num_pages - pinned,
> +					     !obj->userptr.read_only, 0,
> +					     pvec + pinned, NULL);
> +			if (ret < 0)
> +				break;
> +
> +			pinned += ret;
> +		}
> +		up_read(&mm->mmap_sem);
> +		unuse_mm(mm);
> +	}
> +
> +	mutex_lock(&dev->struct_mutex);
> +	if (obj->userptr.work != &work->work) {
> +		ret = 0;
> +	} else if (pinned == num_pages) {
> +		struct sg_table *st;
> +
> +		st = kmalloc(sizeof(*st), GFP_KERNEL);
> +		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +			kfree(st);
> +			ret = -ENOMEM;
> +		} else {
> +			struct scatterlist *sg;
> +			int n;
> +
> +			for_each_sg(st->sgl, sg, num_pages, n)
> +				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +			obj->pages = st;
> +			list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
> +			pinned = 0;
> +			ret = 0;
> +		}
> +	}
> +
> +	obj->userptr.work = ERR_PTR(ret);
> +	drm_gem_object_unreference(&obj->base);
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +
> +	put_task_struct(work->task);
> +	kfree(work);
> +}
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	const int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct page **pvec;
> +	int pinned, ret;
> +
> +	/* If userspace should engineer that these pages are replaced in
> +	 * the vma between us binding this page into the GTT and completion
> +	 * of rendering... Their loss. If they change the mapping of their
> +	 * pages they need to create a new bo to point to the new vma.
> +	 *
> +	 * However, that still leaves open the possibility of the vma
> +	 * being copied upon fork. Which falls under the same userspace
> +	 * synchronisation issue as a regular bo, except that this time
> +	 * the process may not be expecting that a particular piece of
> +	 * memory is tied to the GPU.
> +	 *
> +	 * Fortunately, we can hook into the mmu_notifier in order to
> +	 * discard the page references prior to anything nasty happening
> +	 * to the vma (discard or cloning) which should prevent the more
> +	 * egregious cases from causing harm.
> +	 */
> +
> +	pvec = NULL;
> +	pinned = 0;
> +	if (obj->userptr.mm == current->mm) {
> +		pvec = kmalloc(num_pages*sizeof(struct page *),
> +			       GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> +		if (pvec == NULL) {
> +			pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +			if (pvec == NULL)
> +				return -ENOMEM;
> +		}
> +
> +		pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> +					       !obj->userptr.read_only, pvec);
> +	}
> +	if (pinned < num_pages) {
> +		if (pinned < 0) {
> +			ret = pinned;
> +			pinned = 0;
> +		} else {
> +			/* Spawn a worker so that we can acquire the
> +			 * user pages without holding our mutex.
> +			 */
> +			ret = -EAGAIN;
> +			if (obj->userptr.work == NULL) {
> +				struct get_pages_work *work;
> +
> +				work = kmalloc(sizeof(*work), GFP_KERNEL);
> +				if (work != NULL) {
> +					obj->userptr.work = &work->work;
> +
> +					work->obj = obj;
> +					drm_gem_object_reference(&obj->base);
> +
> +					work->task = current;
> +					get_task_struct(work->task);
> +
> +					INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> +					schedule_work(&work->work);

Any reason to use the system wq instead of the driver wq here?
It doesn't look like the usual "takes modeset locks" justification applies.
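
If the driver wq is appropriate here (I haven't checked for a reason it can't be
used), the change would just be:

	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
	queue_work(to_i915(obj->base.dev)->wq, &work->work);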

> +				} else
> +					ret = -ENOMEM;
> +			} else {
> +				if (IS_ERR(obj->userptr.work)) {

Could this be flattened to "} else if (...)" to drop a level of nesting?
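Untested, just a restructuring of the code above:

			if (obj->userptr.work == NULL) {
				/* ... spawn the worker, as above ... */
			} else if (IS_ERR(obj->userptr.work)) {
				ret = PTR_ERR(obj->userptr.work);
				obj->userptr.work = NULL;
			}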

> +					ret = PTR_ERR(obj->userptr.work);
> +					obj->userptr.work = NULL;
> +				}
> +			}
> +		}
> +	} else {
> +		struct sg_table *st;
> +
> +		st = kmalloc(sizeof(*st), GFP_KERNEL);
> +		if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +			kfree(st);
> +			ret = -ENOMEM;
> +		} else {
> +			struct scatterlist *sg;
> +			int n;
> +
> +			for_each_sg(st->sgl, sg, num_pages, n)
> +				sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> +			obj->pages = st;
> +			obj->userptr.work = NULL;
> +
> +			pinned = 0;
> +			ret = 0;
> +		}

This block is almost identical to code in the worker. Would it be worth extracting
the common parts into a helper?
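
Rough sketch of the shape I had in mind (the helper name is just a placeholder,
and I haven't compiled it):

static struct sg_table *
st_set_pages(struct page **pvec, int num_pages)
{
	struct sg_table *st;
	struct scatterlist *sg;
	int n;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return ERR_PTR(-ENOMEM);

	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
		kfree(st);
		return ERR_PTR(-ENOMEM);
	}

	for_each_sg(st->sgl, sg, num_pages, n)
		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);

	return st;
}

Both callers would then just check IS_ERR() and set obj->pages plus the
list/work state as appropriate.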

> +	}
> +
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	if (obj->madv != I915_MADV_WILLNEED)
> +		obj->dirty = 0;

This is subtly different from the similar code in the standard put_pages(), in that
it sets dirty=0 for both DONTNEED and PURGED instead of just DONTNEED (w/ BUG_ON(PURGED)).
I don't think we will ever actually truncate userptr objects, so is there any reason for
this to be different?
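
For reference, the standard path is roughly (from memory, so double-check):

	BUG_ON(obj->madv == __I915_MADV_PURGED);
	...
	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;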

> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +		struct page *page = sg_page(sg);
> +
> +		if (obj->dirty)
> +			set_page_dirty(page);
> +
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +	}
> +	obj->dirty = 0;
> +
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +
> +	BUG_ON(obj->userptr.work != NULL);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> +	i915_gem_userptr_release__mmu_notifier(obj);
> +
> +	if (obj->userptr.mm) {
> +		mmput(obj->userptr.mm);
> +		obj->userptr.mm = NULL;
> +	}
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> +	.get_pages = i915_gem_userptr_get_pages,
> +	.put_pages = i915_gem_userptr_put_pages,
> +	.release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some normal memory from the process
> + * context - user memory.
> + *
> + * We impose several restrictions upon the memory being mapped
> + * into the GPU.
> + * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
> + * 2. We only allow a bo as large as we could in theory map into the GTT,
> + *    that is we limit the size to the total size of the GTT.
> + * 3. The bo is marked as being snoopable. The backing pages are left
> + *    accessible directly by the CPU, but reads by the GPU may incur the cost
> + *    of a snoop (unless you have an LLC architecture).

Perhaps also list a 4th restriction here: overlapping userptr ranges are not
allowed (as enforced by i915_mmu_notifier_add()).

- Brad

> + *
> + * Synchronisation between multiple users and the GPU is left to userspace
> + * through the normal set-domain-ioctl. The kernel will enforce that the
> + * GPU relinquishes the VMA before it is returned back to the system
> + * i.e. upon free(), munmap() or process termination. However, the userspace
> + * malloc() library may not immediately relinquish the VMA after free() and
> + * instead reuse it whilst the GPU is still reading and writing to the VMA.
> + * Caveat emptor.
> + *
> + * Also note, that the object created here is not currently a "first class"
> + * object, in that several ioctls are banned. These are the CPU access
> + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
> + * direct access via your pointer rather than use those ioctls.
> + *
> + * If you think this is a good interface to use to pass GPU memory between
> + * drivers, please use dma-buf instead. In fact, wherever possible use
> + * dma-buf instead.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_userptr *args = data;
> +	struct drm_i915_gem_object *obj;
> +	int ret;
> +	u32 handle;
> +
> +	if (args->flags & ~(I915_USERPTR_READ_ONLY |
> +			    I915_USERPTR_UNSYNCHRONIZED))
> +		return -EINVAL;
> +
> +	if (offset_in_page(args->user_ptr | args->user_size))
> +		return -EINVAL;
> +
> +	if (args->user_size > dev_priv->gtt.base.total)
> +		return -E2BIG;
> +
> +	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
> +		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
> +		return -EFAULT;
> +
> +	/* Allocate the new object */
> +	obj = i915_gem_object_alloc(dev);
> +	if (obj == NULL)
> +		return -ENOMEM;
> +
> +	drm_gem_private_object_init(dev, &obj->base, args->user_size);
> +	i915_gem_object_init(obj, &i915_gem_userptr_ops);
> +	obj->cache_level = I915_CACHE_LLC;
> +	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +
> +	obj->userptr.ptr = args->user_ptr;
> +	obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> +
> +	/* And keep a pointer to the current->mm for resolving the user pages
> +	 * at binding. This means that we need to hook into the mmu_notifier
> +	 * in order to detect if the mmu is destroyed.
> +	 */
> +	ret = -ENOMEM;
> +	if ((obj->userptr.mm = get_task_mm(current)))
> +		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> +	if (ret == 0)
> +		ret = drm_gem_handle_create(file, &obj->base, &handle);
> +
> +	/* drop reference from allocate - handle holds it now */
> +	drm_gem_object_unreference_unlocked(&obj->base);
> +	if (ret)
> +		return ret;
> +
> +	args->handle = handle;
> +	return 0;
> +}
> +
> +int
> +i915_gem_init_userptr(struct drm_device *dev)
> +{
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	struct drm_i915_private *dev_priv = to_i915(dev);
> +	hash_init(dev_priv->mmu_notifiers);
> +#endif
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ae8cf61b8ce3..cce9f559e3d7 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
>  		err_puts(m, tiling_flag(err->tiling));
>  		err_puts(m, dirty_flag(err->dirty));
>  		err_puts(m, purgeable_flag(err->purgeable));
> +		err_puts(m, err->userptr ? " userptr" : "");
>  		err_puts(m, err->ring != -1 ? " " : "");
>  		err_puts(m, ring_str(err->ring));
>  		err_puts(m, i915_cache_level_str(err->cache_level));
> @@ -584,6 +585,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>  	err->tiling = obj->tiling_mode;
>  	err->dirty = obj->dirty;
>  	err->purgeable = obj->madv != I915_MADV_WILLNEED;
> +	err->userptr = obj->userptr.mm != 0;
>  	err->ring = obj->ring ? obj->ring->id : -1;
>  	err->cache_level = obj->cache_level;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 37c8073a8246..6c145a0be250 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_REG_READ		0x31
>  #define DRM_I915_GET_RESET_STATS	0x32
>  #define DRM_I915_GEM_CREATE2		0x33
> +#define DRM_I915_GEM_USERPTR		0x34
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
>  	__u32 pad;
>  };
>  
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u64 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
> +
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 1.8.5.3
> 