[Intel-gfx] [PATCH] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
Volkin, Bradley D
bradley.d.volkin at intel.com
Mon Jan 27 18:56:12 CET 2014
Hi Chris,
A few questions/comments throughout. I may be off the mark on some. Please
bear with me as I try to get more familiar with the gem code.
Thanks,
Brad
[ snip ]
On Fri, Jan 24, 2014 at 01:00:19AM -0800, Chris Wilson wrote:
> +static void
> +__i915_mmu_notifier_destroy_worker(struct work_struct *work)
> +{
> + struct i915_mmu_notifier *mmu = container_of(work, typeof(*mmu), work);
> + mmu_notifier_unregister(&mmu->mn, mmu->mm);
> + kfree(mmu);
> +}
> +
> +static void
> +__i915_mmu_notifier_destroy(struct i915_mmu_notifier *mmu)
> +{
> + hash_del(&mmu->node);
> + INIT_WORK(&mmu->work, __i915_mmu_notifier_destroy_worker);
> + schedule_work(&mmu->work);
The commit message mentions a potential lock inversion as the reason for using a wq.
A comment with the details might be helpful.
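Even a short comment above the schedule_work() would do. I don't know the exact locks involved off-hand, so the placeholders in this sketch would need to be filled in from the commit message:

	/*
	 * We cannot unregister the notifier directly here because of a
	 * lock inversion between <lock A> and <lock B> (see the commit
	 * message), so defer the unregister to a worker instead.
	 */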
> +}
> +
> +static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mmu)
> +{
> + if (++mmu->serial == 0)
> + mmu->serial = 1;
> +}
> +
> +static void
> +i915_mmu_notifier_del(struct i915_mmu_notifier *mmu,
> + struct i915_mmu_object *mn)
> +{
> + bool destroy;
> +
> + spin_lock(&mmu->lock);
> + interval_tree_remove(&mn->it, &mmu->objects);
> + destroy = --mmu->count == 0;
> + __i915_mmu_notifier_update_serial(mmu);
> + spin_unlock(&mmu->lock);
> +
> + if (destroy) /* protected against _add() by struct_mutex */
> + __i915_mmu_notifier_destroy(mmu);
I see that we should hold struct_mutex when this function is called,
but I don't see that we try to get the mutex anywhere on the _add() path.
Have I missed something?
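If the expectation really is that both the _add() and _del() paths run under struct_mutex, maybe an assertion would make that explicit and catch violations, e.g. at the top of i915_gem_userptr_init__mmu_notifier() and i915_gem_userptr_release__mmu_notifier() (just a sketch, the best spot may be elsewhere):

	WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex));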
> +}
> +
> +static int
> +i915_mmu_notifier_add(struct i915_mmu_notifier *mmu,
> + struct i915_mmu_object *mn)
> +{
> + int ret = -EINVAL;
> +
> + spin_lock(&mmu->lock);
> + /* Disallow overlapping userptr objects */
> + if (!interval_tree_iter_first(&mmu->objects,
> + mn->it.start, mn->it.last)) {
> + interval_tree_insert(&mn->it, &mmu->objects);
> + mmu->count++;
> + __i915_mmu_notifier_update_serial(mmu);
> + ret = 0;
> + }
> + spin_unlock(&mmu->lock);
> +
> + return ret;
> +}
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> + struct i915_mmu_object *mn;
> +
> + mn = obj->userptr.mn;
> + if (mn == NULL)
> + return;
> +
> + i915_mmu_notifier_del(mn->mmu, mn);
> + obj->userptr.mn = NULL;
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> + unsigned flags)
> +{
> + struct i915_mmu_notifier *mmu;
> + struct i915_mmu_object *mn;
> + int ret;
> +
> + if (flags & I915_USERPTR_UNSYNCHRONIZED)
> + return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> + mmu = i915_mmu_notifier_get(obj->base.dev, obj->userptr.mm);
> + if (IS_ERR(mmu))
> + return PTR_ERR(mmu);
> +
> + mn = kzalloc(sizeof(*mn), GFP_KERNEL);
> + if (mn == NULL) {
> + ret = -ENOMEM;
> + goto destroy_mmu;
> + }
> +
> + mn->mmu = mmu;
> + mn->it.start = obj->userptr.ptr;
> + mn->it.last = mn->it.start + obj->base.size - 1;
> + mn->obj = obj;
> +
> + ret = i915_mmu_notifier_add(mmu, mn);
> + if (ret)
> + goto free_mn;
> +
> + obj->userptr.mn = mn;
> + return 0;
> +
> +free_mn:
> + kfree(mn);
> +destroy_mmu:
> + if (mmu->count == 0)
> + __i915_mmu_notifier_destroy(mmu);
Other accesses to mmu->count are protected by mmu->lock. Again, I may have missed
something but don't immediately see why that's not required.
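If the lock is needed, something along these lines would mirror what i915_mmu_notifier_del() does (sketch only, with a local bool destroy):

destroy_mmu:
	spin_lock(&mmu->lock);
	destroy = mmu->count == 0;
	spin_unlock(&mmu->lock);
	if (destroy)
		__i915_mmu_notifier_destroy(mmu);
	return ret;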
> + return ret;
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> + unsigned flags)
> +{
> + if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> + return -ENODEV;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + return 0;
> +}
> +#endif
> +
> +struct get_pages_work {
> + struct work_struct work;
> + struct drm_i915_gem_object *obj;
> + struct task_struct *task;
> +};
> +
> +static void
> +__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> +{
> + struct get_pages_work *work = container_of(_work, typeof(*work), work);
> + struct drm_i915_gem_object *obj = work->obj;
> + struct drm_device *dev = obj->base.dev;
> + const int num_pages = obj->base.size >> PAGE_SHIFT;
> + struct page **pvec;
> + int pinned, ret;
> +
> + ret = -ENOMEM;
> + pinned = 0;
> +
> + pvec = kmalloc(num_pages*sizeof(struct page *),
> + GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> + if (pvec == NULL)
> + pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> + if (pvec != NULL) {
> + struct mm_struct *mm = obj->userptr.mm;
> +
> + use_mm(mm);
> + down_read(&mm->mmap_sem);
> + while (pinned < num_pages) {
> + ret = get_user_pages(work->task, mm,
> + obj->userptr.ptr + pinned * PAGE_SIZE,
> + num_pages - pinned,
> + !obj->userptr.read_only, 0,
> + pvec + pinned, NULL);
> + if (ret < 0)
> + break;
> +
> + pinned += ret;
> + }
> + up_read(&mm->mmap_sem);
> + unuse_mm(mm);
> + }
> +
> + mutex_lock(&dev->struct_mutex);
> + if (obj->userptr.work != &work->work) {
> + ret = 0;
> + } else if (pinned == num_pages) {
> + struct sg_table *st;
> +
> + st = kmalloc(sizeof(*st), GFP_KERNEL);
> + if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> + kfree(st);
> + ret = -ENOMEM;
> + } else {
> + struct scatterlist *sg;
> + int n;
> +
> + for_each_sg(st->sgl, sg, num_pages, n)
> + sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> + obj->pages = st;
> + list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list);
> + pinned = 0;
> + ret = 0;
> + }
> + }
> +
> + obj->userptr.work = ERR_PTR(ret);
> + drm_gem_object_unreference(&obj->base);
> + mutex_unlock(&dev->struct_mutex);
> +
> + release_pages(pvec, pinned, 0);
> + drm_free_large(pvec);
> +
> + put_task_struct(work->task);
> + kfree(work);
> +}
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> + const int num_pages = obj->base.size >> PAGE_SHIFT;
> + struct page **pvec;
> + int pinned, ret;
> +
> + /* If userspace should engineer that these pages are replaced in
> + * the vma between us binding this page into the GTT and completion
> + * of rendering... Their loss. If they change the mapping of their
> + * pages they need to create a new bo to point to the new vma.
> + *
> + * However, that still leaves open the possibility of the vma
> + * being copied upon fork. Which falls under the same userspace
> + * synchronisation issue as a regular bo, except that this time
> + * the process may not be expecting that a particular piece of
> + * memory is tied to the GPU.
> + *
> + * Fortunately, we can hook into the mmu_notifier in order to
> + * discard the page references prior to anything nasty happening
> + * to the vma (discard or cloning) which should prevent the more
> + * egregious cases from causing harm.
> + */
> +
> + pvec = NULL;
> + pinned = 0;
> + if (obj->userptr.mm == current->mm) {
> + pvec = kmalloc(num_pages*sizeof(struct page *),
> + GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
> + if (pvec == NULL) {
> + pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> + if (pvec == NULL)
> + return -ENOMEM;
> + }
> +
> + pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages,
> + !obj->userptr.read_only, pvec);
> + }
> + if (pinned < num_pages) {
> + if (pinned < 0) {
> + ret = pinned;
> + pinned = 0;
> + } else {
> + /* Spawn a worker so that we can acquire the
> + * user pages without holding our mutex.
> + */
> + ret = -EAGAIN;
> + if (obj->userptr.work == NULL) {
> + struct get_pages_work *work;
> +
> + work = kmalloc(sizeof(*work), GFP_KERNEL);
> + if (work != NULL) {
> + obj->userptr.work = &work->work;
> +
> + work->obj = obj;
> + drm_gem_object_reference(&obj->base);
> +
> + work->task = current;
> + get_task_struct(work->task);
> +
> + INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> + schedule_work(&work->work);
Is there a reason to use the system wq instead of the driver wq here?
It doesn't look like the usual "takes modeset locks" consideration applies either way.
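i.e. I would have expected something like the below, assuming dev_priv->wq is suitable for this kind of work:

				queue_work(to_i915(obj->base.dev)->wq, &work->work);

instead of schedule_work().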
> + } else
> + ret = -ENOMEM;
> + } else {
> + if (IS_ERR(obj->userptr.work)) {
Could the nesting here be flattened to an else-if?
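Something like this sketch, i.e. folding the IS_ERR() check into the outer else:

		} else if (IS_ERR(obj->userptr.work)) {
			ret = PTR_ERR(obj->userptr.work);
			obj->userptr.work = NULL;
		}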
> + ret = PTR_ERR(obj->userptr.work);
> + obj->userptr.work = NULL;
> + }
> + }
> + }
> + } else {
> + struct sg_table *st;
> +
> + st = kmalloc(sizeof(*st), GFP_KERNEL);
> + if (st == NULL || sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> + kfree(st);
> + ret = -ENOMEM;
> + } else {
> + struct scatterlist *sg;
> + int n;
> +
> + for_each_sg(st->sgl, sg, num_pages, n)
> + sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +
> + obj->pages = st;
> + obj->userptr.work = NULL;
> +
> + pinned = 0;
> + ret = 0;
> + }
This block is almost identical to code in the worker. Would it be worth extracting
the common parts into a helper?
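Something like the sketch below, perhaps; the name and signature are just a strawman:

static int
st_set_pages(struct sg_table **st, struct page **pvec, int num_pages)
{
	struct scatterlist *sg;
	int n;

	*st = kmalloc(sizeof(**st), GFP_KERNEL);
	if (*st == NULL || sg_alloc_table(*st, num_pages, GFP_KERNEL)) {
		kfree(*st);
		*st = NULL;
		return -ENOMEM;
	}

	/* One page per sg entry, matching what both call sites do today */
	for_each_sg((*st)->sgl, sg, num_pages, n)
		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);

	return 0;
}

Then both here and in the worker it would just be a call to st_set_pages(&obj->pages, pvec, num_pages) plus the per-path bookkeeping on success.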
> + }
> +
> + release_pages(pvec, pinned, 0);
> + drm_free_large(pvec);
> + return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> + struct scatterlist *sg;
> + int i;
> +
> + if (obj->madv != I915_MADV_WILLNEED)
> + obj->dirty = 0;
This is subtly different from the similar code in the standard put_pages(), in that
it clears dirty for both DONTNEED and PURGED instead of just DONTNEED (with a BUG_ON(PURGED)).
I don't think we will ever actually truncate userptr objects, so is there any reason for
this to be different?
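For comparison, the gtt put_pages does roughly this (from memory, so worth double-checking):

	BUG_ON(obj->madv == __I915_MADV_PURGED);
	...
	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;

i.e. PURGED is treated as a bug there rather than silently handled like DONTNEED.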
> +
> + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> + struct page *page = sg_page(sg);
> +
> + if (obj->dirty)
> + set_page_dirty(page);
> +
> + mark_page_accessed(page);
> + page_cache_release(page);
> + }
> + obj->dirty = 0;
> +
> + sg_free_table(obj->pages);
> + kfree(obj->pages);
> +
> + BUG_ON(obj->userptr.work != NULL);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> + i915_gem_userptr_release__mmu_notifier(obj);
> +
> + if (obj->userptr.mm) {
> + mmput(obj->userptr.mm);
> + obj->userptr.mm = NULL;
> + }
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> + .get_pages = i915_gem_userptr_get_pages,
> + .put_pages = i915_gem_userptr_put_pages,
> + .release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some normal memory from the process
> + * context - user memory.
> + *
> + * We impose several restrictions upon the memory being mapped
> + * into the GPU.
> + * 1. It must be page aligned (both start/end addresses, i.e ptr and size).
> + * 2. We only allow a bo as large as we could in theory map into the GTT,
> + * that is we limit the size to the total size of the GTT.
> + * 3. The bo is marked as being snoopable. The backing pages are left
> + * accessible directly by the CPU, but reads by the GPU may incur the cost
> + * of a snoop (unless you have an LLC architecture).
Might be worth adding a 4th item here for the "no overlapping ranges" restriction
enforced in i915_mmu_notifier_add().
- Brad
> + *
> + * Synchronisation between multiple users and the GPU is left to userspace
> + * through the normal set-domain-ioctl. The kernel will enforce that the
> + * GPU relinquishes the VMA before it is returned back to the system
> + * i.e. upon free(), munmap() or process termination. However, the userspace
> + * malloc() library may not immediately relinquish the VMA after free() and
> + * instead reuse it whilst the GPU is still reading and writing to the VMA.
> + * Caveat emptor.
> + *
> + * Also note, that the object created here is not currently a "first class"
> + * object, in that several ioctls are banned. These are the CPU access
> + * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
> + * direct access via your pointer rather than use those ioctls.
> + *
> + * If you think this is a good interface to use to pass GPU memory between
> + * drivers, please use dma-buf instead. In fact, wherever possible use
> + * dma-buf instead.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct drm_i915_gem_userptr *args = data;
> + struct drm_i915_gem_object *obj;
> + int ret;
> + u32 handle;
> +
> + if (args->flags & ~(I915_USERPTR_READ_ONLY |
> + I915_USERPTR_UNSYNCHRONIZED))
> + return -EINVAL;
> +
> + if (offset_in_page(args->user_ptr | args->user_size))
> + return -EINVAL;
> +
> + if (args->user_size > dev_priv->gtt.base.total)
> + return -E2BIG;
> +
> + if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
> + (char __user *)(unsigned long)args->user_ptr, args->user_size))
> + return -EFAULT;
> +
> + /* Allocate the new object */
> + obj = i915_gem_object_alloc(dev);
> + if (obj == NULL)
> + return -ENOMEM;
> +
> + drm_gem_private_object_init(dev, &obj->base, args->user_size);
> + i915_gem_object_init(obj, &i915_gem_userptr_ops);
> + obj->cache_level = I915_CACHE_LLC;
> + obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> + obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +
> + obj->userptr.ptr = args->user_ptr;
> + obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
> +
> + /* And keep a pointer to the current->mm for resolving the user pages
> + * at binding. This means that we need to hook into the mmu_notifier
> + * in order to detect if the mmu is destroyed.
> + */
> + ret = -ENOMEM;
> + if ((obj->userptr.mm = get_task_mm(current)))
> + ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> + if (ret == 0)
> + ret = drm_gem_handle_create(file, &obj->base, &handle);
> +
> + /* drop reference from allocate - handle holds it now */
> + drm_gem_object_unreference_unlocked(&obj->base);
> + if (ret)
> + return ret;
> +
> + args->handle = handle;
> + return 0;
> +}
> +
> +int
> +i915_gem_init_userptr(struct drm_device *dev)
> +{
> +#if defined(CONFIG_MMU_NOTIFIER)
> + struct drm_i915_private *dev_priv = to_i915(dev);
> + hash_init(dev_priv->mmu_notifiers);
> +#endif
> + return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index ae8cf61b8ce3..cce9f559e3d7 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -201,6 +201,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
> err_puts(m, tiling_flag(err->tiling));
> err_puts(m, dirty_flag(err->dirty));
> err_puts(m, purgeable_flag(err->purgeable));
> + err_puts(m, err->userptr ? " userptr" : "");
> err_puts(m, err->ring != -1 ? " " : "");
> err_puts(m, ring_str(err->ring));
> err_puts(m, i915_cache_level_str(err->cache_level));
> @@ -584,6 +585,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
> err->tiling = obj->tiling_mode;
> err->dirty = obj->dirty;
> err->purgeable = obj->madv != I915_MADV_WILLNEED;
> + err->userptr = obj->userptr.mm != 0;
> err->ring = obj->ring ? obj->ring->id : -1;
> err->cache_level = obj->cache_level;
> }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 37c8073a8246..6c145a0be250 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -224,6 +224,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_I915_REG_READ 0x31
> #define DRM_I915_GET_RESET_STATS 0x32
> #define DRM_I915_GEM_CREATE2 0x33
> +#define DRM_I915_GEM_USERPTR 0x34
>
> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -275,6 +276,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
> #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>
> /* Allow drivers to submit batchbuffers directly to hardware, relying
> * on the security mechanisms provided by hardware.
> @@ -1129,4 +1131,18 @@ struct drm_i915_reset_stats {
> __u32 pad;
> };
>
> +struct drm_i915_gem_userptr {
> + __u64 user_ptr;
> + __u64 user_size;
> + __u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> + /**
> + * Returned handle for the object.
> + *
> + * Object handles are nonzero.
> + */
> + __u32 handle;
> +};
> +
> #endif /* _UAPI_I915_DRM_H_ */
> --
> 1.8.5.3
>