[Intel-gfx] [PATCH 3/3] drm/i915/userptr: Probe vma range before gup
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Jan 15 10:27:02 UTC 2019
On 14/01/2019 21:17, Chris Wilson wrote:
> We want to exclude any GGTT objects from being present on our internal
> lists to avoid the deadlock we may run into with our requirement for
> struct_mutex during invalidate. However, if the gup_fast fails, we put
> the userptr onto the workqueue and mark it as active, so that we
> remember to serialise the worker upon mmu_invalidate.
>
> Note that despite the previous fix, it is still better to avoid the
> struct_mutex recursion where possible, leaving the recursion only to
> handle the shrinker-esque paths.
>
> v2: Hold mmap_sem to prevent modifications to the mm while we probe and
> add ourselves to the interval-tree for notification.
> v3: Rely on mmap_sem for a simpler patch.
> v4: Mark up the mmap_sem nesting
> v5: Don't deactivate on -EAGAIN as that means the worker is queued
> v6: Fight the indentation and chained if-else error handling
> v7: Fight again.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104209
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_userptr.c | 150 ++++++++++++++++--------
> 1 file changed, 98 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 9c2008a480e2..7b9e14bc3bc5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -435,7 +435,7 @@ struct get_pages_work {
> struct task_struct *task;
> };
>
> -static struct sg_table *
> +static int
> __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
> struct page **pvec, int num_pages)
> {
> @@ -446,7 +446,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
>
> st = kmalloc(sizeof(*st), GFP_KERNEL);
> if (!st)
> - return ERR_PTR(-ENOMEM);
> + return -ENOMEM;
>
> alloc_table:
> ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
> @@ -455,7 +455,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
> GFP_KERNEL);
> if (ret) {
> kfree(st);
> - return ERR_PTR(ret);
> + return ret;
> }
>
> ret = i915_gem_gtt_prepare_pages(obj, st);
> @@ -468,14 +468,14 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
> }
>
> kfree(st);
> - return ERR_PTR(ret);
> + return ret;
> }
>
> sg_page_sizes = i915_sg_page_sizes(st->sgl);
>
> __i915_gem_object_set_pages(obj, st, sg_page_sizes);
>
> - return st;
> + return 0;
> }
>
> static void
> @@ -520,19 +520,14 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>
> mutex_lock(&obj->mm.lock);
> if (obj->userptr.work == &work->work) {
> - struct sg_table *pages = ERR_PTR(ret);
> -
> if (pinned == npages) {
> - pages = __i915_gem_userptr_alloc_pages(obj, pvec,
> - npages);
> - if (!IS_ERR(pages)) {
> + ret = __i915_gem_userptr_alloc_pages(obj, pvec, npages);
> + if (!ret)
> pinned = 0;
> - pages = NULL;
> - }
> }
>
> - obj->userptr.work = ERR_CAST(pages);
> - if (IS_ERR(pages))
> + obj->userptr.work = ERR_PTR(ret);
> + if (ret)
> __i915_gem_userptr_set_active(obj, false);
> }
> mutex_unlock(&obj->mm.lock);
> @@ -545,7 +540,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> kfree(work);
> }
>
> -static struct sg_table *
> +static int
> __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
> {
> struct get_pages_work *work;
> @@ -571,7 +566,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
> */
> work = kmalloc(sizeof(*work), GFP_KERNEL);
> if (work == NULL)
> - return ERR_PTR(-ENOMEM);
> + return -ENOMEM;
>
> obj->userptr.work = &work->work;
>
> @@ -583,19 +578,86 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
> INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
> queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);
>
> - return ERR_PTR(-EAGAIN);
> + return -EAGAIN;
> }
>
> -static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +static int
> +probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
> +{
> + const unsigned long end = addr + len;
> + struct vm_area_struct *vma;
> + int ret = -EFAULT;
> +
> + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
> + if (vma->vm_start > addr)
> + break;
> +
> + /*
> + * Exclude any VMA that is not backed only by struct_page, i.e.
> + * IO regions that include our own GGTT mmaps. We cannot handle
> + * such ranges, as we may encounter deadlocks around our
> + * struct_mutex on mmu_invalidate_range.
> + */
> + if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
> + break;
> +
> + if (vma->vm_end >= end) {
> + ret = 0;
> + break;
> + }
> +
> + addr = vma->vm_end;
> + }
> +
> + return ret;
> +}
> +
> +static int try_fast_gup(struct drm_i915_gem_object *obj)
> {
> const int num_pages = obj->base.size >> PAGE_SHIFT;
> - struct mm_struct *mm = obj->userptr.mm->mm;
> struct page **pvec;
> - struct sg_table *pages;
> - bool active;
> - int pinned;
> + int pinned, err;
> +
> + pvec = kvmalloc_array(num_pages, sizeof(struct page *),
> + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
> + if (!pvec) /* defer to worker if malloc fails */
> + return -ENOMEM;
> +
> + pinned = __get_user_pages_fast(obj->userptr.ptr,
> + num_pages,
> + !i915_gem_object_is_readonly(obj),
> + pvec);
> + if (pinned < 0) {
> + err = pinned;
> + pinned = 0;
> + goto out_pvec;
> + }
>
> - /* If userspace should engineer that these pages are replaced in
> + if (pinned < num_pages) {
> + err = -EFAULT;
> + goto out_pinned;
> + }
> +
> + err = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
> + if (err)
> + goto out_pinned;
> +
> + __i915_gem_userptr_set_active(obj, true);
> + pinned = 0;
> +out_pinned:
> + release_pages(pvec, pinned);
> +out_pvec:
> + kvfree(pvec);
> + return err;
> +}
> +
> +static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> + struct mm_struct *mm = obj->userptr.mm->mm;
> + int err;
> +
> + /*
> + * If userspace should engineer that these pages are replaced in
> * the vma between us binding this page into the GTT and completion
> * of rendering... Their loss. If they change the mapping of their
> * pages they need to create a new bo to point to the new vma.
> @@ -620,40 +682,24 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> return -EAGAIN;
> }
>
> - pvec = NULL;
> - pinned = 0;
> + if (mm == current->mm && try_fast_gup(obj) == 0)
> + return 0;
>
> - if (mm == current->mm) {
> - pvec = kvmalloc_array(num_pages, sizeof(struct page *),
> - GFP_KERNEL |
> - __GFP_NORETRY |
> - __GFP_NOWARN);
> - if (pvec) /* defer to worker if malloc fails */
> - pinned = __get_user_pages_fast(obj->userptr.ptr,
> - num_pages,
> - !i915_gem_object_is_readonly(obj),
> - pvec);
> - }
> + /* lockdep doesn't yet automatically allow nesting of readers */
> + down_read_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
>
> - active = false;
> - if (pinned < 0) {
> - pages = ERR_PTR(pinned);
> - pinned = 0;
> - } else if (pinned < num_pages) {
> - pages = __i915_gem_userptr_get_pages_schedule(obj);
> - active = pages == ERR_PTR(-EAGAIN);
> - } else {
> - pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
> - active = !IS_ERR(pages);
> - }
> - if (active)
> + err = probe_range(mm, obj->userptr.ptr, obj->base.size);
Stick a comment here about how the probe is not a guarantee the mapping
won't change by the time gup worker runs.
> + if (err)
> + goto err_unlock;
> +
> + err = __i915_gem_userptr_get_pages_schedule(obj);
> + if (err == -EAGAIN)
> __i915_gem_userptr_set_active(obj, true);
>
> - if (IS_ERR(pages))
> - release_pages(pvec, pinned);
> - kvfree(pvec);
> +err_unlock:
> + up_read(&mm->mmap_sem);
It may be safer to drop the lock earlier, immediately after the probe. I
don't see how holding it while queuing the worker and doing internal
book-keeping is useful, and it might just create more lock chain
dependencies.
Regards,
Tvrtko
>
> - return PTR_ERR_OR_ZERO(pages);
> + return err;
> }
>
> static void
>
More information about the Intel-gfx
mailing list