[Intel-gfx] [PATCH 3/3] drm/i915: Use a task to cancel the userptr on invalidate_range
Daniel Vetter
daniel at ffwll.ch
Tue Oct 6 05:04:19 PDT 2015
On Thu, Oct 01, 2015 at 12:34:47PM +0100, Chris Wilson wrote:
> Whilst discussing possible ways to trigger an invalidate_range on a
> userptr with an aliased GGTT mmapping (and so cause a struct_mutex
> deadlock), the conclusion is that we can, and we must, prevent any
> possible deadlock by avoiding taking the mutex at all during
> invalidate_range. This has numerous advantages, all of which stem from
> avoiding the sleeping function from inside an unknown context. In
> particular, it simplifies the invalidate_range because we no longer
> have to juggle the spinlock/mutex and can just hold the spinlock
> for the entire walk. To compensate, we have to make get_pages a bit more
> complicated in order to serialise with a pending cancel_userptr worker.
> As we hold the struct_mutex, we have no choice but to return EAGAIN and
> hope that the worker is then flushed before we retry after reacquiring
> the struct_mutex.
>
> The important caveat is that the invalidate_range itself is no longer
> synchronous. There exists a small but definite period of time in which
> the old PTEs' pages remain accessible via the GPU. Note however that the
> physical pages themselves are not invalidated by the mmu_notifier, just
> the CPU view of the address space. The impact should be limited to a
> delay in pages being flushed, rather than a possibility of writing to
> the wrong pages. The only race condition that this worsens is remapping
> a userptr active on the GPU, where fresh work may still reference the
> old pages due to struct_mutex contention. Given that userspace is racing
> with the GPU, it is fair to say that the results are undefined.
>
> v2: Only queue (and importantly only take one refcnt) the worker once.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Pulled in all 3 patches. Btw some pretty kerneldoc explaining the
high-level interactions would be neat for all the userptr stuff ... I'm
totally lost in i915_gem_userptr.c ;-)
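Something along these lines at the top of i915_gem_userptr.c would already
help a lot - just a rough sketch of the kind of overview I mean, wording and
placement entirely up to you (all the names below are taken from this series):

/**
 * DOC: userptr mmu notifier overview
 *
 * Each userptr object registers an i915_mmu_object with the per-mm
 * i915_mmu_notifier. On invalidate_range_start we walk the overlapping
 * i915_mmu_objects while holding only the notifier spinlock (never
 * struct_mutex) and schedule a cancel worker for each active object.
 * Only one worker is queued per object (mo->active is cleared once the
 * work is scheduled) so that the reference taken with
 * kref_get_unless_zero() is not leaked.
 *
 * The worker (__cancel_userptr__worker) then takes struct_mutex, cancels
 * any outstanding get-pages work so that gup is re-evaluated, and drops
 * its object reference.
 *
 * get_pages serialises against a pending cancel: if the worker is still
 * queued, __i915_gem_userptr_set_active() returns -EAGAIN and the caller
 * must drop struct_mutex, let the worker run and retry.
 */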
Thanks, Daniel
> ---
> drivers/gpu/drm/i915/i915_gem_userptr.c | 148 +++++++++++++-------------------
> 1 file changed, 61 insertions(+), 87 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 161f7fbf5b76..1b3b451b6658 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -50,7 +50,6 @@ struct i915_mmu_notifier {
> struct mmu_notifier mn;
> struct rb_root objects;
> struct list_head linear;
> - unsigned long serial;
> bool has_linear;
> };
>
> @@ -59,14 +58,16 @@ struct i915_mmu_object {
> struct interval_tree_node it;
> struct list_head link;
> struct drm_i915_gem_object *obj;
> + struct work_struct work;
> bool active;
> bool is_linear;
> };
>
> -static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
> +static void __cancel_userptr__worker(struct work_struct *work)
> {
> + struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> + struct drm_i915_gem_object *obj = mo->obj;
> struct drm_device *dev = obj->base.dev;
> - unsigned long end;
>
> mutex_lock(&dev->struct_mutex);
> /* Cancel any active worker and force us to re-evaluate gup */
> @@ -89,46 +90,28 @@ static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
> dev_priv->mm.interruptible = was_interruptible;
> }
>
> - end = obj->userptr.ptr + obj->base.size;
> -
> drm_gem_object_unreference(&obj->base);
> mutex_unlock(&dev->struct_mutex);
> -
> - return end;
> }
>
> -static void *invalidate_range__linear(struct i915_mmu_notifier *mn,
> - struct mm_struct *mm,
> - unsigned long start,
> - unsigned long end)
> +static unsigned long cancel_userptr(struct i915_mmu_object *mo)
> {
> - struct i915_mmu_object *mo;
> - unsigned long serial;
> -
> -restart:
> - serial = mn->serial;
> - list_for_each_entry(mo, &mn->linear, link) {
> - struct drm_i915_gem_object *obj;
> -
> - if (mo->it.last < start || mo->it.start > end)
> - continue;
> -
> - obj = mo->obj;
> -
> - if (!mo->active ||
> - !kref_get_unless_zero(&obj->base.refcount))
> - continue;
> -
> - spin_unlock(&mn->lock);
> -
> - cancel_userptr(obj);
> -
> - spin_lock(&mn->lock);
> - if (serial != mn->serial)
> - goto restart;
> + unsigned long end = mo->obj->userptr.ptr + mo->obj->base.size;
> +
> + /* The mmu_object is released late when destroying the
> + * GEM object so it is entirely possible to gain a
> + * reference on an object in the process of being freed
> + * since our serialisation is via the spinlock and not
> + * the struct_mutex - and consequently use it after it
> + * is freed and then double free it.
> + */
> + if (mo->active && kref_get_unless_zero(&mo->obj->base.refcount)) {
> + schedule_work(&mo->work);
> + /* only schedule one work packet to avoid the refleak */
> + mo->active = false;
> }
>
> - return NULL;
> + return end;
> }
>
> static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> @@ -136,45 +119,32 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> unsigned long start,
> unsigned long end)
> {
> - struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
> - struct interval_tree_node *it = NULL;
> - unsigned long next = start;
> - unsigned long serial = 0;
> -
> - end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> - while (next < end) {
> - struct drm_i915_gem_object *obj = NULL;
> -
> - spin_lock(&mn->lock);
> - if (mn->has_linear)
> - it = invalidate_range__linear(mn, mm, start, end);
> - else if (serial == mn->serial)
> - it = interval_tree_iter_next(it, next, end);
> - else
> - it = interval_tree_iter_first(&mn->objects, start, end);
> - if (it != NULL) {
> - struct i915_mmu_object *mo =
> - container_of(it, struct i915_mmu_object, it);
> -
> - /* The mmu_object is released late when destroying the
> - * GEM object so it is entirely possible to gain a
> - * reference on an object in the process of being freed
> - * since our serialisation is via the spinlock and not
> - * the struct_mutex - and consequently use it after it
> - * is freed and then double free it.
> - */
> - if (mo->active &&
> - kref_get_unless_zero(&mo->obj->base.refcount))
> - obj = mo->obj;
> -
> - serial = mn->serial;
> + struct i915_mmu_notifier *mn =
> + container_of(_mn, struct i915_mmu_notifier, mn);
> + struct i915_mmu_object *mo;
> +
> + /* interval ranges are inclusive, but invalidate range is exclusive */
> + end--;
> +
> + spin_lock(&mn->lock);
> + if (mn->has_linear) {
> + list_for_each_entry(mo, &mn->linear, link) {
> + if (mo->it.last < start || mo->it.start > end)
> + continue;
> +
> + cancel_userptr(mo);
> }
> - spin_unlock(&mn->lock);
> - if (obj == NULL)
> - return;
> + } else {
> + struct interval_tree_node *it;
>
> - next = cancel_userptr(obj);
> + it = interval_tree_iter_first(&mn->objects, start, end);
> + while (it) {
> + mo = container_of(it, struct i915_mmu_object, it);
> + start = cancel_userptr(mo);
> + it = interval_tree_iter_next(it, start, end);
> + }
> }
> + spin_unlock(&mn->lock);
> }
>
> static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -194,7 +164,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
> spin_lock_init(&mn->lock);
> mn->mn.ops = &i915_gem_userptr_notifier;
> mn->objects = RB_ROOT;
> - mn->serial = 1;
> INIT_LIST_HEAD(&mn->linear);
> mn->has_linear = false;
>
> @@ -208,12 +177,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
> return mn;
> }
>
> -static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mn)
> -{
> - if (++mn->serial == 0)
> - mn->serial = 1;
> -}
> -
> static int
> i915_mmu_notifier_add(struct drm_device *dev,
> struct i915_mmu_notifier *mn,
> @@ -260,10 +223,9 @@ i915_mmu_notifier_add(struct drm_device *dev,
> } else
> interval_tree_insert(&mo->it, &mn->objects);
>
> - if (ret == 0) {
> + if (ret == 0)
> list_add(&mo->link, &mn->linear);
> - __i915_mmu_notifier_update_serial(mn);
> - }
> +
> spin_unlock(&mn->lock);
> mutex_unlock(&dev->struct_mutex);
>
> @@ -291,7 +253,6 @@ i915_mmu_notifier_del(struct i915_mmu_notifier *mn,
> mn->has_linear = i915_mmu_notifier_has_linear(mn);
> else
> interval_tree_remove(&mo->it, &mn->objects);
> - __i915_mmu_notifier_update_serial(mn);
> spin_unlock(&mn->lock);
> }
>
> @@ -358,6 +319,7 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
> mo->it.start = obj->userptr.ptr;
> mo->it.last = mo->it.start + obj->base.size - 1;
> mo->obj = obj;
> + INIT_WORK(&mo->work, __cancel_userptr__worker);
>
> ret = i915_mmu_notifier_add(obj->base.dev, mn, mo);
> if (ret) {
> @@ -566,10 +528,12 @@ __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
> return ret;
> }
>
> -static void
> +static int
> __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
> bool value)
> {
> + int ret = 0;
> +
> /* During mm_invalidate_range we need to cancel any userptr that
> * overlaps the range being invalidated. Doing so requires the
> * struct_mutex, and that risks recursion. In order to cause
> @@ -582,12 +546,20 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
> */
> #if defined(CONFIG_MMU_NOTIFIER)
> if (obj->userptr.mmu_object == NULL)
> - return;
> + return 0;
>
> spin_lock(&obj->userptr.mmu_object->mn->lock);
> - obj->userptr.mmu_object->active = value;
> + /* In order to serialise get_pages with an outstanding
> + * cancel_userptr, we must drop the struct_mutex and try again.
> + */
> + if (!value || !work_pending(&obj->userptr.mmu_object->work))
> + obj->userptr.mmu_object->active = value;
> + else
> + ret = -EAGAIN;
> spin_unlock(&obj->userptr.mmu_object->mn->lock);
> #endif
> +
> + return ret;
> }
>
> static void
> @@ -736,7 +708,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> return -EAGAIN;
>
> /* Let the mmu-notifier know that we have begun and need cancellation */
> - __i915_gem_userptr_set_active(obj, true);
> + ret = __i915_gem_userptr_set_active(obj, true);
> + if (ret)
> + return ret;
>
> pvec = NULL;
> pinned = 0;
> --
> 2.6.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch