[Intel-gfx] [PATCH 2/3] drm/i915/userptr: Avoid struct_mutex recursion for mmu_invalidate_range_start

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Tue Jan 15 11:25:28 UTC 2019


On 15/01/2019 10:00, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-01-15 09:47:45)
>>
>> On 14/01/2019 21:17, Chris Wilson wrote:
>>> Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
>>> notifiers") we have been able to report failure from
>>> mmu_invalidate_range_start which allows us to use a trylock on the
>>> struct_mutex to avoid potential recursion and report -EBUSY instead.
>>> Furthermore, this allows us to pull the work into the main callback and
>>> avoid the sleight-of-hand in using a workqueue to avoid lockdep.
>>>
>>> However, not all paths to mmu_invalidate_range_start are prepared to
>>> handle failure, so instead of reporting the recursion, deal with it by
>>> propagating the failure upwards, who can decide themselves to handle it
>>> or report it.
>>>
>>> v2: Mark up the recursive lock behaviour and comment on the various weak
>>> points.
>>>
>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
>>> References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_drv.h         |   4 +-
>>>    drivers/gpu/drm/i915/i915_gem.c         |  30 +++-
>>>    drivers/gpu/drm/i915/i915_gem_object.h  |   7 +
>>>    drivers/gpu/drm/i915/i915_gem_userptr.c | 220 +++++++++++-------------
>>>    4 files changed, 135 insertions(+), 126 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index fa99824f63b3..4b67037c0318 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2931,8 +2931,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
>>>        I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
>>>    };
>>>    
>>> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>>> -                              enum i915_mm_subclass subclass);
>>> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>>> +                             enum i915_mm_subclass subclass);
>>>    void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
>>>    
>>>    enum i915_map_type {
>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>>> index 0bfed33178e1..90c167f71345 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>> @@ -2303,8 +2303,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
>>>        struct sg_table *pages;
>>>    
>>>        pages = fetch_and_zero(&obj->mm.pages);
>>> -     if (!pages)
>>> -             return NULL;
>>> +     if (IS_ERR_OR_NULL(pages))
>>> +             return pages;
>>>    
>>>        spin_lock(&i915->mm.obj_lock);
>>>        list_del(&obj->mm.link);
>>> @@ -2328,22 +2328,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
>>>        return pages;
>>>    }
>>>    
>>> -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>>> -                              enum i915_mm_subclass subclass)
>>> +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>>> +                             enum i915_mm_subclass subclass)
>>>    {
>>>        struct sg_table *pages;
>>> +     int ret;
>>>    
>>>        if (i915_gem_object_has_pinned_pages(obj))
>>> -             return;
>>> +             return -EBUSY;
>>>    
>>>        GEM_BUG_ON(obj->bind_count);
>>> -     if (!i915_gem_object_has_pages(obj))
>>> -             return;
>>>    
>>>        /* May be called by shrinker from within get_pages() (on another bo) */
>>>        mutex_lock_nested(&obj->mm.lock, subclass);
>>> -     if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
>>> +     if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
>>> +             ret = -EBUSY;
>>>                goto unlock;
>>> +     }
>>>    
>>>        /*
>>>         * ->put_pages might need to allocate memory for the bit17 swizzle
>>> @@ -2351,11 +2352,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
>>>         * lists early.
>>>         */
>>>        pages = __i915_gem_object_unset_pages(obj);
>>> +
>>> +     /*
>>> +      * XXX Temporary hijinx to avoid updating all backends to handle
>>> +      * NULL pages. In the future, when we have more asynchronous
>>> +      * get_pages backends we should be better able to handle the
>>> +      * cancellation of the async task in a more uniform manner.
>>> +      */
>>> +     if (!pages && !i915_gem_object_needs_async_cancel(obj))
>>> +             pages = ERR_PTR(-EINVAL);
>>> +
>>>        if (!IS_ERR(pages))
>>>                obj->ops->put_pages(obj, pages);
>>>    
>>> +     ret = 0;
>>>    unlock:
>>>        mutex_unlock(&obj->mm.lock);
>>> +
>>> +     return ret;
>>>    }
>>>    
>>>    bool i915_sg_trim(struct sg_table *orig_st)
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
>>> index ff3da64470dd..cb1b0144d274 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_object.h
>>> +++ b/drivers/gpu/drm/i915/i915_gem_object.h
>>> @@ -57,6 +57,7 @@ struct drm_i915_gem_object_ops {
>>>    #define I915_GEM_OBJECT_HAS_STRUCT_PAGE     BIT(0)
>>>    #define I915_GEM_OBJECT_IS_SHRINKABLE       BIT(1)
>>>    #define I915_GEM_OBJECT_IS_PROXY    BIT(2)
>>> +#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
>>>    
>>>        /* Interface between the GEM object and its backing storage.
>>>         * get_pages() is called once prior to the use of the associated set
>>> @@ -387,6 +388,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
>>>        return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
>>>    }
>>>    
>>> +static inline bool
>>> +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
>>> +{
>>> +     return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
>>> +}
>>> +
>>>    static inline bool
>>>    i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
>>>    {
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
>>> index 1fb6a7bb5054..9c2008a480e2 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
>>> @@ -49,77 +49,81 @@ struct i915_mmu_notifier {
>>>        struct hlist_node node;
>>>        struct mmu_notifier mn;
>>>        struct rb_root_cached objects;
>>> -     struct workqueue_struct *wq;
>>> +     struct i915_mm_struct *mm;
>>>    };
>>>    
>>>    struct i915_mmu_object {
>>>        struct i915_mmu_notifier *mn;
>>>        struct drm_i915_gem_object *obj;
>>>        struct interval_tree_node it;
>>> -     struct list_head link;
>>> -     struct work_struct work;
>>> -     bool attached;
>>>    };
>>>    
>>> -static void cancel_userptr(struct work_struct *work)
>>> +static void add_object(struct i915_mmu_object *mo)
>>>    {
>>> -     struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
>>> -     struct drm_i915_gem_object *obj = mo->obj;
>>> -     struct work_struct *active;
>>> -
>>> -     /* Cancel any active worker and force us to re-evaluate gup */
>>> -     mutex_lock(&obj->mm.lock);
>>> -     active = fetch_and_zero(&obj->userptr.work);
>>> -     mutex_unlock(&obj->mm.lock);
>>> -     if (active)
>>> -             goto out;
>>> -
>>> -     i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
>>> -
>>> -     mutex_lock(&obj->base.dev->struct_mutex);
>>> -
>>> -     /* We are inside a kthread context and can't be interrupted */
>>> -     if (i915_gem_object_unbind(obj) == 0)
>>> -             __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
>>> -     WARN_ONCE(i915_gem_object_has_pages(obj),
>>> -               "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
>>> -               obj->bind_count,
>>> -               atomic_read(&obj->mm.pages_pin_count),
>>> -               obj->pin_global);
>>> -
>>> -     mutex_unlock(&obj->base.dev->struct_mutex);
>>> -
>>> -out:
>>> -     i915_gem_object_put(obj);
>>> +     GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
>>> +     interval_tree_insert(&mo->it, &mo->mn->objects);
>>>    }
>>>    
>>> -static void add_object(struct i915_mmu_object *mo)
>>> +static void del_object(struct i915_mmu_object *mo)
>>>    {
>>> -     if (mo->attached)
>>> +     if (RB_EMPTY_NODE(&mo->it.rb))
>>>                return;
>>>    
>>> -     interval_tree_insert(&mo->it, &mo->mn->objects);
>>> -     mo->attached = true;
>>> +     interval_tree_remove(&mo->it, &mo->mn->objects);
>>> +     RB_CLEAR_NODE(&mo->it.rb);
>>>    }
>>>    
>>> -static void del_object(struct i915_mmu_object *mo)
>>> +static void
>>> +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
>>>    {
>>> -     if (!mo->attached)
>>> +     struct i915_mmu_object *mo = obj->userptr.mmu_object;
>>> +
>>> +     /*
>>> +      * During mm_invalidate_range we need to cancel any userptr that
>>> +      * overlaps the range being invalidated. Doing so requires the
>>> +      * struct_mutex, and that risks recursion. In order to cause
>>> +      * recursion, the user must alias the userptr address space with
>>> +      * a GTT mmapping (possible with a MAP_FIXED) - then when we have
>>> +      * to invalidate that mmapping, mm_invalidate_range is called with
>>> +      * the userptr address *and* the struct_mutex held.  To prevent that
>>> +      * we set a flag under the i915_mmu_notifier spinlock to indicate
>>> +      * whether this object is valid.
>>> +      */
>>> +     if (!mo)
>>>                return;
>>>    
>>> -     interval_tree_remove(&mo->it, &mo->mn->objects);
>>> -     mo->attached = false;
>>> +     spin_lock(&mo->mn->lock);
>>> +     if (value)
>>> +             add_object(mo);
>>> +     else
>>> +             del_object(mo);
>>> +     spin_unlock(&mo->mn->lock);
>>>    }
>>>    
>>> -static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>>> -                     const struct mmu_notifier_range *range)
>>> +static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
>>> +{
>>> +     switch (mutex_trylock_recursive(m)) {
>>> +     default:
>>> +     case MUTEX_TRYLOCK_FAILED:
>>> +             mutex_lock_nested(m, I915_MM_SHRINKER);
>>
>> Have you found in the meantime that this is actually needed? Although it
>> is since we use this subclass to put the pages..
> 
> Yes, if we do not attempt to try a full mutex_lock() here, we fail
> the invalidation subtests of gem_userptr_blits(). But you mean the
> subclass? Given enough time I'll find the path to break it, at the
> present time I feel more comfortable cargo-culting the rules devised for
> the recursive locking from inside direct-reclaim to another
> direct-reclaim path.
> 
> The end-goal here is that upon breaking up struct_mutex we replace it
> various locks that we are not allowed to allocate under. No more direct
> reclaim recursion.
> 
>> Should i915_gem_shrinker_taint_mutex taint obj->mm.lock with the
>> shrinker subclass?
> 
> The shrinker uses the obj->mm.lock I915_MM_SHRINKER subclass, so it's a
> little more faff to taint.

I am not 100% sure of all paths that could be affected by the 
struct_mutex nested annotation but I guess it is worth a shot.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko


More information about the Intel-gfx mailing list