[Intel-gfx] [PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Mar 26 15:59:20 UTC 2018
On 26/03/2018 15:59, Chris Wilson wrote:
> We've always been blatantly ignoring mmu_notifier.h:
>
> * Invalidation of multiple concurrent ranges may be
> * optionally permitted by the driver. Either way the
> * establishment of sptes is forbidden in the range passed to
> * invalidate_range_begin/end for the whole duration of the
> * invalidate_range_begin/end critical section.
>
> by not preventing concurrent calls to gup while an invalidate_range is
> being processed. Wrap gup and invalidate_range in a paired rw_semaphore
> to allow concurrent lookups, that are then interrupted and disabled
> across the invalidate_range. Further refinement can be applied by
> tracking the invalidate_range versus individual gup, which should be
> done using a common set of helpers for all mmu_notifier subsystems share
> the same need. I hear HMM is one such toolbox...
>
> For the time being, assume concurrent invalidate and lookup are rare,
> but not rare enough to completely ignore.
I think I suggested a few times we should just "ban" the object on first
invalidate and never ever for its lifetime allow it to obtain backing
store again. I just don't remember why we decided not to go with that
approach. :( Thinking about it now I still don't see that this
restriction would be a problem and would simplify things.
With more locks I am quite fearful what lockdep will say, but lets see...
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_userptr.c | 29 ++++++++++++++++++++++++++++-
> 1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index d596a8302ca3..938107dffd37 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -47,6 +47,7 @@ struct i915_mm_struct {
>
> struct i915_mmu_notifier {
> spinlock_t lock;
> + struct rw_semaphore sem;
> struct hlist_node node;
> struct mmu_notifier mn;
> struct rb_root_cached objects;
> @@ -123,6 +124,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> struct interval_tree_node *it;
> LIST_HEAD(cancelled);
>
> + down_write(&mn->sem);
> +
> if (RB_EMPTY_ROOT(&mn->objects.rb_root))
> return;
>
> @@ -156,8 +159,20 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> flush_workqueue(mn->wq);
> }
>
> +static void i915_gem_userptr_mn_invalidate_range_end(struct mmu_notifier *_mn,
> + struct mm_struct *mm,
> + unsigned long start,
> + unsigned long end)
> +{
> + struct i915_mmu_notifier *mn =
> + container_of(_mn, struct i915_mmu_notifier, mn);
> +
> + up_write(&mn->sem);
> +}
> +
> static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> .invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
> + .invalidate_range_end = i915_gem_userptr_mn_invalidate_range_end,
> };
>
> static struct i915_mmu_notifier *
> @@ -170,6 +185,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
> return ERR_PTR(-ENOMEM);
>
> spin_lock_init(&mn->lock);
> + init_rwsem(&mn->sem);
> mn->mn.ops = &i915_gem_userptr_notifier;
> mn->objects = RB_ROOT_CACHED;
> mn->wq = alloc_workqueue("i915-userptr-release",
> @@ -504,12 +520,15 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
>
> pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
> if (pvec != NULL) {
> + struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
> struct mm_struct *mm = obj->userptr.mm->mm;
> unsigned int flags = 0;
>
> if (!obj->userptr.read_only)
> flags |= FOLL_WRITE;
>
> + down_read(&mn->sem);
> +
> ret = -EFAULT;
> if (mmget_not_zero(mm)) {
> down_read(&mm->mmap_sem);
> @@ -528,6 +547,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
> up_read(&mm->mmap_sem);
> mmput(mm);
> }
> +
> + up_read(&mn->sem);
> }
>
> mutex_lock(&obj->mm.lock);
> @@ -636,15 +657,21 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> pinned = 0;
>
> if (mm == current->mm) {
> + struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
> +
> pvec = kvmalloc_array(num_pages, sizeof(struct page *),
> GFP_KERNEL |
> __GFP_NORETRY |
> __GFP_NOWARN);
> - if (pvec) /* defer to worker if malloc fails */
> +
> + /* defer to worker if malloc fails */
> + if (pvec && down_read_trylock(&mn->sem)) {
> pinned = __get_user_pages_fast(obj->userptr.ptr,
> num_pages,
> !obj->userptr.read_only,
> pvec);
> + up_read(&mn->sem);
> + }
> }
>
> active = false;
>
Simple enough but I don't dare say anything until results from shards
arrive.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list