[PATCH] drm/i915/userptr: Wrap mmu_notifier inside its own rw_semaphore

Chris Wilson <chris@chris-wilson.co.uk>
Mon Mar 26 20:09:31 UTC 2018


We've always been blatantly ignoring mmu_notifier.h:

 * Invalidation of multiple concurrent ranges may be
 * optionally permitted by the driver. Either way the
 * establishment of sptes is forbidden in the range passed to
 * invalidate_range_begin/end for the whole duration of the
 * invalidate_range_begin/end critical section.

by not preventing concurrent calls to gup while an invalidate_range is
being processed. Wrap gup and invalidate_range in a paired rw_semaphore
so that lookups may still run concurrently with each other, but are
blocked for the whole duration of the invalidate_range. Further
refinement can be applied by tracking each invalidate_range against the
individual gup ranges; that should be done with a common set of
helpers, as all mmu_notifier users share the same need. I hear HMM is
one such toolbox...

For the time being, assume concurrent invalidate and lookup are rare,
but not rare enough to completely ignore.
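
A minimal sketch of the intended pairing (hypothetical names, not the
i915 structures): the lookup paths around gup take the semaphore for
read and so may proceed concurrently, while the
invalidate_range_start/end callbacks take it for write and thereby
exclude lookups for the whole critical section.

  #include <linux/rwsem.h>

  struct example_notifier {
  	struct rw_semaphore sem;	/* lookup vs invalidate */
  };

  static void example_init(struct example_notifier *en)
  {
  	init_rwsem(&en->sem);
  }

  static void example_lookup(struct example_notifier *en)
  {
  	down_read(&en->sem);	/* lookups may run concurrently */
  	/* ... get_user_pages() and binding of the result ... */
  	up_read(&en->sem);
  }

  static void example_invalidate_range_start(struct example_notifier *en)
  {
  	down_write(&en->sem);	/* block new lookups, wait for active ones */
  	/* ... cancel the affected objects ... */
  }

  static void example_invalidate_range_end(struct example_notifier *en)
  {
  	up_write(&en->sem);	/* lookups may resume */
  }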

v2: lies to keep lockdep happy

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_userptr.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index d596a8302ca3..8ac788c56f37 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -47,6 +47,7 @@ struct i915_mm_struct {
 
 struct i915_mmu_notifier {
 	spinlock_t lock;
+	struct rw_semaphore sem;
 	struct hlist_node node;
 	struct mmu_notifier mn;
 	struct rb_root_cached objects;
@@ -123,6 +124,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 	struct interval_tree_node *it;
 	LIST_HEAD(cancelled);
 
+	down_write(&mn->sem);
+
 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
 		return;
 
@@ -156,8 +159,20 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 		flush_workqueue(mn->wq);
 }
 
+static void i915_gem_userptr_mn_invalidate_range_end(struct mmu_notifier *_mn,
+						     struct mm_struct *mm,
+						     unsigned long start,
+						     unsigned long end)
+{
+	struct i915_mmu_notifier *mn =
+		container_of(_mn, struct i915_mmu_notifier, mn);
+
+	up_write(&mn->sem);
+}
+
 static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
 	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+	.invalidate_range_end = i915_gem_userptr_mn_invalidate_range_end,
 };
 
 static struct i915_mmu_notifier *
@@ -170,6 +185,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
 		return ERR_PTR(-ENOMEM);
 
 	spin_lock_init(&mn->lock);
+	init_rwsem(&mn->sem);
 	mn->mn.ops = &i915_gem_userptr_notifier;
 	mn->objects = RB_ROOT_CACHED;
 	mn->wq = alloc_workqueue("i915-userptr-release",
@@ -504,12 +520,16 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 
 	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
 	if (pvec != NULL) {
+		struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
 		struct mm_struct *mm = obj->userptr.mm->mm;
 		unsigned int flags = 0;
 
 		if (!obj->userptr.read_only)
 			flags |= FOLL_WRITE;
 
+		if (mn)
+			down_read_nested(&mn->sem, SINGLE_DEPTH_NESTING);
+
 		ret = -EFAULT;
 		if (mmget_not_zero(mm)) {
 			down_read(&mm->mmap_sem);
@@ -528,6 +548,9 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			up_read(&mm->mmap_sem);
 			mmput(mm);
 		}
+
+		if (mn)
+			up_read(&mn->sem);
 	}
 
 	mutex_lock(&obj->mm.lock);
@@ -636,15 +659,22 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 	pinned = 0;
 
 	if (mm == current->mm) {
+		struct i915_mmu_notifier *mn = obj->userptr.mm->mn;
+
 		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
 				      GFP_KERNEL |
 				      __GFP_NORETRY |
 				      __GFP_NOWARN);
-		if (pvec) /* defer to worker if malloc fails */
+
+		/* defer to worker if malloc fails */
+		if (pvec && (!mn || down_read_trylock(&mn->sem))) {
 			pinned = __get_user_pages_fast(obj->userptr.ptr,
 						       num_pages,
 						       !obj->userptr.read_only,
 						       pvec);
+			if (mn)
+				up_read(&mn->sem);
+		}
 	}
 
 	active = false;
-- 
2.16.3


