[PATCH 28/28] fixup userptr implementation

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Mon Aug 3 08:46:56 UTC 2020


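Pin the userptr pages at execbuf time instead of inside the get_pages
path: eb_lookup_vmas() (and the relocation slowpath, via the new
eb_reinit_userptr()) call i915_gem_object_userptr_submit_init(), which
pins the pages with pin_user_pages_fast(), stores the pvec and the mmu
notifier seqno under mm.notifier_lock, and takes a page_ref on them.
i915_gem_userptr_get_pages() then only builds its sg_table from the
already pinned pvec, and the reference is dropped again through
i915_gem_object_userptr_submit_fini() when the vmas are released.
Since active_instance() can now be reached from a context that must not
call into fs_reclaim, it also drops its GFP_KERNEL preallocation and
allocates the node with GFP_ATOMIC instead.

With the rest of the series, the expected call order on the execbuf
path is roughly:

  i915_gem_object_userptr_submit_init();  /* pin pages, take a pvec ref */
  ...                                      /* get_pages reuses the pvec */
  i915_gem_object_userptr_submit_done();   /* -EAGAIN if the mmu notifier
                                            * invalidated the range */
  ...
  i915_gem_object_userptr_submit_fini();   /* drop the pvec ref */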
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  52 +++++-
 drivers/gpu/drm/i915/gem/i915_gem_object.h    |   2 +
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |   2 +
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c   | 167 +++++++++++-------
 drivers/gpu/drm/i915/i915_active.c            |  20 +--
 5 files changed, 167 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 2a6e100a4465..1d8d2c935ef5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -49,14 +49,16 @@ enum {
 
 #define __EXEC_OBJECT_HAS_PIN		BIT(31)
 #define __EXEC_OBJECT_HAS_FENCE		BIT(30)
-#define __EXEC_OBJECT_NEEDS_MAP		BIT(29)
-#define __EXEC_OBJECT_NEEDS_BIAS	BIT(28)
-#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 28) /* all of the above */
+#define __EXEC_OBJECT_USERPTR_INIT	BIT(29)
+#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
+#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
 #define __EXEC_ENGINE_PINNED	BIT(30)
-#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
+#define __EXEC_USERPTR_USED	BIT(29)
+#define __EXEC_INTERNAL_FLAGS	(~0u << 29)
 #define UPDATE			PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -843,6 +845,15 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		}
 
 		eb_add_vma(eb, i, batch, vma);
+
+		if (i915_gem_object_is_userptr(vma->obj)) {
+			err = i915_gem_object_userptr_submit_init(vma->obj);
+			if (err)
+				return err;
+
+			eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
+			eb->args->flags |= __EXEC_USERPTR_USED;
+		}
 	}
 
 	if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
@@ -950,6 +961,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
 
 		eb_unreserve_vma(ev);
 
+		if (ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
+			ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
+			i915_gem_object_userptr_submit_fini(vma->obj);
+		}
+
 		if (final)
 			i915_vma_put(vma);
 	}
@@ -1885,6 +1901,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
 	return 0;
 }
 
+static int eb_reinit_userptr(struct i915_execbuffer *eb)
+{
+	const unsigned int count = eb->buffer_count;
+	unsigned int i;
+	int ret;
+
+	if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
+		return 0;
+
+	for (i = 0; i < count; i++) {
+		struct eb_vma *ev = &eb->vma[i];
+
+		if (!i915_gem_object_is_userptr(ev->vma->obj))
+			continue;
+
+		ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
+		if (ret)
+			return ret;
+
+		ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
+	}
+
+	return 0;
+}
+
 static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
 					   struct i915_request *rq)
 {
@@ -1940,6 +1981,9 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
 		err = 0;
 	}
 
+	if (!err)
+		err = eb_reinit_userptr(eb);
+
 err_relock:
 	i915_gem_ww_ctx_init(&eb->ww, true);
 	if (err)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 11f28f9ada28..745e2963da52 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -529,7 +529,9 @@ i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
 	return obj->ops == &i915_gem_userptr_ops;
 }
 
+int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
 int i915_gem_object_userptr_submit_begin(struct drm_i915_gem_object *obj);
 int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
+void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
 
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 46d0d82e0d32..fcaba8756557 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -289,6 +289,8 @@ struct drm_i915_gem_object {
 			unsigned long notifier_seq;
 
 			struct mmu_interval_notifier notifier;
+			struct page **pvec;
+			int page_ref;
 		} userptr;
 
 		unsigned long scratch;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index a4d3fe63a9a4..5781b6d65ed4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -119,28 +119,54 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
 
 #endif
 
-static struct sg_table *
-__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
-			       struct page **pvec, unsigned long num_pages)
+static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	mutex_lock(&i915->mm.notifier_lock);
+	if (!--obj->userptr.page_ref) {
+		const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
+
+		unpin_user_pages(obj->userptr.pvec, num_pages);
+		kfree(obj->userptr.pvec);
+		obj->userptr.pvec = NULL;
+	}
+	GEM_BUG_ON(obj->userptr.page_ref < 0);
+	mutex_unlock(&i915->mm.notifier_lock);
+}
+
+static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
 	unsigned int max_segment = i915_sg_segment_size();
 	struct sg_table *st;
 	unsigned int sg_page_sizes;
+	struct page **pvec;
 	int ret;
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
+
+	mutex_lock(&i915->mm.notifier_lock);
+	if (GEM_WARN_ON(!obj->userptr.page_ref)) {
+		mutex_unlock(&i915->mm.notifier_lock);
+		ret = -EFAULT;
+		goto err_free;
+	}
+
+	obj->userptr.page_ref++;
+	pvec = obj->userptr.pvec;
+	mutex_unlock(&i915->mm.notifier_lock);
 
 alloc_table:
 	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
 					  0, num_pages << PAGE_SHIFT,
 					  max_segment,
 					  GFP_KERNEL);
-	if (ret) {
-		kfree(st);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err;
 
 	ret = i915_gem_gtt_prepare_pages(obj, st);
 	if (ret) {
@@ -151,63 +177,19 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 			goto alloc_table;
 		}
 
-		kfree(st);
-		return ERR_PTR(ret);
+		goto err;
 	}
 
 	sg_page_sizes = i915_sg_page_sizes(st->sgl);
 
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
-	return st;
-}
-
-static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
-{
-	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
-	struct page **pvec;
-	struct sg_table *pages;
-	unsigned int gup_flags = 0;
-	int pinned, ret;
-
-	if (obj->userptr.notifier.mm && obj->userptr.notifier.mm != current->mm)
-		return -EFAULT;
-
-	pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
-	if (!pvec)
-		return -ENOMEM;
-
-	/*
-	 * Using __get_user_pages_fast() with a read-only
-	 * access is questionable. A read-only page may be
-	 * COW-broken, and then this might end up giving
-	 * the wrong side of the COW..
-	 *
-	 * We may or may not care.
-	 */
-	if (!i915_gem_object_is_readonly(obj))
-		gup_flags |= FOLL_WRITE;
-
-	pinned = ret = 0;
-	while (pinned < num_pages) {
-		ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
-					  num_pages - pinned, gup_flags,
-					  &pvec[pinned]);
-		if (ret < 0)
-			break;
-
-		pinned += ret;
-	}
-
-	if (ret >= 0) {
-		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
-		ret = PTR_ERR_OR_ZERO(pages);
-	}
-
-	if (ret < 0 && pinned > 0)
-		unpin_user_pages(pvec, pinned);
-	kvfree(pvec);
+	return 0;
 
+err:
+	i915_gem_object_userptr_drop_ref(obj);
+err_free:
+	kfree(st);
 	return ret;
 }
 
@@ -283,21 +265,77 @@ int i915_gem_object_userptr_submit_begin(struct drm_i915_gem_object *obj)
 	if (!IS_ERR_OR_NULL(pages))
 		i915_gem_userptr_put_pages(obj, pages);
 
-	if (obj->userptr.notifier.mm)
-		obj->userptr.notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);
-
 	err = ____i915_gem_object_get_pages(obj);
 	mutex_unlock(&obj->mm.lock);
 
 	return err;
 }
 
+int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
+	struct page **pvec;
+	unsigned int gup_flags = 0;
+	unsigned long notifier_seq;
+	int pinned, ret;
+	bool reffed = false;
+
+	if (obj->userptr.notifier.mm)
+		notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);
+
+	if (obj->userptr.notifier.mm && obj->userptr.notifier.mm != current->mm)
+		return -EFAULT;
+
+	pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pvec)
+		return -ENOMEM;
+
+	/*
+	 * Using pin_user_pages_fast() with a read-only
+	 * access is questionable. A read-only page may be
+	 * COW-broken, and then this might end up giving
+	 * the wrong side of the COW..
+	 *
+	 * We may or may not care.
+	 */
+	if (!i915_gem_object_is_readonly(obj))
+		gup_flags |= FOLL_WRITE;
+
+	pinned = ret = 0;
+	while (pinned < num_pages) {
+		ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
+					  num_pages - pinned, gup_flags,
+					  &pvec[pinned]);
+		if (ret < 0)
+			break;
+
+		pinned += ret;
+	}
+
+	if (ret >= 0) {
+		mutex_lock(&i915->mm.notifier_lock);
+		if (!obj->userptr.page_ref++) {
+			obj->userptr.pvec = pvec;
+			obj->userptr.notifier_seq = notifier_seq;
+			reffed = true;
+		}
+		mutex_unlock(&i915->mm.notifier_lock);
+	}
+
+	if (!reffed) {
+		unpin_user_pages(pvec, pinned);
+		kvfree(pvec);
+	}
+
+	return ret < 0 ? ret : 0;
+}
+
 int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj)
 {
 	if (obj->userptr.notifier.mm &&
 	    mmu_interval_read_retry(&obj->userptr.notifier,
 				    obj->userptr.notifier_seq)) {
-
 		/* We collided with the mmu notifier, need to retry */
 
 		return -EAGAIN;
@@ -306,6 +344,11 @@ int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_userptr_drop_ref(obj);
+}
+
 static void
 i915_gem_userptr_release(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b0a6522be3d1..2bf1e444dda7 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -296,18 +296,13 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
 static struct i915_active_fence *
 active_instance(struct i915_active *ref, u64 idx)
 {
-	struct active_node *node, *prealloc;
+	struct active_node *node;
 	struct rb_node **p, *parent;
 
 	node = __active_lookup(ref, idx);
 	if (likely(node))
 		return &node->base;
 
-	/* Preallocate a replacement, just in case */
-	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
-	if (!prealloc)
-		return NULL;
-
 	spin_lock_irq(&ref->tree_lock);
 	GEM_BUG_ON(i915_active_is_idle(ref));
 
@@ -317,10 +312,8 @@ active_instance(struct i915_active *ref, u64 idx)
 		parent = *p;
 
 		node = rb_entry(parent, struct active_node, node);
-		if (node->timeline == idx) {
-			kmem_cache_free(global.slab_cache, prealloc);
+		if (node->timeline == idx)
 			goto out;
-		}
 
 		if (node->timeline < idx)
 			p = &parent->rb_right;
@@ -328,7 +321,14 @@ active_instance(struct i915_active *ref, u64 idx)
 			p = &parent->rb_left;
 	}
 
-	node = prealloc;
+	/*
+	 * XXX: We should preallocate this before i915_active_ref() is ever
+	 * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC.
+	 */
+	node = kmem_cache_alloc(global.slab_cache, GFP_ATOMIC);
+	if (!node)
+		goto out;
+
 	__i915_active_fence_init(&node->base, NULL, node_retire);
 	node->ref = ref;
 	node->timeline = idx;
-- 
2.28.0.rc2