[PATCH 28/28] fixup userptr implementation
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Mon, 3 Aug 2020 14:41:14 UTC
Pin the userptr pages up front in i915_gem_object_userptr_submit_init(),
before any object locks are taken, and cache the pinned vector in
obj->userptr.pvec with a page_ref count protected by mm.notifier_lock;
get_pages then only takes an extra reference on the cached vector
instead of calling pin_user_pages itself. Reserve a shared dma-resv
slot for read-only objects during validation, drop the submit_init()
reference when the vmas are released, and re-run submit_init() after
the relocation slow path. Since callers of i915_active_ref() can no
longer enter fs_reclaim, drop the preallocation in active_instance()
and allocate the node with GFP_ATOMIC under the tree lock instead.
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 58 +++++-
drivers/gpu/drm/i915/gem/i915_gem_object.h | 2 +
.../gpu/drm/i915/gem/i915_gem_object_types.h | 2 +
drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 169 +++++++++++-------
drivers/gpu/drm/i915/i915_active.c | 20 +--
5 files changed, 175 insertions(+), 76 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 2a6e100a4465..54fa2b5d62a2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -49,14 +49,16 @@ enum {
#define __EXEC_OBJECT_HAS_PIN BIT(31)
#define __EXEC_OBJECT_HAS_FENCE BIT(30)
-#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
-#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
-#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
+#define __EXEC_OBJECT_USERPTR_INIT BIT(29)
+#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
+#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_ENGINE_PINNED BIT(30)
-#define __EXEC_INTERNAL_FLAGS (~0u << 30)
+#define __EXEC_USERPTR_USED BIT(29)
+#define __EXEC_INTERNAL_FLAGS (~0u << 29)
#define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024)
@@ -843,6 +845,15 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
}
eb_add_vma(eb, i, batch, vma);
+
+ if (i915_gem_object_is_userptr(vma->obj)) {
+ err = i915_gem_object_userptr_submit_init(vma->obj);
+ if (err)
+ return err;
+
+ eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
+ eb->args->flags |= __EXEC_USERPTR_USED;
+ }
}
if (unlikely(eb->batch->flags & EXEC_OBJECT_WRITE)) {
@@ -906,6 +917,12 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
}
}
+ if (!(ev->flags & EXEC_OBJECT_WRITE)) {
+ err = dma_resv_reserve_shared(vma->resv, 1);
+ if (err)
+ return err;
+ }
+
GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
}
@@ -950,6 +967,11 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
eb_unreserve_vma(ev);
+ if (ev->flags & __EXEC_OBJECT_USERPTR_INIT) {
+ ev->flags &= ~__EXEC_OBJECT_USERPTR_INIT;
+ i915_gem_object_userptr_submit_fini(vma->obj);
+ }
+
if (final)
i915_vma_put(vma);
}
@@ -1885,6 +1907,31 @@ static int eb_prefault_relocations(const struct i915_execbuffer *eb)
return 0;
}
+static int eb_reinit_userptr(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+ int ret;
+
+ if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
+ return 0;
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+
+ if (!i915_gem_object_is_userptr(ev->vma->obj))
+ continue;
+
+ ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
+ if (ret)
+ return ret;
+
+ ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
+ }
+
+ return 0;
+}
+
static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
struct i915_request *rq)
{
@@ -1940,6 +1987,9 @@ static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb,
err = 0;
}
+ if (!err)
+ err = eb_reinit_userptr(eb);
+
err_relock:
i915_gem_ww_ctx_init(&eb->ww, true);
if (err)
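
The dma_resv_reserve_shared() call added to eb_validate_vmas() above is
the usual reserve-then-add pattern: the fallible, possibly-allocating
step runs before any work is committed, so that publishing the read
fence afterwards cannot fail. A minimal sketch of the pattern, with the
i915_vma plumbing elided (the function name and surrounding code are
illustrative, not part of the patch):

    /*
     * Reserve-then-add for a shared dma-resv fence: the allocation
     * happens up front, the add itself is infallible.
     */
    static int publish_read_fence(struct dma_resv *resv,
                                  struct dma_fence *fence)
    {
            int err;

            /* May allocate and fail; do it before committing any work. */
            err = dma_resv_reserve_shared(resv, 1);
            if (err)
                    return err;

            /* ... submit the work that will signal @fence ... */

            /* Cannot fail now that a slot is reserved. */
            dma_resv_add_shared_fence(resv, fence);
            return 0;
    }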
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 11f28f9ada28..745e2963da52 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -529,7 +529,9 @@ i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
return obj->ops == &i915_gem_userptr_ops;
}
+int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_submit_begin(struct drm_i915_gem_object *obj);
int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
+void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj);
#endif
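
Together the four declarations define the caller contract: pin pages
while no object locks are held, submit, then detect notifier races and
drop the extra pin. A hedged sketch of the expected flow, inferred from
the execbuffer changes above (not a literal excerpt from the driver):

    static int submit_userptr(struct drm_i915_gem_object *obj)
    {
            int err;

    retry:
            /* Pins the backing pages; called without object locks. */
            err = i915_gem_object_userptr_submit_init(obj);
            if (err)
                    return err;

            /* ... lock, bind and queue the request here ... */

            /* Did an invalidation race with the pin? */
            err = i915_gem_object_userptr_submit_done(obj);

            /* Drop the pin taken by submit_init(). */
            i915_gem_object_userptr_submit_fini(obj);

            if (err == -EAGAIN)
                    goto retry;

            return err;
    }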
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 46d0d82e0d32..fcaba8756557 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -289,6 +289,8 @@ struct drm_i915_gem_object {
unsigned long notifier_seq;
struct mmu_interval_notifier notifier;
+ struct page **pvec;
+ int page_ref;
} userptr;
unsigned long scratch;
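
The two new fields form a small shared pin cache: userptr.pvec holds
the pinned page array and userptr.page_ref counts its users, both
serialized by mm.notifier_lock in the userptr code below. A sketch of
the discipline (the lock is passed in explicitly here for illustration;
error handling is elided):

    /* First user installs the freshly pinned vector. */
    static void pvec_get(struct mutex *lock,
                         struct drm_i915_gem_object *obj,
                         struct page **pinned)
    {
            mutex_lock(lock);
            if (!obj->userptr.page_ref++)
                    obj->userptr.pvec = pinned;
            mutex_unlock(lock);
    }

    /* Last user unpins and frees it. */
    static void pvec_put(struct mutex *lock,
                         struct drm_i915_gem_object *obj,
                         unsigned long num_pages)
    {
            mutex_lock(lock);
            if (!--obj->userptr.page_ref) {
                    unpin_user_pages(obj->userptr.pvec, num_pages);
                    kvfree(obj->userptr.pvec);
                    obj->userptr.pvec = NULL;
            }
            mutex_unlock(lock);
    }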
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index a4d3fe63a9a4..e5bc230a644a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -119,28 +119,54 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
#endif
-static struct sg_table *
-__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
- struct page **pvec, unsigned long num_pages)
+static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ mutex_lock(&i915->mm.notifier_lock);
+ if (!--obj->userptr.page_ref) {
+ const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
+
+ unpin_user_pages(obj->userptr.pvec, num_pages);
+ kvfree(obj->userptr.pvec);
+ obj->userptr.pvec = NULL;
+ }
+ GEM_BUG_ON(obj->userptr.page_ref < 0);
+ mutex_unlock(&i915->mm.notifier_lock);
+}
+
+static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
unsigned int max_segment = i915_sg_segment_size();
struct sg_table *st;
unsigned int sg_page_sizes;
+ struct page **pvec;
int ret;
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
+
+ mutex_lock(&i915->mm.notifier_lock);
+ if (GEM_WARN_ON(!obj->userptr.page_ref)) {
+ mutex_unlock(&i915->mm.notifier_lock);
+ ret = -EFAULT;
+ goto err_free;
+ }
+
+ obj->userptr.page_ref++;
+ pvec = obj->userptr.pvec;
+ mutex_unlock(&i915->mm.notifier_lock);
alloc_table:
ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
0, num_pages << PAGE_SHIFT,
max_segment,
GFP_KERNEL);
- if (ret) {
- kfree(st);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto err;
ret = i915_gem_gtt_prepare_pages(obj, st);
if (ret) {
@@ -151,63 +177,19 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
goto alloc_table;
}
- kfree(st);
- return ERR_PTR(ret);
+ goto err;
}
sg_page_sizes = i915_sg_page_sizes(st->sgl);
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
- return st;
-}
-
-static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
-{
- const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
- struct page **pvec;
- struct sg_table *pages;
- unsigned int gup_flags = 0;
- int pinned, ret;
-
- if (obj->userptr.notifier.mm && obj->userptr.notifier.mm != current->mm)
- return -EFAULT;
-
- pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
- if (!pvec)
- return -ENOMEM;
-
- /*
- * Using __get_user_pages_fast() with a read-only
- * access is questionable. A read-only page may be
- * COW-broken, and then this might end up giving
- * the wrong side of the COW..
- *
- * We may or may not care.
- */
- if (!i915_gem_object_is_readonly(obj))
- gup_flags |= FOLL_WRITE;
-
- pinned = ret = 0;
- while (pinned < num_pages) {
- ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
- num_pages - pinned, gup_flags,
- &pvec[pinned]);
- if (ret < 0)
- break;
-
- pinned += ret;
- }
-
- if (ret >= 0) {
- pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
- ret = PTR_ERR_OR_ZERO(pages);
- }
-
- if (ret < 0 && pinned > 0)
- unpin_user_pages(pvec, pinned);
- kvfree(pvec);
+ return 0;
+err:
+ i915_gem_object_userptr_drop_ref(obj);
+err_free:
+ kfree(st);
return ret;
}
@@ -283,21 +265,79 @@ int i915_gem_object_userptr_submit_begin(struct drm_i915_gem_object *obj)
if (!IS_ERR_OR_NULL(pages))
i915_gem_userptr_put_pages(obj, pages);
- if (obj->userptr.notifier.mm)
- obj->userptr.notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);
-
err = ____i915_gem_object_get_pages(obj);
mutex_unlock(&obj->mm.lock);
return err;
}
+int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
+ struct page **pvec;
+ unsigned int gup_flags = 0;
+ unsigned long notifier_seq = 0;
+ int pinned, ret;
+ bool reffed = false;
+
+ if (obj->userptr.notifier.mm)
+ notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);
+
+ if (obj->userptr.notifier.mm && obj->userptr.notifier.mm != current->mm)
+ return -EFAULT;
+
+ pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
+ if (!pvec)
+ return -ENOMEM;
+
+ /*
+ * Using pin_user_pages_fast() with a read-only
+ * access is questionable. A read-only page may be
+ * COW-broken, and then this might end up giving
+ * the wrong side of the COW..
+ *
+ * We may or may not care.
+ */
+ if (!i915_gem_object_is_readonly(obj))
+ gup_flags |= FOLL_WRITE;
+
+ pinned = ret = 0;
+ while (pinned < num_pages) {
+ ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
+ num_pages - pinned, gup_flags,
+ &pvec[pinned]);
+ if (ret < 0)
+ break;
+
+ pinned += ret;
+ }
+
+ if (ret >= 0) {
+ ret = 0;
+
+ mutex_lock(&i915->mm.notifier_lock);
+ if (!obj->userptr.page_ref++) {
+ obj->userptr.pvec = pvec;
+ obj->userptr.notifier_seq = notifier_seq;
+ reffed = true;
+ }
+ mutex_unlock(&i915->mm.notifier_lock);
+ }
+
+ if (!reffed) {
+ unpin_user_pages(pvec, pinned);
+ kvfree(pvec);
+ }
+
+ return ret;
+}
+
int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj)
{
if (obj->userptr.notifier.mm &&
mmu_interval_read_retry(&obj->userptr.notifier,
obj->userptr.notifier_seq)) {
-
/* We collided with the mmu notifier, need to retry */
return -EAGAIN;
@@ -306,6 +346,11 @@ int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj)
return 0;
}
+void i915_gem_object_userptr_submit_fini(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_userptr_drop_ref(obj);
+}
+
static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
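
submit_init() and submit_done() together implement the standard
mmu_interval_notifier sequence pattern: sample the interval's sequence
count before pinning, then retry if an invalidation ran in between.
Condensed to its skeleton (i915 bookkeeping stripped, and the retry
loop inlined where the driver spreads it across submit_init() and
submit_done(); a real caller also loops until all pages are pinned):

    static int pin_user_range(struct mmu_interval_notifier *mni,
                              unsigned long start, int npages,
                              struct page **pvec)
    {
            unsigned long seq;
            int pinned;

    again:
            seq = mmu_interval_read_begin(mni);

            pinned = pin_user_pages_fast(start, npages, FOLL_WRITE, pvec);
            if (pinned < 0)
                    return pinned;

            /* ... publish pages under the driver's notifier lock ... */

            if (mmu_interval_read_retry(mni, seq)) {
                    /* Raced with an invalidation; unpin and restart. */
                    unpin_user_pages(pvec, pinned);
                    goto again;
            }

            return 0;
    }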
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b0a6522be3d1..2bf1e444dda7 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -296,18 +296,13 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
static struct i915_active_fence *
active_instance(struct i915_active *ref, u64 idx)
{
- struct active_node *node, *prealloc;
+ struct active_node *node;
struct rb_node **p, *parent;
node = __active_lookup(ref, idx);
if (likely(node))
return &node->base;
- /* Preallocate a replacement, just in case */
- prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
- if (!prealloc)
- return NULL;
-
spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));
@@ -317,10 +312,8 @@ active_instance(struct i915_active *ref, u64 idx)
parent = *p;
node = rb_entry(parent, struct active_node, node);
- if (node->timeline == idx) {
- kmem_cache_free(global.slab_cache, prealloc);
+ if (node->timeline == idx)
goto out;
- }
if (node->timeline < idx)
p = &parent->rb_right;
@@ -328,7 +321,14 @@ active_instance(struct i915_active *ref, u64 idx)
p = &parent->rb_left;
}
- node = prealloc;
+ /*
+ * XXX: We should preallocate this before i915_active_ref() is ever
+ * called, but we cannot call into fs_reclaim() anyway, so use GFP_ATOMIC.
+ */
+ node = kmem_cache_alloc(global.slab_cache, GFP_ATOMIC);
+ if (!node)
+ goto out;
+
__i915_active_fence_init(&node->base, NULL, node_retire);
node->ref = ref;
node->timeline = idx;
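
The i915_active change trades the GFP_KERNEL preallocation for a
GFP_ATOMIC allocation inside the tree lock. The constraint is the
usual one: GFP_KERNEL may sleep and recurse into fs_reclaim, which is
forbidden in atomic context and, per the XXX comment, for
i915_active_ref() callers after this series; GFP_ATOMIC cannot sleep
but may fail, so callers must now tolerate a NULL node. Reduced to a
sketch (names are made up for the example):

    static struct active_node *alloc_node_atomic(struct kmem_cache *slab,
                                                 spinlock_t *tree_lock)
    {
            struct active_node *node;

            spin_lock_irq(tree_lock);
            /*
             * GFP_KERNEL could enter fs_reclaim and sleep here, which
             * is illegal under a spinlock; GFP_ATOMIC may return NULL
             * instead, and the caller must handle that.
             */
            node = kmem_cache_alloc(slab, GFP_ATOMIC);
            spin_unlock_irq(tree_lock);

            return node;
    }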
--
2.28.0.rc2