[PATCH 63/63] drm/i915/gem: Acquire all vma/objects under reservation_ww_class
Chris Wilson
chris at chris-wilson.co.uk
Tue Jun 23 20:49:08 UTC 2020
In order to actually handle eviction and the like, we need to process
all the objects together under a common lock, the reservation_ww_class.
As such, do a memory reservation pass after looking up the object/vma,
which then feeds into the rest of execbuf [relocation, cmdparsing,
flushing and, of course, execution].
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
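For reference, eb_lock_vma() is assumed to implement the canonical
acquire/backoff loop over reservation_ww_class, roughly the sketch
below; lock_objects() and the objs[] array are illustrative names
only, not from this patch:

#include <linux/dma-resv.h>
#include <linux/ww_mutex.h>

/*
 * Lock every object's reservation, backing off and retrying on
 * deadlock, so any number of contexts can each take any number of
 * locks in any order without deadlocking each other.
 */
static int lock_objects(struct dma_resv **objs, unsigned int count,
			struct ww_acquire_ctx *acquire)
{
	struct dma_resv *contended = NULL;
	unsigned int i;
	int err;

	ww_acquire_init(acquire, &reservation_ww_class);
retry:
	for (i = 0; i < count; i++) {
		if (objs[i] == contended) {
			contended = NULL; /* already held via lock_slow */
			continue;
		}

		err = ww_mutex_lock(&objs[i]->lock, acquire);
		if (err) {
			struct dma_resv *busy = objs[i];

			/* Unwind all the locks taken so far. */
			while (i--)
				ww_mutex_unlock(&objs[i]->lock);
			if (contended) /* lock_slow target not yet reached */
				ww_mutex_unlock(&contended->lock);
			contended = NULL;

			if (err == -EDEADLK) {
				/* Wait for the contended lock, then retry. */
				ww_mutex_lock_slow(&busy->lock, acquire);
				contended = busy;
				goto retry;
			}

			ww_acquire_fini(acquire);
			return err;
		}
	}

	ww_acquire_done(acquire); /* no more locks will be taken */
	return 0; /* caller unlocks and calls ww_acquire_fini() later */
}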
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 121 +++++++++++++++---
1 file changed, 100 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index a0389c49361b..62120ce218e8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -53,10 +53,9 @@ struct eb_vma_array {
#define __EXEC_OBJECT_HAS_PIN BIT(31)
#define __EXEC_OBJECT_HAS_FENCE BIT(30)
-#define __EXEC_OBJECT_HAS_PAGES BIT(29)
-#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
-#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
-#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */
+#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
+#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
#define __EXEC_HAS_RELOC BIT(31)
#define __EXEC_INTERNAL_FLAGS (~0u << 31)
@@ -241,6 +240,8 @@ struct i915_execbuffer {
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
+ struct dma_fence *mm_fence;
+
struct i915_request *request; /** our request to build */
struct eb_vma *batch; /** identity of the batch obj/vma */
struct i915_vma *trampoline; /** trampoline used for chaining */
@@ -331,12 +332,7 @@ static inline void eb_unreserve_vma(struct eb_vma *ev)
if (ev->flags & __EXEC_OBJECT_HAS_PIN)
__i915_vma_unpin(vma);
- if (ev->flags & __EXEC_OBJECT_HAS_PAGES)
- i915_gem_object_unpin_pages(vma->obj);
-
- ev->flags &= ~(__EXEC_OBJECT_HAS_PIN |
- __EXEC_OBJECT_HAS_FENCE |
- __EXEC_OBJECT_HAS_PAGES);
+ ev->flags &= ~(__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE);
}
static void eb_vma_array_destroy(struct kref *kref)
@@ -667,6 +663,56 @@ eb_add_vma(struct i915_execbuffer *eb,
list_add_tail(&ev->lock_link, &eb->lock);
}
+static int eb_vma_get_pages(struct i915_execbuffer *eb,
+ struct eb_vma *ev,
+ u64 idx)
+{
+ struct i915_vma *vma = ev->vma;
+ int err;
+
+ /* XXX also preallocate PD for vma */
+
+ err = ____i915_gem_object_get_pages_async(vma->obj);
+ if (err)
+ return err;
+
+ /* Bump the obj's active refcount for the execbuf on this timeline */
+ return i915_active_ref(&vma->obj->mm.active, idx, eb->mm_fence);
+}
+
+static int eb_reserve_mm(struct i915_execbuffer *eb)
+{
+ const u64 idx = eb->context->timeline->fence_context;
+ struct ww_acquire_ctx acquire;
+ struct eb_vma *ev;
+ int err;
+
+ eb->mm_fence = __dma_fence_create_proxy(0, 0);
+ if (!eb->mm_fence)
+ return -ENOMEM;
+
+ ww_acquire_init(&acquire, &reservation_ww_class);
+
+ err = eb_lock_vma(eb, &acquire);
+ if (err)
+ goto out;
+
+ ww_acquire_done(&acquire);
+
+ list_for_each_entry(ev, &eb->lock, lock_link) {
+ struct i915_vma *vma = ev->vma;
+
+ if (err == 0)
+ err = eb_vma_get_pages(eb, ev, idx);
+
+ i915_vma_unlock(vma);
+ }
+
+out:
+ ww_acquire_fini(&acquire);
+ return err;
+}
+
struct eb_vm_work {
struct dma_fence_work base;
struct list_head unbound;
@@ -1314,20 +1360,9 @@ static int eb_vm_throttle(struct eb_vm_work *work)
static int eb_prepare_vma(struct eb_vma *ev)
{
- struct i915_vma *vma = ev->vma;
- int err;
-
ev->hole.flags = 0;
ev->bind_flags = 0;
- if (!(ev->flags & __EXEC_OBJECT_HAS_PAGES)) {
- err = i915_gem_object_pin_pages(vma->obj);
- if (err)
- return err;
-
- ev->flags |= __EXEC_OBJECT_HAS_PAGES;
- }
-
return 0;
}
@@ -2415,6 +2450,10 @@ static int eb_relocate(struct i915_execbuffer *eb)
if (err)
return err;
+ err = eb_reserve_mm(eb);
+ if (err)
+ return err;
+
err = eb_reserve_vm(eb);
if (err)
return err;
@@ -2846,6 +2885,12 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
{
int err;
+ if (eb->mm_fence) {
+ dma_fence_proxy_set_real(eb->mm_fence, &eb->request->fence);
+ dma_fence_put(eb->mm_fence);
+ eb->mm_fence = NULL;
+ }
+
if (eb->reloc_cache.fence) {
err = i915_request_await_dma_fence(eb->request,
&eb->reloc_cache.rq->fence);
@@ -3341,6 +3386,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_start_offset = args->batch_start_offset;
eb.batch_len = args->batch_len;
eb.trampoline = NULL;
+ eb.mm_fence = NULL;
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
@@ -3535,6 +3581,39 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb_reloc_signal(&eb, eb.reloc_cache.rq);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
+ if (eb.mm_fence) {
+ struct intel_timeline *tl = eb.context->timeline;
+ struct dma_fence *prev =
+ i915_active_fence_get(&tl->last_request);
+
+ /*
+ * We have to rollback our per-timeline reference token.
+ *
+ * However as we did not take note of the previous token on
+ * each object, we substitute the previous fence in our timeline
+ * as a replacement for this submission token. If there is no
+ * prior fence in the timeline, the reference token will be
+ * released.
+ *
+ * If the object was unused (or idle) in this timeline, and
+ * was instantiated for this execbuf, it will now persist until
+ * the end of the previous request. Possibly keeping it around
+ * until the user's next execbuf, but at any rate keeping it
+ * alive when it's not yet in use.
+ *
+ * If the object was only being used earlier in the timeline,
+ * we've just extended its live range to the end of the
+ * current timeline, which is a fair approximation of the
+ * intended acquisition.
+ *
+ * If the object was last used in the previous request, there
+ * is no change in live range.
+ */
+ dma_fence_proxy_set_real(eb.mm_fence, prev);
+ dma_fence_put(prev);
+
+ dma_fence_put(eb.mm_fence);
+ }
eb_unpin_engine(&eb);
err_context:
i915_gem_context_put(eb.gem_context);
--
2.20.1
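Postscript on the mm_fence lifecycle, pieced together from the hunks
above. __dma_fence_create_proxy() and dma_fence_proxy_set_real() come
from the proxy-fence patches earlier in this series (not upstream);
their signatures here are assumed from their use in this patch.

	/* 1. Create a placeholder fence before taking the ww locks. */
	eb->mm_fence = __dma_fence_create_proxy(0, 0);
	if (!eb->mm_fence)
		return -ENOMEM;

	/* 2. Under reservation_ww_class, keep each object's pages
	 * alive until the placeholder is signalled.
	 */
	err = i915_active_ref(&vma->obj->mm.active,
			      eb->context->timeline->fence_context,
			      eb->mm_fence);

	/* 3a. On success, resolve the proxy to the new request, so
	 * the hold lasts exactly as long as this submission.
	 */
	dma_fence_proxy_set_real(eb->mm_fence, &eb->request->fence);
	dma_fence_put(eb->mm_fence);

	/* 3b. On error, resolve it to the timeline's previous request
	 * instead (or release the hold outright if prev is NULL).
	 */
	prev = i915_active_fence_get(&tl->last_request);
	dma_fence_proxy_set_real(eb->mm_fence, prev);
	dma_fence_put(prev);
	dma_fence_put(eb->mm_fence);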