[PATCH 6/6] drm/i915: Use vma resources for async unbinding

Thomas Hellström thomas.hellstrom at linux.intel.com
Fri Dec 3 12:55:25 UTC 2021


Implement async (non-blocking) unbinding by not syncing the vma before
calling unbind on the vma_resource.
Add the resulting unbind fence to the object's dma_resv from where it is
picked up by the ttm migration code.
Ideally these unbind fences should be coalesced with the migration blit
fence to avoid stalling the migration blit waiting for unbind, as they
can certainly go on in parallel, but since we don't yet have a
reasonable data structure to use to coalesce fences and attach the
resulting fence to a timeline, we defer that for now.
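
The resulting flow, condensed from the i915_gem_ttm_move.c and i915_vma.c
hunks below (error handling omitted; a sketch, not the full code):

	/* TTM move notify path requests a non-blocking unbind */
	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
				     I915_GEM_OBJECT_UNBIND_ASYNC);

	/* per vma, this ends up in i915_vma_unbind_async() */
	fence = __i915_vma_unbind_async(vma);	/* under vm->mutex */
	dma_resv_add_shared_fence(obj->base.resv, fence); /* ttm move awaits it */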

Note that with async unbinding, even while the unbind waits for the
preceding bind to complete before unbinding, the vma itself might have been
destroyed in the process, clearing the vma pages. Therefore we can
only allow async unbinding if we have a refcounted sg-list and keep a
refcount on that for the vma resource pages to stay intact until
binding occurs. If this condition is not met, a request for an async
unbind is diverted to a sync unbind.
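
The checks that divert an async unbind request to a sync unbind, condensed
from the i915_vma_unbind_async() hunk below (the full version also diverts
if reserving a shared dma_resv slot fails):

	if (!obj->mm.rsgt)	/* no refcounted sg-list for the backing pages */
		return i915_vma_unbind(vma);

	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
		/* Might require pm. Not handled yet. */
		return i915_vma_unbind(vma);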

Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c |  11 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c         |   2 +-
 drivers/gpu/drm/i915/gt/intel_gtt.c          |   4 +
 drivers/gpu/drm/i915/gt/intel_gtt.h          |   3 +
 drivers/gpu/drm/i915/i915_drv.h              |   1 +
 drivers/gpu/drm/i915/i915_gem.c              |   3 +
 drivers/gpu/drm/i915/i915_vma.c              | 162 ++++++++++++---
 drivers/gpu/drm/i915/i915_vma.h              |   3 +-
 drivers/gpu/drm/i915/i915_vma_resource.c     | 201 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_vma_resource.h     |  27 ++-
 10 files changed, 377 insertions(+), 40 deletions(-)
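
For reviewers, the life cycle of the unbind fence introduced in
i915_vma_resource.c, summarized from the hunks below:

	i915_vma_unbind_async()			/* takes vm->mutex */
	  __i915_vma_evict(vma, true)
	    i915_vma_resource_unbind()		/* commits vma_res->chain */
	  dma_resv_add_shared_fence(obj->base.resv, fence)

	/* once vma_res->chain completes (vma activity has drained) */
	i915_vma_resource_fence_notify(FENCE_COMPLETE)
	  run the unbind inline if nothing was pending, otherwise:
	  vma_res_itree_insert(vma_res, &vm->pending_unbind)
	  queue_work(system_unbound_wq, &vma_res->work)

	i915_vma_resource_unbind_work()
	  vma_res->ops->unbind_vma(vma_res->vm, vma_res)
	  __i915_vma_resource_unhold()		/* signals unbind_fence, removes
						 * the node from pending_unbind */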

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 4b6f3cda15b6..89fdd47967d6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo)
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 	int ret;
 
-	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+	/*
+	 * Note: The async unbinding here will actually transform the
+	 * blocking wait for unbind into a wait before finally submitting
+	 * evict / migration blit and thus stall the migration timeline
+	 * which may not be good for overall throughput. We should make
+	 * sure we await the unbind fences *after* the migration blit
+	 * instead of *before* as we currently do.
+	 */
+	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
+				     I915_GEM_OBJECT_UNBIND_ASYNC);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 60bbf25f712b..734a3d7d2db0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -145,7 +145,7 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 			continue;
 
 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
-			__i915_vma_evict(vma);
+			__i915_vma_evict(vma, false);
 			drm_mm_remove_node(&vma->node);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 0dd254cb1f69..5347bf2d1cd7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -160,6 +160,9 @@ static void __i915_vm_release(struct work_struct *work)
 	struct i915_address_space *vm =
 		container_of(work, struct i915_address_space, release_work);
 
+	/* Synchronize async unbinds. */
+	i915_vma_resource_bind_dep_sync_all(vm);
+
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -188,6 +191,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	if (!kref_read(&vm->resv_ref))
 		kref_init(&vm->resv_ref);
 
+	vm->pending_unbind = RB_ROOT_CACHED;
 	INIT_WORK(&vm->release_work, __i915_vm_release);
 	atomic_set(&vm->open, 1);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 0cbe9683df4e..de5344f56f89 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -264,6 +264,9 @@ struct i915_address_space {
 	/* Flags used when creating page-table objects for this vm */
 	unsigned long lmem_pt_obj_flags;
 
+	/* Interval tree for pending unbind vma resources */
+	struct rb_root_cached pending_unbind;
+
 	struct drm_i915_gem_object *
 		(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 85bb8d3107f0..62d554149292 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1672,6 +1672,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
 #define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
 #define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
+#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4)
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 527228d4da7e..22ffa6636f0e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -165,6 +165,9 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 					} else {
 						ret = -EBUSY;
 					}
+				} else if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
+					assert_object_held(vma->obj);
+					ret = i915_vma_unbind_async(vma);
 				} else {
 					ret = i915_vma_unbind(vma);
 				}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 8677b01f9e8f..64a27a09848b 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -284,11 +284,11 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
 
 struct i915_vma_work {
 	struct dma_fence_work base;
-	struct i915_address_space *vm;
 	struct i915_vm_pt_stash stash;
-	struct i915_vma *vma;
+	struct i915_vma_resource *vma_res;
 	struct drm_i915_gem_object *pinned;
 	struct i915_sw_dma_fence_cb cb;
+	struct i915_refct_sgt *rsgt;
 	enum i915_cache_level cache_level;
 	unsigned int flags;
 };
@@ -296,10 +296,11 @@ struct i915_vma_work {
 static void __vma_bind(struct dma_fence_work *work)
 {
 	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-	struct i915_vma *vma = vw->vma;
+	struct i915_vma_resource *vma_res = vw->vma_res;
+
+	vma_res->ops->bind_vma(vma_res->vm, &vw->stash,
+			       vma_res, vw->cache_level, vw->flags);
 
-	vma->ops->bind_vma(vw->vm, &vw->stash,
-			   vma->resource, vw->cache_level, vw->flags);
 }
 
 static void __vma_release(struct dma_fence_work *work)
@@ -311,8 +312,10 @@ static void __vma_release(struct dma_fence_work *work)
 		i915_gem_object_put(vw->pinned);
 	}
 
-	i915_vm_free_pt_stash(vw->vm, &vw->stash);
-	i915_vm_put(vw->vm);
+	i915_vm_free_pt_stash(vw->vma_res->vm, &vw->stash);
+	i915_vma_resource_put(vw->vma_res);
+	if (vw->rsgt)
+		i915_refct_sgt_put(vw->rsgt);
 }
 
 static const struct dma_fence_work_ops bind_ops = {
@@ -384,12 +387,13 @@ I915_SELFTEST_EXPORT void
 i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
 				struct i915_vma *vma)
 {
-	__i915_vma_resource_init(vma_res);
+	__i915_vma_resource_init(vma_res, vma->vm);
 
 	vma_res->bi.pages = vma->pages;
 	vma_res->bi.page_sizes = vma->page_sizes;
 	vma_res->bi.readonly = i915_gem_object_is_readonly(vma->obj);
 	vma_res->bi.lmem = i915_gem_object_is_lmem(vma->obj);
+	vma_res->ops = vma->ops;
 	vma_res->private = vma->private;
 	vma_res->start = vma->node.start;
 	vma_res->node_size = vma->node.size;
@@ -417,6 +421,7 @@ int i915_vma_bind(struct i915_vma *vma,
 {
 	u32 bind_flags;
 	u32 vma_flags;
+	int ret;
 
 	lockdep_assert_held(&vma->vm->mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -448,6 +453,23 @@ int i915_vma_bind(struct i915_vma *vma,
 
 	GEM_BUG_ON(!vma->pages);
 
+	/* Wait for or await async unbinds touching our range */
+	if (work && bind_flags & vma->vm->bind_async_flags)
+		ret = i915_vma_resource_bind_dep_await(vma->vm, &work->base,
+						       vma->node.start,
+						       vma->node.size,
+						       true,
+						       GFP_NOWAIT |
+						       __GFP_RETRY_MAYFAIL |
+						       __GFP_NOWARN);
+	else
+		ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start,
+						      vma->node.size, true);
+	if (ret) {
+		kfree(vma_res);
+		return ret;
+	}
+
 	if (vma->resource || !vma_res) {
 		/* Rebinding with an additional I915_VMA_*_BIND */
 		GEM_WARN_ON(!vma_flags);
@@ -460,9 +482,11 @@ int i915_vma_bind(struct i915_vma *vma,
 	if (work && bind_flags & vma->vm->bind_async_flags) {
 		struct dma_fence *prev;
 
-		work->vma = vma;
+		work->vma_res = i915_vma_resource_get(vma->resource);
 		work->cache_level = cache_level;
 		work->flags = bind_flags;
+		if (vma->obj->mm.rsgt)
+			work->rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);
 
 		/*
 		 * Note we only want to chain up to the migration fence on
@@ -971,8 +995,6 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 			goto err_rpm;
 		}
 
-		work->vm = i915_vm_get(vma->vm);
-
 		dma_fence_work_chain(&work->base, moving);
 
 		/* Allocate enough page directories to used PTE */
@@ -1358,9 +1380,9 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 	return 0;
 }
 
-void __i915_vma_evict(struct i915_vma *vma)
+struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
 {
-	struct dma_fence *unbind_fence;
+	struct dma_fence *unbind_fence = NULL;
 
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
 
@@ -1394,25 +1416,32 @@ void __i915_vma_evict(struct i915_vma *vma)
 
 	if (likely(atomic_read(&vma->vm->open))) {
 		trace_i915_vma_unbind(vma);
-		vma->ops->unbind_vma(vma->vm, vma->resource);
+		unbind_fence = i915_vma_resource_unbind(vma->resource);
 	}
+	i915_vma_resource_put(vma->resource);
+	vma->resource = NULL;
 	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
 		   &vma->flags);
 
-	unbind_fence = i915_vma_resource_unbind(vma->resource);
-	i915_vma_resource_put(vma->resource);
-	vma->resource = NULL;
+	/* Object backend must be async capable. */
+	GEM_WARN_ON(async && !vma->obj->mm.rsgt);
 
 	i915_vma_detach(vma);
-	vma_unbind_pages(vma);
+
+	if (!async && unbind_fence) {
+		dma_fence_wait(unbind_fence, false);
+		dma_fence_put(unbind_fence);
+		unbind_fence = NULL;
+	}
 
 	/*
-	 * This uninterruptible wait under the vm mutex is currently
-	 * only ever blocking while the vma is being captured from.
-	 * With async unbinding, this wait here will be removed.
+	 * Binding itself may not have completed until the unbind fence signals,
+	 * so don't drop the pages until that happens, unless the resource is
+	 * async_capable.
 	 */
-	dma_fence_wait(unbind_fence, false);
-	dma_fence_put(unbind_fence);
+
+	vma_unbind_pages(vma);
+	return unbind_fence;
 }
 
 int __i915_vma_unbind(struct i915_vma *vma)
@@ -1439,13 +1468,52 @@ int __i915_vma_unbind(struct i915_vma *vma)
 		return ret;
 
 	GEM_BUG_ON(i915_vma_is_active(vma));
-	__i915_vma_evict(vma);
+	__i915_vma_evict(vma, false);
 
 	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
 
 	return 0;
 }
 
+static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma)
+{
+	struct dma_fence *fence;
+
+	lockdep_assert_held(&vma->vm->mutex);
+
+	if (!drm_mm_node_allocated(&vma->node))
+		return NULL;
+
+	if (i915_vma_is_pinned(vma)) {
+		vma_print_allocator(vma, "is pinned");
+		return ERR_PTR(-EAGAIN);
+	}
+
+	/*
+	 * We probably need to replace this with awaiting the fences of the
+	 * object's dma_resv when the vma active goes away. When doing that
+	 * we need to be careful to not add the vma_resource unbind fence
+	 * immediately to the object's dma_resv, because then unbinding
+	 * the next vma from the object, in case there are many, will
+	 * actually await the unbinding of the previous vmas, which is
+	 * undesirable.
+	 */
+	if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active,
+				       I915_ACTIVE_AWAIT_EXCL |
+				       I915_ACTIVE_AWAIT_ACTIVE) < 0) {
+		int ret = i915_vma_sync(vma);
+
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	fence = __i915_vma_evict(vma, true);
+
+	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
+
+	return fence;
+}
+
 int i915_vma_unbind(struct i915_vma *vma)
 {
 	struct i915_address_space *vm = vma->vm;
@@ -1481,6 +1549,52 @@ int i915_vma_unbind(struct i915_vma *vma)
 	return err;
 }
 
+int i915_vma_unbind_async(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_address_space *vm = vma->vm;
+	struct dma_fence *fence;
+	int err;
+
+	/*
+	 * We need the dma-resv lock since we add the
+	 * unbind fence to the dma-resv object.
+	 */
+	assert_object_held(obj);
+
+	if (!drm_mm_node_allocated(&vma->node))
+		return 0;
+
+	if (i915_vma_is_pinned(vma)) {
+		vma_print_allocator(vma, "is pinned");
+		return -EAGAIN;
+	}
+
+	if (!obj->mm.rsgt)
+		return i915_vma_unbind(vma);
+
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+		/* Might require pm. Not handled yet. */
+		return i915_vma_unbind(vma);
+
+	err = dma_resv_reserve_shared(obj->base.resv, 1);
+	if (err)
+		return i915_vma_unbind(vma);
+
+	err = mutex_lock_interruptible(&vma->vm->mutex);
+	if (err)
+		return err;
+
+	fence = __i915_vma_unbind_async(vma);
+	mutex_unlock(&vm->mutex);
+	if (IS_ERR_OR_NULL(fence))
+		return PTR_ERR_OR_ZERO(fence);
+
+	dma_resv_add_shared_fence(obj->base.resv, fence);
+
+	return 0;
+}
+
 struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
 {
 	i915_gem_object_make_unshrinkable(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 7a0b95666479..86d3094a07f5 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -215,9 +215,10 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
-void __i915_vma_evict(struct i915_vma *vma);
+struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
 int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
+int __must_check i915_vma_unbind_async(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index 0f149bd41f09..3b5b38fc0ca1 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -2,10 +2,45 @@
 /*
  * Copyright © 2021 Intel Corporation
  */
+
+#include <linux/interval_tree_generic.h>
 #include <linux/slab.h>
 
+#include "i915_sw_fence.h"
+#include "i915_sw_fence_work.h"
 #include "i915_vma_resource.h"
 
+#include "gt/intel_gtt.h"
+
+enum {
+	I915_VMA_RESOURCE_IMM = DMA_FENCE_FLAG_USER_BITS,
+};
+
+/**
+ * DOC:
+ * We use a per-vm interval tree to keep track of vma_resources
+ * scheduled for unbind but not yet unbound. The tree is protected by
+ * the vm mutex, and nodes are removed just after the unbind fence signals.
+ * The removal takes the vm mutex from a kernel thread which we need to
+ * keep in mind so that we don't grab the mutex and try to wait for all
+ * pending unbinds to complete, because that will temporarily block many
+ * of the workqueue threads, and people will get angry.
+ *
+ * We should consider using a single ordered fence per VM instead but that
+ * requires ordering the unbinds and might introduce unnecessary waiting
+ * for unrelated unbinds. Amount of code will probably be roughly the same
+ * due to the simplicity of using the interval tree interface.
+ *
+ * Another drawback of this interval tree is that the complexity of insertion
+ * and removal of fences increases as O(ln(pending_unbinds)) instead of
+ * O(1) for a single fence without interval tree.
+ */
+#define VMA_RES_START(node) ((node)->start)
+#define VMA_RES_LAST(node) ((node)->start + (node)->node_size - 1)
+INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
+		     unsigned long, __subtree_last,
+		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
+
 static const char *get_driver_name(struct dma_fence *fence)
 {
 	return "vma unbind fence";
@@ -21,14 +56,6 @@ static struct dma_fence_ops unbind_fence_ops = {
 	.get_timeline_name = get_timeline_name,
 };
 
-void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
-{
-	spin_lock_init(&vma_res->lock);
-	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
-		       &vma_res->lock, 0, 0);
-	refcount_set(&vma_res->hold_count, 1);
-}
-
 struct i915_vma_resource *i915_vma_resource_alloc(void)
 {
 	struct i915_vma_resource *vma_res =
@@ -39,8 +66,20 @@ struct i915_vma_resource *i915_vma_resource_alloc(void)
 
 static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
 {
-	if (refcount_dec_and_test(&vma_res->hold_count))
-		dma_fence_signal(&vma_res->unbind_fence);
+	struct i915_address_space *vm;
+
+	if (!refcount_dec_and_test(&vma_res->hold_count))
+		return;
+
+	vm = vma_res->vm;
+	dma_fence_signal(&vma_res->unbind_fence);
+	if (!RB_EMPTY_NODE(&vma_res->rb)) {
+		mutex_lock(&vm->mutex);
+		vma_res_itree_remove(vma_res, &vm->pending_unbind);
+		mutex_unlock(&vm->mutex);
+	}
+
+	vma_res->vm = NULL;
 }
 
 /**
@@ -62,6 +101,7 @@ void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
 		/* Inefficient open-coded might_lock_irqsave() */
 		spin_lock_irqsave(&vma_res->lock, irq_flags);
 		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
+		might_lock(&vma_res->vm->mutex);
 	}
 
 	__i915_vma_resource_unhold(vma_res);
@@ -89,10 +129,147 @@ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
 	return held;
 }
 
-struct dma_fence *
-i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+static void i915_vma_resource_unbind_work(struct work_struct *work)
 {
+	struct i915_vma_resource *vma_res =
+		container_of(work, typeof(*vma_res), work);
+
+	vma_res->ops->unbind_vma(vma_res->vm, vma_res);
 	__i915_vma_resource_unhold(vma_res);
+	i915_sw_fence_fini(&vma_res->chain);
+	i915_vma_resource_put(vma_res);
+}
+
+static int
+i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
+			       enum i915_sw_fence_notify state)
+{
+	struct i915_vma_resource *vma_res =
+		container_of(fence, typeof(*vma_res), chain);
+	struct dma_fence *unbind_fence =
+		&vma_res->unbind_fence;
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		dma_fence_get(unbind_fence);
+		if (test_bit(I915_VMA_RESOURCE_IMM, &unbind_fence->flags)) {
+			RB_CLEAR_NODE(&vma_res->rb);
+			i915_vma_resource_unbind_work(&vma_res->work);
+		} else {
+			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
+			vma_res_itree_insert(vma_res,
+					     &vma_res->vm->pending_unbind);
+			queue_work(system_unbound_wq, &vma_res->work);
+		}
+		break;
+	case FENCE_FREE:
+		i915_vma_resource_put(vma_res);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+{
+	/* Reference for the returned fence. */
+	dma_fence_get(&vma_res->unbind_fence);
+	/* Reference for the sw fence */
 	dma_fence_get(&vma_res->unbind_fence);
+
+	if (atomic_read(&vma_res->chain.pending) <= 1)
+		__set_bit(I915_VMA_RESOURCE_IMM, &vma_res->unbind_fence.flags);
+	i915_sw_fence_commit(&vma_res->chain);
+
 	return &vma_res->unbind_fence;
 }
+
+void __i915_vma_resource_init(struct i915_vma_resource *vma_res,
+			      struct i915_address_space *vm)
+{
+	spin_lock_init(&vma_res->lock);
+	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
+		       &vma_res->lock, 0, 0);
+	refcount_set(&vma_res->hold_count, 1);
+	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
+	vma_res->vm = vm;
+}
+
+int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
+				    unsigned long offset,
+				    unsigned long size,
+				    bool intr)
+{
+	struct i915_vma_resource *node;
+	unsigned long last = offset + size - 1;
+
+	lockdep_assert_held(&vm->mutex);
+	might_sleep();
+
+	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
+	while (node) {
+		int ret = dma_fence_wait(&node->unbind_fence, intr);
+
+		if (ret)
+			return ret;
+
+		node = vma_res_itree_iter_next(node, offset, last);
+	}
+
+	return 0;
+}
+
+void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
+{
+	struct i915_vma_resource *node;
+
+	do {
+		mutex_lock(&vm->mutex);
+		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
+						ULONG_MAX);
+		if (node)
+			dma_fence_get(&node->unbind_fence);
+		mutex_unlock(&vm->mutex);
+
+		if (node) {
+			/*
+			 * The wait makes sure the node eventually removes
+			 * itself from the tree.
+			 */
+			dma_fence_wait(&node->unbind_fence, false);
+			dma_fence_put(&node->unbind_fence);
+		}
+	} while (node);
+}
+
+int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
+				     struct dma_fence_work *f,
+				     unsigned long offset,
+				     unsigned long size,
+				     bool intr,
+				     gfp_t gfp)
+{
+	struct i915_vma_resource *node;
+	unsigned long last = offset + size - 1;
+
+	lockdep_assert_held(&vm->mutex);
+	might_alloc(gfp);
+	might_sleep();
+
+	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
+	while (node) {
+		int ret;
+
+		ret = i915_sw_fence_await_dma_fence(&f->chain,
+						    &node->unbind_fence,
+						    MAX_SCHEDULE_TIMEOUT, gfp);
+		if (ret)
+			ret = dma_fence_wait(&node->unbind_fence, intr);
+		if (ret)
+			return ret;
+
+		node = vma_res_itree_iter_next(node, offset, last);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 2dfcbf5fa76e..3fe8a043a7df 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -11,6 +11,8 @@
 
 #include "gem/i915_gem_object_types.h" /* For page_sizes. */
 
+struct dma_fence_work;
+
 /**
  * struct i915_vma_resource - Snapshotted unbind information.
  * @unbind_fence: Fence to mark unbinding complete. Note that this fence
@@ -31,6 +33,11 @@ struct i915_vma_resource {
 	/* See above for description of the lock. */
 	spinlock_t lock;
 	refcount_t hold_count;
+	struct work_struct work;
+	struct i915_sw_fence chain;
+	struct rb_node rb;
+	unsigned long __subtree_last;
+	struct i915_address_space *vm;
 
 	/**
 	 * struct i915_vma_bindinfo - Information needed for async bind
@@ -50,11 +57,14 @@ struct i915_vma_resource {
 		bool lmem:1;
 	} bi;
 
+	const struct i915_vma_ops *ops;
 	void *private;
 	unsigned long start;
 	unsigned long node_size;
 	unsigned long vma_size;
 	u32 page_sizes_gtt;
+
+	/* Backend private_stuff. Should really go into @private. */
 	u32 bound_flags;
 	bool allocated:1;
 };
@@ -85,6 +95,21 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res)
 	dma_fence_put(&vma_res->unbind_fence);
 }
 
-void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
+void __i915_vma_resource_init(struct i915_vma_resource *vma_res,
+			      struct i915_address_space *vm);
+
+int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
+				    unsigned long first,
+				    unsigned long last,
+				    bool intr);
+
+int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
+				     struct dma_fence_work *f,
+				     unsigned long first,
+				     unsigned long last,
+				     bool intr,
+				     gfp_t gfp);
+
+void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm);
 
 #endif
-- 
2.31.1


