[PATCH 126/126] async-vma-bind
Chris Wilson
chris@chris-wilson.co.uk
Tue Sep 11 11:04:48 UTC 2018
---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +-
drivers/gpu/drm/i915/i915_vma.c | 154 ++++++++++++++++--
drivers/gpu/drm/i915/i915_vma.h | 4 +
.../gpu/drm/i915/selftests/i915_gem_context.c | 4 +
drivers/gpu/drm/i915/selftests/i915_vma.c | 6 +-
5 files changed, 161 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9d047e5363ea..ee577a34fcaa 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1179,6 +1179,10 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto err_unpin;
}
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err)
+ goto err_request;
+
err = i915_request_await_object(rq, vma->obj, true);
if (err)
goto err_request;
@@ -1189,7 +1193,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_request;
- GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
err = i915_vma_move_to_active(batch, rq, 0);
if (err)
goto skip_request;
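
[Note on the two hunks above: the dropped GEM_BUG_ON is the crux. Once
binds are queued asynchronously, the batch's reservation object may
carry an unsignalled bind fence, so the request can no longer assert
that the batch is idle and must await it instead. A minimal sketch of
the ordering rule, assuming the 2018-era request API;
await_batch_and_target is a hypothetical helper, not part of the patch:

static int await_batch_and_target(struct i915_request *rq,
                                  struct drm_i915_gem_object *batch,
                                  struct drm_i915_gem_object *target)
{
        int err;

        /*
         * The GPU only reads the batch: wait on the exclusive fence,
         * which is exactly where the async bind publishes itself.
         */
        err = i915_request_await_object(rq, batch, false);
        if (err)
                return err;

        /*
         * The GPU writes the target: wait on the exclusive fence and
         * every shared fence before the request may execute.
         */
        return i915_request_await_object(rq, target, true);
}
]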
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index b8ee20f77c53..85bce043fb48 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -284,6 +284,126 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
return vma;
}
+struct async_bind {
+ struct dma_fence dma;
+ struct work_struct work;
+ struct i915_sw_fence wait;
+ struct i915_vma *vma;
+ enum i915_cache_level cache_level;
+ u32 flags;
+};
+
+static void do_async_bind(struct work_struct *work)
+{
+ struct async_bind *ab = container_of(work, typeof(*ab), work);
+ struct i915_vma *vma = ab->vma;
+ int err;
+
+ err = ab->wait.error;
+ if (!err)
+ err = vma->ops->bind_vma(vma, ab->cache_level, ab->flags);
+ if (err) {
+ smp_store_mb(vma->error, err);
+ dma_fence_set_error(&ab->dma, err);
+ }
+
+ complete_all(&vma->completion);
+ i915_vma_put(vma);
+
+ dma_fence_signal(&ab->dma);
+ dma_fence_put(&ab->dma);
+}
+
+static int __i915_sw_fence_call
+async_bind_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+ struct async_bind *ab = container_of(fence, typeof(*ab), wait);
+
+ switch (state) {
+ case FENCE_COMPLETE:
+ queue_work(system_unbound_wq, &ab->work);
+ break;
+
+ case FENCE_FREE:
+ dma_fence_put(&ab->dma);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static const char *async_bind_driver_name(struct dma_fence *fence)
+{
+ return DRIVER_NAME;
+}
+
+static const char *async_bind_timeline_name(struct dma_fence *fence)
+{
+ return "bind";
+}
+
+static void async_bind_release(struct dma_fence *fence)
+{
+ struct async_bind *ab = container_of(fence, typeof(*ab), dma);
+
+ i915_sw_fence_fini(&ab->wait);
+
+ BUILD_BUG_ON(offsetof(typeof(*ab), dma));
+ dma_fence_free(&ab->dma);
+}
+
+static const struct dma_fence_ops async_bind_ops = {
+ .get_driver_name = async_bind_driver_name,
+ .get_timeline_name = async_bind_timeline_name,
+ .release = async_bind_release,
+};
+
+static DEFINE_SPINLOCK(async_lock);
+
+static int queue_async_bind(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct async_bind *ab;
+
+ ab = kmalloc(sizeof(*ab), GFP_KERNEL);
+ if (!ab)
+ return -ENOMEM;
+
+ INIT_WORK(&ab->work, do_async_bind);
+
+ dma_fence_init(&ab->dma,
+ &async_bind_ops,
+ &async_lock,
+ vma->vm->i915->mm.unordered_timeline,
+ 0);
+ i915_sw_fence_init(&ab->wait, async_bind_notify);
+
+ ab->vma = i915_vma_get(vma);
+ ab->cache_level = cache_level;
+ ab->flags = flags;
+
+ dma_fence_get(&ab->dma);
+ if (i915_sw_fence_await_reservation(&ab->wait,
+ vma->resv, NULL,
+ true, I915_FENCE_TIMEOUT,
+ I915_FENCE_GFP) < 0) {
+ i915_vma_put(vma);
+ i915_sw_fence_fini(&ab->wait);
+ kfree(ab);
+ return -ENOMEM;
+ }
+
+ reservation_object_lock(vma->resv, NULL);
+ reservation_object_add_excl_fence(vma->resv, &ab->dma);
+ reservation_object_unlock(vma->resv);
+
+ init_completion(&vma->completion);
+ i915_sw_fence_commit(&ab->wait);
+
+ return 0;
+}
+
/**
* i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
* @vma: VMA to map
@@ -301,17 +421,12 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
u32 vma_flags;
int ret;
+ GEM_BUG_ON(!flags);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
GEM_BUG_ON(vma->size > vma->node.size);
-
- if (GEM_WARN_ON(range_overflows(vma->node.start,
- vma->node.size,
- vma->vm->total)))
- return -ENODEV;
-
- if (GEM_WARN_ON(!flags))
- return -EINVAL;
-
+ GEM_BUG_ON(range_overflows(vma->node.start,
+ vma->node.size,
+ vma->vm->total));
bind_flags = 0;
if (flags & PIN_GLOBAL)
bind_flags |= I915_VMA_GLOBAL_BIND;
@@ -329,7 +444,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
GEM_BUG_ON(!vma->pages);
trace_i915_vma_bind(vma, bind_flags);
- ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
+ if (bind_flags & I915_VMA_LOCAL_BIND)
+ ret = queue_async_bind(vma, cache_level, bind_flags);
+ else
+ ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
if (ret)
return ret;
@@ -1000,10 +1118,20 @@ int i915_vma_move_to_active(struct i915_vma *vma,
{
struct drm_i915_gem_object *obj = vma->obj;
struct i915_gem_active *active;
+ int err;
lockdep_assert_held(&rq->i915->drm.struct_mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
+ /*
+ * The error should be set before the async chain is decoupled. If
+ * the async bind is still pending, then we shall report the error
+ * by propagating it along the chain of fences.
+ */
+ err = READ_ONCE(vma->error);
+ if (err)
+ return err;
+
active = active_instance(vma, rq->fence.context);
if (IS_ERR(active))
return PTR_ERR(active);
@@ -1099,6 +1227,12 @@ int i915_vma_unbind(struct i915_vma *vma)
if (!drm_mm_node_allocated(&vma->node))
return 0;
+ if (vma->flags & I915_VMA_LOCAL_BIND) {
+ ret = wait_for_completion_interruptible(&vma->completion);
+ if (ret)
+ return ret;
+ }
+
if (i915_vma_is_map_and_fenceable(vma)) {
/*
* Check that we have flushed all writes through the GGTT
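
[Note: queue_async_bind() above is an instance of the standard
fence-plus-worker shape: embed the dma_fence as the first member (so
the default release path, dma_fence_free(), frees the whole allocation,
hence the BUILD_BUG_ON), signal it from a workqueue, and let everyone
else order themselves behind it. A self-contained sketch of just that
shape, with illustrative names (deferred_op and friends) that are not
part of the patch:

#include <linux/dma-fence.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>

struct deferred_op {
        struct dma_fence dma;   /* must stay first, see the BUILD_BUG_ON */
        struct work_struct work;
};

static DEFINE_SPINLOCK(deferred_lock);

static const char *deferred_driver_name(struct dma_fence *fence)
{
        return "example";
}

static const char *deferred_timeline_name(struct dma_fence *fence)
{
        return "deferred";
}

/* No .release: dma_fence_release() falls back to dma_fence_free(). */
static const struct dma_fence_ops deferred_ops = {
        .get_driver_name = deferred_driver_name,
        .get_timeline_name = deferred_timeline_name,
};

static void deferred_work(struct work_struct *work)
{
        struct deferred_op *op = container_of(work, typeof(*op), work);

        /* ... the deferred operation (here, the bind) would run ... */

        dma_fence_signal(&op->dma);     /* release all waiters */
        dma_fence_put(&op->dma);        /* drop the worker's reference */
}

static struct dma_fence *queue_deferred_op(void)
{
        struct deferred_op *op;

        op = kmalloc(sizeof(*op), GFP_KERNEL);
        if (!op)
                return ERR_PTR(-ENOMEM);

        /* The patch instead uses i915->mm.unordered_timeline here. */
        dma_fence_init(&op->dma, &deferred_ops, &deferred_lock,
                       dma_fence_context_alloc(1), 0);
        INIT_WORK(&op->work, deferred_work);

        dma_fence_get(&op->dma);        /* reference owned by the worker */
        queue_work(system_unbound_wq, &op->work);

        return &op->dma;                /* caller keeps the init reference */
}

In the patch proper, the fence is additionally installed as the
exclusive fence on vma->resv, and the i915_sw_fence holds back
queue_work() until every prior user of the vma has signalled.]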
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 50508d93dbb9..fc8f41b536a1 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -25,6 +25,7 @@
#ifndef __I915_VMA_H__
#define __I915_VMA_H__
+#include <linux/completion.h>
#include <linux/io-mapping.h>
#include <linux/rbtree.h>
@@ -95,6 +96,9 @@ struct i915_vma {
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(12)
+ struct completion completion;
+ int error;
+
unsigned int active_count;
struct rb_root active;
struct i915_gem_active last_active;
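
[Note: the new completion/error pair is what lets the synchronous paths
cooperate with the worker: i915_vma_unbind() blocks on the completion,
while i915_vma_move_to_active() fails fast on the error. A hedged
sketch of the handshake in isolation; bind_state, bind_done and
bind_sync are hypothetical names, not part of the patch:

#include <asm/barrier.h>
#include <linux/compiler.h>
#include <linux/completion.h>

struct bind_state {
        struct completion completion;   /* init_completion() before queuing */
        int error;
};

/* Worker side, cf. do_async_bind(): record the error, then wake everyone. */
static void bind_done(struct bind_state *bs, int err)
{
        if (err)
                smp_store_mb(bs->error, err); /* visible before the wakeup */
        complete_all(&bs->completion);
}

/* Unbind side: block (interruptibly) until the async bind has finished. */
static int bind_sync(struct bind_state *bs)
{
        int ret;

        ret = wait_for_completion_interruptible(&bs->completion);
        if (ret)
                return ret;             /* -ERESTARTSYS on a signal */

        return READ_ONCE(bs->error);    /* propagate the bind result */
}

complete_all() rather than complete() matters here: any number of
waiters may pile up behind an in-flight bind, and the completion must
stay signalled for late arrivals.]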
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index bffa0e871825..97688f48f3bb 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -346,6 +346,10 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
goto err_batch;
}
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err)
+ goto err_request;
+
flags = 0;
if (INTEL_GEN(vm->i915) <= 5)
flags |= I915_DISPATCH_SECURE;
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index ece26ea39181..a3f129950e6e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -200,8 +200,12 @@ static int igt_vma_create(void *arg)
mock_context_close(ctx);
}
- list_for_each_entry_safe(obj, on, &objects, st_link)
+ list_for_each_entry_safe(obj, on, &objects, st_link) {
+ i915_gem_object_wait(obj,
+ I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT,
+ NULL);
i915_gem_object_put(obj);
+ }
return err;
}
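
[Note: the added wait is load-bearing. With binds completing from a
workqueue, a mock object can still carry an unsignalled bind fence when
the selftest tears down, and the cleanup must not race the worker. A
sketch of the same idiom in isolation; drain_and_put is a hypothetical
wrapper around the 2018-era helpers:

/*
 * Flush every fence, shared and exclusive, before dropping what may be
 * the last reference. MAX_SCHEDULE_TIMEOUT means wait forever.
 */
static void drain_and_put(struct drm_i915_gem_object *obj)
{
        i915_gem_object_wait(obj,
                             I915_WAIT_ALL,     /* exclusive + shared */
                             MAX_SCHEDULE_TIMEOUT,
                             NULL);             /* no RPS client */
        i915_gem_object_put(obj);
}
]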
--
2.19.0.rc2