[Intel-gfx] [PATCH 10/14] drm/i915/gem: Allow combining submit-fences with syncobj
Chris Wilson
chris at chris-wilson.co.uk
Sun May 3 11:21:28 UTC 2020
We allow exported sync_file fences to be used as submit fences, but they
are not the only source of user fences. We also accept an array of
syncobj, and as with sync_file these are dma_fences underneath and so
feature the same set of controls. A submit-fence allows a request to be
scheduled at the same time as its signaler, rather than only after the
signaler has completed, as is normal. Userspace can combine a submit-fence
with its own semaphores for intra-batch scheduling.
Not exposing submit-fences to syncobj was at the time just a matter of
pragmatic expediency.
Fixes: a88b6e4cbafd ("drm/i915: Allow specification of parallel execbuf")
Link: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4854
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 14 ++-
drivers/gpu/drm/i915/i915_request.c | 110 ++++++++++++++++++
include/uapi/drm/i915_drm.h | 7 +-
3 files changed, 123 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 8b854f87a249..67ba33b3de60 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2432,7 +2432,7 @@ static void
__free_fence_array(struct drm_syncobj **fences, unsigned int n)
{
while (n--)
- drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+ drm_syncobj_put(ptr_mask_bits(fences[n], 3));
kvfree(fences);
}
@@ -2489,7 +2489,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
- fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+ fences[n] = ptr_pack_bits(syncobj, fence.flags, 3);
}
return fences;
@@ -2520,7 +2520,7 @@ await_fence_array(struct i915_execbuffer *eb,
struct dma_fence *fence;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+ syncobj = ptr_unpack_bits(fences[n], &flags, 3);
if (!(flags & I915_EXEC_FENCE_WAIT))
continue;
@@ -2544,7 +2544,11 @@ await_fence_array(struct i915_execbuffer *eb,
spin_unlock(&syncobj->lock);
}
- err = i915_request_await_dma_fence(eb->request, fence);
+ if (flags & I915_EXEC_FENCE_WAIT_SUBMIT)
+ err = i915_request_await_execution(eb->request, fence,
+ eb->engine->bond_execute);
+ else
+ err = i915_request_await_dma_fence(eb->request, fence);
dma_fence_put(fence);
if (err < 0)
return err;
@@ -2565,7 +2569,7 @@ signal_fence_array(struct i915_execbuffer *eb,
struct drm_syncobj *syncobj;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+ syncobj = ptr_unpack_bits(fences[n], &flags, 3);
if (!(flags & I915_EXEC_FENCE_SIGNAL))
continue;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 95edc5523a01..248efbc01224 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -23,6 +23,7 @@
*/
#include <linux/dma-fence-array.h>
+#include <linux/dma-fence-proxy.h>
#include <linux/irq_work.h>
#include <linux/prefetch.h>
#include <linux/sched.h>
@@ -1213,6 +1214,110 @@ __i915_request_await_execution(struct i915_request *to,
&from->fence);
}
+struct execution_proxy { /* per-wait state for a submit-await on a proxy fence */
+ struct wait_queue_entry base; /* proxy listener entry; .private holds the proxy fence */
+ void (*hook)(struct i915_request *rq,
+ struct dma_fence *signal); /* forwarded to __i915_request_await_execution() */
+ struct i915_request *request; /* request whose submit fence is being held */
+ struct dma_fence *fence; /* set (with a reference) by execution_proxy_wake() */
+ struct timer_list timer; /* runs execution_proxy_timer() */
+ struct work_struct work; /* runs execution_proxy_work(), which frees this struct */
+};
+
+static void execution_proxy_work(struct work_struct *work)
+{ /* Worker: await the recorded fence (if any), complete rq->submit, free the wait. */
+ struct execution_proxy *wait = container_of(work, typeof(*wait), work);
+ struct i915_request *rq = wait->request;
+
+ del_timer_sync(&wait->timer); /* stop the timeout timer; wait is freed below */
+
+ if (wait->fence) { /* only set by execution_proxy_wake(); NULL otherwise */
+ int err;
+
+ mutex_lock(&rq->context->timeline->mutex);
+
+ if (dma_fence_is_i915(wait->fence)) /* i915 request: pass the caller's hook along */
+ err = __i915_request_await_execution(rq,
+ to_request(wait->fence),
+ wait->hook);
+ else /* any other fence: await it on rq->submit; the hook is not used */
+ err = i915_sw_fence_await_dma_fence(&rq->submit,
+ wait->fence,
+ I915_FENCE_TIMEOUT,
+ GFP_KERNEL);
+ if (err < 0) /* a worker has no caller to return to; record the error on rq */
+ i915_request_set_error_once(rq, err);
+
+ mutex_unlock(&rq->context->timeline->mutex);
+ }
+
+ i915_sw_fence_complete(&rq->submit); /* balances i915_sw_fence_await() done at setup */
+
+ dma_fence_put(wait->fence); /* drops the reference taken in execution_proxy_wake() */
+ kfree(wait);
+}
+
+static int
+execution_proxy_wake(struct wait_queue_entry *entry,
+ unsigned int mode,
+ int flags,
+ void *fence) /* presumably the real fence now behind the proxy -- TODO confirm */
+{ /* Listener callback (installed as base.func): record @fence and defer to the worker. */
+ struct execution_proxy *wait = container_of(entry, typeof(*wait), base);
+
+ wait->fence = dma_fence_get(fence); /* reference dropped in execution_proxy_work() */
+ schedule_work(&wait->work); /* the worker takes a mutex, so the await is done there */
+
+ return 0;
+}
+
+static void
+execution_proxy_timer(struct timer_list *t)
+{ /* Timeout callback: if the listener can be removed, flag rq with -ETIMEDOUT. */
+ struct execution_proxy *wait = container_of(t, typeof(*wait), timer);
+
+ if (dma_fence_remove_proxy_listener(wait->base.private, &wait->base)) { /* presumably true only if still queued -- TODO confirm */
+ struct i915_request *rq = wait->request;
+
+ pr_notice("Asynchronous wait on proxy fence for %s:%s:%llx timed out\n",
+ rq->fence.ops->get_driver_name(&rq->fence),
+ rq->fence.ops->get_timeline_name(&rq->fence),
+ rq->fence.seqno);
+ i915_request_set_error_once(rq, -ETIMEDOUT);
+
+ schedule_work(&wait->work); /* wait->fence is still NULL, so the worker skips the await */
+ }
+}
+
+static int
+__i915_request_await_proxy_execution(struct i915_request *rq,
+ struct dma_fence *fence,
+ unsigned long timeout,
+ void (*hook)(struct i915_request *rq,
+ struct dma_fence *signal))
+{ /* Register a listener on proxy @fence; returns 0, or -ENOMEM if allocation fails. */
+ struct execution_proxy *wait;
+
+ wait = kzalloc(sizeof(*wait), GFP_KERNEL); /* zeroed, so wait->fence starts out NULL */
+ if (!wait)
+ return -ENOMEM;
+
+ i915_sw_fence_await(&rq->submit); /* balanced by i915_sw_fence_complete() in execution_proxy_work() */
+
+ wait->base.private = fence; /* read back by execution_proxy_timer() */
+ wait->base.func = execution_proxy_wake;
+ wait->request = rq;
+ wait->hook = hook;
+ INIT_WORK(&wait->work, execution_proxy_work);
+
+ timer_setup(&wait->timer, execution_proxy_timer, 0);
+ if (timeout) /* a zero timeout leaves the timer unarmed */
+ mod_timer(&wait->timer, round_jiffies_up(jiffies + timeout));
+
+ dma_fence_add_proxy_listener(fence, &wait->base);
+ return 0;
+}
+
int
i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence,
@@ -1249,6 +1354,11 @@ i915_request_await_execution(struct i915_request *rq,
ret = __i915_request_await_execution(rq,
to_request(fence),
hook);
+ else if (dma_fence_is_proxy(fence))
+ ret = __i915_request_await_proxy_execution(rq,
+ fence,
+ I915_FENCE_TIMEOUT,
+ hook);
else
ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
I915_FENCE_TIMEOUT,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..704dd0e3bc1d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1040,9 +1040,10 @@ struct drm_i915_gem_exec_fence {
*/
__u32 handle;
-#define I915_EXEC_FENCE_WAIT (1<<0)
-#define I915_EXEC_FENCE_SIGNAL (1<<1)
-#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
+#define I915_EXEC_FENCE_WAIT (1u << 0)
+#define I915_EXEC_FENCE_SIGNAL (1u << 1)
+#define I915_EXEC_FENCE_WAIT_SUBMIT (1u << 2)
+#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_WAIT_SUBMIT << 1))
__u32 flags;
};
--
2.20.1
More information about the Intel-gfx
mailing list