[Intel-gfx] [PATCH 18/18] drm/i915: Support explicit fencing for execbuf
Chris Wilson
chris at chris-wilson.co.uk
Tue Aug 30 08:18:12 UTC 2016
Now that the user can opt out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.
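
For illustration only (not part of this series; the helper and its
surrounding setup are hypothetical), userspace usage is expected to look
roughly like the sketch below: set I915_EXEC_FENCE_IN with the incoming
sync_file fd in the low 32 bits of rsvd2, set I915_EXEC_FENCE_OUT, and
call the new DRM_IOCTL_I915_GEM_EXECBUFFER2_WR so that the resulting fd
can be read back from the high 32 bits of rsvd2.

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Hypothetical helper: submit an already-built batch, optionally waiting
 * on in_fd, and return a new sync_file fd for this request (or -errno).
 */
static int submit_with_fences(int fd, struct drm_i915_gem_exec_object2 *objs,
			      unsigned int count, int in_fd)
{
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)objs;
	execbuf.buffer_count = count;
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;

	if (in_fd >= 0) {
		execbuf.flags |= I915_EXEC_FENCE_IN;
		execbuf.rsvd2 = (__u32)in_fd;	/* in-fence in the low 32 bits */
	}

	/* The _WR variant is required for the out-fence to be written back. */
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &execbuf))
		return -errno;

	return (int)(execbuf.rsvd2 >> 32);	/* out-fence in the high 32 bits */
}
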
Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/Kconfig | 1 +
drivers/gpu/drm/i915/i915_drv.c | 5 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 82 ++++++++++++++++++++++++++++--
include/uapi/drm/i915_drm.h | 36 ++++++++++++-
4 files changed, 117 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..319ca27ea719 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -18,6 +18,7 @@ config DRM_I915
select INPUT if ACPI
select ACPI_VIDEO if ACPI
select ACPI_BUTTON if ACPI
+ select SYNC_FILE
help
Choose this option if you have a system that has "Intel Graphics
Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 437ab0695373..492c4d4e5ebc 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -365,6 +365,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_ASYNC:
value = 1;
break;
+ case I915_PARAM_HAS_EXEC_FENCE:
+ value = 1;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
@@ -2552,7 +2555,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
- DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ca7675c9c1b7..98042fc26404 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
#include <linux/dma_remapping.h>
#include <linux/reservation.h>
+#include <linux/sync_file.h>
#include <linux/uaccess.h>
#include <drm/drmP.h>
@@ -1153,6 +1154,35 @@ eb_await_request(struct drm_i915_gem_request *to,
return 0;
}
+static int eb_await_fence(struct drm_i915_gem_request *req, struct fence *fence)
+{
+ struct fence_array *array;
+ int i;
+
+ if (fence_is_i915(fence))
+ return eb_await_request(req, to_request(fence));
+
+ if (!fence_is_array(fence))
+ return i915_sw_fence_await_dma_fence(&req->submit,
+ fence, GFP_KERNEL);
+
+ array = to_fence_array(fence);
+ for (i = 0; i < array->num_fences; i++) {
+ struct fence *child = array->fences[i];
+ int ret;
+
+ if (fence_is_i915(child))
+ ret = eb_await_request(req, to_request(child));
+ else
+ ret = i915_sw_fence_await_dma_fence(&req->submit,
+ child, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
static int
eb_await_bo(struct drm_i915_gem_request *to,
struct drm_i915_gem_object *obj,
@@ -1701,6 +1731,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct i915_execbuffer_params *params = &params_master;
const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 dispatch_flags;
+ struct fence *in_fence = NULL;
+ struct sync_file *out_fence = NULL;
+ int out_fence_fd = -1;
int ret;
bool need_relocs;
@@ -1744,6 +1777,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
dispatch_flags |= I915_DISPATCH_RS;
}
+ if (args->flags & I915_EXEC_FENCE_IN) {
+ in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+ if (!in_fence) {
+ ret = -EINVAL;
+ goto pre_mutex_err;
+ }
+ }
+
+ if (args->flags & I915_EXEC_FENCE_OUT) {
+ out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (out_fence_fd < 0) {
+ ret = out_fence_fd;
+ out_fence_fd = -1;
+ goto pre_mutex_err;
+ }
+ }
+
/* Take a local wakeref for preparing to dispatch the execbuf as
* we expect to access the hardware fairly frequently in the
* process. Upon first dispatch, we acquire another prolonged
@@ -1888,6 +1938,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err_batch_unpin;
}
+ if (in_fence) {
+ ret = eb_await_fence(params->request, in_fence);
+ if (ret < 0)
+ goto err_request;
+ }
+
+ if (out_fence_fd != -1) {
+ out_fence = sync_file_create(fence_get(&params->request->fence));
+ if (!out_fence) {
+ ret = -ENOMEM;
+ goto err_request;
+ }
+ }
+
/* Whilst this request exists, batch_obj will be on the
* active_list, and so will hold the active reference. Only when this
* request is retired will the batch_obj be moved onto the
@@ -1915,6 +1979,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ret = execbuf_submit(params, args, &eb->vmas);
err_request:
__i915_add_request(params->request, ret == 0);
+ if (out_fence) {
+ if (ret == 0) {
+ fd_install(out_fence_fd, out_fence->file);
+ args->rsvd2 &= ~((u64)0xffffffff << 32);
+ args->rsvd2 |= (u64)out_fence_fd << 32;
+ out_fence_fd = -1;
+ } else {
+ fput(out_fence->file);
+ }
+ }
err_batch_unpin:
/*
@@ -1936,6 +2010,9 @@ pre_mutex_err:
/* intel_gpu_busy should also get a ref, so it will free when the device
* is really idle. */
intel_runtime_pm_put(dev_priv);
+ if (out_fence_fd != -1)
+ put_unused_fd(out_fence_fd);
+ fence_put(in_fence);
return ret;
}
@@ -2043,11 +2120,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
return -EINVAL;
}
- if (args->rsvd2 != 0) {
- DRM_DEBUG("dirty rvsd2 field\n");
- return -EINVAL;
- }
-
exec2_list = drm_malloc_gfp(args->buffer_count,
sizeof(*exec2_list),
GFP_TEMPORARY);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 730cba23f982..735fd509fb31 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2
#define DRM_I915_GET_SPRITE_COLORKEY 0x2a
#define DRM_I915_SET_SPRITE_COLORKEY 0x2b
#define DRM_I915_GEM_WAIT 0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -393,6 +395,13 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_HAS_EXEC_ASYNC 41
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fencing -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE 42
+
typedef struct drm_i915_getparam {
__s32 param;
/*
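
As a hedged illustration (not from this patch), userspace might probe for
the new parameter before relying on the fence flags; this assumes an open
i915 DRM fd and libdrm's drmIoctl() wrapper:

#include <xf86drm.h>
#include <i915_drm.h>

static int has_exec_fence(int fd)
{
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_EXEC_FENCE,
		.value = &value,
	};

	/* Older kernels reject unknown parameters with -EINVAL. */
	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;

	return value;
}
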
@@ -842,7 +851,32 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_RESOURCE_STREAMER (1<<15)
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN (1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, and it should be close()d after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file fds and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ *
+ * Using I915_EXEC_FENCE_OUT requires use of
+ * the DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
+ * back to userspace. Failure to do so will cause the out-fence to always
+ * be reported as zero, and the real fence fd to be leaked.
+ */
+#define I915_EXEC_FENCE_OUT (1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
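
As a further illustration of combining fences (again not part of this
patch; it relies on the existing sync_file uapi in <linux/sync_file.h>),
two out-fence fds can be merged from userspace into a single fd that only
signals once both requests have completed:

#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/sync_file.h>

static int sync_fence_merge(int fd1, int fd2)
{
	struct sync_merge_data data;

	memset(&data, 0, sizeof(data));
	data.fd2 = fd2;
	strcpy(data.name, "i915-execbuf");

	if (ioctl(fd1, SYNC_IOC_MERGE, &data))
		return -errno;

	return data.fence;	/* new sync_file fd, owned by the caller */
}
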
--
2.9.3