[Intel-gfx] [PATCH 13/13] drm/i915: Support explicit fencing for execbuf
John Harrison
John.C.Harrison at Intel.com
Fri Aug 26 15:08:28 UTC 2016
On 25/08/2016 10:08, Chris Wilson wrote:
> Now that the user can opt-out of implicit fencing, we need to give them
> back control over the fencing. We employ sync_file to wrap our
> drm_i915_gem_request and provide an fd that userspace can merge with
> other sync_file fds and pass back to the kernel to wait upon before
> future execution.
>
> Testcase: igt/gem_exec_fence
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/Kconfig | 1 +
> drivers/gpu/drm/i915/i915_drv.c | 5 +-
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 79 ++++++++++++++++++++++++++++--
> include/uapi/drm/i915_drm.h | 8 ++-
> 4 files changed, 86 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7769e469118f..319ca27ea719 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -18,6 +18,7 @@ config DRM_I915
> select INPUT if ACPI
> select ACPI_VIDEO if ACPI
> select ACPI_BUTTON if ACPI
> + select SYNC_FILE
> help
> Choose this option if you have a system that has "Intel Graphics
> Media Accelerator" or "HD Graphics" integrated graphics,
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 3eecfef8bcca..9460bfff360f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -358,6 +358,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_EXEC_ASYNC:
> value = 1;
> break;
> + case I915_PARAM_HAS_EXEC_FENCE:
> + value = 1;
> + break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> return -EINVAL;
> @@ -2547,7 +2550,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
> DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
> DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
> DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
> - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
> DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
> DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index a0f46293e492..8701b8bb46fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -28,6 +28,7 @@
>
> #include <linux/dma_remapping.h>
> #include <linux/reservation.h>
> +#include <linux/sync_file.h>
> #include <linux/uaccess.h>
>
> #include <drm/drmP.h>
> @@ -1686,6 +1687,33 @@ eb_select_engine(struct drm_i915_private *dev_priv,
> return engine;
> }
>
> +static int eb_add_fence(struct drm_i915_gem_request *req, struct fence *fence)
> +{
> + struct fence_array *array;
> + int ret, i;
> +
> + if (fence_is_i915(fence))
> + return __eb_sync(req, to_request(fence));
> +
> + if (!fence_is_array(fence))
> + return i915_sw_fence_await_dma_fence(&req->submit, fence, GFP_KERNEL);
> +
> + array = to_fence_array(fence);
> + for (i = 0; i < array->num_fences; i++) {
> + struct fence *child = array->fences[i];
> +
> + if (fence_is_i915(child))
> + ret = __eb_sync(req, to_request(child));
> + else
> + ret = i915_sw_fence_await_dma_fence(&req->submit,
> + child, GFP_KERNEL);
> + if (ret < 0)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> static int
> i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> struct drm_file *file,
> @@ -1703,6 +1731,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> struct i915_execbuffer_params *params = ¶ms_master;
> const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
> u32 dispatch_flags;
> + struct fence *in_fence = NULL;
> + struct sync_file *out_fence = NULL;
> + int out_fence_fd = -1;
> int ret;
> bool need_relocs;
>
> @@ -1746,6 +1777,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> dispatch_flags |= I915_DISPATCH_RS;
> }
>
> + if (args->flags & I915_EXEC_FENCE_IN) {
> + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> + if (!in_fence) {
> + ret = -EINVAL;
> + goto pre_mutex_err;
> + }
> + }
> +
> + if (args->flags & I915_EXEC_FENCE_OUT) {
> + out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
> + if (out_fence_fd < 0) {
> + ret = out_fence_fd;
> + out_fence_fd = -1;
> + goto pre_mutex_err;
> + }
> + }
> +
> /* Take a local wakeref for preparing to dispatch the execbuf as
> * we expect to access the hardware fairly frequently in the
> * process. Upon first dispatch, we acquire another prolonged
> @@ -1890,6 +1938,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> goto err_batch_unpin;
> }
>
> + if (in_fence) {
> + ret = eb_add_fence(params->request, in_fence);
> + if (ret < 0)
> + goto err_request;
> + }
> +
> + if (out_fence_fd != -1) {
> + out_fence = sync_file_create(fence_get(¶ms->request->fence));
> + if (!out_fence) {
> + ret = -ENOMEM;
> + goto err_request;
> + }
> + }
> +
> /* Whilst this request exists, batch_obj will be on the
> * active_list, and so will hold the active reference. Only when this
> * request is retired will the the batch_obj be moved onto the
> @@ -1917,6 +1979,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> ret = execbuf_submit(params, args, &eb->vmas);
> err_request:
> __i915_add_request(params->request, ret == 0);
> + if (out_fence) {
> + if (ret == 0) {
> + fd_install(out_fence_fd, out_fence->file);
> + args->rsvd2 &= ~(u64)0xffffffff << 32;
> + args->rsvd2 |= (u64)out_fence_fd << 32;
> + out_fence_fd = -1;
> + } else
> + fput(out_fence->file);
> + }
>
> err_batch_unpin:
> /*
> @@ -1938,6 +2009,9 @@ pre_mutex_err:
> /* intel_gpu_busy should also get a ref, so it will free when the device
> * is really idle. */
> intel_runtime_pm_put(dev_priv);
> + if (out_fence_fd != -1)
> + put_unused_fd(out_fence_fd);
> + fence_put(in_fence);
> return ret;
> }
>
> @@ -2045,11 +2119,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
> return -EINVAL;
> }
>
> - if (args->rsvd2 != 0) {
> - DRM_DEBUG("dirty rvsd2 field\n");
> - return -EINVAL;
> - }
> -
> exec2_list = drm_malloc_gfp(args->buffer_count,
> sizeof(*exec2_list),
> GFP_TEMPORARY);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 75f9cab23249..9fe670cc7bf3 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_I915_OVERLAY_PUT_IMAGE 0x27
> #define DRM_I915_OVERLAY_ATTRS 0x28
> #define DRM_I915_GEM_EXECBUFFER2 0x29
> +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2
> #define DRM_I915_GET_SPRITE_COLORKEY 0x2a
> #define DRM_I915_SET_SPRITE_COLORKEY 0x2b
> #define DRM_I915_GEM_WAIT 0x2c
> @@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
> #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
> #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
> #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
> #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -388,6 +390,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_HAS_POOLED_EU 38
> #define I915_PARAM_MIN_EU_IN_POOL 39
> #define I915_PARAM_HAS_EXEC_ASYNC 40
> +#define I915_PARAM_HAS_EXEC_FENCE 41
>
> typedef struct drm_i915_getparam {
> __s32 param;
> @@ -821,7 +824,10 @@ struct drm_i915_gem_execbuffer2 {
> */
> #define I915_EXEC_RESOURCE_STREAMER (1<<15)
>
> -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
> +#define I915_EXEC_FENCE_IN (1<<16)
> +#define I915_EXEC_FENCE_OUT (1<<17)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
>
> #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
> #define i915_execbuffer2_set_context_id(eb2, context) \
I think it is also worth adding something like:
@@ -705,7 +705,7 @@ struct drm_i915_gem_exec_object2 {
__u64 flags;
__u64 rsvd1;
- __u64 rsvd2;
+ __u64 rsvd2; /* Used for fence fd */
};
Or even renaming 'rsvd2' to 'fence_io' or some such? Maybe split it into
two u32 entries to avoid all the messy bitmasking and shifting.
More information about the Intel-gfx
mailing list