[Intel-gfx] [PATCH 13/13] drm/i915: Support explicit fencing for execbuf

John Harrison John.C.Harrison at Intel.com
Fri Aug 26 15:08:28 UTC 2016


On 25/08/2016 10:08, Chris Wilson wrote:
> Now that the user can opt-out of implicit fencing, we need to give them
> back control over the fencing. We employ sync_file to wrap our
> drm_i915_gem_request and provide an fd that userspace can merge with
> other sync_file fds and pass back to the kernel to wait upon before
> future execution.
>
> Testcase: igt/gem_exec_fence
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/Kconfig               |  1 +
>   drivers/gpu/drm/i915/i915_drv.c            |  5 +-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 79 ++++++++++++++++++++++++++++--
>   include/uapi/drm/i915_drm.h                |  8 ++-
>   4 files changed, 86 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 7769e469118f..319ca27ea719 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -18,6 +18,7 @@ config DRM_I915
>   	select INPUT if ACPI
>   	select ACPI_VIDEO if ACPI
>   	select ACPI_BUTTON if ACPI
> +	select SYNC_FILE
>   	help
>   	  Choose this option if you have a system that has "Intel Graphics
>   	  Media Accelerator" or "HD Graphics" integrated graphics,
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 3eecfef8bcca..9460bfff360f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -358,6 +358,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_EXEC_ASYNC:
>   		value = 1;
>   		break;
> +	case I915_PARAM_HAS_EXEC_FENCE:
> +		value = 1;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> @@ -2547,7 +2550,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
>   	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
>   	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
> -	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
>   	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
>   	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index a0f46293e492..8701b8bb46fc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -28,6 +28,7 @@
>   
>   #include <linux/dma_remapping.h>
>   #include <linux/reservation.h>
> +#include <linux/sync_file.h>
>   #include <linux/uaccess.h>
>   
>   #include <drm/drmP.h>
> @@ -1686,6 +1687,33 @@ eb_select_engine(struct drm_i915_private *dev_priv,
>   	return engine;
>   }
>   
> +static int eb_add_fence(struct drm_i915_gem_request *req, struct fence *fence)
> +{
> +	struct fence_array *array;
> +	int ret, i;
> +
> +	if (fence_is_i915(fence))
> +		return __eb_sync(req, to_request(fence));
> +
> +	if (!fence_is_array(fence))
> +		return i915_sw_fence_await_dma_fence(&req->submit, fence, GFP_KERNEL);
> +
> +	array = to_fence_array(fence);
> +	for (i = 0; i < array->num_fences; i++) {
> +		struct fence *child = array->fences[i];
> +
> +		if (fence_is_i915(child))
> +			ret = __eb_sync(req, to_request(child));
> +		else
> +			ret = i915_sw_fence_await_dma_fence(&req->submit,
> +							    child, GFP_KERNEL);
> +		if (ret < 0)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   static int
>   i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   		       struct drm_file *file,
> @@ -1703,6 +1731,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   	struct i915_execbuffer_params *params = &params_master;
>   	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
>   	u32 dispatch_flags;
> +	struct fence *in_fence = NULL;
> +	struct sync_file *out_fence = NULL;
> +	int out_fence_fd = -1;
>   	int ret;
>   	bool need_relocs;
>   
> @@ -1746,6 +1777,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   		dispatch_flags |= I915_DISPATCH_RS;
>   	}
>   
> +	if (args->flags & I915_EXEC_FENCE_IN) {
> +		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> +		if (!in_fence) {
> +			ret = -EINVAL;
> +			goto pre_mutex_err;
> +		}
> +	}
> +
> +	if (args->flags & I915_EXEC_FENCE_OUT) {
> +		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
> +		if (out_fence_fd < 0) {
> +			ret = out_fence_fd;
> +			out_fence_fd = -1;
> +			goto pre_mutex_err;
> +		}
> +	}
> +
>   	/* Take a local wakeref for preparing to dispatch the execbuf as
>   	 * we expect to access the hardware fairly frequently in the
>   	 * process. Upon first dispatch, we acquire another prolonged
> @@ -1890,6 +1938,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   		goto err_batch_unpin;
>   	}
>   
> +	if (in_fence) {
> +		ret = eb_add_fence(params->request, in_fence);
> +		if (ret < 0)
> +			goto err_request;
> +	}
> +
> +	if (out_fence_fd != -1) {
> +		out_fence = sync_file_create(fence_get(&params->request->fence));
> +		if (!out_fence) {
> +			ret = -ENOMEM;
> +			goto err_request;
> +		}
> +	}
> +
>   	/* Whilst this request exists, batch_obj will be on the
>   	 * active_list, and so will hold the active reference. Only when this
>   	 * request is retired will the the batch_obj be moved onto the
> @@ -1917,6 +1979,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   	ret = execbuf_submit(params, args, &eb->vmas);
>   err_request:
>   	__i915_add_request(params->request, ret == 0);
> +	if (out_fence) {
> +		if (ret == 0) {
> +			fd_install(out_fence_fd, out_fence->file);
> +			args->rsvd2 &= ~(u64)0xffffffff << 32;
> +			args->rsvd2 |= (u64)out_fence_fd << 32;
> +			out_fence_fd = -1;
> +		} else
> +			fput(out_fence->file);
> +	}
>   
>   err_batch_unpin:
>   	/*
> @@ -1938,6 +2009,9 @@ pre_mutex_err:
>   	/* intel_gpu_busy should also get a ref, so it will free when the device
>   	 * is really idle. */
>   	intel_runtime_pm_put(dev_priv);
> +	if (out_fence_fd != -1)
> +		put_unused_fd(out_fence_fd);
> +	fence_put(in_fence);
>   	return ret;
>   }
>   
> @@ -2045,11 +2119,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
>   		return -EINVAL;
>   	}
>   
> -	if (args->rsvd2 != 0) {
> -		DRM_DEBUG("dirty rvsd2 field\n");
> -		return -EINVAL;
> -	}
> -
>   	exec2_list = drm_malloc_gfp(args->buffer_count,
>   				    sizeof(*exec2_list),
>   				    GFP_TEMPORARY);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 75f9cab23249..9fe670cc7bf3 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
>   #define DRM_I915_OVERLAY_ATTRS	0x28
>   #define DRM_I915_GEM_EXECBUFFER2	0x29
> +#define DRM_I915_GEM_EXECBUFFER2_WR	DRM_I915_GEM_EXECBUFFER2
>   #define DRM_I915_GET_SPRITE_COLORKEY	0x2a
>   #define DRM_I915_SET_SPRITE_COLORKEY	0x2b
>   #define DRM_I915_GEM_WAIT	0x2c
> @@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>   #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
>   #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
>   #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>   #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>   #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -388,6 +390,7 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_HAS_POOLED_EU	 38
>   #define I915_PARAM_MIN_EU_IN_POOL	 39
>   #define I915_PARAM_HAS_EXEC_ASYNC	 40
> +#define I915_PARAM_HAS_EXEC_FENCE	 41
>   
>   typedef struct drm_i915_getparam {
>   	__s32 param;
> @@ -821,7 +824,10 @@ struct drm_i915_gem_execbuffer2 {
>    */
>   #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
>   
> -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
> +#define I915_EXEC_FENCE_IN		(1<<16)
> +#define I915_EXEC_FENCE_OUT		(1<<17)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
>   
>   #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
>   #define i915_execbuffer2_set_context_id(eb2, context) \

I think it is also worth adding something like:
@@ -705,7 +705,7 @@ struct drm_i915_gem_exec_object2 {
         __u64 flags;

         __u64 rsvd1;
-       __u64 rsvd2;
+       __u64 rsvd2;    /* Used for fence fd */
  };

Or even renaming 'rsvd2' to 'fence_io' or some such? Maybe split it into 
two u32 entries to avoid all the messy bitmasking and shifting.



More information about the Intel-gfx mailing list