[Intel-gfx] [PATCH 12/13] drm/i915: Add sync framework support to execbuff IOCTL

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Dec 11 07:29:20 PST 2015



On 11/12/15 13:12, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> Various projects desire a mechanism for managing dependencies between
> work items asynchronously. This can also include work items across
> completely different and independent systems. For example, an
> application wants to retrieve a frame from a video-in device, use it
> for rendering on a GPU, then send it to the video-out device for
> display, all without having to stall waiting for completion along
> the way. The sync framework allows this. It encapsulates
> synchronisation events in file descriptors. The application can
> request a sync point for the completion of each piece of work. Drivers
> should also take sync points in with each new work request and not
> schedule the work to start until the sync has been signalled.
>
> This patch adds sync framework support to the exec buffer IOCTL. A
> sync point can be passed in to stall execution of the batch buffer
> until signalled. And a sync point can be returned after each batch
> buffer submission which will be signalled upon that batch buffer's
> completion.
>
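
As an aside for anyone reading along, the userspace side of this would
then look roughly like the sketch below. Untested, assuming libdrm's
drmIoctl() and an execbuffer2 struct already populated with buffers,
batch length and ring selection elsewhere; the helper name is made up:

	#include <xf86drm.h>
	#include <i915_drm.h>

	/*
	 * Submit a batch that waits on in_fence_fd before executing and
	 * return a new fence fd that signals when the batch completes,
	 * or -1 on error.
	 */
	static int submit_with_fences(int drm_fd,
				      struct drm_i915_gem_execbuffer2 *execbuf,
				      int in_fence_fd)
	{
		execbuf->flags |= I915_EXEC_WAIT_FENCE | I915_EXEC_CREATE_FENCE;
		execbuf->rsvd2 = (__u64) in_fence_fd;

		if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf))
			return -1;

		/* The kernel writes the completion fence fd back into rsvd2 */
		return (int) execbuf->rsvd2;
	}

The returned fd can then be poll()ed, handed to another driver, or passed
back in as the wait fence of a later submission.
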
> At present, the input sync point is simply waited on synchronously
> inside the exec buffer IOCTL call. Once the GPU scheduler arrives,
> this will be handled asynchronously inside the scheduler and the IOCTL
> can return without having to wait.
>
> Note also that the scheduler will re-order the execution of batch
> buffers, e.g. because a batch buffer is stalled on a sync point and
> cannot be submitted yet but other, independent, batch buffers are
> being presented to the driver. This means that the timeline within the
> sync points returned cannot be global to the engine. Instead they must
> be kept per context per engine (the scheduler may not re-order batches
> within a context). Hence the timeline cannot be based on the existing
> seqno values but must be a new implementation.
>
> This patch is a port of work by several people that has been pulled
> across from Android. It has been updated several times across several
> patches. Rather than attempt to port each individual patch, this
> version is the finished product as a single patch. The various
> contributors/authors along the way (in addition to myself) were:
>    Satyanantha RamaGopal M <rama.gopal.m.satyanantha at intel.com>
>    Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>    Michel Thierry <michel.thierry at intel.com>
>    Arun Siluvery <arun.siluvery at linux.intel.com>
>
> v2: New patch in series.
>
> v3: Updated to use the new 'sync_fence_is_signaled' API rather than
> having to know about the internal meaning of the 'fence::status' field
> (which recently got inverted!) [work by Peter Lawthers].
>
> Updated after review comments by Daniel Vetter. Removed '#ifdef
> CONFIG_SYNC' and added 'select SYNC' to the Kconfig instead. Moved the
> fd installation of fences to the end of the execbuff call in order
> to remove the need to use 'sys_close' to clean up on failure.
>
> Updated after review comments by Tvrtko Ursulin. Removed the
> 'fence_external' flag as redundant. Converted DRM_ERRORs to
> DRM_DEBUGs. Changed the one second wait to a wait forever when waiting
> on incoming fences.
>
> v4: Re-instated missing return of fd to user land that somehow got
> lost in the anti-sys_close() re-factor.
>
> For: VIZ-5190
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> Signed-off-by: Peter Lawthers <peter.lawthers at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Daniel Vetter <daniel at ffwll.ch>
> ---
>   drivers/gpu/drm/i915/Kconfig               |  3 +
>   drivers/gpu/drm/i915/i915_drv.h            |  6 ++
>   drivers/gpu/drm/i915/i915_gem.c            | 89 +++++++++++++++++++++++++++-
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 95 ++++++++++++++++++++++++++++--
>   include/uapi/drm/i915_drm.h                | 16 ++++-
>   5 files changed, 200 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 1d96fe1..cb5d5b2 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -22,6 +22,9 @@ config DRM_I915
>   	select ACPI_VIDEO if ACPI
>   	select ACPI_BUTTON if ACPI
>   	select MMU_NOTIFIER

select MMU_NOTIFIER is not upstream! :)

> +	# ANDROID is required for SYNC
> +	select ANDROID
> +	select SYNC
>   	help
>   	  Choose this option if you have a system that has "Intel Graphics
>   	  Media Accelerator" or "HD Graphics" integrated graphics,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d013c6d..194bca0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2278,6 +2278,12 @@ void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked);
>   int i915_create_fence_timeline(struct drm_device *dev,
>   			       struct intel_context *ctx,
>   			       struct intel_engine_cs *ring);
> +struct sync_fence;
> +int i915_create_sync_fence(struct drm_i915_gem_request *req,
> +			   struct sync_fence **sync_fence, int *fence_fd);
> +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req,
> +				struct sync_fence *sync_fence, int fence_fd);
> +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence);
>
>   static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4817015..279d79f 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -37,6 +37,7 @@
>   #include <linux/swap.h>
>   #include <linux/pci.h>
>   #include <linux/dma-buf.h>
> +#include <../drivers/android/sync.h>
>
>   #define RQ_BUG_ON(expr)
>
> @@ -2560,7 +2561,13 @@ void __i915_add_request(struct drm_i915_gem_request *request,
>
>   	/*
>   	 * Add the fence to the pending list before emitting the commands to
> -	 * generate a seqno notification interrupt.
> +	 * generate a seqno notification interrupt. This will also enable
> +	 * interrupts if 'signal_requested' has been set.
> +	 *
> +	 * For example, if an exported sync point has been requested for this
> +	 * request then it can be waited on without the driver's knowledge,
> +	 * i.e. without calling __i915_wait_request(). Thus interrupts must
> +	 * be enabled from the start rather than only on demand.
>   	 */
>   	i915_gem_request_submit(request);
>
> @@ -2901,6 +2908,86 @@ static unsigned i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t
>   	return seqno;
>   }
>
> +int i915_create_sync_fence(struct drm_i915_gem_request *req,
> +			   struct sync_fence **sync_fence, int *fence_fd)
> +{
> +	char ring_name[] = "i915_ring0";
> +	int fd;
> +
> +	fd = get_unused_fd_flags(O_CLOEXEC);
> +	if (fd < 0) {
> +		DRM_DEBUG("No available file descriptors!\n");
> +		*fence_fd = -1;
> +		return fd;
> +	}
> +
> +	ring_name[9] += req->ring->id;

I think this will possibly blow up if CONFIG_DEBUG_RODATA is set, which 
is the case on most kernels.

So I think you need to make a local copy with kstrdup and free it after 
calling sync_fence_create_dma.
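
Something along these lines perhaps (untested sketch; if I am reading
sync_fence_alloc right the name is copied into the fence, so it can be
freed straight after the create call):

	char *name = kstrdup("i915_ring0", GFP_KERNEL);

	if (!name) {
		put_unused_fd(fd);
		*fence_fd = -1;
		return -ENOMEM;
	}

	name[9] += req->ring->id;
	*sync_fence = sync_fence_create_dma(name, &req->fence);
	kfree(name);

Or kasprintf(GFP_KERNEL, "i915_ring%d", req->ring->id) would avoid the
magic character index altogether.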

> +	*sync_fence = sync_fence_create_dma(ring_name, &req->fence);
> +	if (!*sync_fence) {
> +		put_unused_fd(fd);
> +		*fence_fd = -1;
> +		return -ENOMEM;
> +	}
> +
> +	*fence_fd = fd;
> +
> +	return 0;
> +}
> +
> +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req,
> +				struct sync_fence *sync_fence, int fence_fd)
> +{
> +	sync_fence_install(sync_fence, fence_fd);
> +
> +	/*
> +	 * NB: The corresponding put happens automatically on file close
> +	 * from sync_fence_release() via the fops callback.
> +	 */
> +	fence_get(&req->fence);
> +
> +	/*
> +	 * The sync framework adds a callback to the fence. The fence
> +	 * framework calls 'enable_signalling' when a callback is added.
> +	 * Thus this flag should have been set by now. If not then
> +	 * 'enable_signalling' must be called explicitly because exporting
> +	 * a fence to user land means it can be waited on asynchronously and
> +	 * thus must be signalled asynchronously.
> +	 */
> +	WARN_ON(!req->signal_requested);
> +}
> +
> +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *sync_fence)
> +{
> +	struct fence *dma_fence;
> +	struct drm_i915_gem_request *req;
> +	int i;
> +
> +	if (sync_fence_is_signaled(sync_fence))
> +		return true;
> +
> +	for(i = 0; i < sync_fence->num_fences; i++) {
> +		dma_fence = sync_fence->cbs[i].sync_pt;
> +
> +		/* No need to worry about dead points: */
> +		if (fence_is_signaled(dma_fence))
> +			continue;
> +
> +		/* Can't ignore other people's points: */

Maybe add "unsignaled" to qualify.

> +		if(dma_fence->ops != &i915_gem_request_fops)
> +			return false;
> +
> +		req = container_of(dma_fence, typeof(*req), fence);
> +
> +		/* Can't ignore points on other rings: */
> +		if (req->ring != ring)
> +			return false;
> +
> +		/* Same ring means guaranteed to be in order so ignore it. */
> +	}
> +
> +	return true;
> +}
> +
>   int i915_gem_request_alloc(struct intel_engine_cs *ring,
>   			   struct intel_context *ctx,
>   			   struct drm_i915_gem_request **req_out)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index bfc4c17..5f629f8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -26,6 +26,7 @@
>    *
>    */
>
> +#include <linux/syscalls.h>
>   #include <drm/drmP.h>
>   #include <drm/i915_drm.h>
>   #include "i915_drv.h"
> @@ -33,6 +34,7 @@
>   #include "intel_drv.h"
>   #include <linux/dma_remapping.h>
>   #include <linux/uaccess.h>
> +#include <../drivers/android/sync.h>
>
>   #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>   #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -1322,6 +1324,38 @@ eb_get_batch(struct eb_vmas *eb)
>   	return vma->obj;
>   }
>
> +static int i915_early_fence_wait(struct intel_engine_cs *ring, int fence_fd)
> +{
> +	struct sync_fence *fence;
> +	int ret = 0;
> +
> +	if (fence_fd < 0) {
> +		DRM_DEBUG("Invalid wait fence fd %d on ring %d\n", fence_fd,
> +			  (int) ring->id);
> +		return 1;

Suggest adding kerneldoc describing return values from this function.

It wasn't immediately clear to me what a return value of one means.

But I am also not sure that an invalid fd shouldn't be an outright error
instead of allowing execbuf to continue.
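
Something like this maybe (wording is just a sketch, adjust to whatever
the final return convention ends up being):

/**
 * i915_early_fence_wait - wait on an incoming fence before executing a batch
 * @ring: engine the batch buffer will be submitted to
 * @fence_fd: sync fence file descriptor passed in via execbuffer2.rsvd2
 *
 * Returns 0 if the fence has signalled or can safely be ignored, a
 * positive value if @fence_fd was not a usable fence and the wait was
 * skipped, or a negative error code if waiting on the fence failed.
 */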

> +	}
> +
> +	fence = sync_fence_fdget(fence_fd);
> +	if (fence == NULL) {
> +		DRM_DEBUG("Invalid wait fence %d on ring %d\n", fence_fd,
> +			  (int) ring->id);
> +		return 1;
> +	}
> +
> +	if (!sync_fence_is_signaled(fence)) {

Minor comment, but i915_safe_to_ignore_fence checks this as well so you 
could remove it here.
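
I.e. the whole block could just become:

	/*
	 * Wait forever for the fence to be signalled. This is safe
	 * because the mutex lock has not yet been acquired and the
	 * wait is interruptible.
	 */
	if (!i915_safe_to_ignore_fence(ring, fence))
		ret = sync_fence_wait(fence, -1);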

> +		/*
> +		 * Wait forever for the fence to be signalled. This is safe
> +		 * because the mutex lock has not yet been acquired and
> +		 * the wait is interruptible.
> +		 */
> +		if (!i915_safe_to_ignore_fence(ring, fence))
> +			ret = sync_fence_wait(fence, -1);
> +	}
> +
> +	sync_fence_put(fence);
> +	return ret;
> +}
> +
>   static int
>   i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   		       struct drm_file *file,
> @@ -1341,6 +1375,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   	u32 dispatch_flags;
>   	int ret;
>   	bool need_relocs;
> +	int fd_fence_complete = -1;
> +	int fd_fence_wait = lower_32_bits(args->rsvd2);
> +	struct sync_fence *sync_fence;
> +
> +	/*
> +	 * Make sure a broken fence handle is not returned no matter
> +	 * how early an error might be hit. Note that rsvd2 has to be
> +	 * saved away first because it is also an input parameter!
> +	 */

Instead of the 2nd sentence maybe say something like "Note that we have
saved rsvd2 already for later use since it is also an input parameter!".
As written I was expecting the code following the comment to do that,
and then was confused when it didn't. Or maybe my attention span is too
short.

> +	if (args->flags & I915_EXEC_CREATE_FENCE)
> +		args->rsvd2 = (__u64) -1;
>
>   	if (!i915_gem_check_execbuffer(args))
>   		return -EINVAL;
> @@ -1424,6 +1469,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   		dispatch_flags |= I915_DISPATCH_RS;
>   	}
>
> +	/*
> +	 * Without a GPU scheduler, any fence waits must be done up front.
> +	 */
> +	if (args->flags & I915_EXEC_WAIT_FENCE) {
> +		ret = i915_early_fence_wait(ring, fd_fence_wait);
> +		if (ret < 0)
> +			return ret;
> +
> +		args->flags &= ~I915_EXEC_WAIT_FENCE;
> +	}
> +
>   	intel_runtime_pm_get(dev_priv);
>
>   	ret = i915_mutex_lock_interruptible(dev);
> @@ -1571,8 +1627,41 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>   	params->batch_obj               = batch_obj;
>   	params->ctx                     = ctx;
>
> +	if (args->flags & I915_EXEC_CREATE_FENCE) {
> +		/*
> +		 * Caller has requested a sync fence.
> +		 * User interrupts will be enabled to make sure that
> +		 * the timeline is signalled on completion.
> +		 */

Is it signaled or signalled? There is a lot of usage of both throughout 
the patches and I as a non-native speaker am amu^H^H^Hconfused. ;)

> +		ret = i915_create_sync_fence(params->request, &sync_fence,
> +					     &fd_fence_complete);
> +		if (ret) {
> +			DRM_ERROR("Fence creation failed for ring %d, ctx %p\n",
> +				  ring->id, ctx);
> +			goto err_batch_unpin;
> +		}
> +	}
> +
>   	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
>
> +	if (fd_fence_complete != -1) {
> +		if (ret) {
> +			sync_fence_put(sync_fence);
> +			put_unused_fd(fd_fence_complete);
> +		} else {
> +			/*
> +			 * Install the fence into the pre-allocated file
> +			 * descriptor to the fence object so that user land
> +			 * can wait on it...
> +			 */
> +			i915_install_sync_fence_fd(params->request,
> +						   sync_fence, fd_fence_complete);
> +
> +			/* Return the fence through the rsvd2 field */
> +			args->rsvd2 = (__u64) fd_fence_complete;
> +		}
> +	}
> +
>   err_batch_unpin:
>   	/*
>   	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
> @@ -1602,6 +1691,7 @@ pre_mutex_err:
>   	/* intel_gpu_busy should also get a ref, so it will free when the device
>   	 * is really idle. */
>   	intel_runtime_pm_put(dev_priv);
> +
>   	return ret;
>   }
>
> @@ -1707,11 +1797,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
>   		return -EINVAL;
>   	}
>
> -	if (args->rsvd2 != 0) {
> -		DRM_DEBUG("dirty rvsd2 field\n");
> -		return -EINVAL;
> -	}
> -
>   	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
>   			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
>   	if (exec2_list == NULL)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 67cebe6..86f7921 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -250,7 +250,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
>   #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>   #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> -#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>   #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>   #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>   #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -695,7 +695,7 @@ struct drm_i915_gem_exec_object2 {
>   	__u64 flags;
>
>   	__u64 rsvd1;
> -	__u64 rsvd2;
> +	__u64 rsvd2;	/* Used for fence fd */
>   };
>
>   struct drm_i915_gem_execbuffer2 {
> @@ -776,7 +776,17 @@ struct drm_i915_gem_execbuffer2 {
>    */
>   #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
>
> -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
> +/** Caller supplies a sync fence fd in the rsvd2 field.
> + * Wait for it to be signalled before starting the work
> + */
> +#define I915_EXEC_WAIT_FENCE		(1<<16)
> +
> +/** Caller wants a sync fence fd for this execbuffer.
> + *  It will be returned in rsvd2
> + */
> +#define I915_EXEC_CREATE_FENCE		(1<<17)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_CREATE_FENCE<<1)
>
>   #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
>   #define i915_execbuffer2_set_context_id(eb2, context) \
>

Regards,

Tvrtko


