[Intel-gfx] [RFC 9/9] drm/i915: Add sync framework support to execbuff IOCTL
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jul 27 06:00:18 PDT 2015
On 07/17/2015 03:31 PM, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> Various projects desire a mechanism for managing dependencies between
> work items asynchronously. This can also include work items across
> completely different and independent systems. For example, an
> application wants to retrieve a frame from a video-in device,
> use it for rendering on a GPU and then send it to the video-out device
> for display all without having to stall waiting for completion along
> the way. The sync framework allows this. It encapsulates
> synchronisation events in file descriptors. The application can
> request a sync point for the completion of each piece of work. Drivers
> should also take sync points in with each new work request and not
> schedule the work to start until the sync has been signalled.
>
> This patch adds sync framework support to the exec buffer IOCTL. A
> sync point can be passed in to stall execution of the batch buffer
> until signalled. And a sync point can be returned after each batch
> buffer submission which will be signalled upon that batch buffer's
> completion.
>
> At present, the input sync point is simply waited on synchronously
> inside the exec buffer IOCTL call. Once the GPU scheduler arrives,
> this will be handled asynchronously inside the scheduler and the IOCTL
> can return without having to wait.
>
> Note also that the scheduler will re-order the execution of batch
> buffers, e.g. because a batch buffer is stalled on a sync point and
> cannot be submitted yet but other, independent, batch buffers are
> being presented to the driver. This means that the timeline within the
> sync points returned cannot be global to the engine. Instead they must
> be kept per context per engine (the scheduler may not re-order batches
> within a context). Hence the timeline cannot be based on the existing
> seqno values but must be a new implementation.
>
> This patch is a port of work by several people that has been pulled
> across from Android. It has been updated several times across several
> patches. Rather than attempt to port each individual patch, this
> version is the finished product as a single patch. The various
> contributors/authors along the way (in addition to myself) were:
> Satyanantha RamaGopal M <rama.gopal.m.satyanantha at intel.com>
> Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Michel Thierry <michel.thierry at intel.com>
> Arun Siluvery <arun.siluvery at linux.intel.com>
>
> [new patch in series]
>
> For: VIZ-5190
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 6 ++
> drivers/gpu/drm/i915/i915_gem.c | 84 ++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 ++++++++++++++++++++++++++++--
> include/uapi/drm/i915_drm.h | 16 +++++-
> 4 files changed, 188 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d7f1aa5..cf6b7cd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2168,6 +2168,7 @@ struct drm_i915_gem_request {
> struct list_head delay_free_list;
> bool cancelled;
> bool irq_enabled;
> + bool fence_external;
>
> /** On Which ring this request was generated */
> struct drm_i915_private *i915;
> @@ -2252,6 +2253,11 @@ void i915_gem_request_notify(struct intel_engine_cs *ring);
> int i915_create_fence_timeline(struct drm_device *dev,
> struct intel_context *ctx,
> struct intel_engine_cs *ring);
> +#ifdef CONFIG_SYNC
> +struct sync_fence;
> +int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd);
> +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence);
> +#endif
>
> static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
> {
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 3f20087..de93422 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -37,6 +37,9 @@
> #include <linux/swap.h>
> #include <linux/pci.h>
> #include <linux/dma-buf.h>
> +#ifdef CONFIG_SYNC
> +#include <../drivers/staging/android/sync.h>
> +#endif
>
> #define RQ_BUG_ON(expr)
>
> @@ -2549,6 +2552,15 @@ void __i915_add_request(struct drm_i915_gem_request *request,
> */
> i915_gem_request_submit(request);
>
> + /*
> + * If an external sync point has been requested for this request then
> + * it can be waited on without the driver's knowledge, i.e. without
> + * calling __i915_wait_request(). Thus interrupts must be enabled
> + * from the start rather than only on demand.
> + */
> + if (request->fence_external)
> + i915_gem_request_enable_interrupt(request);
Maybe then fence_exported would be clearer, fence_external at first
sounds like it is coming from another driver or something.
> +
> if (i915.enable_execlists)
> ret = ring->emit_request(request);
> else {
> @@ -2857,6 +2869,78 @@ static uint32_t i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t
> return seqno;
> }
>
> +#ifdef CONFIG_SYNC
> +int i915_create_sync_fence(struct drm_i915_gem_request *req, int *fence_fd)
> +{
> + char ring_name[] = "i915_ring0";
> + struct sync_fence *sync_fence;
> + int fd;
> +
> + fd = get_unused_fd_flags(O_CLOEXEC);
> + if (fd < 0) {
> + DRM_DEBUG("No available file descriptors!\n");
> + *fence_fd = -1;
> + return fd;
> + }
> +
> + ring_name[9] += req->ring->id;
> + sync_fence = sync_fence_create_dma(ring_name, &req->fence);
This will call ->enable_signalling so perhaps you could enable
interrupts in there for exported fences. Maybe it would be a tiny bit
more logically grouped. (Rather than have _add_request do it.)
> + if (!sync_fence) {
> + put_unused_fd(fd);
> + *fence_fd = -1;
> + return -ENOMEM;
> + }
> +
> + sync_fence_install(sync_fence, fd);
> + *fence_fd = fd;
> +
> + // Necessary??? Who does the put???
> + fence_get(&req->fence);
sync_fence_release?
> +
> + req->fence_external = true;
> +
> + return 0;
> +}
> +
> +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *sync_fence)
> +{
> + struct fence *dma_fence;
> + struct drm_i915_gem_request *req;
> + bool ignore;
> + int i;
> +
> + if (atomic_read(&sync_fence->status) != 0)
> + return true;
> +
> + ignore = true;
> + for(i = 0; i < sync_fence->num_fences; i++) {
> + dma_fence = sync_fence->cbs[i].sync_pt;
> +
> + /* No need to worry about dead points: */
> + if (fence_is_signaled(dma_fence))
> + continue;
> +
> + /* Can't ignore other people's points: */
> + if(dma_fence->ops != &i915_gem_request_fops) {
> + ignore = false;
> + break;
This is the same as returning false, and then you don't need the bool ignore at all.
> + }
> +
> + req = container_of(dma_fence, typeof(*req), fence);
> +
> + /* Can't ignore points on other rings: */
> + if (req->ring != ring) {
> + ignore = false;
> + break;
> + }
> +
> + /* Same ring means guaranteed to be in order so ignore it. */
> + }
> +
> + return ignore;
> +}
> +#endif
> +
> int i915_gem_request_alloc(struct intel_engine_cs *ring,
> struct intel_context *ctx,
> struct drm_i915_gem_request **req_out)
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 923a3c4..b1a1659 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -26,6 +26,7 @@
> *
> */
>
> +#include <linux/syscalls.h>
> #include <drm/drmP.h>
> #include <drm/i915_drm.h>
> #include "i915_drv.h"
> @@ -33,6 +34,9 @@
> #include "intel_drv.h"
> #include <linux/dma_remapping.h>
> #include <linux/uaccess.h>
> +#ifdef CONFIG_SYNC
> +#include <../drivers/staging/android/sync.h>
> +#endif
>
> #define __EXEC_OBJECT_HAS_PIN (1<<31)
> #define __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -1403,6 +1407,35 @@ eb_get_batch(struct eb_vmas *eb)
> return vma->obj;
> }
>
> +#ifdef CONFIG_SYNC
I don't expect you'll be able to get away with ifdef's in the code like
this so for non-RFC it will have to be cleaned up.
> +static int i915_early_fence_wait(struct intel_engine_cs *ring, int fence_fd)
> +{
> + struct sync_fence *fence;
> + int ret = 0;
> +
> + if (fence_fd < 0) {
> + DRM_ERROR("Invalid wait fence fd %d on ring %d\n", fence_fd,
> + (int) ring->id);
> + return 1;
> + }
> +
> + fence = sync_fence_fdget(fence_fd);
> + if (fence == NULL) {
> + DRM_ERROR("Invalid wait fence %d on ring %d\n", fence_fd,
> + (int) ring->id);
These two should be DRM_DEBUG to prevent userspace from spamming the
logs too easily.
> + return 1;
> + }
> +
> + if (atomic_read(&fence->status) == 0) {
> + if (!i915_safe_to_ignore_fence(ring, fence))
> + ret = sync_fence_wait(fence, 1000);
I expect you have to wait indefinitely here, not just for one second.
> + }
> +
> + sync_fence_put(fence);
> + return ret;
> +}
> +#endif
> +
> static int
> i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> struct drm_file *file,
> @@ -1422,6 +1455,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> u32 dispatch_flags;
> int ret;
> bool need_relocs;
> + int fd_fence_complete = -1;
> +#ifdef CONFIG_SYNC
> + int fd_fence_wait = lower_32_bits(args->rsvd2);
> +#endif
> +
> + /*
> + * Make sure an broken fence handle is not returned no matter
> + * how early an error might be hit. Note that rsvd2 has to be
> + * saved away first because it is also an input parameter!
> + */
> + if (args->flags & I915_EXEC_CREATE_FENCE)
> + args->rsvd2 = (__u64) -1;
>
> if (!i915_gem_check_execbuffer(args))
> return -EINVAL;
> @@ -1505,6 +1550,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> dispatch_flags |= I915_DISPATCH_RS;
> }
>
> +#ifdef CONFIG_SYNC
> + /*
> + * Without a GPU scheduler, any fence waits must be done up front.
> + */
> + if (args->flags & I915_EXEC_WAIT_FENCE) {
> + ret = i915_early_fence_wait(ring, fd_fence_wait);
> + if (ret < 0)
> + return ret;
> +
> + args->flags &= ~I915_EXEC_WAIT_FENCE;
> + }
> +#endif
> +
> intel_runtime_pm_get(dev_priv);
>
> ret = i915_mutex_lock_interruptible(dev);
> @@ -1652,6 +1710,27 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> params->batch_obj = batch_obj;
> params->ctx = ctx;
>
> +#ifdef CONFIG_SYNC
> + if (args->flags & I915_EXEC_CREATE_FENCE) {
> + /*
> + * Caller has requested a sync fence.
> + * User interrupts will be enabled to make sure that
> + * the timeline is signalled on completion.
> + */
> + ret = i915_create_sync_fence(params->request,
> + &fd_fence_complete);
> + if (ret) {
> + DRM_ERROR("Fence creation failed for ring %d, ctx %p\n",
> + ring->id, ctx);
> + args->rsvd2 = (__u64) -1;
> + goto err;
> + }
> +
> + /* Return the fence through the rsvd2 field */
> + args->rsvd2 = (__u64) fd_fence_complete;
> + }
> +#endif
> +
> ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
>
> err_batch_unpin:
> @@ -1683,6 +1762,12 @@ pre_mutex_err:
> /* intel_gpu_busy should also get a ref, so it will free when the device
> * is really idle. */
> intel_runtime_pm_put(dev_priv);
> +
> + if (fd_fence_complete != -1) {
> + sys_close(fd_fence_complete);
I am not sure calling system call functions from driver code will be
allowed. That's why I was doing fd_install only when sure everything
went OK.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list