[Intel-gfx] [RFC] drm/i915: Add sync framework support to execbuff IOCTL

John.C.Harrison at Intel.com John.C.Harrison at Intel.com
Thu Jul 2 04:09:59 PDT 2015


From: John Harrison <John.C.Harrison at Intel.com>

Various projects desire a mechanism for managing dependencies between
work items asynchronously. This can also include work items across
completely different and independent systems. For example, an
application wants to retrieve a frame from a video-in device, use it
for rendering on a GPU and then send it to the video-out device for
display, all without having to stall waiting for completion along the
way. The sync framework allows this. It encapsulates synchronisation
events in file descriptors. The application can request a sync point
for the completion of each piece of work. Drivers should also accept
sync points with each new work request and not schedule the work to
start until the sync point has been signalled.
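
For illustration, a minimal userspace sketch (not part of this patch)
of waiting on such a fence fd; it assumes the fd refers to a
sync-framework fence, which can be poll()ed and reports POLLIN once
signalled (the helper name is invented):

#include <poll.h>

/* Block for up to timeout_ms; 0 means the fence has signalled. */
static int wait_fence_fd(int fence_fd, int timeout_ms)
{
	struct pollfd pfd = { .fd = fence_fd, .events = POLLIN };
	int ret = poll(&pfd, 1, timeout_ms);

	if (ret == 1 && (pfd.revents & POLLIN))
		return 0;
	return -1;	/* timeout, fence error or poll() failure */
}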

This patch adds sync framework support to the exec buffer IOCTL. A
sync point can be passed in to stall execution of the batch buffer
until it is signalled, and a sync point can be returned for each batch
buffer submission which will be signalled upon that batch buffer's
completion.
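
For illustration, a minimal userspace sketch (not part of this patch)
showing how the two new flags might be combined. It assumes libdrm's
drmIoctl() and an execbuffer2 structure that has already been
populated (buffer list, batch length, context id in rsvd1, ring
selection); the helper name is invented, while the flag names and the
dual use of rsvd2 are as defined later in this patch:

#include <errno.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int submit_with_fences(int drm_fd, struct drm_i915_gem_execbuffer2 *eb,
			      int in_fence_fd, int *out_fence_fd)
{
	if (in_fence_fd >= 0) {
		/* Stall this batch until the external fence has signalled. */
		eb->flags |= I915_EXEC_WAIT_FENCE;
		eb->rsvd2 = (__u64)in_fence_fd;
	}

	/* Ask for a completion fence; the fd is returned in rsvd2. */
	eb->flags |= I915_EXEC_REQUEST_FENCE;

	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb))
		return -errno;

	*out_fence_fd = (int)eb->rsvd2;
	return 0;
}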

At present, the input sync point is simply waited on synchronously
inside the exec buffer IOCTL call. Once the GPU scheduler arrives,
this will be handled asynchronously inside the scheduler and the IOCTL
can return without having to wait.

Note also that the scheduler will re-order the execution of batch
buffers, e.g. because a batch buffer is stalled on a sync point and
cannot be submitted yet while other, independent, batch buffers are
being presented to the driver. This means that the timeline used for
the returned sync points cannot be global to the engine. Instead,
timelines must be kept per context per engine (the scheduler may not
re-order batches within a context). Hence the timeline cannot be based
on the existing seqno values but requires a new implementation.
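
A hedged illustration (context names and values are invented): sync
values are handed out at execbuffer time, so a single engine-wide
timeline would interleave values from different contexts:

  issue order   : A:1  B:2  A:3
  execute order : B:2  A:1  A:3   (A:1 stalled on its wait fence)

When B:2 completes, advancing one global timeline to 2 would wrongly
signal A:1's fence as well. One timeline per context per engine avoids
this, because batches within a context complete in issue order.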

Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
---
 drivers/gpu/drm/i915/Makefile              |   2 +
 drivers/gpu/drm/i915/i915_drv.h            |  18 ++
 drivers/gpu/drm/i915/i915_gem.c            |  38 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  89 ++++++-
 drivers/gpu/drm/i915/intel_sync.c          | 357 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_sync.h          |  76 ++++++
 include/uapi/drm/i915_drm.h                |  18 +-
 7 files changed, 592 insertions(+), 6 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_sync.c
 create mode 100644 drivers/gpu/drm/i915/intel_sync.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index de21965..1b813ea 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -90,6 +90,8 @@ i915-y += i915_vgpu.o
 # legacy horrors
 i915-y += i915_dma.o
 
+i915-$(CONFIG_SYNC) += intel_sync.o
+
 obj-$(CONFIG_DRM_I915)  += i915.o
 
 CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dca69d1..36d40d4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -836,6 +836,10 @@ struct i915_ctx_hang_stats {
 	bool banned;
 };
 
+#ifdef CONFIG_SYNC
+struct i915_sync_timeline;
+#endif
+
 /* This must match up with the value previously used for execbuf2.rsvd1. */
 #define DEFAULT_CONTEXT_HANDLE 0
 
@@ -880,6 +884,9 @@ struct intel_context {
 		struct drm_i915_gem_object *state;
 		struct intel_ringbuffer *ringbuf;
 		int pin_count;
+#ifdef CONFIG_SYNC
+		struct i915_sync_timeline *sync_timeline;
+#endif
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
@@ -2201,6 +2208,11 @@ struct drm_i915_gem_request {
 	/** process identifier submitting this request */
 	struct pid *pid;
 
+#ifdef CONFIG_SYNC
+	/** native sync timeline value */
+	uint32_t sync_value;
+#endif
+
 	/**
 	 * The ELSP only accepts two elements at a time, so we queue
 	 * context/tail pairs on a given queue (ring->execlist_queue) until the
@@ -2291,6 +2303,12 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
 	*pdst = src;
 }
 
+#ifdef CONFIG_SYNC
+struct drm_i915_gem_request *i915_gem_request_find_by_sync_value(struct intel_engine_cs *ring,
+								 struct intel_context *ctx,
+								 uint32_t sync_value);
+#endif
+
 /*
  * A command that requires special handling by the command parser.
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ae40a65..4ef1469 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -37,6 +37,7 @@
 #include <linux/swap.h>
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
+#include "intel_sync.h"
 
 #define RQ_BUG_ON(expr)
 
@@ -2534,6 +2535,17 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	 */
 	i915_gem_request_submit(request);
 
+#ifdef CONFIG_SYNC
+	/*
+	 * If an external sync point has been requested for this request then
+	 * it can be waited on without the driver's knowledge, i.e. without
+	 * calling __i915_wait_request(). Thus interrupts must be enabled
+	 * from the start rather than only on demand.
+	 */
+	if (request->sync_value)
+		i915_gem_request_enable_interrupt(request);
+#endif
+
 	if (i915.enable_execlists)
 		ret = ring->emit_request(request);
 	else {
@@ -2735,6 +2747,11 @@ void i915_gem_request_notify(struct intel_engine_cs *ring)
 
 			fence_signal_locked(&req->fence);
 			trace_i915_gem_request_complete(req);
+
+#ifdef CONFIG_SYNC
+			if (req->sync_value)
+				i915_sync_timeline_advance(req->ctx, req->ring, req->sync_value);
+#endif
 		}
 
 		list_del_init(&req->signal_list);
@@ -2830,6 +2847,27 @@ void i915_gem_request_cancel(struct drm_i915_gem_request *req)
 	i915_gem_request_unreference(req);
 }
 
+#ifdef CONFIG_SYNC
+struct drm_i915_gem_request *i915_gem_request_find_by_sync_value(struct intel_engine_cs *ring,
+								 struct intel_context *ctx,
+								 uint32_t sync_value)
+{
+	struct drm_i915_gem_request *req;
+
+	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+
+	list_for_each_entry(req, &ring->request_list, list) {
+		if (req->ctx != ctx)
+			continue;
+
+		if (req->sync_value == sync_value)
+			return req;
+	}
+
+	return NULL;
+}
+#endif
+
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 600db74..f556826 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -26,12 +26,14 @@
  *
  */
 
+#include <linux/syscalls.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
+#include "intel_sync.h"
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -1402,6 +1404,35 @@ eb_get_batch(struct eb_vmas *eb)
 	return vma->obj;
 }
 
+#ifdef CONFIG_SYNC
+static int i915_early_fence_wait(struct intel_engine_cs *ring, int fence_fd)
+{
+	struct sync_fence *fence;
+	int ret = 0;
+
+	if (fence_fd < 0) {
+		DRM_ERROR("Invalid wait fence fd %d on ring %d\n", fence_fd,
+			  (int) ring->id);
+		return 1;
+	}
+
+	fence = sync_fence_fdget(fence_fd);
+	if (fence == NULL) {
+		DRM_ERROR("Invalid wait fence %d on ring %d\n", fence_fd,
+			  (int) ring->id);
+		return 1;
+	}
+
+	if (atomic_read(&fence->status) == 0) {
+		if (!i915_safe_to_ignore_fence(ring, fence))
+			ret = sync_fence_wait(fence, 1000);
+	}
+
+	sync_fence_put(fence);
+	return ret;
+}
+#endif
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
@@ -1421,6 +1452,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	u32 dispatch_flags;
 	int ret;
 	bool need_relocs;
+	int fd_fence_complete = -1;
+#ifdef CONFIG_SYNC
+	int fd_fence_wait = (int) args->rsvd2;
+#endif
+
+	/*
+	 * Make sure a broken fence handle is not returned no matter
+	 * how early an error might be hit. Note that rsvd2 has to be
+	 * saved away first because it is also an input parameter!
+	 */
+	if (args->flags & I915_EXEC_REQUEST_FENCE)
+		args->rsvd2 = (__u64) -1;
 
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
@@ -1490,6 +1533,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_SYNC
+	/*
+	 * Without a GPU scheduler, any fence waits must be done up front.
+	 */
+	if (args->flags & I915_EXEC_WAIT_FENCE) {
+		ret = i915_early_fence_wait(ring, fd_fence_wait);
+		if (ret < 0)
+			goto pre_mutex_err;
+
+		args->flags &= ~I915_EXEC_WAIT_FENCE;
+	}
+#endif
+
 	intel_runtime_pm_get(dev_priv);
 
 	ret = i915_mutex_lock_interruptible(dev);
@@ -1637,6 +1693,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	params->batch_obj               = batch_obj;
 	params->ctx                     = ctx;
 
+#ifdef CONFIG_SYNC
+	if (args->flags & I915_EXEC_REQUEST_FENCE) {
+		/*
+		 * Caller has requested a sync fence.
+		 * User interrupts will be enabled to make sure that
+		 * the timeline is signalled on completion.
+		 */
+		ret = i915_sync_create_fence(params->request,
+					     &fd_fence_complete,
+					     args->flags & I915_EXEC_RING_MASK);
+		if (ret) {
+			DRM_ERROR("Fence creation failed for ring %d, ctx %p\n",
+				  ring->id, ctx);
+			args->rsvd2 = (__u64) -1;
+			goto err;
+		}
+
+		/* Return the fence through the rsvd2 field */
+		args->rsvd2 = (__u64) fd_fence_complete;
+	}
+#endif
+
 	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
 
 err_batch_unpin:
@@ -1668,6 +1746,12 @@ pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+
+	if (fd_fence_complete != -1) {
+		sys_close(fd_fence_complete);
+		args->rsvd2 = (__u64) -1;
+	}
+
 	return ret;
 }
 
@@ -1773,11 +1857,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->rsvd2 != 0) {
-		DRM_DEBUG("dirty rvsd2 field\n");
-		return -EINVAL;
-	}
-
 	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
 			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
 	if (exec2_list == NULL)
diff --git a/drivers/gpu/drm/i915/intel_sync.c b/drivers/gpu/drm/i915/intel_sync.c
new file mode 100644
index 0000000..c610078
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sync.c
@@ -0,0 +1,357 @@
+/**************************************************************************
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Satyanantha RamaGopal M <rama.gopal.m.satyanantha at intel.com>
+ *      Ian Lister <ian.lister at intel.com>
+ *      Tvrtko Ursulin <tvrtko.ursulin at intel.com>
+ *      John Harrison <John.C.Harrison at Intel.com>
+ */
+#include <linux/device.h>
+#include <drm/drmP.h>
+#include <uapi/drm/drm.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+#include "intel_drv.h"
+#include "intel_sync.h"
+
+static int i915_sync_pt_has_signaled(struct sync_pt *sync_pt)
+{
+	struct i915_sync_pt *pt = container_of(sync_pt,
+					       struct i915_sync_pt, pt);
+	struct sync_timeline *tl = sync_pt_parent(sync_pt);
+	struct i915_sync_timeline *obj = container_of(tl,
+			struct i915_sync_timeline, obj);
+
+	/* On ring timeout fail the status of pending sync_pts.
+	 * This callback is synchronous with the thread which calls
+	 * sync_timeline_signal. If this has been signaled due to
+	 * an error then timeline->killed_at will be set to the dead
+	 * value.
+	 */
+	if (pt->pvt.value == obj->pvt.killed_at)
+		return -ETIMEDOUT;
+	else if (pt->pvt.cycle != obj->pvt.cycle) {
+		/* The seqno has wrapped so complete this point */
+		return 1;
+	} else
+		/* This shouldn't require locking as it is synchronous
+		 * with the timeline signal function which is the only updater
+		 * of these fields
+		 */
+		return (obj->pvt.value >= pt->pvt.value) ? 1 : 0;
+
+	return 0;
+}
+
+static int i915_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct i915_sync_pt *pt_a = container_of(a, struct i915_sync_pt, pt);
+	struct i915_sync_pt *pt_b = container_of(b, struct i915_sync_pt, pt);
+
+	if (pt_a->pvt.value == pt_b->pvt.value)
+		return 0;
+	else
+		return (pt_a->pvt.value > pt_b->pvt.value) ? 1 : -1;
+}
+
+static int i915_sync_fill_driver_data(struct sync_pt *sync_pt,
+				    void *data, int size)
+{
+	struct i915_sync_pt *pt = container_of(sync_pt,
+					       struct i915_sync_pt, pt);
+
+	if (size < sizeof(pt->pvt))
+		return -ENOMEM;
+
+	memcpy(data, &pt->pvt, sizeof(pt->pvt));
+
+	return sizeof(pt->pvt);
+}
+
+static
+struct sync_pt *i915_sync_pt_create(struct i915_sync_timeline *obj,
+				    u32 value, u32 cycle, u64 ring_mask)
+{
+	struct i915_sync_pt *pt;
+
+	if (!obj)
+		return NULL;
+
+	pt = (struct i915_sync_pt *)
+		sync_pt_create(&obj->obj, sizeof(struct i915_sync_pt));
+
+	if (pt) {
+		pt->pvt.value = value;
+		pt->pvt.cycle = cycle;
+		pt->pvt.ring_mask = ring_mask;
+	}
+
+	return (struct sync_pt *)pt;
+}
+
+static struct sync_pt *i915_sync_pt_dup(struct sync_pt *sync_pt)
+{
+	struct i915_sync_pt *pt = container_of(sync_pt,
+					       struct i915_sync_pt, pt);
+	struct sync_pt *new_pt;
+	struct sync_timeline *tl = sync_pt_parent(sync_pt);
+	struct i915_sync_timeline *obj = container_of(tl,
+			struct i915_sync_timeline, obj);
+
+	new_pt = (struct sync_pt *)i915_sync_pt_create(obj, pt->pvt.value,
+					pt->pvt.cycle, pt->pvt.ring_mask);
+	return new_pt;
+}
+
+static void i915_sync_pt_free(struct sync_pt *sync_pt)
+{
+}
+
+void i915_sync_pt_timeline_value_str(struct sync_timeline *timeline, char *str, int size)
+{
+	struct i915_sync_timeline *obj = container_of(timeline, struct i915_sync_timeline, obj);
+
+	snprintf(str, size, "%d [%d]", obj->pvt.value, obj->pvt.ring->get_seqno(obj->pvt.ring, true));
+}
+
+void i915_sync_pt_pt_value_str(struct sync_pt *sync_pt, char *str, int size)
+{
+	struct i915_sync_pt *pt = container_of(sync_pt,
+					       struct i915_sync_pt, pt);
+	struct i915_sync_timeline *timeline =
+		container_of(sync_pt_parent(sync_pt),
+				struct i915_sync_timeline, obj);
+	struct drm_i915_gem_request *req;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(timeline->pvt.ring->dev);
+	if (ret) {
+		snprintf(str, size, "%d [err = %d!]", pt->pvt.value, ret);
+		return;
+	}
+
+	req = i915_gem_request_find_by_sync_value(timeline->pvt.ring, timeline->pvt.ctx, pt->pvt.value);
+
+	if (req)
+		snprintf(str, size, "%d [%d:%d]", pt->pvt.value, req->uniq, req->seqno);
+	else
+		snprintf(str, size, "%d [-]", pt->pvt.value);
+
+	mutex_unlock(&timeline->pvt.ring->dev->struct_mutex);
+}
+
+struct sync_timeline_ops i915_sync_timeline_ops = {
+	.driver_name = "i915_sync",
+	.dup = i915_sync_pt_dup,
+	.has_signaled = i915_sync_pt_has_signaled,
+	.compare = i915_sync_pt_compare,
+	.fill_driver_data = i915_sync_fill_driver_data,
+	.free_pt = i915_sync_pt_free,
+	.timeline_value_str = i915_sync_pt_timeline_value_str,
+	.pt_value_str = i915_sync_pt_pt_value_str,
+};
+
+int i915_sync_timeline_create(struct drm_device *dev,
+			      struct intel_context *ctx,
+			      struct intel_engine_cs *ring)
+{
+	struct i915_sync_timeline **timeline;
+	struct i915_sync_timeline *local;
+
+	timeline = &ctx->engine[ring->id].sync_timeline;
+
+	if (*timeline)
+		return 0;
+
+	local = (struct i915_sync_timeline *)
+			sync_timeline_create(&i915_sync_timeline_ops,
+				     sizeof(struct i915_sync_timeline),
+				     ring->name);
+
+	if (!local)
+		return -EINVAL;
+
+	local->pvt.killed_at = 0;
+	local->pvt.next      = 1;
+
+	/* Start the timeline from seqno 0 as this is a special value
+	 * that is reserved for invalid sync points.
+	 */
+	local->pvt.value = 0;
+	local->pvt.ctx = ctx;
+	local->pvt.ring = ring;
+
+	*timeline = local;
+
+	return 0;
+}
+
+static uint32_t get_next_value(struct i915_sync_timeline *timeline)
+{
+	uint32_t value;
+
+	value = timeline->pvt.next;
+
+	/* Reserve zero for invalid */
+	if (++timeline->pvt.next == 0) {
+		timeline->pvt.next = 1;
+		timeline->pvt.cycle++;
+	}
+
+	return value;
+}
+
+void i915_sync_timeline_destroy(struct intel_context *ctx,
+				struct intel_engine_cs *ring)
+{
+	struct i915_sync_timeline **timeline;
+
+	timeline = &ctx->engine[ring->id].sync_timeline;
+
+	if (*timeline) {
+		sync_timeline_destroy(&(*timeline)->obj);
+		*timeline = NULL;
+	}
+}
+
+void i915_sync_timeline_signal(struct i915_sync_timeline *obj, u32 value)
+{
+	/* Update the timeline to notify it that
+	 * the monotonic counter has advanced.
+	 */
+	if (obj) {
+		obj->pvt.value = value;
+		sync_timeline_signal(&obj->obj);
+	}
+}
+
+int i915_sync_create_fence(struct drm_i915_gem_request *req,
+			   int *fd_out, u64 ring_mask)
+{
+	struct sync_pt *pt;
+	int fd = -1, err;
+	struct sync_fence *fence;
+	struct i915_sync_timeline *timeline;
+
+	if (req->sync_value) {
+		DRM_DEBUG_DRIVER("Already got a sync point! [ring:%s, ctx:%p, seqno:%u]\n",
+				 req->ring->name, req->ctx, i915_gem_request_get_seqno(req));
+		*fd_out = -1;
+		return -EINVAL;
+	}
+
+	timeline = req->ctx->engine[req->ring->id].sync_timeline;
+
+	if (!timeline) {
+		DRM_DEBUG_DRIVER("Missing timeline! [ring:%s, ctx:%p, seqno:%u]\n",
+				 req->ring->name, req->ctx, i915_gem_request_get_seqno(req));
+		*fd_out = -1;
+		return -ENODEV;
+	}
+
+	req->sync_value = get_next_value(timeline);
+	pt = i915_sync_pt_create(timeline,
+				 req->sync_value,
+				 timeline->pvt.cycle,
+				 ring_mask);
+	if (!pt) {
+		DRM_DEBUG_DRIVER("Failed to create sync point for ring:%s, ctx:%p, seqno:%u\n",
+				 req->ring->name, req->ctx, i915_gem_request_get_seqno(req));
+		*fd_out = -1;
+		return -ENOMEM;
+	}
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0) {
+		DRM_DEBUG_DRIVER("Unable to get file descriptor for fence\n");
+		err = fd;
+		goto err;
+	}
+
+	fence = sync_fence_create("I915", pt);
+	if (fence) {
+		sync_fence_install(fence, fd);
+		*fd_out = fd;
+		return 0;
+	}
+
+	DRM_DEBUG_DRIVER("Fence creation failed\n");
+	err = -ENOMEM;
+	put_unused_fd(fd);
+err:
+	sync_pt_free(pt);
+	*fd_out = -1;
+	return err;
+}
+
+void i915_sync_timeline_advance(struct intel_context *ctx,
+				struct intel_engine_cs *ring,
+				uint32_t value)
+{
+	struct i915_sync_timeline *timeline;
+
+	timeline = ctx->engine[ring->id].sync_timeline;
+
+	if (timeline)
+		i915_sync_timeline_signal(timeline, value);
+}
+
+bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence)
+{
+	struct i915_sync_timeline *timeline;
+	struct fence *dma_fence;
+	struct sync_pt *pt;
+	bool ignore;
+	int i;
+
+	if (atomic_read(&fence->status) != 0)
+		return true;
+
+	ignore = true;
+	for (i = 0; i < fence->num_fences; i++) {
+		dma_fence = fence->cbs[i].sync_pt;
+		pt = container_of(dma_fence, struct sync_pt, base);
+
+		/* No need to worry about dead points: */
+		if (fence_is_signaled(dma_fence))
+			continue;
+
+		/* Can't ignore other people's points: */
+		if (sync_pt_parent(pt)->ops != &i915_sync_timeline_ops) {
+			ignore = false;
+			break;
+		}
+
+		timeline = container_of(sync_pt_parent(pt), struct i915_sync_timeline, obj);
+
+		/* Can't ignore points on other rings: */
+		if (timeline->pvt.ring != ring) {
+			ignore = false;
+			break;
+		}
+
+		/* Same ring means guaranteed to be in order so ignore it. */
+	}
+
+	return ignore;
+}
diff --git a/drivers/gpu/drm/i915/intel_sync.h b/drivers/gpu/drm/i915/intel_sync.h
new file mode 100644
index 0000000..45476a1
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_sync.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Satyanantha RamaGopal M <rama.gopal.m.satyanantha at intel.com>
+ *      Ian Lister <ian.lister at intel.com>
+ *      Tvrtko Ursulin <tvrtko.ursulin at intel.com>
+ *      John Harrison <John.C.Harrison at Intel.com>
+ */
+#ifndef _INTEL_SYNC_H_
+#define _INTEL_SYNC_H_
+
+#include <../drivers/staging/android/sync.h>
+
+#ifdef CONFIG_SYNC
+
+struct drm_i915_private;
+
+struct i915_sync_timeline {
+	struct	sync_timeline	obj;
+
+	struct {
+		u32         value;
+		u32         cycle;
+		uint32_t    killed_at;
+		uint32_t    next;
+
+		struct intel_context *ctx;
+		struct intel_engine_cs *ring;
+	} pvt;
+};
+
+struct i915_sync_pt {
+	struct sync_pt		pt;
+
+	struct drm_i915_gem_syncpt_driver_data pvt;
+};
+
+bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence);
+
+int i915_sync_timeline_create(struct drm_device *dev,
+			      struct intel_context *ctx,
+			      struct intel_engine_cs *ring);
+
+void i915_sync_timeline_destroy(struct intel_context *ctx,
+				struct intel_engine_cs *ring);
+
+int i915_sync_create_fence(struct drm_i915_gem_request *req,
+			   int *fd_out, u64 ring_mask);
+
+void i915_sync_timeline_advance(struct intel_context *ctx,
+				struct intel_engine_cs *ring,
+				uint32_t value);
+
+#endif /* CONFIG_SYNC */
+
+#endif /* _INTEL_SYNC_H_ */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f88cc1c..9371347f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -765,7 +765,17 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1		(1<<13)
 #define I915_EXEC_BSD_RING2		(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Caller supplies a sync fence fd in the rsvd2 field.
+ * Wait for it to be signalled before starting the work
+ */
+#define I915_EXEC_WAIT_FENCE		(1<<15)
+
+/** Caller wants a sync fence fd for this execbuffer.
+ *  It will be returned in rsvd2
+ */
+#define I915_EXEC_REQUEST_FENCE		(1<<16)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(1<<17)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -773,6 +783,12 @@ struct drm_i915_gem_execbuffer2 {
 #define i915_execbuffer2_get_context_id(eb2) \
 	((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
 
+struct drm_i915_gem_syncpt_driver_data {
+	__u32 value;
+	__u32 cycle;
+	__u64 ring_mask;
+};
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	__u32 handle;
-- 
1.9.1


