[Intel-gfx] [PATCH 5/5] drm/i915: s/seqno/request/ tracking inside objects

Chris Wilson chris at chris-wilson.co.uk
Tue Aug 12 21:05:51 CEST 2014


At the heart of this change is that the seqno is too low a level of
abstraction to handle the growing complexities of command tracking,
both with the introduction of multiple command queues with execbuffer
and the potential for reordering with a scheduler. On top of the seqno
we have the request. Conceptually this is just a fence, but it also
carries substantial bookkeeping of its own in order to track, for
example, the context and batch in flight. It is the central structure
upon which we can build extensions such as dependency tracking.
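
For reference, a condensed view of the new request structure (taken
from the i915_drv.h hunks later in this patch, with the context, batch
and client bookkeeping fields elided):

	struct i915_gem_request {
		struct kref kref;		/* may outlive normal retirement */
		struct intel_engine_cs *ring;	/* engine the request executes on */
		unsigned reset_counter;		/* GPU reset epoch sampled at emission */
		uint32_t seqno;			/* breadcrumb written by the GPU */
		bool completed:1;		/* cached completion status */
		/* ... ctx, batch_obj, head/tail, file_priv, list links ... */
	};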

As regards the objects, they were using the seqno as a simple fence,
which we check or even wait upon for command completion. This patch
exchanges that seqno/ring pair for the request itself. For the majority
of cases, the lifetime of the request is ordered by how we retire
objects and then requests. However, both the unlocked waits and probing
elsewhere do not tie into the normal request lifetimes, so we need to
introduce a kref. Extending the objects to use the request as the fence
naturally extends to segregating read/write fence tracking. This is
significant as it reduces the number of semaphores we need to emit,
reducing the likelihood of #54226 and improving performance overall.
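
Condensed from the i915_drv.h hunk below, the per-object breadcrumbs
become a (request, list) pair per engine for reads, plus single write
and fence slots, so a read-only consumer only has to synchronise
against the last writer:

	struct drm_i915_gem_object {
		...
		/** Breadcrumbs of last rendering to the buffer. */
		struct {
			struct i915_gem_request *request;
			struct list_head ring_list;
		} last_write, last_read[I915_NUM_RINGS], last_fence;
		...
	};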

v2: Rebase and split out the orthogonal tweaks.

Something silly happened with this patch: it seemed to nullify our
earlier seqno-vs-interrupt w/a. I could not spot why, but gen6+ started
to fail with missed interrupts (a good test of our robustness
handling). So I ripped out the existing ACTHD read and replaced it with
a RING_HEAD read to manually check whether the request is complete.
That also had the nice consequence of making __wait_request() the
central arbiter of request completion.
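
The replacement check boils down to comparing how far the ring head has
advanced against the tail of the request; the heart of the
__i915_request_complete__wa() helper below is:

	head = __intel_ring_space(I915_READ_HEAD(ring) & HEAD_ADDR,
				  ring->buffer->tail, ring->buffer->size);
	tail = __intel_ring_space(rq->tail,
				  ring->buffer->tail, ring->buffer->size);
	if (head >= tail)
		rq->completed = true;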

The keener-eyed reviewer will also spot that the reset_counter is
moved into the request, simplifying the __wait_request() callsites and
reducing the number of atomic reads by virtue of moving the check for a
pending GPU reset to the endpoints of GPU access.
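
In condensed form (with the handling of a terminally wedged GPU
elided), i915_gem_check_wedge() now validates and refreshes the
reset_counter carried by the request, so a wait reports -EAGAIN if a
reset occurred after the request was emitted:

	unsigned wedge = atomic_read(&error->reset_counter);

	if (wedge & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED))
		return interruptible ? -EAGAIN : -EIO;

	if (*reset_counter && *reset_counter != wedge)
		return -EAGAIN;

	*reset_counter = wedge;
	return 0;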

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes at virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Oscar Mateo <oscar.mateo at intel.com>
Cc: Brad Volkin <bradley.d.volkin at intel.com>
Cc: "Kukanova, Svetlana" <svetlana.kukanova at intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c          |  29 +-
 drivers/gpu/drm/i915/i915_dma.c              |   2 +
 drivers/gpu/drm/i915/i915_drv.h              | 121 ++--
 drivers/gpu/drm/i915/i915_gem.c              | 850 +++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_context.c      |  19 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  37 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   5 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c       |   2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c        |  36 +-
 drivers/gpu/drm/i915/i915_irq.c              |  28 +-
 drivers/gpu/drm/i915/i915_trace.h            |   4 +-
 drivers/gpu/drm/i915/intel_display.c         | 151 ++---
 drivers/gpu/drm/i915/intel_drv.h             |   8 +-
 drivers/gpu/drm/i915/intel_lrc.c             | 115 +---
 drivers/gpu/drm/i915/intel_overlay.c         | 118 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 164 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  19 +-
 17 files changed, 922 insertions(+), 786 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d42db6b..604a73a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
+	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
@@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
-		   obj->last_read_seqno,
-		   obj->last_write_seqno,
-		   obj->last_fenced_seqno,
+		   i915_request_seqno(rq),
+		   i915_request_seqno(obj->last_write.request),
+		   i915_request_seqno(obj->last_fence.request),
 		   i915_cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -168,8 +169,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		*t = '\0';
 		seq_printf(m, " (%s mappable)", s);
 	}
-	if (obj->ring != NULL)
-		seq_printf(m, " (%s)", obj->ring->name);
+	if (rq)
+		seq_printf(m, " (%s)", rq->ring->name);
 	if (obj->frontbuffer_bits)
 		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
@@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 			if (ppgtt->file_priv != stats->file_priv)
 				continue;
 
-			if (obj->ring) /* XXX per-vma statistic */
+			if (obj->active) /* XXX per-vma statistic */
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	} else {
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			stats->global += obj->base.size;
-			if (obj->ring)
+			if (obj->active)
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -574,7 +575,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	struct drm_i915_gem_request *gem_request;
+	struct i915_gem_request *rq;
 	int ret, count, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -587,12 +588,10 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 			continue;
 
 		seq_printf(m, "%s requests:\n", ring->name);
-		list_for_each_entry(gem_request,
-				    &ring->request_list,
-				    list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			seq_printf(m, "    %d @ %d\n",
-				   gem_request->seqno,
-				   (int) (jiffies - gem_request->emitted_jiffies));
+				   rq->seqno,
+				   (int)(jiffies - rq->emitted_jiffies));
 		}
 		count++;
 	}
@@ -609,7 +608,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
 {
 	if (ring->get_seqno) {
 		seq_printf(m, "Current sequence (%s): %u\n",
-			   ring->name, ring->get_seqno(ring, false));
+			   ring->name, ring->get_seqno(ring));
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 04dd611..ba7f15c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1598,6 +1598,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	/* For the ugly agnostic INTEL_INFO macro */
 	BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev));
 
+	BUILD_BUG_ON(I915_NUM_RINGS >= (1 << I915_NUM_RING_BITS));
+
 	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e0dcd70..c3563a0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -191,6 +191,7 @@ enum hpd_pin {
 
 struct drm_i915_private;
 struct i915_mmu_object;
+struct i915_gem_request;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -1740,16 +1741,15 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	struct list_head ring_list;
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
+	 * rendering and so a submitted request), and is not set if it is on
+	 * inactive (ready to be unbound) list. We track activity per engine.
 	 */
-	unsigned int active:1;
+	unsigned int active:I915_NUM_RING_BITS;
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -1817,13 +1817,11 @@ struct drm_i915_gem_object {
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
-	struct intel_engine_cs *ring;
-
-	/** Breadcrumb of last rendering to the buffer. */
-	uint32_t last_read_seqno;
-	uint32_t last_write_seqno;
-	/** Breadcrumb of last fenced GPU access to the buffer. */
-	uint32_t last_fenced_seqno;
+	/** Breadcrumbs of last rendering to the buffer. */
+	struct {
+		struct i915_gem_request *request;
+		struct list_head ring_list;
+	} last_write, last_read[I915_NUM_RINGS], last_fence;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
@@ -1856,6 +1854,8 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
+
 void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits);
@@ -1870,10 +1870,14 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
  * sequence-number comparisons on buffer last_rendering_seqnos, and associate
  * an emission time with seqnos for tracking how far ahead of the GPU we are.
  */
-struct drm_i915_gem_request {
+struct i915_gem_request {
+	struct kref kref;
+
 	/** On Which ring this request was generated */
 	struct intel_engine_cs *ring;
 
+	unsigned reset_counter;
+
 	/** GEM sequence number associated with this request. */
 	uint32_t seqno;
 
@@ -1898,8 +1902,64 @@ struct drm_i915_gem_request {
 	struct drm_i915_file_private *file_priv;
 	/** file_priv list entry for this request */
 	struct list_head client_list;
+
+	bool completed:1;
 };
 
+static inline struct intel_engine_cs *i915_request_ring(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring : NULL;
+}
+
+static inline int i915_request_ring_id(struct i915_gem_request *rq)
+{
+	return rq ? rq->ring->id : -1;
+}
+
+static inline u32 i915_request_seqno(struct i915_gem_request *rq)
+{
+	return rq ? rq->seqno : 0;
+}
+
+/**
+ * Returns true if seq1 is later than seq2.
+ */
+static inline bool
+__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+{
+	return (int32_t)(seq1 - seq2) >= 0;
+}
+
+static inline bool
+i915_request_complete(struct i915_gem_request *rq)
+{
+	if (!rq->completed &&
+	    __i915_seqno_passed(rq->ring->get_seqno(rq->ring),
+				rq->seqno))
+		rq->completed = true;
+	return rq->completed;
+}
+
+static inline struct i915_gem_request *
+i915_request_get(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_get(&rq->kref);
+	return rq;
+}
+
+void __i915_request_free(struct kref *kref);
+
+struct i915_gem_request *i915_gem_seqno_to_request(struct intel_engine_cs *ring,
+						   u32 seqno);
+
+static inline void
+i915_request_put(struct i915_gem_request *rq)
+{
+	if (rq)
+		kref_put(&rq->kref, __i915_request_free);
+}
+
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
 	struct drm_file *file;
@@ -2368,22 +2428,18 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to);
+			 struct intel_engine_cs *to,
+			 bool readonly);
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring);
+			     struct intel_engine_cs *ring,
+			     unsigned fenced);
+#define VMA_IS_FENCED 0x1
+#define VMA_HAS_FENCE 0x2
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
-	return (int32_t)(seq1 - seq2) >= 0;
-}
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
@@ -2393,14 +2449,15 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
 void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
-				      bool interruptible);
-int __must_check i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno);
+				      bool interruptible,
+				      unsigned *reset_counter);
+int __must_check i915_gem_check_olr(struct i915_gem_request *rq);
 
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
@@ -2443,12 +2500,12 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *batch_obj,
-		       u32 *seqno);
-#define i915_add_request(ring, seqno) \
-	__i915_add_request(ring, NULL, NULL, seqno)
-int __must_check i915_wait_seqno(struct intel_engine_cs *ring,
-				 uint32_t seqno);
+		       struct drm_i915_gem_object *batch_obj);
+#define i915_add_request(ring) \
+	__i915_add_request(ring, NULL, NULL)
+int __must_check i915_wait_request(struct i915_gem_request *rq);
+int __i915_request_wait(struct i915_gem_request *rq,
+			bool interruptible);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
@@ -2776,8 +2833,6 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
-void intel_notify_mmio_flip(struct intel_engine_cs *ring);
-
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6c2f0b8..9c8c881 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -44,9 +44,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
-
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
@@ -108,6 +105,85 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
+{
+	intel_fb_obj_flush(obj, true);
+	obj->last_write.request = NULL;
+	list_del_init(&obj->last_write.ring_list);
+}
+
+static void
+i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
+{
+	obj->last_fence.request = NULL;
+	list_del_init(&obj->last_fence.ring_list);
+}
+
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
+			     struct intel_engine_cs *ring)
+{
+	struct i915_vma *vma;
+
+	BUG_ON(obj->active == 0);
+	BUG_ON(obj->base.write_domain);
+
+	obj->last_read[ring->id].request = NULL;
+	list_del_init(&obj->last_read[ring->id].ring_list);
+
+	if (--obj->active)
+		return;
+
+	BUG_ON(obj->last_write.request);
+	BUG_ON(obj->last_fence.request);
+
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (!list_empty(&vma->mm_list))
+			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
+	}
+
+	drm_gem_object_unreference(&obj->base);
+
+	WARN_ON(i915_verify_lists(dev));
+}
+
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_request *rq;
+	int i;
+
+	if (!obj->active)
+		return;
+
+	rq = obj->last_write.request;
+	if (rq && i915_request_complete(rq))
+		i915_gem_object_retire__write(obj);
+
+	rq = obj->last_fence.request;
+	if (rq && i915_request_complete(rq))
+		i915_gem_object_retire__fence(obj);
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		rq = obj->last_read[i].request;
+		if (rq && i915_request_complete(rq)) {
+			/* Although we just checked these above, the hardware
+			 * may have just completed them in the interval and
+			 * to keep the request lifetimes correct, we must
+			 * retire write/fence before read.
+			 */
+			if (i915_request_ring_id(obj->last_write.request) == i)
+				i915_gem_object_retire__write(obj);
+
+			if (i915_request_ring_id(obj->last_fence.request) == i)
+				i915_gem_object_retire__fence(obj);
+
+			i915_gem_object_retire__read(obj, rq->ring);
+		}
+	}
+}
+
 static int
 i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
@@ -1073,9 +1149,12 @@ unlock:
 
 int
 i915_gem_check_wedge(struct i915_gpu_error *error,
-		     bool interruptible)
+		     bool interruptible,
+		     unsigned *reset_counter)
 {
-	if (i915_reset_in_progress(error)) {
+	unsigned wedge = atomic_read(&error->reset_counter);
+
+	if (wedge & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)) {
 		/* Non-interruptible callers can't handle -EAGAIN, hence return
 		 * -EIO unconditionally for these. */
 		if (!interruptible)
@@ -1088,6 +1167,10 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
 		return -EAGAIN;
 	}
 
+	if (*reset_counter && *reset_counter != wedge)
+		return -EAGAIN;
+
+	*reset_counter = wedge;
 	return 0;
 }
 
@@ -1096,15 +1179,15 @@ i915_gem_check_wedge(struct i915_gpu_error *error,
  * equal.
  */
 int
-i915_gem_check_olr(struct intel_engine_cs *ring, u32 seqno)
+i915_gem_check_olr(struct i915_gem_request *rq)
 {
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	BUG_ON(!mutex_is_locked(&rq->ring->dev->struct_mutex));
 
 	ret = 0;
-	if (seqno == ring->outstanding_lazy_seqno)
-		ret = i915_add_request(ring, NULL);
+	if (rq == rq->ring->preallocated_request)
+		ret = i915_add_request(rq->ring);
 
 	return ret;
 }
@@ -1128,32 +1211,50 @@ static bool can_wait_boost(struct drm_i915_file_private *file_priv)
 	return !atomic_xchg(&file_priv->rps_wait_boost, true);
 }
 
+static bool __i915_request_complete__wa(struct i915_gem_request *rq)
+{
+	struct intel_engine_cs *ring = rq->ring;
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	unsigned head, tail;
+
+	if (i915_request_complete(rq))
+		return true;
+
+	/* Sadly not all architectures are coherent wrt to the seqno
+	 * write being visible before the CPU is woken up by the
+	 * interrupt. In order to avoid going to sleep without seeing
+	 * the last seqno and never waking up again, we explicitly check
+	 * whether the ring has advanced past our request. The uncached
+	 * register read (which requires waking the GT up) is pure brute
+	 * force, and only just enough.
+	 */
+	head = __intel_ring_space(I915_READ_HEAD(ring) & HEAD_ADDR,
+				  ring->buffer->tail, ring->buffer->size);
+	tail = __intel_ring_space(rq->tail,
+				  ring->buffer->tail, ring->buffer->size);
+	if (head >= tail)
+		rq->completed = true;
+
+	return rq->completed;
+}
+
 /**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
- * @reset_counter: reset sequence associated with the given seqno
+ * __wait_request - wait until execution of request has finished
+ * @request: the request to wait upon
  * @interruptible: do an interruptible wait (normally yes)
  * @timeout: in - how long to wait (NULL forever); out - how much time remaining
  *
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
- *
- * Returns 0 if the seqno was found within the alloted time. Else returns the
+ * Returns 0 if the request was completed within the allotted time. Else returns the
  * errno with remaining time filled in timeout argument.
  */
-static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
-			unsigned reset_counter,
-			bool interruptible,
-			struct timespec *timeout,
-			struct drm_i915_file_private *file_priv)
+static int __wait_request(struct i915_gem_request *rq,
+			  bool interruptible,
+			  struct timespec *timeout,
+			  struct drm_i915_file_private *file_priv)
 {
+	struct intel_engine_cs *ring = rq->ring;
 	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
 	struct timespec before, now;
@@ -1163,7 +1264,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 
 	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
+	if (i915_request_complete(rq))
 		return 0;
 
 	timeout_expire = timeout ? jiffies + timespec_to_jiffies_timeout(timeout) : 0;
@@ -1180,7 +1281,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 		return -ENODEV;
 
 	/* Record current time in case interrupted by signal, or wedged */
-	trace_i915_gem_request_wait_begin(ring, seqno);
+	trace_i915_gem_request_wait_begin(ring, rq->seqno);
 	getrawmonotonic(&before);
 	for (;;) {
 		struct timer_list timer;
@@ -1190,19 +1291,12 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
-		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
-			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
-			 * is truely gone. */
-			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-			if (ret == 0)
-				ret = -EAGAIN;
+		ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible, &rq->reset_counter);
+		if (ret)
 			break;
-		}
 
-		if (i915_seqno_passed(ring->get_seqno(ring, false), seqno)) {
-			ret = 0;
+		if (__i915_request_complete__wa(rq))
 			break;
-		}
 
 		if (interruptible && signal_pending(current)) {
 			ret = -ERESTARTSYS;
@@ -1231,7 +1325,7 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
 		}
 	}
 	getrawmonotonic(&now);
-	trace_i915_gem_request_wait_end(ring, seqno);
+	trace_i915_gem_request_wait_end(ring, rq->seqno);
 
 	if (!irq_test_in_progress)
 		ring->irq_put(ring);
@@ -1253,46 +1347,28 @@ static int __wait_seqno(struct intel_engine_cs *ring, u32 seqno,
  * request and object lists appropriately for that event.
  */
 int
-i915_wait_seqno(struct intel_engine_cs *ring, uint32_t seqno)
+i915_wait_request(struct i915_gem_request *rq)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	bool interruptible = dev_priv->mm.interruptible;
+	struct drm_device *dev = rq->ring->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret;
 
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(seqno == 0);
-
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-	if (ret)
-		return ret;
+	if (WARN_ON(!mutex_is_locked(&dev->struct_mutex)))
+		return -EINVAL;
 
-	ret = i915_gem_check_olr(ring, seqno);
+	ret = i915_gem_check_olr(rq);
 	if (ret)
 		return ret;
 
-	return __wait_seqno(ring, seqno,
-			    atomic_read(&dev_priv->gpu_error.reset_counter),
-			    interruptible, NULL, NULL);
+	return __wait_request(rq, dev_priv->mm.interruptible,
+			      NULL, NULL);
 }
 
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
-				     struct intel_engine_cs *ring)
+int
+__i915_request_wait(struct i915_gem_request *rq,
+		    bool interruptible)
 {
-	if (!obj->active)
-		return 0;
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 *
-	 * Note that the last_write_seqno is always the earlier of
-	 * the two (read/write) seqno, so if we haved successfully waited,
-	 * we know we have passed the last write.
-	 */
-	obj->last_write_seqno = 0;
-
-	return 0;
+	return __wait_request(rq, interruptible, NULL, NULL);
 }
 
 /**
@@ -1303,19 +1379,27 @@ static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
-	struct intel_engine_cs *ring = obj->ring;
-	u32 seqno;
-	int ret;
+	int i, ret;
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
+	if (readonly) {
+		if (obj->last_write.request == NULL)
+			return 0;
 
-	ret = i915_wait_seqno(ring, seqno);
-	if (ret)
-		return ret;
+		ret = i915_wait_request(obj->last_write.request);
+		if (ret)
+			return ret;
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			if (obj->last_read[i].request == NULL)
+				continue;
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+			ret = i915_wait_request(obj->last_read[i].request);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1328,34 +1412,42 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = obj->ring;
-	unsigned reset_counter;
-	u32 seqno;
-	int ret;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
+	int i, n, ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
+	n = 0;
+	if (readonly) {
+		if (obj->last_write.request)
+			rq[n++] = i915_request_get(obj->last_write.request);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++)
+			if (obj->last_read[i].request)
+				rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+	if (n == 0)
 		return 0;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_check_olr(ring, seqno);
-	if (ret)
-		return ret;
+	for (i = 0; i < n; i++) {
+		ret = i915_gem_check_olr(rq[i]);
+		if (ret)
+			goto out;
+	}
 
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, file_priv);
+
+	for (i = 0; ret == 0 && i < n; i++)
+		ret = __wait_request(rq[i], true, NULL, file_priv);
+
 	mutex_lock(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
-	return i915_gem_object_wait_rendering__tail(obj, ring);
+out:
+	for (i = 0; i < n; i++)
+		i915_request_put(rq[i]);
+
+	return ret;
 }
 
 /**
@@ -2157,81 +2249,57 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *ring)
-{
-	u32 seqno = intel_ring_get_seqno(ring);
-
-	BUG_ON(ring == NULL);
-	if (obj->ring != ring && obj->last_write_seqno) {
-		/* Keep the seqno relative to the current ring */
-		obj->last_write_seqno = seqno;
-	}
-	obj->ring = ring;
-
-	/* Add a reference if we're newly entering the active list. */
-	if (!obj->active) {
-		drm_gem_object_reference(&obj->base);
-		obj->active = 1;
-	}
-
-	list_move_tail(&obj->ring_list, &ring->active_list);
-
-	obj->last_read_seqno = seqno;
-}
-
 void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *ring)
+			     struct intel_engine_cs *ring,
+			     unsigned fenced)
 {
-	list_move_tail(&vma->mm_list, &vma->vm->active_list);
-	return i915_gem_object_move_to_active(vma->obj, ring);
-}
-
-static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct i915_address_space *vm;
-	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = intel_ring_get_request(ring);
+	u32 old_read = obj->base.read_domains;
+	u32 old_write = obj->base.write_domain;
 
-	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
-	BUG_ON(!obj->active);
+	BUG_ON(rq == NULL);
 
-	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
-		vma = i915_gem_obj_to_vma(obj, vm);
-		if (vma && !list_empty(&vma->mm_list))
-			list_move_tail(&vma->mm_list, &vm->inactive_list);
-	}
-
-	intel_fb_obj_flush(obj, true);
+	obj->base.write_domain = obj->base.pending_write_domain;
+	if (obj->base.write_domain == 0)
+		obj->base.pending_read_domains |= obj->base.read_domains;
+	obj->base.read_domains = obj->base.pending_read_domains;
 
-	list_del_init(&obj->ring_list);
-	obj->ring = NULL;
+	obj->base.pending_read_domains = 0;
+	obj->base.pending_write_domain = 0;
 
-	obj->last_read_seqno = 0;
-	obj->last_write_seqno = 0;
-	obj->base.write_domain = 0;
+	trace_i915_gem_object_change_domain(obj, old_read, old_write);
+	if (obj->base.read_domains == 0)
+		return;
 
-	obj->last_fenced_seqno = 0;
+	/* Add a reference if we're newly entering the active list. */
+	if (obj->last_read[ring->id].request == NULL && obj->active++ == 0)
+		drm_gem_object_reference(&obj->base);
 
-	obj->active = 0;
-	drm_gem_object_unreference(&obj->base);
+	obj->last_read[ring->id].request = rq;
+	list_move_tail(&obj->last_read[ring->id].ring_list, &ring->read_list);
 
-	WARN_ON(i915_verify_lists(dev));
-}
+	if (obj->base.write_domain) {
+		obj->dirty = 1;
+		obj->last_write.request = rq;
+		list_move_tail(&obj->last_write.ring_list, &ring->write_list);
+		intel_fb_obj_invalidate(obj, ring);
 
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *ring = obj->ring;
+		/* update for the implicit flush after a batch */
+		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	}
 
-	if (ring == NULL)
-		return;
+	if (fenced & VMA_IS_FENCED) {
+		obj->last_fence.request = rq;
+		list_move_tail(&obj->last_fence.ring_list, &ring->fence_list);
+		if (fenced & VMA_HAS_FENCE) {
+			struct drm_i915_private *dev_priv = to_i915(ring->dev);
+			list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+					&dev_priv->mm.fence_list);
+		}
+	}
 
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_read_seqno))
-		i915_gem_object_move_to_inactive(obj);
+	list_move_tail(&vma->mm_list, &vma->vm->active_list);
 }
 
 static int
@@ -2306,11 +2374,10 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 
 int __i915_add_request(struct intel_engine_cs *ring,
 		       struct drm_file *file,
-		       struct drm_i915_gem_object *obj,
-		       u32 *out_seqno)
+		       struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	u32 request_ring_position, request_start;
 	int ret;
 
@@ -2326,10 +2393,16 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request = ring->preallocated_lazy_request;
-	if (WARN_ON(request == NULL))
+	rq = ring->preallocated_request;
+	if (WARN_ON(rq == NULL))
 		return -ENOMEM;
 
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible,
+				   &rq->reset_counter);
+	if (ret)
+		return ret;
+
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
@@ -2341,10 +2414,8 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	request->seqno = intel_ring_get_seqno(ring);
-	request->ring = ring;
-	request->head = request_start;
-	request->tail = request_ring_position;
+	rq->head = request_start;
+	rq->tail = request_ring_position;
 
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
@@ -2352,32 +2423,31 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * inactive_list and lose its active reference. Hence we do not need
 	 * to explicitly hold another reference here.
 	 */
-	request->batch_obj = obj;
+	rq->batch_obj = obj;
 
 	/* Hold a reference to the current context so that we can inspect
 	 * it later in case a hangcheck error event fires.
 	 */
-	request->ctx = ring->last_context;
-	if (request->ctx)
-		i915_gem_context_reference(request->ctx);
+	rq->ctx = ring->last_context;
+	if (rq->ctx)
+		i915_gem_context_reference(rq->ctx);
 
-	request->emitted_jiffies = jiffies;
-	list_add_tail(&request->list, &ring->request_list);
-	request->file_priv = NULL;
+	rq->emitted_jiffies = jiffies;
+	list_add_tail(&rq->list, &ring->request_list);
+	rq->file_priv = NULL;
 
 	if (file) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
 
 		spin_lock(&file_priv->mm.lock);
-		request->file_priv = file_priv;
-		list_add_tail(&request->client_list,
+		rq->file_priv = file_priv;
+		list_add_tail(&rq->client_list,
 			      &file_priv->mm.request_list);
 		spin_unlock(&file_priv->mm.lock);
 	}
 
-	trace_i915_gem_request_add(ring, request->seqno);
-	ring->outstanding_lazy_seqno = 0;
-	ring->preallocated_lazy_request = NULL;
+	trace_i915_gem_request_add(ring, rq->seqno);
+	ring->preallocated_request = NULL;
 
 	if (!dev_priv->ums.mm_suspended) {
 		i915_queue_hangcheck(ring->dev);
@@ -2389,22 +2459,20 @@ int __i915_add_request(struct intel_engine_cs *ring,
 		intel_mark_busy(dev_priv->dev);
 	}
 
-	if (out_seqno)
-		*out_seqno = request->seqno;
 	return 0;
 }
 
 static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+i915_gem_request_remove_from_client(struct i915_gem_request *rq)
 {
-	struct drm_i915_file_private *file_priv = request->file_priv;
+	struct drm_i915_file_private *file_priv = rq->file_priv;
 
 	if (!file_priv)
 		return;
 
 	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
+	list_del(&rq->client_list);
+	rq->file_priv = NULL;
 	spin_unlock(&file_priv->mm.lock);
 }
 
@@ -2452,30 +2520,37 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
+void __i915_request_free(struct kref *kref)
 {
-	list_del(&request->list);
-	i915_gem_request_remove_from_client(request);
+	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
+	kfree(rq);
+}
 
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
+static void i915_request_retire(struct i915_gem_request *rq)
+{
+	rq->completed = true;
 
-	kfree(request);
+	list_del(&rq->list);
+	i915_gem_request_remove_from_client(rq);
+
+	if (rq->ctx) {
+		i915_gem_context_unreference(rq->ctx);
+		rq->ctx = NULL;
+	}
+
+	i915_request_put(rq);
 }
 
-struct drm_i915_gem_request *
+struct i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
-	u32 completed_seqno;
-
-	completed_seqno = ring->get_seqno(ring, false);
+	struct i915_gem_request *rq;
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (i915_seqno_passed(completed_seqno, request->seqno))
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (i915_request_complete(rq))
 			continue;
 
-		return request;
+		return rq;
 	}
 
 	return NULL;
@@ -2484,33 +2559,53 @@ i915_gem_find_active_request(struct intel_engine_cs *ring)
 static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
 				       struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	bool ring_hung;
 
-	request = i915_gem_find_active_request(ring);
+	rq = i915_gem_find_active_request(ring);
 
-	if (request == NULL)
+	if (rq == NULL)
 		return;
 
 	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 
-	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
+	i915_set_reset_status(dev_priv, rq->ctx, ring_hung);
 
-	list_for_each_entry_continue(request, &ring->request_list, list)
-		i915_set_reset_status(dev_priv, request->ctx, false);
+	list_for_each_entry_continue(rq, &ring->request_list, list)
+		i915_set_reset_status(dev_priv, rq->ctx, false);
 }
 
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					struct intel_engine_cs *ring)
 {
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
+		obj = list_first_entry(&ring->write_list,
 				       struct drm_i915_gem_object,
-				       ring_list);
+				       last_write.ring_list);
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
+
+		i915_gem_object_retire__fence(obj);
+	}
+
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		i915_gem_object_retire__read(obj, ring);
 	}
 
 	/*
@@ -2521,19 +2616,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * the request.
 	 */
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	/* These may not have been flush before the reset, do so now */
-	kfree(ring->preallocated_lazy_request);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	kfree(ring->preallocated_request);
+	ring->preallocated_request = NULL;
 }
 
 void i915_gem_restore_fences(struct drm_device *dev)
@@ -2592,49 +2686,77 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
 	WARN_ON(i915_verify_lists(ring->dev));
 
-	seqno = ring->get_seqno(ring, true);
+	seqno = ring->get_seqno(ring);
 
 	/* Move any buffers on the active list that are no longer referenced
 	 * by the ringbuffer to the flushing/inactive lists as appropriate,
 	 * before we free the context associated with the requests.
 	 */
-	while (!list_empty(&ring->active_list)) {
+	while (!list_empty(&ring->write_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->write_list,
+				       struct drm_i915_gem_object,
+				       last_write.ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_write.request->seqno))
+			break;
+
+		i915_gem_object_retire__write(obj);
+	}
+
+	while (!list_empty(&ring->fence_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&ring->active_list,
-				      struct drm_i915_gem_object,
-				      ring_list);
+		obj = list_first_entry(&ring->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.ring_list);
 
-		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_fence.request->seqno))
 			break;
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__fence(obj);
 	}
 
+	while (!list_empty(&ring->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&ring->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[ring->id].ring_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_read[ring->id].request->seqno))
+			break;
+
+		i915_gem_object_retire__read(obj, ring);
+	}
 
 	while (!list_empty(&ring->request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&ring->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		rq = list_first_entry(&ring->request_list,
+				      struct i915_gem_request,
+				      list);
 
-		if (!i915_seqno_passed(seqno, request->seqno))
+		if (!__i915_seqno_passed(seqno, rq->seqno))
 			break;
 
-		trace_i915_gem_request_retire(ring, request->seqno);
+		trace_i915_gem_request_retire(ring, rq->seqno);
 		/* We know the GPU must have read the request to have
 		 * sent us the seqno + interrupt, so use the position
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->buffer->last_retired_head = request->tail;
+		ring->buffer->last_retired_head = rq->tail;
 
-		i915_gem_free_request(request);
+		i915_request_retire(rq);
 	}
 
 	if (unlikely(ring->trace_irq_seqno &&
-		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
+		     __i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
 		ring->irq_put(ring);
 		ring->trace_irq_seqno = 0;
 	}
@@ -2699,14 +2821,23 @@ i915_gem_idle_work_handler(struct work_struct *work)
 static int
 i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
-	int ret;
+	int i;
 
-	if (obj->active) {
-		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
+	if (!obj->active)
+		return 0;
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct i915_gem_request *rq = obj->last_read[i].request;
+		int ret;
+
+		if (rq == NULL)
+			continue;
+
+		ret = i915_gem_check_olr(rq);
 		if (ret)
 			return ret;
 
-		i915_gem_retire_requests_ring(obj->ring);
+		i915_gem_retire_requests_ring(rq->ring);
 	}
 
 	return 0;
@@ -2737,14 +2868,11 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 int
 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *ring = NULL;
 	struct timespec timeout_stack, *timeout = NULL;
-	unsigned reset_counter;
-	u32 seqno = 0;
-	int ret = 0;
+	struct i915_gem_request *rq[I915_NUM_RINGS] = {};
+	int i, n, ret = 0;
 
 	if (args->timeout_ns >= 0) {
 		timeout_stack = ns_to_timespec(args->timeout_ns);
@@ -2766,13 +2894,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (ret)
 		goto out;
 
-	if (obj->active) {
-		seqno = obj->last_read_seqno;
-		ring = obj->ring;
-	}
-
-	if (seqno == 0)
-		 goto out;
+	if (!obj->active)
+		goto out;
 
 	/* Do this after OLR check to make sure we make forward progress polling
 	 * on this IOCTL with a 0 timeout (like busy ioctl)
@@ -2782,11 +2905,23 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto out;
 	}
 
+	for (i = n = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+
 	drm_gem_object_unreference(&obj->base);
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, timeout, file->driver_priv);
+	for (i = 0; i < n; i++) {
+		if (ret == 0)
+			ret = __wait_request(rq[i], true, timeout, file->driver_priv);
+
+		i915_request_put(rq[i]);
+	}
+
 	if (timeout)
 		args->timeout_ns = timespec_to_ns(timeout);
 	return ret;
@@ -2797,6 +2932,41 @@ out:
 	return ret;
 }
 
+static int
+i915_request_sync(struct i915_gem_request *rq,
+		  struct intel_engine_cs *to,
+		  struct drm_i915_gem_object *obj)
+{
+	int ret, idx;
+
+	if (to == NULL)
+		return i915_wait_request(rq);
+
+	idx = intel_ring_sync_index(rq->ring, to);
+	if (rq->seqno <= rq->ring->semaphore.sync_seqno[idx])
+		return 0;
+
+	ret = i915_gem_check_olr(rq);
+	if (ret)
+		return ret;
+
+	if (!i915_request_complete(rq)) {
+		trace_i915_gem_ring_sync_to(rq->ring, to, rq->seqno);
+		ret = to->semaphore.sync_to(to, rq->ring, rq->seqno);
+		if (ret)
+			return ret;
+	}
+
+	/* We must recheck last_read_request because sync_to()
+	 * might have just caused seqno wrap under
+	 * the radar.
+	 */
+	if (obj->last_read[rq->ring->id].request == rq)
+		rq->ring->semaphore.sync_seqno[idx] = rq->seqno;
+
+	return 0;
+}
+
 /**
  * i915_gem_object_sync - sync an object to a ring.
  *
@@ -2811,40 +2981,36 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to)
+		     struct intel_engine_cs *to,
+		     bool readonly)
 {
-	struct intel_engine_cs *from = obj->ring;
-	u32 seqno;
-	int ret, idx;
-
-	if (from == NULL || to == from)
-		return 0;
+	struct i915_gem_request *rq;
+	struct intel_engine_cs *semaphore;
+	int ret = 0, i;
 
-	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
+	semaphore = NULL;
+	if (i915_semaphore_is_enabled(obj->base.dev))
+		semaphore = to;
 
-	idx = intel_ring_sync_index(from, to);
-
-	seqno = obj->last_read_seqno;
-	/* Optimization: Avoid semaphore sync when we are sure we already
-	 * waited for an object with higher seqno */
-	if (seqno <= from->semaphore.sync_seqno[idx])
-		return 0;
-
-	ret = i915_gem_check_olr(obj->ring, seqno);
-	if (ret)
-		return ret;
+	if (readonly) {
+		rq = obj->last_write.request;
+		if (rq != NULL && to != rq->ring)
+			ret = i915_request_sync(rq, semaphore, obj);
+	} else {
+		for (i = 0; i < I915_NUM_RINGS; i++) {
+			rq = obj->last_read[i].request;
+			if (rq == NULL || to == rq->ring)
+				continue;
 
-	trace_i915_gem_ring_sync_to(from, to, seqno);
-	ret = to->semaphore.sync_to(to, from, seqno);
-	if (!ret)
-		/* We use last_read_seqno because sync_to()
-		 * might have just caused seqno wrap under
-		 * the radar.
-		 */
-		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
+			ret = i915_request_sync(rq, semaphore, obj);
+			if (ret)
+				break;
+		}
+	}
 
+	i915_gem_object_retire(obj);
 	return ret;
+
 }
 
 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
@@ -3150,14 +3316,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 static int
 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 {
-	if (obj->last_fenced_seqno) {
-		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
-		if (ret)
-			return ret;
+	int ret;
 
-		obj->last_fenced_seqno = 0;
-	}
+	if (obj->last_fence.request == NULL)
+		return 0;
 
+	ret = i915_wait_request(obj->last_fence.request);
+	if (ret)
+		return ret;
+
+	i915_gem_object_retire__fence(obj);
 	return 0;
 }
 
@@ -3822,11 +3990,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	bool was_pin_display;
 	int ret;
 
-	if (pipelined != obj->ring) {
-		ret = i915_gem_object_sync(obj, pipelined);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_sync(obj, pipelined, true);
+	if (ret)
+		return ret;
 
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -3974,38 +4140,35 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
-	struct drm_i915_gem_request *request;
-	struct intel_engine_cs *ring = NULL;
-	unsigned reset_counter;
-	u32 seqno = 0;
+	struct i915_gem_request *rq, *iter;
 	int ret;
 
 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
-	if (ret)
-		return ret;
+	/* used for querying whether the GPU is wedged by legacy userspace */
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		return -EIO;
 
 	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
+	rq = NULL;
+	list_for_each_entry(iter, &file_priv->mm.request_list, client_list) {
+		if (time_after_eq(iter->emitted_jiffies, recent_enough))
 			break;
-
-		ring = request->ring;
-		seqno = request->seqno;
+		rq = iter;
 	}
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
+	rq = i915_request_get(rq);
 	spin_unlock(&file_priv->mm.lock);
 
-	if (seqno == 0)
+	if (rq == NULL)
 		return 0;
 
-	ret = __wait_seqno(ring, seqno, reset_counter, true, NULL, NULL);
+	ret = __wait_request(rq, true, NULL, NULL);
 	if (ret == 0)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
 
+	i915_request_put(rq);
 	return ret;
 }
 
@@ -4219,7 +4382,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret;
+	int ret, i;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -4238,10 +4401,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 */
 	ret = i915_gem_object_flush_active(obj);
 
-	args->busy = obj->active;
-	if (obj->ring) {
+	args->busy = 0;
+	if (obj->active) {
 		BUILD_BUG_ON(I915_NUM_RINGS > 16);
-		args->busy |= intel_ring_flag(obj->ring) << 16;
+		args->busy |= 1;
+		for (i = 0; i < I915_NUM_RINGS; i++)  {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			args->busy |= 1 << (16 + i);
+		}
 	}
 
 	drm_gem_object_unreference(&obj->base);
@@ -4307,8 +4476,13 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
+	int i;
+
 	INIT_LIST_HEAD(&obj->global_list);
-	INIT_LIST_HEAD(&obj->ring_list);
+	INIT_LIST_HEAD(&obj->last_fence.ring_list);
+	INIT_LIST_HEAD(&obj->last_write.ring_list);
+	for (i = 0; i < I915_NUM_RINGS; i++)
+		INIT_LIST_HEAD(&obj->last_read[i].ring_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 
@@ -4876,7 +5050,9 @@ i915_gem_lastclose(struct drm_device *dev)
 static void
 init_ring_lists(struct intel_engine_cs *ring)
 {
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
+	INIT_LIST_HEAD(&ring->fence_list);
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
@@ -4972,13 +5148,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	 */
 	spin_lock(&file_priv->mm.lock);
 	while (!list_empty(&file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-		list_del(&request->client_list);
-		request->file_priv = NULL;
+		rq = list_first_entry(&file_priv->mm.request_list,
+				      struct i915_gem_request,
+				      client_list);
+		list_del(&rq->client_list);
+		rq->file_priv = NULL;
 	}
 	spin_unlock(&file_priv->mm.lock);
 }
@@ -5266,3 +5442,37 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 
 	return vma;
 }
+
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)
+{
+	u32 seqno = 0;
+	struct i915_gem_request *rq = NULL;
+	int i;
+
+	/* This is approximate as seqno cannot be used across rings */
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
+			rq = obj->last_read[i].request, seqno = rq->seqno;
+	}
+
+	return rq;
+}
+
+struct i915_gem_request *i915_gem_seqno_to_request(struct intel_engine_cs *ring,
+						   u32 seqno)
+{
+	struct i915_gem_request *rq;
+
+	list_for_each_entry(rq, &ring->request_list, list) {
+		if (rq->seqno == seqno)
+			return rq;
+
+		if (__i915_seqno_passed(seqno, rq->seqno))
+			break;
+	}
+
+	return NULL;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 9683e62..5cc1e98 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -303,13 +303,9 @@ void i915_gem_context_reset(struct drm_device *dev)
 		if (!lctx)
 			continue;
 
-		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
+		if (dctx->legacy_hw_ctx.rcs_state && i == RCS)
 			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
 						      get_context_alignment(dev), 0));
-			/* Fake a finish/inactive */
-			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
-			dctx->legacy_hw_ctx.rcs_state->active = 0;
-		}
 
 		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
 			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
@@ -385,7 +381,6 @@ void i915_gem_context_fini(struct drm_device *dev)
 		WARN_ON(!dev_priv->ring[RCS].last_context);
 		if (dev_priv->ring[RCS].last_context == dctx) {
 			/* Fake switch to NULL context */
-			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
 			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
 			i915_gem_context_unreference(dctx);
 			dev_priv->ring[RCS].last_context = NULL;
@@ -613,8 +608,11 @@ static int do_switch(struct intel_engine_cs *ring,
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), ring);
+		struct drm_i915_gem_object *from_obj = from->legacy_hw_ctx.rcs_state;
+
+		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from_obj), ring, 0);
+
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -622,11 +620,10 @@ static int do_switch(struct intel_engine_cs *ring,
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
-		from->legacy_hw_ctx.rcs_state->dirty = 1;
-		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != ring);
+		from_obj->dirty = 1;
 
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
+		i915_gem_object_ggtt_unpin(from_obj);
 		i915_gem_context_unreference(from);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1a0611b..13a2f13 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -832,7 +832,8 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, ring);
+
+		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
 		if (ret)
 			return ret;
 
@@ -946,40 +947,20 @@ void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 				   struct intel_engine_cs *ring)
 {
-	u32 seqno = intel_ring_get_seqno(ring);
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
-
-		obj->base.write_domain = obj->base.pending_write_domain;
-		if (obj->base.write_domain == 0)
-			obj->base.pending_read_domains |= obj->base.read_domains;
-		obj->base.read_domains = obj->base.pending_read_domains;
-
-		i915_vma_move_to_active(vma, ring);
-		if (obj->base.write_domain) {
-			obj->dirty = 1;
-			obj->last_write_seqno = seqno;
+		unsigned fenced;
 
-			intel_fb_obj_invalidate(obj, ring);
-
-			/* update for the implicit flush after a batch */
-			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-		}
+		fenced = 0;
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			obj->last_fenced_seqno = seqno;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				struct drm_i915_private *dev_priv = to_i915(ring->dev);
-				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
-					       &dev_priv->mm.fence_list);
-			}
+			fenced |= VMA_IS_FENCED;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+				fenced |= VMA_HAS_FENCE;
 		}
 
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
+		i915_vma_move_to_active(vma, ring, fenced);
 	}
 }
 
@@ -993,7 +974,7 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, NULL);
+	(void)__i915_add_request(ring, file, obj);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index e60be3f..fc1223c 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -159,9 +159,10 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
 	if (ret)
 		goto out;
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
+	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring, 0);
 
-	ret = __i915_add_request(ring, NULL, so.obj, NULL);
+	ret = __i915_add_request(ring, NULL, so.obj);
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	render_state_fini(&so);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 7e623bf..a45651d 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 		if (ret == 0) {
 			obj->fence_dirty =
-				obj->last_fenced_seqno ||
+				obj->last_fence.request ||
 				obj->fence_reg != I915_FENCE_REG_NONE;
 
 			obj->tiling_mode = args->tiling_mode;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 1e05414..fb1041f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -661,11 +661,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
-	err->rseqno = obj->last_read_seqno;
-	err->wseqno = obj->last_write_seqno;
+	err->rseqno = i915_request_seqno(rq);
+	err->wseqno = i915_request_seqno(obj->last_write.request);
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
@@ -679,7 +680,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
 	err->userptr = obj->userptr.mm != NULL;
-	err->ring = obj->ring ? obj->ring->id : -1;
+	err->ring = i915_request_ring_id(rq);
 	err->cache_level = obj->cache_level;
 }
 
@@ -877,8 +878,8 @@ static void i915_record_ring_state(struct drm_device *dev,
 
 	ering->waiting = waitqueue_active(&ring->irq_queue);
 	ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
-	ering->seqno = ring->get_seqno(ring, false);
 	ering->acthd = intel_ring_get_active_head(ring);
+	ering->seqno = ring->get_seqno(ring);
 	ering->head = I915_READ_HEAD(ring);
 	ering->tail = I915_READ_TAIL(ring);
 	ering->ctl = I915_READ_CTL(ring);
@@ -972,7 +973,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	int i, count;
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
@@ -987,13 +988,12 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
 		i915_record_ring_state(dev, error, ring, &error->ring[i]);
 
-		request = i915_gem_find_active_request(ring);
-		if (request) {
+		rq = i915_gem_find_active_request(ring);
+		if (rq) {
 			struct i915_address_space *vm;
 
-			vm = request->ctx && request->ctx->ppgtt ?
-				&request->ctx->ppgtt->base :
-				&dev_priv->gtt.base;
+			vm = rq->ctx && rq->ctx->ppgtt ?
+				&rq->ctx->ppgtt->base : &dev_priv->gtt.base;
 
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
@@ -1001,7 +1001,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			 */
 			error->ring[i].batchbuffer =
 				i915_error_object_create(dev_priv,
-							 request->batch_obj,
+							 rq->batch_obj,
 							 vm);
 
 			if (HAS_BROKEN_CS_TLB(dev_priv->dev))
@@ -1009,11 +1009,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
 					i915_error_ggtt_object_create(dev_priv,
 							     ring->scratch.obj);
 
-			if (request->file_priv) {
+			if (rq->file_priv) {
 				struct task_struct *task;
 
 				rcu_read_lock();
-				task = pid_task(request->file_priv->file->pid,
+				task = pid_task(rq->file_priv->file->pid,
 						PIDTYPE_PID);
 				if (task) {
 					strcpy(error->ring[i].comm, task->comm);
@@ -1032,7 +1032,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list)
+		list_for_each_entry(rq, &ring->request_list, list)
 			count++;
 
 		error->ring[i].num_requests = count;
@@ -1045,13 +1045,13 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		}
 
 		count = 0;
-		list_for_each_entry(request, &ring->request_list, list) {
+		list_for_each_entry(rq, &ring->request_list, list) {
 			struct drm_i915_error_request *erq;
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
-			erq->jiffies = request->emitted_jiffies;
-			erq->tail = request->tail;
+			erq->seqno = rq->seqno;
+			erq->jiffies = rq->emitted_jiffies;
+			erq->tail = rq->tail;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b1bb88f..2dab019 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1265,9 +1265,6 @@ static void notify_ring(struct drm_device *dev,
 
 	trace_i915_gem_request_complete(ring);
 
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		intel_notify_mmio_flip(ring);
-
 	wake_up_all(&ring->irq_queue);
 	i915_queue_hangcheck(dev);
 }
@@ -3041,18 +3038,15 @@ static void gen8_disable_vblank(struct drm_device *dev, int pipe)
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
-static u32
-ring_last_seqno(struct intel_engine_cs *ring)
-{
-	return list_entry(ring->request_list.prev,
-			  struct drm_i915_gem_request, list)->seqno;
-}
-
 static bool
-ring_idle(struct intel_engine_cs *ring, u32 seqno)
+ring_idle(struct intel_engine_cs *ring)
 {
-	return (list_empty(&ring->request_list) ||
-		i915_seqno_passed(seqno, ring_last_seqno(ring)));
+	if (list_empty(&ring->request_list))
+		return true;
+
+	return i915_request_complete(list_entry(ring->request_list.prev,
+						struct i915_gem_request,
+						list));
 }
 
 static bool
@@ -3155,6 +3149,7 @@ static int semaphore_passed(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_engine_cs *signaller;
+	struct i915_gem_request *rq;
 	u32 seqno;
 
 	ring->hangcheck.deadlock++;
@@ -3167,7 +3162,8 @@ static int semaphore_passed(struct intel_engine_cs *ring)
 	if (signaller->hangcheck.deadlock >= I915_NUM_RINGS)
 		return -1;
 
-	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+	rq = i915_gem_seqno_to_request(ring, seqno);
+	if (rq == NULL || i915_request_complete(rq))
 		return 1;
 
 	/* cursory check for an unkickable deadlock */
@@ -3268,11 +3264,11 @@ static void i915_hangcheck_elapsed(unsigned long data)
 
 		semaphore_clear_deadlocks(dev_priv);
 
-		seqno = ring->get_seqno(ring, false);
 		acthd = intel_ring_get_active_head(ring);
+		seqno = ring->get_seqno(ring);
 
 		if (ring->hangcheck.seqno == seqno) {
-			if (ring_idle(ring, seqno)) {
+			if (ring_idle(ring)) {
 				ring->hangcheck.action = HANGCHECK_IDLE;
 
 				if (waitqueue_active(&ring->irq_queue)) {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index f5aa006..0072d17 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -365,7 +365,7 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->seqno = seqno;
+			   __entry->seqno = intel_ring_get_seqno(ring);
 			   __entry->flags = flags;
 			   i915_trace_irq_get(ring, seqno);
 			   ),
@@ -435,7 +435,7 @@ TRACE_EVENT(i915_gem_request_complete,
 	    TP_fast_assign(
 			   __entry->dev = ring->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->seqno = ring->get_seqno(ring, false);
+			   __entry->seqno = ring->get_seqno(ring);
 			   ),
 
 	    TP_printk("dev=%u, ring=%u, seqno=%u",
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a1cf052..4432fe8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -9065,6 +9065,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	BUG_ON(atomic_read(&to_intel_crtc(work->crtc)->unpin_work_count) == 0);
 	atomic_dec(&to_intel_crtc(work->crtc)->unpin_work_count);
 
+	i915_request_put(work->flip_queued_request);
 	kfree(work);
 }
 
@@ -9455,7 +9456,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
 	else if (i915.enable_execlists)
 		return true;
 	else
-		return ring != obj->ring;
+		return ring != i915_request_ring(obj->last_write.request);
 }
 
 static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -9486,94 +9487,54 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
 	POSTING_READ(DSPSURF(intel_crtc->plane));
 }
 
-static int intel_postpone_flip(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *ring;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!obj->last_write_seqno)
-		return 0;
-
-	ring = obj->ring;
-
-	if (i915_seqno_passed(ring->get_seqno(ring, true),
-			      obj->last_write_seqno))
-		return 0;
-
-	ret = i915_gem_check_olr(ring, obj->last_write_seqno);
-	if (ret)
-		return ret;
-
-	if (WARN_ON(!ring->irq_get(ring)))
-		return 0;
-
-	return 1;
-}
+struct flip_work {
+	struct work_struct work;
+	struct i915_gem_request *rq;
+	struct intel_crtc *crtc;
+};
 
-void intel_notify_mmio_flip(struct intel_engine_cs *ring)
+static void intel_mmio_flip_work(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv = to_i915(ring->dev);
-	struct intel_crtc *intel_crtc;
-	unsigned long irq_flags;
-	u32 seqno;
-
-	seqno = ring->get_seqno(ring, false);
-
-	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	for_each_intel_crtc(ring->dev, intel_crtc) {
-		struct intel_mmio_flip *mmio_flip;
+	struct flip_work *flip = container_of(work, struct flip_work, work);
 
-		mmio_flip = &intel_crtc->mmio_flip;
-		if (mmio_flip->seqno == 0)
-			continue;
-
-		if (ring->id != mmio_flip->ring_id)
-			continue;
+	if (__i915_request_wait(flip->rq, false) == 0)
+		intel_do_mmio_flip(flip->crtc);
 
-		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
-			intel_do_mmio_flip(intel_crtc);
-			mmio_flip->seqno = 0;
-			ring->irq_put(ring);
-		}
-	}
-	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+	i915_request_put(flip->rq);
+	kfree(flip);
 }
 
-static int intel_queue_mmio_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *ring,
-				 uint32_t flags)
+static int intel_queue_mmio_flip(struct intel_crtc *crtc,
+				 struct i915_gem_request *rq)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned long irq_flags;
+	struct flip_work *flip;
 	int ret;
 
-	if (WARN_ON(intel_crtc->mmio_flip.seqno))
+	if (WARN_ON(crtc->mmio_flip))
 		return -EBUSY;
 
-	ret = intel_postpone_flip(obj);
-	if (ret < 0)
+	if (rq == NULL) {
+		intel_do_mmio_flip(crtc);
+		return 0;
+	}
+
+	ret = i915_gem_check_olr(rq);
+	if (ret)
 		return ret;
-	if (ret == 0) {
-		intel_do_mmio_flip(intel_crtc);
+
+	if (i915_request_complete(rq)) {
+		intel_do_mmio_flip(crtc);
 		return 0;
 	}
 
-	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
-	intel_crtc->mmio_flip.ring_id = obj->ring->id;
-	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+	flip = kmalloc(sizeof(*flip), GFP_KERNEL);
+	if (flip == NULL)
+		return -ENOMEM;
 
-	/*
-	 * Double check to catch cases where irq fired before
-	 * mmio flip data was ready
-	 */
-	intel_notify_mmio_flip(obj->ring);
+	INIT_WORK(&flip->work, intel_mmio_flip_work);
+	flip->rq = i915_request_get(rq);
+	flip->crtc = crtc;
+	schedule_work(&flip->work);
 	return 0;
 }
 
@@ -9587,6 +9548,7 @@ static int intel_default_queue_flip(struct drm_device *dev,
 	return -ENODEV;
 }
 
+
 static int intel_crtc_page_flip(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
 				struct drm_pending_vblank_event *event,
@@ -9600,6 +9562,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	enum pipe pipe = intel_crtc->pipe;
 	struct intel_unpin_work *work;
 	struct intel_engine_cs *ring;
+	struct i915_gem_request *rq;
 	unsigned long flags;
 	int ret;
 
@@ -9684,28 +9647,44 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev)) {
 		ring = &dev_priv->ring[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		ring = obj->ring;
+		ring = i915_request_ring(obj->last_write.request);
 		if (ring == NULL || ring->id != RCS)
 			ring = &dev_priv->ring[BCS];
 	} else {
 		ring = &dev_priv->ring[RCS];
 	}
 
-	ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
-	if (ret)
-		goto cleanup_pending;
+	if (use_mmio_flip(ring, obj)) {
+		rq = obj->last_write.request;
 
-	work->gtt_offset =
-		i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+		ret = intel_pin_and_fence_fb_obj(dev, obj, i915_request_ring(rq));
+		if (ret)
+			goto cleanup_pending;
+
+		work->gtt_offset =
+			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+
+		ret = intel_queue_mmio_flip(intel_crtc, rq);
+		if (ret)
+			goto cleanup_unpin;
+	} else {
+		ret = intel_pin_and_fence_fb_obj(dev, obj, ring);
+		if (ret)
+			goto cleanup_pending;
+
+		work->gtt_offset =
+			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
 
-	if (use_mmio_flip(ring, obj))
-		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, ring,
-					    page_flip_flags);
-	else
 		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
-				page_flip_flags);
-	if (ret)
-		goto cleanup_unpin;
+						   page_flip_flags);
+		if (ret)
+			goto cleanup_unpin;
+
+		rq = intel_ring_get_request(ring);
+	}
+
+	work->flip_queued_request = i915_request_get(rq);
+	work->enable_stall_check = true;
 
 	i915_gem_track_fb(work->old_fb_obj, obj,
 			  INTEL_FRONTBUFFER_PRIMARY(pipe));
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1b3d1d7..617af38 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -372,11 +372,6 @@ struct intel_pipe_wm {
 	bool sprites_scaled;
 };
 
-struct intel_mmio_flip {
-	u32 seqno;
-	u32 ring_id;
-};
-
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
@@ -426,7 +421,7 @@ struct intel_crtc {
 	} wm;
 
 	int scanline_offset;
-	struct intel_mmio_flip mmio_flip;
+	struct i915_gem_request *mmio_flip;
 };
 
 struct intel_plane_wm_parameters {
@@ -657,6 +652,7 @@ struct intel_unpin_work {
 #define INTEL_FLIP_COMPLETE	2
 	u32 flip_count;
 	u32 gtt_offset;
+	struct i915_gem_request *flip_queued_request;
 	bool enable_stall_check;
 };
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6b5f416..bbcc0e6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -122,7 +122,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
 
-		ret = i915_gem_object_sync(obj, ring);
+		ret = i915_gem_object_sync(obj, ring, obj->base.pending_write_domain == 0);
 		if (ret)
 			return ret;
 
@@ -262,30 +262,11 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 	/* TODO: how to submit a context to the ELSP is not here yet */
 }
 
-static int logical_ring_alloc_seqno(struct intel_engine_cs *ring)
-{
-	if (ring->outstanding_lazy_seqno)
-		return 0;
-
-	if (ring->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
-
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
-
-		ring->preallocated_lazy_request = request;
-	}
-
-	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
-}
-
-static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
-				     int bytes)
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
+				       int bytes)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
+	struct i915_gem_request *rq;
 	int ret;
 
 	if (ringbuf->last_retired_head != -1) {
@@ -297,24 +278,20 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
 			return 0;
 	}
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= bytes) {
-			seqno = request->seqno;
+	list_for_each_entry(rq, &ring->request_list, list)
+		if (__intel_ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= bytes)
 			break;
-		}
-	}
 
-	if (seqno == 0)
+	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
 		return -ENOSPC;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_request(rq);
 	if (ret)
 		return ret;
 
+	i915_gem_retire_requests_ring(ring);
 	/* TODO: make sure we update the right ringbuffer's last_retired_head
 	 * when retiring requests */
-	i915_gem_retire_requests_ring(ring);
 	ringbuf->head = ringbuf->last_retired_head;
 	ringbuf->last_retired_head = -1;
 
@@ -322,58 +299,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
 	return 0;
 }
 
-static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
-				       int bytes)
-{
-	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long end;
-	int ret;
-
-	ret = logical_ring_wait_request(ringbuf, bytes);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* Force the context submission in case we have been skipping it */
-	intel_logical_ring_advance_and_submit(ringbuf);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	do {
-		ringbuf->head = I915_READ_HEAD(ring);
-		ringbuf->space = intel_ring_space(ringbuf);
-		if (ringbuf->space >= bytes) {
-			ret = 0;
-			break;
-		}
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-
-	return ret;
-}
-
 static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
 {
 	uint32_t __iomem *virt;
@@ -419,21 +344,14 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
 int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
 {
 	struct intel_engine_cs *ring = ringbuf->ring;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
 	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
 	if (ret)
 		return ret;
 
 	/* Preallocate the olr before touching the ring */
-	ret = logical_ring_alloc_seqno(ring);
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -620,7 +538,7 @@ static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
 	return 0;
 }
 
-static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+static u32 gen8_get_seqno(struct intel_engine_cs *ring)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
@@ -648,7 +566,7 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
 				(ring->status_page.gfx_addr +
 				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
 	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
+	intel_logical_ring_emit(ringbuf, intel_ring_get_seqno(ring));
 	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
 	intel_logical_ring_emit(ringbuf, MI_NOOP);
 	intel_logical_ring_advance_and_submit(ringbuf);
@@ -665,8 +583,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 
 	intel_logical_ring_stop(ring);
 	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+
+	kfree(ring->preallocated_request);
+	ring->preallocated_request = NULL;
 
 	if (ring->cleanup)
 		ring->cleanup(ring);
@@ -689,7 +608,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->buffer = NULL;
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
+	INIT_LIST_HEAD(&ring->fence_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	init_waitqueue_head(&ring->irq_queue);
 
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index dc2f4f2..42ebbf9 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -182,7 +182,7 @@ struct intel_overlay {
 	u32 flip_addr;
 	struct drm_i915_gem_object *reg_bo;
 	/* flip handling */
-	uint32_t last_flip_req;
+	struct i915_gem_request *flip_request;
 	void (*flip_tail)(struct intel_overlay *);
 };
 
@@ -208,29 +208,49 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
 		io_mapping_unmap(regs);
 }
 
-static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
-					 void (*tail)(struct intel_overlay *))
+/* recover from an interruption due to a signal
+ * We have to be careful not to repeat work forever and make forward progress. */
+static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
 	int ret;
 
-	BUG_ON(overlay->last_flip_req);
-	ret = i915_add_request(ring, &overlay->last_flip_req);
-	if (ret)
-		return ret;
+	if (overlay->flip_request == NULL)
+		return 0;
 
-	overlay->flip_tail = tail;
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
+	ret = i915_wait_request(overlay->flip_request);
 	if (ret)
 		return ret;
-	i915_gem_retire_requests(dev);
 
-	overlay->last_flip_req = 0;
+	i915_request_put(overlay->flip_request);
+	overlay->flip_request = NULL;
+
+	i915_gem_retire_requests(overlay->dev);
+
+	if (overlay->flip_tail)
+		overlay->flip_tail(overlay);
+
 	return 0;
 }
 
+static int intel_overlay_add_request(struct intel_overlay *overlay,
+				     struct intel_engine_cs *ring,
+				     void (*tail)(struct intel_overlay *))
+{
+	BUG_ON(overlay->flip_request);
+	overlay->flip_request = i915_request_get(intel_ring_get_request(ring));
+	overlay->flip_tail = tail;
+
+	return i915_add_request(ring);
+}
+
+static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
+					 struct intel_engine_cs *ring,
+					 void (*tail)(struct intel_overlay *))
+{
+	intel_overlay_add_request(overlay, ring, tail);
+	return intel_overlay_recover_from_interrupt(overlay);
+}
+
 /* overlay needs to be disable in OCMD reg */
 static int intel_overlay_on(struct intel_overlay *overlay)
 {
@@ -252,9 +272,9 @@ static int intel_overlay_on(struct intel_overlay *overlay)
 	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
 	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return intel_overlay_do_wait_request(overlay, NULL);
+	return intel_overlay_do_wait_request(overlay, ring, NULL);
 }
 
 /* overlay needs to be enabled in OCMD reg */
@@ -284,15 +304,18 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 
 	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	intel_ring_emit(ring, flip_addr);
-	intel_ring_advance(ring);
+	__intel_ring_advance(ring);
 
-	return i915_add_request(ring, &overlay->last_flip_req);
+	return intel_overlay_add_request(overlay, ring, NULL);
 }
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
 
+	i915_gem_track_fb(obj, NULL,
+			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
+
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
@@ -352,33 +375,9 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 		intel_ring_emit(ring, flip_addr);
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	}
-	intel_ring_advance(ring);
-
-	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
-}
-
-/* recover from an interruption due to a signal
- * We have to be careful not to repeat work forever an make forward progess. */
-static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
-{
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
-
-	if (overlay->last_flip_req == 0)
-		return 0;
+	__intel_ring_advance(ring);
 
-	ret = i915_wait_seqno(ring, overlay->last_flip_req);
-	if (ret)
-		return ret;
-	i915_gem_retire_requests(dev);
-
-	if (overlay->flip_tail)
-		overlay->flip_tail(overlay);
-
-	overlay->last_flip_req = 0;
-	return 0;
+	return intel_overlay_do_wait_request(overlay, ring, intel_overlay_off_tail);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -387,10 +386,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
  */
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
-	int ret;
+	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
+	int ret = 0;
 
 	/* Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -399,6 +396,8 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 		return 0;
 
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
+		struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+
 		/* synchronous slowpath */
 		ret = intel_ring_begin(ring, 2);
 		if (ret)
@@ -406,20 +405,14 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 
 		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_advance(ring);
+		__intel_ring_advance(ring);
 
-		ret = intel_overlay_do_wait_request(overlay,
+		ret = intel_overlay_do_wait_request(overlay, ring,
 						    intel_overlay_release_old_vid_tail);
-		if (ret)
-			return ret;
-	}
-
-	intel_overlay_release_old_vid_tail(overlay);
+	} else
+		intel_overlay_release_old_vid_tail(overlay);
 
-
-	i915_gem_track_fb(overlay->old_vid_bo, NULL,
-			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
-	return 0;
+	return ret;
 }
 
 struct put_image_params {
@@ -821,12 +814,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	iowrite32(0, &regs->OCMD);
 	intel_overlay_unmap_regs(overlay, regs);
 
-	ret = intel_overlay_off(overlay);
-	if (ret != 0)
-		return ret;
-
-	intel_overlay_off_tail(overlay);
-	return 0;
+	return intel_overlay_off(overlay);
 }
 
 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 13543f8..ee656ea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -750,7 +750,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 					   PIPE_CONTROL_FLUSH_ENABLE);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
 		intel_ring_emit(signaller, 0);
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
@@ -787,7 +787,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
 		intel_ring_emit(signaller, 0);
@@ -818,7 +818,7 @@ static int gen6_signal(struct intel_engine_cs *signaller,
 		if (mbox_reg != GEN6_NOSYNC) {
 			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
 			intel_ring_emit(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+			intel_ring_emit(signaller, intel_ring_get_seqno(signaller));
 		}
 	}
 
@@ -853,7 +853,7 @@ gen6_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -971,7 +971,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -990,7 +990,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, 0);
 	__intel_ring_advance(ring);
 
@@ -998,21 +998,7 @@ pc_render_add_request(struct intel_engine_cs *ring)
 }
 
 static u32
-gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
-{
-	/* Workaround to force correct ordering between irq and seqno writes on
-	 * ivb (and maybe also on snb) by reading from a CS register (like
-	 * ACTHD) before reading the status page. */
-	if (!lazy_coherency) {
-		struct drm_i915_private *dev_priv = ring->dev->dev_private;
-		POSTING_READ(RING_ACTHD(ring->mmio_base));
-	}
-
-	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
-}
-
-static u32
-ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+ring_get_seqno(struct intel_engine_cs *ring)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
@@ -1024,7 +1010,7 @@ ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
 }
 
 static u32
-pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+pc_render_get_seqno(struct intel_engine_cs *ring)
 {
 	return ring->scratch.cpu_page[0];
 }
@@ -1230,7 +1216,7 @@ i9xx_add_request(struct intel_engine_cs *ring)
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, intel_ring_get_seqno(ring));
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	__intel_ring_advance(ring);
 
@@ -1247,6 +1233,11 @@ gen6_ring_get_irq(struct intel_engine_cs *ring)
 	if (!dev->irq_enabled)
 	       return false;
 
+	/* It looks like we need to prevent the gt from suspending while waiting
+	 * for a notify irq, otherwise irqs seem to get lost on at least the
+	 * blt/bsd rings on ivb. */
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
 	if (ring->irq_refcount++ == 0) {
 		if (HAS_L3_DPF(dev) && ring->id == RCS)
@@ -1278,6 +1269,8 @@ gen6_ring_put_irq(struct intel_engine_cs *ring)
 		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+
+	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
 static bool
@@ -1610,7 +1603,8 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	}
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ringbuf->size = 32 * PAGE_SIZE;
 	ringbuf->ring = ring;
@@ -1671,8 +1665,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
 	intel_destroy_ringbuffer_obj(ringbuf);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
+	ring->preallocated_request = NULL;
 
 	if (ring->cleanup)
 		ring->cleanup(ring);
@@ -1688,8 +1681,7 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
+	struct i915_gem_request *rq;
 	int ret;
 
 	if (ringbuf->last_retired_head != -1) {
@@ -1701,18 +1693,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 			return 0;
 	}
 
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= n) {
-			seqno = request->seqno;
+	list_for_each_entry(rq, &ring->request_list, list)
+		if (__intel_ring_space(rq->tail, ringbuf->tail, ringbuf->size) >= n)
 			break;
-		}
-	}
 
-	if (seqno == 0)
+	if (rq == list_entry(&ring->request_list, typeof(*rq), list))
 		return -ENOSPC;
 
-	ret = i915_wait_seqno(ring, seqno);
+	ret = i915_wait_request(rq);
 	if (ret)
 		return ret;
 
@@ -1729,6 +1717,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_ringbuffer *ringbuf = ring->buffer;
+	unsigned reset_counter;
 	unsigned long end;
 	int ret;
 
@@ -1739,6 +1728,13 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	/* force the tail write in case we have been skipping them */
 	__intel_ring_advance(ring);
 
+	reset_counter = 0;
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible,
+				   &reset_counter);
+	if (ret)
+		return ret;
+
 	/* With GEM the hangcheck timer should kick us out of the loop,
 	 * leaving it early runs the risk of corrupting GEM state (due
 	 * to running on almost untested codepaths). But on resume
@@ -1755,6 +1751,12 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 			break;
 		}
 
+		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+					   dev_priv->mm.interruptible,
+					   &reset_counter);
+		if (ret)
+			return ret;
+
 		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
 		    dev->primary->master) {
 			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
@@ -1764,16 +1766,6 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 
 		msleep(1);
 
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
 		if (time_after(jiffies, end)) {
 			ret = -EBUSY;
 			break;
@@ -1808,12 +1800,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 
 int intel_ring_idle(struct intel_engine_cs *ring)
 {
-	u32 seqno;
 	int ret;
 
 	/* We need to add any requests required to flush the objects and ring */
-	if (ring->outstanding_lazy_seqno) {
-		ret = i915_add_request(ring, NULL);
+	if (ring->preallocated_request) {
+		ret = i915_add_request(ring);
 		if (ret)
 			return ret;
 	}
@@ -1822,30 +1813,46 @@ int intel_ring_idle(struct intel_engine_cs *ring)
 	if (list_empty(&ring->request_list))
 		return 0;
 
-	seqno = list_entry(ring->request_list.prev,
-			   struct drm_i915_gem_request,
-			   list)->seqno;
-
-	return i915_wait_seqno(ring, seqno);
+	return i915_wait_request(container_of(ring->request_list.prev,
+					      struct i915_gem_request,
+					      list));
 }
 
-static int
-intel_ring_alloc_seqno(struct intel_engine_cs *ring)
+int
+intel_ring_alloc_request(struct intel_engine_cs *ring)
 {
-	if (ring->outstanding_lazy_seqno)
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	struct i915_gem_request *rq;
+	int ret;
+
+	if (ring->preallocated_request)
 		return 0;
 
-	if (ring->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
+	rq = kmalloc(sizeof(*rq), GFP_KERNEL);
+	if (rq == NULL)
+		return -ENOMEM;
 
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
+	rq->reset_counter = 0;
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible,
+				   &rq->reset_counter);
+	if (ret)
+		goto err;
 
-		ring->preallocated_lazy_request = request;
-	}
+	ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
+	if (ret)
+		goto err;
 
-	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+	kref_init(&rq->kref);
+	rq->ring = ring;
+	rq->completed = false;
+
+	ring->preallocated_request = rq;
+	return 0;
+
+err:
+	kfree(rq);
+	return ret;
 }
 
 static int __intel_ring_prepare(struct intel_engine_cs *ring,
@@ -1872,20 +1879,20 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring,
 int intel_ring_begin(struct intel_engine_cs *ring,
 		     int num_dwords)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	int ret;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
+	/* Preallocate the olr before touching the ring, */
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
+	/* and by holding the seqno before we prepare we prevent recursion */
 	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
 	if (ret)
 		return ret;
 
-	/* Preallocate the olr before touching the ring */
-	ret = intel_ring_alloc_seqno(ring);
+	/* but we may flush the seqno during prepare. */
+	ret = intel_ring_alloc_request(ring);
 	if (ret)
 		return ret;
 
@@ -1920,7 +1927,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	BUG_ON(ring->outstanding_lazy_seqno);
+	BUG_ON(ring->preallocated_request);
 
 	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
@@ -2140,7 +2147,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		ring->get_seqno = gen6_ring_get_seqno;
+		ring->get_seqno = ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		if (i915_semaphore_is_enabled(dev)) {
 			WARN_ON(!dev_priv->semaphore_obj);
@@ -2156,7 +2163,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_get = gen6_ring_get_irq;
 		ring->irq_put = gen6_ring_put_irq;
 		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		ring->get_seqno = gen6_ring_get_seqno;
+		ring->get_seqno = ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		if (i915_semaphore_is_enabled(dev)) {
 			ring->semaphore.sync_to = gen6_ring_sync;
@@ -2297,7 +2304,8 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->cleanup = render_ring_cleanup;
 
 	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->read_list);
+	INIT_LIST_HEAD(&ring->write_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ringbuf->size = size;
@@ -2345,7 +2353,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 			ring->write_tail = gen6_bsd_ring_write_tail;
 		ring->flush = gen6_bsd_ring_flush;
 		ring->add_request = gen6_add_request;
-		ring->get_seqno = gen6_ring_get_seqno;
+		ring->get_seqno = ring_get_seqno;
 		ring->set_seqno = ring_set_seqno;
 		if (INTEL_INFO(dev)->gen >= 8) {
 			ring->irq_enable_mask =
@@ -2423,7 +2431,7 @@ int intel_init_bsd2_ring_buffer(struct drm_device *dev)
 	ring->mmio_base = GEN8_BSD2_RING_BASE;
 	ring->flush = gen6_bsd_ring_flush;
 	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
+	ring->get_seqno = ring_get_seqno;
 	ring->set_seqno = ring_set_seqno;
 	ring->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
@@ -2453,7 +2461,7 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 	ring->write_tail = ring_write_tail;
 	ring->flush = gen6_ring_flush;
 	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
+	ring->get_seqno = ring_get_seqno;
 	ring->set_seqno = ring_set_seqno;
 	if (INTEL_INFO(dev)->gen >= 8) {
 		ring->irq_enable_mask =
@@ -2510,7 +2518,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)
 	ring->write_tail = ring_write_tail;
 	ring->flush = gen6_ring_flush;
 	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
+	ring->get_seqno = ring_get_seqno;
 	ring->set_seqno = ring_set_seqno;
 
 	if (INTEL_INFO(dev)->gen >= 8) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 24437da..eb4875a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -126,6 +126,7 @@ struct  intel_engine_cs {
 		VCS2
 	} id;
 #define I915_NUM_RINGS 5
+#define I915_NUM_RING_BITS 4
 #define LAST_USER_RING (VECS + 1)
 	u32		mmio_base;
 	struct		drm_device *dev;
@@ -153,8 +154,7 @@ struct  intel_engine_cs {
 	 * seen value is good enough. Note that the seqno will always be
 	 * monotonic, even if not coherent.
 	 */
-	u32		(*get_seqno)(struct intel_engine_cs *ring,
-				     bool lazy_coherency);
+	u32		(*get_seqno)(struct intel_engine_cs *ring);
 	void		(*set_seqno)(struct intel_engine_cs *ring,
 				     u32 seqno);
 	int		(*dispatch_execbuffer)(struct intel_engine_cs *ring,
@@ -242,7 +242,7 @@ struct  intel_engine_cs {
 	 *
 	 * A reference is held on the buffer while on this list.
 	 */
-	struct list_head active_list;
+	struct list_head read_list, write_list, fence_list;
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
@@ -253,8 +253,7 @@ struct  intel_engine_cs {
 	/**
 	 * Do we have some not yet emitted requests outstanding?
 	 */
-	struct drm_i915_gem_request *preallocated_lazy_request;
-	u32 outstanding_lazy_seqno;
+	struct i915_gem_request *preallocated_request;
 	bool gpu_caches_dirty;
 	bool fbc_dirty;
 
@@ -395,6 +394,7 @@ int intel_ring_space(struct intel_ringbuffer *ringbuf);
 bool intel_ring_stopped(struct intel_engine_cs *ring);
 void __intel_ring_advance(struct intel_engine_cs *ring);
 
+int __must_check intel_ring_alloc_request(struct intel_engine_cs *ring);
 int __must_check intel_ring_idle(struct intel_engine_cs *ring);
 void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
 int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
@@ -417,12 +417,15 @@ static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
 	return ringbuf->tail;
 }
 
-static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
+static inline struct i915_gem_request *intel_ring_get_request(struct intel_engine_cs *ring)
 {
-	BUG_ON(ring->outstanding_lazy_seqno == 0);
-	return ring->outstanding_lazy_seqno;
+	BUG_ON(ring->preallocated_request == 0);
+	return ring->preallocated_request;
 }
 
+/* C - the bringer of joy */
+#define intel_ring_get_seqno(ring) intel_ring_get_request(ring)->seqno
+
 static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
 {
 	if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
-- 
1.9.1