[Intel-gfx] [PATCH 3/3] drm/i915: s/seqno/request/ tracking inside objects

Chris Wilson chris at chris-wilson.co.uk
Sun Aug 24 17:54:22 CEST 2014


At the heart of this change is that the seqno is too low-level an
abstraction to handle the growing complexities of command tracking, both
with the introduction of multiple command queues with execbuffer and the
potential for reordering with a scheduler. On top of the seqno we have
the request. Conceptually this is just a fence, but it also has
substantial bookkeeping of its own in order to track the context and
batch in flight, for example. It is the central structure upon which we
can extend with dependency tracking et al.

As regards the objects, they were using the seqno as a simple fence,
upon which we check, or even wait, for command completion. This patch
exchanges that seqno/ring pair with the request itself. For the
most part, the lifetime of the request is ordered by how we retire objects
and then requests. However, both the unlocked waits and probing elsewhere do
not tie into the normal request lifetimes and so we need to introduce a
kref. Extending the objects to use the request as the fence naturally
extends to segregating read/write fence tracking. This is significant
because it reduces the number of semaphores we need to emit, reducing the
likelihood of #54226, and improving performance overall.

v2: Rebase and split out the orthogonal tweaks.

A silly thing happened with this patch. It seemed to nullify our earlier
seqno-vs-interrupt workaround. I could not spot why, but gen6+ started to fail
with missed interrupts (a good test of our robustness handling). So I
ripped out the existing ACTHD read and replaced it with a RING_HEAD to
manually check whether the request is complete. That also had the nice
consequence of forcing __wait_request() to be the central arbiter of
request completion.

The keen-eyed reviewer will also spot that the reset_counter is moved
into the request simplifying __wait_request() callsites and reducing the
number of atomic reads by virtue of moving the check for a pending GPU
reset to the endpoints of GPU access.

v3: Implement the grand plan

Since execlist landed with its upside-down abstraction, unveil the power
of the request to remove all the duplication. To gain access to a ring,
you must allocate a request. To allocate a request you must specify the
context. Ergo all ring commands are carefully tracked by individual
requests (which demarcate a single complete transaction with the GPU) in
a known context (logical partitioning of the GPU).

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Jesse Barnes <jbarnes at virtuousgeek.org>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Damien Lespiau <damien.lespiau at intel.com>
Cc: Oscar Mateo <oscar.mateo at intel.com>
Cc: Brad Volkin <bradley.d.volkin at intel.com>
Cc: "Kukanova, Svetlana" <svetlana.kukanova at intel.com>
Cc: Akash Goel <akash.goel at intel.com>
Cc: "Daniel, Thomas" <thomas.daniel at intel.com>
---
 drivers/gpu/drm/i915/Makefile                |    3 +-
 drivers/gpu/drm/i915/i915_cmd_parser.c       |   10 +-
 drivers/gpu/drm/i915/i915_debugfs.c          |  123 +-
 drivers/gpu/drm/i915/i915_dma.c              |   10 +-
 drivers/gpu/drm/i915/i915_drv.c              |   15 +-
 drivers/gpu/drm/i915/i915_drv.h              |  320 +--
 drivers/gpu/drm/i915/i915_gem.c              | 1435 +++++---------
 drivers/gpu/drm/i915/i915_gem_context.c      |  439 ++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c   |  443 ++---
 drivers/gpu/drm/i915/i915_gem_gtt.c          |  185 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h          |    5 +-
 drivers/gpu/drm/i915/i915_gem_render_state.c |   28 +-
 drivers/gpu/drm/i915/i915_gem_request.c      |  600 ++++++
 drivers/gpu/drm/i915/i915_gem_tiling.c       |    2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c        |  144 +-
 drivers/gpu/drm/i915/i915_irq.c              |  105 +-
 drivers/gpu/drm/i915/i915_reg.h              |    1 +
 drivers/gpu/drm/i915/i915_trace.h            |  202 +-
 drivers/gpu/drm/i915/intel_display.c         |  375 ++--
 drivers/gpu/drm/i915/intel_drv.h             |   12 +-
 drivers/gpu/drm/i915/intel_lrc.c             | 1448 ++------------
 drivers/gpu/drm/i915/intel_lrc.h             |   78 +-
 drivers/gpu/drm/i915/intel_overlay.c         |  222 ++-
 drivers/gpu/drm/i915/intel_pm.c              |   62 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c      | 2758 +++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h      |  278 +--
 26 files changed, 4199 insertions(+), 5104 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_request.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c1dd485..ce89828 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -17,6 +17,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
+	  i915_gem.o \
 	  i915_gem_context.o \
 	  i915_gem_render_state.o \
 	  i915_gem_debug.o \
@@ -24,7 +25,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gem_evict.o \
 	  i915_gem_execbuffer.o \
 	  i915_gem_gtt.o \
-	  i915_gem.o \
+	  i915_gem_request.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
 	  i915_gem_userptr.o \
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index a15fbb7..408e0bd 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -640,12 +640,12 @@ int i915_cmd_parser_init_engine(struct intel_engine_cs *engine)
 	int cmd_table_count;
 	int ret;
 
-	if (!IS_GEN7(engine->dev))
+	if (!IS_GEN7(engine->i915))
 		return 0;
 
 	switch (engine->id) {
 	case RCS:
-		if (IS_HASWELL(engine->dev)) {
+		if (IS_HASWELL(engine->i915)) {
 			cmd_tables = hsw_render_ring_cmds;
 			cmd_table_count =
 				ARRAY_SIZE(hsw_render_ring_cmds);
@@ -657,7 +657,7 @@ int i915_cmd_parser_init_engine(struct intel_engine_cs *engine)
 		engine->reg_table = gen7_render_regs;
 		engine->reg_count = ARRAY_SIZE(gen7_render_regs);
 
-		if (IS_HASWELL(engine->dev)) {
+		if (IS_HASWELL(engine->i915)) {
 			engine->master_reg_table = hsw_master_regs;
 			engine->master_reg_count = ARRAY_SIZE(hsw_master_regs);
 		} else {
@@ -673,7 +673,7 @@ int i915_cmd_parser_init_engine(struct intel_engine_cs *engine)
 		engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
 		break;
 	case BCS:
-		if (IS_HASWELL(engine->dev)) {
+		if (IS_HASWELL(engine->i915)) {
 			cmd_tables = hsw_blt_ring_cmds;
 			cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
 		} else {
@@ -684,7 +684,7 @@ int i915_cmd_parser_init_engine(struct intel_engine_cs *engine)
 		engine->reg_table = gen7_blt_regs;
 		engine->reg_count = ARRAY_SIZE(gen7_blt_regs);
 
-		if (IS_HASWELL(engine->dev)) {
+		if (IS_HASWELL(engine->i915)) {
 			engine->master_reg_table = hsw_master_regs;
 			engine->master_reg_count = ARRAY_SIZE(hsw_master_regs);
 		} else {
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a7b9f37..8580910 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -122,10 +122,11 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 	struct i915_vma *vma;
 	int pin_count = 0;
 
-	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
+	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
@@ -133,9 +134,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
-		   obj->last_read_seqno,
-		   obj->last_write_seqno,
-		   obj->last_fenced_seqno,
+		   i915_request_seqno(rq),
+		   i915_request_seqno(obj->last_write.request),
+		   i915_request_seqno(obj->last_fence.request),
 		   i915_cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -168,15 +169,15 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		*t = '\0';
 		seq_printf(m, " (%s mappable)", s);
 	}
-	if (obj->ring != NULL)
-		seq_printf(m, " (%s)", obj->ring->name);
+	if (rq)
+		seq_printf(m, " (%s)", rq->engine->name);
 	if (obj->frontbuffer_bits)
 		seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
 }
 
 static void describe_ctx(struct seq_file *m, struct intel_context *ctx)
 {
-	seq_putc(m, ctx->legacy_hw_ctx.initialized ? 'I' : 'i');
+	seq_putc(m, ctx->ring[RCS].initialized ? 'I' : 'i');
 	seq_putc(m, ctx->remap_slice ? 'R' : 'r');
 	seq_putc(m, ' ');
 }
@@ -336,7 +337,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 			if (ppgtt->file_priv != stats->file_priv)
 				continue;
 
-			if (obj->ring) /* XXX per-vma statistic */
+			if (obj->active) /* XXX per-vma statistic */
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -346,7 +347,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 	} else {
 		if (i915_gem_obj_ggtt_bound(obj)) {
 			stats->global += obj->base.size;
-			if (obj->ring)
+			if (obj->active)
 				stats->active += obj->base.size;
 			else
 				stats->inactive += obj->base.size;
@@ -574,7 +575,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *engine;
-	struct drm_i915_gem_request *gem_request;
+	struct i915_gem_request *rq;
 	int ret, count, i;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -583,16 +584,14 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
 
 	count = 0;
 	for_each_engine(engine, dev_priv, i) {
-		if (list_empty(&engine->request_list))
+		if (list_empty(&engine->requests))
 			continue;
 
 		seq_printf(m, "%s requests:\n", engine->name);
-		list_for_each_entry(gem_request,
-				    &engine->request_list,
-				    list) {
+		list_for_each_entry(rq, &engine->requests, engine_list) {
 			seq_printf(m, "    %d @ %d\n",
-				   gem_request->seqno,
-				   (int) (jiffies - gem_request->emitted_jiffies));
+				   rq->seqno,
+				   (int)(jiffies - rq->emitted_jiffies));
 		}
 		count++;
 	}
@@ -609,7 +608,7 @@ static void i915_ring_seqno_info(struct seq_file *m,
 {
 	if (engine->get_seqno) {
 		seq_printf(m, "Current sequence (%s): %u\n",
-			   engine->name, engine->get_seqno(engine, false));
+			   engine->name, engine->get_seqno(engine));
 	}
 }
 
@@ -1677,12 +1676,10 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static void describe_ctx_ringbuf(struct seq_file *m,
-				 struct intel_ringbuffer *ringbuf)
+static void describe_ring(struct seq_file *m, struct intel_ringbuffer *ring)
 {
 	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
-		   ringbuf->space, ringbuf->head, ringbuf->tail,
-		   ringbuf->last_retired_head);
+		   ring->space, ring->head, ring->tail, ring->retired_head);
 }
 
 static int i915_context_status(struct seq_file *m, void *unused)
@@ -1704,17 +1701,7 @@ static int i915_context_status(struct seq_file *m, void *unused)
 		seq_putc(m, '\n');
 	}
 
-	if (dev_priv->ips.renderctx) {
-		seq_puts(m, "render context ");
-		describe_obj(m, dev_priv->ips.renderctx);
-		seq_putc(m, '\n');
-	}
-
 	list_for_each_entry(ctx, &dev_priv->context_list, link) {
-		if (!i915.enable_execlists &&
-		    ctx->legacy_hw_ctx.rcs_state == NULL)
-			continue;
-
 		seq_puts(m, "HW context ");
 		describe_ctx(m, ctx);
 		for_each_engine(engine, dev_priv, i) {
@@ -1723,23 +1710,17 @@ static int i915_context_status(struct seq_file *m, void *unused)
 					   engine->name);
 		}
 
-		if (i915.enable_execlists) {
+		seq_putc(m, '\n');
+		for_each_engine(engine, dev_priv, i) {
+			struct drm_i915_gem_object *obj = ctx->ring[i].state;
+			struct intel_ringbuffer *ring = ctx->ring[i].ring;
+
+			seq_printf(m, "%s: ", engine->name);
+			if (obj)
+				describe_obj(m, obj);
+			if (ring)
+				describe_ring(m, ring);
 			seq_putc(m, '\n');
-			for_each_engine(engine, dev_priv, i) {
-				struct drm_i915_gem_object *ctx_obj =
-					ctx->ring[i].state;
-				struct intel_ringbuffer *ringbuf =
-					ctx->ring[i].ringbuf;
-
-				seq_printf(m, "%s: ", engine->name);
-				if (ctx_obj)
-					describe_obj(m, ctx_obj);
-				if (ringbuf)
-					describe_ctx_ringbuf(m, ringbuf);
-				seq_putc(m, '\n');
-			}
-		} else {
-			describe_obj(m, ctx->legacy_hw_ctx.rcs_state);
 		}
 
 		seq_putc(m, '\n');
@@ -1778,10 +1759,15 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 			if (ctx_obj) {
 				struct page *page = i915_gem_object_get_page(ctx_obj, 1);
 				uint32_t *reg_state = kmap_atomic(page);
+				struct task_struct *task;
 				int j;
 
-				seq_printf(m, "CONTEXT: %s %u\n", engine->name,
-						intel_execlists_ctx_id(ctx_obj));
+				seq_printf(m, "CONTEXT: %s", engine->name);
+
+				rcu_read_lock();
+				task = ctx->file_priv ? pid_task(ctx->file_priv->file->pid, PIDTYPE_PID) : NULL;
+				seq_printf(m, " %d:%d\n", task ? task->pid : 0, ctx->file_priv ? ctx->user_handle : 0);
+				rcu_read_unlock();
 
 				for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
 					seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
@@ -1826,7 +1812,7 @@ static int i915_execlists(struct seq_file *m, void *data)
 		return ret;
 
 	for_each_engine(engine, dev_priv, ring_id) {
-		struct intel_ctx_submit_request *head_req = NULL;
+		struct i915_gem_request *rq = NULL;
 		int count = 0;
 		unsigned long flags;
 
@@ -1856,21 +1842,25 @@ static int i915_execlists(struct seq_file *m, void *data)
 		}
 
 		spin_lock_irqsave(&engine->execlist_lock, flags);
-		list_for_each(cursor, &engine->execlist_queue)
+		list_for_each(cursor, &engine->pending)
 			count++;
-		head_req = list_first_entry_or_null(&engine->execlist_queue,
-				struct intel_ctx_submit_request, execlist_link);
+		rq = list_first_entry_or_null(&engine->pending, typeof(*rq), engine_list);
 		spin_unlock_irqrestore(&engine->execlist_lock, flags);
 
 		seq_printf(m, "\t%d requests in queue\n", count);
-		if (head_req) {
-			struct drm_i915_gem_object *ctx_obj;
-
-			ctx_obj = head_req->ctx->ring[ring_id].state;
-			seq_printf(m, "\tHead request id: %u\n",
-				   intel_execlists_ctx_id(ctx_obj));
-			seq_printf(m, "\tHead request tail: %u\n",
-				   head_req->tail);
+		if (rq) {
+			struct intel_context *ctx = rq->ctx;
+			struct task_struct *task;
+
+			seq_printf(m, "\tHead request ctx:");
+
+			rcu_read_lock();
+			task = ctx->file_priv ? pid_task(ctx->file_priv->file->pid, PIDTYPE_PID) : NULL;
+			seq_printf(m, " %d:%d\n", task ? task->pid : 0, ctx->file_priv ? ctx->user_handle : 0);
+			rcu_read_unlock();
+
+			seq_printf(m, "\tHead request tail: %u\n", rq->tail);
+			seq_printf(m, "\tHead request seqno: %d\n", rq->seqno);
 		}
 
 		seq_putc(m, '\n');
@@ -2529,7 +2519,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
 	int num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
 	int i, j, ret;
 
-	if (!i915_semaphore_is_enabled(dev)) {
+	if (!i915_semaphore_is_enabled(dev_priv)) {
 		seq_puts(m, "Semaphores are disabled\n");
 		return 0;
 	}
@@ -2578,15 +2568,6 @@ static int i915_semaphore_status(struct seq_file *m, void *unused)
 		seq_putc(m, '\n');
 	}
 
-	seq_puts(m, "\nSync seqno:\n");
-	for_each_engine(engine, dev_priv, i) {
-		for (j = 0; j < num_rings; j++) {
-			seq_printf(m, "  0x%08x ", engine->semaphore.sync_seqno[j]);
-		}
-		seq_putc(m, '\n');
-	}
-	seq_putc(m, '\n');
-
 	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index d681226..e74df1c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -177,7 +177,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = 1;
 		break;
 	case I915_PARAM_HAS_SEMAPHORES:
-		value = i915_semaphore_is_enabled(dev);
+		value = i915_semaphore_is_enabled(dev_priv);
 		break;
 	case I915_PARAM_HAS_PRIME_VMAP_FLUSH:
 		value = 1;
@@ -511,8 +511,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_cleanup_ringbuffer(dev);
-	i915_gem_context_fini(dev);
+	i915_gem_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
 	drm_irq_uninstall(dev);
@@ -721,6 +720,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	if (!drm_core_check_feature(dev, DRIVER_MODESET) && !dev->agp)
 		return -EINVAL;
 
+	BUILD_BUG_ON(I915_NUM_ENGINES >= (1 << I915_NUM_ENGINE_BITS));
+
 	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
 	if (dev_priv == NULL)
 		return -ENOMEM;
@@ -1020,8 +1021,7 @@ int i915_driver_unload(struct drm_device *dev)
 		flush_workqueue(dev_priv->wq);
 
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_cleanup_ringbuffer(dev);
-		i915_gem_context_fini(dev);
+		i915_gem_fini(dev);
 		mutex_unlock(&dev->struct_mutex);
 		i915_gem_cleanup_stolen(dev);
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5e9c3ac..5456656 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -473,9 +473,9 @@ void intel_detect_pch(struct drm_device *dev)
 	pci_dev_put(pch);
 }
 
-bool i915_semaphore_is_enabled(struct drm_device *dev)
+bool i915_semaphore_is_enabled(struct drm_i915_private *dev_priv)
 {
-	if (INTEL_INFO(dev)->gen < 6)
+	if (INTEL_INFO(dev_priv)->gen < 6)
 		return false;
 
 	if (i915.semaphores >= 0)
@@ -486,12 +486,12 @@ bool i915_semaphore_is_enabled(struct drm_device *dev)
 		return false;
 
 	/* Until we get further testing... */
-	if (IS_GEN8(dev))
+	if (IS_GEN8(dev_priv))
 		return false;
 
 #ifdef CONFIG_INTEL_IOMMU
 	/* Enable semaphores on SNB when IO remapping is off */
-	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
+	if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
 		return false;
 #endif
 
@@ -804,10 +804,7 @@ int i915_reset(struct drm_device *dev)
 
 	mutex_lock(&dev->struct_mutex);
 
-	i915_gem_reset(dev);
-
 	simulated = dev_priv->gpu_error.stop_rings != 0;
-
 	ret = intel_gpu_reset(dev);
 
 	/* Also reset the gpu hangman. */
@@ -821,6 +818,10 @@ int i915_reset(struct drm_device *dev)
 		}
 	}
 
+	if (ret == 0)
+		atomic_inc(&dev_priv->gpu_error.reset_counter);
+	i915_gem_reset(dev);
+
 	if (ret) {
 		DRM_ERROR("Failed to reset chip: %i\n", ret);
 		mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9c26e6e..d38117d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -191,6 +191,7 @@ enum hpd_pin {
 
 struct drm_i915_private;
 struct i915_mmu_object;
+struct i915_gem_request;
 
 enum intel_dpll_id {
 	DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -341,6 +342,7 @@ struct drm_i915_error_state {
 	struct drm_i915_error_object *semaphore_obj;
 
 	struct drm_i915_error_ring {
+		int id;
 		bool valid;
 		/* Software tracked state */
 		bool waiting;
@@ -352,11 +354,10 @@ struct drm_i915_error_state {
 		u32 cpu_ring_head;
 		u32 cpu_ring_tail;
 
-		u32 semaphore_seqno[I915_NUM_ENGINES - 1];
-
 		/* Register state */
 		u32 tail;
 		u32 head;
+		u32 start;
 		u32 ctl;
 		u32 hws;
 		u32 ipeir;
@@ -366,6 +367,7 @@ struct drm_i915_error_state {
 		u32 instpm;
 		u32 instps;
 		u32 seqno;
+		u32 breadcrumb[I915_NUM_ENGINES];
 		u64 bbaddr;
 		u64 acthd;
 		u32 fault_reg;
@@ -381,8 +383,13 @@ struct drm_i915_error_state {
 
 		struct drm_i915_error_request {
 			long jiffies;
-			u32 seqno;
+			long pid;
+			u32 batch;
+			u32 head;
 			u32 tail;
+			u32 seqno;
+			u32 breadcrumb[I915_NUM_ENGINES];
+			u32 complete;
 		} *requests;
 
 		struct {
@@ -472,10 +479,10 @@ struct drm_i915_display_funcs {
 			  struct drm_display_mode *mode);
 	void (*fdi_link_train)(struct drm_crtc *crtc);
 	void (*init_clock_gating)(struct drm_device *dev);
-	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
+	int (*queue_flip)(struct i915_gem_request *rq,
+			  struct intel_crtc *crtc,
 			  struct drm_framebuffer *fb,
 			  struct drm_i915_gem_object *obj,
-			  struct intel_engine_cs *engine,
 			  uint32_t flags);
 	void (*update_primary_plane)(struct drm_crtc *crtc,
 				     struct drm_framebuffer *fb,
@@ -622,22 +629,17 @@ struct i915_ctx_hang_stats {
  */
 struct intel_context {
 	struct kref ref;
+	struct drm_i915_private *i915;
 	int user_handle;
 	uint8_t remap_slice;
 	struct drm_i915_file_private *file_priv;
 	struct i915_ctx_hang_stats hang_stats;
 	struct i915_hw_ppgtt *ppgtt;
 
-	/* Legacy ring buffer submission */
-	struct {
-		struct drm_i915_gem_object *rcs_state;
-		bool initialized;
-	} legacy_hw_ctx;
-
-	/* Execlists */
-	struct {
+	struct intel_engine_context {
+		struct intel_ringbuffer *ring;
 		struct drm_i915_gem_object *state;
-		struct intel_ringbuffer *ringbuf;
+		bool initialized;
 	} ring[I915_NUM_ENGINES];
 
 	struct list_head link;
@@ -1002,7 +1004,6 @@ struct intel_ilk_power_mgmt {
 	int r_t;
 
 	struct drm_i915_gem_object *pwrctx;
-	struct drm_i915_gem_object *renderctx;
 };
 
 struct drm_i915_private;
@@ -1431,8 +1432,9 @@ struct drm_i915_private {
 
 	struct pci_dev *bridge_dev;
 	struct intel_engine_cs engine[I915_NUM_ENGINES];
+	struct intel_context *default_context;
 	struct drm_i915_gem_object *semaphore_obj;
-	uint32_t last_seqno, next_seqno;
+	uint32_t next_seqno;
 
 	drm_dma_handle_t *status_page_dmah;
 	struct resource mch_res;
@@ -1625,21 +1627,6 @@ struct drm_i915_private {
 
 	/* Old ums support infrastructure, same warning applies. */
 	struct i915_ums_state ums;
-
-	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
-	struct {
-		int (*do_execbuf)(struct drm_device *dev, struct drm_file *file,
-				  struct intel_engine_cs *engine,
-				  struct intel_context *ctx,
-				  struct drm_i915_gem_execbuffer2 *args,
-				  struct list_head *vmas,
-				  struct drm_i915_gem_object *batch_obj,
-				  u64 exec_start, u32 flags);
-		int (*init_rings)(struct drm_device *dev);
-		void (*cleanup_ring)(struct intel_engine_cs *engine);
-		void (*stop_ring)(struct intel_engine_cs *engine);
-	} gt;
-
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -1719,16 +1706,15 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	struct list_head ring_list;
 	/** Used in execbuf to temporarily hold a ref */
 	struct list_head obj_exec_link;
 
 	/**
 	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
+	 * rendering and so a submitted request), and is not set if it is on
+	 * inactive (ready to be unbound) list. We track activity per engine.
 	 */
-	unsigned int active:1;
+	unsigned int active:I915_NUM_ENGINE_BITS;
 
 	/**
 	 * This is set if the object has been written to since last bound
@@ -1796,13 +1782,11 @@ struct drm_i915_gem_object {
 	void *dma_buf_vmapping;
 	int vmapping_count;
 
-	struct intel_engine_cs *ring;
-
-	/** Breadcrumb of last rendering to the buffer. */
-	uint32_t last_read_seqno;
-	uint32_t last_write_seqno;
-	/** Breadcrumb of last fenced GPU access to the buffer. */
-	uint32_t last_fenced_seqno;
+	/** Breadcrumbs of last rendering to the buffer. */
+	struct {
+		struct i915_gem_request *request;
+		struct list_head engine_list;
+	} last_write, last_read[I915_NUM_ENGINES], last_fence;
 
 	/** Current tiling stride for the object, if it's tiled. */
 	uint32_t stride;
@@ -1835,49 +1819,20 @@ struct drm_i915_gem_object {
 };
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj);
+
 void i915_gem_track_fb(struct drm_i915_gem_object *old,
 		       struct drm_i915_gem_object *new,
 		       unsigned frontbuffer_bits);
 
 /**
- * Request queue structure.
- *
- * The request queue allows us to note sequence numbers that have been emitted
- * and may be associated with active buffers to be retired.
- *
- * By keeping this list, we can avoid having to do questionable
- * sequence-number comparisons on buffer last_rendering_seqnos, and associate
- * an emission time with seqnos for tracking how far ahead of the GPU we are.
+ * Returns true if seq1 is later than or equal to seq2 (wrap-safe).
  */
-struct drm_i915_gem_request {
-	/** On Which ring this request was generated */
-	struct intel_engine_cs *engine;
-
-	/** GEM sequence number associated with this request. */
-	uint32_t seqno;
-
-	/** Position in the ringbuffer of the start of the request */
-	u32 head;
-
-	/** Position in the ringbuffer of the end of the request */
-	u32 tail;
-
-	/** Context related to this request */
-	struct intel_context *ctx;
-
-	/** Batch buffer related to this request if any */
-	struct drm_i915_gem_object *batch_obj;
-
-	/** Time at which this request was emitted, in jiffies. */
-	unsigned long emitted_jiffies;
-
-	/** global list entry for this request */
-	struct list_head list;
-
-	struct drm_i915_file_private *file_priv;
-	/** file_priv list entry for this request */
-	struct list_head client_list;
-};
+static inline bool
+__i915_seqno_passed(uint32_t seq1, uint32_t seq2)
+{
+	return (int32_t)(seq1 - seq2) >= 0;
+}
 
 struct drm_i915_file_private {
 	struct drm_i915_private *dev_priv;
@@ -2071,7 +2026,7 @@ struct drm_i915_cmd_table {
 				 to_i915(dev)->ellc_size)
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
-#define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
+#define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 5)
 #define HAS_LOGICAL_RING_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 8)
 #define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6)
 #define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev))
@@ -2179,7 +2134,7 @@ struct i915_params {
 };
 extern struct i915_params i915 __read_mostly;
 
-				/* i915_dma.c */
+/* i915_dma.c */
 extern int i915_driver_load(struct drm_device *, unsigned long flags);
 extern int i915_driver_unload(struct drm_device *);
 extern int i915_driver_open(struct drm_device *dev, struct drm_file *file);
@@ -2349,22 +2304,12 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
-			 struct intel_engine_cs *to);
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *engine);
+			 struct i915_gem_request *rq);
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
-/**
- * Returns true if seq1 is later than seq2.
- */
-static inline bool
-i915_seqno_passed(uint32_t seq1, uint32_t seq2)
-{
-	return (int32_t)(seq1 - seq2) >= 0;
-}
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
 int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
@@ -2374,14 +2319,8 @@ int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
 bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj);
 void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj);
 
-struct drm_i915_gem_request *
-i915_gem_find_active_request(struct intel_engine_cs *engine);
-
 bool i915_gem_retire_requests(struct drm_device *dev);
 void i915_gem_retire_requests__engine(struct intel_engine_cs *engine);
-int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
-				      bool interruptible);
-int __must_check i915_gem_check_olr(struct intel_engine_cs *engine, u32 seqno);
 
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
@@ -2416,19 +2355,10 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
-int i915_gem_l3_remap(struct intel_engine_cs *engine, int slice);
+void i915_gem_fini(struct drm_device *dev);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
-int __i915_add_request(struct intel_engine_cs *engine,
-		       struct drm_file *file,
-		       struct drm_i915_gem_object *batch_obj,
-		       u32 *seqno);
-#define i915_add_request(ring, seqno) \
-	__i915_add_request(ring, NULL, NULL, seqno)
-int __must_check i915_wait_seqno(struct intel_engine_cs *engine,
-				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int __must_check
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
@@ -2438,7 +2368,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
 int __must_check
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_engine_cs *pipelined);
+				     struct i915_gem_request *pipelined);
 void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj);
 int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 				int align);
@@ -2485,13 +2415,10 @@ static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) {
 }
 
 /* Some GGTT VM helpers */
-#define i915_obj_to_ggtt(obj) \
-	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
+#define i915_obj_to_ggtt(obj) (&to_i915((obj)->base.dev)->gtt.base)
 static inline bool i915_is_ggtt(struct i915_address_space *vm)
 {
-	struct i915_address_space *ggtt =
-		&((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
-	return vm == ggtt;
+	return vm == &to_i915(vm->dev)->gtt.base;
 }
 
 static inline struct i915_hw_ppgtt *
@@ -2540,11 +2467,10 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
 /* i915_gem_context.c */
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
-void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
 int i915_gem_context_enable(struct drm_i915_private *dev_priv);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
-int i915_switch_context(struct intel_engine_cs *engine,
+int i915_switch_context(struct i915_gem_request *rq,
 			struct intel_context *to);
 struct intel_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
@@ -2572,7 +2498,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
 
 /* i915_gem_render_state.c */
-int i915_gem_render_state_init(struct intel_engine_cs *engine);
+int i915_gem_render_state_init(struct i915_gem_request *rq);
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  struct i915_address_space *vm,
@@ -2592,6 +2518,162 @@ static inline void i915_gem_chipset_flush(struct drm_device *dev)
 		intel_gtt_chipset_flush();
 }
 
+/* i915_gem_request.c */
+
+/**
+ * Request queue structure.
+ *
+ * The request queue allows us to note sequence numbers that have been emitted
+ * and may be associated with active buffers to be retired.
+ *
+ * By keeping this list, we can avoid having to do questionable
+ * sequence-number comparisons on buffer last_rendering_seqnos, and associate
+ * an emission time with seqnos for tracking how far ahead of the GPU we are.
+ */
+struct i915_gem_request {
+	struct kref kref; /* see i915_request_get() / i915_request_put() */
+
+	/** On which ring/engine/ctx this request was generated */
+	struct drm_i915_private *i915;
+	struct intel_context *ctx;
+	struct intel_engine_cs *engine;
+	struct intel_ringbuffer *ring;
+
+	unsigned reset_counter; /* NOTE(review): presumably the GPU reset count sampled for this request - confirm */
+
+	/** GEM sequence number/breadcrumb associated with this request. */
+	u32 seqno;
+	u32 breadcrumb[I915_NUM_ENGINES]; /* indexed by engine id */
+
+	/** Position in the ringbuffer of the start of the request */
+	u32 head;
+	/** Position in the ringbuffer of the end of the request */
+	u32 tail;
+
+	/** Batch buffer related to this request if any */
+	struct drm_i915_gem_object *batch_obj;
+	struct list_head vmas;
+
+	u32 semaphore[I915_NUM_ENGINES];
+
+	/** Time at which this request was emitted, in jiffies. */
+	unsigned long emitted_jiffies;
+
+	/** global list entry for this request */
+	struct list_head engine_list; /* linked on engine->requests */
+	struct list_head breadcrumb_list;
+
+	struct drm_i915_file_private *file_priv;
+	/** file_priv list entry for this request */
+	struct list_head client_list;
+
+	unsigned remap_l3:8;
+	unsigned pending_flush:4;
+	bool outstanding:1;
+	bool completed:1; /* latched to true by i915_request_complete() */
+};
+
+static inline struct intel_engine_cs *i915_request_engine(struct i915_gem_request *rq)
+{
+	return rq ? rq->engine : NULL; /* NULL-safe: no request means no engine */
+}
+
+static inline int i915_request_engine_id(struct i915_gem_request *rq)
+{
+	return rq ? rq->engine->id : -1; /* -1 for a NULL request */
+}
+
+static inline u32 i915_request_seqno(struct i915_gem_request *rq)
+{
+	return rq ? rq->seqno : 0; /* 0 for a NULL request */
+}
+
+bool __i915_request_complete__wa(struct i915_gem_request *rq);
+
+static inline bool
+i915_request_complete(struct i915_gem_request *rq)
+{
+	if (!rq->completed && /* already latched: skip reading the engine seqno */
+	    __i915_seqno_passed(rq->engine->get_seqno(rq->engine),
+				rq->seqno)) {
+		trace_i915_gem_request_complete(rq);
+		rq->completed = true; /* latch, so later calls return early */
+	}
+	return rq->completed; /* rq is dereferenced unconditionally: must not be NULL */
+}
+
+static inline struct i915_gem_request *
+i915_request_get(struct i915_gem_request *rq)
+{
+	if (rq) /* NULL is accepted and passed straight through */
+		kref_get(&rq->kref);
+	return rq; /* returns its argument so the call can be used in an assignment */
+}
+
+void __i915_request_free(struct kref *kref);
+
+static inline void
+i915_request_put(struct i915_gem_request *rq)
+{
+	if (rq == NULL)
+		return;
+	/* Caller must hold struct_mutex; the final put calls __i915_request_free(). */
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+	kref_put(&rq->kref, __i915_request_free);
+}
+
+static inline void
+i915_request_put__unlocked(struct i915_gem_request *rq)
+{
+	if (rq == NULL)
+		return;
+	/* Decrement without the lock unless the count is 1 (possibly the last reference). */
+	if (!atomic_add_unless(&rq->kref.refcount, -1, 1)) {
+		struct drm_device *dev = rq->i915->dev; /* read now: rq may be freed below */
+
+		mutex_lock(&dev->struct_mutex); /* so that any free runs under struct_mutex */
+		if (likely(atomic_dec_and_test(&rq->kref.refcount)))
+			__i915_request_free(&rq->kref);
+		mutex_unlock(&dev->struct_mutex);
+	}
+}
+
+int __must_check
+i915_request_add_vma(struct i915_gem_request *rq,
+		     struct i915_vma *vma,
+		     unsigned fenced);
+#define VMA_IS_FENCED 0x1
+#define VMA_HAS_FENCE 0x2
+int __must_check
+i915_request_emit_flush(struct i915_gem_request *rq,
+			unsigned flags);
+int __must_check
+__i915_request_emit_breadcrumb(struct i915_gem_request *rq, int id);
+static inline int __must_check
+i915_request_emit_breadcrumb(struct i915_gem_request *rq)
+{
+	return __i915_request_emit_breadcrumb(rq, rq->engine->id); /* id of the request's own engine */
+}
+static inline int __must_check
+i915_request_emit_semaphore(struct i915_gem_request *rq, int id)
+{
+	return __i915_request_emit_breadcrumb(rq, id); /* NOTE(review): same call as a breadcrumb with a caller-chosen id; presumably the target engine - confirm */
+}
+int __must_check
+i915_request_emit_batchbuffer(struct i915_gem_request *rq,
+			      struct drm_i915_gem_object *batch,
+			      uint64_t start, uint32_t len,
+			      unsigned flags);
+int __must_check
+i915_request_commit(struct i915_gem_request *rq);
+int __must_check
+i915_request_wait(struct i915_gem_request *rq);
+int __i915_request_wait(struct i915_gem_request *rq,
+			bool interruptible,
+			s64 *timeout,
+			struct drm_i915_file_private *file);
+void i915_request_retire(struct i915_gem_request *rq);
+
 /* i915_gem_stolen.c */
 int i915_gem_init_stolen(struct drm_device *dev);
 int i915_gem_stolen_setup_compression(struct drm_device *dev, int size, int fb_cpp);
@@ -2757,14 +2839,12 @@ extern void intel_detect_pch(struct drm_device *dev);
 extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
 extern int intel_enable_rc6(const struct drm_device *dev);
 
-extern bool i915_semaphore_is_enabled(struct drm_device *dev);
+extern bool i915_semaphore_is_enabled(struct drm_i915_private *i915);
 int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data,
 			       struct drm_file *file);
 
-void intel_notify_mmio_flip(struct intel_engine_cs *engine);
-
 /* overlay */
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 50e20da..b8dc94e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -44,9 +44,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
 static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly);
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
-
 static void i915_gem_write_fence(struct drm_device *dev, int reg,
 				 struct drm_i915_gem_object *obj);
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
@@ -108,6 +105,81 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
+{
+	intel_fb_obj_flush(obj, true);
+	obj->last_write.request = NULL; /* pointer is cleared only; nothing is put here */
+	list_del_init(&obj->last_write.engine_list); /* unlink from the engine's write_list */
+}
+
+static void
+i915_gem_object_retire__fence(struct drm_i915_gem_object *obj)
+{
+	obj->last_fence.request = NULL; /* pointer is cleared only; nothing is put here */
+	list_del_init(&obj->last_fence.engine_list); /* unlink from the engine's fence_list */
+}
+
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj,
+			     struct intel_engine_cs *engine)
+{
+	struct i915_vma *vma;
+
+	BUG_ON(obj->active == 0);
+	BUG_ON(obj->base.write_domain);
+	/* Stop tracking obj as read by @engine and unlink it from that engine's list. */
+	obj->last_read[engine->id].request = NULL;
+	list_del_init(&obj->last_read[engine->id].engine_list);
+	/* Still counted as active elsewhere: nothing more to do yet. */
+	if (--obj->active)
+		return;
+	/* Last read retired: write and fence tracking must already be gone. */
+	BUG_ON(obj->last_write.request);
+	BUG_ON(obj->last_fence.request);
+	/* Move every vma that sits on an mm_list over to its vm's inactive list. */
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (!list_empty(&vma->mm_list))
+			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
+	}
+	/* A reference on obj is dropped here, so do not touch obj afterwards. */
+	drm_gem_object_unreference(&obj->base);
+
+	WARN_ON(i915_verify_lists(engine->dev));
+}
+
+static void
+i915_gem_object_retire(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_request *rq;
+	int i;
+
+	if (!obj->active)
+		return;
+	/* Retire the read tracking of every engine whose last read has completed. */
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		rq = obj->last_read[i].request;
+		if (rq && i915_request_complete(rq)) {
+			/* read-request is the master request */
+			if (i915_request_engine_id(obj->last_write.request) == i)
+				i915_gem_object_retire__write(obj);
+
+			if (i915_request_engine_id(obj->last_fence.request) == i)
+				i915_gem_object_retire__fence(obj);
+			/* NOTE(review): may drop a reference on obj; presumably the caller holds its own - confirm */
+			i915_gem_object_retire__read(obj, rq->engine);
+		}
+	}
+	/* Write/fence requests not retired by the loop are checked on their own. */
+	rq = obj->last_write.request;
+	if (rq && i915_request_complete(rq))
+		i915_gem_object_retire__write(obj);
+
+	rq = obj->last_fence.request;
+	if (rq && i915_request_complete(rq))
+		i915_gem_object_retire__fence(obj);
+}
+
 static int
 i915_gem_wait_for_error(struct i915_gpu_error *error)
 {
@@ -1073,229 +1145,6 @@ unlock:
 	return ret;
 }
 
-int
-i915_gem_check_wedge(struct i915_gpu_error *error,
-		     bool interruptible)
-{
-	if (i915_reset_in_progress(error)) {
-		/* Non-interruptible callers can't handle -EAGAIN, hence return
-		 * -EIO unconditionally for these. */
-		if (!interruptible)
-			return -EIO;
-
-		/* Recovery complete, but the reset failed ... */
-		if (i915_terminally_wedged(error))
-			return -EIO;
-
-		return -EAGAIN;
-	}
-
-	return 0;
-}
-
-/*
- * Compare seqno against outstanding lazy request. Emit a request if they are
- * equal.
- */
-int
-i915_gem_check_olr(struct intel_engine_cs *engine, u32 seqno)
-{
-	int ret;
-
-	BUG_ON(!mutex_is_locked(&engine->dev->struct_mutex));
-
-	ret = 0;
-	if (seqno == engine->outstanding_lazy_seqno)
-		ret = i915_add_request(engine, NULL);
-
-	return ret;
-}
-
-static void fake_irq(unsigned long data)
-{
-	wake_up_process((struct task_struct *)data);
-}
-
-static bool missed_irq(struct drm_i915_private *dev_priv,
-		       struct intel_engine_cs *engine)
-{
-	return test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings);
-}
-
-static bool can_wait_boost(struct drm_i915_file_private *file_priv)
-{
-	if (file_priv == NULL)
-		return true;
-
-	return !atomic_xchg(&file_priv->rps_wait_boost, true);
-}
-
-/**
- * __wait_seqno - wait until execution of seqno has finished
- * @ring: the ring expected to report seqno
- * @seqno: duh!
- * @reset_counter: reset sequence associated with the given seqno
- * @interruptible: do an interruptible wait (normally yes)
- * @timeout: in - how long to wait (NULL forever); out - how much time remaining
- *
- * Note: It is of utmost importance that the passed in seqno and reset_counter
- * values have been read by the caller in an smp safe manner. Where read-side
- * locks are involved, it is sufficient to read the reset_counter before
- * unlocking the lock that protects the seqno. For lockless tricks, the
- * reset_counter _must_ be read before, and an appropriate smp_rmb must be
- * inserted.
- *
- * Returns 0 if the seqno was found within the alloted time. Else returns the
- * errno with remaining time filled in timeout argument.
- */
-static int __wait_seqno(struct intel_engine_cs *engine, u32 seqno,
-			unsigned reset_counter,
-			bool interruptible,
-			s64 *timeout,
-			struct drm_i915_file_private *file_priv)
-{
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	const bool irq_test_in_progress =
-		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_engine_flag(engine);
-	DEFINE_WAIT(wait);
-	unsigned long timeout_expire;
-	s64 before, now;
-	int ret;
-
-	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
-
-	if (i915_seqno_passed(engine->get_seqno(engine, true), seqno))
-		return 0;
-
-	timeout_expire = timeout ? jiffies + nsecs_to_jiffies((u64)*timeout) : 0;
-
-	if (INTEL_INFO(dev)->gen >= 6 && engine->id == RCS && can_wait_boost(file_priv)) {
-		gen6_rps_boost(dev_priv);
-		if (file_priv)
-			mod_delayed_work(dev_priv->wq,
-					 &file_priv->mm.idle_work,
-					 msecs_to_jiffies(100));
-	}
-
-	if (!irq_test_in_progress && WARN_ON(!engine->irq_get(engine)))
-		return -ENODEV;
-
-	/* Record current time in case interrupted by signal, or wedged */
-	trace_i915_gem_request_wait_begin(engine, seqno);
-	before = ktime_get_raw_ns();
-	for (;;) {
-		struct timer_list timer;
-
-		prepare_to_wait(&engine->irq_queue, &wait,
-				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
-
-		/* We need to check whether any gpu reset happened in between
-		 * the caller grabbing the seqno and now ... */
-		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
-			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
-			 * is truely gone. */
-			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-			if (ret == 0)
-				ret = -EAGAIN;
-			break;
-		}
-
-		if (i915_seqno_passed(engine->get_seqno(engine, false), seqno)) {
-			ret = 0;
-			break;
-		}
-
-		if (interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		if (timeout && time_after_eq(jiffies, timeout_expire)) {
-			ret = -ETIME;
-			break;
-		}
-
-		timer.function = NULL;
-		if (timeout || missed_irq(dev_priv, engine)) {
-			unsigned long expire;
-
-			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
-			expire = missed_irq(dev_priv, engine) ? jiffies + 1 : timeout_expire;
-			mod_timer(&timer, expire);
-		}
-
-		io_schedule();
-
-		if (timer.function) {
-			del_singleshot_timer_sync(&timer);
-			destroy_timer_on_stack(&timer);
-		}
-	}
-	now = ktime_get_raw_ns();
-	trace_i915_gem_request_wait_end(engine, seqno);
-
-	if (!irq_test_in_progress)
-		engine->irq_put(engine);
-
-	finish_wait(&engine->irq_queue, &wait);
-
-	if (timeout) {
-		s64 tres = *timeout - (now - before);
-
-		*timeout = tres < 0 ? 0 : tres;
-	}
-
-	return ret;
-}
-
-/**
- * Waits for a sequence number to be signaled, and cleans up the
- * request and object lists appropriately for that event.
- */
-int
-i915_wait_seqno(struct intel_engine_cs *engine, uint32_t seqno)
-{
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	bool interruptible = dev_priv->mm.interruptible;
-	int ret;
-
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-	BUG_ON(seqno == 0);
-
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_check_olr(engine, seqno);
-	if (ret)
-		return ret;
-
-	return __wait_seqno(engine, seqno,
-			    atomic_read(&dev_priv->gpu_error.reset_counter),
-			    interruptible, NULL, NULL);
-}
-
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj,
-				     struct intel_engine_cs *engine)
-{
-	if (!obj->active)
-		return 0;
-
-	/* Manually manage the write flush as we may have not yet
-	 * retired the buffer.
-	 *
-	 * Note that the last_write_seqno is always the earlier of
-	 * the two (read/write) seqno, so if we haved successfully waited,
-	 * we know we have passed the last write.
-	 */
-	obj->last_write_seqno = 0;
-
-	return 0;
-}
-
 /**
  * Ensures that all rendering to the object has completed and the object is
  * safe to unbind from the GTT or access from the CPU.
@@ -1304,19 +1153,27 @@ static __must_check int
 i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
 			       bool readonly)
 {
-	struct intel_engine_cs *engine = obj->ring;
-	u32 seqno;
-	int ret;
+	int i, ret;
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
-		return 0;
+	if (readonly) {
+		if (obj->last_write.request == NULL)
+			return 0;
 
-	ret = i915_wait_seqno(engine, seqno);
-	if (ret)
-		return ret;
+		ret = i915_request_wait(obj->last_write.request);
+		if (ret)
+			return ret;
+	} else {
+		for (i = 0; i < I915_NUM_ENGINES; i++) {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			ret = i915_request_wait(obj->last_read[i].request);
+			if (ret)
+				return ret;
+		}
+	}
 
-	return i915_gem_object_wait_rendering__tail(obj, engine);
+	return 0;
 }
 
 /* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1329,34 +1186,45 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = obj->ring;
-	unsigned reset_counter;
-	u32 seqno;
-	int ret;
+	struct i915_gem_request *rq[I915_NUM_ENGINES] = {};
+	int i, n, ret;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 	BUG_ON(!dev_priv->mm.interruptible);
 
-	seqno = readonly ? obj->last_write_seqno : obj->last_read_seqno;
-	if (seqno == 0)
+	n = 0;
+	if (readonly) {
+		if (obj->last_write.request)
+			rq[n++] = i915_request_get(obj->last_write.request);
+	} else {
+		for (i = 0; i < I915_NUM_ENGINES; i++)
+			if (obj->last_read[i].request)
+				rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+	if (n == 0)
 		return 0;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
-	if (ret)
-		return ret;
-
-	ret = i915_gem_check_olr(engine, seqno);
-	if (ret)
-		return ret;
+	for (i = 0; i < n; i++) {
+		ret = i915_request_emit_breadcrumb(rq[i]);
+		if (ret)
+			goto out;
+	}
 
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 	mutex_unlock(&dev->struct_mutex);
-	ret = __wait_seqno(engine, seqno, reset_counter, true, NULL, file_priv);
+
+	for (i = 0; i < n; i++) {
+		ret = __i915_request_wait(rq[i], true, NULL, file_priv);
+		if (ret)
+			break;
+	}
+
 	mutex_lock(&dev->struct_mutex);
-	if (ret)
-		return ret;
 
-	return i915_gem_object_wait_rendering__tail(obj, engine);
+out:
+	for (i = 0; i < n; i++)
+		i915_request_put(rq[i]);
+
+	return ret;
 }
 
 /**
@@ -2147,278 +2015,37 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *engine)
-{
-	u32 seqno = intel_engine_get_seqno(engine);
-
-	BUG_ON(engine == NULL);
-	if (obj->ring != engine && obj->last_write_seqno) {
-		/* Keep the seqno relative to the current ring */
-		obj->last_write_seqno = seqno;
-	}
-	obj->ring = engine;
-
-	/* Add a reference if we're newly entering the active list. */
-	if (!obj->active) {
-		drm_gem_object_reference(&obj->base);
-		obj->active = 1;
-	}
-
-	list_move_tail(&obj->ring_list, &engine->active_list);
-
-	obj->last_read_seqno = seqno;
-}
-
-void i915_vma_move_to_active(struct i915_vma *vma,
-			     struct intel_engine_cs *engine)
-{
-	list_move_tail(&vma->mm_list, &vma->vm->active_list);
-	return i915_gem_object_move_to_active(vma->obj, engine);
-}
-
-static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-	struct i915_address_space *vm;
-	struct i915_vma *vma;
-
-	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
-	BUG_ON(!obj->active);
-
-	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
-		vma = i915_gem_obj_to_vma(obj, vm);
-		if (vma && !list_empty(&vma->mm_list))
-			list_move_tail(&vma->mm_list, &vm->inactive_list);
-	}
-
-	intel_fb_obj_flush(obj, true);
-
-	list_del_init(&obj->ring_list);
-	obj->ring = NULL;
-
-	obj->last_read_seqno = 0;
-	obj->last_write_seqno = 0;
-	obj->base.write_domain = 0;
-
-	obj->last_fenced_seqno = 0;
-
-	obj->active = 0;
-	drm_gem_object_unreference(&obj->base);
-
-	WARN_ON(i915_verify_lists(dev));
-}
-
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *engine = obj->ring;
-
-	if (engine == NULL)
-		return;
-
-	if (i915_seqno_passed(engine->get_seqno(engine, true),
-			      obj->last_read_seqno))
-		i915_gem_object_move_to_inactive(obj);
-}
-
-static int
-i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine;
-	int ret, i, j;
-
-	/* Carefully retire all requests without writing to the rings */
-	for_each_engine(engine, dev_priv, i) {
-		ret = intel_engine_idle(engine);
-		if (ret)
-			return ret;
-	}
-	i915_gem_retire_requests(dev);
-
-	/* Finally reset hw state */
-	for_each_engine(engine, dev_priv, i) {
-		intel_engine_init_seqno(engine, seqno);
-
-		for (j = 0; j < ARRAY_SIZE(engine->semaphore.sync_seqno); j++)
-			engine->semaphore.sync_seqno[j] = 0;
-	}
-
-	return 0;
-}
-
 int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
+	struct intel_engine_cs *engine;
+	int i, ret;
 
 	if (seqno == 0)
 		return -EINVAL;
 
-	/* HWS page needs to be set less than what we
-	 * will inject to ring
-	 */
-	ret = i915_gem_init_seqno(dev, seqno - 1);
-	if (ret)
-		return ret;
-
-	/* Carefully set the last_seqno value so that wrap
-	 * detection still works
-	 */
-	dev_priv->next_seqno = seqno;
-	dev_priv->last_seqno = seqno - 1;
-	if (dev_priv->last_seqno == 0)
-		dev_priv->last_seqno--;
-
-	return 0;
-}
-
-int
-i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	if (seqno == dev_priv->next_seqno)
+		return 0;
 
-	/* reserve 0 for non-seqno */
-	if (dev_priv->next_seqno == 0) {
-		int ret = i915_gem_init_seqno(dev, 0);
+	if (__i915_seqno_passed(dev_priv->next_seqno, seqno)) {
+		ret = i915_gpu_idle(dev);
 		if (ret)
 			return ret;
 
-		dev_priv->next_seqno = 1;
-	}
-
-	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
-	return 0;
-}
-
-int __i915_add_request(struct intel_engine_cs *engine,
-		       struct drm_file *file,
-		       struct drm_i915_gem_object *obj,
-		       u32 *out_seqno)
-{
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	struct drm_i915_gem_request *request;
-	struct intel_ringbuffer *ringbuf;
-	u32 request_ring_position, request_start;
-	int ret;
-
-	request = engine->preallocated_lazy_request;
-	if (WARN_ON(request == NULL))
-		return -ENOMEM;
-
-	if (i915.enable_execlists) {
-		struct intel_context *ctx = request->ctx;
-		ringbuf = ctx->ring[engine->id].ringbuf;
-	} else
-		ringbuf = engine->buffer;
-
-	request_start = intel_ring_get_tail(ringbuf);
-	/*
-	 * Emit any outstanding flushes - execbuf can fail to emit the flush
-	 * after having emitted the batchbuffer command. Hence we need to fix
-	 * things up similar to emitting the lazy request. The difference here
-	 * is that the flush _must_ happen before the next request, no matter
-	 * what.
-	 */
-	if (i915.enable_execlists) {
-		ret = logical_ring_flush_all_caches(ringbuf);
-		if (ret)
-			return ret;
-	} else {
-		ret = intel_engine_flush_all_caches(engine);
-		if (ret)
-			return ret;
+		i915_gem_retire_requests(dev);
 	}
 
-	/* Record the position of the start of the request so that
-	 * should we detect the updated seqno part-way through the
-	 * GPU processing the request, we never over-estimate the
-	 * position of the head.
-	 */
-	request_ring_position = intel_ring_get_tail(ringbuf);
+	dev_priv->next_seqno = seqno;
 
-	if (i915.enable_execlists) {
-		ret = engine->emit_request(ringbuf);
-		if (ret)
-			return ret;
-	} else {
-		ret = engine->add_request(engine);
+	for_each_engine(engine, dev_priv, i) {
+		ret = intel_engine_flush(engine, engine->default_context, ~0);
 		if (ret)
 			return ret;
 	}
 
-	request->seqno = intel_engine_get_seqno(engine);
-	request->engine = engine;
-	request->head = request_start;
-	request->tail = request_ring_position;
-
-	/* Whilst this request exists, batch_obj will be on the
-	 * active_list, and so will hold the active reference. Only when this
-	 * request is retired will the the batch_obj be moved onto the
-	 * inactive_list and lose its active reference. Hence we do not need
-	 * to explicitly hold another reference here.
-	 */
-	request->batch_obj = obj;
-
-	if (!i915.enable_execlists) {
-		/* Hold a reference to the current context so that we can inspect
-		 * it later in case a hangcheck error event fires.
-		 */
-		request->ctx = engine->last_context;
-		if (request->ctx)
-			i915_gem_context_reference(request->ctx);
-	}
-
-	request->emitted_jiffies = jiffies;
-	list_add_tail(&request->list, &engine->request_list);
-	request->file_priv = NULL;
-
-	if (file) {
-		struct drm_i915_file_private *file_priv = file->driver_priv;
-
-		spin_lock(&file_priv->mm.lock);
-		request->file_priv = file_priv;
-		list_add_tail(&request->client_list,
-			      &file_priv->mm.request_list);
-		spin_unlock(&file_priv->mm.lock);
-	}
-
-	trace_i915_gem_request_add(engine, request->seqno);
-	engine->outstanding_lazy_seqno = 0;
-	engine->preallocated_lazy_request = NULL;
-
-	if (!dev_priv->ums.mm_suspended) {
-		i915_queue_hangcheck(engine->dev);
-
-		cancel_delayed_work_sync(&dev_priv->mm.idle_work);
-		queue_delayed_work(dev_priv->wq,
-				   &dev_priv->mm.retire_work,
-				   round_jiffies_up_relative(HZ));
-		intel_mark_busy(dev_priv->dev);
-	}
-
-	if (out_seqno)
-		*out_seqno = request->seqno;
 	return 0;
 }
 
-static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
-{
-	struct drm_i915_file_private *file_priv = request->file_priv;
-
-	if (!file_priv)
-		return;
-
-	spin_lock(&file_priv->mm.lock);
-	list_del(&request->client_list);
-	request->file_priv = NULL;
-	spin_unlock(&file_priv->mm.lock);
-}
-
 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
 				   const struct intel_context *ctx)
 {
@@ -2463,65 +2090,53 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 	}
 }
 
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
-{
-	list_del(&request->list);
-	i915_gem_request_remove_from_client(request);
-
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
-
-	kfree(request);
-}
-
-struct drm_i915_gem_request *
-i915_gem_find_active_request(struct intel_engine_cs *engine)
+static void i915_gem_reset_engine_status(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *request;
-	u32 completed_seqno;
+	struct i915_gem_request *rq;
+	bool ring_hung;
 
-	completed_seqno = engine->get_seqno(engine, false);
+	rq = intel_engine_find_active_request(engine);
+	if (rq == NULL)
+		return;
 
-	list_for_each_entry(request, &engine->request_list, list) {
-		if (i915_seqno_passed(completed_seqno, request->seqno))
-			continue;
+	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
 
-		return request;
-	}
+	i915_set_reset_status(engine->i915, rq->ctx, ring_hung);
 
-	return NULL;
+	list_for_each_entry_continue(rq, &engine->requests, engine_list)
+		i915_set_reset_status(engine->i915, rq->ctx, false);
 }
 
-static void i915_gem_reset_engine_status(struct drm_i915_private *dev_priv,
-				       struct intel_engine_cs *engine)
+static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_request *request;
-	bool ring_hung;
+	while (!list_empty(&engine->write_list)) {
+		struct drm_i915_gem_object *obj;
 
-	request = i915_gem_find_active_request(engine);
+		obj = list_first_entry(&engine->write_list,
+				       struct drm_i915_gem_object,
+				       last_write.engine_list);
 
-	if (request == NULL)
-		return;
+		i915_gem_object_retire__write(obj);
+	}
 
-	ring_hung = engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
+	while (!list_empty(&engine->fence_list)) {
+		struct drm_i915_gem_object *obj;
 
-	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
+		obj = list_first_entry(&engine->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.engine_list);
 
-	list_for_each_entry_continue(request, &engine->request_list, list)
-		i915_set_reset_status(dev_priv, request->ctx, false);
-}
+		i915_gem_object_retire__fence(obj);
+	}
 
-static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv,
-					struct intel_engine_cs *engine)
-{
-	while (!list_empty(&engine->active_list)) {
+	while (!list_empty(&engine->read_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&engine->active_list,
+		obj = list_first_entry(&engine->read_list,
 				       struct drm_i915_gem_object,
-				       ring_list);
+				       last_read[engine->id].engine_list);
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__read(obj, engine);
 	}
 
 	/*
@@ -2531,32 +2146,7 @@ static void i915_gem_reset_engine_cleanup(struct drm_i915_private *dev_priv,
 	 * implicit references on things like e.g. ppgtt address spaces through
 	 * the request.
 	 */
-	while (!list_empty(&engine->request_list)) {
-		struct drm_i915_gem_request *request;
-
-		request = list_first_entry(&engine->request_list,
-					   struct drm_i915_gem_request,
-					   list);
-
-		i915_gem_free_request(request);
-	}
-
-	while (!list_empty(&engine->execlist_queue)) {
-		struct intel_ctx_submit_request *submit_req;
-
-		submit_req = list_first_entry(&engine->execlist_queue,
-				struct intel_ctx_submit_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
-		intel_runtime_pm_put(dev_priv);
-		i915_gem_context_unreference(submit_req->ctx);
-		kfree(submit_req);
-	}
-
-	/* These may not have been flush before the reset, do so now */
-	kfree(engine->preallocated_lazy_request);
-	engine->preallocated_lazy_request = NULL;
-	engine->outstanding_lazy_seqno = 0;
+	intel_engine_reset(engine);
 }
 
 void i915_gem_restore_fences(struct drm_device *dev)
@@ -2592,16 +2182,28 @@ void i915_gem_reset(struct drm_device *dev)
 	 * their reference to the objects, the inspection must be done first.
 	 */
 	for_each_engine(engine, dev_priv, i)
-		i915_gem_reset_engine_status(dev_priv, engine);
+		i915_gem_reset_engine_status(engine);
 
 	for_each_engine(engine, dev_priv, i)
-		i915_gem_reset_engine_cleanup(dev_priv, engine);
-
-	i915_gem_context_reset(dev);
+		i915_gem_reset_engine_cleanup(engine);
 
 	i915_gem_restore_fences(dev);
 }
 
+static u32 get_retire_seqno(struct intel_engine_cs *engine)
+{
+	u32 seqno = engine->get_seqno(engine);
+	/* list_last_entry() below needs a non-empty engine->requests; the caller checks list_empty() first. */
+	if (seqno == engine->breadcrumb[engine->id] &&
+	    !list_last_entry(&engine->requests,
+			     typeof(struct i915_gem_request),
+			     engine_list)->breadcrumb[engine->id] &&
+	    intel_engine_idle(engine))
+		seqno = engine->i915->next_seqno; /* NOTE(review): presumably lets requests without a breadcrumb retire - confirm */
+	/* Otherwise the seqno reported by the engine is returned unchanged. */
+	return seqno;
+}
+
 /**
  * This function clears the request list as sequence numbers are passed.
  */
@@ -2610,67 +2212,74 @@ i915_gem_retire_requests__engine(struct intel_engine_cs *engine)
 {
 	uint32_t seqno;
 
-	if (list_empty(&engine->request_list))
+	if (list_empty(&engine->requests))
 		return;
 
 	WARN_ON(i915_verify_lists(engine->dev));
 
-	seqno = engine->get_seqno(engine, true);
+	seqno = get_retire_seqno(engine);
 
 	/* Move any buffers on the active list that are no longer referenced
 	 * by the ringbuffer to the flushing/inactive lists as appropriate,
 	 * before we free the context associated with the requests.
 	 */
-	while (!list_empty(&engine->active_list)) {
+	while (!list_empty(&engine->write_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj = list_first_entry(&engine->active_list,
-				      struct drm_i915_gem_object,
-				      ring_list);
+		obj = list_first_entry(&engine->write_list,
+				       struct drm_i915_gem_object,
+				       last_write.engine_list);
 
-		if (!i915_seqno_passed(seqno, obj->last_read_seqno))
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_write.request->seqno))
 			break;
 
-		i915_gem_object_move_to_inactive(obj);
+		i915_gem_object_retire__write(obj);
 	}
 
+	while (!list_empty(&engine->fence_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&engine->fence_list,
+				       struct drm_i915_gem_object,
+				       last_fence.engine_list);
 
-	while (!list_empty(&engine->request_list)) {
-		struct drm_i915_gem_request *request;
-		struct intel_ringbuffer *ringbuf;
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_fence.request->seqno))
+			break;
 
-		request = list_first_entry(&engine->request_list,
-					   struct drm_i915_gem_request,
-					   list);
+		i915_gem_object_retire__fence(obj);
+	}
 
-		if (!i915_seqno_passed(seqno, request->seqno))
+	while (!list_empty(&engine->read_list)) {
+		struct drm_i915_gem_object *obj;
+
+		obj = list_first_entry(&engine->read_list,
+				       struct drm_i915_gem_object,
+				       last_read[engine->id].engine_list);
+
+		if (!__i915_seqno_passed(seqno,
+					 obj->last_read[engine->id].request->seqno))
 			break;
 
-		trace_i915_gem_request_retire(engine, request->seqno);
+		i915_gem_object_retire__read(obj, engine);
+	}
 
-		/* This is one of the few common intersection points
-		 * between legacy ringbuffer submission and execlists:
-		 * we need to tell them apart in order to find the correct
-		 * ringbuffer to which the request belongs to.
-		 */
-		if (i915.enable_execlists) {
-			struct intel_context *ctx = request->ctx;
-			ringbuf = ctx->ring[engine->id].ringbuf;
-		} else
-			ringbuf = engine->buffer;
-
-		/* We know the GPU must have read the request to have
-		 * sent us the seqno + interrupt, so use the position
-		 * of tail of the request to update the last known position
-		 * of the GPU head.
-		 */
-		ringbuf->last_retired_head = request->tail;
+	while (!list_empty(&engine->requests)) {
+		struct i915_gem_request *rq;
+
+		rq = list_first_entry(&engine->requests,
+				      struct i915_gem_request,
+				      engine_list);
 
-		i915_gem_free_request(request);
+		if (!__i915_seqno_passed(seqno, rq->seqno))
+			break;
+
+		i915_request_retire(rq);
 	}
 
 	if (unlikely(engine->trace_irq_seqno &&
-		     i915_seqno_passed(seqno, engine->trace_irq_seqno))) {
+		     __i915_seqno_passed(seqno, engine->trace_irq_seqno))) {
 		engine->irq_put(engine);
 		engine->trace_irq_seqno = 0;
 	}
@@ -2688,7 +2297,7 @@ i915_gem_retire_requests(struct drm_device *dev)
 
 	for_each_engine(engine, dev_priv, i) {
 		i915_gem_retire_requests__engine(engine);
-		idle &= list_empty(&engine->request_list);
+		idle &= list_empty(&engine->requests);
 	}
 
 	if (idle)
@@ -2737,14 +2346,16 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
-	if (obj->active) {
-		ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno);
+	if (!obj->active)
+		return 0;
+
+	if (obj->last_write.request) {
+		ret = i915_request_emit_breadcrumb(obj->last_write.request);
 		if (ret)
 			return ret;
-
-		i915_gem_retire_requests__engine(obj->ring);
 	}
 
+	i915_gem_retire_requests(obj->base.dev);
 	return 0;
 }
 
@@ -2773,13 +2384,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
 int
 i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_wait *args = data;
 	struct drm_i915_gem_object *obj;
-	struct intel_engine_cs *engine = NULL;
-	unsigned reset_counter;
-	u32 seqno = 0;
-	int ret = 0;
+	struct i915_gem_request *rq[I915_NUM_ENGINES] = {};
+	int i, n, ret = 0;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -2796,13 +2404,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	if (ret)
 		goto out;
 
-	if (obj->active) {
-		seqno = obj->last_read_seqno;
-		engine = obj->ring;
-	}
-
-	if (seqno == 0)
-		 goto out;
+	if (!obj->active)
+		goto out;
 
 	/* Do this after OLR check to make sure we make forward progress polling
 	 * on this IOCTL with a timeout <=0 (like busy ioctl)
@@ -2812,17 +2415,75 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		goto out;
 	}
 
-	drm_gem_object_unreference(&obj->base);
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
-	mutex_unlock(&dev->struct_mutex);
+	for (i = n = 0; i < I915_NUM_ENGINES; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		ret = i915_request_emit_breadcrumb(obj->last_read[i].request);
+		if (ret)
+			break;
+
+		rq[n++] = i915_request_get(obj->last_read[i].request);
+	}
+
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+
+	for (i = 0; i < n; i++) {
+		if (ret == 0)
+			ret = __i915_request_wait(rq[i], true, &args->timeout_ns, file->driver_priv);
+
+		i915_request_put__unlocked(rq[i]);
+	}
+
+	return ret;
+
+out:
+	drm_gem_object_unreference(&obj->base);
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+static int
+__i915_request_sync(struct i915_gem_request *waiter,
+		    struct i915_gem_request *signaller,
+		    struct drm_i915_gem_object *obj,
+		    bool *retire)
+{
+	int ret;
+
+	if (signaller == NULL || i915_request_complete(signaller))
+		return 0;
+
+	if (waiter == NULL)
+		goto wait;
+
+	/* XXX still true with execlists? */
+	if (waiter->engine == signaller->engine)
+		return 0;
+
+	if (!i915_semaphore_is_enabled(to_i915(obj->base.dev)) ||
+	    /* across a seqno wrap, the semaphore wait would race with us */
+	    signaller->seqno > waiter->seqno)
+		goto wait;
+
+	if (waiter->semaphore[signaller->engine->id] >= signaller->seqno)
+		return 0;
+
+	ret = i915_request_emit_semaphore(signaller, waiter->engine->id);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_wait(signaller->engine, waiter->engine, signaller->seqno);
+	if (waiter->engine->semaphore.wait(waiter, signaller))
+		goto wait;
 
-	return __wait_seqno(engine, seqno, reset_counter, true, &args->timeout_ns,
-			    file->driver_priv);
+	waiter->semaphore[signaller->engine->id] = signaller->breadcrumb[waiter->engine->id];
+	return 0;
 
-out:
-	drm_gem_object_unreference(&obj->base);
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
+wait:
+	*retire = true;
+	return i915_request_wait(signaller);
 }
 
 /**
@@ -2839,40 +2500,25 @@ out:
  */
 int
 i915_gem_object_sync(struct drm_i915_gem_object *obj,
-		     struct intel_engine_cs *to)
+		     struct i915_gem_request *rq)
 {
-	struct intel_engine_cs *from = obj->ring;
-	u32 seqno;
-	int ret, idx;
-
-	if (from == NULL || to == from)
-		return 0;
-
-	if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
-		return i915_gem_object_wait_rendering(obj, false);
-
-	idx = intel_engine_sync_index(from, to);
-
-	seqno = obj->last_read_seqno;
-	/* Optimization: Avoid semaphore sync when we are sure we already
-	 * waited for an object with higher seqno */
-	if (seqno <= from->semaphore.sync_seqno[idx])
-		return 0;
-
-	ret = i915_gem_check_olr(obj->ring, seqno);
-	if (ret)
-		return ret;
+	int ret = 0, i;
+	bool retire = false;
 
-	trace_i915_gem_ring_sync_to(from, to, seqno);
-	ret = to->semaphore.sync_to(to, from, seqno);
-	if (!ret)
-		/* We use last_read_seqno because sync_to()
-		 * might have just caused seqno wrap under
-		 * the radar.
-		 */
-		from->semaphore.sync_seqno[idx] = obj->last_read_seqno;
+	if (obj->base.pending_write_domain == 0) {
+		ret = __i915_request_sync(rq, obj->last_write.request, obj, &retire);
+	} else {
+		for (i = 0; i < I915_NUM_ENGINES; i++) {
+			ret = __i915_request_sync(rq, obj->last_read[i].request, obj, &retire);
+			if (ret)
+				break;
+		}
+	}
 
+	if (retire)
+		i915_gem_object_retire(obj);
 	return ret;
+
 }
 
 static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
@@ -2964,17 +2610,24 @@ int i915_vma_unbind(struct i915_vma *vma)
 
 int i915_gpu_idle(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *engine;
-	int ret, i;
+	int i;
 
 	/* Flush everything onto the inactive list. */
-	for_each_engine(engine, dev_priv, i) {
-		ret = i915_switch_context(engine, engine->default_context);
-		if (ret)
-			return ret;
+	for_each_engine(engine, to_i915(dev), i) {
+		struct i915_gem_request *rq;
+		int ret;
+
+		if (list_empty(&engine->requests))
+			continue;
+
+		rq = intel_engine_alloc_request(engine, engine->default_context);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
+
+		ret = i915_request_wait(rq);
+		i915_request_put(rq);
 
-		ret = intel_engine_idle(engine);
 		if (ret)
 			return ret;
 	}
@@ -3178,14 +2831,16 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
 static int
 i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 {
-	if (obj->last_fenced_seqno) {
-		int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
-		if (ret)
-			return ret;
+	int ret;
 
-		obj->last_fenced_seqno = 0;
-	}
+	if (obj->last_fence.request == NULL)
+		return 0;
+
+	ret = i915_request_wait(obj->last_fence.request);
+	if (ret)
+		return ret;
 
+	i915_gem_object_retire__fence(obj);
 	return 0;
 }
 
@@ -3844,17 +3499,15 @@ static bool is_pin_display(struct drm_i915_gem_object *obj)
 int
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_engine_cs *pipelined)
+				     struct i915_gem_request *pipelined)
 {
 	u32 old_read_domains, old_write_domain;
 	bool was_pin_display;
 	int ret;
 
-	if (pipelined != obj->ring) {
-		ret = i915_gem_object_sync(obj, pipelined);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_sync(obj, pipelined);
+	if (ret)
+		return ret;
 
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
@@ -4002,38 +3655,53 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
-	struct drm_i915_gem_request *request;
-	struct intel_engine_cs *engine = NULL;
-	unsigned reset_counter;
-	u32 seqno = 0;
+	struct i915_gem_request *rq, *tmp;
 	int ret;
 
 	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
 	if (ret)
 		return ret;
 
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
-	if (ret)
-		return ret;
+	/* Used by legacy userspace to query whether the GPU is wedged */
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		return -EIO;
 
 	spin_lock(&file_priv->mm.lock);
-	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
-		if (time_after_eq(request->emitted_jiffies, recent_enough))
+	rq = NULL;
+	list_for_each_entry(tmp, &file_priv->mm.request_list, client_list) {
+		if (tmp->breadcrumb[tmp->engine->id] == 0)
+			continue;
+		if (time_after_eq(tmp->emitted_jiffies, recent_enough))
 			break;
-
-		engine = request->engine;
-		seqno = request->seqno;
+		rq = tmp;
 	}
-	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
+	rq = i915_request_get(rq);
 	spin_unlock(&file_priv->mm.lock);
 
-	if (seqno == 0)
-		return 0;
+	if (rq == NULL) {
+		spin_lock(&file_priv->mm.lock);
+		if (!list_empty(&file_priv->mm.request_list)) {
+			rq = list_last_entry(&file_priv->mm.request_list,
+					     typeof(*rq), client_list);
+			rq = i915_request_get(rq);
+		}
+		spin_unlock(&file_priv->mm.lock);
 
-	ret = __wait_seqno(engine, seqno, reset_counter, true, NULL, NULL);
-	if (ret == 0)
-		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
+		ret = 0;
+		if (rq && rq->breadcrumb[rq->engine->id] == 0) {
+			ret = i915_mutex_lock_interruptible(dev);
+			if (ret == 0) {
+				ret = intel_engine_flush(rq->engine, rq->ctx, 1 << rq->engine->id);
+				mutex_unlock(&dev->struct_mutex);
+			}
+		}
+	} else {
+		ret = __i915_request_wait(rq, true, NULL, NULL);
+		if (ret == 0)
+			queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
+	}
 
+	i915_request_put__unlocked(rq);
 	return ret;
 }
 
@@ -4247,7 +3915,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	int ret;
+	int ret, i;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -4266,10 +3934,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 */
 	ret = i915_gem_object_flush_active(obj);
 
-	args->busy = obj->active;
-	if (obj->ring) {
+	args->busy = 0;
+	if (obj->active) {
 		BUILD_BUG_ON(I915_NUM_ENGINES > 16);
-		args->busy |= intel_engine_flag(obj->ring) << 16;
+		args->busy |= 1;
+		for (i = 0; i < I915_NUM_ENGINES; i++)  {
+			if (obj->last_read[i].request == NULL)
+				continue;
+
+			args->busy |= 1 << (16 + i);
+		}
 	}
 
 	drm_gem_object_unreference(&obj->base);
@@ -4335,8 +4009,13 @@ unlock:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
+	int i;
+
 	INIT_LIST_HEAD(&obj->global_list);
-	INIT_LIST_HEAD(&obj->ring_list);
+	INIT_LIST_HEAD(&obj->last_fence.engine_list);
+	INIT_LIST_HEAD(&obj->last_write.engine_list);
+	for (i = 0; i < I915_NUM_ENGINES; i++)
+		INIT_LIST_HEAD(&obj->last_read[i].engine_list);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 
@@ -4522,14 +4201,105 @@ void i915_gem_vma_destroy(struct i915_vma *vma)
 }
 
 static void
-i915_gem_stop_ringbuffers(struct drm_device *dev)
+i915_gem_cleanup_rings(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine;
 	int i;
 
-	for_each_engine(engine, dev_priv, i)
-		dev_priv->gt.stop_ring(engine);
+	/* Not the regular for_each_engine, so that we can clean up after a failed setup */
+	for (i =0; i < I915_NUM_ENGINES; i++) {
+		struct intel_engine_cs *engine = &to_i915(dev)->engine[i];
+
+		if (engine->i915 == NULL)
+			continue;
+
+		intel_engine_cleanup(engine);
+	}
+}
+
+static int
+i915_gem_resume_rings(struct drm_device *dev)
+{
+	struct intel_engine_cs *engine;
+	int i, ret;
+
+	for_each_engine(engine, to_i915(dev), i) {
+		ret = intel_engine_resume(engine);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int
+i915_gem_suspend_rings(struct drm_device *dev)
+{
+	struct intel_engine_cs *engine;
+	int i, ret;
+
+	for_each_engine(engine, to_i915(dev), i) {
+		ret = intel_engine_suspend(engine);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static bool
+intel_enable_blt(struct drm_i915_private *dev_priv)
+{
+	if (!HAS_BLT(dev_priv))
+		return false;
+
+	/* The blitter was dysfunctional on early prototypes */
+	if (IS_GEN6(dev_priv) && dev_priv->dev->pdev->revision < 8) {
+		DRM_INFO("BLT not supported on this pre-production hardware;"
+			 " graphics performance will be degraded.\n");
+		return false;
+	}
+
+	return true;
+}
+
+static int i915_gem_setup_rings(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = intel_init_render_engine(dev_priv);
+	if (ret)
+		goto cleanup;
+
+	if (HAS_BSD(dev_priv)) {
+		ret = intel_init_bsd_engine(dev_priv);
+		if (ret)
+			goto cleanup;
+	}
+
+	if (intel_enable_blt(dev_priv)) {
+		ret = intel_init_blt_engine(dev_priv);
+		if (ret)
+			goto cleanup;
+	}
+
+	if (HAS_VEBOX(dev_priv)) {
+		ret = intel_init_vebox_engine(dev_priv);
+		if (ret)
+			goto cleanup;
+	}
+
+	if (HAS_BSD2(dev_priv)) {
+		ret = intel_init_bsd2_engine(dev_priv);
+		if (ret)
+			goto cleanup;
+	}
+
+	return 0;
+
+cleanup:
+	i915_gem_cleanup_rings(dev);
+	return ret;
 }
 
 int
@@ -4552,7 +4322,9 @@ i915_gem_suspend(struct drm_device *dev)
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		i915_gem_evict_everything(dev);
 
-	i915_gem_stop_ringbuffers(dev);
+	ret = i915_gem_suspend_rings(dev);
+	if (ret)
+		goto err;
 
 	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
 	 * We need to replace this with a semaphore, or something.
@@ -4573,37 +4345,6 @@ err:
 	return ret;
 }
 
-int i915_gem_l3_remap(struct intel_engine_cs *engine, int slice)
-{
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
-	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
-	int i, ret;
-
-	if (!HAS_L3_DPF(dev) || !remap_info)
-		return 0;
-
-	ret = intel_ring_begin(engine, GEN7_L3LOG_SIZE / 4 * 3);
-	if (ret)
-		return ret;
-
-	/*
-	 * Note: We do not worry about the concurrent register cacheline hang
-	 * here because no other code should access these registers other than
-	 * at initialization time.
-	 */
-	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
-		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(engine, reg_base + i);
-		intel_ring_emit(engine, remap_info[i/4]);
-	}
-
-	intel_ring_advance(engine);
-
-	return ret;
-}
-
 void i915_gem_init_swizzling(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4629,80 +4370,11 @@ void i915_gem_init_swizzling(struct drm_device *dev)
 		BUG();
 }
 
-static bool
-intel_enable_blt(struct drm_device *dev)
-{
-	if (!HAS_BLT(dev))
-		return false;
-
-	/* The blitter was dysfunctional on early prototypes */
-	if (IS_GEN6(dev) && dev->pdev->revision < 8) {
-		DRM_INFO("BLT not supported on this pre-production hardware;"
-			 " graphics performance will be degraded.\n");
-		return false;
-	}
-
-	return true;
-}
-
-static int i915_gem_init_rings(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = intel_init_render_engine(dev);
-	if (ret)
-		return ret;
-
-	if (HAS_BSD(dev)) {
-		ret = intel_init_bsd_engine(dev);
-		if (ret)
-			goto cleanup_render;
-	}
-
-	if (intel_enable_blt(dev)) {
-		ret = intel_init_blt_engine(dev);
-		if (ret)
-			goto cleanup_bsd;
-	}
-
-	if (HAS_VEBOX(dev)) {
-		ret = intel_init_vebox_engine(dev);
-		if (ret)
-			goto cleanup_blt;
-	}
-
-	if (HAS_BSD2(dev)) {
-		ret = intel_init_bsd2_engine(dev);
-		if (ret)
-			goto cleanup_vebox;
-	}
-
-	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-	if (ret)
-		goto cleanup_bsd2;
-
-	return 0;
-
-cleanup_bsd2:
-	intel_cleanup_engine(&dev_priv->engine[VCS2]);
-cleanup_vebox:
-	intel_cleanup_engine(&dev_priv->engine[VECS]);
-cleanup_blt:
-	intel_cleanup_engine(&dev_priv->engine[BCS]);
-cleanup_bsd:
-	intel_cleanup_engine(&dev_priv->engine[VCS]);
-cleanup_render:
-	intel_cleanup_engine(&dev_priv->engine[RCS]);
-
-	return ret;
-}
-
 int
 i915_gem_init_hw(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret, i;
+	int ret;
 
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
@@ -4728,33 +4400,11 @@ i915_gem_init_hw(struct drm_device *dev)
 
 	i915_gem_init_swizzling(dev);
 
-	ret = dev_priv->gt.init_rings(dev);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < NUM_L3_SLICES(dev); i++)
-		i915_gem_l3_remap(&dev_priv->engine[RCS], i);
-
-	/*
-	 * XXX: Contexts should only be initialized once. Doing a switch to the
-	 * default context switch however is something we'd like to do after
-	 * reset or thaw (the latter may not actually be necessary for HW, but
-	 * goes with our code better). Context switching requires rings (for
-	 * the do_switch), but before enabling PPGTT. So don't move this.
-	 */
-	ret = i915_gem_context_enable(dev_priv);
-	if (ret && ret != -EIO) {
-		DRM_ERROR("Context enable failed %d\n", ret);
-		i915_gem_cleanup_ringbuffer(dev);
-
-		return ret;
-	}
-
 	ret = i915_ppgtt_init_hw(dev);
-	if (ret && ret != -EIO) {
-		DRM_ERROR("PPGTT enable failed %d\n", ret);
-		i915_gem_cleanup_ringbuffer(dev);
-	}
+	if (ret == 0)
+		ret = i915_gem_context_enable(dev_priv);
+	if (ret == 0)
+		ret = i915_gem_resume_rings(dev);
 
 	return ret;
 }
@@ -4764,8 +4414,7 @@ int i915_gem_init(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
-			i915.enable_execlists);
+	intel_sanitize_enable_execlists(dev, &i915.enable_execlists);
 
 	mutex_lock(&dev->struct_mutex);
 
@@ -4777,18 +4426,6 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
-	if (!i915.enable_execlists) {
-		dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission;
-		dev_priv->gt.init_rings = i915_gem_init_rings;
-		dev_priv->gt.cleanup_ring = intel_cleanup_engine;
-		dev_priv->gt.stop_ring = intel_stop_engine;
-	} else {
-		dev_priv->gt.do_execbuf = intel_execlists_submission;
-		dev_priv->gt.init_rings = intel_logical_rings_init;
-		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
-		dev_priv->gt.stop_ring = intel_logical_ring_stop;
-	}
-
 	ret = i915_gem_init_userptr(dev);
 	if (ret) {
 		mutex_unlock(&dev->struct_mutex);
@@ -4797,13 +4434,11 @@ int i915_gem_init(struct drm_device *dev)
 
 	i915_gem_init_global_gtt(dev);
 
-	ret = i915_gem_context_init(dev);
-	if (ret) {
-		mutex_unlock(&dev->struct_mutex);
-		return ret;
-	}
-
-	ret = i915_gem_init_hw(dev);
+	ret = i915_gem_setup_rings(dev);
+	if (ret == 0)
+		ret = i915_gem_context_init(dev);
+	if (ret == 0)
+		ret = i915_gem_init_hw(dev);
 	if (ret == -EIO) {
 		/* Allow ring initialisation to fail by marking the GPU as
 		 * wedged. But we only want to do this where the GPU is angry,
@@ -4818,15 +4453,10 @@ int i915_gem_init(struct drm_device *dev)
 	return ret;
 }
 
-void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+void i915_gem_fini(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine;
-	int i;
-
-	for_each_engine(engine, dev_priv, i)
-		dev_priv->gt.cleanup_ring(engine);
+	i915_gem_context_fini(dev);
+	i915_gem_cleanup_rings(dev);
 }
 
 int
@@ -4845,26 +4475,12 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	dev_priv->ums.mm_suspended = 0;
-
 	ret = i915_gem_init_hw(dev);
-	if (ret != 0) {
-		mutex_unlock(&dev->struct_mutex);
-		return ret;
-	}
-
+	if (ret == 0)
+		ret = drm_irq_install(dev, dev->pdev->irq);
+	if (ret == 0)
+		dev_priv->ums.mm_suspended = 0;
 	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
-
-	ret = drm_irq_install(dev, dev->pdev->irq);
-	if (ret)
-		goto cleanup_ringbuffer;
-	mutex_unlock(&dev->struct_mutex);
-
-	return 0;
-
-cleanup_ringbuffer:
-	i915_gem_cleanup_ringbuffer(dev);
-	dev_priv->ums.mm_suspended = 1;
 	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
@@ -4898,10 +4514,13 @@ i915_gem_lastclose(struct drm_device *dev)
 }
 
 static void
-init_engine_lists(struct intel_engine_cs *engine)
+init_null_engine(struct intel_engine_cs *engine)
 {
-	INIT_LIST_HEAD(&engine->active_list);
-	INIT_LIST_HEAD(&engine->request_list);
+	INIT_LIST_HEAD(&engine->read_list);
+	INIT_LIST_HEAD(&engine->write_list);
+	INIT_LIST_HEAD(&engine->fence_list);
+	INIT_LIST_HEAD(&engine->requests);
+	INIT_LIST_HEAD(&engine->rings);
 }
 
 void i915_init_vm(struct drm_i915_private *dev_priv,
@@ -4936,7 +4555,7 @@ i915_gem_load(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
 	for (i = 0; i < I915_NUM_ENGINES; i++)
-		init_engine_lists(&dev_priv->engine[i]);
+		init_null_engine(&dev_priv->engine[i]);
 	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
 		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
 	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
@@ -4996,13 +4615,13 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
 	 */
 	spin_lock(&file_priv->mm.lock);
 	while (!list_empty(&file_priv->mm.request_list)) {
-		struct drm_i915_gem_request *request;
+		struct i915_gem_request *rq;
 
-		request = list_first_entry(&file_priv->mm.request_list,
-					   struct drm_i915_gem_request,
-					   client_list);
-		list_del(&request->client_list);
-		request->file_priv = NULL;
+		rq = list_first_entry(&file_priv->mm.request_list,
+				      struct i915_gem_request,
+				      client_list);
+		list_del(&rq->client_list);
+		rq->file_priv = NULL;
 	}
 	spin_unlock(&file_priv->mm.lock);
 }
@@ -5290,3 +4909,21 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 
 	return vma;
 }
+
+struct i915_gem_request *i915_gem_object_last_read(struct drm_i915_gem_object *obj)
+{
+	u32 seqno = 0;
+	struct i915_gem_request *rq = NULL;
+	int i;
+
+	/* This is approximate as seqno cannot be used across rings */
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		if (obj->last_read[i].request == NULL)
+			continue;
+
+		if (__i915_seqno_passed(obj->last_read[i].request->seqno, seqno))
+			rq = obj->last_read[i].request, seqno = rq->seqno;
+	}
+
+	return rq;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 2dde547..08129e4 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -96,9 +96,9 @@
 #define GEN6_CONTEXT_ALIGN (64<<10)
 #define GEN7_CONTEXT_ALIGN 4096
 
-static size_t get_context_alignment(struct drm_device *dev)
+static size_t get_context_alignment(struct drm_i915_private *i915)
 {
-	if (IS_GEN6(dev))
+	if (IS_GEN6(i915))
 		return GEN6_CONTEXT_ALIGN;
 
 	return GEN7_CONTEXT_ALIGN;
@@ -111,6 +111,9 @@ static int get_context_size(struct drm_device *dev)
 	u32 reg;
 
 	switch (INTEL_INFO(dev)->gen) {
+	case 5:
+		ret = ILK_CXT_TOTAL_SIZE;
+		break;
 	case 6:
 		reg = I915_READ(CXT_SIZE);
 		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
@@ -134,16 +137,22 @@ static int get_context_size(struct drm_device *dev)
 
 void i915_gem_context_free(struct kref *ctx_ref)
 {
-	struct intel_context *ctx = container_of(ctx_ref,
-						 typeof(*ctx), ref);
-
-	if (i915.enable_execlists)
-		intel_lr_context_free(ctx);
+	struct intel_context *ctx =
+		container_of(ctx_ref, typeof(*ctx), ref);
+	struct drm_i915_private *dev_priv = ctx->i915;
+	int i;
 
 	i915_ppgtt_put(ctx->ppgtt);
 
-	if (ctx->legacy_hw_ctx.rcs_state)
-		drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
+	for (i = 0; i < I915_NUM_ENGINES; i++) {
+		if (intel_engine_initialized(&dev_priv->engine[i]) &&
+		    ctx->ring[i].ring != NULL)
+			dev_priv->engine[i].put_ring(ctx->ring[i].ring, ctx);
+
+		if (ctx->ring[i].state != NULL)
+			drm_gem_object_unreference(&ctx->ring[i].state->base);
+	}
+
 	list_del(&ctx->link);
 	kfree(ctx);
 }
@@ -192,15 +201,16 @@ __create_hw_context(struct drm_device *dev,
 
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->context_list);
+	ctx->i915 = dev_priv;
 
 	if (dev_priv->hw_context_size) {
 		struct drm_i915_gem_object *obj =
-				i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
+			i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size);
 		if (IS_ERR(obj)) {
 			ret = PTR_ERR(obj);
 			goto err_out;
 		}
-		ctx->legacy_hw_ctx.rcs_state = obj;
+		ctx->ring[RCS].state = obj;
 	}
 
 	/* Default context will never have a file_priv */
@@ -226,18 +236,11 @@ err_out:
 	return ERR_PTR(ret);
 }
 
-/**
- * The default context needs to exist per ring that uses contexts. It stores the
- * context state of the GPU for applications that don't utilize HW contexts, as
- * well as an idle case.
- */
 static struct intel_context *
 i915_gem_create_context(struct drm_device *dev,
 			struct drm_i915_file_private *file_priv)
 {
-	const bool is_global_default_ctx = file_priv == NULL;
 	struct intel_context *ctx;
-	int ret = 0;
 
 	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 
@@ -245,91 +248,27 @@ i915_gem_create_context(struct drm_device *dev,
 	if (IS_ERR(ctx))
 		return ctx;
 
-	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state) {
-		/* We may need to do things with the shrinker which
-		 * require us to immediately switch back to the default
-		 * context. This can cause a problem as pinning the
-		 * default context also requires GTT space which may not
-		 * be available. To avoid this we always pin the default
-		 * context.
-		 */
-		ret = i915_gem_obj_ggtt_pin(ctx->legacy_hw_ctx.rcs_state,
-					    get_context_alignment(dev), 0);
-		if (ret) {
-			DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
-			goto err_destroy;
-		}
-	}
-
 	if (USES_FULL_PPGTT(dev)) {
 		struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
 
 		if (IS_ERR_OR_NULL(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
-			ret = PTR_ERR(ppgtt);
-			goto err_unpin;
+			i915_gem_context_unreference(ctx);
+			return ERR_CAST(ppgtt);
 		}
 
 		ctx->ppgtt = ppgtt;
 	}
 
 	return ctx;
-
-err_unpin:
-	if (is_global_default_ctx && ctx->legacy_hw_ctx.rcs_state)
-		i915_gem_object_ggtt_unpin(ctx->legacy_hw_ctx.rcs_state);
-err_destroy:
-	i915_gem_context_unreference(ctx);
-	return ERR_PTR(ret);
-}
-
-void i915_gem_context_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int i;
-
-	/* Prevent the hardware from restoring the last context (which hung) on
-	 * the next switch */
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct intel_engine_cs *engine = &dev_priv->engine[i];
-		struct intel_context *dctx = engine->default_context;
-		struct intel_context *lctx = engine->last_context;
-
-		/* Do a fake switch to the default context */
-		if (lctx == dctx)
-			continue;
-
-		if (!lctx)
-			continue;
-
-		if (dctx->legacy_hw_ctx.rcs_state && i == RCS) {
-			WARN_ON(i915_gem_obj_ggtt_pin(dctx->legacy_hw_ctx.rcs_state,
-						      get_context_alignment(dev), 0));
-			/* Fake a finish/inactive */
-			dctx->legacy_hw_ctx.rcs_state->base.write_domain = 0;
-			dctx->legacy_hw_ctx.rcs_state->active = 0;
-		}
-
-		if (lctx->legacy_hw_ctx.rcs_state && i == RCS)
-			i915_gem_object_ggtt_unpin(lctx->legacy_hw_ctx.rcs_state);
-
-		i915_gem_context_unreference(lctx);
-		i915_gem_context_reference(dctx);
-		engine->last_context = dctx;
-	}
 }
 
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_context *ctx;
-	int i;
-
-	/* Init should only be called once per module load. Eventually the
-	 * restriction on the context_disabled check can be loosened. */
-	if (WARN_ON(dev_priv->engine[RCS].default_context))
-		return 0;
+	int i, ret;
 
 	if (i915.enable_execlists) {
 		/* NB: intentionally left blank. We will allocate our own
@@ -344,67 +283,76 @@ int i915_gem_context_init(struct drm_device *dev)
 		}
 	}
 
-	ctx = i915_gem_create_context(dev, NULL);
+	/*
+	 * The default context needs to exist per ring that uses contexts.
+	 * It stores the context state of the GPU for applications that don't
+	 * utilize HW contexts or per-process VM, as well as for the idle case.
+	 */
+	ctx = __create_hw_context(dev, NULL);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context (error %ld)\n",
 			  PTR_ERR(ctx));
 		return PTR_ERR(ctx);
 	}
 
+	if (dev_priv->hw_context_size) {
+		/* We may need to do things with the shrinker which
+		 * require us to immediately switch back to the default
+		 * context. This can cause a problem as pinning the
+		 * default context also requires GTT space which may not
+		 * be available. To avoid this we always pin the default
+		 * context.
+		 */
+		ret = i915_gem_obj_ggtt_pin(ctx->ring[RCS].state,
+					    get_context_alignment(dev_priv), 0);
+		if (ret) {
+			DRM_ERROR("Failed to pin global default context\n");
+			i915_gem_context_unreference(ctx);
+			return ret;
+		}
+	}
+
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
 		struct intel_engine_cs *engine = &dev_priv->engine[i];
 
-		/* NB: RCS will hold a ref for all rings */
+		if (engine->i915 == NULL)
+			continue;
+
 		engine->default_context = ctx;
+		i915_gem_context_reference(ctx);
 	}
 
+	dev_priv->default_context = ctx;
+
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			i915.enable_execlists ? "LR" :
-			dev_priv->hw_context_size ? "HW" : "fake");
+			 i915.enable_execlists ? "LR" :
+			 dev_priv->hw_context_size ? "HW" : "fake");
 	return 0;
 }
 
 void i915_gem_context_fini(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_context *dctx = dev_priv->engine[RCS].default_context;
+	struct intel_engine_cs *engine;
 	int i;
 
-	if (dctx->legacy_hw_ctx.rcs_state) {
+	if (dev_priv->hw_context_size)
 		/* The only known way to stop the gpu from accessing the hw context is
 		 * to reset it. Do this as the very last operation to avoid confusing
 		 * other code, leading to spurious errors. */
 		intel_gpu_reset(dev);
 
-		/* When default context is created and switched to, base object refcount
-		 * will be 2 (+1 from object creation and +1 from do_switch()).
-		 * i915_gem_context_fini() will be called after gpu_idle() has switched
-		 * to default context. So we need to unreference the base object once
-		 * to offset the do_switch part, so that i915_gem_context_unreference()
-		 * can then free the base object correctly. */
-		WARN_ON(!dev_priv->engine[RCS].last_context);
-		if (dev_priv->engine[RCS].last_context == dctx) {
-			/* Fake switch to NULL context */
-			WARN_ON(dctx->legacy_hw_ctx.rcs_state->active);
-			i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
-			i915_gem_context_unreference(dctx);
-			dev_priv->engine[RCS].last_context = NULL;
-		}
-
-		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
-	}
-
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct intel_engine_cs *engine = &dev_priv->engine[i];
-
-		if (engine->last_context)
-			i915_gem_context_unreference(engine->last_context);
-
+	for_each_engine(engine, dev_priv, i) {
+		i915_gem_context_unreference(engine->default_context);
 		engine->default_context = NULL;
-		engine->last_context = NULL;
 	}
 
-	i915_gem_context_unreference(dctx);
+	if (dev_priv->default_context) {
+		if (dev_priv->hw_context_size)
+			i915_gem_object_ggtt_unpin(dev_priv->default_context->ring[RCS].state);
+		i915_gem_context_unreference(dev_priv->default_context);
+		dev_priv->default_context = NULL;
+	}
 }
 
 int i915_gem_context_enable(struct drm_i915_private *dev_priv)
@@ -412,16 +360,31 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	int ret, i;
 
-	/* FIXME: We should make this work, even in reset */
-	if (i915_reset_in_progress(&dev_priv->gpu_error))
-		return 0;
-
-	BUG_ON(!dev_priv->engine[RCS].default_context);
+	if (WARN_ON(!intel_engine_initialized(&dev_priv->engine[RCS])))
+		return -EIO;
 
 	for_each_engine(engine, dev_priv, i) {
-		ret = i915_switch_context(engine, engine->default_context);
-		if (ret)
+		struct intel_context *ctx = engine->default_context;
+		struct i915_gem_request *rq;
+
+		ctx->remap_slice = (1 << NUM_L3_SLICES(dev_priv)) - 1;
+		rq = intel_engine_alloc_request(engine, ctx);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			goto err;
+		}
+
+		ret = 0;
+		if (i == RCS)
+			ret = i915_gem_render_state_init(rq);
+		if (ret == 0)
+			ret = i915_request_commit(rq);
+		i915_request_put(rq);
+		if (ret) {
+err:
+			DRM_ERROR("failed to enabled contexts (%s): %d\n", engine->name, ret);
 			return ret;
+		}
 	}
 
 	return 0;
@@ -475,36 +438,51 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id)
 }
 
 static inline int
-mi_set_context(struct intel_engine_cs *engine,
-	       struct intel_context *new_context,
+mi_set_context(struct i915_gem_request *rq,
+	       struct intel_engine_context *new_context,
 	       u32 hw_flags)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
+	int ret, len;
 
 	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
 	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
 	 * explicitly, so we rely on the value at engine init, stored in
 	 * itlb_before_ctx_switch.
 	 */
-	if (IS_GEN6(engine->dev)) {
-		ret = engine->flush(engine, I915_GEM_GPU_DOMAINS, 0);
+	if (IS_GEN6(rq->i915)) {
+		ret = i915_request_emit_flush(rq, I915_INVALIDATE_CACHES);
 		if (ret)
 			return ret;
 	}
 
-	ret = intel_ring_begin(engine, 6);
-	if (ret)
-		return ret;
+	len = 4;
+	switch (INTEL_INFO(rq->i915)->gen) {
+	case 8:
+	case 7:
+	case 5: len += 2;
+		break;
+	}
 
-	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-	if (INTEL_INFO(engine->dev)->gen >= 7)
-		intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE);
-	else
-		intel_ring_emit(engine, MI_NOOP);
+	ring = intel_ring_begin(rq, len);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_emit(engine, MI_SET_CONTEXT);
-	intel_ring_emit(engine, i915_gem_obj_ggtt_offset(new_context->legacy_hw_ctx.rcs_state) |
+	switch (INTEL_INFO(rq->i915)->gen) {
+	case 8:
+	case 7:
+		/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
+		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
+		break;
+	case 5:
+		intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
+		break;
+	}
+
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, MI_SET_CONTEXT);
+	intel_ring_emit(ring,
+			i915_gem_obj_ggtt_offset(new_context->state) |
 			MI_MM_SPACE_GTT |
 			MI_SAVE_EXT_STATE_EN |
 			MI_RESTORE_EXT_STATE_EN |
@@ -513,62 +491,98 @@ mi_set_context(struct intel_engine_cs *engine,
 	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
 	 * WaMiSetContext_Hang:snb,ivb,vlv
 	 */
-	intel_ring_emit(engine, MI_NOOP);
+	intel_ring_emit(ring, MI_NOOP);
 
-	if (INTEL_INFO(engine->dev)->gen >= 7)
-		intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_ENABLE);
-	else
-		intel_ring_emit(engine, MI_NOOP);
+	switch (INTEL_INFO(rq->i915)->gen) {
+	case 8:
+	case 7:
+		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
+		break;
+	case 5:
+		intel_ring_emit(ring, MI_SUSPEND_FLUSH);
+		break;
+	}
 
-	intel_ring_advance(engine);
+	intel_ring_advance(ring);
 
-	return ret;
+	return 0;
 }
 
-static int do_switch(struct intel_engine_cs *engine,
-		     struct intel_context *to)
+static int l3_remap(struct i915_gem_request *rq, int slice)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	struct intel_context *from = engine->last_context;
+	const u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
+	const u32 *remap_info;
+	struct intel_ringbuffer *ring;
+	int i;
+
+	remap_info = rq->i915->l3_parity.remap_info[slice];
+	if (remap_info == NULL)
+		return 0;
+
+	ring = intel_ring_begin(rq, GEN7_L3LOG_SIZE / 4 * 3);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
+
+	/*
+	 * Note: We do not worry about the concurrent register cacheline hang
+	 * here because no other code should access these registers other than
+	 * at initialization time.
+	 */
+	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, reg_base + i);
+		intel_ring_emit(ring, remap_info[i/4]);
+	}
+
+	intel_ring_advance(ring);
+	return 0;
+}
+
+/**
+ * i915_switch_context() - perform a GPU context switch.
+ * @rq: request on whose engine we'll execute the context switch
+ * @to: the context to switch to
+ *
+ * The context life cycle is simple. The context refcount is incremented and
+ * decremented by 1 on create and destroy. If the context is in use by the GPU,
+ * it will have a refcount > 1. This allows us to destroy the context abstract
+ * object while letting the normal object tracking destroy the backing BO.
+ */
+int i915_switch_context(struct i915_gem_request *rq,
+			struct intel_context *to)
+{
+	struct intel_engine_context *ctx = &to->ring[rq->engine->id];
+	struct intel_context *from;
 	u32 hw_flags = 0;
-	bool uninitialized = false;
 	int ret, i;
 
-	if (from != NULL && engine == &dev_priv->engine[RCS]) {
-		BUG_ON(from->legacy_hw_ctx.rcs_state == NULL);
-		BUG_ON(!i915_gem_obj_is_pinned(from->legacy_hw_ctx.rcs_state));
-	}
+	WARN_ON(!mutex_is_locked(&rq->i915->dev->struct_mutex));
 
-	if (from == to && !to->remap_slice)
+	if (ctx->state == NULL)
+		return 0;
+
+	if (rq->ring->last_context == to && !to->remap_slice)
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
-	if (engine == &dev_priv->engine[RCS]) {
-		ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
-					    get_context_alignment(engine->dev), 0);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_obj_ggtt_pin(ctx->state,
+				    get_context_alignment(rq->i915), 0);
+	if (ret)
+		return ret;
 
 	/*
 	 * Pin can switch back to the default context if we end up calling into
 	 * evict_everything - as a last ditch gtt defrag effort that also
 	 * switches to the default context. Hence we need to reload from here.
 	 */
-	from = engine->last_context;
+	from = rq->ring->last_context;
 
 	if (to->ppgtt) {
-		ret = to->ppgtt->switch_mm(to->ppgtt, engine, false);
+		ret = to->ppgtt->switch_mm(rq, to->ppgtt);
 		if (ret)
 			goto unpin_out;
 	}
 
-	if (engine != &dev_priv->engine[RCS]) {
-		if (from)
-			i915_gem_context_unreference(from);
-		goto done;
-	}
-
 	/*
 	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
 	 * that thanks to write = false in this call and us not setting any gpu
@@ -577,20 +591,21 @@ static int do_switch(struct intel_engine_cs *engine,
 	 *
 	 * XXX: We need a real interface to do this instead of trickery.
 	 */
-	ret = i915_gem_object_set_to_gtt_domain(to->legacy_hw_ctx.rcs_state, false);
+	ret = i915_gem_object_set_to_gtt_domain(ctx->state, false);
 	if (ret)
 		goto unpin_out;
 
-	if (!to->legacy_hw_ctx.rcs_state->has_global_gtt_mapping) {
-		struct i915_vma *vma = i915_gem_obj_to_vma(to->legacy_hw_ctx.rcs_state,
-							   &dev_priv->gtt.base);
-		vma->bind_vma(vma, to->legacy_hw_ctx.rcs_state->cache_level, GLOBAL_BIND);
+	if (!ctx->state->has_global_gtt_mapping) {
+		struct i915_vma *vma = i915_gem_obj_to_vma(ctx->state,
+							   &rq->i915->gtt.base);
+		vma->bind_vma(vma, ctx->state->cache_level, GLOBAL_BIND);
 	}
 
-	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
+	if (!ctx->initialized || i915_gem_context_is_default(to))
 		hw_flags |= MI_RESTORE_INHIBIT;
 
-	ret = mi_set_context(engine, to, hw_flags);
+	trace_i915_gem_ring_switch_context(rq->engine, to, hw_flags);
+	ret = mi_set_context(rq, ctx, hw_flags);
 	if (ret)
 		goto unpin_out;
 
@@ -598,12 +613,9 @@ static int do_switch(struct intel_engine_cs *engine,
 		if (!(to->remap_slice & (1<<i)))
 			continue;
 
-		ret = i915_gem_l3_remap(engine, i);
 		/* If it failed, try again next round */
-		if (ret)
-			DRM_DEBUG_DRIVER("L3 remapping failed\n");
-		else
-			to->remap_slice &= ~(1<<i);
+		if (l3_remap(rq, i) == 0)
+			rq->remap_l3 |= 1 << i;
 	}
 
 	/* The backing object for the context is done after switching to the
@@ -613,8 +625,13 @@ static int do_switch(struct intel_engine_cs *engine,
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
-		from->legacy_hw_ctx.rcs_state->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-		i915_vma_move_to_active(i915_gem_obj_to_ggtt(from->legacy_hw_ctx.rcs_state), engine);
+		struct drm_i915_gem_object *from_obj = from->ring[rq->engine->id].state;
+
+		from_obj->base.pending_read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		ret = i915_request_add_vma(rq, i915_gem_obj_to_ggtt(from_obj),  0);
+		if (ret)
+			goto unpin_out;
+
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
 		 * object dirty. The only exception is that the context must be
@@ -622,65 +639,19 @@ static int do_switch(struct intel_engine_cs *engine,
 		 * able to defer doing this until we know the object would be
 		 * swapped, but there is no way to do that yet.
 		 */
-		from->legacy_hw_ctx.rcs_state->dirty = 1;
-		BUG_ON(from->legacy_hw_ctx.rcs_state->ring != engine);
+		from_obj->dirty = 1;
 
 		/* obj is kept alive until the next request by its active ref */
-		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
-		i915_gem_context_unreference(from);
-	}
-
-	uninitialized = !to->legacy_hw_ctx.initialized && from == NULL;
-	to->legacy_hw_ctx.initialized = true;
-
-done:
-	i915_gem_context_reference(to);
-	engine->last_context = to;
-
-	if (uninitialized) {
-		ret = i915_gem_render_state_init(engine);
-		if (ret)
-			DRM_ERROR("init render state: %d\n", ret);
+		i915_gem_object_ggtt_unpin(from_obj);
 	}
 
 	return 0;
 
 unpin_out:
-	if (engine->id == RCS)
-		i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
+	i915_gem_object_ggtt_unpin(ctx->state);
 	return ret;
 }
 
-/**
- * i915_switch_context() - perform a GPU context switch.
- * @ring: ring for which we'll execute the context switch
- * @to: the context to switch to
- *
- * The context life cycle is simple. The context refcount is incremented and
- * decremented by 1 and create and destroy. If the context is in use by the GPU,
- * it will have a refoucnt > 1. This allows us to destroy the context abstract
- * object while letting the normal object tracking destroy the backing BO.
- */
-int i915_switch_context(struct intel_engine_cs *engine,
-			struct intel_context *to)
-{
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-
-	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
-
-	if (to->legacy_hw_ctx.rcs_state == NULL) { /* We have the fake context */
-		if (to != engine->last_context) {
-			i915_gem_context_reference(to);
-			if (engine->last_context)
-				i915_gem_context_unreference(engine->last_context);
-			engine->last_context = to;
-		}
-		return 0;
-	}
-
-	return do_switch(engine, to);
-}
-
 static bool contexts_enabled(struct drm_device *dev)
 {
 	return i915.enable_execlists || to_i915(dev)->hw_context_size;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index cbdae18..6ae4813 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -121,7 +121,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
 			goto err;
 		}
 
-		drm_gem_object_reference(&obj->base);
 		list_add_tail(&obj->obj_exec_link, &objects);
 	}
 	spin_unlock(&file->table_lock);
@@ -174,7 +173,6 @@ err:
 				       struct drm_i915_gem_object,
 				       obj_exec_link);
 		list_del_init(&obj->obj_exec_link);
-		drm_gem_object_unreference(&obj->base);
 	}
 	/*
 	 * Objects already transfered to the vmas list will be unreferenced by
@@ -236,7 +234,6 @@ static void eb_destroy(struct eb_vmas *eb)
 				       exec_list);
 		list_del_init(&vma->exec_list);
 		i915_gem_execbuffer_unreserve_vma(vma);
-		drm_gem_object_unreference(&vma->obj->base);
 	}
 	kfree(eb);
 }
@@ -256,7 +253,7 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = reloc->delta + target_offset;
+	uint64_t delta = (int)reloc->delta + target_offset;
 	char *vaddr;
 	int ret;
 
@@ -292,7 +289,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint64_t delta = reloc->delta + target_offset;
+	uint64_t delta = (int)reloc->delta + target_offset;
 	uint64_t offset;
 	void __iomem *reloc_page;
 	int ret;
@@ -618,7 +615,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 	struct i915_vma *vma;
 	struct i915_address_space *vm;
 	struct list_head ordered_vmas;
-	bool has_fenced_gpu_access = INTEL_INFO(engine->dev)->gen < 4;
+	bool has_fenced_gpu_access = INTEL_INFO(engine->i915)->gen < 4;
 	int retry;
 
 	i915_gem_retire_requests__engine(engine);
@@ -706,7 +703,7 @@ err:
 }
 
 static int
-i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
+i915_gem_execbuffer_relocate_slow(struct drm_i915_private *i915,
 				  struct drm_i915_gem_execbuffer2 *args,
 				  struct drm_file *file,
 				  struct intel_engine_cs *engine,
@@ -728,10 +725,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
 		list_del_init(&vma->exec_list);
 		i915_gem_execbuffer_unreserve_vma(vma);
-		drm_gem_object_unreference(&vma->obj->base);
 	}
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&i915->dev->struct_mutex);
 
 	total = 0;
 	for (i = 0; i < count; i++)
@@ -742,7 +738,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 	if (reloc == NULL || reloc_offset == NULL) {
 		drm_free_large(reloc);
 		drm_free_large(reloc_offset);
-		mutex_lock(&dev->struct_mutex);
+		mutex_lock(&i915->dev->struct_mutex);
 		return -ENOMEM;
 	}
 
@@ -757,7 +753,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 		if (copy_from_user(reloc+total, user_relocs,
 				   exec[i].relocation_count * sizeof(*reloc))) {
 			ret = -EFAULT;
-			mutex_lock(&dev->struct_mutex);
+			mutex_lock(&i915->dev->struct_mutex);
 			goto err;
 		}
 
@@ -775,7 +771,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 					   &invalid_offset,
 					   sizeof(invalid_offset))) {
 				ret = -EFAULT;
-				mutex_lock(&dev->struct_mutex);
+				mutex_lock(&i915->dev->struct_mutex);
 				goto err;
 			}
 		}
@@ -784,9 +780,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 		total += exec[i].relocation_count;
 	}
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = i915_mutex_lock_interruptible(i915->dev);
 	if (ret) {
-		mutex_lock(&dev->struct_mutex);
+		mutex_lock(&i915->dev->struct_mutex);
 		goto err;
 	}
 
@@ -822,17 +818,19 @@ err:
 }
 
 static int
-i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *engine,
-				struct list_head *vmas)
+vmas_move_to_rq(struct list_head *vmas,
+		struct i915_gem_request *rq)
 {
 	struct i915_vma *vma;
 	uint32_t flush_domains = 0;
 	bool flush_chipset = false;
 	int ret;
 
+	/* 1: flush/serialise damage from other sources */
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		ret = i915_gem_object_sync(obj, engine);
+
+		ret = i915_gem_object_sync(obj, rq);
 		if (ret)
 			return ret;
 
@@ -843,15 +841,34 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *engine,
 	}
 
 	if (flush_chipset)
-		i915_gem_chipset_flush(engine->dev);
+		i915_gem_chipset_flush(rq->i915->dev);
 
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
 		wmb();
 
-	/* Unconditionally invalidate gpu caches and ensure that we do flush
-	 * any residual writes from the previous batch.
-	 */
-	return intel_engine_invalidate_all_caches(engine);
+	/* 2: invalidate the caches from this ring after emitting semaphores */
+	ret = i915_request_emit_flush(rq, I915_INVALIDATE_CACHES);
+	if (ret)
+		return ret;
+
+	/* 3: track flushes and objects for this rq */
+	list_for_each_entry(vma, vmas, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+		unsigned fenced;
+
+		fenced = 0;
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			fenced |= VMA_IS_FENCED;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+				fenced |= VMA_HAS_FENCE;
+		}
+
+		ret = i915_request_add_vma(rq, vma, fenced);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static bool
@@ -864,7 +881,7 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 }
 
 static int
-validate_exec_list(struct drm_device *dev,
+validate_exec_list(struct drm_i915_private *dev_priv,
 		   struct drm_i915_gem_exec_object2 *exec,
 		   int count)
 {
@@ -874,7 +891,7 @@ validate_exec_list(struct drm_device *dev,
 	int i;
 
 	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
-	if (USES_FULL_PPGTT(dev))
+	if (USES_FULL_PPGTT(dev_priv))
 		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
 	for (i = 0; i < count; i++) {
@@ -912,8 +929,9 @@ validate_exec_list(struct drm_device *dev,
 }
 
 static struct intel_context *
-i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
-			  struct intel_engine_cs *engine, const u32 ctx_id)
+i915_gem_validate_context(struct drm_file *file,
+			  struct intel_engine_cs *engine,
+			  const u32 ctx_id)
 {
 	struct intel_context *ctx = NULL;
 	struct i915_ctx_hang_stats *hs;
@@ -931,162 +949,149 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 		return ERR_PTR(-EIO);
 	}
 
-	if (i915.enable_execlists && !ctx->ring[engine->id].state) {
-		int ret = intel_lr_context_deferred_create(ctx, engine);
-		if (ret) {
-			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
-			return ERR_PTR(ret);
-		}
-	}
-
 	return ctx;
 }
 
-void
-i915_gem_execbuffer_move_to_active(struct list_head *vmas,
-				   struct intel_engine_cs *engine)
+static int
+reset_sol_offsets(struct i915_gem_request *rq)
 {
-	u32 seqno = intel_engine_get_seqno(engine);
-	struct i915_vma *vma;
-
-	list_for_each_entry(vma, vmas, exec_list) {
-		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-		struct drm_i915_gem_object *obj = vma->obj;
-		u32 old_read = obj->base.read_domains;
-		u32 old_write = obj->base.write_domain;
-
-		obj->base.write_domain = obj->base.pending_write_domain;
-		if (obj->base.write_domain == 0)
-			obj->base.pending_read_domains |= obj->base.read_domains;
-		obj->base.read_domains = obj->base.pending_read_domains;
+	struct intel_ringbuffer *ring;
+	int i;
 
-		i915_vma_move_to_active(vma, engine);
-		if (obj->base.write_domain) {
-			obj->dirty = 1;
-			obj->last_write_seqno = seqno;
+	if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) {
+		DRM_DEBUG("sol reset is gen7/rcs only\n");
+		return -EINVAL;
+	}
 
-			intel_fb_obj_invalidate(obj, engine);
+	ring = intel_ring_begin(rq, 4 * 3);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-			/* update for the implicit flush after a batch */
-			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
-		}
-		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			obj->last_fenced_seqno = seqno;
-			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
-				struct drm_i915_private *dev_priv = to_i915(engine->dev);
-				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
-					       &dev_priv->mm.fence_list);
-			}
-		}
-
-		trace_i915_gem_object_change_domain(obj, old_read, old_write);
+	for (i = 0; i < 4; i++) {
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
+		intel_ring_emit(ring, 0);
 	}
-}
-
-void
-i915_gem_execbuffer_retire_commands(struct drm_device *dev,
-				    struct drm_file *file,
-				    struct intel_engine_cs *engine,
-				    struct drm_i915_gem_object *obj)
-{
-	/* Unconditionally force add_request to emit a full flush. */
-	engine->gpu_caches_dirty = true;
 
-	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(engine, file, obj, NULL);
+	intel_ring_advance(ring);
+	return 0;
 }
 
 static int
-i915_reset_gen7_sol_offsets(struct drm_device *dev,
-			    struct intel_engine_cs *engine)
+emit_box(struct i915_gem_request *rq,
+	 struct drm_clip_rect *box,
+	 int DR1, int DR4)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret, i;
+	struct intel_ringbuffer *ring;
 
-	if (!IS_GEN7(dev) || engine != &dev_priv->engine[RCS]) {
-		DRM_DEBUG("sol reset is gen7/rcs only\n");
+	if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
+	    box->y2 <= 0 || box->x2 <= 0) {
+		DRM_DEBUG("Bad box %d,%d..%d,%d\n",
+			  box->x1, box->y1, box->x2, box->y2);
 		return -EINVAL;
 	}
 
-	ret = intel_ring_begin(engine, 4 * 3);
-	if (ret)
-		return ret;
+	if (INTEL_INFO(rq->i915)->gen >= 4) {
+		ring = intel_ring_begin(rq, 4);
+		if (IS_ERR(ring))
+			return PTR_ERR(ring);
 
-	for (i = 0; i < 4; i++) {
-		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(engine, GEN7_SO_WRITE_OFFSET(i));
-		intel_ring_emit(engine, 0);
+		intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO_I965);
+		intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
+		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
+		intel_ring_emit(ring, DR4);
+	} else {
+		ring = intel_ring_begin(rq, 6);
+		if (IS_ERR(ring))
+			return PTR_ERR(ring);
+
+		intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO);
+		intel_ring_emit(ring, DR1);
+		intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
+		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
+		intel_ring_emit(ring, DR4);
+		intel_ring_emit(ring, 0);
 	}
-
-	intel_ring_advance(engine);
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
-static int
-i915_emit_box(struct intel_engine_cs *engine,
-	      struct drm_clip_rect *box,
-	      int DR1, int DR4)
+static int set_contants_base(struct i915_gem_request *rq,
+			     struct drm_i915_gem_execbuffer2 *args)
 {
-	int ret;
+	int mode = args->flags & I915_EXEC_CONSTANTS_MASK;
+	u32 mask = I915_EXEC_CONSTANTS_MASK;
 
-	if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
-	    box->y2 <= 0 || box->x2 <= 0) {
-		DRM_ERROR("Bad box %d,%d..%d,%d\n",
-			  box->x1, box->y1, box->x2, box->y2);
+	switch (mode) {
+	case I915_EXEC_CONSTANTS_REL_GENERAL:
+	case I915_EXEC_CONSTANTS_ABSOLUTE:
+	case I915_EXEC_CONSTANTS_REL_SURFACE:
+		if (mode != 0 && rq->engine->id != RCS) {
+			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
+			return -EINVAL;
+		}
+
+		if (mode != rq->engine->i915->relative_constants_mode) {
+			if (INTEL_INFO(rq->engine->i915)->gen < 4) {
+				DRM_DEBUG("no rel constants on pre-gen4\n");
+				return -EINVAL;
+			}
+
+			if (INTEL_INFO(rq->engine->i915)->gen > 5 &&
+			    mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
+				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
+				return -EINVAL;
+			}
+
+			/* The HW changed the meaning on this bit on gen6 */
+			if (INTEL_INFO(rq->i915)->gen >= 6)
+				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
+		}
+		break;
+	default:
+		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
 		return -EINVAL;
 	}
 
-	if (INTEL_INFO(engine->dev)->gen >= 4) {
-		ret = intel_ring_begin(engine, 4);
-		if (ret)
-			return ret;
+	if (rq->engine->id == RCS && mode != rq->i915->relative_constants_mode) {
+		struct intel_ringbuffer *ring;
 
-		intel_ring_emit(engine, GFX_OP_DRAWRECT_INFO_I965);
-		intel_ring_emit(engine, (box->x1 & 0xffff) | box->y1 << 16);
-		intel_ring_emit(engine, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
-		intel_ring_emit(engine, DR4);
-	} else {
-		ret = intel_ring_begin(engine, 6);
-		if (ret)
-			return ret;
+		ring = intel_ring_begin(rq, 4);
+		if (IS_ERR(ring))
+			return PTR_ERR(ring);
+
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, INSTPM);
+		intel_ring_emit(ring, mask << 16 | mode);
+		intel_ring_advance(ring);
 
-		intel_ring_emit(engine, GFX_OP_DRAWRECT_INFO);
-		intel_ring_emit(engine, DR1);
-		intel_ring_emit(engine, (box->x1 & 0xffff) | box->y1 << 16);
-		intel_ring_emit(engine, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
-		intel_ring_emit(engine, DR4);
-		intel_ring_emit(engine, 0);
+		rq->i915->relative_constants_mode = mode;
 	}
-	intel_ring_advance(engine);
 
 	return 0;
 }
 
-
-int
-i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
-			       struct intel_engine_cs *engine,
-			       struct intel_context *ctx,
-			       struct drm_i915_gem_execbuffer2 *args,
-			       struct list_head *vmas,
-			       struct drm_i915_gem_object *batch_obj,
-			       u64 exec_start, u32 flags)
+static int
+submit_execbuf(struct intel_engine_cs *engine,
+	       struct intel_context *ctx,
+	       struct drm_i915_gem_execbuffer2 *args,
+	       struct list_head *vmas,
+	       struct drm_i915_gem_object *batch_obj,
+	       u64 exec_start, u32 flags)
 {
 	struct drm_clip_rect *cliprects = NULL;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u64 exec_len;
-	int instp_mode;
-	u32 instp_mask;
+	struct i915_gem_request *rq = NULL;
 	int i, ret = 0;
 
 	if (args->num_cliprects != 0) {
-		if (engine != &dev_priv->engine[RCS]) {
+		if (engine->id != RCS) {
 			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
 			return -EINVAL;
 		}
 
-		if (INTEL_INFO(dev)->gen >= 5) {
+		if (INTEL_INFO(engine->i915)->gen >= 5) {
 			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
 			return -EINVAL;
 		}
@@ -1108,7 +1113,6 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 		if (copy_from_user(cliprects,
 				   to_user_ptr(args->cliprects_ptr),
 				   sizeof(*cliprects)*args->num_cliprects)) {
-			ret = -EFAULT;
 			goto error;
 		}
 	} else {
@@ -1123,133 +1127,89 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
 		}
 	}
 
-	ret = i915_gem_execbuffer_move_to_gpu(engine, vmas);
-	if (ret)
-		goto error;
+	rq = intel_engine_alloc_request(engine, ctx);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
 
-	ret = i915_switch_context(engine, ctx);
+	ret = vmas_move_to_rq(vmas, rq);
 	if (ret)
 		goto error;
 
-	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
-	instp_mask = I915_EXEC_CONSTANTS_MASK;
-	switch (instp_mode) {
-	case I915_EXEC_CONSTANTS_REL_GENERAL:
-	case I915_EXEC_CONSTANTS_ABSOLUTE:
-	case I915_EXEC_CONSTANTS_REL_SURFACE:
-		if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) {
-			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
-			ret = -EINVAL;
-			goto error;
-		}
-
-		if (instp_mode != dev_priv->relative_constants_mode) {
-			if (INTEL_INFO(dev)->gen < 4) {
-				DRM_DEBUG("no rel constants on pre-gen4\n");
-				ret = -EINVAL;
-				goto error;
-			}
-
-			if (INTEL_INFO(dev)->gen > 5 &&
-			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
-				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
-				ret = -EINVAL;
-				goto error;
-			}
-
-			/* The HW changed the meaning on this bit on gen6 */
-			if (INTEL_INFO(dev)->gen >= 6)
-				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
-		}
-		break;
-	default:
-		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
-		ret = -EINVAL;
+	ret = set_contants_base(rq, args);
+	if (ret)
 		goto error;
-	}
-
-	if (engine == &dev_priv->engine[RCS] &&
-			instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_ring_begin(engine, 4);
-		if (ret)
-			goto error;
-
-		intel_ring_emit(engine, MI_NOOP);
-		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(engine, INSTPM);
-		intel_ring_emit(engine, instp_mask << 16 | instp_mode);
-		intel_ring_advance(engine);
-
-		dev_priv->relative_constants_mode = instp_mode;
-	}
 
 	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
-		ret = i915_reset_gen7_sol_offsets(dev, engine);
+		ret = reset_sol_offsets(rq);
 		if (ret)
 			goto error;
 	}
 
-	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
-			ret = i915_emit_box(engine, &cliprects[i],
-					    args->DR1, args->DR4);
+			ret = emit_box(rq, &cliprects[i],
+				       args->DR1, args->DR4);
 			if (ret)
 				goto error;
 
-			ret = engine->dispatch_execbuffer(engine,
-							exec_start, exec_len,
-							flags);
+			ret = i915_request_emit_batchbuffer(rq, batch_obj,
+							    exec_start, args->batch_len,
+							    flags);
 			if (ret)
 				goto error;
 		}
 	} else {
-		ret = engine->dispatch_execbuffer(engine,
-						exec_start, exec_len,
-						flags);
+		ret = i915_request_emit_batchbuffer(rq, batch_obj,
+						    exec_start, args->batch_len,
+						    flags);
 		if (ret)
-			return ret;
+			goto error;
 	}
 
-	trace_i915_gem_ring_dispatch(engine, intel_engine_get_seqno(engine), flags);
+	ret = i915_request_commit(rq);
+	if (ret)
+		goto error;
+
+	i915_queue_hangcheck(rq->i915->dev);
 
-	i915_gem_execbuffer_move_to_active(vmas, engine);
-	i915_gem_execbuffer_retire_commands(dev, file, engine, batch_obj);
+	cancel_delayed_work_sync(&rq->i915->mm.idle_work);
+	queue_delayed_work(rq->i915->wq,
+			   &rq->i915->mm.retire_work,
+			   round_jiffies_up_relative(HZ));
+	intel_mark_busy(rq->i915->dev);
 
 error:
+	i915_request_put(rq);
 	kfree(cliprects);
 	return ret;
 }
 
 /**
  * Find one BSD ring to dispatch the corresponding BSD command.
- * The Ring ID is returned.
  */
-static int gen8_dispatch_bsd_engine(struct drm_device *dev,
-				  struct drm_file *file)
+static struct intel_engine_cs *
+gen8_select_bsd_engine(struct drm_i915_private *dev_priv,
+		       struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 
-	/* Check whether the file_priv is using one ring */
-	if (file_priv->bsd_engine)
-		return file_priv->bsd_engine->id;
-	else {
-		/* If no, use the ping-pong mechanism to select one ring */
-		int ring_id;
+	/* Use the ping-pong mechanism to select one ring for this client */
+	if (file_priv->bsd_engine == NULL) {
+		int id;
 
-		mutex_lock(&dev->struct_mutex);
+		mutex_lock(&dev_priv->dev->struct_mutex);
 		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
-			ring_id = VCS;
+			id = VCS;
 			dev_priv->mm.bsd_ring_dispatch_index = 1;
 		} else {
-			ring_id = VCS2;
+			id = VCS2;
 			dev_priv->mm.bsd_ring_dispatch_index = 0;
 		}
-		file_priv->bsd_engine = &dev_priv->engine[ring_id];
-		mutex_unlock(&dev->struct_mutex);
-		return ring_id;
+		file_priv->bsd_engine = &dev_priv->engine[id];
+		mutex_unlock(&dev_priv->dev->struct_mutex);
 	}
+
+	return file_priv->bsd_engine;
 }
 
 static struct drm_i915_gem_object *
@@ -1272,12 +1232,11 @@ eb_get_batch(struct eb_vmas *eb)
 }
 
 static int
-i915_gem_do_execbuffer(struct drm_device *dev, void *data,
+i915_gem_do_execbuffer(struct drm_i915_private *dev_priv, void *data,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
 		       struct drm_i915_gem_exec_object2 *exec)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct eb_vmas *eb;
 	struct drm_i915_gem_object *batch_obj;
 	struct intel_engine_cs *engine;
@@ -1292,7 +1251,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
 
-	ret = validate_exec_list(dev, exec, args->buffer_count);
+	ret = validate_exec_list(dev_priv, exec, args->buffer_count);
 	if (ret)
 		return ret;
 
@@ -1315,11 +1274,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
 		engine = &dev_priv->engine[RCS];
 	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
-		if (HAS_BSD2(dev)) {
-			int ring_id;
-			ring_id = gen8_dispatch_bsd_engine(dev, file);
-			engine = &dev_priv->engine[ring_id];
-		} else
+		if (HAS_BSD2(dev_priv))
+			engine = gen8_select_bsd_engine(dev_priv, file);
+		else
 			engine = &dev_priv->engine[VCS];
 	} else
 		engine = &dev_priv->engine[(args->flags & I915_EXEC_RING_MASK) - 1];
@@ -1337,19 +1294,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	intel_runtime_pm_get(dev_priv);
 
-	ret = i915_mutex_lock_interruptible(dev);
+	ret = i915_mutex_lock_interruptible(dev_priv->dev);
 	if (ret)
 		goto pre_mutex_err;
 
 	if (dev_priv->ums.mm_suspended) {
-		mutex_unlock(&dev->struct_mutex);
+		mutex_unlock(&dev_priv->dev->struct_mutex);
 		ret = -EBUSY;
 		goto pre_mutex_err;
 	}
 
-	ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
+	ctx = i915_gem_validate_context(file, engine, ctx_id);
 	if (IS_ERR(ctx)) {
-		mutex_unlock(&dev->struct_mutex);
+		mutex_unlock(&dev_priv->dev->struct_mutex);
 		ret = PTR_ERR(ctx);
 		goto pre_mutex_err;
 	}
@@ -1364,7 +1321,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	eb = eb_create(args);
 	if (eb == NULL) {
 		i915_gem_context_unreference(ctx);
-		mutex_unlock(&dev->struct_mutex);
+		mutex_unlock(&dev_priv->dev->struct_mutex);
 		ret = -ENOMEM;
 		goto pre_mutex_err;
 	}
@@ -1388,9 +1345,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		ret = i915_gem_execbuffer_relocate(eb);
 	if (ret) {
 		if (ret == -EFAULT) {
-			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, engine,
+			ret = i915_gem_execbuffer_relocate_slow(dev_priv, args, file, engine,
 								eb, exec);
-			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+			BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
 		}
 		if (ret)
 			goto err;
@@ -1444,8 +1401,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
-	ret = dev_priv->gt.do_execbuf(dev, file, engine, ctx, args,
-				      &eb->vmas, batch_obj, exec_start, flags);
+	ret = submit_execbuf(engine, ctx, args,
+			     &eb->vmas, batch_obj, exec_start, flags);
 
 	/*
 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
@@ -1460,7 +1417,7 @@ err:
 	i915_gem_context_unreference(ctx);
 	eb_destroy(eb);
 
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->dev->struct_mutex);
 
 pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
@@ -1532,7 +1489,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 	exec2.flags = I915_EXEC_RENDER;
 	i915_execbuffer2_set_context_id(exec2, 0);
 
-	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
+	ret = i915_gem_do_execbuffer(to_i915(dev), data, file, &exec2, exec2_list);
 	if (!ret) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			to_user_ptr(args->buffers_ptr);
@@ -1596,7 +1553,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
+	ret = i915_gem_do_execbuffer(to_i915(dev), data, file, args, exec2_list);
 	if (!ret) {
 		/* Copy the new buffer offsets back to the user's exec list. */
 		struct drm_i915_gem_exec_object2 __user *user_exec_list =
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8574cb8..0f1b17a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -203,38 +203,29 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
-static int gen8_write_pdp(struct intel_engine_cs *engine, unsigned entry,
-			   uint64_t val, bool synchronous)
+static int gen8_write_pdp(struct i915_gem_request *rq, unsigned entry, uint64_t val)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	int ret;
+	struct intel_ringbuffer *ring;
 
 	BUG_ON(entry >= 4);
 
-	if (synchronous) {
-		I915_WRITE(GEN8_RING_PDP_UDW(engine, entry), val >> 32);
-		I915_WRITE(GEN8_RING_PDP_LDW(engine, entry), (u32)val);
-		return 0;
-	}
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	ret = intel_ring_begin(engine, 6);
-	if (ret)
-		return ret;
-
-	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(engine, GEN8_RING_PDP_UDW(engine, entry));
-	intel_ring_emit(engine, (u32)(val >> 32));
-	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(engine, GEN8_RING_PDP_LDW(engine, entry));
-	intel_ring_emit(engine, (u32)(val));
-	intel_ring_advance(engine);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN8_RING_PDP_UDW(rq->engine, entry));
+	intel_ring_emit(ring, (u32)(val >> 32));
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN8_RING_PDP_LDW(rq->engine, entry));
+	intel_ring_emit(ring, (u32)(val));
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
-static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *engine,
-			  bool synchronous)
+static int gen8_mm_switch(struct i915_gem_request *rq,
+			  struct i915_hw_ppgtt *ppgtt)
 {
 	int i, ret;
 
@@ -243,7 +234,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 
 	for (i = used_pd - 1; i >= 0; i--) {
 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
-		ret = gen8_write_pdp(engine, i, addr, synchronous);
+		ret = gen8_write_pdp(rq, i, addr);
 		if (ret)
 			return ret;
 	}
@@ -707,94 +698,58 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
 	return (ppgtt->pd_offset / 64) << 16;
 }
 
-static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_engine_cs *engine,
-			 bool synchronous)
+static int hsw_mm_switch(struct i915_gem_request *rq,
+			 struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ringbuffer *ring;
 	int ret;
 
-	/* If we're in reset, we can assume the GPU is sufficiently idle to
-	 * manually frob these bits. Ideally we could use the ring functions,
-	 * except our error handling makes it quite difficult (can't use
-	 * intel_ring_begin, ring->flush, or intel_ring_advance)
-	 *
-	 * FIXME: We should try not to special case reset
-	 */
-	if (synchronous ||
-	    i915_reset_in_progress(&dev_priv->gpu_error)) {
-		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-		I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
-		POSTING_READ(RING_PP_DIR_BASE(engine));
-		return 0;
-	}
-
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->flush(engine, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = i915_request_emit_flush(rq, I915_INVALIDATE_CACHES);
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(engine, 6);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit(engine, RING_PP_DIR_DCLV(engine));
-	intel_ring_emit(engine, PP_DIR_DCLV_2G);
-	intel_ring_emit(engine, RING_PP_DIR_BASE(engine));
-	intel_ring_emit(engine, get_pd_offset(ppgtt));
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_advance(engine);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
+	intel_ring_emit(ring, RING_PP_DIR_DCLV(rq->engine));
+	intel_ring_emit(ring, PP_DIR_DCLV_2G);
+	intel_ring_emit(ring, RING_PP_DIR_BASE(rq->engine));
+	intel_ring_emit(ring, get_pd_offset(ppgtt));
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
-static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *engine,
-			  bool synchronous)
+static int gen7_mm_switch(struct i915_gem_request *rq,
+			  struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ringbuffer *ring;
 	int ret;
 
-	/* If we're in reset, we can assume the GPU is sufficiently idle to
-	 * manually frob these bits. Ideally we could use the ring functions,
-	 * except our error handling makes it quite difficult (can't use
-	 * intel_ring_begin, ring->flush, or intel_ring_advance)
-	 *
-	 * FIXME: We should try not to special case reset
-	 */
-	if (synchronous ||
-	    i915_reset_in_progress(&dev_priv->gpu_error)) {
-		WARN_ON(ppgtt != dev_priv->mm.aliasing_ppgtt);
-		I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
-		I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
-		POSTING_READ(RING_PP_DIR_BASE(engine));
-		return 0;
-	}
-
 	/* NB: TLBs must be flushed and invalidated before a switch */
-	ret = engine->flush(engine, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	ret = i915_request_emit_flush(rq, I915_INVALIDATE_CACHES);
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(engine, 6);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(2));
-	intel_ring_emit(engine, RING_PP_DIR_DCLV(engine));
-	intel_ring_emit(engine, PP_DIR_DCLV_2G);
-	intel_ring_emit(engine, RING_PP_DIR_BASE(engine));
-	intel_ring_emit(engine, get_pd_offset(ppgtt));
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_advance(engine);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
+	intel_ring_emit(ring, RING_PP_DIR_DCLV(rq->engine));
+	intel_ring_emit(ring, PP_DIR_DCLV_2G);
+	intel_ring_emit(ring, RING_PP_DIR_BASE(rq->engine));
+	intel_ring_emit(ring, get_pd_offset(ppgtt));
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
 	/* XXX: RCS is the only one to auto invalidate the TLBs? */
-	if (engine->id != RCS) {
-		ret = engine->flush(engine, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	if (rq->engine->id != RCS) {
+		ret = i915_request_emit_flush(rq, I915_INVALIDATE_CACHES);
 		if (ret)
 			return ret;
 	}
@@ -802,22 +757,10 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
-static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
-			  struct intel_engine_cs *engine,
-			  bool synchronous)
+static int gen6_mm_switch(struct i915_gem_request *rq,
+			  struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (!synchronous)
-		return 0;
-
-	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
-	I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
-
-	POSTING_READ(RING_PP_DIR_DCLV(engine));
-
-	return 0;
+	return -ENODEV;
 }
 
 static void gen8_ppgtt_enable(struct drm_device *dev)
@@ -832,10 +775,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
 	if (i915.enable_execlists)
 		return;
 
-	for_each_engine(engine, dev_priv, j) {
+	for_each_engine(engine, dev_priv, j)
 		I915_WRITE(RING_MODE_GEN7(engine),
 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-	}
 }
 
 static void gen7_ppgtt_enable(struct drm_device *dev)
@@ -861,6 +803,11 @@ static void gen7_ppgtt_enable(struct drm_device *dev)
 		/* GFX_MODE is per-ring on gen7+ */
 		I915_WRITE(RING_MODE_GEN7(engine),
 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+
+		I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
+		I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(dev_priv->mm.aliasing_ppgtt));
+
+		POSTING_READ(RING_PP_DIR_DCLV(engine));
 	}
 }
 
@@ -868,6 +815,8 @@ static void gen6_ppgtt_enable(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t ecochk, gab_ctl, ecobits;
+	struct intel_engine_cs *engine;
+	int i;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
@@ -880,6 +829,13 @@ static void gen6_ppgtt_enable(struct drm_device *dev)
 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
 
 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+
+	for_each_engine(engine, dev_priv, i) {
+		I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
+		I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(dev_priv->mm.aliasing_ppgtt));
+
+		POSTING_READ(RING_PP_DIR_DCLV(engine));
+	}
 }
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
@@ -1170,11 +1126,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 
 int i915_ppgtt_init_hw(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-	int i, ret = 0;
-
 	if (!USES_PPGTT(dev))
 		return 0;
 
@@ -1187,15 +1138,7 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 	else
 		WARN_ON(1);
 
-	if (ppgtt) {
-		for_each_engine(engine, dev_priv, i) {
-			ret = ppgtt->switch_mm(ppgtt, engine, true);
-			if (ret != 0)
-				return ret;
-		}
-	}
-
-	return ret;
+	return 0;
 }
 struct i915_hw_ppgtt *
 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 6280648..0802832 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -263,9 +263,8 @@ struct i915_hw_ppgtt {
 	struct drm_i915_file_private *file_priv;
 
 	int (*enable)(struct i915_hw_ppgtt *ppgtt);
-	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
-			 struct intel_engine_cs *ring,
-			 bool synchronous);
+	int (*switch_mm)(struct i915_gem_request *rq,
+			 struct i915_hw_ppgtt *ppgtt);
 	void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m);
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index e60be3f..335182d 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -36,7 +36,7 @@ struct render_state {
 };
 
 static const struct intel_renderstate_rodata *
-render_state_get_rodata(struct drm_device *dev, const int gen)
+render_state_get_rodata(const int gen)
 {
 	switch (gen) {
 	case 6:
@@ -50,19 +50,19 @@ render_state_get_rodata(struct drm_device *dev, const int gen)
 	return NULL;
 }
 
-static int render_state_init(struct render_state *so, struct drm_device *dev)
+static int render_state_init(struct render_state *so, struct i915_gem_request *rq)
 {
 	int ret;
 
-	so->gen = INTEL_INFO(dev)->gen;
-	so->rodata = render_state_get_rodata(dev, so->gen);
+	so->gen = INTEL_INFO(rq->i915)->gen;
+	so->rodata = render_state_get_rodata(so->gen);
 	if (so->rodata == NULL)
 		return 0;
 
 	if (so->rodata->batch_items * 4 > 4096)
 		return -EINVAL;
 
-	so->obj = i915_gem_alloc_object(dev, 4096);
+	so->obj = i915_gem_alloc_object(rq->i915->dev, 4096);
 	if (so->obj == NULL)
 		return -ENOMEM;
 
@@ -133,15 +133,15 @@ static void render_state_fini(struct render_state *so)
 	drm_gem_object_unreference(&so->obj->base);
 }
 
-int i915_gem_render_state_init(struct intel_engine_cs *ring)
+int i915_gem_render_state_init(struct i915_gem_request *rq)
 {
 	struct render_state so;
 	int ret;
 
-	if (WARN_ON(ring->id != RCS))
+	if (WARN_ON(rq->engine->id != RCS))
 		return -ENOENT;
 
-	ret = render_state_init(&so, ring->dev);
+	ret = render_state_init(&so, rq);
 	if (ret)
 		return ret;
 
@@ -152,16 +152,16 @@ int i915_gem_render_state_init(struct intel_engine_cs *ring)
 	if (ret)
 		goto out;
 
-	ret = ring->dispatch_execbuffer(ring,
-					so.ggtt_offset,
-					so.rodata->batch_items * 4,
-					I915_DISPATCH_SECURE);
+	ret = i915_request_emit_batchbuffer(rq, NULL,
+					    so.ggtt_offset,
+					    so.rodata->batch_items * 4,
+					    I915_DISPATCH_SECURE);
 	if (ret)
 		goto out;
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
+	so.obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
+	ret = i915_request_add_vma(rq, i915_gem_obj_to_ggtt(so.obj), 0);
 
-	ret = __i915_add_request(ring, NULL, so.obj, NULL);
 	/* __i915_add_request moves object to inactive if it fails */
 out:
 	render_state_fini(&so);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
new file mode 100644
index 0000000..5da19f8
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -0,0 +1,600 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <drm/drmP.h>
+#include "i915_drv.h"
+#include <drm/i915_drm.h>
+#include "i915_trace.h"
+#include "intel_drv.h"
+
+struct i915_gem_request__vma {
+	struct list_head link;
+	struct i915_vma *vma;
+	u32 write, fence;
+};
+
+static int check_wedged(struct i915_gem_request *rq,
+			bool interruptible)
+{
+	struct i915_gpu_error *error = &rq->i915->gpu_error;
+	unsigned wedged = atomic_read(&error->reset_counter);
+
+	if (wedged & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)) {
+		/* Non-interruptible callers can't handle -EAGAIN, hence return
+		 * -EIO unconditionally for these. */
+		if (!interruptible)
+			return -EIO;
+
+		/* Recovery complete, but the reset failed ... */
+		if (wedged & I915_WEDGED)
+			return -EIO;
+
+		return -EAGAIN;
+	}
+
+	if (wedged != rq->reset_counter)
+		return -EIO;
+
+	return 0;
+}
+
+int
+i915_request_add_vma(struct i915_gem_request *rq,
+		     struct i915_vma *vma,
+		     unsigned fenced)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	u32 old_read = obj->base.read_domains;
+	u32 old_write = obj->base.write_domain;
+	struct i915_gem_request__vma *ref;
+
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+	BUG_ON(!rq->outstanding);
+
+	obj->base.write_domain = obj->base.pending_write_domain;
+	if (obj->base.write_domain == 0)
+		obj->base.pending_read_domains |= obj->base.read_domains;
+	obj->base.read_domains = obj->base.pending_read_domains;
+
+	obj->base.pending_read_domains = 0;
+	obj->base.pending_write_domain = 0;
+
+	trace_i915_gem_object_change_domain(obj, old_read, old_write);
+	if (obj->base.read_domains == 0)
+		return 0;
+
+	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
+	if (ref == NULL)
+		return -ENOMEM;
+
+	list_add(&ref->link, &rq->vmas);
+	ref->vma = vma;
+	ref->write = obj->base.write_domain;
+	ref->fence = fenced;
+	drm_gem_object_reference(&obj->base);
+
+	if (obj->base.write_domain) {
+		rq->pending_flush |= I915_FLUSH_CACHES;
+		intel_fb_obj_invalidate(obj, rq);
+	}
+
+	/* update for the implicit flush after the rq */
+	obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+	return 0;
+}
+
+static void vma_free(struct i915_gem_request__vma *ref)
+{
+	drm_gem_object_unreference(&ref->vma->obj->base);
+	list_del(&ref->link);
+	kfree(ref);
+}
+
+int
+i915_request_emit_flush(struct i915_gem_request *rq,
+			unsigned flags)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	int ret;
+
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+	BUG_ON(!rq->outstanding);
+
+	if ((flags & rq->pending_flush) == 0)
+		return 0;
+
+	trace_i915_gem_request_emit_flush(rq);
+	ret = engine->emit_flush(rq, rq->pending_flush);
+	if (ret)
+		return ret;
+
+	rq->pending_flush = 0;
+	return 0;
+}
+
+int
+__i915_request_emit_breadcrumb(struct i915_gem_request *rq, int id)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	u32 seqno;
+	int ret;
+
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+
+	if (rq->breadcrumb[id])
+		return 0;
+
+	if (rq->outstanding) {
+		ret = i915_request_emit_flush(rq, I915_FLUSH_CACHES);
+		if (ret)
+			return ret;
+
+		trace_i915_gem_request_emit_breadcrumb(rq);
+		if (id == engine->id)
+			ret = engine->emit_breadcrumb(rq);
+		else
+			ret = engine->semaphore.signal(rq, id);
+		if (ret)
+			return ret;
+
+		seqno = rq->seqno;
+	} else if (__i915_seqno_passed(rq->seqno, engine->breadcrumb[id])) {
+		struct i915_gem_request *tmp;
+
+		tmp = intel_engine_alloc_request(engine,
+						 rq->ring->last_context);
+		if (IS_ERR(tmp))
+			return PTR_ERR(tmp);
+
+		/* Masquerade as a continuation of the earlier request */
+		tmp->reset_counter = rq->reset_counter;
+
+		ret = __i915_request_emit_breadcrumb(tmp, id);
+		if (ret == 0)
+			ret = i915_request_commit(tmp);
+
+		i915_request_put(tmp);
+		if (ret)
+			return ret;
+
+		seqno = tmp->seqno;
+	} else
+		seqno = engine->breadcrumb[id];
+
+	rq->breadcrumb[id] = seqno;
+	return 0;
+}
+
+int
+i915_request_emit_batchbuffer(struct i915_gem_request *rq,
+			      struct drm_i915_gem_object *batch,
+			      uint64_t start, uint32_t len,
+			      unsigned flags)
+{
+	struct intel_engine_cs *engine = rq->engine;
+	int ret;
+
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+	BUG_ON(!rq->outstanding);
+	BUG_ON(rq->breadcrumb[rq->engine->id]);
+
+	/* Whilst this request exists, batch_obj will be on the
+	 * active_list, and so will hold the active reference. Only when this
+	 * request is retired will the batch_obj be moved onto the
+	 * inactive_list and lose its active reference. Hence we do not need
+	 * to explicitly hold another reference here.
+	 */
+	trace_i915_gem_request_emit_batch(rq);
+	ret = engine->emit_batchbuffer(rq, start, len, flags);
+	if (ret)
+		return ret;
+
+	rq->batch_obj = batch;
+	return 0;
+}
+
+static void
+add_to_client(struct i915_gem_request *rq)
+{
+	struct drm_i915_file_private *file_priv = rq->ctx->file_priv;
+
+	if (file_priv) {
+		spin_lock(&file_priv->mm.lock);
+		list_add_tail(&rq->client_list,
+			      &file_priv->mm.request_list);
+		rq->file_priv = file_priv;
+		spin_unlock(&file_priv->mm.lock);
+	}
+}
+
+static void
+remove_from_client(struct i915_gem_request *rq)
+{
+	struct drm_i915_file_private *file_priv = rq->file_priv;
+
+	if (!file_priv)
+		return;
+
+	spin_lock(&file_priv->mm.lock);
+	if (rq->file_priv) {
+		list_del(&rq->client_list);
+		rq->file_priv = NULL;
+	}
+	spin_unlock(&file_priv->mm.lock);
+}
+
+static void add_to_obj(struct i915_gem_request *rq,
+		       struct i915_gem_request__vma *ref)
+{
+	struct i915_vma *vma = ref->vma;
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct intel_engine_cs *engine = rq->engine;
+
+	/* Add a reference if we're newly entering the active list. */
+	if (obj->last_read[engine->id].request == NULL && obj->active++ == 0)
+		drm_gem_object_reference(&obj->base);
+
+	obj->last_read[engine->id].request = rq;
+	list_move_tail(&obj->last_read[engine->id].engine_list,
+		       &engine->read_list);
+
+	if (ref->write) {
+		obj->dirty = 1;
+		obj->last_write.request = rq;
+		list_move_tail(&obj->last_write.engine_list,
+			       &engine->write_list);
+	}
+
+	if (ref->fence & VMA_IS_FENCED) {
+		obj->last_fence.request = rq;
+		list_move_tail(&obj->last_fence.engine_list,
+			       &engine->fence_list);
+		if (ref->fence & VMA_HAS_FENCE)
+			list_move_tail(&rq->i915->fence_regs[obj->fence_reg].lru_list,
+					&rq->i915->mm.fence_list);
+	}
+
+	list_move_tail(&vma->mm_list, &vma->vm->active_list);
+}
+
+static bool leave_breadcrumb(struct i915_gem_request *rq)
+{
+	if (rq->breadcrumb[rq->engine->id])
+		return false;
+
+	/* Semaphores are not stable across wrap-around.
+	 * Be conservative and always explicitly update
+	 * the breadcrumbs if the seqno wraps.
+	 */
+	if (rq->seqno < rq->engine->breadcrumb[rq->engine->id])
+		return true;
+
+	/* Auto-report HEAD every 4k to make sure that we can always wait on
+	 * some available ring space in the future. This also caps the
+	 * latency of future waits for missed breadcrumbs.
+	 */
+	if (__intel_ring_space(rq->ring->tail, rq->ring->breadcrumb_tail,
+			       rq->ring->size, 0) >= PAGE_SIZE)
+		return true;
+
+	return false;
+}
+
+int i915_request_commit(struct i915_gem_request *rq)
+{
+	int ret, n;
+
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+
+	if (!rq->outstanding)
+		return 0;
+
+	if (rq->head == rq->ring->tail)
+		goto done;
+
+	ret = check_wedged(rq, rq->i915->mm.interruptible);
+	if (ret)
+		return ret;
+
+	ret = i915_request_emit_flush(rq, I915_FLUSH_CACHES);
+	if (ret)
+		return ret;
+
+	if (leave_breadcrumb(rq)) {
+		ret = i915_request_emit_breadcrumb(rq);
+		if (ret)
+			return ret;
+	}
+
+	rq->tail = rq->ring->tail;
+	rq->emitted_jiffies = jiffies;
+
+	intel_runtime_pm_get(rq->i915);
+
+	trace_i915_gem_request_commit(rq);
+	ret = rq->engine->add_request(rq);
+	if (ret) {
+		intel_runtime_pm_put(rq->i915);
+		return ret;
+	}
+
+	i915_request_get(rq);
+
+	rq->outstanding = false;
+	if (rq->breadcrumb[rq->engine->id]) {
+		list_add_tail(&rq->breadcrumb_list, &rq->ring->breadcrumbs);
+		rq->ring->breadcrumb_tail = rq->tail;
+	} else
+		INIT_LIST_HEAD(&rq->breadcrumb_list);
+
+	for (n = 0; n < ARRAY_SIZE(rq->breadcrumb); n++)
+		if (rq->breadcrumb[n])
+			rq->engine->breadcrumb[n] = rq->breadcrumb[n];
+
+	add_to_client(rq);
+
+	while (!list_empty(&rq->vmas)) {
+		struct i915_gem_request__vma *ref =
+			list_first_entry(&rq->vmas, typeof(*ref), link);
+
+		add_to_obj(rq, ref);
+		vma_free(ref);
+	}
+
+	rq->ctx->remap_slice &= ~rq->remap_l3;
+	rq->ctx->ring[rq->engine->id].initialized = true;
+done:
+	rq->ring->last_context = rq->ctx;
+	return 0;
+}
+
+static void fake_irq(unsigned long data)
+{
+	wake_up_process((struct task_struct *)data);
+}
+
+static bool missed_irq(struct i915_gem_request *rq)
+{
+	return test_bit(rq->engine->id, &rq->i915->gpu_error.missed_irq_rings);
+}
+
+static bool can_wait_boost(struct drm_i915_file_private *file_priv)
+{
+	if (file_priv == NULL)
+		return true;
+
+	return !atomic_xchg(&file_priv->rps_wait_boost, true);
+}
+
+bool __i915_request_complete__wa(struct i915_gem_request *rq)
+{
+	struct drm_i915_private *dev_priv = rq->i915;
+	unsigned head, tail;
+
+	if (i915_request_complete(rq))
+		return true;
+
+	/* Sadly not all architectures are coherent wrt the seqno
+	 * write being visible before the CPU is woken up by the
+	 * interrupt. In order to avoid going to sleep without seeing
+	 * the last seqno and never waking up again, we explicitly check
+	 * whether the ring has advanced past our request. The uncached
+	 * register read (which requires waking the GT up) is pure brute
+	 * force, and only just enough.
+	 */
+	head = __intel_ring_space(I915_READ_HEAD(rq->engine) & HEAD_ADDR,
+				  rq->ring->tail, rq->ring->size, 0);
+	tail = __intel_ring_space(rq->tail,
+				  rq->ring->tail, rq->ring->size, 0);
+	if (head >= tail) {
+		trace_i915_gem_request_complete(rq);
+		rq->completed = true;
+	}
+
+	return rq->completed;
+}
+
+/**
+ * __wait_request - wait until execution of request has finished
+ * @request: the request to wait upon
+ * @interruptible: do an interruptible wait (normally yes)
+ * @timeout_ns: in - how long to wait (NULL forever); out - how much time remaining
+ *
+ * Returns 0 if the request was completed within the alloted time. Else returns the
+ * errno with remaining time filled in timeout argument.
+ */
+static int __wait_request(struct i915_gem_request *rq,
+			  bool interruptible,
+			  s64 *timeout_ns,
+			  struct drm_i915_file_private *file_priv)
+{
+	const bool irq_test_in_progress =
+		ACCESS_ONCE(rq->i915->gpu_error.test_irq_rings) & intel_engine_flag(rq->engine);
+	DEFINE_WAIT(wait);
+	unsigned long timeout_expire;
+	unsigned long before, now;
+	int ret;
+
+	WARN(!intel_irqs_enabled(rq->i915), "IRQs disabled");
+
+	if (__i915_request_complete__wa(rq))
+		return 0;
+
+	timeout_expire = timeout_ns ? jiffies + nsecs_to_jiffies((u64)*timeout_ns) : 0;
+
+	if (INTEL_INFO(rq->i915)->gen >= 6 && rq->engine->id == RCS && can_wait_boost(file_priv)) {
+		gen6_rps_boost(rq->i915);
+		if (file_priv)
+			mod_delayed_work(rq->i915->wq,
+					 &file_priv->mm.idle_work,
+					 msecs_to_jiffies(100));
+	}
+
+	if (!irq_test_in_progress && WARN_ON(!rq->engine->irq_get(rq->engine)))
+		return -ENODEV;
+
+	/* Record current time in case interrupted by signal, or wedged */
+	trace_i915_gem_request_wait_begin(rq);
+	before = jiffies;
+	for (;;) {
+		struct timer_list timer;
+
+		prepare_to_wait(&rq->engine->irq_queue, &wait,
+				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+
+		/* We need to check whether any gpu reset happened in between
+		 * the caller grabbing the seqno and now ... */
+		ret = check_wedged(rq, interruptible);
+		if (ret)
+			break;
+
+		if (__i915_request_complete__wa(rq))
+			break;
+
+		if (interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		if (timeout_ns && time_after_eq(jiffies, timeout_expire)) {
+			ret = -ETIME;
+			break;
+		}
+
+		timer.function = NULL;
+		if (timeout_ns || missed_irq(rq)) {
+			unsigned long expire;
+
+			setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
+			expire = missed_irq(rq) ? jiffies + 1 : timeout_expire;
+			mod_timer(&timer, expire);
+		}
+
+		io_schedule();
+
+		if (timer.function) {
+			del_singleshot_timer_sync(&timer);
+			destroy_timer_on_stack(&timer);
+		}
+	}
+	now = jiffies;
+	trace_i915_gem_request_wait_end(rq);
+
+	if (!irq_test_in_progress)
+		rq->engine->irq_put(rq->engine);
+
+	finish_wait(&rq->engine->irq_queue, &wait);
+
+	if (timeout_ns) {
+		s64 tres = *timeout_ns - jiffies_to_nsecs(now - before);
+		*timeout_ns = tres <= 0 ? 0 : tres;
+	}
+
+	return ret;
+}
+
+int
+__i915_request_wait(struct i915_gem_request *rq,
+		    bool interruptible,
+		    s64 *timeout_ns,
+		    struct drm_i915_file_private *file)
+{
+	if (WARN_ON(rq->breadcrumb[rq->engine->id] == 0))
+		return -ENODEV;
+
+	if (WARN_ON(rq->outstanding))
+		return -ENODEV;
+
+	return __wait_request(rq, interruptible, timeout_ns, file);
+}
+
+int
+i915_request_wait(struct i915_gem_request *rq)
+{
+	int ret;
+
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+
+	ret = i915_request_emit_breadcrumb(rq);
+	if (ret)
+		return ret;
+
+	ret = i915_request_commit(rq);
+	if (ret)
+		return ret;
+
+	return __wait_request(rq, rq->i915->mm.interruptible, NULL, NULL);
+}
+
+void
+i915_request_retire(struct i915_gem_request *rq)
+{
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+
+	if (!rq->completed) {
+		trace_i915_gem_request_complete(rq);
+		rq->completed = true;
+	}
+	trace_i915_gem_request_retire(rq);
+
+	/* We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 */
+	rq->ring->retired_head = rq->tail;
+
+	rq->batch_obj = NULL;
+
+	list_del(&rq->breadcrumb_list);
+	list_del(&rq->engine_list);
+	remove_from_client(rq);
+
+	intel_runtime_pm_put(rq->i915);
+	i915_request_put(rq);
+}
+
+void
+__i915_request_free(struct kref *kref)
+{
+	struct i915_gem_request *rq = container_of(kref, struct i915_gem_request, kref);
+
+	lockdep_assert_held(&rq->i915->dev->struct_mutex);
+
+	if (rq->outstanding) {
+		/* Rollback this partial transaction as we never committed
+		 * the request to the hardware queue.
+		 */
+		rq->ring->tail = rq->head;
+		rq->ring->space = intel_ring_space(rq->ring);
+	}
+
+	while (!list_empty(&rq->vmas))
+		vma_free(list_first_entry(&rq->vmas,
+					  struct i915_gem_request__vma,
+					  link));
+
+	i915_gem_context_unreference(rq->ctx);
+	kfree(rq);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 7e623bf..a45651d 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 		if (ret == 0) {
 			obj->fence_dirty =
-				obj->last_fenced_seqno ||
+				obj->last_fence.request ||
 				obj->fence_reg != I915_FENCE_REG_NONE;
 
 			obj->tiling_mode = args->tiling_mode;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 285d72d..495515e 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -244,13 +244,18 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 				  struct drm_device *dev,
 				  struct drm_i915_error_ring *ring)
 {
+	int n;
+
 	if (!ring->valid)
 		return;
 
-	err_printf(m, "  HEAD: 0x%08x\n", ring->head);
-	err_printf(m, "  TAIL: 0x%08x\n", ring->tail);
-	err_printf(m, "  CTL: 0x%08x\n", ring->ctl);
-	err_printf(m, "  HWS: 0x%08x\n", ring->hws);
+	err_printf(m, "%s command stream:\n", ring_str(ring->id));
+
+	err_printf(m, "  START: 0x%08x\n", ring->start);
+	err_printf(m, "  HEAD:  0x%08x\n", ring->head);
+	err_printf(m, "  TAIL:  0x%08x\n", ring->tail);
+	err_printf(m, "  CTL:   0x%08x\n", ring->ctl);
+	err_printf(m, "  HWS:   0x%08x\n", ring->hws);
 	err_printf(m, "  ACTHD: 0x%08x %08x\n", (u32)(ring->acthd>>32), (u32)ring->acthd);
 	err_printf(m, "  IPEIR: 0x%08x\n", ring->ipeir);
 	err_printf(m, "  IPEHR: 0x%08x\n", ring->ipehr);
@@ -266,17 +271,13 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	if (INTEL_INFO(dev)->gen >= 6) {
 		err_printf(m, "  RC PSMI: 0x%08x\n", ring->rc_psmi);
 		err_printf(m, "  FAULT_REG: 0x%08x\n", ring->fault_reg);
-		err_printf(m, "  SYNC_0: 0x%08x [last synced 0x%08x]\n",
-			   ring->semaphore_mboxes[0],
-			   ring->semaphore_seqno[0]);
-		err_printf(m, "  SYNC_1: 0x%08x [last synced 0x%08x]\n",
-			   ring->semaphore_mboxes[1],
-			   ring->semaphore_seqno[1]);
-		if (HAS_VEBOX(dev)) {
-			err_printf(m, "  SYNC_2: 0x%08x [last synced 0x%08x]\n",
-				   ring->semaphore_mboxes[2],
-				   ring->semaphore_seqno[2]);
-		}
+		err_printf(m, "  SYNC_0: 0x%08x\n",
+			   ring->semaphore_mboxes[0]);
+		err_printf(m, "  SYNC_1: 0x%08x\n",
+			   ring->semaphore_mboxes[1]);
+		if (HAS_VEBOX(dev))
+			err_printf(m, "  SYNC_2: 0x%08x\n",
+				   ring->semaphore_mboxes[2]);
 	}
 	if (USES_PPGTT(dev)) {
 		err_printf(m, "  GFX_MODE: 0x%08x\n", ring->vm_info.gfx_mode);
@@ -291,7 +292,11 @@ static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 				   ring->vm_info.pp_dir_base);
 		}
 	}
-	err_printf(m, "  seqno: 0x%08x\n", ring->seqno);
+	err_printf(m, "  seqno: 0x%08x [last breadcrumb 0x%08x]\n", ring->seqno, ring->breadcrumb[ring->id]);
+	err_printf(m, "  semaphore: [");
+	for (n = 0; n < ARRAY_SIZE(ring->breadcrumb); n++)
+		err_printf(m, " %s%08x", n == ring->id ? "*" : "", ring->breadcrumb[n]);
+	err_printf(m, " ]\n");
 	err_printf(m, "  waiting: %s\n", yesno(ring->waiting));
 	err_printf(m, "  ring->head: 0x%08x\n", ring->cpu_ring_head);
 	err_printf(m, "  ring->tail: 0x%08x\n", ring->cpu_ring_tail);
@@ -388,10 +393,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 	if (INTEL_INFO(dev)->gen == 7)
 		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
 
-	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
-		err_printf(m, "%s command stream:\n", ring_str(i));
+	for (i = 0; i < ARRAY_SIZE(error->ring); i++)
 		i915_ring_error_state(m, dev, &error->ring[i]);
-	}
 
 	for (i = 0; i < error->vm_count; i++) {
 		err_printf(m, "vm[%d]\n", i);
@@ -430,10 +433,15 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->engine[i].name,
 				   error->ring[i].num_requests);
 			for (j = 0; j < error->ring[i].num_requests; j++) {
-				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+				err_printf(m, "  pid %ld, seqno 0x%08x, emitted %dus ago (at %ld jiffies), head 0x%08x, tail 0x%08x, batch 0x%08x, complete? %d\n",
+					   error->ring[i].requests[j].pid,
 					   error->ring[i].requests[j].seqno,
+					   jiffies_to_usecs(jiffies - error->ring[i].requests[j].jiffies),
 					   error->ring[i].requests[j].jiffies,
-					   error->ring[i].requests[j].tail);
+					   error->ring[i].requests[j].head,
+					   error->ring[i].requests[j].tail,
+					   error->ring[i].requests[j].batch,
+					   error->ring[i].requests[j].complete);
 			}
 		}
 
@@ -661,11 +669,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 		       struct i915_vma *vma)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_gem_request *rq = i915_gem_object_last_read(obj);
 
 	err->size = obj->base.size;
 	err->name = obj->base.name;
-	err->rseqno = obj->last_read_seqno;
-	err->wseqno = obj->last_write_seqno;
+	err->rseqno = i915_request_seqno(rq);
+	err->wseqno = i915_request_seqno(obj->last_write.request);
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
@@ -679,7 +688,7 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->dirty = obj->dirty;
 	err->purgeable = obj->madv != I915_MADV_WILLNEED;
 	err->userptr = obj->userptr.mm != NULL;
-	err->ring = obj->ring ? obj->ring->id : -1;
+	err->ring = i915_request_engine_id(rq);
 	err->cache_level = obj->cache_level;
 }
 
@@ -797,7 +806,7 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv,
 	struct intel_engine_cs *to;
 	int i;
 
-	if (!i915_semaphore_is_enabled(dev_priv->dev))
+	if (dev_priv->semaphore_obj == NULL)
 		return;
 
 	if (!error->semaphore_obj)
@@ -808,19 +817,18 @@ static void gen8_record_semaphore_state(struct drm_i915_private *dev_priv,
 
 	for_each_engine(to, dev_priv, i) {
 		int idx;
-		u16 signal_offset;
+		u16 offset;
 		u32 *tmp;
 
 		if (engine == to)
 			continue;
 
-		signal_offset = (GEN8_SIGNAL_OFFSET(engine, i) & (PAGE_SIZE - 1))
-				/ 4;
 		tmp = error->semaphore_obj->pages[0];
+		/* Mask to the offset within the page first, then convert bytes to a u32 index. */
+		offset = (GEN8_SEMAPHORE_OFFSET(dev_priv, engine->id, i) & (PAGE_SIZE - 1)) / 4;
 		idx = intel_engine_sync_index(engine, to);
 
-		ering->semaphore_mboxes[idx] = tmp[signal_offset];
-		ering->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx];
+		ering->semaphore_mboxes[idx] = tmp[offset];
 	}
 }
 
@@ -830,22 +837,20 @@ static void gen6_record_semaphore_state(struct drm_i915_private *dev_priv,
 {
 	ering->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base));
 	ering->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base));
-	ering->semaphore_seqno[0] = engine->semaphore.sync_seqno[0];
-	ering->semaphore_seqno[1] = engine->semaphore.sync_seqno[1];
-
 	if (HAS_VEBOX(dev_priv->dev)) {
 		ering->semaphore_mboxes[2] =
 			I915_READ(RING_SYNC_2(engine->mmio_base));
-		ering->semaphore_seqno[2] = engine->semaphore.sync_seqno[2];
 	}
 }
 
 static void i915_record_ring_state(struct drm_device *dev,
 				   struct drm_i915_error_state *error,
 				   struct intel_engine_cs *engine,
+				   struct i915_gem_request *rq,
 				   struct drm_i915_error_ring *ering)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ringbuffer *ring;
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ering->rc_psmi = I915_READ(engine->mmio_base + 0x50);
@@ -877,8 +882,10 @@ static void i915_record_ring_state(struct drm_device *dev,
 
 	ering->waiting = waitqueue_active(&engine->irq_queue);
 	ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
-	ering->seqno = engine->get_seqno(engine, false);
 	ering->acthd = intel_engine_get_active_head(engine);
+	ering->seqno = engine->get_seqno(engine);
+	memcpy(ering->breadcrumb, engine->breadcrumb, sizeof(ering->breadcrumb));
+	ering->start = I915_READ_START(engine);
 	ering->head = I915_READ_HEAD(engine);
 	ering->tail = I915_READ_TAIL(engine);
 	ering->ctl = I915_READ_CTL(engine);
@@ -902,7 +909,7 @@ static void i915_record_ring_state(struct drm_device *dev,
 				mmio = VEBOX_HWS_PGA_GEN7;
 				break;
 			}
-		} else if (IS_GEN6(engine->dev)) {
+		} else if (IS_GEN6(engine->i915)) {
 			mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
 		} else {
 			/* XXX: gen8 returns to sanity */
@@ -912,8 +919,11 @@ static void i915_record_ring_state(struct drm_device *dev,
 		ering->hws = I915_READ(mmio);
 	}
 
-	ering->cpu_ring_head = engine->buffer->head;
-	ering->cpu_ring_tail = engine->buffer->tail;
+	ring = rq ? rq->ctx->ring[engine->id].ring : engine->default_context->ring[engine->id].ring;
+	if (ring) {
+		ering->cpu_ring_head = ring->head;
+		ering->cpu_ring_tail = ring->tail;
+	}
 
 	ering->hangcheck_score = engine->hangcheck.score;
 	ering->hangcheck_action = engine->hangcheck.action;
@@ -945,12 +955,11 @@ static void i915_record_ring_state(struct drm_device *dev,
 	}
 }
 
-
 static void i915_gem_record_active_context(struct intel_engine_cs *engine,
 					   struct drm_i915_error_state *error,
 					   struct drm_i915_error_ring *ering)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	struct drm_i915_gem_object *obj;
 
 	/* Currently render ring is the only HW context user */
@@ -972,7 +981,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 				  struct drm_i915_error_state *error)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_request *request;
+	struct i915_gem_request *rq;
 	int i, count;
 
 	for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -980,20 +989,18 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
 		error->ring[i].pid = -1;
 
-		if (engine->dev == NULL)
+		if (engine->i915 == NULL)
 			continue;
 
 		error->ring[i].valid = true;
+		error->ring[i].id = i;
 
-		i915_record_ring_state(dev, error, engine, &error->ring[i]);
-
-		request = i915_gem_find_active_request(engine);
-		if (request) {
+		rq = intel_engine_find_active_request(engine);
+		if (rq) {
 			struct i915_address_space *vm;
 
-			vm = request->ctx && request->ctx->ppgtt ?
-				&request->ctx->ppgtt->base :
-				&dev_priv->gtt.base;
+			vm = rq->ctx->ppgtt ?
+				&rq->ctx->ppgtt->base : &dev_priv->gtt.base;
 
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
@@ -1001,7 +1008,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			 */
 			error->ring[i].batchbuffer =
 				i915_error_object_create(dev_priv,
-							 request->batch_obj,
+							 rq->batch_obj,
 							 vm);
 
 			if (HAS_BROKEN_CS_TLB(dev_priv->dev))
@@ -1009,11 +1016,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
 					i915_error_ggtt_object_create(dev_priv,
 							     engine->scratch.obj);
 
-			if (request->file_priv) {
+			if (rq->file_priv) {
 				struct task_struct *task;
 
 				rcu_read_lock();
-				task = pid_task(request->file_priv->file->pid,
+				task = pid_task(rq->file_priv->file->pid,
 						PIDTYPE_PID);
 				if (task) {
 					strcpy(error->ring[i].comm, task->comm);
@@ -1023,8 +1030,12 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			}
 		}
 
-		error->ring[i].ringbuffer =
-			i915_error_ggtt_object_create(dev_priv, engine->buffer->obj);
+		i915_record_ring_state(dev, error, engine, rq, &error->ring[i]);
+
+		if (engine->default_context && engine->default_context->ring[engine->id].ring)
+			error->ring[i].ringbuffer =
+				i915_error_ggtt_object_create(dev_priv,
+							      engine->default_context->ring[engine->id].ring->obj);
 
 		error->ring[i].hws_page =
 			i915_error_ggtt_object_create(dev_priv, engine->status_page.obj);
@@ -1032,7 +1043,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		i915_gem_record_active_context(engine, error, &error->ring[i]);
 
 		count = 0;
-		list_for_each_entry(request, &engine->request_list, list)
+		list_for_each_entry(rq, &engine->requests, engine_list)
 			count++;
 
 		error->ring[i].num_requests = count;
@@ -1045,13 +1056,28 @@ static void i915_gem_record_rings(struct drm_device *dev,
 		}
 
 		count = 0;
-		list_for_each_entry(request, &engine->request_list, list) {
+		list_for_each_entry(rq, &engine->requests, engine_list) {
 			struct drm_i915_error_request *erq;
+			struct task_struct *task;
+
 
 			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
-			erq->jiffies = request->emitted_jiffies;
-			erq->tail = request->tail;
+			erq->seqno = rq->seqno;
+			erq->jiffies = rq->emitted_jiffies;
+			erq->head = rq->head;
+			erq->tail = rq->tail;
+			if (rq->batch_obj)
+				erq->batch = i915_gem_obj_offset(rq->batch_obj,
+								 rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &dev_priv->gtt.base);
+			else
+				erq->batch = 0;
+			memcpy(erq->breadcrumb, rq->breadcrumb, sizeof(rq->breadcrumb));
+			erq->complete = i915_request_complete(rq);
+
+			rcu_read_lock();
+			task = rq->file_priv ? pid_task(rq->file_priv->file->pid, PIDTYPE_PID) : NULL;
+			erq->pid = task ? task->pid : 0;
+			rcu_read_unlock();
 		}
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 2973c00..2785785 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1261,10 +1261,7 @@ static void notify_ring(struct drm_device *dev,
 	if (!intel_engine_initialized(engine))
 		return;
 
-	trace_i915_gem_request_complete(engine);
-
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		intel_notify_mmio_flip(engine);
+	trace_i915_gem_ring_complete(engine);
 
 	wake_up_all(&engine->irq_queue);
 	i915_queue_hangcheck(dev);
@@ -1646,14 +1643,14 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 			if (rcs & GT_RENDER_USER_INTERRUPT)
 				notify_ring(dev, engine);
 			if (rcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_execlists_handle_ctx_events(engine);
+				intel_execlists_irq_handler(engine);
 
 			bcs = tmp >> GEN8_BCS_IRQ_SHIFT;
 			engine = &dev_priv->engine[BCS];
 			if (bcs & GT_RENDER_USER_INTERRUPT)
 				notify_ring(dev, engine);
 			if (bcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_execlists_handle_ctx_events(engine);
+				intel_execlists_irq_handler(engine);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT0)!\n");
 	}
@@ -1669,14 +1666,14 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 			if (vcs & GT_RENDER_USER_INTERRUPT)
 				notify_ring(dev, engine);
 			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_execlists_handle_ctx_events(engine);
+				intel_execlists_irq_handler(engine);
 
 			vcs = tmp >> GEN8_VCS2_IRQ_SHIFT;
 			engine = &dev_priv->engine[VCS2];
 			if (vcs & GT_RENDER_USER_INTERRUPT)
 				notify_ring(dev, engine);
 			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_execlists_handle_ctx_events(engine);
+				intel_execlists_irq_handler(engine);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT1)!\n");
 	}
@@ -1703,7 +1700,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 			if (vcs & GT_RENDER_USER_INTERRUPT)
 				notify_ring(dev, engine);
 			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
-				intel_execlists_handle_ctx_events(engine);
+				intel_execlists_irq_handler(engine);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT3)!\n");
 	}
@@ -2721,9 +2718,7 @@ static void i915_error_work_func(struct work_struct *work)
 			 * updates before
 			 * the counter increment.
 			 */
-			smp_mb__before_atomic();
-			atomic_inc(&dev_priv->gpu_error.reset_counter);
-
+			smp_mb__after_atomic();
 			kobject_uevent_env(&dev->primary->kdev->kobj,
 					   KOBJ_CHANGE, reset_done_event);
 		} else {
@@ -3054,24 +3049,24 @@ static void gen8_disable_vblank(struct drm_device *dev, int pipe)
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
-static u32
-engine_last_seqno(struct intel_engine_cs *engine)
-{
-	return list_entry(engine->request_list.prev,
-			  struct drm_i915_gem_request, list)->seqno;
-}
-
 static bool
-engine_idle(struct intel_engine_cs *engine, u32 seqno)
+engine_idle(struct intel_engine_cs *engine)
 {
-	return (list_empty(&engine->request_list) ||
-		i915_seqno_passed(seqno, engine_last_seqno(engine)));
+	if (list_empty(&engine->requests))
+		return true;
+
+	if (i915_request_complete(list_entry(engine->requests.prev,
+					     struct i915_gem_request,
+					     engine_list)))
+		return true;
+
+	return intel_engine_idle(engine);
 }
 
 static bool
-ipehr_is_semaphore_wait(struct drm_device *dev, u32 ipehr)
+ipehr_is_semaphore_wait(struct drm_i915_private *i915, u32 ipehr)
 {
-	if (INTEL_INFO(dev)->gen >= 8) {
+	if (INTEL_INFO(i915)->gen >= 8) {
 		return (ipehr >> 23) == 0x1c;
 	} else {
 		ipehr &= ~MI_SEMAPHORE_SYNC_MASK;
@@ -3083,7 +3078,7 @@ ipehr_is_semaphore_wait(struct drm_device *dev, u32 ipehr)
 static struct intel_engine_cs *
 semaphore_wait_to_signaller_engine(struct intel_engine_cs *engine, u32 ipehr, u64 offset)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	struct intel_engine_cs *signaller;
 	int i;
 
@@ -3092,7 +3087,7 @@ semaphore_wait_to_signaller_engine(struct intel_engine_cs *engine, u32 ipehr, u6
 			if (engine == signaller)
 				continue;
 
-			if (offset == signaller->semaphore.signal_ggtt[engine->id])
+			if (offset == GEN8_SEMAPHORE_OFFSET(dev_priv, signaller->id, engine->id))
 				return signaller;
 		}
 	} else {
@@ -3116,13 +3111,19 @@ semaphore_wait_to_signaller_engine(struct intel_engine_cs *engine, u32 ipehr, u6
 static struct intel_engine_cs *
 semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct intel_ringbuffer *ring;
 	u32 cmd, ipehr, head;
 	u64 offset = 0;
 	int i, backwards;
 
 	ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
-	if (!ipehr_is_semaphore_wait(engine->dev, ipehr))
+	if (!ipehr_is_semaphore_wait(engine->i915, ipehr))
+		return NULL;
+
+	/* XXX execlists: only the default context's ring for this engine is inspected */
+	ring = engine->default_context->ring[engine->id].ring;
+	if (ring == NULL)
 		return NULL;
 
 	/*
@@ -3134,18 +3135,18 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 	 * ringbuffer itself.
 	 */
 	head = I915_READ_HEAD(engine) & HEAD_ADDR;
-	backwards = (INTEL_INFO(engine->dev)->gen >= 8) ? 5 : 4;
+	backwards = (INTEL_INFO(dev_priv)->gen >= 8) ? 5 : 4;
 
 	for (i = backwards; i; --i) {
 		/*
 		 * Be paranoid and presume the hw has gone off into the wild -
 		 * our ring is smaller than what the hardware (and hence
 		 * HEAD_ADDR) allows. Also handles wrap-around.
 		 */
-		head &= engine->buffer->size - 1;
+		head &= ring->size - 1;
 
 		/* This here seems to blow up */
-		cmd = ioread32(engine->buffer->virtual_start + head);
+		cmd = ioread32(ring->virtual_start + head);
 		if (cmd == ipehr)
 			break;
 
@@ -3155,19 +3156,21 @@ semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno)
 	if (!i)
 		return NULL;
 
-	*seqno = ioread32(engine->buffer->virtual_start + head + 4) + 1;
-	if (INTEL_INFO(engine->dev)->gen >= 8) {
-		offset = ioread32(engine->buffer->virtual_start + head + 12);
+	*seqno = ioread32(ring->virtual_start + head + 4) + 1;
+	if (INTEL_INFO(dev_priv)->gen >= 8) {
+		offset = ioread32(ring->virtual_start + head + 12);
 		offset <<= 32;
-		offset = ioread32(engine->buffer->virtual_start + head + 8);
+		/* OR in the low dword; a plain assignment would discard the upper half. */
+		offset |= ioread32(ring->virtual_start + head + 8);
 	}
 	return semaphore_wait_to_signaller_engine(engine, ipehr, offset);
 }
 
 static int semaphore_passed(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	struct intel_engine_cs *signaller;
+	struct i915_gem_request *rq;
 	u32 seqno;
 
 	engine->hangcheck.deadlock++;
@@ -3180,7 +3182,9 @@ static int semaphore_passed(struct intel_engine_cs *engine)
 	if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
 		return -1;
 
-	if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+	/* As in the check this replaces, the seqno is tested against the signaller, not the waiter. */
+	rq = intel_engine_seqno_to_request(signaller, seqno);
+	if (rq == NULL || i915_request_complete(rq))
 		return 1;
 
 	/* cursory check for an unkickable deadlock */
@@ -3203,8 +3206,7 @@ static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv)
 static enum intel_engine_hangcheck_action
 engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 {
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	u32 tmp;
 
 	if (acthd != engine->hangcheck.acthd) {
@@ -3216,7 +3218,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 		return HANGCHECK_ACTIVE_LOOP;
 	}
 
-	if (IS_GEN2(dev))
+	if (IS_GEN2(dev_priv))
 		return HANGCHECK_HUNG;
 
 	/* Is the chip hanging on a WAIT_FOR_EVENT?
@@ -3226,19 +3228,19 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 	 */
 	tmp = I915_READ_CTL(engine);
 	if (tmp & RING_WAIT) {
-		i915_handle_error(dev, false,
+		i915_handle_error(dev_priv->dev, false,
 				  "Kicking stuck wait on %s",
 				  engine->name);
 		I915_WRITE_CTL(engine, tmp);
 		return HANGCHECK_KICK;
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6 && tmp & RING_WAIT_SEMAPHORE) {
+	if (INTEL_INFO(dev_priv)->gen >= 6 && tmp & RING_WAIT_SEMAPHORE) {
 		switch (semaphore_passed(engine)) {
 		default:
 			return HANGCHECK_HUNG;
 		case 1:
-			i915_handle_error(dev, false,
+			i915_handle_error(dev_priv->dev, false,
 					  "Kicking stuck semaphore on %s",
 					  engine->name);
 			I915_WRITE_CTL(engine, tmp);
@@ -3261,8 +3263,7 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
  */
 static void i915_hangcheck_elapsed(unsigned long data)
 {
-	struct drm_device *dev = (struct drm_device *)data;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = (struct drm_i915_private *)data;
 	struct intel_engine_cs *engine;
 	int i;
 	int busy_count = 0, rings_hung = 0;
@@ -3281,11 +3282,11 @@ static void i915_hangcheck_elapsed(unsigned long data)
 
 		semaphore_clear_deadlocks(dev_priv);
 
-		seqno = engine->get_seqno(engine, false);
 		acthd = intel_engine_get_active_head(engine);
+		seqno = engine->get_seqno(engine);
 
 		if (engine->hangcheck.seqno == seqno) {
-			if (engine_idle(engine, seqno)) {
+			if (engine_idle(engine)) {
 				engine->hangcheck.action = HANGCHECK_IDLE;
 
 				if (waitqueue_active(&engine->irq_queue)) {
@@ -3320,7 +3321,7 @@ static void i915_hangcheck_elapsed(unsigned long data)
 				 * for stalling the machine.
 				 */
 				engine->hangcheck.action = engine_stuck(engine,
-								    acthd);
+									acthd);
 
 				switch (engine->hangcheck.action) {
 				case HANGCHECK_IDLE:
@@ -3366,12 +3367,12 @@ static void i915_hangcheck_elapsed(unsigned long data)
 	}
 
 	if (rings_hung)
-		return i915_handle_error(dev, true, "Ring hung");
+		return i915_handle_error(dev_priv->dev, true, "Ring hung");
 
 	if (busy_count)
 		/* Reset timer case chip hangs without another request
 		 * being added */
-		i915_queue_hangcheck(dev);
+		i915_queue_hangcheck(dev_priv->dev);
 }
 
 void i915_queue_hangcheck(struct drm_device *dev)
@@ -4680,7 +4681,7 @@ void intel_irq_init(struct drm_device *dev)
 
 	setup_timer(&dev_priv->gpu_error.hangcheck_timer,
 		    i915_hangcheck_elapsed,
-		    (unsigned long) dev);
+		    (unsigned long) dev_priv);
 	INIT_DELAYED_WORK(&dev_priv->hotplug_reenable_work,
 			  intel_hpd_irq_reenable);
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 203062e..e7ad5a7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2284,6 +2284,7 @@ enum punit_power_well {
  *   doesn't need saving on GT1
  */
 #define CXT_SIZE		0x21a0
+#define ILK_CXT_TOTAL_SIZE		(1 * PAGE_SIZE)
 #define GEN6_CXT_POWER_SIZE(cxt_reg)	((cxt_reg >> 24) & 0x3f)
 #define GEN6_CXT_RING_SIZE(cxt_reg)	((cxt_reg >> 18) & 0x3f)
 #define GEN6_CXT_RENDER_SIZE(cxt_reg)	((cxt_reg >> 12) & 0x3f)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index f5aa006..6a66b2f 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -325,7 +325,7 @@ TRACE_EVENT(i915_gem_evict_vm,
 	    TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_ring_sync_to,
+TRACE_EVENT(i915_gem_ring_wait,
 	    TP_PROTO(struct intel_engine_cs *from,
 		     struct intel_engine_cs *to,
 		     u32 seqno),
@@ -339,18 +339,40 @@ TRACE_EVENT(i915_gem_ring_sync_to,
 			     ),
 
 	    TP_fast_assign(
-			   __entry->dev = from->dev->primary->index;
+			   __entry->dev = from->i915->dev->primary->index;
 			   __entry->sync_from = from->id;
 			   __entry->sync_to = to->id;
 			   __entry->seqno = seqno;
 			   ),
 
-	    TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u",
+	    TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%x",
 		      __entry->dev,
 		      __entry->sync_from, __entry->sync_to,
 		      __entry->seqno)
 );
 
+TRACE_EVENT(i915_gem_ring_switch_context,
+	    TP_PROTO(struct intel_engine_cs *engine, struct intel_context *ctx, u32 flags),
+	    TP_ARGS(engine, ctx, flags),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, ring)
+			     __field(u32, ctx)
+			     __field(u32, flags)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = engine->i915->dev->primary->index;
+			   __entry->ring = engine->id;
+			   __entry->ctx = ctx->file_priv ? ctx->user_handle : -1;
+			   __entry->flags = flags;
+			   ),
+
+	    TP_printk("dev=%u, ring=%u, ctx=%d, flags=0x%08x",
+		      __entry->dev, __entry->ring, __entry->ctx, __entry->flags)
+);
+
 TRACE_EVENT(i915_gem_ring_dispatch,
 	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno, u32 flags),
 	    TP_ARGS(ring, seqno, flags),
@@ -363,66 +385,84 @@ TRACE_EVENT(i915_gem_ring_dispatch,
 			     ),
 
 	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
+			   __entry->dev = ring->i915->dev->primary->index;
 			   __entry->ring = ring->id;
 			   __entry->seqno = seqno;
 			   __entry->flags = flags;
 			   i915_trace_irq_get(ring, seqno);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x",
+	    TP_printk("dev=%u, ring=%u, seqno=%x, flags=%x",
 		      __entry->dev, __entry->ring, __entry->seqno, __entry->flags)
 );
 
-TRACE_EVENT(i915_gem_ring_flush,
-	    TP_PROTO(struct intel_engine_cs *ring, u32 invalidate, u32 flush),
-	    TP_ARGS(ring, invalidate, flush),
+TRACE_EVENT(intel_ringbuffer_begin,
+	    TP_PROTO(struct intel_ringbuffer *ring, int need),
+	    TP_ARGS(ring, need),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, invalidate)
-			     __field(u32, flush)
+			     __field(u32, need)
+			     __field(u32, space)
 			     ),
 
 	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
-			   __entry->ring = ring->id;
-			   __entry->invalidate = invalidate;
-			   __entry->flush = flush;
+			   __entry->dev = ring->engine->i915->dev->primary->index;
+			   __entry->ring = ring->engine->id;
+			   __entry->need = need;
+			   __entry->space = intel_ring_space(ring);
 			   ),
 
-	    TP_printk("dev=%u, ring=%x, invalidate=%04x, flush=%04x",
-		      __entry->dev, __entry->ring,
-		      __entry->invalidate, __entry->flush)
+	    TP_printk("dev=%u, ring=%u, need=%u, space=%u",
+		      __entry->dev, __entry->ring, __entry->need, __entry->space)
 );
 
-DECLARE_EVENT_CLASS(i915_gem_request,
-	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
-	    TP_ARGS(ring, seqno),
+TRACE_EVENT(intel_ringbuffer_wait,
+	    TP_PROTO(struct intel_ringbuffer *ring, int need),
+	    TP_ARGS(ring, need),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
-			     __field(u32, seqno)
+			     __field(u32, need)
+			     __field(u32, space)
 			     ),
 
 	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
-			   __entry->ring = ring->id;
-			   __entry->seqno = seqno;
+			   __entry->dev = ring->engine->i915->dev->primary->index;
+			   __entry->ring = ring->engine->id;
+			   __entry->need = need;
+			   __entry->space = intel_ring_space(ring);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, seqno=%u",
-		      __entry->dev, __entry->ring, __entry->seqno)
+	    TP_printk("dev=%u, ring=%u, need=%u, space=%u",
+		      __entry->dev, __entry->ring, __entry->need, __entry->space)
 );
 
-DEFINE_EVENT(i915_gem_request, i915_gem_request_add,
-	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
-	    TP_ARGS(ring, seqno)
+TRACE_EVENT(intel_ringbuffer_wrap,
+	    TP_PROTO(struct intel_ringbuffer *ring, int rem),
+	    TP_ARGS(ring, rem),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, ring)
+			     __field(u32, rem)
+			     __field(u32, size)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = ring->engine->i915->dev->primary->index;
+			   __entry->ring = ring->engine->id;
+			   __entry->rem = rem;
+			   __entry->size = ring->effective_size;
+			   ),
+
+	    TP_printk("dev=%u, ring=%u, rem=%u, size=%u",
+		      __entry->dev, __entry->ring, __entry->rem, __entry->size)
 );
 
-TRACE_EVENT(i915_gem_request_complete,
+TRACE_EVENT(i915_gem_ring_complete,
 	    TP_PROTO(struct intel_engine_cs *ring),
 	    TP_ARGS(ring),
 
@@ -433,23 +473,68 @@ TRACE_EVENT(i915_gem_request_complete,
 			     ),
 
 	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
+			   __entry->dev = ring->i915->dev->primary->index;
 			   __entry->ring = ring->id;
-			   __entry->seqno = ring->get_seqno(ring, false);
+			   __entry->seqno = ring->get_seqno(ring);
+			   ),
+
+	    TP_printk("dev=%u, ring=%u, seqno=%x",
+		      __entry->dev, __entry->ring, __entry->seqno)
+);
+
+DECLARE_EVENT_CLASS(i915_gem_request,
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, ring)
+			     __field(u32, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = rq->i915->dev->primary->index;
+			   __entry->ring = rq->engine->id;
+			   __entry->seqno = rq->seqno;
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, seqno=%u",
+	    TP_printk("dev=%u, ring=%u, seqno=%x",
 		      __entry->dev, __entry->ring, __entry->seqno)
 );
 
+DEFINE_EVENT(i915_gem_request, i915_gem_request_emit_flush,
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq)
+);
+
+DEFINE_EVENT(i915_gem_request, i915_gem_request_emit_batch,
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq)
+);
+
+DEFINE_EVENT(i915_gem_request, i915_gem_request_emit_breadcrumb,
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq)
+);
+
+DEFINE_EVENT(i915_gem_request, i915_gem_request_commit,
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq)
+);
+
+DEFINE_EVENT(i915_gem_request, i915_gem_request_complete,
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq)
+);
+
 DEFINE_EVENT(i915_gem_request, i915_gem_request_retire,
-	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
-	    TP_ARGS(ring, seqno)
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq)
 );
 
 TRACE_EVENT(i915_gem_request_wait_begin,
-	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
-	    TP_ARGS(ring, seqno),
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
@@ -465,47 +550,38 @@ TRACE_EVENT(i915_gem_request_wait_begin,
 	     * less desirable.
 	     */
 	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
-			   __entry->ring = ring->id;
-			   __entry->seqno = seqno;
-			   __entry->blocking = mutex_is_locked(&ring->dev->struct_mutex);
+			   __entry->dev = rq->i915->dev->primary->index;
+			   __entry->ring = rq->engine->id;
+			   __entry->seqno = rq->seqno;
+			   __entry->blocking = mutex_is_locked(&rq->i915->dev->struct_mutex);
 			   ),
 
-	    TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s",
+	    TP_printk("dev=%u, ring=%u, seqno=%x, blocking?=%s",
 		      __entry->dev, __entry->ring, __entry->seqno,
 		      __entry->blocking ?  "yes (NB)" : "no")
 );
 
-DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end,
-	    TP_PROTO(struct intel_engine_cs *ring, u32 seqno),
-	    TP_ARGS(ring, seqno)
-);
-
-DECLARE_EVENT_CLASS(i915_ring,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring),
+TRACE_EVENT(i915_gem_request_wait_end,
+	    TP_PROTO(struct i915_gem_request *rq),
+	    TP_ARGS(rq),
 
 	    TP_STRUCT__entry(
 			     __field(u32, dev)
 			     __field(u32, ring)
+			     __field(u32, seqno)
+			     __field(bool, completed)
 			     ),
 
 	    TP_fast_assign(
-			   __entry->dev = ring->dev->primary->index;
-			   __entry->ring = ring->id;
+			   __entry->dev = rq->i915->dev->primary->index;
+			   __entry->ring = rq->engine->id;
+			   __entry->seqno = rq->seqno;
+			   __entry->completed = rq->completed;
 			   ),
 
-	    TP_printk("dev=%u, ring=%u", __entry->dev, __entry->ring)
-);
-
-DEFINE_EVENT(i915_ring, i915_ring_wait_begin,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring)
-);
-
-DEFINE_EVENT(i915_ring, i915_ring_wait_end,
-	    TP_PROTO(struct intel_engine_cs *ring),
-	    TP_ARGS(ring)
+	    TP_printk("dev=%u, ring=%u, seqno=%x, completed=%s",
+		      __entry->dev, __entry->ring, __entry->seqno,
+		      __entry->completed ?  "yes" : "no")
 );
 
 TRACE_EVENT(i915_flip_request,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f2bc198..39b2fa5 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2165,7 +2165,7 @@ static int intel_align_height(struct drm_device *dev, int height, bool tiled)
 int
 intel_pin_and_fence_fb_obj(struct drm_device *dev,
 			   struct drm_i915_gem_object *obj,
-			   struct intel_engine_cs *pipelined)
+			   struct i915_gem_request *pipelined)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 alignment;
@@ -8968,7 +8968,7 @@ out:
  */
 static void intel_mark_fb_busy(struct drm_device *dev,
 			       unsigned frontbuffer_bits,
-			       struct intel_engine_cs *engine)
+			       struct i915_gem_request *rq)
 {
 	enum pipe pipe;
 
@@ -8980,8 +8980,8 @@ static void intel_mark_fb_busy(struct drm_device *dev,
 			continue;
 
 		intel_increase_pllclock(dev, pipe);
-		if (engine && intel_fbc_enabled(dev))
-			engine->fbc_dirty = true;
+		if (rq && intel_fbc_enabled(dev))
+			rq->pending_flush |= I915_KICK_FBC;
 	}
 }
 
@@ -8997,7 +8997,7 @@ static void intel_mark_fb_busy(struct drm_device *dev,
  * scheduled.
  */
 void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			     struct intel_engine_cs *engine)
+			     struct i915_gem_request *rq)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -9007,7 +9007,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 	if (!obj->frontbuffer_bits)
 		return;
 
-	if (engine) {
+	if (rq) {
 		mutex_lock(&dev_priv->fb_tracking.lock);
 		dev_priv->fb_tracking.busy_bits
 			|= obj->frontbuffer_bits;
@@ -9016,7 +9016,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 		mutex_unlock(&dev_priv->fb_tracking.lock);
 	}
 
-	intel_mark_fb_busy(dev, obj->frontbuffer_bits, engine);
+	intel_mark_fb_busy(dev, obj->frontbuffer_bits, rq);
 
 	intel_edp_psr_invalidate(dev, obj->frontbuffer_bits);
 }
@@ -9162,6 +9162,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 	intel_unpin_fb_obj(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
+	i915_request_put(work->flip_queued_request);
 
 	intel_update_fbc(dev);
 	mutex_unlock(&dev->struct_mutex);
@@ -9300,97 +9301,94 @@ static inline void intel_mark_page_flip_active(struct intel_crtc *intel_crtc)
 	smp_wmb();
 }
 
-static int intel_gen2_queue_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
+/*
+ * Emit the gen2 MI_DISPLAY_FLIP sequence for @crtc into the ring of @rq,
+ * finishing with intel_ring_advance() as the gen3+ variants do.
+ * Returns 0 on success, or the error reported by intel_ring_begin().
+ */
+static int intel_gen2_queue_flip(struct i915_gem_request *rq,
+				 struct intel_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *engine,
 				 uint32_t flags)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_ringbuffer *ring;
 	u32 flip_mask;
-	int ret;
 
-	ret = intel_ring_begin(engine, 6);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	/* Can't queue multiple flips, so wait for the previous
 	 * one to finish before executing the next.
 	 */
-	if (intel_crtc->plane)
+	if (crtc->plane)
 		flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
 	else
 		flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_emit(engine, MI_DISPLAY_FLIP |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(engine, fb->pitches[0]);
-	intel_ring_emit(engine, intel_crtc->unpin_work->gtt_offset);
-	intel_ring_emit(engine, 0); /* aux display base address, unused */
+	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, MI_DISPLAY_FLIP |
+			MI_DISPLAY_FLIP_PLANE(crtc->plane));
+	intel_ring_emit(ring, fb->pitches[0]);
+	intel_ring_emit(ring, crtc->unpin_work->gtt_offset);
+	intel_ring_emit(ring, 0); /* aux display base address, unused */
+	intel_ring_advance(ring);
 
-	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(engine);
 	return 0;
 }
 
-static int intel_gen3_queue_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
+static int intel_gen3_queue_flip(struct i915_gem_request *rq,
+				 struct intel_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *engine,
 				 uint32_t flags)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_ringbuffer *ring;
 	u32 flip_mask;
-	int ret;
 
-	ret = intel_ring_begin(engine, 6);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	if (intel_crtc->plane)
+	if (crtc->plane)
 		flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
 	else
 		flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
-	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | flip_mask);
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(engine, fb->pitches[0]);
-	intel_ring_emit(engine, intel_crtc->unpin_work->gtt_offset);
-	intel_ring_emit(engine, MI_NOOP);
+	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
+			MI_DISPLAY_FLIP_PLANE(crtc->plane));
+	intel_ring_emit(ring, fb->pitches[0]);
+	intel_ring_emit(ring, crtc->unpin_work->gtt_offset);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
-	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(engine);
 	return 0;
 }
 
-static int intel_gen4_queue_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
+static int intel_gen4_queue_flip(struct i915_gem_request *rq,
+				 struct intel_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *engine,
 				 uint32_t flags)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_i915_private *dev_priv = rq->i915;
+	struct intel_ringbuffer *ring;
 	uint32_t pf, pipesrc;
-	int ret;
 
-	ret = intel_ring_begin(engine, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	/* i965+ uses the linear or tiled offsets from the
 	 * Display Registers (which do not change across a page-flip)
 	 * so we need only reprogram the base address.
 	 */
-	intel_ring_emit(engine, MI_DISPLAY_FLIP |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(engine, fb->pitches[0]);
-	intel_ring_emit(engine, intel_crtc->unpin_work->gtt_offset |
+	intel_ring_emit(ring, MI_DISPLAY_FLIP |
+			MI_DISPLAY_FLIP_PLANE(crtc->plane));
+	intel_ring_emit(ring, fb->pitches[0]);
+	intel_ring_emit(ring, crtc->unpin_work->gtt_offset |
 			obj->tiling_mode);
 
 	/* XXX Enabling the panel-fitter across page-flip is so far
@@ -9398,62 +9390,57 @@ static int intel_gen4_queue_flip(struct drm_device *dev,
 	 * pf = I915_READ(pipe == 0 ? PFA_CTL_1 : PFB_CTL_1) & PF_ENABLE;
 	 */
 	pf = 0;
-	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
-	intel_ring_emit(engine, pf | pipesrc);
+	pipesrc = I915_READ(PIPESRC(crtc->pipe)) & 0x0fff0fff;
+	intel_ring_emit(ring, pf | pipesrc);
+	intel_ring_advance(ring);
 
-	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(engine);
 	return 0;
 }
 
-static int intel_gen6_queue_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
+static int intel_gen6_queue_flip(struct i915_gem_request *rq,
+				 struct intel_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *engine,
 				 uint32_t flags)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_i915_private *dev_priv = rq->i915;
+	struct intel_ringbuffer *ring;
 	uint32_t pf, pipesrc;
-	int ret;
 
-	ret = intel_ring_begin(engine, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	intel_ring_emit(engine, MI_DISPLAY_FLIP |
-			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
-	intel_ring_emit(engine, fb->pitches[0] | obj->tiling_mode);
-	intel_ring_emit(engine, intel_crtc->unpin_work->gtt_offset);
+	intel_ring_emit(ring, MI_DISPLAY_FLIP |
+			MI_DISPLAY_FLIP_PLANE(crtc->plane));
+	intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode);
+	intel_ring_emit(ring, crtc->unpin_work->gtt_offset);
 
 	/* Contrary to the suggestions in the documentation,
 	 * "Enable Panel Fitter" does not seem to be required when page
 	 * flipping with a non-native mode, and worse causes a normal
 	 * modeset to fail.
-	 * pf = I915_READ(PF_CTL(intel_crtc->pipe)) & PF_ENABLE;
+	 * pf = I915_READ(PF_CTL(crtc->pipe)) & PF_ENABLE;
 	 */
 	pf = 0;
-	pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff;
-	intel_ring_emit(engine, pf | pipesrc);
+	pipesrc = I915_READ(PIPESRC(crtc->pipe)) & 0x0fff0fff;
+	intel_ring_emit(ring, pf | pipesrc);
+	intel_ring_advance(ring);
 
-	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(engine);
 	return 0;
 }
 
-static int intel_gen7_queue_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
+static int intel_gen7_queue_flip(struct i915_gem_request *rq,
+				 struct intel_crtc *crtc,
 				 struct drm_framebuffer *fb,
 				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *engine,
 				 uint32_t flags)
 {
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_ringbuffer *ring;
 	uint32_t plane_bit = 0;
 	int len, ret;
 
-	switch (intel_crtc->plane) {
+	switch (crtc->plane) {
 	case PLANE_A:
 		plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A;
 		break;
@@ -9469,14 +9456,14 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	}
 
 	len = 4;
-	if (engine->id == RCS) {
+	if (rq->engine->id == RCS) {
 		len += 6;
 		/*
 		 * On Gen 8, SRM is now taking an extra dword to accommodate
 		 * 48bits addresses, and we need a NOOP for the batch size to
 		 * stay even.
 		 */
-		if (IS_GEN8(dev))
+		if (IS_GEN8(rq->i915))
 			len += 2;
 	}
 
@@ -9490,13 +9477,13 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	 * then do the cacheline alignment, and finally emit the
 	 * MI_DISPLAY_FLIP.
 	 */
-	ret = intel_ring_cacheline_align(engine);
+	ret = intel_ring_cacheline_align(rq);
 	if (ret)
 		return ret;
 
-	ret = intel_ring_begin(engine, len);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, len);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	/* Unmask the flip-done completion message. Note that the bspec says that
 	 * we should do this for both the BCS and RCS, and that we must not unmask
@@ -9507,33 +9494,32 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 	 * for the RCS also doesn't appear to drop events. Setting the DERRMR
 	 * to zero does lead to lockups within MI_DISPLAY_FLIP.
 	 */
-	if (engine->id == RCS) {
-		intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(engine, DERRMR);
-		intel_ring_emit(engine, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
+	if (rq->engine->id == RCS) {
+		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE |
 					DERRMR_PIPEB_PRI_FLIP_DONE |
 					DERRMR_PIPEC_PRI_FLIP_DONE));
-		if (IS_GEN8(dev))
-			intel_ring_emit(engine, MI_STORE_REGISTER_MEM_GEN8(1) |
+		if (IS_GEN8(rq->i915))
+			intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8(1) |
 					      MI_SRM_LRM_GLOBAL_GTT);
 		else
-			intel_ring_emit(engine, MI_STORE_REGISTER_MEM(1) |
+			intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) |
 					      MI_SRM_LRM_GLOBAL_GTT);
-		intel_ring_emit(engine, DERRMR);
-		intel_ring_emit(engine, engine->scratch.gtt_offset + 256);
-		if (IS_GEN8(dev)) {
-			intel_ring_emit(engine, 0);
-			intel_ring_emit(engine, MI_NOOP);
+		intel_ring_emit(ring, DERRMR);
+		intel_ring_emit(ring, rq->engine->scratch.gtt_offset + 256);
+		if (IS_GEN8(rq->i915)) {
+			intel_ring_emit(ring, 0);
+			intel_ring_emit(ring, MI_NOOP);
 		}
 	}
 
-	intel_ring_emit(engine, MI_DISPLAY_FLIP_I915 | plane_bit);
-	intel_ring_emit(engine, (fb->pitches[0] | obj->tiling_mode));
-	intel_ring_emit(engine, intel_crtc->unpin_work->gtt_offset);
-	intel_ring_emit(engine, (MI_NOOP));
+	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
+	intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode);
+	intel_ring_emit(ring, crtc->unpin_work->gtt_offset);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
-	intel_mark_page_flip_active(intel_crtc);
-	__intel_ring_advance(engine);
 	return 0;
 }
 
@@ -9551,7 +9537,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine,
 	if (engine == NULL)
 		return true;
 
-	if (INTEL_INFO(engine->dev)->gen < 5)
+	if (INTEL_INFO(engine->i915)->gen < 5)
 		return false;
 
 	if (i915.use_mmio_flip < 0)
@@ -9561,7 +9547,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine,
 	else if (i915.enable_execlists)
 		return true;
 	else
-		return engine != obj->ring;
+		return engine != i915_request_engine(obj->last_write.request);
 }
 
 static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -9592,102 +9578,61 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
 	POSTING_READ(DSPSURF(intel_crtc->plane));
 }
 
-static int intel_postpone_flip(struct drm_i915_gem_object *obj)
-{
-	struct intel_engine_cs *engine;
-	int ret;
-
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
-
-	if (!obj->last_write_seqno)
-		return 0;
-
-	engine = obj->ring;
-
-	if (i915_seqno_passed(engine->get_seqno(engine, true),
-			      obj->last_write_seqno))
-		return 0;
-
-	ret = i915_gem_check_olr(engine, obj->last_write_seqno);
-	if (ret)
-		return ret;
-
-	if (WARN_ON(!engine->irq_get(engine)))
-		return 0;
-
-	return 1;
-}
+struct flip_work {
+	struct work_struct work;
+	struct i915_gem_request *rq;
+	struct intel_crtc *crtc;
+};
 
-void intel_notify_mmio_flip(struct intel_engine_cs *engine)
+static void intel_mmio_flip_work(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv = to_i915(engine->dev);
-	struct intel_crtc *intel_crtc;
-	unsigned long irq_flags;
-	u32 seqno;
-
-	seqno = engine->get_seqno(engine, false);
+	struct flip_work *flip = container_of(work, struct flip_work, work);
 
-	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	for_each_intel_crtc(engine->dev, intel_crtc) {
-		struct intel_mmio_flip *mmio_flip;
+	if (__i915_request_wait(flip->rq, false, NULL, NULL) == 0)
+		intel_do_mmio_flip(flip->crtc);
 
-		mmio_flip = &intel_crtc->mmio_flip;
-		if (mmio_flip->seqno == 0)
-			continue;
-
-		if (engine->id != mmio_flip->ring_id)
-			continue;
-
-		if (i915_seqno_passed(seqno, mmio_flip->seqno)) {
-			intel_do_mmio_flip(intel_crtc);
-			mmio_flip->seqno = 0;
-			engine->irq_put(engine);
-		}
-	}
-	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+	i915_request_put__unlocked(flip->rq);
+	kfree(flip);
 }
 
-static int intel_queue_mmio_flip(struct drm_device *dev,
-				 struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj,
-				 struct intel_engine_cs *engine,
-				 uint32_t flags)
+static int intel_queue_mmio_flip(struct intel_crtc *crtc,
+				 struct i915_gem_request *rq)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	unsigned long irq_flags;
+	struct flip_work *flip;
 	int ret;
 
-	if (WARN_ON(intel_crtc->mmio_flip.seqno))
+	if (WARN_ON(crtc->mmio_flip))
 		return -EBUSY;
 
-	ret = intel_postpone_flip(obj);
-	if (ret < 0)
-		return ret;
-	if (ret == 0) {
-		intel_do_mmio_flip(intel_crtc);
+	if (rq == NULL) {
+		intel_do_mmio_flip(crtc);
 		return 0;
 	}
 
-	spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags);
-	intel_crtc->mmio_flip.seqno = obj->last_write_seqno;
-	intel_crtc->mmio_flip.ring_id = obj->ring->id;
-	spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags);
+	if (i915_request_complete(rq)) {
+		intel_do_mmio_flip(crtc);
+		return 0;
+	}
 
-	/*
-	 * Double check to catch cases where irq fired before
-	 * mmio flip data was ready
-	 */
-	intel_notify_mmio_flip(obj->ring);
+	ret = i915_request_emit_breadcrumb(rq);
+	if (ret)
+		return ret;
+
+	flip = kmalloc(sizeof(*flip), GFP_KERNEL);
+	if (flip == NULL)
+		return -ENOMEM;
+
+	INIT_WORK(&flip->work, intel_mmio_flip_work);
+	flip->rq = i915_request_get(rq);
+	flip->crtc = crtc;
+	schedule_work(&flip->work);
 	return 0;
 }
 
-static int intel_default_queue_flip(struct drm_device *dev,
-				    struct drm_crtc *crtc,
+static int intel_default_queue_flip(struct i915_gem_request *rq,
+				    struct intel_crtc *crtc,
 				    struct drm_framebuffer *fb,
 				    struct drm_i915_gem_object *obj,
-				    struct intel_engine_cs *engine,
 				    uint32_t flags)
 {
 	return -ENODEV;
@@ -9706,6 +9651,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	enum pipe pipe = intel_crtc->pipe;
 	struct intel_unpin_work *work;
 	struct intel_engine_cs *engine;
+	struct i915_gem_request *rq;
 	unsigned long flags;
 	int ret;
 
@@ -9790,28 +9736,57 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev)) {
 		engine = &dev_priv->engine[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		engine = obj->ring;
+		engine = i915_request_engine(obj->last_write.request);
 		if (engine == NULL || engine->id != RCS)
 			engine = &dev_priv->engine[BCS];
 	} else {
 		engine = &dev_priv->engine[RCS];
 	}
 
-	ret = intel_pin_and_fence_fb_obj(dev, obj, engine);
-	if (ret)
-		goto cleanup_pending;
+	if (use_mmio_flip(engine, obj)) {
+		rq = i915_request_get(obj->last_write.request);
 
-	work->gtt_offset =
-		i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+		ret = intel_pin_and_fence_fb_obj(dev, obj, rq);
+		if (ret)
+			goto cleanup_rq;
 
-	if (use_mmio_flip(engine, obj))
-		ret = intel_queue_mmio_flip(dev, crtc, fb, obj, engine,
-					    page_flip_flags);
-	else
-		ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, engine,
-				page_flip_flags);
-	if (ret)
-		goto cleanup_unpin;
+		work->gtt_offset =
+			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+
+		ret = intel_queue_mmio_flip(intel_crtc, rq);
+		if (ret)
+			goto cleanup_unpin;
+	} else {
+		struct intel_context *ctx = engine->default_context;
+		if (obj->last_write.request)
+			ctx = obj->last_write.request->ctx;
+		rq = intel_engine_alloc_request(engine, ctx);
+		if (IS_ERR(rq)) {
+			ret = PTR_ERR(rq);
+			goto cleanup_pending;
+		}
+
+		ret = intel_pin_and_fence_fb_obj(dev, obj, rq);
+		if (ret)
+			goto cleanup_rq;
+
+		work->gtt_offset =
+			i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset;
+
+		ret = dev_priv->display.queue_flip(rq, intel_crtc, fb, obj,
+						   page_flip_flags);
+		if (ret)
+			goto cleanup_unpin;
+
+		ret = i915_request_commit(rq);
+		if (ret)
+			goto cleanup_unpin;
+
+		intel_mark_page_flip_active(intel_crtc);
+	}
+
+	work->flip_queued_request = rq;
+	work->enable_stall_check = true;
 
 	i915_gem_track_fb(work->old_fb_obj, obj,
 			  INTEL_FRONTBUFFER_PRIMARY(pipe));
@@ -9826,6 +9801,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 cleanup_unpin:
 	intel_unpin_fb_obj(obj);
+cleanup_rq:
+	i915_request_put(rq);
 cleanup_pending:
 	atomic_dec(&intel_crtc->unpin_work_count);
 	crtc->primary->fb = old_fb;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4a76788..bb7cf47 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -378,11 +378,6 @@ struct intel_pipe_wm {
 	bool sprites_scaled;
 };
 
-struct intel_mmio_flip {
-	u32 seqno;
-	u32 ring_id;
-};
-
 struct intel_crtc {
 	struct drm_crtc base;
 	enum pipe pipe;
@@ -433,7 +428,7 @@ struct intel_crtc {
 	} wm;
 
 	int scanline_offset;
-	struct intel_mmio_flip mmio_flip;
+	struct i915_gem_request *mmio_flip;
 };
 
 struct intel_plane_wm_parameters {
@@ -664,6 +659,7 @@ struct intel_unpin_work {
 #define INTEL_FLIP_COMPLETE	2
 	u32 flip_count;
 	u32 gtt_offset;
+	struct i915_gem_request *flip_queued_request;
 	bool enable_stall_check;
 };
 
@@ -781,7 +777,7 @@ bool intel_has_pending_fb_unpin(struct drm_device *dev);
 int intel_pch_rawclk(struct drm_device *dev);
 void intel_mark_busy(struct drm_device *dev);
 void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
-			     struct intel_engine_cs *engine);
+			     struct i915_gem_request *rq);
 void intel_frontbuffer_flip_prepare(struct drm_device *dev,
 				    unsigned frontbuffer_bits);
 void intel_frontbuffer_flip_complete(struct drm_device *dev,
@@ -840,7 +836,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct intel_load_detect_pipe *old);
 int intel_pin_and_fence_fb_obj(struct drm_device *dev,
 			       struct drm_i915_gem_object *obj,
-			       struct intel_engine_cs *pipelined);
+			       struct i915_gem_request *pipelined);
 void intel_unpin_fb_obj(struct drm_i915_gem_object *obj);
 struct drm_framebuffer *
 __intel_framebuffer_create(struct drm_device *dev,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fbc877b..bee3e3e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -215,46 +215,25 @@ enum {
  *
- * Return: 1 if Execlists is supported and has to be enabled.
+ * Forces *@enable_execlists to 0 without logical ring contexts or PPGTT.
  */
-int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
+void intel_sanitize_enable_execlists(struct drm_device *dev, int *enable_execlists)
 {
-	WARN_ON(i915.enable_ppgtt == -1);
+	int val;
 
-	if (enable_execlists == 0)
-		return 0;
-
-	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
-	    i915.use_mmio_flip >= 0)
-		return 1;
-
-	return 0;
-}
+	val = *enable_execlists;
 
-/**
- * intel_execlists_ctx_id() - get the Execlists Context ID
- * @ctx_obj: Logical Ring Context backing object.
- *
- * Do not confuse with ctx->id! Unfortunately we have a name overload
- * here: the old context ID we pass to userspace as a handler so that
- * they can refer to a context, and the new context ID we pass to the
- * ELSP so that the GPU can inform us of the context status via
- * interrupts.
- *
- * Return: 20-bits globally unique context ID.
- */
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
-{
-	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+	if (!HAS_LOGICAL_RING_CONTEXTS(dev) ||
+	    !USES_PPGTT(dev))
+		val = 0;
 
-	/* LRCA is required to be 4K aligned so the more significant 20 bits
-	 * are globally unique */
-	return lrca >> 12;
+	*enable_execlists = val;
 }
 
-static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj,
+					 u32 ctx_id)
 {
-	uint64_t desc;
-	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+	uint64_t desc, lrca;
 
+	lrca = i915_gem_obj_ggtt_offset(ctx_obj);
 	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
 
 	desc = GEN8_CTX_VALID;
@@ -262,7 +241,7 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
 	desc |= GEN8_CTX_L3LLC_COHERENT;
 	desc |= GEN8_CTX_PRIVILEGE;
 	desc |= lrca;
-	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
+	desc |= (u64)ctx_id << GEN8_CTX_ID_SHIFT;
 
 	/* TODO: WaDisableLiteRestore when we start using semaphore
 	 * signalling between Command Streamers */
@@ -271,26 +250,45 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
 	return desc;
 }
 
-static void execlists_elsp_write(struct intel_engine_cs *engine,
-				 struct drm_i915_gem_object *ctx_obj0,
-				 struct drm_i915_gem_object *ctx_obj1)
+/*
+ * Store @tail in the CTX_RING_TAIL slot of the register state held in page 1
+ * of @obj, then return the descriptor for @obj tagged with @tag.
+ * execlists_ctx_descriptor() builds a uint64_t and the caller splits it with
+ * upper_32_bits()/lower_32_bits(), so the full 64 bits must be returned.
+ */
+static uint64_t execlists_ctx_write_tail(struct drm_i915_gem_object *obj, u32 tail, u32 tag)
+{
+	uint32_t *reg_state;
+
+	reg_state = kmap_atomic(i915_gem_object_get_page(obj, 1));
+	reg_state[CTX_RING_TAIL+1] = tail;
+	kunmap_atomic(reg_state);
+
+	return execlists_ctx_descriptor(obj, tag);
+}
+
+static void execlists_submit_pair(struct intel_engine_cs *engine,
+				  struct i915_gem_request *rq[2])
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	uint64_t temp = 0;
+	struct drm_i915_private *dev_priv = engine->i915;
+	uint64_t tmp;
 	uint32_t desc[4];
 	unsigned long flags;
 
 	/* XXX: You must always write both descriptors in the order below. */
-	if (ctx_obj1)
-		temp = execlists_ctx_descriptor(ctx_obj1);
-	else
-		temp = 0;
-	desc[1] = (u32)(temp >> 32);
-	desc[0] = (u32)temp;
 
-	temp = execlists_ctx_descriptor(ctx_obj0);
-	desc[3] = (u32)(temp >> 32);
-	desc[2] = (u32)temp;
+	tmp = execlists_ctx_write_tail(rq[0]->ctx->ring[engine->id].state,
+				       rq[0]->tail, rq[0]->seqno);
+	desc[3] = upper_32_bits(tmp);
+	desc[2] = lower_32_bits(tmp);
+
+	if (rq[1])
+		tmp = execlists_ctx_write_tail(rq[1]->ctx->ring[engine->id].state,
+					       rq[1]->tail, rq[1]->seqno);
+	else
+		tmp = 0;
+	desc[1] = upper_32_bits(tmp);
+	desc[0] = lower_32_bits(tmp);
 
 	/* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes
 	 * are in progress.
@@ -320,115 +312,53 @@ static void execlists_elsp_write(struct intel_engine_cs *engine,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 }
 
-static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
-{
-	struct page *page;
-	uint32_t *reg_state;
-
-	page = i915_gem_object_get_page(ctx_obj, 1);
-	reg_state = kmap_atomic(page);
-
-	reg_state[CTX_RING_TAIL+1] = tail;
-
-	kunmap_atomic(reg_state);
-
-	return 0;
-}
-
-static int execlists_submit_context(struct intel_engine_cs *engine,
-				    struct intel_context *to0, u32 tail0,
-				    struct intel_context *to1, u32 tail1)
+/*
+ * Pick up to two requests from engine->pending, letting a later request
+ * replace an earlier one in the same slot when both share a context, move
+ * every consumed request onto engine->requests and hand the pair to
+ * execlists_submit_pair(). Caller must hold engine->execlist_lock.
+ */
+static void execlists_submit(struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *ctx_obj0;
-	struct drm_i915_gem_object *ctx_obj1 = NULL;
-
-	ctx_obj0 = to0->ring[engine->id].state;
-	BUG_ON(!ctx_obj0);
-	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
-
-	execlists_ctx_write_tail(ctx_obj0, tail0);
-
-	if (to1) {
-		ctx_obj1 = to1->ring[engine->id].state;
-		BUG_ON(!ctx_obj1);
-		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
-
-		execlists_ctx_write_tail(ctx_obj1, tail1);
-	}
-
-	execlists_elsp_write(engine, ctx_obj0, ctx_obj1);
-
-	return 0;
-}
-
-static void execlists_context_unqueue(struct intel_engine_cs *engine)
-{
-	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
-	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	struct i915_gem_request *rq[2] = {};
+	int i = 0;
 
 	assert_spin_locked(&engine->execlist_lock);
 
-	if (list_empty(&engine->execlist_queue))
-		return;
-
+	/* rq[0] is dereferenced unconditionally below; bail out if idle. */
+	if (list_empty(&engine->pending))
+		return;
+
 	/* Try to read in pairs */
-	list_for_each_entry_safe(cursor, tmp, &engine->execlist_queue,
-				 execlist_link) {
-		if (!req0) {
-			req0 = cursor;
-		} else if (req0->ctx == cursor->ctx) {
+	while (!list_empty(&engine->pending)) {
+		struct i915_gem_request *next;
+
+		next = list_first_entry(&engine->pending,
+					typeof(*next),
+					engine_list);
+
+		if (rq[i] == NULL) {
+new_slot:
+			rq[i] = next;
+		} else if (rq[i]->ctx == next->ctx) {
 			/* Same ctx: ignore first request, as second request
 			 * will update tail past first request's workload */
-			cursor->elsp_submitted = req0->elsp_submitted;
-			list_del(&req0->execlist_link);
-			queue_work(dev_priv->wq, &req0->work);
-			req0 = cursor;
+			rq[i] = next;
 		} else {
-			req1 = cursor;
-			break;
-		}
-	}
+			if (++i == ARRAY_SIZE(rq))
+				break;
 
-	WARN_ON(req1 && req1->elsp_submitted);
-
-	WARN_ON(execlists_submit_context(engine, req0->ctx, req0->tail,
-					 req1 ? req1->ctx : NULL,
-					 req1 ? req1->tail : 0));
-
-	req0->elsp_submitted++;
-	if (req1)
-		req1->elsp_submitted++;
-}
-
-static bool execlists_check_remove_request(struct intel_engine_cs *engine,
-					   u32 request_id)
-{
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	struct intel_ctx_submit_request *head_req;
-
-	assert_spin_locked(&engine->execlist_lock);
-
-	head_req = list_first_entry_or_null(&engine->execlist_queue,
-					    struct intel_ctx_submit_request,
-					    execlist_link);
-
-	if (head_req != NULL) {
-		struct drm_i915_gem_object *ctx_obj =
-				head_req->ctx->ring[engine->id].state;
-		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-			WARN(head_req->elsp_submitted == 0,
-			     "Never submitted head request\n");
-
-			if (--head_req->elsp_submitted <= 0) {
-				list_del(&head_req->execlist_link);
-				queue_work(dev_priv->wq, &head_req->work);
-				return true;
-			}
+			goto new_slot;
 		}
+
+		list_move_tail(&next->engine_list, &engine->requests);
 	}
 
-	return false;
+	execlists_submit_pair(engine, rq);
+
+	engine->execlists_submitted++;
+	if (rq[1])
+		engine->execlists_submitted++;
 }
 
 /**
@@ -438,1023 +358,56 @@ static bool execlists_check_remove_request(struct intel_engine_cs *engine,
  * Check the unread Context Status Buffers and manage the submission of new
  * contexts to the ELSP accordingly.
  */
-void intel_execlists_handle_ctx_events(struct intel_engine_cs *engine)
+void intel_execlists_irq_handler(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	u32 status_pointer;
+	struct drm_i915_private *dev_priv = engine->i915;
 	u8 read_pointer;
 	u8 write_pointer;
-	u32 status;
-	u32 status_id;
-	u32 submit_contexts = 0;
-
-	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
 
 	read_pointer = engine->next_context_status_buffer;
-	write_pointer = status_pointer & 0x07;
+	write_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(engine)) & 0x07;
 	if (read_pointer > write_pointer)
 		write_pointer += 6;
 
 	spin_lock(&engine->execlist_lock);
 
-	while (read_pointer < write_pointer) {
-		read_pointer++;
-		status = I915_READ(RING_CONTEXT_STATUS_BUF(engine) +
-				(read_pointer % 6) * 8);
-		status_id = I915_READ(RING_CONTEXT_STATUS_BUF(engine) +
-				(read_pointer % 6) * 8 + 4);
+	while (read_pointer++ < write_pointer) {
+		u32 status = I915_READ(RING_CONTEXT_STATUS_BUF(engine) +
+				       (read_pointer % 6) * 8);
+#if 0
+		u32 seqno = I915_READ(RING_CONTEXT_STATUS_BUF(engine) +
+				      (read_pointer % 6) * 8 + 4);
+#endif
 
 		if (status & GEN8_CTX_STATUS_PREEMPTED) {
-			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
-				if (execlists_check_remove_request(engine, status_id))
-					WARN(1, "Lite Restored request removed from queue\n");
-			} else
+			if (status & GEN8_CTX_STATUS_LITE_RESTORE)
+				WARN(1, "Lite Restored request removed from queue\n");
+			else
 				WARN(1, "Preemption without Lite Restore\n");
 		}
 
-		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
-		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
-			if (execlists_check_remove_request(engine, status_id))
-				submit_contexts++;
+		if (status & (GEN8_CTX_STATUS_ACTIVE_IDLE | GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
+			engine->execlists_submitted--;
 		}
 	}
 
-	if (submit_contexts != 0)
-		execlists_context_unqueue(engine);
+	if (engine->execlists_submitted < 2)
+		execlists_submit(engine);
 
 	spin_unlock(&engine->execlist_lock);
 
-	WARN(submit_contexts > 2, "More than two context complete events?\n");
 	engine->next_context_status_buffer = write_pointer % 6;
-
 	I915_WRITE(RING_CONTEXT_STATUS_PTR(engine),
 		   ((u32)engine->next_context_status_buffer & 0x07) << 8);
 }
 
-static void execlists_free_request_task(struct work_struct *work)
-{
-	struct intel_ctx_submit_request *req =
-		container_of(work, struct intel_ctx_submit_request, work);
-	struct drm_device *dev = req->engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	intel_runtime_pm_put(dev_priv);
-
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_context_unreference(req->ctx);
-	mutex_unlock(&dev->struct_mutex);
-
-	kfree(req);
-}
-
-static int execlists_context_queue(struct intel_engine_cs *engine,
-				   struct intel_context *to,
-				   u32 tail)
-{
-	struct intel_ctx_submit_request *req = NULL, *cursor;
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	unsigned long flags;
-	int num_elements = 0;
-
-	req = kzalloc(sizeof(*req), GFP_KERNEL);
-	if (req == NULL)
-		return -ENOMEM;
-	req->ctx = to;
-	i915_gem_context_reference(req->ctx);
-	req->engine = engine;
-	req->tail = tail;
-	INIT_WORK(&req->work, execlists_free_request_task);
-
-	intel_runtime_pm_get(dev_priv);
-
-	spin_lock_irqsave(&engine->execlist_lock, flags);
-
-	list_for_each_entry(cursor, &engine->execlist_queue, execlist_link)
-		if (++num_elements > 2)
-			break;
-
-	if (num_elements > 2) {
-		struct intel_ctx_submit_request *tail_req;
-
-		tail_req = list_last_entry(&engine->execlist_queue,
-					   struct intel_ctx_submit_request,
-					   execlist_link);
-
-		if (to == tail_req->ctx) {
-			WARN(tail_req->elsp_submitted != 0,
-			     "More than 2 already-submitted reqs queued\n");
-			list_del(&tail_req->execlist_link);
-			queue_work(dev_priv->wq, &tail_req->work);
-		}
-	}
-
-	list_add_tail(&req->execlist_link, &engine->execlist_queue);
-	if (num_elements == 0)
-		execlists_context_unqueue(engine);
-
-	spin_unlock_irqrestore(&engine->execlist_lock, flags);
-
-	return 0;
-}
-
-static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	uint32_t flush_domains;
-	int ret;
-
-	flush_domains = 0;
-	if (engine->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
-
-	ret = engine->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
-	if (ret)
-		return ret;
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
-static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
-				 struct list_head *vmas)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	struct i915_vma *vma;
-	uint32_t flush_domains = 0;
-	bool flush_chipset = false;
-	int ret;
-
-	list_for_each_entry(vma, vmas, exec_list) {
-		struct drm_i915_gem_object *obj = vma->obj;
-
-		ret = i915_gem_object_sync(obj, engine);
-		if (ret)
-			return ret;
-
-		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			flush_chipset |= i915_gem_clflush_object(obj, false);
-
-		flush_domains |= obj->base.write_domain;
-	}
-
-	if (flush_domains & I915_GEM_DOMAIN_GTT)
-		wmb();
-
-	/* Unconditionally invalidate gpu caches and ensure that we do flush
-	 * any residual writes from the previous batch.
-	 */
-	return logical_ring_invalidate_all_caches(ringbuf);
-}
-
-/**
- * execlists_submission() - submit a batchbuffer for execution, Execlists style
- * @dev: DRM device.
- * @file: DRM file.
- * @ring: Engine Command Streamer to submit to.
- * @ctx: Context to employ for this submission.
- * @args: execbuffer call arguments.
- * @vmas: list of vmas.
- * @batch_obj: the batchbuffer to submit.
- * @exec_start: batchbuffer start virtual address pointer.
- * @flags: translated execbuffer call flags.
- *
- * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
- * away the submission details of the execbuffer ioctl call.
- *
- * Return: non-zero if the submission fails.
- */
-int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
-			       struct intel_engine_cs *engine,
-			       struct intel_context *ctx,
-			       struct drm_i915_gem_execbuffer2 *args,
-			       struct list_head *vmas,
-			       struct drm_i915_gem_object *batch_obj,
-			       u64 exec_start, u32 flags)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ringbuffer *ringbuf = ctx->ring[engine->id].ringbuf;
-	int instp_mode;
-	u32 instp_mask;
-	int ret;
-
-	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
-	instp_mask = I915_EXEC_CONSTANTS_MASK;
-	switch (instp_mode) {
-	case I915_EXEC_CONSTANTS_REL_GENERAL:
-	case I915_EXEC_CONSTANTS_ABSOLUTE:
-	case I915_EXEC_CONSTANTS_REL_SURFACE:
-		if (instp_mode != 0 && engine != &dev_priv->engine[RCS]) {
-			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
-			return -EINVAL;
-		}
-
-		if (instp_mode != dev_priv->relative_constants_mode) {
-			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
-				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
-				return -EINVAL;
-			}
-
-			/* The HW changed the meaning on this bit on gen6 */
-			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
-		}
-		break;
-	default:
-		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
-		return -EINVAL;
-	}
-
-	if (args->num_cliprects != 0) {
-		DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
-		return -EINVAL;
-	} else {
-		if (args->DR4 == 0xffffffff) {
-			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
-			args->DR4 = 0;
-		}
-
-		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
-			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
-			return -EINVAL;
-		}
-	}
-
-	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
-		DRM_DEBUG("sol reset is gen7 only\n");
-		return -EINVAL;
-	}
-
-	ret = execlists_move_to_gpu(ringbuf, vmas);
-	if (ret)
-		return ret;
-
-	if (engine == &dev_priv->engine[RCS] &&
-	    instp_mode != dev_priv->relative_constants_mode) {
-		ret = intel_logical_ring_begin(ringbuf, 4);
-		if (ret)
-			return ret;
-
-		intel_logical_ring_emit(ringbuf, MI_NOOP);
-		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
-		intel_logical_ring_emit(ringbuf, INSTPM);
-		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
-		intel_logical_ring_advance(ringbuf);
-
-		dev_priv->relative_constants_mode = instp_mode;
-	}
-
-	ret = engine->emit_bb_start(ringbuf, exec_start, flags);
-	if (ret)
-		return ret;
-
-	i915_gem_execbuffer_move_to_active(vmas, engine);
-	i915_gem_execbuffer_retire_commands(dev, file, engine, batch_obj);
-
-	return 0;
-}
-
-void intel_logical_ring_stop(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	int ret;
-
-	if (!intel_engine_initialized(engine))
-		return;
-
-	ret = intel_engine_idle(engine);
-	if (ret && !i915_reset_in_progress(&to_i915(engine->dev)->gpu_error))
-		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
-			  engine->name, ret);
-
-	/* TODO: Is this correct with Execlists enabled? */
-	I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
-	if (wait_for_atomic((I915_READ_MODE(engine) & MODE_IDLE) != 0, 1000)) {
-		DRM_ERROR("%s :timed out trying to stop engine\n", engine->name);
-		return;
-	}
-	I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
-}
-
-int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	int ret;
-
-	if (!engine->gpu_caches_dirty)
-		return 0;
-
-	ret = engine->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
-	if (ret)
-		return ret;
-
-	engine->gpu_caches_dirty = false;
-	return 0;
-}
-
-/**
- * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
- * @ringbuf: Logical Ringbuffer to advance.
- *
- * The tail is updated in our logical ringbuffer struct, not in the actual context. What
- * really happens during submission is that the context and current tail will be placed
- * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
- * point, the tail *inside* the context is updated and the ELSP written to.
- */
-void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
-
-	intel_logical_ring_advance(ringbuf);
-
-	if (intel_engine_stopped(engine))
-		return;
-
-	execlists_context_queue(engine, ctx, ringbuf->tail);
-}
-
-static int logical_ring_alloc_seqno(struct intel_engine_cs *engine,
-				    struct intel_context *ctx)
-{
-	if (engine->outstanding_lazy_seqno)
-		return 0;
-
-	if (engine->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
-
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
-
-		/* Hold a reference to the context this request belongs to
-		 * (we will need it when the time comes to emit/retire the
-		 * request).
-		 */
-		request->ctx = ctx;
-		i915_gem_context_reference(request->ctx);
-
-		engine->preallocated_lazy_request = request;
-	}
-
-	return i915_gem_get_seqno(engine->dev, &engine->outstanding_lazy_seqno);
-}
-
-static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
-				     int bytes)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
-	int ret;
-
-	if (ringbuf->last_retired_head != -1) {
-		ringbuf->head = ringbuf->last_retired_head;
-		ringbuf->last_retired_head = -1;
-
-		ringbuf->space = intel_ring_space(ringbuf);
-		if (ringbuf->space >= bytes)
-			return 0;
-	}
-
-	list_for_each_entry(request, &engine->request_list, list) {
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= bytes) {
-			seqno = request->seqno;
-			break;
-		}
-	}
-
-	if (seqno == 0)
-		return -ENOSPC;
-
-	ret = i915_wait_seqno(engine, seqno);
-	if (ret)
-		return ret;
-
-	i915_gem_retire_requests__engine(engine);
-	ringbuf->head = ringbuf->last_retired_head;
-	ringbuf->last_retired_head = -1;
-
-	ringbuf->space = intel_ring_space(ringbuf);
-	return 0;
-}
-
-static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
-				       int bytes)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long end;
-	int ret;
-
-	ret = logical_ring_wait_request(ringbuf, bytes);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* Force the context submission in case we have been skipping it */
-	intel_logical_ring_advance_and_submit(ringbuf);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	do {
-		ringbuf->head = I915_READ_HEAD(engine);
-		ringbuf->space = intel_ring_space(ringbuf);
-		if (ringbuf->space >= bytes) {
-			ret = 0;
-			break;
-		}
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-
-	return ret;
-}
-
-static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
-{
-	uint32_t __iomem *virt;
-	int rem = ringbuf->size - ringbuf->tail;
-
-	if (ringbuf->space < rem) {
-		int ret = logical_ring_wait_for_space(ringbuf, rem);
-
-		if (ret)
-			return ret;
-	}
-
-	virt = ringbuf->virtual_start + ringbuf->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ringbuf->tail = 0;
-	ringbuf->space = intel_ring_space(ringbuf);
-
-	return 0;
-}
-
-static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
-{
-	int ret;
-
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = logical_ring_wrap_buffer(ringbuf);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = logical_ring_wait_for_space(ringbuf, bytes);
-		if (unlikely(ret))
-			return ret;
-	}
-
-	return 0;
-}
-
-/**
- * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
- *
- * @ringbuf: Logical ringbuffer.
- * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
- *
- * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
- * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
- * and also preallocates a request (every workload submission is still mediated through
- * requests, same as it did with legacy ringbuffer submission).
- *
- * Return: non-zero if the ringbuffer is not ready to be written to.
- */
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
-	if (ret)
-		return ret;
-
-	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
-	if (ret)
-		return ret;
-
-	/* Preallocate the olr before touching the ring */
-	ret = logical_ring_alloc_seqno(engine, ringbuf->FIXME_lrc_ctx);
-	if (ret)
-		return ret;
-
-	ringbuf->space -= num_dwords * sizeof(uint32_t);
-	return 0;
-}
-
-static int gen8_init_common_engine(struct intel_engine_cs *engine)
-{
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	I915_WRITE_IMR(engine, ~(engine->irq_enable_mask | engine->irq_keep_mask));
-	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
-
-	I915_WRITE(RING_MODE_GEN7(engine),
-		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
-		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
-	POSTING_READ(RING_MODE_GEN7(engine));
-	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
-
-	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
-
-	return 0;
-}
-
-static int gen8_init_render_engine(struct intel_engine_cs *engine)
-{
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = gen8_init_common_engine(engine);
-	if (ret)
-		return ret;
-
-	/* We need to disable the AsyncFlip performance optimisations in order
-	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
-	 * programmed to '1' on all products.
-	 *
-	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
-	 */
-	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
-
-	ret = intel_init_pipe_control(engine);
-	if (ret)
-		return ret;
-
-	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
-
-	return ret;
-}
-
-static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
-			      u64 offset, unsigned flags)
-{
-	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
-	int ret;
-
-	ret = intel_logical_ring_begin(ringbuf, 4);
-	if (ret)
-		return ret;
-
-	/* FIXME(BDW): Address space and security selectors. */
-	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
-	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
-	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
-	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_advance(ringbuf);
-
-	return 0;
-}
-
-static bool gen8_logical_ring_get_irq(struct intel_engine_cs *engine)
-{
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long flags;
-
-	if (!dev->irq_enabled)
-		return false;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (engine->irq_refcount++ == 0) {
-		I915_WRITE_IMR(engine, ~(engine->irq_enable_mask | engine->irq_keep_mask));
-		POSTING_READ(RING_IMR(engine->mmio_base));
-	}
-	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
-
-	return true;
-}
-
-static void gen8_logical_ring_put_irq(struct intel_engine_cs *engine)
-{
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--engine->irq_refcount == 0) {
-		I915_WRITE_IMR(engine, ~engine->irq_keep_mask);
-		POSTING_READ(RING_IMR(engine->mmio_base));
-	}
-	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
-}
-
-static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
-			   u32 invalidate_domains,
-			   u32 unused)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	struct drm_device *dev = engine->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t cmd;
-	int ret;
-
-	ret = intel_logical_ring_begin(ringbuf, 4);
-	if (ret)
-		return ret;
-
-	cmd = MI_FLUSH_DW + 1;
-
-	if (engine == &dev_priv->engine[VCS]) {
-		if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-				MI_FLUSH_DW_STORE_INDEX |
-				MI_FLUSH_DW_OP_STOREDW;
-	} else {
-		if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
-			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-				MI_FLUSH_DW_OP_STOREDW;
-	}
-
-	intel_logical_ring_emit(ringbuf, cmd);
-	intel_logical_ring_emit(ringbuf,
-				I915_GEM_HWS_SCRATCH_ADDR |
-				MI_FLUSH_DW_USE_GTT);
-	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
-	intel_logical_ring_emit(ringbuf, 0); /* value */
-	intel_logical_ring_advance(ringbuf);
-
-	return 0;
-}
-
-static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
-				  u32 invalidate_domains,
-				  u32 flush_domains)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
-	u32 flags = 0;
-	int ret;
-
-	flags |= PIPE_CONTROL_CS_STALL;
-
-	if (flush_domains) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
-	}
-
-	if (invalidate_domains) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-	}
-
-	ret = intel_logical_ring_begin(ringbuf, 6);
-	if (ret)
-		return ret;
-
-	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
-	intel_logical_ring_emit(ringbuf, flags);
-	intel_logical_ring_emit(ringbuf, scratch_addr);
-	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_advance(ringbuf);
-
-	return 0;
-}
-
-static u32 gen8_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
-{
-	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
-}
-
-static void gen8_set_seqno(struct intel_engine_cs *engine, u32 seqno)
-{
-	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-}
-
-static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
-{
-	struct intel_engine_cs *engine = ringbuf->engine;
-	u32 cmd;
-	int ret;
-
-	ret = intel_logical_ring_begin(ringbuf, 6);
-	if (ret)
-		return ret;
-
-	cmd = MI_STORE_DWORD_IMM_GEN8;
-	cmd |= MI_GLOBAL_GTT;
-
-	intel_logical_ring_emit(ringbuf, cmd);
-	intel_logical_ring_emit(ringbuf,
-				(engine->status_page.gfx_addr +
-				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
-	intel_logical_ring_emit(ringbuf, 0);
-	intel_logical_ring_emit(ringbuf, engine->outstanding_lazy_seqno);
-	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
-	intel_logical_ring_emit(ringbuf, MI_NOOP);
-	intel_logical_ring_advance_and_submit(ringbuf);
-
-	return 0;
-}
-
-/**
- * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
- *
- * @ring: Engine Command Streamer.
- *
- */
-void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-
-	if (!intel_engine_initialized(engine))
-		return;
-
-	intel_logical_ring_stop(engine);
-	WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
-	engine->preallocated_lazy_request = NULL;
-	engine->outstanding_lazy_seqno = 0;
-
-	if (engine->cleanup)
-		engine->cleanup(engine);
-
-	i915_cmd_parser_fini_engine(engine);
-
-	if (engine->status_page.obj) {
-		kunmap(sg_page(engine->status_page.obj->pages->sgl));
-		engine->status_page.obj = NULL;
-	}
-}
-
-static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *engine)
-{
-	int ret;
-	struct intel_context *dctx = engine->default_context;
-	struct drm_i915_gem_object *dctx_obj;
-
-	/* Intentionally left blank. */
-	engine->buffer = NULL;
-
-	engine->dev = dev;
-	INIT_LIST_HEAD(&engine->active_list);
-	INIT_LIST_HEAD(&engine->request_list);
-	init_waitqueue_head(&engine->irq_queue);
-
-	INIT_LIST_HEAD(&engine->execlist_queue);
-	spin_lock_init(&engine->execlist_lock);
-	engine->next_context_status_buffer = 0;
-
-	ret = intel_lr_context_deferred_create(dctx, engine);
-	if (ret)
-		return ret;
-
-	/* The status page is offset 0 from the context object in LRCs. */
-	dctx_obj = dctx->ring[engine->id].state;
-	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj);
-	engine->status_page.page_addr = kmap(sg_page(dctx_obj->pages->sgl));
-	if (engine->status_page.page_addr == NULL)
-		return -ENOMEM;
-	engine->status_page.obj = dctx_obj;
-
-	ret = i915_cmd_parser_init_engine(engine);
-	if (ret)
-		return ret;
-
-	if (engine->init) {
-		ret = engine->init(engine);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int logical_render_engine_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-
-	engine->name = "render ring";
-	engine->id = RCS;
-	engine->mmio_base = RENDER_RING_BASE;
-	engine->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
-	engine->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
-	if (HAS_L3_DPF(dev))
-		engine->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-
-	engine->init = gen8_init_render_engine;
-	engine->cleanup = intel_fini_pipe_control;
-	engine->get_seqno = gen8_get_seqno;
-	engine->set_seqno = gen8_set_seqno;
-	engine->emit_request = gen8_emit_request;
-	engine->emit_flush = gen8_emit_flush_render;
-	engine->irq_get = gen8_logical_ring_get_irq;
-	engine->irq_put = gen8_logical_ring_put_irq;
-	engine->emit_bb_start = gen8_emit_bb_start;
-
-	return logical_ring_init(dev, engine);
-}
-
-static int logical_bsd_engine_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[VCS];
-
-	engine->name = "bsd ring";
-	engine->id = VCS;
-	engine->mmio_base = GEN6_BSD_RING_BASE;
-	engine->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
-	engine->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
-
-	engine->init = gen8_init_common_engine;
-	engine->get_seqno = gen8_get_seqno;
-	engine->set_seqno = gen8_set_seqno;
-	engine->emit_request = gen8_emit_request;
-	engine->emit_flush = gen8_emit_flush;
-	engine->irq_get = gen8_logical_ring_get_irq;
-	engine->irq_put = gen8_logical_ring_put_irq;
-	engine->emit_bb_start = gen8_emit_bb_start;
-
-	return logical_ring_init(dev, engine);
-}
-
-static int logical_bsd2_engine_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[VCS2];
-
-	engine->name = "bds2 ring";
-	engine->id = VCS2;
-	engine->mmio_base = GEN8_BSD2_RING_BASE;
-	engine->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
-	engine->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
-
-	engine->init = gen8_init_common_engine;
-	engine->get_seqno = gen8_get_seqno;
-	engine->set_seqno = gen8_set_seqno;
-	engine->emit_request = gen8_emit_request;
-	engine->emit_flush = gen8_emit_flush;
-	engine->irq_get = gen8_logical_ring_get_irq;
-	engine->irq_put = gen8_logical_ring_put_irq;
-	engine->emit_bb_start = gen8_emit_bb_start;
-
-	return logical_ring_init(dev, engine);
-}
-
-static int logical_blt_engine_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[BCS];
-
-	engine->name = "blitter ring";
-	engine->id = BCS;
-	engine->mmio_base = BLT_RING_BASE;
-	engine->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
-	engine->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
-
-	engine->init = gen8_init_common_engine;
-	engine->get_seqno = gen8_get_seqno;
-	engine->set_seqno = gen8_set_seqno;
-	engine->emit_request = gen8_emit_request;
-	engine->emit_flush = gen8_emit_flush;
-	engine->irq_get = gen8_logical_ring_get_irq;
-	engine->irq_put = gen8_logical_ring_put_irq;
-	engine->emit_bb_start = gen8_emit_bb_start;
-
-	return logical_ring_init(dev, engine);
-}
-
-static int logical_vebox_engine_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[VECS];
-
-	engine->name = "video enhancement engine";
-	engine->id = VECS;
-	engine->mmio_base = VEBOX_RING_BASE;
-	engine->irq_enable_mask =
-		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
-	engine->irq_keep_mask =
-		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
-
-	engine->init = gen8_init_common_engine;
-	engine->get_seqno = gen8_get_seqno;
-	engine->set_seqno = gen8_set_seqno;
-	engine->emit_request = gen8_emit_request;
-	engine->emit_flush = gen8_emit_flush;
-	engine->irq_get = gen8_logical_ring_get_irq;
-	engine->irq_put = gen8_logical_ring_put_irq;
-	engine->emit_bb_start = gen8_emit_bb_start;
-
-	return logical_ring_init(dev, engine);
-}
-
-/**
- * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
- * @dev: DRM device.
- *
- * This function inits the engines for an Execlists submission style (the equivalent in the
- * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
- * those engines that are present in the hardware.
- *
- * Return: non-zero if the initialization failed.
- */
-int intel_logical_rings_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = logical_render_engine_init(dev);
-	if (ret)
-		return ret;
-
-	if (HAS_BSD(dev)) {
-		ret = logical_bsd_engine_init(dev);
-		if (ret)
-			goto cleanup_render_engine;
-	}
-
-	if (HAS_BLT(dev)) {
-		ret = logical_blt_engine_init(dev);
-		if (ret)
-			goto cleanup_bsd_engine;
-	}
-
-	if (HAS_VEBOX(dev)) {
-		ret = logical_vebox_engine_init(dev);
-		if (ret)
-			goto cleanup_blt_engine;
-	}
-
-	if (HAS_BSD2(dev)) {
-		ret = logical_bsd2_engine_init(dev);
-		if (ret)
-			goto cleanup_vebox_engine;
-	}
-
-	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-	if (ret)
-		goto cleanup_bsd2_engine;
-
-	return 0;
-
-cleanup_bsd2_engine:
-	intel_logical_ring_cleanup(&dev_priv->engine[VCS2]);
-cleanup_vebox_engine:
-	intel_logical_ring_cleanup(&dev_priv->engine[VECS]);
-cleanup_blt_engine:
-	intel_logical_ring_cleanup(&dev_priv->engine[BCS]);
-cleanup_bsd_engine:
-	intel_logical_ring_cleanup(&dev_priv->engine[VCS]);
-cleanup_render_engine:
-	intel_logical_ring_cleanup(&dev_priv->engine[RCS]);
-
-	return ret;
-}
-
 static int
-populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
-		    struct intel_engine_cs *engine, struct intel_ringbuffer *ringbuf)
+populate_lr_context(struct intel_context *ctx,
+		    struct drm_i915_gem_object *ctx_obj,
+		    struct intel_engine_cs *engine)
 {
-	struct drm_i915_gem_object *ring_obj = ringbuf->obj;
+	struct intel_ringbuffer *ring = ctx->ring[engine->id].ring;
 	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
-	struct page *page;
 	uint32_t *reg_state;
 	int ret;
 
@@ -1470,12 +423,9 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 		return ret;
 	}
 
-	i915_gem_object_pin_pages(ctx_obj);
-
 	/* The second page of the context object contains some fields which must
 	 * be set up prior to the first execution. */
-	page = i915_gem_object_get_page(ctx_obj, 1);
-	reg_state = kmap_atomic(page);
+	reg_state = kmap_atomic(i915_gem_object_get_page(ctx_obj, 1));
 
 	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
 	 * commands followed by (reg, value) pairs. The values we are setting here are
@@ -1495,10 +445,10 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_RING_TAIL] = RING_TAIL(engine->mmio_base);
 	reg_state[CTX_RING_TAIL+1] = 0;
 	reg_state[CTX_RING_BUFFER_START] = RING_START(engine->mmio_base);
-	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring->obj);
 	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(engine->mmio_base);
 	reg_state[CTX_RING_BUFFER_CONTROL+1] =
-			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
+			((ring->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
 	reg_state[CTX_BB_HEAD_U] = engine->mmio_base + 0x168;
 	reg_state[CTX_BB_HEAD_U+1] = 0;
 	reg_state[CTX_BB_HEAD_L] = engine->mmio_base + 0x140;
@@ -1550,43 +500,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 
 	kunmap_atomic(reg_state);
 
-	ctx_obj->dirty = 1;
-	set_page_dirty(page);
-	i915_gem_object_unpin_pages(ctx_obj);
-
 	return 0;
 }
 
-/**
- * intel_lr_context_free() - free the LRC specific bits of a context
- * @ctx: the LR context to free.
- *
- * The real context freeing is done in i915_gem_context_free: this only
- * takes care of the bits that are LRC related: the per-engine backing
- * objects and the logical ringbuffer.
- */
-void intel_lr_context_free(struct intel_context *ctx)
-{
-	int i;
-
-	for (i = 0; i < I915_NUM_ENGINES; i++) {
-		struct drm_i915_gem_object *ctx_obj = ctx->ring[i].state;
-		struct intel_ringbuffer *ringbuf = ctx->ring[i].ringbuf;
-
-		if (ctx_obj) {
-			intel_destroy_ringbuffer_obj(ringbuf);
-			kfree(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
-			drm_gem_object_unreference(&ctx_obj->base);
-		}
-	}
-}
-
 static uint32_t get_lr_context_size(struct intel_engine_cs *engine)
 {
 	int ret = 0;
 
-	WARN_ON(INTEL_INFO(engine->dev)->gen != 8);
+	WARN_ON(INTEL_INFO(engine->i915)->gen != 8);
 
 	switch (engine->id) {
 	case RCS:
@@ -1603,95 +524,173 @@ static uint32_t get_lr_context_size(struct intel_engine_cs *engine)
 	return ret;
 }
 
-/**
- * intel_lr_context_deferred_create() - create the LRC specific bits of a context
- * @ctx: LR context to create.
- * @ring: engine to be used with the context.
- *
- * This function can be called more than once, with different engines, if we plan
- * to use the context with them. The context backing objects and the ringbuffers
- * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
- * the creation is a deferred call: it's better to make sure first that we need to use
- * a given ring with the context.
- *
- * Return: non-zero on eror.
- */
-int intel_lr_context_deferred_create(struct intel_context *ctx,
-				     struct intel_engine_cs *engine)
+/*
+ * Allocate the ringbuffer and the LRC backing object for @ctx on @engine.
+ *
+ * The context object is pinned into the GGTT and populated; for the
+ * engine's default context its first page also becomes the status page.
+ *
+ * Returns the new ringbuffer, or an ERR_PTR() with everything allocated
+ * here released again.
+ */
+static struct intel_ringbuffer *
+execlists_get_ring(struct intel_engine_cs *engine,
+		   struct intel_context *ctx)
 {
-	struct drm_device *dev = engine->dev;
 	struct drm_i915_gem_object *ctx_obj;
+	struct intel_ringbuffer *ring;
 	uint32_t context_size;
-	struct intel_ringbuffer *ringbuf;
 	int ret;
 
-	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
-	if (ctx->ring[engine->id].state)
-		return 0;
+	ring = intel_engine_alloc_ring(engine, 32 * PAGE_SIZE);
+	if (IS_ERR(ring)) {
+		DRM_ERROR("Failed to allocate ringbuffer %s: %ld\n", engine->name, PTR_ERR(ring));
+		return ERR_CAST(ring);
+	}
 
 	context_size = round_up(get_lr_context_size(engine), 4096);
 
-	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
+	ctx_obj = i915_gem_alloc_context_obj(engine->i915->dev, context_size);
 	if (IS_ERR(ctx_obj)) {
 		ret = PTR_ERR(ctx_obj);
 		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
-		return ret;
+		goto err_ring;
 	}
 
 	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
-		drm_gem_object_unreference(&ctx_obj->base);
-		return ret;
+		goto err_unref;
 	}
 
-	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
-	if (!ringbuf) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
-				engine->name);
-		i915_gem_object_ggtt_unpin(ctx_obj);
-		drm_gem_object_unreference(&ctx_obj->base);
-		ret = -ENOMEM;
-		return ret;
+	ret = populate_lr_context(ctx, ctx_obj, engine);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
+		goto err_unpin;
 	}
 
-	ringbuf->engine = engine;
-	ringbuf->FIXME_lrc_ctx = ctx;
-
-	ringbuf->size = 32 * PAGE_SIZE;
-	ringbuf->effective_size = ringbuf->size;
-	ringbuf->head = 0;
-	ringbuf->tail = 0;
-	ringbuf->space = ringbuf->size;
-	ringbuf->last_retired_head = -1;
+	if (ctx == engine->default_context) {
+		struct drm_i915_private *dev_priv = engine->i915;
+		u32 reg;
 
-	/* TODO: For now we put this in the mappable region so that we can reuse
-	 * the existing ringbuffer code which ioremaps it. When we start
-	 * creating many contexts, this will no longer work and we must switch
-	 * to a kmapish interface.
-	 */
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
-				engine->name, ret);
-		goto error;
-	}
+		/* The status page is offset 0 from the context object in LRCs. */
+		engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(ctx_obj);
+		engine->status_page.page_addr = kmap(sg_page(ctx_obj->pages->sgl));
+		if (engine->status_page.page_addr == NULL) {
+			ret = -ENOMEM;
+			goto err_unpin;
+		}
 
-	ret = populate_lr_context(ctx, ctx_obj, engine, ringbuf);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
-		intel_destroy_ringbuffer_obj(ringbuf);
-		goto error;
-	}
+		engine->status_page.obj = ctx_obj;
 
-	ctx->ring[engine->id].ringbuf = ringbuf;
-	ctx->ring[engine->id].state = ctx_obj;
+		reg = RING_HWS_PGA(engine->mmio_base);
+		I915_WRITE(reg, engine->status_page.gfx_addr);
+		POSTING_READ(reg);
+	}
 
-	return 0;
+	/* Publish the context state only once setup can no longer fail. */
+	ctx->ring[engine->id].state = ctx_obj;
+	return ring;
 
-error:
-	kfree(ringbuf);
+err_unpin:
 	i915_gem_object_ggtt_unpin(ctx_obj);
+err_unref:
 	drm_gem_object_unreference(&ctx_obj->base);
-	return ret;
+err_ring:
+	intel_ring_free(ring);
+	return ERR_PTR(ret);
+}
+
+static void execlists_put_ring(struct intel_ringbuffer *ring,
+			       struct intel_context *ctx)
+{
+	intel_ring_free(ring);
+}
+
+static int execlists_add_request(struct i915_gem_request *rq)
+{
+	unsigned long flags;
+
+	if (intel_engine_stopped(rq->engine))
+		return -EIO;
+
+	spin_lock_irqsave(&rq->engine->execlist_lock, flags);
+
+	list_add_tail(&rq->engine_list, &rq->engine->pending);
+	if (rq->engine->execlists_submitted < 2)
+		execlists_submit(rq->engine);
+
+	spin_unlock_irqrestore(&rq->engine->execlist_lock, flags);
+
+	return 0;
+}
+
+static int execlists_suspend(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	unsigned long flags;
+
+	/* disable submitting more requests until resume */
+	spin_lock_irqsave(&engine->execlist_lock, flags);
+	engine->execlists_submitted = ~0;
+	spin_unlock_irqrestore(&engine->execlist_lock, flags);
+
+	I915_WRITE(RING_MODE_GEN7(engine),
+		   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE) |
+		   _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE));
+	POSTING_READ(RING_MODE_GEN7(engine));
+	DRM_DEBUG_DRIVER("Execlists disabled for %s\n", engine->name);
+
+	return 0;
+}
+
+static int execlists_resume(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	unsigned long flags;
+
+	/* XXX */
+	I915_WRITE_IMR(engine, ~(engine->irq_enable_mask | engine->irq_keep_mask));
+	I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
+
+	I915_WRITE(RING_MODE_GEN7(engine),
+		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
+		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+	POSTING_READ(RING_MODE_GEN7(engine));
+	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
+
+	spin_lock_irqsave(&engine->execlist_lock, flags);
+	engine->execlists_submitted = 0;
+	execlists_submit(engine);
+	spin_unlock_irqrestore(&engine->execlist_lock, flags);
+
+	return 0;
+}
+
+static int execlists_reset(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&engine->execlist_lock, flags);
+	while (!list_empty(&engine->pending))
+		list_move_tail(engine->pending.next, &engine->requests);
+	spin_unlock_irqrestore(&engine->execlist_lock, flags);
+
+	return 0;
+}
+
+int intel_engine_enable_execlist(struct intel_engine_cs *engine)
+{
+	if (!i915.enable_execlists)
+		return 0;
+
+	engine->get_ring = execlists_get_ring;
+	engine->put_ring = execlists_put_ring;
+	engine->add_request = execlists_add_request;
+
+	engine->suspend = execlists_suspend;
+	engine->resume = execlists_resume;
+	engine->reset = execlists_reset;
+
+	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index a6f8004..58066cd 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -31,82 +31,10 @@
 #define RING_CONTEXT_STATUS_BUF(ring)	((ring)->mmio_base+0x370)
 #define RING_CONTEXT_STATUS_PTR(ring)	((ring)->mmio_base+0x3a0)
 
-/* Logical Rings */
-void intel_logical_ring_stop(struct intel_engine_cs *ring);
-void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
-int intel_logical_rings_init(struct drm_device *dev);
-
-int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
-void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
-/**
- * intel_logical_ring_advance() - advance the ringbuffer tail
- * @ringbuf: Ringbuffer to advance.
- *
- * The tail is only updated in our logical ringbuffer struct.
- */
-static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
-{
-	ringbuf->tail &= ringbuf->size - 1;
-}
-/**
- * intel_logical_ring_emit() - write a DWORD to the ringbuffer.
- * @ringbuf: Ringbuffer to write to.
- * @data: DWORD to write.
- */
-static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
-					   u32 data)
-{
-	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-	ringbuf->tail += 4;
-}
-int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
-
-/* Logical Ring Contexts */
-void intel_lr_context_free(struct intel_context *ctx);
-int intel_lr_context_deferred_create(struct intel_context *ctx,
-				     struct intel_engine_cs *ring);
-
 /* Execlists */
-int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
-int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
-			       struct intel_engine_cs *ring,
-			       struct intel_context *ctx,
-			       struct drm_i915_gem_execbuffer2 *args,
-			       struct list_head *vmas,
-			       struct drm_i915_gem_object *batch_obj,
-			       u64 exec_start, u32 flags);
-u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
-
-/**
- * struct intel_ctx_submit_request - queued context submission request
- * @ctx: Context to submit to the ELSP.
- * @ring: Engine to submit it to.
- * @tail: how far in the context's ringbuffer this request goes to.
- * @execlist_link: link in the submission queue.
- * @work: workqueue for processing this request in a bottom half.
- * @elsp_submitted: no. of times this request has been sent to the ELSP.
- *
- * The ELSP only accepts two elements at a time, so we queue context/tail
- * pairs on a given queue (ring->execlist_queue) until the hardware is
- * available. The queue serves a double purpose: we also use it to keep track
- * of the up to 2 contexts currently in the hardware (usually one in execution
- * and the other queued up by the GPU): We only remove elements from the head
- * of the queue when the hardware informs us that an element has been
- * completed.
- *
- * All accesses to the queue are mediated by a spinlock (ring->execlist_lock).
- */
-struct intel_ctx_submit_request {
-	struct intel_context *ctx;
-	struct intel_engine_cs *engine;
-	u32 tail;
-
-	struct list_head execlist_link;
-	struct work_struct work;
-
-	int elsp_submitted;
-};
+void intel_sanitize_enable_execlists(struct drm_device *dev, int *enable_execlists);
 
-void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
+int intel_engine_enable_execlist(struct intel_engine_cs *engine);
+void intel_execlists_irq_handler(struct intel_engine_cs *engine);
 
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 39b8e90..3fb8e6c 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -182,7 +182,7 @@ struct intel_overlay {
 	u32 flip_addr;
 	struct drm_i915_gem_object *reg_bo;
 	/* flip handling */
-	uint32_t last_flip_req;
+	struct i915_gem_request *flip_request;
 	void (*flip_tail)(struct intel_overlay *);
 };
 
@@ -208,53 +208,87 @@ static void intel_overlay_unmap_regs(struct intel_overlay *overlay,
 		io_mapping_unmap(regs);
 }
 
-static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
-					 void (*tail)(struct intel_overlay *))
+/* recover from an interruption due to a signal
+ * We have to be careful not to repeat work forever and make forward progress. */
+static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
 	int ret;
 
-	BUG_ON(overlay->last_flip_req);
-	ret = i915_add_request(engine, &overlay->last_flip_req);
-	if (ret)
-		return ret;
+	if (overlay->flip_request == NULL)
+		return 0;
 
-	overlay->flip_tail = tail;
-	ret = i915_wait_seqno(engine, overlay->last_flip_req);
+	ret = i915_request_wait(overlay->flip_request);
 	if (ret)
 		return ret;
-	i915_gem_retire_requests(dev);
 
-	overlay->last_flip_req = 0;
+	i915_request_put(overlay->flip_request);
+	overlay->flip_request = NULL;
+
+	i915_gem_retire_requests(overlay->dev);
+
+	if (overlay->flip_tail)
+		overlay->flip_tail(overlay);
+
 	return 0;
 }
 
+static int intel_overlay_add_request(struct intel_overlay *overlay,
+				     struct i915_gem_request *rq,
+				     void (*tail)(struct intel_overlay *))
+{
+	BUG_ON(overlay->flip_request);
+	overlay->flip_request = rq;
+	overlay->flip_tail = tail;
+
+	return i915_request_commit(rq);
+}
+
+static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
+					 struct i915_gem_request *rq,
+					 void (*tail)(struct intel_overlay *))
+{
+	int ret = intel_overlay_add_request(overlay, rq, tail);
+	return ret ? ret : intel_overlay_recover_from_interrupt(overlay);
+}
+
+static struct i915_gem_request *
+intel_overlay_alloc_request(struct intel_overlay *overlay)
+{
+	struct drm_i915_private *i915 = to_i915(overlay->dev);
+	return intel_engine_alloc_request(&i915->engine[RCS],
+					  i915->engine[RCS].default_context);
+}
+
 /* overlay needs to be disable in OCMD reg */
 static int intel_overlay_on(struct intel_overlay *overlay)
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-	int ret;
+	struct i915_gem_request *rq;
+	struct intel_ringbuffer *ring;
 
 	BUG_ON(overlay->active);
 	overlay->active = 1;
 
 	WARN_ON(IS_I830(dev) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE));
 
-	ret = intel_ring_begin(engine, 4);
-	if (ret)
-		return ret;
+	rq = intel_overlay_alloc_request(overlay);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring)) {
+		i915_request_put(rq);
+		return PTR_ERR(ring);
+	}
 
-	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
-	intel_ring_emit(engine, overlay->flip_addr | OFC_UPDATE);
-	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_advance(engine);
+	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON);
+	intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE);
+	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
-	return intel_overlay_do_wait_request(overlay, NULL);
+	return intel_overlay_do_wait_request(overlay, rq, NULL);
 }
 
 /* overlay needs to be enabled in OCMD reg */
@@ -263,10 +297,10 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 {
 	struct drm_device *dev = overlay->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
 	u32 flip_addr = overlay->flip_addr;
+	struct i915_gem_request *rq;
+	struct intel_ringbuffer *ring;
 	u32 tmp;
-	int ret;
 
 	BUG_ON(!overlay->active);
 
@@ -278,21 +312,30 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	if (tmp & (1 << 17))
 		DRM_DEBUG("overlay underrun, DOVSTA: %x\n", tmp);
 
-	ret = intel_ring_begin(engine, 2);
-	if (ret)
-		return ret;
+	rq = intel_overlay_alloc_request(overlay);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring)) {
+		i915_request_put(rq);
+		return PTR_ERR(ring);
+	}
 
-	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
-	intel_ring_emit(engine, flip_addr);
-	intel_ring_advance(engine);
+	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
+	intel_ring_emit(ring, flip_addr);
+	intel_ring_advance(ring);
 
-	return i915_add_request(engine, &overlay->last_flip_req);
+	return intel_overlay_add_request(overlay, rq, NULL);
 }
 
 static void intel_overlay_release_old_vid_tail(struct intel_overlay *overlay)
 {
 	struct drm_i915_gem_object *obj = overlay->old_vid_bo;
 
+	i915_gem_track_fb(obj, NULL,
+			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
+
 	i915_gem_object_ggtt_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
@@ -319,10 +362,9 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
 static int intel_overlay_off(struct intel_overlay *overlay)
 {
 	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
 	u32 flip_addr = overlay->flip_addr;
-	int ret;
+	struct i915_gem_request *rq;
+	struct intel_ringbuffer *ring;
 
 	BUG_ON(!overlay->active);
 
@@ -332,53 +374,35 @@ static int intel_overlay_off(struct intel_overlay *overlay)
 	 * of the hw. Do it in both cases */
 	flip_addr |= OFC_UPDATE;
 
-	ret = intel_ring_begin(engine, 6);
-	if (ret)
-		return ret;
+	rq = intel_overlay_alloc_request(overlay);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring)) {
+		i915_request_put(rq);
+		return PTR_ERR(ring);
+	}
 
 	/* wait for overlay to go idle */
-	intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
-	intel_ring_emit(engine, flip_addr);
-	intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+	intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
+	intel_ring_emit(ring, flip_addr);
+	intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	/* turn overlay off */
 	if (IS_I830(dev)) {
 		/* Workaround: Don't disable the overlay fully, since otherwise
 		 * it dies on the next OVERLAY_ON cmd. */
-		intel_ring_emit(engine, MI_NOOP);
-		intel_ring_emit(engine, MI_NOOP);
-		intel_ring_emit(engine, MI_NOOP);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_emit(ring, MI_NOOP);
 	} else {
-		intel_ring_emit(engine, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
-		intel_ring_emit(engine, flip_addr);
-		intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+		intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
+		intel_ring_emit(ring, flip_addr);
+		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
 	}
-	intel_ring_advance(engine);
+	intel_ring_advance(ring);
 
-	return intel_overlay_do_wait_request(overlay, intel_overlay_off_tail);
-}
-
-/* recover from an interruption due to a signal
- * We have to be careful not to repeat work forever an make forward progess. */
-static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
-{
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-	int ret;
-
-	if (overlay->last_flip_req == 0)
-		return 0;
-
-	ret = i915_wait_seqno(engine, overlay->last_flip_req);
-	if (ret)
-		return ret;
-	i915_gem_retire_requests(dev);
-
-	if (overlay->flip_tail)
-		overlay->flip_tail(overlay);
-
-	overlay->last_flip_req = 0;
-	return 0;
+	return intel_overlay_do_wait_request(overlay, rq, intel_overlay_off_tail);
 }
 
 /* Wait for pending overlay flip and release old frame.
@@ -387,10 +411,8 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
  */
 static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 {
-	struct drm_device *dev = overlay->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-	int ret;
+	struct drm_i915_private *dev_priv = to_i915(overlay->dev);
+	int ret = 0;
 
 	/* Only wait if there is actually an old frame to release to
 	 * guarantee forward progress.
@@ -399,27 +421,30 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay)
 		return 0;
 
 	if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) {
-		/* synchronous slowpath */
-		ret = intel_ring_begin(engine, 2);
-		if (ret)
-			return ret;
+		struct i915_gem_request *rq;
+		struct intel_ringbuffer *ring;
 
-		intel_ring_emit(engine, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
-		intel_ring_emit(engine, MI_NOOP);
-		intel_ring_advance(engine);
+		rq = intel_overlay_alloc_request(overlay);
+		if (IS_ERR(rq))
+			return PTR_ERR(rq);
 
-		ret = intel_overlay_do_wait_request(overlay,
-						    intel_overlay_release_old_vid_tail);
-		if (ret)
-			return ret;
-	}
+		/* synchronous slowpath */
+		ring = intel_ring_begin(rq, 2);
+		if (IS_ERR(ring)) {
+			i915_request_put(rq);
+			return PTR_ERR(ring);
+		}
 
-	intel_overlay_release_old_vid_tail(overlay);
+		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
 
+		ret = intel_overlay_do_wait_request(overlay, rq,
+						    intel_overlay_release_old_vid_tail);
+	} else
+		intel_overlay_release_old_vid_tail(overlay);
 
-	i915_gem_track_fb(overlay->old_vid_bo, NULL,
-			  INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe));
-	return 0;
+	return ret;
 }
 
 struct put_image_params {
@@ -821,12 +846,7 @@ int intel_overlay_switch_off(struct intel_overlay *overlay)
 	iowrite32(0, &regs->OCMD);
 	intel_overlay_unmap_regs(overlay, regs);
 
-	ret = intel_overlay_off(overlay);
-	if (ret != 0)
-		return ret;
-
-	intel_overlay_off_tail(overlay);
-	return 0;
+	return intel_overlay_off(overlay);
 }
 
 static int check_overlay_possible_on_crtc(struct intel_overlay *overlay,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 10e1133..016c845 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3573,9 +3573,11 @@ static int sanitize_rc6_option(const struct drm_device *dev, int enable_rc6)
 		return enable_rc6 & mask;
 	}
 
-	/* Disable RC6 on Ironlake */
-	if (INTEL_INFO(dev)->gen == 5)
+#ifdef CONFIG_INTEL_IOMMU
+	/* Ironlake + RC6 + VT-d empirically blows up */
+	if (IS_GEN5(dev) && intel_iommu_gfx_mapped)
 		return 0;
+#endif
 
 	if (IS_IVYBRIDGE(dev))
 		return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
@@ -4340,12 +4342,6 @@ void ironlake_teardown_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->ips.renderctx) {
-		i915_gem_object_ggtt_unpin(dev_priv->ips.renderctx);
-		drm_gem_object_unreference(&dev_priv->ips.renderctx->base);
-		dev_priv->ips.renderctx = NULL;
-	}
-
 	if (dev_priv->ips.pwrctx) {
 		i915_gem_object_ggtt_unpin(dev_priv->ips.pwrctx);
 		drm_gem_object_unreference(&dev_priv->ips.pwrctx->base);
@@ -4375,11 +4371,6 @@ static int ironlake_setup_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->ips.renderctx == NULL)
-		dev_priv->ips.renderctx = intel_alloc_context_page(dev);
-	if (!dev_priv->ips.renderctx)
-		return -ENOMEM;
-
 	if (dev_priv->ips.pwrctx == NULL)
 		dev_priv->ips.pwrctx = intel_alloc_context_page(dev);
 	if (!dev_priv->ips.pwrctx) {
@@ -4393,9 +4384,6 @@ static int ironlake_setup_rc6(struct drm_device *dev)
 static void ironlake_enable_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
-	bool was_interruptible;
-	int ret;
 
 	/* rc6 disabled by default due to repeated reports of hanging during
 	 * boot and resume.
@@ -4405,46 +4393,8 @@ static void ironlake_enable_rc6(struct drm_device *dev)
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	ret = ironlake_setup_rc6(dev);
-	if (ret)
-		return;
-
-	was_interruptible = dev_priv->mm.interruptible;
-	dev_priv->mm.interruptible = false;
-
-	/*
-	 * GPU can automatically power down the render unit if given a page
-	 * to save state.
-	 */
-	ret = intel_ring_begin(engine, 6);
-	if (ret) {
-		ironlake_teardown_rc6(dev);
-		dev_priv->mm.interruptible = was_interruptible;
-		return;
-	}
-
-	intel_ring_emit(engine, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
-	intel_ring_emit(engine, MI_SET_CONTEXT);
-	intel_ring_emit(engine, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
-			MI_MM_SPACE_GTT |
-			MI_SAVE_EXT_STATE_EN |
-			MI_RESTORE_EXT_STATE_EN |
-			MI_RESTORE_INHIBIT);
-	intel_ring_emit(engine, MI_SUSPEND_FLUSH);
-	intel_ring_emit(engine, MI_NOOP);
-	intel_ring_emit(engine, MI_FLUSH);
-	intel_ring_advance(engine);
-
-	/*
-	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
-	 * does an implicit flush, combined with MI_FLUSH above, it should be
-	 * safe to assume that renderctx is valid
-	 */
-	ret = intel_engine_idle(engine);
-	dev_priv->mm.interruptible = was_interruptible;
-	if (ret) {
+	if (ironlake_setup_rc6(dev)) {
 		DRM_ERROR("failed to enable ironlake power savings\n");
-		ironlake_teardown_rc6(dev);
 		return;
 	}
 
@@ -4913,7 +4863,7 @@ bool i915_gpu_busy(void)
 	dev_priv = i915_mch_dev;
 
 	for_each_engine(engine, dev_priv, i)
-		ret |= !list_empty(&engine->request_list);
+		ret |= !list_empty(&engine->requests);
 
 out_unlock:
 	spin_unlock_irq(&mchdev_lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 025b6bb..0e5e79f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -33,70 +33,22 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-bool
-intel_engine_initialized(struct intel_engine_cs *engine)
-{
-	struct drm_device *dev = engine->dev;
-
-	if (!dev)
-		return false;
-
-	if (i915.enable_execlists) {
-		struct intel_context *dctx = engine->default_context;
-		struct intel_ringbuffer *ringbuf = dctx->ring[engine->id].ringbuf;
-
-		return ringbuf->obj;
-	} else
-		return engine->buffer && engine->buffer->obj;
-}
-
-int __intel_ring_space(int head, int tail, int size)
-{
-	int space = head - (tail + I915_RING_FREE_SPACE);
-	if (space < 0)
-		space += size;
-	return space;
-}
-
-int intel_ring_space(struct intel_ringbuffer *ringbuf)
-{
-	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
-				  ringbuf->tail, ringbuf->size);
-}
-
-bool intel_engine_stopped(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-	return dev_priv->gpu_error.stop_rings & intel_engine_flag(engine);
-}
-
-void __intel_ring_advance(struct intel_engine_cs *ring)
-{
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	ringbuf->tail &= ringbuf->size - 1;
-	if (intel_engine_stopped(ring))
-		return;
-	ring->write_tail(ring, ringbuf->tail);
-}
-
 static int
-gen2_render_ring_flush(struct intel_engine_cs *ring,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen2_emit_flush(struct i915_gem_request *rq, u32 flags)
 {
+	struct intel_ringbuffer *ring;
 	u32 cmd;
-	int ret;
 
 	cmd = MI_FLUSH;
-	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
+	if ((flags & (I915_FLUSH_CACHES | I915_INVALIDATE_CACHES)) == 0)
 		cmd |= MI_NO_WRITE_FLUSH;
 
-	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
+	if (flags & I915_INVALIDATE_CACHES)
 		cmd |= MI_READ_FLUSH;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, MI_NOOP);
@@ -106,13 +58,10 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-gen4_render_ring_flush(struct intel_engine_cs *ring,
-		       u32	invalidate_domains,
-		       u32	flush_domains)
+gen4_emit_flush(struct i915_gem_request *rq, u32 flags)
 {
-	struct drm_device *dev = ring->dev;
+	struct intel_ringbuffer *ring;
 	u32 cmd;
-	int ret;
 
 	/*
 	 * read/write caches:
@@ -142,19 +91,18 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
 	 * are flushed at any MI_FLUSH.
 	 */
 
-	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
-	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
-		cmd &= ~MI_NO_WRITE_FLUSH;
-	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
+	cmd = MI_FLUSH;
+	if ((flags & (I915_FLUSH_CACHES | I915_INVALIDATE_CACHES)) == 0)
+		cmd |= MI_NO_WRITE_FLUSH;
+	if (flags & I915_INVALIDATE_CACHES) {
 		cmd |= MI_EXE_FLUSH;
+		if (IS_G4X(rq->i915) || IS_GEN5(rq->i915))
+			cmd |= MI_INVALIDATE_ISP;
+	}
 
-	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
-	    (IS_G4X(dev) || IS_GEN5(dev)))
-		cmd |= MI_INVALIDATE_ISP;
-
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, MI_NOOP);
@@ -201,15 +149,14 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,
  * really our business.  That leaves only stall at scoreboard.
  */
 static int
-intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
+gen6_emit_post_sync_nonzero_flush(struct i915_gem_request *rq)
 {
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
-	int ret;
-
+	const u32 scratch_addr = rq->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 6);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
@@ -220,9 +167,9 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
-	ret = intel_ring_begin(ring, 6);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
 	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
@@ -236,15 +183,15 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
 }
 
 static int
-gen6_render_ring_flush(struct intel_engine_cs *ring,
-                         u32 invalidate_domains, u32 flush_domains)
+gen6_render_emit_flush(struct i915_gem_request *rq, u32 flags)
 {
-	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	const u32 scratch_addr = rq->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	struct intel_ringbuffer *ring;
+	u32 cmd = 0;
 	int ret;
 
 	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
+	ret = gen6_emit_post_sync_nonzero_flush(rq);
 	if (ret)
 		return ret;
 
@@ -252,34 +199,34 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	if (flags & I915_FLUSH_CACHES) {
+		cmd |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		cmd |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 		/*
 		 * Ensure that any following seqno writes only happen
 		 * when the render cache is indeed flushed.
 		 */
-		flags |= PIPE_CONTROL_CS_STALL;
+		cmd |= PIPE_CONTROL_CS_STALL;
 	}
-	if (invalidate_domains) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+	if (flags & I915_INVALIDATE_CACHES) {
+		cmd |= PIPE_CONTROL_TLB_INVALIDATE;
+		cmd |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
-		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
+		cmd |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
@@ -288,13 +235,13 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
+gen7_render_ring_cs_stall_wa(struct i915_gem_request *rq)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
 	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
@@ -306,35 +253,32 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
 	return 0;
 }
 
-static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
+static int gen7_ring_fbc_flush(struct i915_gem_request *rq, u32 value)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	if (!ring->fbc_dirty)
-		return 0;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	ret = intel_ring_begin(ring, 6);
-	if (ret)
-		return ret;
 	/* WaFbcNukeOn3DBlt:ivb/hsw */
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, MSG_FBC_REND_STATE);
 	intel_ring_emit(ring, value);
 	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
 	intel_ring_emit(ring, MSG_FBC_REND_STATE);
-	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
+	intel_ring_emit(ring, rq->engine->scratch.gtt_offset + 256);
 	intel_ring_advance(ring);
 
-	ring->fbc_dirty = false;
 	return 0;
 }
 
 static int
-gen7_render_ring_flush(struct intel_engine_cs *ring,
-		       u32 invalidate_domains, u32 flush_domains)
+gen7_render_emit_flush(struct i915_gem_request *rq, u32 flags)
 {
-	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	const u32 scratch_addr = rq->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	struct intel_ringbuffer *ring;
+	u32 cmd = 0;
 	int ret;
 
 	/*
@@ -345,63 +289,68 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
 	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
 	 * don't try to be clever and just set it unconditionally.
 	 */
-	flags |= PIPE_CONTROL_CS_STALL;
+	cmd |= PIPE_CONTROL_CS_STALL;
 
 	/* Just flush everything.  Experiments have shown that reducing the
 	 * number of bits based on the write domains has little performance
 	 * impact.
 	 */
-	if (flush_domains) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	if (flags & I915_FLUSH_CACHES) {
+		cmd |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		cmd |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 	}
-	if (invalidate_domains) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+	if (flags & I915_INVALIDATE_CACHES) {
+		cmd |= PIPE_CONTROL_TLB_INVALIDATE;
+		cmd |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+		cmd |= PIPE_CONTROL_QW_WRITE;
+		cmd |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
 		/* Workaround: we must issue a pipe_control with CS-stall bit
 		 * set before a pipe_control command that has the state cache
 		 * invalidate bit set. */
-		gen7_render_ring_cs_stall_wa(ring);
+		ret = gen7_render_ring_cs_stall_wa(rq);
+		if (ret)
+			return ret;
 	}
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
-	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, scratch_addr);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
-	if (!invalidate_domains && flush_domains)
-		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
+	if (flags & I915_KICK_FBC) {
+		ret = gen7_ring_fbc_flush(rq, FBC_REND_NUKE);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
 static int
-gen8_emit_pipe_control(struct intel_engine_cs *ring,
-		       u32 flags, u32 scratch_addr)
+gen8_emit_pipe_control(struct i915_gem_request *rq,
+		       u32 cmd, u32 scratch_addr)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 6);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
-	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, scratch_addr);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, 0);
@@ -412,31 +361,31 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring,
 }
 
 static int
-gen8_render_ring_flush(struct intel_engine_cs *ring,
-		       u32 invalidate_domains, u32 flush_domains)
+gen8_render_emit_flush(struct i915_gem_request *rq,
+		       u32 flags)
 {
-	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	const u32 scratch_addr = rq->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	u32 cmd = 0;
 	int ret;
 
-	flags |= PIPE_CONTROL_CS_STALL;
+	cmd |= PIPE_CONTROL_CS_STALL;
 
-	if (flush_domains) {
-		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	if (flags & I915_FLUSH_CACHES) {
+		cmd |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		cmd |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
 	}
-	if (invalidate_domains) {
-		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
-		flags |= PIPE_CONTROL_QW_WRITE;
-		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+	if (flags & I915_INVALIDATE_CACHES) {
+		cmd |= PIPE_CONTROL_TLB_INVALIDATE;
+		cmd |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		cmd |= PIPE_CONTROL_QW_WRITE;
+		cmd |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
 		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
-		ret = gen8_emit_pipe_control(ring,
+		ret = gen8_emit_pipe_control(rq,
 					     PIPE_CONTROL_CS_STALL |
 					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
 					     0);
@@ -444,25 +393,25 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
 			return ret;
 	}
 
-	return gen8_emit_pipe_control(ring, flags, scratch_addr);
+	return gen8_emit_pipe_control(rq, cmd, scratch_addr);
 }
 
-static void ring_write_tail(struct intel_engine_cs *ring,
+static void ring_write_tail(struct intel_engine_cs *engine,
 			    u32 value)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	I915_WRITE_TAIL(ring, value);
+	struct drm_i915_private *dev_priv = engine->i915;
+	I915_WRITE_TAIL(engine, value);
 }
 
 u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	u64 acthd;
 
-	if (INTEL_INFO(engine->dev)->gen >= 8)
+	if (INTEL_INFO(dev_priv)->gen >= 8)
 		acthd = I915_READ64_2x32(RING_ACTHD(engine->mmio_base),
 					 RING_ACTHD_UDW(engine->mmio_base));
-	else if (INTEL_INFO(engine->dev)->gen >= 4)
+	else if (INTEL_INFO(dev_priv)->gen >= 4)
 		acthd = I915_READ(RING_ACTHD(engine->mmio_base));
 	else
 		acthd = I915_READ(ACTHD);
@@ -470,192 +419,320 @@ u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
 	return acthd;
 }
 
-static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
-{
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 addr;
-
-	addr = dev_priv->status_page_dmah->busaddr;
-	if (INTEL_INFO(ring->dev)->gen >= 4)
-		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
-	I915_WRITE(HWS_PGA, addr);
-}
-
-static bool stop_ring(struct intel_engine_cs *ring)
+static bool engine_stop(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+	struct drm_i915_private *dev_priv = engine->i915;
 
-	if (!IS_GEN2(ring->dev)) {
-		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
-		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
-			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
+	if (!IS_GEN2(dev_priv)) {
+		I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING));
+		if (wait_for((I915_READ_MODE(engine) & MODE_IDLE) != 0, 1000)) {
+			DRM_ERROR("%s : timed out trying to stop ring\n", engine->name);
 			/* Sometimes we observe that the idle flag is not
 			 * set even though the ring is empty. So double
 			 * check before giving up.
 			 */
-			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
+			if (I915_READ_HEAD(engine) != I915_READ_TAIL(engine))
 				return false;
 		}
 	}
 
-	I915_WRITE_CTL(ring, 0);
-	I915_WRITE_HEAD(ring, 0);
-	ring->write_tail(ring, 0);
+	I915_WRITE_CTL(engine, 0);
+	I915_WRITE_HEAD(engine, 0);
+	engine->write_tail(engine, 0);
+
+	if (!IS_GEN2(dev_priv)) {
+		(void)I915_READ_CTL(engine);
+		I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING));
+	}
+
+	return (I915_READ_HEAD(engine) & HEAD_ADDR) == 0;
+}
+
+static int engine_suspend(struct intel_engine_cs *engine)
+{
+	return engine_stop(engine) ? 0 : -EIO;
+}
+
+static int enable_status_page(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	u32 mmio, addr;
+	int ret = 0;
+
+	if (!I915_NEED_GFX_HWS(dev_priv)) {
+		addr = dev_priv->status_page_dmah->busaddr;
+		if (INTEL_INFO(dev_priv)->gen >= 4)
+			addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+		mmio = HWS_PGA;
+	} else {
+		addr = engine->status_page.gfx_addr;
+		/* The ring status page addresses are no longer next to the rest of
+		 * the ring registers as of gen7.
+		 */
+		if (IS_GEN7(dev_priv)) {
+			switch (engine->id) {
+			default:
+			case RCS:
+				mmio = RENDER_HWS_PGA_GEN7;
+				break;
+			case BCS:
+				mmio = BLT_HWS_PGA_GEN7;
+				break;
+				/*
+				 * VCS2 actually doesn't exist on Gen7. Only shut up
+				 * gcc switch check warning
+				 */
+			case VCS2:
+			case VCS:
+				mmio = BSD_HWS_PGA_GEN7;
+				break;
+			case VECS:
+				mmio = VEBOX_HWS_PGA_GEN7;
+				break;
+			}
+		} else if (IS_GEN6(dev_priv)) {
+			mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
+		} else {
+			/* XXX: gen8 returns to sanity */
+			mmio = RING_HWS_PGA(engine->mmio_base);
+		}
+	}
+
+	I915_WRITE(mmio, addr);
+	POSTING_READ(mmio);
+
+	/*
+	 * Flush the TLB for this page
+	 *
+	 * FIXME: These two bits have disappeared on gen8, so a question
+	 * arises: do we still need this and if so how should we go about
+	 * invalidating the TLB?
+	 */
+	if (INTEL_INFO(dev_priv)->gen >= 6 && INTEL_INFO(dev_priv)->gen < 8) {
+		u32 reg = RING_INSTPM(engine->mmio_base);
+
+		/* ring should be idle before issuing a sync flush*/
+		WARN_ON((I915_READ_MODE(engine) & MODE_IDLE) == 0);
 
-	if (!IS_GEN2(ring->dev)) {
-		(void)I915_READ_CTL(ring);
-		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
+		I915_WRITE(reg,
+			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
+					      INSTPM_SYNC_FLUSH));
+		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
+			     1000)) {
+			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+				  engine->name);
+			ret = -EIO;
+		}
 	}
 
-	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
+	return ret;
 }
 
-static int init_ring_common(struct intel_engine_cs *ring)
+static struct intel_ringbuffer *
+engine_get_ring(struct intel_engine_cs *engine,
+		struct intel_context *ctx)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	struct drm_i915_gem_object *obj = ringbuf->obj;
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct intel_ringbuffer *ring;
 	int ret = 0;
 
-	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+	ring = engine->legacy_ring;
+	if (ring)
+		return ring;
 
-	if (!stop_ring(ring)) {
+	ring = intel_engine_alloc_ring(engine, 32 * PAGE_SIZE);
+	if (IS_ERR(ring)) {
+		DRM_ERROR("Failed to allocate ringbuffer for %s: %ld\n", engine->name, PTR_ERR(ring));
+		return ERR_CAST(ring);
+	}
+
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+	if (!engine_stop(engine)) {
 		/* G45 ring initialization often fails to reset head to zero */
 		DRM_DEBUG_KMS("%s head not reset to zero "
 			      "ctl %08x head %08x tail %08x start %08x\n",
-			      ring->name,
-			      I915_READ_CTL(ring),
-			      I915_READ_HEAD(ring),
-			      I915_READ_TAIL(ring),
-			      I915_READ_START(ring));
-
-		if (!stop_ring(ring)) {
+			      engine->name,
+			      I915_READ_CTL(engine),
+			      I915_READ_HEAD(engine),
+			      I915_READ_TAIL(engine),
+			      I915_READ_START(engine));
+		if (!engine_stop(engine)) {
 			DRM_ERROR("failed to set %s head to zero "
 				  "ctl %08x head %08x tail %08x start %08x\n",
-				  ring->name,
-				  I915_READ_CTL(ring),
-				  I915_READ_HEAD(ring),
-				  I915_READ_TAIL(ring),
-				  I915_READ_START(ring));
+				  engine->name,
+				  I915_READ_CTL(engine),
+				  I915_READ_HEAD(engine),
+				  I915_READ_TAIL(engine),
+				  I915_READ_START(engine));
 			ret = -EIO;
-			goto out;
 		}
 	}
+	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 
-	if (I915_NEED_GFX_HWS(dev))
-		intel_ring_setup_status_page(ring);
-	else
-		ring_setup_phys_status_page(ring);
+	if (ret == 0) {
+		engine->legacy_ring = ring;
+	} else {
+		intel_ring_free(ring);
+		ring = ERR_PTR(ret);
+	}
+
+	return ring;
+}
+
+static int engine_resume(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct intel_ringbuffer *ring = engine->legacy_ring;
+	int retry = 3, ret;
 
+	if (WARN_ON(ring == NULL))
+		return -ENODEV;
+
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
+	ret = enable_status_page(engine);
+
+reset:
 	/* Enforce ordering by reading HEAD register back */
-	I915_READ_HEAD(ring);
+	engine->write_tail(engine, ring->tail);
+	I915_WRITE_HEAD(engine, ring->head);
+	(void)I915_READ_HEAD(engine);
 
 	/* Initialize the ring. This must happen _after_ we've cleared the ring
 	 * registers with the above sequence (the readback of the HEAD registers
 	 * also enforces ordering), otherwise the hw might lose the new ring
 	 * register values. */
-	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
-	I915_WRITE_CTL(ring,
-			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
-			| RING_VALID);
-
-	/* If the head is still not zero, the ring is dead */
-	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
-		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
-		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
+	I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(ring->obj));
+
+	/* WaClearRingBufHeadRegAtInit:ctg,elk */
+	if (I915_READ_HEAD(engine) != ring->head)
+		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
+			  engine->name, I915_READ_HEAD(engine));
+	I915_WRITE_HEAD(engine, ring->head);
+	(void)I915_READ_HEAD(engine);
+
+	I915_WRITE_CTL(engine,
+		       ((ring->size - PAGE_SIZE) & RING_NR_PAGES)
+		       | RING_VALID);
+
+	if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0, 50)) {
+		if (retry-- && engine_stop(engine))
+			goto reset;
+	}
+
+	if ((I915_READ_CTL(engine) & RING_VALID) == 0 ||
+	    I915_READ_START(engine) != i915_gem_obj_ggtt_offset(ring->obj)) {
 		DRM_ERROR("%s initialization failed "
-			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
-			  ring->name,
-			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
-			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
-			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
+			  "ctl %08x (valid? %d) head %08x [expected %08x], tail %08x [expected %08x], start %08x [expected %08lx]\n",
+			  engine->name,
+			  I915_READ_CTL(engine), I915_READ_CTL(engine) & RING_VALID,
+			  I915_READ_HEAD(engine), ring->head,
+			  I915_READ_TAIL(engine), ring->tail,
+			  I915_READ_START(engine), (unsigned long)i915_gem_obj_ggtt_offset(ring->obj));
 		ret = -EIO;
-		goto out;
 	}
 
-	ringbuf->head = I915_READ_HEAD(ring);
-	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-	ringbuf->space = intel_ring_space(ringbuf);
-	ringbuf->last_retired_head = -1;
-
-	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
-
-out:
 	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
-
 	return ret;
 }
 
-void
-intel_fini_pipe_control(struct intel_engine_cs *ring)
+static void engine_put_ring(struct intel_ringbuffer *ring,
+			    struct intel_context *ctx)
+{
+	if (ring->last_context == ctx) {
+		struct i915_gem_request *rq;
+		int ret = -EINVAL;
+
+		rq = intel_engine_alloc_request(ring->engine,
+						ring->engine->default_context);
+		if (!IS_ERR(rq)) {
+			ret = i915_request_commit(rq);
+			i915_request_put(rq);
+		}
+		if (WARN_ON(ret))
+			ring->last_context = ring->engine->default_context;
+	}
+}
+
+static int engine_add_request(struct i915_gem_request *rq)
 {
-	struct drm_device *dev = ring->dev;
+	if (intel_engine_stopped(rq->engine))
+		return -EIO;
+
+	rq->engine->write_tail(rq->engine, rq->tail);
+	list_add_tail(&rq->engine_list, &rq->engine->requests);
+	return 0;
+}
 
-	if (ring->scratch.obj == NULL)
+static void
+fini_pipe_control(struct intel_engine_cs *engine)
+{
+	if (engine->scratch.obj == NULL)
 		return;
 
-	if (INTEL_INFO(dev)->gen >= 5) {
-		kunmap(sg_page(ring->scratch.obj->pages->sgl));
-		i915_gem_object_ggtt_unpin(ring->scratch.obj);
+	if (INTEL_INFO(engine->i915)->gen >= 5) {
+		kunmap(sg_page(engine->scratch.obj->pages->sgl));
+		i915_gem_object_ggtt_unpin(engine->scratch.obj);
 	}
 
-	drm_gem_object_unreference(&ring->scratch.obj->base);
-	ring->scratch.obj = NULL;
+	drm_gem_object_unreference(&engine->scratch.obj->base);
+	engine->scratch.obj = NULL;
 }
 
-int
-intel_init_pipe_control(struct intel_engine_cs *ring)
+static int
+init_pipe_control(struct intel_engine_cs *engine)
 {
 	int ret;
 
-	if (ring->scratch.obj)
+	if (engine->scratch.obj)
 		return 0;
 
-	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
-	if (ring->scratch.obj == NULL) {
+	engine->scratch.obj = i915_gem_alloc_object(engine->i915->dev, 4096);
+	if (engine->scratch.obj == NULL) {
 		DRM_ERROR("Failed to allocate seqno page\n");
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
+	ret = i915_gem_object_set_cache_level(engine->scratch.obj, I915_CACHE_LLC);
 	if (ret)
 		goto err_unref;
 
-	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
+	ret = i915_gem_obj_ggtt_pin(engine->scratch.obj, 4096, 0);
 	if (ret)
 		goto err_unref;
 
-	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
-	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
-	if (ring->scratch.cpu_page == NULL) {
+	engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(engine->scratch.obj);
+	engine->scratch.cpu_page = kmap(sg_page(engine->scratch.obj->pages->sgl));
+	if (engine->scratch.cpu_page == NULL) {
 		ret = -ENOMEM;
 		goto err_unpin;
 	}
 
 	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
-			 ring->name, ring->scratch.gtt_offset);
+			 engine->name, engine->scratch.gtt_offset);
 	return 0;
 
 err_unpin:
-	i915_gem_object_ggtt_unpin(ring->scratch.obj);
+	i915_gem_object_ggtt_unpin(engine->scratch.obj);
 err_unref:
-	drm_gem_object_unreference(&ring->scratch.obj->base);
+	drm_gem_object_unreference(&engine->scratch.obj->base);
 err:
+	engine->scratch.obj = NULL;
 	return ret;
 }
 
-static int init_render_ring(struct intel_engine_cs *ring)
+static int render_resume(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret = init_ring_common(ring);
+	struct drm_i915_private *dev_priv = engine->i915;
+	int ret;
+
+	ret = engine_resume(engine);
 	if (ret)
 		return ret;
 
 	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
-	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
+	if (INTEL_INFO(dev_priv)->gen >= 4 && INTEL_INFO(dev_priv)->gen < 7)
 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
 
 	/* We need to disable the AsyncFlip performance optimisations in order
@@ -664,28 +741,22 @@ static int init_render_ring(struct intel_engine_cs *ring)
 	 *
 	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
 	 */
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (INTEL_INFO(dev_priv)->gen >= 6)
 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
 
 	/* Required for the hardware to program scanline values for waiting */
 	/* WaEnableFlushTlbInvalidationMode:snb */
-	if (INTEL_INFO(dev)->gen == 6)
+	if (INTEL_INFO(dev_priv)->gen == 6)
 		I915_WRITE(GFX_MODE,
 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
 
 	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
-	if (IS_GEN7(dev))
+	if (IS_GEN7(dev_priv))
 		I915_WRITE(GFX_MODE_GEN7,
 			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
-	if (INTEL_INFO(dev)->gen >= 5) {
-		ret = intel_init_pipe_control(ring);
-		if (ret)
-			return ret;
-	}
-
-	if (IS_GEN6(dev)) {
+	if (IS_GEN6(dev_priv)) {
 		/* From the Sandybridge PRM, volume 1 part 3, page 24:
 		 * "If this bit is set, STCunit will have LRA as replacement
 		 *  policy. [...] This bit must be reset.  LRA replacement
@@ -695,172 +766,137 @@ static int init_render_ring(struct intel_engine_cs *ring)
 			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (INTEL_INFO(dev_priv)->gen >= 6)
 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-	if (HAS_L3_DPF(dev))
-		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
+	if (HAS_L3_DPF(dev_priv))
+		I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv));
 
-	return ret;
+	return 0;
 }
 
-static void render_ring_cleanup(struct intel_engine_cs *ring)
+static void cleanup_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
 
-	if (dev_priv->semaphore_obj) {
-		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
-		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
-		dev_priv->semaphore_obj = NULL;
-	}
+	obj = engine->status_page.obj;
+	if (obj == NULL)
+		return;
 
-	intel_fini_pipe_control(ring);
+	kunmap(sg_page(obj->pages->sgl));
+	i915_gem_object_ggtt_unpin(obj);
+	drm_gem_object_unreference(&obj->base);
+	engine->status_page.obj = NULL;
 }
 
-static int gen8_rcs_signal(struct intel_engine_cs *signaller,
-			   unsigned int num_dwords)
+static void engine_cleanup(struct intel_engine_cs *engine)
 {
-#define MBOX_UPDATE_DWORDS 8
-	struct drm_device *dev = signaller->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *waiter;
-	int i, ret, num_rings;
+	if (engine->legacy_ring)
+		intel_ring_free(engine->legacy_ring);
 
-	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
-	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
-#undef MBOX_UPDATE_DWORDS
+	cleanup_status_page(engine);
+	i915_cmd_parser_fini_engine(engine);
+}
 
-	ret = intel_ring_begin(signaller, num_dwords);
-	if (ret)
-		return ret;
+static void render_cleanup(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
 
-	for_each_engine(waiter, dev_priv, i) {
-		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
-		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
-			continue;
+	engine_cleanup(engine);
 
-		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
-		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
-					   PIPE_CONTROL_QW_WRITE |
-					   PIPE_CONTROL_FLUSH_ENABLE);
-		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
-		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
-		intel_ring_emit(signaller, 0);
-		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
-					   MI_SEMAPHORE_TARGET(waiter->id));
-		intel_ring_emit(signaller, 0);
+	if (dev_priv->semaphore_obj) {
+		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
+		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
+		dev_priv->semaphore_obj = NULL;
 	}
 
-	return 0;
+	fini_pipe_control(engine);
 }
 
-static int gen8_xcs_signal(struct intel_engine_cs *signaller,
-			   unsigned int num_dwords)
+static int
+gen8_rcs_emit_signal(struct i915_gem_request *rq, int id)
 {
-#define MBOX_UPDATE_DWORDS 6
-	struct drm_device *dev = signaller->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *waiter;
-	int i, ret, num_rings;
-
-	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
-	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
-#undef MBOX_UPDATE_DWORDS
-
-	ret = intel_ring_begin(signaller, num_dwords);
-	if (ret)
-		return ret;
+	u64 offset = GEN8_SEMAPHORE_OFFSET(rq->i915, rq->engine->id, id);
+	struct intel_ringbuffer *ring;
 
-	for_each_engine(waiter, dev_priv, i) {
-		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
-		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
-			continue;
+	ring = intel_ring_begin(rq, 8);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
-					   MI_FLUSH_DW_OP_STOREDW);
-		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
-					   MI_FLUSH_DW_USE_GTT);
-		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
-		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
-		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
-					   MI_SEMAPHORE_TARGET(waiter->id));
-		intel_ring_emit(signaller, 0);
-	}
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
+	intel_ring_emit(ring,
+			PIPE_CONTROL_GLOBAL_GTT_IVB |
+			PIPE_CONTROL_QW_WRITE |
+			PIPE_CONTROL_FLUSH_ENABLE);
+	intel_ring_emit(ring, lower_32_bits(offset));
+	intel_ring_emit(ring, upper_32_bits(offset));
+	intel_ring_emit(ring, rq->seqno);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring,
+			MI_SEMAPHORE_SIGNAL |
+			MI_SEMAPHORE_TARGET(id));
+	intel_ring_emit(ring, 0);
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
-static int gen6_signal(struct intel_engine_cs *signaller,
-		       unsigned int num_dwords)
+static int
+gen8_xcs_emit_signal(struct i915_gem_request *rq, int id)
 {
-	struct drm_device *dev = signaller->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *useless;
-	int i, ret, num_rings;
-
-#define MBOX_UPDATE_DWORDS 3
-	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
-	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
-#undef MBOX_UPDATE_DWORDS
+	u64 offset = GEN8_SEMAPHORE_OFFSET(rq->i915, rq->engine->id, id);
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(signaller, num_dwords);
-	if (ret)
-		return ret;
-
-	for_each_engine(useless, dev_priv, i) {
-		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
-		if (mbox_reg != GEN6_NOSYNC) {
-			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
-			intel_ring_emit(signaller, mbox_reg);
-			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
-		}
-	}
+	ring = intel_ring_begin(rq, 6);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	/* If num_dwords was rounded, make sure the tail pointer is correct */
-	if (num_rings % 2 == 0)
-		intel_ring_emit(signaller, MI_NOOP);
+	intel_ring_emit(ring,
+			(MI_FLUSH_DW + 1) |
+			MI_FLUSH_DW_OP_STOREDW);
+	intel_ring_emit(ring,
+			lower_32_bits(offset) |
+			MI_FLUSH_DW_USE_GTT);
+	intel_ring_emit(ring, upper_32_bits(offset));
+	intel_ring_emit(ring, rq->seqno);
+	intel_ring_emit(ring,
+			MI_SEMAPHORE_SIGNAL |
+			MI_SEMAPHORE_TARGET(id));
+	intel_ring_emit(ring, 0);
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
-/**
- * gen6_add_request - Update the semaphore mailbox registers
- * 
- * @ring - ring that is adding a request
- * @seqno - return seqno stuck into the ring
- *
- * Update the mailbox registers in the *other* rings with the current seqno.
- * This acts like a signal in the canonical semaphore.
- */
 static int
-gen6_add_request(struct intel_engine_cs *ring)
+gen6_emit_semaphore(struct i915_gem_request *rq, int id)
 {
-	int ret;
-
-	if (ring->semaphore.signal)
-		ret = ring->semaphore.signal(ring, 4);
-	else
-		ret = intel_ring_begin(ring, 4);
+	struct intel_ringbuffer *ring;
 
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
-	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
-	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	__intel_ring_advance(ring);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, rq->engine->semaphore.mbox.signal[id]);
+	intel_ring_emit(ring, rq->seqno);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
-static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
-					      u32 seqno)
+static int
+no_emit_semaphore(struct i915_gem_request *rq, int id)
+{
+	return -ENODEV;
+}
+
+static int
+no_wait_semaphore(struct i915_gem_request *waiter,
+		  struct i915_gem_request *signaller)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	return dev_priv->last_seqno < seqno;
+	return -ENODEV;
 }
 
 /**
@@ -872,66 +908,53 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
  */
 
 static int
-gen8_ring_sync(struct intel_engine_cs *waiter,
-	       struct intel_engine_cs *signaller,
-	       u32 seqno)
+gen8_emit_wait(struct i915_gem_request *waiter,
+	       struct i915_gem_request *signaller)
 {
-	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
-	int ret;
+	u64 offset = GEN8_SEMAPHORE_OFFSET(waiter->i915, signaller->engine->id, waiter->engine->id);
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(waiter, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(waiter, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
-	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
-				MI_SEMAPHORE_GLOBAL_GTT |
-				MI_SEMAPHORE_POLL |
-				MI_SEMAPHORE_SAD_GTE_SDD);
-	intel_ring_emit(waiter, seqno);
-	intel_ring_emit(waiter,
-			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
-	intel_ring_emit(waiter,
-			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
-	intel_ring_advance(waiter);
+	intel_ring_emit(ring,
+			MI_SEMAPHORE_WAIT |
+			MI_SEMAPHORE_GLOBAL_GTT |
+			MI_SEMAPHORE_POLL |
+			MI_SEMAPHORE_SAD_GTE_SDD);
+	intel_ring_emit(ring, signaller->seqno);
+	intel_ring_emit(ring, lower_32_bits(offset));
+	intel_ring_emit(ring, upper_32_bits(offset));
+	intel_ring_advance(ring);
 	return 0;
 }
 
 static int
-gen6_ring_sync(struct intel_engine_cs *waiter,
-	       struct intel_engine_cs *signaller,
-	       u32 seqno)
+gen6_emit_wait(struct i915_gem_request *waiter,
+	       struct i915_gem_request *signaller)
 {
 	u32 dw1 = MI_SEMAPHORE_MBOX |
 		  MI_SEMAPHORE_COMPARE |
 		  MI_SEMAPHORE_REGISTER;
-	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
-	int ret;
+	u32 wait_mbox = signaller->engine->semaphore.mbox.wait[waiter->engine->id];
+	struct intel_ringbuffer *ring;
+
+	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
 
+	ring = intel_ring_begin(waiter, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
+
+	intel_ring_emit(ring, dw1 | wait_mbox);
 	/* Throughout all of the GEM code, seqno passed implies our current
 	 * seqno is >= the last seqno executed. However for hardware the
 	 * comparison is strictly greater than.
 	 */
-	seqno -= 1;
-
-	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
-
-	ret = intel_ring_begin(waiter, 4);
-	if (ret)
-		return ret;
-
-	/* If seqno wrap happened, omit the wait with no-ops */
-	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
-		intel_ring_emit(waiter, dw1 | wait_mbox);
-		intel_ring_emit(waiter, seqno);
-		intel_ring_emit(waiter, 0);
-		intel_ring_emit(waiter, MI_NOOP);
-	} else {
-		intel_ring_emit(waiter, MI_NOOP);
-		intel_ring_emit(waiter, MI_NOOP);
-		intel_ring_emit(waiter, MI_NOOP);
-		intel_ring_emit(waiter, MI_NOOP);
-	}
-	intel_ring_advance(waiter);
+	intel_ring_emit(ring, signaller->seqno - 1);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
 
 	return 0;
 }
@@ -946,10 +969,10 @@ do {									\
 } while (0)
 
 static int
-pc_render_add_request(struct intel_engine_cs *ring)
+gen5_emit_breadcrumb(struct i915_gem_request *rq)
 {
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
-	int ret;
+	u32 scratch_addr = rq->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	struct intel_ringbuffer *ring;
 
 	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
 	 * incoherent with writes to memory, i.e. completely fubar,
@@ -959,15 +982,15 @@ pc_render_add_request(struct intel_engine_cs *ring)
 	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
 	 * memory before requesting an interrupt.
 	 */
-	ret = intel_ring_begin(ring, 32);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 32);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
-	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, rq->engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, rq->seqno);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
@@ -985,96 +1008,79 @@ pc_render_add_request(struct intel_engine_cs *ring)
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
-	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, rq->engine->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, rq->seqno);
 	intel_ring_emit(ring, 0);
-	__intel_ring_advance(ring);
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
 static u32
-gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
-{
-	/* Workaround to force correct ordering between irq and seqno writes on
-	 * ivb (and maybe also on snb) by reading from a CS register (like
-	 * ACTHD) before reading the status page. */
-	if (!lazy_coherency) {
-		struct drm_i915_private *dev_priv = ring->dev->dev_private;
-		POSTING_READ(RING_ACTHD(ring->mmio_base));
-	}
-
-	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
-}
-
-static u32
-ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+ring_get_seqno(struct intel_engine_cs *engine)
 {
-	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
+	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 }
 
 static void
-ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
+ring_set_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
-	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
+	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 }
 
 static u32
-pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+gen5_render_get_seqno(struct intel_engine_cs *engine)
 {
-	return ring->scratch.cpu_page[0];
+	return engine->scratch.cpu_page[0];
 }
 
 static void
-pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
+gen5_render_set_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
-	ring->scratch.cpu_page[0] = seqno;
+	engine->scratch.cpu_page[0] = seqno;
 }
 
 static bool
-gen5_ring_get_irq(struct intel_engine_cs *ring)
+gen5_get_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *i915 = engine->i915;
 	unsigned long flags;
 
-	if (!dev->irq_enabled)
+	if (!i915->dev->irq_enabled)
 		return false;
 
-	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount++ == 0)
-		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+	spin_lock_irqsave(&i915->irq_lock, flags);
+	if (engine->irq_refcount++ == 0)
+		gen5_enable_gt_irq(i915, engine->irq_enable_mask);
+	spin_unlock_irqrestore(&i915->irq_lock, flags);
 
 	return true;
 }
 
 static void
-gen5_ring_put_irq(struct intel_engine_cs *ring)
+gen5_put_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *i915 = engine->i915;
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount == 0)
-		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+	spin_lock_irqsave(&i915->irq_lock, flags);
+	if (--engine->irq_refcount == 0)
+		gen5_disable_gt_irq(i915, engine->irq_enable_mask);
+	spin_unlock_irqrestore(&i915->irq_lock, flags);
 }
 
 static bool
-i9xx_ring_get_irq(struct intel_engine_cs *ring)
+i9xx_get_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
-	if (!dev->irq_enabled)
+	if (!dev_priv->dev->irq_enabled)
 		return false;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount++ == 0) {
-		dev_priv->irq_mask &= ~ring->irq_enable_mask;
+	if (engine->irq_refcount++ == 0) {
+		dev_priv->irq_mask &= ~engine->irq_enable_mask;
 		I915_WRITE(IMR, dev_priv->irq_mask);
 		POSTING_READ(IMR);
 	}
@@ -1084,15 +1090,14 @@ i9xx_ring_get_irq(struct intel_engine_cs *ring)
 }
 
 static void
-i9xx_ring_put_irq(struct intel_engine_cs *ring)
+i9xx_put_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount == 0) {
-		dev_priv->irq_mask |= ring->irq_enable_mask;
+	if (--engine->irq_refcount == 0) {
+		dev_priv->irq_mask |= engine->irq_enable_mask;
 		I915_WRITE(IMR, dev_priv->irq_mask);
 		POSTING_READ(IMR);
 	}
@@ -1100,18 +1105,17 @@ i9xx_ring_put_irq(struct intel_engine_cs *ring)
 }
 
 static bool
-i8xx_ring_get_irq(struct intel_engine_cs *ring)
+i8xx_get_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
-	if (!dev->irq_enabled)
+	if (!dev_priv->dev->irq_enabled)
 		return false;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount++ == 0) {
-		dev_priv->irq_mask &= ~ring->irq_enable_mask;
+	if (engine->irq_refcount++ == 0) {
+		dev_priv->irq_mask &= ~engine->irq_enable_mask;
 		I915_WRITE16(IMR, dev_priv->irq_mask);
 		POSTING_READ16(IMR);
 	}
@@ -1121,93 +1125,29 @@ i8xx_ring_get_irq(struct intel_engine_cs *ring)
 }
 
 static void
-i8xx_ring_put_irq(struct intel_engine_cs *ring)
+i8xx_put_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount == 0) {
-		dev_priv->irq_mask |= ring->irq_enable_mask;
+	if (--engine->irq_refcount == 0) {
+		dev_priv->irq_mask |= engine->irq_enable_mask;
 		I915_WRITE16(IMR, dev_priv->irq_mask);
 		POSTING_READ16(IMR);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
-static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
+static int
+bsd_emit_flush(struct i915_gem_request *rq,
+	       u32 flags)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	u32 mmio = 0;
+	struct intel_ringbuffer *ring;
 
-	/* The ring status page addresses are no longer next to the rest of
-	 * the ring registers as of gen7.
-	 */
-	if (IS_GEN7(dev)) {
-		switch (ring->id) {
-		case RCS:
-			mmio = RENDER_HWS_PGA_GEN7;
-			break;
-		case BCS:
-			mmio = BLT_HWS_PGA_GEN7;
-			break;
-		/*
-		 * VCS2 actually doesn't exist on Gen7. Only shut up
-		 * gcc switch check warning
-		 */
-		case VCS2:
-		case VCS:
-			mmio = BSD_HWS_PGA_GEN7;
-			break;
-		case VECS:
-			mmio = VEBOX_HWS_PGA_GEN7;
-			break;
-		}
-	} else if (IS_GEN6(ring->dev)) {
-		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
-	} else {
-		/* XXX: gen8 returns to sanity */
-		mmio = RING_HWS_PGA(ring->mmio_base);
-	}
-
-	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
-	POSTING_READ(mmio);
-
-	/*
-	 * Flush the TLB for this page
-	 *
-	 * FIXME: These two bits have disappeared on gen8, so a question
-	 * arises: do we still need this and if so how should we go about
-	 * invalidating the TLB?
-	 */
-	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
-		u32 reg = RING_INSTPM(ring->mmio_base);
-
-		/* ring should be idle before issuing a sync flush*/
-		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-
-		I915_WRITE(reg,
-			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
-					      INSTPM_SYNC_FLUSH));
-		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
-			     1000))
-			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
-				  ring->name);
-	}
-}
-
-static int
-bsd_ring_flush(struct intel_engine_cs *ring,
-	       u32     invalidate_domains,
-	       u32     flush_domains)
-{
-	int ret;
-
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, MI_FLUSH);
 	intel_ring_emit(ring, MI_NOOP);
@@ -1216,42 +1156,46 @@ bsd_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-i9xx_add_request(struct intel_engine_cs *ring)
+i9xx_emit_breadcrumb(struct i915_gem_request *rq)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
+	intel_ring_emit(ring, rq->seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
-	__intel_ring_advance(ring);
+	intel_ring_advance(ring);
 
 	return 0;
 }
 
 static bool
-gen6_ring_get_irq(struct intel_engine_cs *ring)
+gen6_ring_get_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
-	if (!dev->irq_enabled)
+	if (!dev_priv->dev->irq_enabled)
 	       return false;
 
+	/* It looks like we need to prevent the gt from suspending while waiting
+	 * for a notify irq, otherwise irqs seem to get lost on at least the
+	 * blt/bsd rings on ivb. */
+	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
+
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount++ == 0) {
-		if (HAS_L3_DPF(dev) && ring->id == RCS)
-			I915_WRITE_IMR(ring,
-				       ~(ring->irq_enable_mask |
-					 GT_PARITY_ERROR(dev)));
+	if (engine->irq_refcount++ == 0) {
+		if (HAS_L3_DPF(dev_priv) && engine->id == RCS)
+			I915_WRITE_IMR(engine,
+				       ~(engine->irq_enable_mask |
+					 GT_PARITY_ERROR(dev_priv)));
 		else
-			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
-		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
+			I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
+		gen5_enable_gt_irq(dev_priv, engine->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
@@ -1259,37 +1203,37 @@ gen6_ring_get_irq(struct intel_engine_cs *ring)
 }
 
 static void
-gen6_ring_put_irq(struct intel_engine_cs *ring)
+gen6_ring_put_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount == 0) {
-		if (HAS_L3_DPF(dev) && ring->id == RCS)
-			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
+	if (--engine->irq_refcount == 0) {
+		if (HAS_L3_DPF(dev_priv) && engine->id == RCS)
+			I915_WRITE_IMR(engine, ~GT_PARITY_ERROR(dev_priv));
 		else
-			I915_WRITE_IMR(ring, ~0);
-		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
+			I915_WRITE_IMR(engine, ~0);
+		gen5_disable_gt_irq(dev_priv, engine->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+
+	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
 }
 
 static bool
-hsw_vebox_get_irq(struct intel_engine_cs *ring)
+hsw_vebox_get_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
-	if (!dev->irq_enabled)
+	if (!dev_priv->dev->irq_enabled)
 		return false;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount++ == 0) {
-		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
-		gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
+	if (engine->irq_refcount++ == 0) {
+		I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
+		gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
@@ -1297,43 +1241,40 @@ hsw_vebox_get_irq(struct intel_engine_cs *ring)
 }
 
 static void
-hsw_vebox_put_irq(struct intel_engine_cs *ring)
+hsw_vebox_put_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
-	if (!dev->irq_enabled)
+	if (!dev_priv->dev->irq_enabled)
 		return;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount == 0) {
-		I915_WRITE_IMR(ring, ~0);
-		gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
+	if (--engine->irq_refcount == 0) {
+		I915_WRITE_IMR(engine, ~0);
+		gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
 static bool
-gen8_ring_get_irq(struct intel_engine_cs *ring)
+gen8_ring_get_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
-	if (!dev->irq_enabled)
+	if (!dev_priv->dev->irq_enabled)
 		return false;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount++ == 0) {
-		if (HAS_L3_DPF(dev) && ring->id == RCS) {
-			I915_WRITE_IMR(ring,
-				       ~(ring->irq_enable_mask |
+	if (engine->irq_refcount++ == 0) {
+		if (HAS_L3_DPF(dev_priv) && engine->id == RCS)
+			I915_WRITE_IMR(engine,
+				       ~(engine->irq_enable_mask |
 					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
-		} else {
-			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
-		}
-		POSTING_READ(RING_IMR(ring->mmio_base));
+		else
+			I915_WRITE_IMR(engine, ~engine->irq_enable_mask);
+		POSTING_READ(RING_IMR(engine->mmio_base));
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
@@ -1341,35 +1282,33 @@ gen8_ring_get_irq(struct intel_engine_cs *ring)
 }
 
 static void
-gen8_ring_put_irq(struct intel_engine_cs *ring)
+gen8_ring_put_irq(struct intel_engine_cs *engine)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount == 0) {
-		if (HAS_L3_DPF(dev) && ring->id == RCS) {
-			I915_WRITE_IMR(ring,
+	if (--engine->irq_refcount == 0) {
+		if (HAS_L3_DPF(dev_priv) && engine->id == RCS)
+			I915_WRITE_IMR(engine,
 				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
-		} else {
-			I915_WRITE_IMR(ring, ~0);
-		}
-		POSTING_READ(RING_IMR(ring->mmio_base));
+		else
+			I915_WRITE_IMR(engine, ~0);
+		POSTING_READ(RING_IMR(engine->mmio_base));
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
 static int
-i965_dispatch_execbuffer(struct intel_engine_cs *ring,
-			 u64 offset, u32 length,
-			 unsigned flags)
+i965_emit_batchbuffer(struct i915_gem_request *rq,
+		      u64 offset, u32 length,
+		      unsigned flags)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
@@ -1384,16 +1323,16 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,
 /* Just userspace ABI convention to limit the wa batch bo to a resonable size */
 #define I830_BATCH_LIMIT (256*1024)
 static int
-i830_dispatch_execbuffer(struct intel_engine_cs *ring,
-				u64 offset, u32 len,
-				unsigned flags)
+i830_emit_batchbuffer(struct i915_gem_request *rq,
+		      u64 offset, u32 len,
+		      unsigned flags)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
 	if (flags & I915_DISPATCH_PINNED) {
-		ret = intel_ring_begin(ring, 4);
-		if (ret)
-			return ret;
+		ring = intel_ring_begin(rq, 4);
+		if (IS_ERR(ring))
+			return PTR_ERR(ring);
 
 		intel_ring_emit(ring, MI_BATCH_BUFFER);
 		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
@@ -1401,14 +1340,15 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 		intel_ring_emit(ring, MI_NOOP);
 		intel_ring_advance(ring);
 	} else {
-		u32 cs_offset = ring->scratch.gtt_offset;
+		u32 cs_offset = rq->engine->scratch.gtt_offset;
 
 		if (len > I830_BATCH_LIMIT)
 			return -ENOSPC;
 
-		ret = intel_ring_begin(ring, 9+3);
-		if (ret)
-			return ret;
+		ring = intel_ring_begin(rq, 9+3);
+		if (IS_ERR(ring))
+			return PTR_ERR(ring);
+
 		/* Blit the batch (which has now all relocs applied) to the stable batch
 		 * scratch bo area (so that the CS never stumbles over its tlb
 		 * invalidation bug) ... */
@@ -1435,15 +1375,15 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,
 }
 
 static int
-i915_dispatch_execbuffer(struct intel_engine_cs *ring,
-			 u64 offset, u32 len,
-			 unsigned flags)
+i915_emit_batchbuffer(struct i915_gem_request *rq,
+		      u64 offset, u32 len,
+		      unsigned flags)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
 	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
@@ -1452,488 +1392,207 @@ i915_dispatch_execbuffer(struct intel_engine_cs *ring,
 	return 0;
 }
 
-static void cleanup_status_page(struct intel_engine_cs *ring)
-{
-	struct drm_i915_gem_object *obj;
-
-	obj = ring->status_page.obj;
-	if (obj == NULL)
-		return;
-
-	kunmap(sg_page(obj->pages->sgl));
-	i915_gem_object_ggtt_unpin(obj);
-	drm_gem_object_unreference(&obj->base);
-	ring->status_page.obj = NULL;
-}
-
-static int init_status_page(struct intel_engine_cs *ring)
+static int setup_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_object *obj;
+	unsigned flags;
+	int ret;
 
-	if ((obj = ring->status_page.obj) == NULL) {
-		unsigned flags;
-		int ret;
+	obj = i915_gem_alloc_object(engine->i915->dev, 4096);
+	if (obj == NULL) {
+		DRM_ERROR("Failed to allocate status page\n");
+		return -ENOMEM;
+	}
 
-		obj = i915_gem_alloc_object(ring->dev, 4096);
-		if (obj == NULL) {
-			DRM_ERROR("Failed to allocate status page\n");
-			return -ENOMEM;
-		}
+	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+	if (ret)
+		goto err_unref;
 
-		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
-		if (ret)
-			goto err_unref;
-
-		flags = 0;
-		if (!HAS_LLC(ring->dev))
-			/* On g33, we cannot place HWS above 256MiB, so
-			 * restrict its pinning to the low mappable arena.
-			 * Though this restriction is not documented for
-			 * gen4, gen5, or byt, they also behave similarly
-			 * and hang if the HWS is placed at the top of the
-			 * GTT. To generalise, it appears that all !llc
-			 * platforms have issues with us placing the HWS
-			 * above the mappable region (even though we never
-			 * actualy map it).
-			 */
-			flags |= PIN_MAPPABLE;
-		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
-		if (ret) {
+	flags = 0;
+	if (!HAS_LLC(engine->i915))
+		/* On g33, we cannot place HWS above 256MiB, so
+		 * restrict its pinning to the low mappable arena.
+		 * Though this restriction is not documented for
+		 * gen4, gen5, or byt, they also behave similarly
+		 * and hang if the HWS is placed at the top of the
+		 * GTT. To generalise, it appears that all !llc
+		 * platforms have issues with us placing the HWS
+		 * above the mappable region (even though we never
+		 * actually map it).
+		 */
+		flags |= PIN_MAPPABLE;
+	ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
+	if (ret) {
 err_unref:
-			drm_gem_object_unreference(&obj->base);
-			return ret;
-		}
-
-		ring->status_page.obj = obj;
+		drm_gem_object_unreference(&obj->base);
+		return ret;
 	}
 
-	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
-	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
-	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+	engine->status_page.obj = obj;
 
-	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
-			ring->name, ring->status_page.gfx_addr);
+	engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
+	engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
+	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
 
+	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
+			engine->name, engine->status_page.gfx_addr);
 	return 0;
 }
 
-static int init_phys_status_page(struct intel_engine_cs *ring)
+static int setup_phys_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_private *i915 = engine->i915;
 
-	if (!dev_priv->status_page_dmah) {
-		dev_priv->status_page_dmah =
-			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
-		if (!dev_priv->status_page_dmah)
-			return -ENOMEM;
-	}
+	i915->status_page_dmah =
+		drm_pci_alloc(i915->dev, PAGE_SIZE, PAGE_SIZE);
+	if (!i915->status_page_dmah)
+		return -ENOMEM;
 
-	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+	engine->status_page.page_addr = i915->status_page_dmah->vaddr;
+	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
 
 	return 0;
 }
 
-void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+void intel_ring_free(struct intel_ringbuffer *ring)
 {
-	if (!ringbuf->obj)
-		return;
+	if (ring->obj) {
+		iounmap(ring->virtual_start);
+		i915_gem_object_ggtt_unpin(ring->obj);
+		drm_gem_object_unreference(&ring->obj->base);
+	}
 
-	iounmap(ringbuf->virtual_start);
-	i915_gem_object_ggtt_unpin(ringbuf->obj);
-	drm_gem_object_unreference(&ringbuf->obj->base);
-	ringbuf->obj = NULL;
+	list_del(&ring->engine_list);
+	kfree(ring);
 }
 
-int intel_alloc_ringbuffer_obj(struct drm_device *dev,
-			       struct intel_ringbuffer *ringbuf)
+struct intel_ringbuffer *
+intel_engine_alloc_ring(struct intel_engine_cs *engine,
+			int size)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_ringbuffer *ring;
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	if (ringbuf->obj)
-		return 0;
+	DRM_DEBUG("creating ringbuffer for %s, size %d\n", engine->name, size);
 
-	obj = NULL;
-	if (!HAS_LLC(dev))
-		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
+	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	if (ring == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ring->engine = engine;
+
+	obj = i915_gem_object_create_stolen(i915->dev, size);
 	if (obj == NULL)
-		obj = i915_gem_alloc_object(dev, ringbuf->size);
+		obj = i915_gem_alloc_object(i915->dev, size);
 	if (obj == NULL)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* mark ring buffers as read-only from GPU side by default */
 	obj->gt_ro = 1;
 
 	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
-	if (ret)
+	if (ret) {
+		DRM_ERROR("failed pin ringbuffer into GGTT\n");
 		goto err_unref;
+	}
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
-	if (ret)
+	if (ret) {
+		DRM_ERROR("failed mark ringbuffer for GTT writes\n");
 		goto err_unpin;
+	}
 
-	ringbuf->virtual_start =
-		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
-				ringbuf->size);
-	if (ringbuf->virtual_start == NULL) {
+	ring->virtual_start =
+		ioremap_wc(i915->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
+			   size);
+	if (ring->virtual_start == NULL) {
+		DRM_ERROR("failed to map ringbuffer through GTT\n");
 		ret = -EINVAL;
 		goto err_unpin;
 	}
 
-	ringbuf->obj = obj;
-	return 0;
-
-err_unpin:
-	i915_gem_object_ggtt_unpin(obj);
-err_unref:
-	drm_gem_object_unreference(&obj->base);
-	return ret;
-}
-
-static int intel_init_ring_buffer(struct drm_device *dev,
-				  struct intel_engine_cs *ring)
-{
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	int ret;
-
-	if (ringbuf == NULL) {
-		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
-		if (!ringbuf)
-			return -ENOMEM;
-		ring->buffer = ringbuf;
-	}
-
-	ring->dev = dev;
-	INIT_LIST_HEAD(&ring->active_list);
-	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->execlist_queue);
-	ringbuf->size = 32 * PAGE_SIZE;
-	ringbuf->engine = ring;
-	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
-
-	init_waitqueue_head(&ring->irq_queue);
-
-	if (I915_NEED_GFX_HWS(dev)) {
-		ret = init_status_page(ring);
-		if (ret)
-			goto error;
-	} else {
-		BUG_ON(ring->id != RCS);
-		ret = init_phys_status_page(ring);
-		if (ret)
-			goto error;
-	}
-
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-	if (ret) {
-		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret);
-		goto error;
-	}
+	ring->obj = obj;
+	ring->size = size;
 
 	/* Workaround an erratum on the i830 which causes a hang if
 	 * the TAIL pointer points to within the last 2 cachelines
 	 * of the buffer.
 	 */
-	ringbuf->effective_size = ringbuf->size;
-	if (IS_I830(dev) || IS_845G(dev))
-		ringbuf->effective_size -= 2 * CACHELINE_BYTES;
-
-	ret = i915_cmd_parser_init_engine(ring);
-	if (ret)
-		goto error;
-
-	ret = ring->init(ring);
-	if (ret)
-		goto error;
-
-	return 0;
-
-error:
-	kfree(ringbuf);
-	ring->buffer = NULL;
-	return ret;
-}
-
-void intel_cleanup_engine(struct intel_engine_cs *ring)
-{
-	struct drm_i915_private *dev_priv = to_i915(ring->dev);
-	struct intel_ringbuffer *ringbuf = ring->buffer;
+	ring->effective_size = size;
+	if (IS_I830(i915) || IS_845G(i915))
+		ring->effective_size -= 2 * CACHELINE_BYTES;
 
-	if (!intel_engine_initialized(ring))
-		return;
-
-	intel_stop_engine(ring);
-	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
-
-	intel_destroy_ringbuffer_obj(ringbuf);
-	ring->preallocated_lazy_request = NULL;
-	ring->outstanding_lazy_seqno = 0;
-
-	if (ring->cleanup)
-		ring->cleanup(ring);
+	ring->space = intel_ring_space(ring);
+	ring->retired_head = -1;
 
-	cleanup_status_page(ring);
+	INIT_LIST_HEAD(&ring->requests);
+	INIT_LIST_HEAD(&ring->breadcrumbs);
+	list_add_tail(&ring->engine_list, &engine->rings);
 
-	i915_cmd_parser_fini_engine(ring);
+	return ring;
 
-	kfree(ringbuf);
-	ring->buffer = NULL;
-}
-
-static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
-{
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	struct drm_i915_gem_request *request;
-	u32 seqno = 0;
-	int ret;
-
-	if (ringbuf->last_retired_head != -1) {
-		ringbuf->head = ringbuf->last_retired_head;
-		ringbuf->last_retired_head = -1;
-
-		ringbuf->space = intel_ring_space(ringbuf);
-		if (ringbuf->space >= n)
-			return 0;
-	}
-
-	list_for_each_entry(request, &ring->request_list, list) {
-		if (__intel_ring_space(request->tail, ringbuf->tail,
-				       ringbuf->size) >= n) {
-			seqno = request->seqno;
-			break;
-		}
-	}
-
-	if (seqno == 0)
-		return -ENOSPC;
-
-	ret = i915_wait_seqno(ring, seqno);
-	if (ret)
-		return ret;
-
-	i915_gem_retire_requests__engine(ring);
-	ringbuf->head = ringbuf->last_retired_head;
-	ringbuf->last_retired_head = -1;
-
-	ringbuf->space = intel_ring_space(ringbuf);
-	return 0;
-}
-
-static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
-{
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	unsigned long end;
-	int ret;
-
-	ret = intel_ring_wait_request(ring, n);
-	if (ret != -ENOSPC)
-		return ret;
-
-	/* force the tail write in case we have been skipping them */
-	__intel_ring_advance(ring);
-
-	/* With GEM the hangcheck timer should kick us out of the loop,
-	 * leaving it early runs the risk of corrupting GEM state (due
-	 * to running on almost untested codepaths). But on resume
-	 * timers don't work yet, so prevent a complete hang in that
-	 * case by choosing an insanely large timeout. */
-	end = jiffies + 60 * HZ;
-
-	trace_i915_ring_wait_begin(ring);
-	do {
-		ringbuf->head = I915_READ_HEAD(ring);
-		ringbuf->space = intel_ring_space(ringbuf);
-		if (ringbuf->space >= n) {
-			ret = 0;
-			break;
-		}
-
-		if (!drm_core_check_feature(dev, DRIVER_MODESET) &&
-		    dev->primary->master) {
-			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
-			if (master_priv->sarea_priv)
-				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;
-		}
-
-		msleep(1);
-
-		if (dev_priv->mm.interruptible && signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-
-		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-					   dev_priv->mm.interruptible);
-		if (ret)
-			break;
-
-		if (time_after(jiffies, end)) {
-			ret = -EBUSY;
-			break;
-		}
-	} while (1);
-	trace_i915_ring_wait_end(ring);
-	return ret;
-}
-
-static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
-{
-	uint32_t __iomem *virt;
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	int rem = ringbuf->size - ringbuf->tail;
-
-	if (ringbuf->space < rem) {
-		int ret = ring_wait_for_space(ring, rem);
-		if (ret)
-			return ret;
-	}
-
-	virt = ringbuf->virtual_start + ringbuf->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ringbuf->tail = 0;
-	ringbuf->space = intel_ring_space(ringbuf);
-
-	return 0;
+err_unpin:
+	i915_gem_object_ggtt_unpin(obj);
+err_unref:
+	drm_gem_object_unreference(&obj->base);
+	return ERR_PTR(ret);
 }
 
-int intel_engine_idle(struct intel_engine_cs *ring)
+static int intel_engine_init(struct intel_engine_cs *engine,
+			     struct drm_i915_private *i915)
 {
-	u32 seqno;
 	int ret;
 
-	/* We need to add any requests required to flush the objects and ring */
-	if (ring->outstanding_lazy_seqno) {
-		ret = i915_add_request(ring, NULL);
-		if (ret)
-			return ret;
-	}
-
-	/* Wait upon the last request to be completed */
-	if (list_empty(&ring->request_list))
-		return 0;
-
-	seqno = list_entry(ring->request_list.prev,
-			   struct drm_i915_gem_request,
-			   list)->seqno;
+	engine->i915 = i915;
 
-	return i915_wait_seqno(ring, seqno);
-}
+	INIT_LIST_HEAD(&engine->rings);
+	INIT_LIST_HEAD(&engine->read_list);
+	INIT_LIST_HEAD(&engine->write_list);
+	INIT_LIST_HEAD(&engine->requests);
+	INIT_LIST_HEAD(&engine->pending);
 
-static int
-intel_ring_alloc_seqno(struct intel_engine_cs *ring)
-{
-	if (ring->outstanding_lazy_seqno)
-		return 0;
+	spin_lock_init(&engine->execlist_lock);
 
-	if (ring->preallocated_lazy_request == NULL) {
-		struct drm_i915_gem_request *request;
+	engine->suspend = engine_suspend;
+	engine->resume = engine_resume;
+	engine->cleanup = engine_cleanup;
 
-		request = kmalloc(sizeof(*request), GFP_KERNEL);
-		if (request == NULL)
-			return -ENOMEM;
+	engine->get_seqno = ring_get_seqno;
+	engine->set_seqno = ring_set_seqno;
 
-		ring->preallocated_lazy_request = request;
-	}
+	engine->get_ring = engine_get_ring;
+	engine->put_ring = engine_put_ring;
 
-	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
-}
+	engine->semaphore.signal = no_emit_semaphore;
+	engine->semaphore.wait = no_wait_semaphore;
 
-static int __intel_ring_prepare(struct intel_engine_cs *ring,
-				int bytes)
-{
-	struct intel_ringbuffer *ringbuf = ring->buffer;
-	int ret;
+	engine->add_request = engine_add_request;
+	engine->write_tail = ring_write_tail;
 
-	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
-		ret = intel_wrap_ring_buffer(ring);
-		if (unlikely(ret))
-			return ret;
-	}
+	init_waitqueue_head(&engine->irq_queue);
 
-	if (unlikely(ringbuf->space < bytes)) {
-		ret = ring_wait_for_space(ring, bytes);
-		if (unlikely(ret))
-			return ret;
+	if (I915_NEED_GFX_HWS(i915)) {
+		ret = setup_status_page(engine);
+	} else {
+		BUG_ON(engine->id != RCS);
+		ret = setup_phys_status_page(engine);
 	}
-
-	return 0;
-}
-
-int intel_ring_begin(struct intel_engine_cs *ring,
-		     int num_dwords)
-{
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	int ret;
-
-	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-				   dev_priv->mm.interruptible);
 	if (ret)
 		return ret;
 
-	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
+	ret = i915_cmd_parser_init_engine(engine);
 	if (ret)
 		return ret;
 
-	/* Preallocate the olr before touching the ring */
-	ret = intel_ring_alloc_seqno(ring);
-	if (ret)
-		return ret;
-
-	ring->buffer->space -= num_dwords * sizeof(uint32_t);
 	return 0;
 }
 
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct intel_engine_cs *ring)
-{
-	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
-	int ret;
-
-	if (num_dwords == 0)
-		return 0;
-
-	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
-	ret = intel_ring_begin(ring, num_dwords);
-	if (ret)
-		return ret;
-
-	while (num_dwords--)
-		intel_ring_emit(ring, MI_NOOP);
-
-	intel_ring_advance(ring);
-
-	return 0;
-}
-
-void intel_engine_init_seqno(struct intel_engine_cs *ring, u32 seqno)
-{
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	BUG_ON(ring->outstanding_lazy_seqno);
-
-	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
-		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
-		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
-		if (HAS_VEBOX(dev))
-			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
-	}
-
-	ring->set_seqno(ring, seqno);
-	ring->hangcheck.seqno = seqno;
-}
-
-static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
+static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
 				     u32 value)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_private *dev_priv = engine->i915;
 
        /* Every tail move must follow the sequence below */
 
@@ -1953,8 +1612,8 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
 		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
 
 	/* Now that the ring is fully powered up, update the tail */
-	I915_WRITE_TAIL(ring, value);
-	POSTING_READ(RING_TAIL(ring->mmio_base));
+	I915_WRITE_TAIL(engine, value);
+	POSTING_READ(RING_TAIL(engine->mmio_base));
 
 	/* Let the ring send IDLE messages to the GT again,
 	 * and so let it sleep to conserve power when idle.
@@ -1963,18 +1622,18 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
 		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
 }
 
-static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
-			       u32 invalidate, u32 flush)
+static int gen6_bsd_emit_flush(struct i915_gem_request *rq,
+			       u32 flags)
 {
+	struct intel_ringbuffer *ring;
 	uint32_t cmd;
-	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_INFO(ring->dev)->gen >= 8)
+	if (INTEL_INFO(rq->i915)->gen >= 8)
 		cmd += 1;
 	/*
 	 * Bspec vol 1c.5 - video engine command streamer:
@@ -1982,12 +1641,12 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 	 * operation is complete. This bit is only valid when the
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
-	if (invalidate & I915_GEM_GPU_DOMAINS)
+	if (flags & I915_INVALIDATE_CACHES)
 		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
 			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	if (INTEL_INFO(ring->dev)->gen >= 8) {
+	if (INTEL_INFO(rq->i915)->gen >= 8) {
 		intel_ring_emit(ring, 0); /* upper addr */
 		intel_ring_emit(ring, 0); /* value */
 	} else  {
@@ -1999,16 +1658,16 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 }
 
 static int
-gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
-			      u64 offset, u32 len,
-			      unsigned flags)
+gen8_emit_batchbuffer(struct i915_gem_request *rq,
+		      u64 offset, u32 len,
+		      unsigned flags)
 {
-	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
-	int ret;
+	struct intel_ringbuffer *ring;
+	bool ppgtt = USES_PPGTT(rq->i915) && !(flags & I915_DISPATCH_SECURE);
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	/* FIXME(BDW): Address space and security selectors. */
 	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
@@ -2021,15 +1680,15 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 }
 
 static int
-hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
-			      u64 offset, u32 len,
-			      unsigned flags)
+hsw_emit_batchbuffer(struct i915_gem_request *rq,
+		     u64 offset, u32 len,
+		     unsigned flags)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
@@ -2042,15 +1701,15 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 }
 
 static int
-gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
-			      u64 offset, u32 len,
-			      unsigned flags)
+gen6_emit_batchbuffer(struct i915_gem_request *rq,
+		      u64 offset, u32 len,
+		      unsigned flags)
 {
-	int ret;
+	struct intel_ringbuffer *ring;
 
-	ret = intel_ring_begin(ring, 2);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 2);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
@@ -2064,19 +1723,18 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 
 /* Blitter support (SandyBridge+) */
 
-static int gen6_ring_flush(struct intel_engine_cs *ring,
-			   u32 invalidate, u32 flush)
+static int gen6_blt_emit_flush(struct i915_gem_request *rq,
+			       u32 flags)
 {
-	struct drm_device *dev = ring->dev;
+	struct intel_ringbuffer *ring;
 	uint32_t cmd;
-	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	ring = intel_ring_begin(rq, 4);
+	if (IS_ERR(ring))
+		return PTR_ERR(ring);
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_INFO(ring->dev)->gen >= 8)
+	if (INTEL_INFO(rq->i915)->gen >= 8)
 		cmd += 1;
 	/*
 	 * Bspec vol 1c.3 - blitter engine command streamer:
@@ -2084,12 +1742,12 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
 	 * operation is complete. This bit is only valid when the
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
-	if (invalidate & I915_GEM_DOMAIN_RENDER)
+	if (flags & I915_INVALIDATE_CACHES)
 		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
 			MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	if (INTEL_INFO(ring->dev)->gen >= 8) {
+	if (INTEL_INFO(rq->i915)->gen >= 8) {
 		intel_ring_emit(ring, 0); /* upper addr */
 		intel_ring_emit(ring, 0); /* value */
 	} else  {
@@ -2098,26 +1756,39 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
 	}
 	intel_ring_advance(ring);
 
-	if (IS_GEN7(dev) && !invalidate && flush)
-		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
+	if (IS_GEN7(rq->i915) && flags & I915_KICK_FBC)
+		return gen7_ring_fbc_flush(rq, FBC_REND_CACHE_CLEAN);
 
 	return 0;
 }
 
-int intel_init_render_engine(struct drm_device *dev)
+static void gen8_engine_init_semaphore(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->engine[RCS];
+	if (engine->i915->semaphore_obj == NULL)
+		return;
+
+	engine->semaphore.wait = gen8_emit_wait;
+	engine->semaphore.signal =
+		engine->id == RCS ? gen8_rcs_emit_signal : gen8_xcs_emit_signal;
+}
+
+int intel_init_render_engine(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine = &dev_priv->engine[RCS];
 	struct drm_i915_gem_object *obj;
 	int ret;
 
-	ring->name = "render ring";
-	ring->id = RCS;
-	ring->mmio_base = RENDER_RING_BASE;
+	ret = intel_engine_init(engine, dev_priv);
+	if (ret)
+		return ret;
 
-	if (INTEL_INFO(dev)->gen >= 8) {
-		if (i915_semaphore_is_enabled(dev)) {
-			obj = i915_gem_alloc_object(dev, 4096);
+	engine->name = "render ring";
+	engine->id = RCS;
+	engine->mmio_base = RENDER_RING_BASE;
+
+	if (INTEL_INFO(dev_priv)->gen >= 8) {
+		if (i915_semaphore_is_enabled(dev_priv)) {
+			obj = i915_gem_alloc_object(dev_priv->dev, 4096);
 			if (obj == NULL) {
 				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
 				i915.semaphores = 0;
@@ -2132,32 +1803,23 @@ int intel_init_render_engine(struct drm_device *dev)
 					dev_priv->semaphore_obj = obj;
 			}
 		}
-		ring->add_request = gen6_add_request;
-		ring->flush = gen8_render_ring_flush;
-		ring->irq_get = gen8_ring_get_irq;
-		ring->irq_put = gen8_ring_put_irq;
-		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		ring->get_seqno = gen6_ring_get_seqno;
-		ring->set_seqno = ring_set_seqno;
-		if (i915_semaphore_is_enabled(dev)) {
-			WARN_ON(!dev_priv->semaphore_obj);
-			ring->semaphore.sync_to = gen8_ring_sync;
-			ring->semaphore.signal = gen8_rcs_signal;
-			GEN8_RING_SEMAPHORE_INIT;
-		}
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		ring->add_request = gen6_add_request;
-		ring->flush = gen7_render_ring_flush;
-		if (INTEL_INFO(dev)->gen == 6)
-			ring->flush = gen6_render_ring_flush;
-		ring->irq_get = gen6_ring_get_irq;
-		ring->irq_put = gen6_ring_put_irq;
-		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-		ring->get_seqno = gen6_ring_get_seqno;
-		ring->set_seqno = ring_set_seqno;
-		if (i915_semaphore_is_enabled(dev)) {
-			ring->semaphore.sync_to = gen6_ring_sync;
-			ring->semaphore.signal = gen6_signal;
+		engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+		engine->emit_flush = gen8_render_emit_flush;
+		engine->irq_get = gen8_ring_get_irq;
+		engine->irq_put = gen8_ring_put_irq;
+		engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
+		gen8_engine_init_semaphore(engine);
+	} else if (INTEL_INFO(dev_priv)->gen >= 6) {
+		engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+		engine->emit_flush = gen7_render_emit_flush;
+		if (INTEL_INFO(dev_priv)->gen == 6)
+			engine->emit_flush = gen6_render_emit_flush;
+		engine->irq_get = gen6_ring_get_irq;
+		engine->irq_put = gen6_ring_put_irq;
+		engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
+		if (i915_semaphore_is_enabled(dev_priv)) {
+			engine->semaphore.wait = gen6_emit_wait;
+			engine->semaphore.signal = gen6_emit_semaphore;
 			/*
 			 * The current semaphore is only applied on pre-gen8
 			 * platform.  And there is no VCS2 ring on the pre-gen8
@@ -2165,63 +1827,61 @@ int intel_init_render_engine(struct drm_device *dev)
 			 * initialized as INVALID.  Gen8 will initialize the
 			 * sema between VCS2 and RCS later.
 			 */
-			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
-			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
-			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
-			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
-			ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
-			ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
-			ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
-			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
+			engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
+			engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
+			engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
+			engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
+			engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+			engine->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
+			engine->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
+			engine->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
+			engine->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
+			engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 		}
-	} else if (IS_GEN5(dev)) {
-		ring->add_request = pc_render_add_request;
-		ring->flush = gen4_render_ring_flush;
-		ring->get_seqno = pc_render_get_seqno;
-		ring->set_seqno = pc_render_set_seqno;
-		ring->irq_get = gen5_ring_get_irq;
-		ring->irq_put = gen5_ring_put_irq;
-		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
+	} else if (IS_GEN5(dev_priv)) {
+		engine->emit_breadcrumb = gen5_emit_breadcrumb;
+		engine->emit_flush = gen4_emit_flush;
+		engine->get_seqno = gen5_render_get_seqno;
+		engine->set_seqno = gen5_render_set_seqno;
+		engine->irq_get = gen5_get_irq;
+		engine->irq_put = gen5_put_irq;
+		engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
 					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
 	} else {
-		ring->add_request = i9xx_add_request;
-		if (INTEL_INFO(dev)->gen < 4)
-			ring->flush = gen2_render_ring_flush;
+		engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+		if (INTEL_INFO(dev_priv)->gen < 4)
+			engine->emit_flush = gen2_emit_flush;
 		else
-			ring->flush = gen4_render_ring_flush;
-		ring->get_seqno = ring_get_seqno;
-		ring->set_seqno = ring_set_seqno;
-		if (IS_GEN2(dev)) {
-			ring->irq_get = i8xx_ring_get_irq;
-			ring->irq_put = i8xx_ring_put_irq;
+			engine->emit_flush = gen4_emit_flush;
+		if (IS_GEN2(dev_priv)) {
+			engine->irq_get = i8xx_get_irq;
+			engine->irq_put = i8xx_put_irq;
 		} else {
-			ring->irq_get = i9xx_ring_get_irq;
-			ring->irq_put = i9xx_ring_put_irq;
+			engine->irq_get = i9xx_get_irq;
+			engine->irq_put = i9xx_put_irq;
 		}
-		ring->irq_enable_mask = I915_USER_INTERRUPT;
+		engine->irq_enable_mask = I915_USER_INTERRUPT;
 	}
-	ring->write_tail = ring_write_tail;
-
-	if (IS_HASWELL(dev))
-		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
-	else if (IS_GEN8(dev))
-		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
-	else if (INTEL_INFO(dev)->gen >= 6)
-		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
-	else if (INTEL_INFO(dev)->gen >= 4)
-		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
-	else if (IS_I830(dev) || IS_845G(dev))
-		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
+
+	if (IS_GEN8(dev_priv))
+		engine->emit_batchbuffer = gen8_emit_batchbuffer;
+	else if (IS_HASWELL(dev_priv))
+		engine->emit_batchbuffer = hsw_emit_batchbuffer;
+	else if (INTEL_INFO(dev_priv)->gen >= 6)
+		engine->emit_batchbuffer = gen6_emit_batchbuffer;
+	else if (INTEL_INFO(dev_priv)->gen >= 4)
+		engine->emit_batchbuffer = i965_emit_batchbuffer;
+	else if (IS_I830(dev_priv) || IS_845G(dev_priv))
+		engine->emit_batchbuffer = i830_emit_batchbuffer;
 	else
-		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
-	ring->init = init_render_ring;
-	ring->cleanup = render_ring_cleanup;
+		engine->emit_batchbuffer = i915_emit_batchbuffer;
+
+	engine->resume = render_resume;
+	engine->cleanup = render_cleanup;
 
 	/* Workaround batchbuffer to combat CS tlb bug. */
-	if (HAS_BROKEN_CS_TLB(dev)) {
-		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+	if (HAS_BROKEN_CS_TLB(dev_priv)) {
+		obj = i915_gem_alloc_object(dev_priv->dev, I830_BATCH_LIMIT);
 		if (obj == NULL) {
 			DRM_ERROR("Failed to allocate batch bo\n");
 			return -ENOMEM;
@@ -2234,158 +1894,148 @@ int intel_init_render_engine(struct drm_device *dev)
 			return ret;
 		}
 
-		ring->scratch.obj = obj;
-		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
+		engine->scratch.obj = obj;
+		engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
+	}
+
+	if (INTEL_INFO(dev_priv)->gen >= 5) {
+		ret = init_pipe_control(engine);
+		if (ret)
+			return ret;
 	}
 
-	return intel_init_ring_buffer(dev, ring);
+	return intel_engine_enable_execlist(engine);
 }
 
-int intel_init_bsd_engine(struct drm_device *dev)
+int intel_init_bsd_engine(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->engine[VCS];
+	struct intel_engine_cs *engine = &dev_priv->engine[VCS];
+	int ret;
 
-	ring->name = "bsd ring";
-	ring->id = VCS;
+	ret = intel_engine_init(engine, dev_priv);
+	if (ret)
+		return ret;
 
-	ring->write_tail = ring_write_tail;
-	if (INTEL_INFO(dev)->gen >= 6) {
-		ring->mmio_base = GEN6_BSD_RING_BASE;
+	engine->name = "bsd ring";
+	engine->id = VCS;
+
+	if (INTEL_INFO(dev_priv)->gen >= 6) {
+		engine->mmio_base = GEN6_BSD_RING_BASE;
 		/* gen6 bsd needs a special wa for tail updates */
-		if (IS_GEN6(dev))
-			ring->write_tail = gen6_bsd_ring_write_tail;
-		ring->flush = gen6_bsd_ring_flush;
-		ring->add_request = gen6_add_request;
-		ring->get_seqno = gen6_ring_get_seqno;
-		ring->set_seqno = ring_set_seqno;
-		if (INTEL_INFO(dev)->gen >= 8) {
-			ring->irq_enable_mask =
+		if (IS_GEN6(dev_priv))
+			engine->write_tail = gen6_bsd_ring_write_tail;
+		engine->emit_flush = gen6_bsd_emit_flush;
+		engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+		if (INTEL_INFO(dev_priv)->gen >= 8) {
+			engine->irq_enable_mask =
 				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
-			ring->irq_get = gen8_ring_get_irq;
-			ring->irq_put = gen8_ring_put_irq;
-			ring->dispatch_execbuffer =
-				gen8_ring_dispatch_execbuffer;
-			if (i915_semaphore_is_enabled(dev)) {
-				ring->semaphore.sync_to = gen8_ring_sync;
-				ring->semaphore.signal = gen8_xcs_signal;
-				GEN8_RING_SEMAPHORE_INIT;
-			}
+			engine->irq_get = gen8_ring_get_irq;
+			engine->irq_put = gen8_ring_put_irq;
+			engine->emit_batchbuffer = gen8_emit_batchbuffer;
+			gen8_engine_init_semaphore(engine);
 		} else {
-			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
-			ring->irq_get = gen6_ring_get_irq;
-			ring->irq_put = gen6_ring_put_irq;
-			ring->dispatch_execbuffer =
-				gen6_ring_dispatch_execbuffer;
-			if (i915_semaphore_is_enabled(dev)) {
-				ring->semaphore.sync_to = gen6_ring_sync;
-				ring->semaphore.signal = gen6_signal;
-				ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
-				ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
-				ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
-				ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
-				ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
-				ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
-				ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
-				ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
-				ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
-				ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
+			engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
+			engine->irq_get = gen6_ring_get_irq;
+			engine->irq_put = gen6_ring_put_irq;
+			engine->emit_batchbuffer = gen6_emit_batchbuffer;
+			if (i915_semaphore_is_enabled(dev_priv)) {
+				engine->semaphore.wait = gen6_emit_wait;
+				engine->semaphore.signal = gen6_emit_semaphore;
+				engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
+				engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
+				engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
+				engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
+				engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+				engine->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
+				engine->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
+				engine->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
+				engine->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
+				engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 			}
 		}
 	} else {
-		ring->mmio_base = BSD_RING_BASE;
-		ring->flush = bsd_ring_flush;
-		ring->add_request = i9xx_add_request;
-		ring->get_seqno = ring_get_seqno;
-		ring->set_seqno = ring_set_seqno;
-		if (IS_GEN5(dev)) {
-			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
-			ring->irq_get = gen5_ring_get_irq;
-			ring->irq_put = gen5_ring_put_irq;
+		engine->mmio_base = BSD_RING_BASE;
+		engine->emit_flush = bsd_emit_flush;
+		engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+		if (IS_GEN5(dev_priv)) {
+			engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
+			engine->irq_get = gen5_get_irq;
+			engine->irq_put = gen5_put_irq;
 		} else {
-			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
-			ring->irq_get = i9xx_ring_get_irq;
-			ring->irq_put = i9xx_ring_put_irq;
+			engine->irq_enable_mask = I915_BSD_USER_INTERRUPT;
+			engine->irq_get = i9xx_get_irq;
+			engine->irq_put = i9xx_put_irq;
 		}
-		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
+		engine->emit_batchbuffer = i965_emit_batchbuffer;
 	}
-	ring->init = init_ring_common;
 
-	return intel_init_ring_buffer(dev, ring);
+	return intel_engine_enable_execlist(engine);
 }
 
 /**
  * Initialize the second BSD ring for Broadwell GT3.
  * It is noted that this only exists on Broadwell GT3.
  */
-int intel_init_bsd2_engine(struct drm_device *dev)
+int intel_init_bsd2_engine(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->engine[VCS2];
+	struct intel_engine_cs *engine = &dev_priv->engine[VCS2];
+	int ret;
 
-	if ((INTEL_INFO(dev)->gen != 8)) {
+	if ((INTEL_INFO(dev_priv)->gen != 8)) {
 		DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
 		return -EINVAL;
 	}
 
-	ring->name = "bsd2 ring";
-	ring->id = VCS2;
+	ret = intel_engine_init(engine, dev_priv);
+	if (ret)
+		return ret;
 
-	ring->write_tail = ring_write_tail;
-	ring->mmio_base = GEN8_BSD2_RING_BASE;
-	ring->flush = gen6_bsd_ring_flush;
-	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
-	ring->set_seqno = ring_set_seqno;
-	ring->irq_enable_mask =
+	engine->name = "bsd2 ring";
+	engine->id = VCS2;
+
+	engine->mmio_base = GEN8_BSD2_RING_BASE;
+	engine->emit_flush = gen6_bsd_emit_flush;
+	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+	engine->emit_batchbuffer = gen8_emit_batchbuffer;
+	engine->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
-	ring->irq_get = gen8_ring_get_irq;
-	ring->irq_put = gen8_ring_put_irq;
-	ring->dispatch_execbuffer =
-			gen8_ring_dispatch_execbuffer;
-	if (i915_semaphore_is_enabled(dev)) {
-		ring->semaphore.sync_to = gen8_ring_sync;
-		ring->semaphore.signal = gen8_xcs_signal;
-		GEN8_RING_SEMAPHORE_INIT;
-	}
-	ring->init = init_ring_common;
+	engine->irq_get = gen8_ring_get_irq;
+	engine->irq_put = gen8_ring_put_irq;
+	gen8_engine_init_semaphore(engine);
 
-	return intel_init_ring_buffer(dev, ring);
+	return intel_engine_enable_execlist(engine);
 }
 
-int intel_init_blt_engine(struct drm_device *dev)
+int intel_init_blt_engine(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->engine[BCS];
+	struct intel_engine_cs *engine = &dev_priv->engine[BCS];
+	int ret;
+
+	ret = intel_engine_init(engine, dev_priv);
+	if (ret)
+		return ret;
 
-	ring->name = "blitter ring";
-	ring->id = BCS;
+	engine->name = "blitter ring";
+	engine->id = BCS;
 
-	ring->mmio_base = BLT_RING_BASE;
-	ring->write_tail = ring_write_tail;
-	ring->flush = gen6_ring_flush;
-	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
-	ring->set_seqno = ring_set_seqno;
-	if (INTEL_INFO(dev)->gen >= 8) {
-		ring->irq_enable_mask =
+	engine->mmio_base = BLT_RING_BASE;
+	engine->emit_flush = gen6_blt_emit_flush;
+	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+	if (INTEL_INFO(dev_priv)->gen >= 8) {
+		engine->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
-		ring->irq_get = gen8_ring_get_irq;
-		ring->irq_put = gen8_ring_put_irq;
-		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
-		if (i915_semaphore_is_enabled(dev)) {
-			ring->semaphore.sync_to = gen8_ring_sync;
-			ring->semaphore.signal = gen8_xcs_signal;
-			GEN8_RING_SEMAPHORE_INIT;
-		}
+		engine->irq_get = gen8_ring_get_irq;
+		engine->irq_put = gen8_ring_put_irq;
+		engine->emit_batchbuffer = gen8_emit_batchbuffer;
+		gen8_engine_init_semaphore(engine);
 	} else {
-		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
-		ring->irq_get = gen6_ring_get_irq;
-		ring->irq_put = gen6_ring_put_irq;
-		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
-		if (i915_semaphore_is_enabled(dev)) {
-			ring->semaphore.signal = gen6_signal;
-			ring->semaphore.sync_to = gen6_ring_sync;
+		engine->irq_enable_mask = GT_BLT_USER_INTERRUPT;
+		engine->irq_get = gen6_ring_get_irq;
+		engine->irq_put = gen6_ring_put_irq;
+		engine->emit_batchbuffer = gen6_emit_batchbuffer;
+		if (i915_semaphore_is_enabled(dev_priv)) {
+			engine->semaphore.signal = gen6_emit_semaphore;
+			engine->semaphore.wait = gen6_emit_wait;
 			/*
 			 * The current semaphore is only applied on pre-gen8
 			 * platform.  And there is no VCS2 ring on the pre-gen8
@@ -2393,124 +2043,434 @@ int intel_init_blt_engine(struct drm_device *dev)
 			 * initialized as INVALID.  Gen8 will initialize the
 			 * sema between BCS and VCS2 later.
 			 */
-			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
-			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
-			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
-			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
-			ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
-			ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
-			ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
-			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
+			engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
+			engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
+			engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
+			engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
+			engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+			engine->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
+			engine->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
+			engine->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
+			engine->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
+			engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 		}
 	}
-	ring->init = init_ring_common;
 
-	return intel_init_ring_buffer(dev, ring);
+	return intel_engine_enable_execlist(engine);
 }
 
-int intel_init_vebox_engine(struct drm_device *dev)
+int intel_init_vebox_engine(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring = &dev_priv->engine[VECS];
+	struct intel_engine_cs *engine = &dev_priv->engine[VECS];
+	int ret;
 
-	ring->name = "video enhancement ring";
-	ring->id = VECS;
+	ret = intel_engine_init(engine, dev_priv);
+	if (ret)
+		return ret;
 
-	ring->mmio_base = VEBOX_RING_BASE;
-	ring->write_tail = ring_write_tail;
-	ring->flush = gen6_ring_flush;
-	ring->add_request = gen6_add_request;
-	ring->get_seqno = gen6_ring_get_seqno;
-	ring->set_seqno = ring_set_seqno;
+	engine->name = "video enhancement ring";
+	engine->id = VECS;
 
-	if (INTEL_INFO(dev)->gen >= 8) {
-		ring->irq_enable_mask =
+	engine->mmio_base = VEBOX_RING_BASE;
+	engine->emit_flush = gen6_blt_emit_flush;
+	engine->emit_breadcrumb = i9xx_emit_breadcrumb;
+
+	if (INTEL_INFO(dev_priv)->gen >= 8) {
+		engine->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
-		ring->irq_get = gen8_ring_get_irq;
-		ring->irq_put = gen8_ring_put_irq;
-		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
-		if (i915_semaphore_is_enabled(dev)) {
-			ring->semaphore.sync_to = gen8_ring_sync;
-			ring->semaphore.signal = gen8_xcs_signal;
-			GEN8_RING_SEMAPHORE_INIT;
-		}
+		engine->irq_get = gen8_ring_get_irq;
+		engine->irq_put = gen8_ring_put_irq;
+		engine->emit_batchbuffer = gen8_emit_batchbuffer;
+		gen8_engine_init_semaphore(engine);
 	} else {
-		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
-		ring->irq_get = hsw_vebox_get_irq;
-		ring->irq_put = hsw_vebox_put_irq;
-		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
-		if (i915_semaphore_is_enabled(dev)) {
-			ring->semaphore.sync_to = gen6_ring_sync;
-			ring->semaphore.signal = gen6_signal;
-			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
-			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
-			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
-			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
-			ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
-			ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
-			ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
-			ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
-			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
+		engine->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
+		engine->irq_get = hsw_vebox_get_irq;
+		engine->irq_put = hsw_vebox_put_irq;
+		engine->emit_batchbuffer = gen6_emit_batchbuffer;
+		if (i915_semaphore_is_enabled(dev_priv)) {
+			engine->semaphore.wait = gen6_emit_wait;
+			engine->semaphore.signal = gen6_emit_semaphore;
+			engine->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
+			engine->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
+			engine->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
+			engine->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
+			engine->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
+			engine->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
+			engine->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
+			engine->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
+			engine->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
+			engine->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
 		}
 	}
-	ring->init = init_ring_common;
 
-	return intel_init_ring_buffer(dev, ring);
+	return intel_engine_enable_execlist(engine);
 }
 
 int
-intel_engine_flush_all_caches(struct intel_engine_cs *ring)
+intel_engine_flush(struct intel_engine_cs *engine,
+		   struct intel_context *ctx,
+		   u32 breadcrumbs)
 {
-	int ret;
+	struct i915_gem_request *rq;
+	int n, ret = 0;
 
-	if (!ring->gpu_caches_dirty)
+	rq = intel_engine_alloc_request(engine, ctx);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	if (i915_semaphore_is_enabled(engine->i915)) {
+		for (n = 0; n < I915_NUM_ENGINES; n++) {
+			if ((breadcrumbs & (1 << n)) == 0)
+				continue;
+
+			if (engine->i915->engine[n].i915 == NULL)
+				continue;
+			/* Own bit is served by the breadcrumb below; keep scanning */
+			if (n == engine->id)
+				continue;
+
+			ret = i915_request_emit_semaphore(rq, n);
+			if (ret)
+				break;
+		}
+	}
+	if (ret == 0 && (breadcrumbs & (1 << engine->id)))
+		ret = i915_request_emit_breadcrumb(rq);
+	if (ret == 0)
+		ret = i915_request_commit(rq);
+	i915_request_put(rq);
+
+	return ret;
+}
+
+int intel_engine_sync(struct intel_engine_cs *engine)
+{
+	/* No requests: nothing to wait for. Else wait on the list's tail entry */
+	if (list_empty(&engine->requests))
 		return 0;
 
-	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	return i915_request_wait(container_of(engine->requests.prev,
+					      struct i915_gem_request,
+					      engine_list));
+}
+
+bool intel_engine_idle(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	u32 head, tail;
+
+	if (!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0)
+		return false;
+
+	head = I915_READ_HEAD(engine) & HEAD_ADDR;
+	tail = I915_READ_TAIL(engine) & TAIL_ADDR;
+	return head == tail;
+}
+
+static u32
+next_seqno(struct drm_i915_private *i915)
+{
+	/* Advance the counter; 0 means "no seqno", so step over it on wrap */
+	if (++i915->next_seqno == 0)
+		++i915->next_seqno;
+	return i915->next_seqno;
+}
+
+struct i915_gem_request *
+intel_engine_alloc_request(struct intel_engine_cs *engine,
+			   struct intel_context *ctx)
+{
+	struct intel_ringbuffer *ring;
+	struct i915_gem_request *rq;
+	int ret;
+
+	ring = ctx->ring[engine->id].ring;
+	if (ring == NULL) {
+		ring = engine->get_ring(engine, ctx);
+		if (IS_ERR(ring))
+			return ERR_CAST(ring);
+
+		ctx->ring[engine->id].ring = ring;
+	}
+
+	rq = kzalloc(sizeof(*rq), GFP_KERNEL);
+	if (rq == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&rq->kref);
+	INIT_LIST_HEAD(&rq->vmas);
+
+	rq->i915 = engine->i915;
+	rq->ring = ring;
+	rq->engine = engine;
+
+	rq->reset_counter = atomic_read(&rq->i915->gpu_error.reset_counter);
+	if (rq->reset_counter & (I915_RESET_IN_PROGRESS_FLAG | I915_WEDGED)) {
+		ret = rq->reset_counter & I915_WEDGED ? -EIO : -EAGAIN;
+		goto err;
+	}
+
+	rq->seqno = next_seqno(rq->i915);
+	rq->head = ring->tail;
+	rq->outstanding = true;
+	rq->pending_flush = I915_INVALIDATE_CACHES;
+
+	rq->ctx = ctx;
+	i915_gem_context_reference(rq->ctx);
+
+	ret = i915_switch_context(rq, ctx);
 	if (ret)
-		return ret;
+		goto err_ctx;
+
+	return rq;
+
+err_ctx:
+	i915_gem_context_unreference(ctx);
+err:
+	kfree(rq);
+	return ERR_PTR(ret);
+}
 
-	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+struct i915_gem_request *
+intel_engine_find_active_request(struct intel_engine_cs *engine)
+{
+	struct i915_gem_request *rq;
+
+	list_for_each_entry(rq, &engine->requests, engine_list)
+		if (!__i915_request_complete__wa(rq))
+			return rq;
+
+	return NULL;
+}
+
+struct i915_gem_request *
+intel_engine_seqno_to_request(struct intel_engine_cs *engine,
+			      u32 seqno)
+{
+	struct i915_gem_request *rq;
+
+	list_for_each_entry(rq, &engine->requests, engine_list) {
+		if (rq->seqno == seqno)
+			return rq;
+		/* List is oldest-first: stop once rq->seqno is beyond seqno */
+		if (__i915_seqno_passed(rq->seqno, seqno))
+			break;
+	}
+
+	return NULL;
+}
+
+void intel_engine_cleanup(struct intel_engine_cs *engine)
+{
+	WARN_ON(!intel_engine_idle(engine));
+	WARN_ON(!list_empty(&engine->requests));
+
+	if (engine->cleanup)
+		engine->cleanup(engine);
+}
+
+int intel_engine_suspend(struct intel_engine_cs *engine)
+{
+	struct intel_ringbuffer *ring;
+	int ret = 0;
+
+	if (WARN_ON(!intel_engine_initialized(engine)))
+		return 0;
+
+	if (engine->suspend)
+		ret = engine->suspend(engine);
+
+	list_for_each_entry(ring, &engine->rings, engine_list) {
+		if (ring->retired_head == -1)
+			continue;
+
+		ring->head = ring->retired_head;
+		ring->retired_head = -1;
+	}
+
+	return ret;
+}
+
+int intel_engine_resume(struct intel_engine_cs *engine)
+{
+	int ret = 0;
+
+	if (WARN_ON(!intel_engine_initialized(engine)))
+		return 0;
+
+	if (engine->resume)
+		ret = engine->resume(engine);
+
+	return ret;
+}
+
+int intel_engine_reset(struct intel_engine_cs *engine)
+{
+	struct intel_ringbuffer *ring;
+	int ret = 0;
+
+	if (WARN_ON(!intel_engine_initialized(engine)))
+		return 0;
+
+	if (engine->reset)
+		ret = engine->reset(engine);
+
+	memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
+
+	while (!list_empty(&engine->requests)) {
+		struct i915_gem_request *rq;
+
+		rq = list_first_entry(&engine->requests,
+				      struct i915_gem_request,
+				      engine_list);
+
+		i915_request_retire(rq);
+	}
+
+	list_for_each_entry(ring, &engine->rings, engine_list) {
+		struct drm_i915_gem_object *obj;
+
+		if (ring->retired_head != -1) {
+			ring->head = ring->retired_head;
+			ring->retired_head = -1;
+		}
+
+		if (ring->last_context == NULL)
+			continue;
+
+		obj = ring->last_context->ring[engine->id].state;
+		if (obj)
+			i915_gem_object_ggtt_unpin(obj);
+
+		ring->last_context = NULL;
+	}
+
+	return ret;
+}
+
+static int ring_wait(struct intel_ringbuffer *ring, int n)
+{
+	int ret;
+
+	trace_intel_ringbuffer_wait(ring, n);
+
+	do {
+		struct i915_gem_request *rq;
+
+		i915_gem_retire_requests__engine(ring->engine);
+		if (ring->retired_head != -1) {
+			ring->head = ring->retired_head;
+			ring->retired_head = -1;
+
+			ring->space = intel_ring_space(ring);
+			if (ring->space >= n)
+				return 0;
+		}
+
+		list_for_each_entry(rq, &ring->breadcrumbs, breadcrumb_list)
+			if (__intel_ring_space(rq->tail, ring->tail,
+					       ring->size, I915_RING_RSVD) >= n)
+				break;
+
+		if (&rq->breadcrumb_list == &ring->breadcrumbs)
+			return -EDEADLK;
+
+		ret = i915_request_wait(rq);
+	} while (ret == 0);
+
+	return ret;
+}
+
+static int ring_wrap(struct intel_ringbuffer *ring, int bytes)
+{
+	uint32_t __iomem *virt;
+	int rem;
+
+	rem = ring->size - ring->tail;
+	if (ring->space < rem) {
+		rem = ring_wait(ring, rem);
+		if (rem)
+			return rem;
+	}
+
+	if (unlikely(ring->tail + bytes <= ring->effective_size))
+		return 0;
+
+	trace_intel_ringbuffer_wrap(ring, rem);
+
+	virt = ring->virtual_start + ring->tail;
+	rem = ring->size - ring->tail;
+
+	ring->space -= rem;
+	ring->tail = 0;
+
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
 
-	ring->gpu_caches_dirty = false;
 	return 0;
 }
 
-int
-intel_engine_invalidate_all_caches(struct intel_engine_cs *ring)
+static int __intel_ring_prepare(struct intel_ringbuffer *ring,
+				int bytes)
 {
-	uint32_t flush_domains;
 	int ret;
 
-	flush_domains = 0;
-	if (ring->gpu_caches_dirty)
-		flush_domains = I915_GEM_GPU_DOMAINS;
+	trace_intel_ringbuffer_begin(ring, bytes);
 
-	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
-	if (ret)
-		return ret;
+	if (unlikely(ring->tail + bytes > ring->effective_size)) {
+		ret = ring_wrap(ring, bytes);
+		if (unlikely(ret))
+			return ret;
+	}
 
-	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	if (unlikely(ring->space < bytes)) {
+		ret = ring_wait(ring, bytes);
+		if (unlikely(ret))
+			return ret;
+	}
 
-	ring->gpu_caches_dirty = false;
 	return 0;
 }
 
-void
-intel_stop_engine(struct intel_engine_cs *ring)
+struct intel_ringbuffer *
+intel_ring_begin(struct i915_gem_request *rq,
+		 int num_dwords)
 {
+	struct intel_ringbuffer *ring = rq->ring;
 	int ret;
 
-	if (!intel_engine_initialized(ring))
-		return;
+	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
+	if (ret)
+		return ERR_PTR(ret);
 
-	ret = intel_engine_idle(ring);
-	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
-		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
-			  ring->name, ret);
+	ring->space -= num_dwords * sizeof(uint32_t);
 
-	stop_ring(ring);
+	return ring;
+}
+
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_gem_request *rq)
+{
+	struct intel_ringbuffer *ring;
+	int tail, num_dwords;
+
+	do {
+		tail = rq->ring->tail;
+		num_dwords = (tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
+		if (num_dwords == 0)
+			return 0;
+
+		num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
+		ring = intel_ring_begin(rq, num_dwords);
+		if (IS_ERR(ring))
+			return PTR_ERR(ring);
+	} while (tail != rq->ring->tail);
+
+	while (num_dwords--)
+		intel_ring_emit(ring, MI_NOOP);
+
+	intel_ring_advance(ring);
+
+	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 8648c42..cb5e49d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -20,8 +20,11 @@
  * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
  * cacheline, the Head Pointer must not be greater than the Tail
  * Pointer."
+ *
+ * To also accommodate errata on 830/845 which make the last pair of cachelines
+ * in the ringbuffer unavailable, reduce the available space further.
  */
-#define I915_RING_FREE_SPACE 64
+#define I915_RING_RSVD (2*CACHELINE_BYTES)
 
 struct  intel_hw_status_page {
 	u32		*page_addr;
@@ -51,27 +54,9 @@ struct  intel_hw_status_page {
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
 #define i915_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SIGNAL_OFFSET(__engine, to)			     \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
-	((__engine)->id * I915_NUM_ENGINES * i915_semaphore_seqno_size) +	\
-	(i915_semaphore_seqno_size * (to)))
-
-#define GEN8_WAIT_OFFSET(__engine, from)			     \
-	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
-	((from) * I915_NUM_ENGINES * i915_semaphore_seqno_size) + \
-	(i915_semaphore_seqno_size * (__engine)->id))
-
-#define GEN8_RING_SEMAPHORE_INIT do { \
-	if (!dev_priv->semaphore_obj) { \
-		break; \
-	} \
-	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \
-	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \
-	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \
-	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \
-	ring->semaphore.signal_ggtt[VCS2] = GEN8_SIGNAL_OFFSET(ring, VCS2); \
-	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
-	} while(0)
+#define GEN8_SEMAPHORE_OFFSET(__dp, __from, __to)			     \
+	(i915_gem_obj_ggtt_offset((__dp)->semaphore_obj) + \
+	 ((__from) * I915_NUM_ENGINES + (__to)) * i915_semaphore_seqno_size)
 
 enum intel_engine_hangcheck_action {
 	HANGCHECK_IDLE = 0,
@@ -93,39 +78,49 @@ struct intel_engine_hangcheck {
 	int deadlock;
 };
 
+struct i915_gem_request;
+struct intel_context;
+struct intel_engine_cs;
+
 struct intel_ringbuffer {
-	struct drm_i915_gem_object *obj;
-	void __iomem *virtual_start;
+	struct intel_context *last_context;
 
 	struct intel_engine_cs *engine;
+	struct list_head engine_list;
 
-	/*
-	 * FIXME: This backpointer is an artifact of the history of how the
-	 * execlist patches came into being. It will get removed once the basic
-	 * code has landed.
+	struct drm_i915_gem_object *obj;
+	void __iomem *virtual_start;
+
+	/**
+	 * List of breadcrumbs associated with GPU requests currently
+	 * outstanding.
 	 */
-	struct intel_context *FIXME_lrc_ctx;
+	struct list_head requests;
+	struct list_head breadcrumbs;
 
-	u32 head;
-	u32 tail;
+	int head;
+	int tail;
 	int space;
+
 	int size;
 	int effective_size;
 
 	/** We track the position of the requests in the ring buffer, and
-	 * when each is retired we increment last_retired_head as the GPU
+	 * when each is retired we increment retired_head as the GPU
 	 * must have finished processing the request and so we know we
 	 * can advance the ringbuffer up to that position.
 	 *
-	 * last_retired_head is set to -1 after the value is consumed so
+	 * retired_head is set to -1 after the value is consumed so
 	 * we can detect new retirements.
 	 */
-	u32 last_retired_head;
+	int retired_head;
+	int breadcrumb_tail;
 };
 
-struct  intel_engine_cs {
-	const char	*name;
-	enum intel_engine_id {
+struct intel_engine_cs {
+	struct drm_i915_private *i915;
+	const char *name;
+	enum intel_ring_id {
 		RCS = 0x0,
 		VCS,
 		BCS,
@@ -133,43 +128,62 @@ struct  intel_engine_cs {
 		VCS2
 	} id;
 #define I915_NUM_ENGINES 5
+#define I915_NUM_ENGINE_BITS 4
 #define LAST_USER_RING (VECS + 1)
-	u32		mmio_base;
-	struct		drm_device *dev;
-	struct intel_ringbuffer *buffer;
+	u32 mmio_base;
+
+	struct list_head rings;
+	struct list_head requests;
+	struct list_head pending;
+	u32 breadcrumb[I915_NUM_ENGINES];
 
 	struct intel_hw_status_page status_page;
 
-	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
+	struct intel_ringbuffer *legacy_ring;
+
+	unsigned irq_refcount; /* protected by i915->irq_lock */
 	u32		irq_enable_mask;	/* bitmask to enable ring interrupt */
 	u32		trace_irq_seqno;
 	bool __must_check (*irq_get)(struct intel_engine_cs *engine);
 	void		(*irq_put)(struct intel_engine_cs *engine);
 
-	int		(*init)(struct intel_engine_cs *engine);
+	struct intel_ringbuffer *
+			(*get_ring)(struct intel_engine_cs *engine,
+				    struct intel_context *ctx);
+	void		(*put_ring)(struct intel_ringbuffer *ring,
+				    struct intel_context *ctx);
+
+	int		(*reset)(struct intel_engine_cs *engine);
+	int		(*suspend)(struct intel_engine_cs *engine);
+	int		(*resume)(struct intel_engine_cs *engine);
+	void		(*cleanup)(struct intel_engine_cs *engine);
 
-	void		(*write_tail)(struct intel_engine_cs *engine,
-				      u32 value);
-	int __must_check (*flush)(struct intel_engine_cs *engine,
-				  u32	invalidate_domains,
-				  u32	flush_domains);
-	int		(*add_request)(struct intel_engine_cs *engine);
 	/* Some chipsets are not quite as coherent as advertised and need
 	 * an expensive kick to force a true read of the up-to-date seqno.
 	 * However, the up-to-date seqno is not always required and the last
 	 * seen value is good enough. Note that the seqno will always be
 	 * monotonic, even if not coherent.
 	 */
-	u32		(*get_seqno)(struct intel_engine_cs *engine,
-				     bool lazy_coherency);
+	u32		(*get_seqno)(struct intel_engine_cs *engine);
 	void		(*set_seqno)(struct intel_engine_cs *engine,
 				     u32 seqno);
-	int		(*dispatch_execbuffer)(struct intel_engine_cs *engine,
-					       u64 offset, u32 length,
-					       unsigned flags);
+
+	int __must_check (*emit_flush)(struct i915_gem_request *rq,
+				       u32 domains);
+#define I915_FLUSH_CACHES 0x1
+#define I915_INVALIDATE_CACHES 0x2
+#define I915_KICK_FBC 0x4
+	int __must_check (*emit_batchbuffer)(struct i915_gem_request *rq,
+					     u64 offset, u32 length,
+					     unsigned flags);
+	int __must_check (*emit_breadcrumb)(struct i915_gem_request *rq);
+
+	int __must_check (*add_request)(struct i915_gem_request *rq);
+	void		(*write_tail)(struct intel_engine_cs *engine,
+				      u32 value);
+
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
-	void		(*cleanup)(struct intel_engine_cs *engine);
 
 	/* GEN8 signal/wait table - never trust comments!
 	 *	  signal to	signal to    signal to   signal to      signal to
@@ -209,38 +223,23 @@ struct  intel_engine_cs {
 	 *  ie. transpose of f(x, y)
 	 */
 	struct {
-		u32	sync_seqno[I915_NUM_ENGINES-1];
-
-		union {
-			struct {
-				/* our mbox written by others */
-				u32		wait[I915_NUM_ENGINES];
-				/* mboxes this ring signals to */
-				u32		signal[I915_NUM_ENGINES];
-			} mbox;
-			u64		signal_ggtt[I915_NUM_ENGINES];
-		};
-
-		/* AKA wait() */
-		int	(*sync_to)(struct intel_engine_cs *engine,
-				   struct intel_engine_cs *to,
-				   u32 seqno);
-		int	(*signal)(struct intel_engine_cs *signaller,
-				  /* num_dwords needed by caller */
-				  unsigned int num_dwords);
+		struct {
+			/* our mbox written by others */
+			u32		wait[I915_NUM_ENGINES];
+			/* mboxes this ring signals to */
+			u32		signal[I915_NUM_ENGINES];
+		} mbox;
+
+		int	(*wait)(struct i915_gem_request *waiter,
+				struct i915_gem_request *signaller);
+		int	(*signal)(struct i915_gem_request *rq, int id);
 	} semaphore;
 
 	/* Execlists */
 	spinlock_t execlist_lock;
-	struct list_head execlist_queue;
+	u32 execlists_submitted;
 	u8 next_context_status_buffer;
 	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
-	int		(*emit_request)(struct intel_ringbuffer *enginebuf);
-	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
-				      u32 invalidate_domains,
-				      u32 flush_domains);
-	int		(*emit_bb_start)(struct intel_ringbuffer *ringbuf,
-					 u64 offset, unsigned flags);
 
 	/**
 	 * List of objects currently involved in rendering from the
@@ -252,26 +251,11 @@ struct  intel_engine_cs {
 	 *
 	 * A reference is held on the buffer while on this list.
 	 */
-	struct list_head active_list;
-
-	/**
-	 * List of breadcrumbs associated with GPU requests currently
-	 * outstanding.
-	 */
-	struct list_head request_list;
-
-	/**
-	 * Do we have some not yet emitted requests outstanding?
-	 */
-	struct drm_i915_gem_request *preallocated_lazy_request;
-	u32 outstanding_lazy_seqno;
-	bool gpu_caches_dirty;
-	bool fbc_dirty;
+	struct list_head read_list, write_list, fence_list;
 
 	wait_queue_head_t irq_queue;
 
 	struct intel_context *default_context;
-	struct intel_context *last_context;
 
 	struct intel_engine_hangcheck hangcheck;
 
@@ -315,7 +299,11 @@ struct  intel_engine_cs {
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
 };
 
-bool intel_engine_initialized(struct intel_engine_cs *engine);
+static inline bool
+intel_engine_initialized(struct intel_engine_cs *engine)
+{
+	return engine->default_context;
+}
 
 static inline unsigned
 intel_engine_flag(struct intel_engine_cs *engine)
@@ -325,7 +313,7 @@ intel_engine_flag(struct intel_engine_cs *engine)
 
 static inline u32
 intel_engine_sync_index(struct intel_engine_cs *engine,
-		      struct intel_engine_cs *other)
+			struct intel_engine_cs *other)
 {
 	int idx;
 
@@ -379,59 +367,73 @@ intel_write_status_page(struct intel_engine_cs *engine,
 #define I915_GEM_HWS_SCRATCH_INDEX	0x30
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
-void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
-int intel_alloc_ringbuffer_obj(struct drm_device *dev,
-			       struct intel_ringbuffer *ringbuf);
-
-void intel_stop_engine(struct intel_engine_cs *engine);
-void intel_cleanup_engine(struct intel_engine_cs *engine);
+struct intel_ringbuffer *
+intel_engine_alloc_ring(struct intel_engine_cs *engine,
+			int size);
+void intel_ring_free(struct intel_ringbuffer *ring);
 
-int __must_check intel_ring_begin(struct intel_engine_cs *engine, int n);
-int __must_check intel_ring_cacheline_align(struct intel_engine_cs *engine);
-static inline void intel_ring_emit(struct intel_engine_cs *engine,
+struct intel_ringbuffer *__must_check
+intel_ring_begin(struct i915_gem_request *rq, int n);
+int __must_check intel_ring_cacheline_align(struct i915_gem_request *rq);
+static inline void intel_ring_emit(struct intel_ringbuffer *ring,
 				   u32 data)
 {
-	struct intel_ringbuffer *ringbuf = engine->buffer;
-	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
-	ringbuf->tail += 4;
+	iowrite32(data, ring->virtual_start + ring->tail);
+	ring->tail += 4;
 }
-static inline void intel_ring_advance(struct intel_engine_cs *engine)
+static inline void intel_ring_advance(struct intel_ringbuffer *ring)
 {
-	struct intel_ringbuffer *ringbuf = engine->buffer;
-	ringbuf->tail &= ringbuf->size - 1;
+	ring->tail &= ring->size - 1;
 }
-int __intel_ring_space(int head, int tail, int size);
-int intel_ring_space(struct intel_ringbuffer *ringbuf);
-bool intel_engine_stopped(struct intel_engine_cs *engine);
-void __intel_ring_advance(struct intel_engine_cs *engine);
-
-int __must_check intel_engine_idle(struct intel_engine_cs *engine);
-void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno);
-int intel_engine_flush_all_caches(struct intel_engine_cs *engine);
-int intel_engine_invalidate_all_caches(struct intel_engine_cs *engine);
-
-void intel_fini_pipe_control(struct intel_engine_cs *engine);
-int intel_init_pipe_control(struct intel_engine_cs *engine);
-
-int intel_init_render_engine(struct drm_device *dev);
-int intel_init_bsd_engine(struct drm_device *dev);
-int intel_init_bsd2_engine(struct drm_device *dev);
-int intel_init_blt_engine(struct drm_device *dev);
-int intel_init_vebox_engine(struct drm_device *dev);
 
-u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
-
-static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
+static inline int __intel_ring_space(int head, int tail, int size, int rsvd)
 {
-	return ringbuf->tail;
+	int space = head - (tail + 8);
+	if (space < 0)
+		space += size;
+	return space - rsvd;
 }
 
-static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
+static inline int intel_ring_space(struct intel_ringbuffer *ring)
 {
-	BUG_ON(engine->outstanding_lazy_seqno == 0);
-	return engine->outstanding_lazy_seqno;
+	return __intel_ring_space(ring->head, ring->tail,
+				  ring->size, I915_RING_RSVD);
 }
 
+
+struct i915_gem_request * __must_check __attribute__((nonnull))
+intel_engine_alloc_request(struct intel_engine_cs *engine,
+			   struct intel_context *ctx);
+
+struct i915_gem_request *
+intel_engine_find_active_request(struct intel_engine_cs *engine);
+
+struct i915_gem_request *
+intel_engine_seqno_to_request(struct intel_engine_cs *engine,
+			      u32 seqno);
+
+int intel_init_render_engine(struct drm_i915_private *i915);
+int intel_init_bsd_engine(struct drm_i915_private *i915);
+int intel_init_bsd2_engine(struct drm_i915_private *i915);
+int intel_init_blt_engine(struct drm_i915_private *i915);
+int intel_init_vebox_engine(struct drm_i915_private *i915);
+
+#define intel_engine_stopped(engine) \
+	(engine->i915->gpu_error.stop_rings & intel_engine_flag(engine))
+int __must_check intel_engine_sync(struct intel_engine_cs *engine);
+int __must_check intel_engine_flush(struct intel_engine_cs *engine,
+				    struct intel_context *ctx,
+				    u32 breadcrumbs);
+bool intel_engine_idle(struct intel_engine_cs *engine);
+
+int intel_engine_reset(struct intel_engine_cs *engine);
+int intel_engine_suspend(struct intel_engine_cs *engine);
+int intel_engine_resume(struct intel_engine_cs *engine);
+void intel_engine_cleanup(struct intel_engine_cs *engine);
+
+
+u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
+
 static inline void i915_trace_irq_get(struct intel_engine_cs *engine, u32 seqno)
 {
 	if (engine->trace_irq_seqno == 0 && engine->irq_get(engine))
-- 
1.9.1




More information about the Intel-gfx mailing list