[Intel-gfx] [PATCH 08/40] drm/i915: Start of GPU scheduler

Fri Dec 11 05:16:55 PST 2015

From: John Harrison <John.C.Harrison at Intel.com>

Initial creation of scheduler source files. Note that this patch
implements most of the scheduler functionality but does not hook it in
to the driver yet. It also leaves the scheduler code in 'pass through'
mode so that even when it is hooked in, it will not actually do very
much. This allows the hooks to be added one at a time in bite-size
chunks and only when the scheduler is finally enabled at the end does
anything start happening.

The general theory of operation is that when batch buffers are
submitted to the driver, the execbuffer() code assigns a unique
request and then packages up all the information required to execute
the batch buffer at a later time. This package is given over to the
scheduler which adds it to an internal node list. The scheduler also
scans the list of objects associated with the batch buffer and
compares them against the objects already in use by other buffers in
the node list. If matches are found then the new batch buffer node is
marked as being dependent upon the matching node. The same is done for
the context object. The scheduler also bumps up the priority of such
matching nodes on the grounds that the more dependencies a given batch
buffer has the more important it is likely to be.

The scheduler aims to have a given (tuneable) number of batch buffers
in flight on the hardware at any given time. If fewer than this are
currently executing when a new node is queued, then the node is passed
straight through to the submit function. Otherwise it is simply added
to the queue and the driver returns back to user land.

As each batch buffer completes, it raises an interrupt which wakes up
the scheduler. Note that it is possible for multiple buffers to
complete before the IRQ handler gets to run. Further, it is possible
for the seqno values to be un-ordered (particularly once pre-emption
is enabled). However, the scheduler keeps the list of executing
buffers in order of hardware submission. Thus it can scan through the
list until a matching seqno is found and then mark all in flight nodes
from that point on as completed.

A deferred work queue is also poked by the interrupt handler. When
this wakes up it can do more involved processing such as actually
removing completed nodes from the queue and freeing up the resources
associated with them (internal memory allocations, DRM object
references, context reference, etc.). The work handler also checks the
in flight count and calls the submission code if a new slot has
appeared.

When the scheduler's submit code is called, it scans the queued node
list for the highest priority node that has no unmet dependencies.
Note that the dependency calculation is complex as it must take
inter-ring dependencies and potential preemptions into account. Note
also that in the future this will be extended to include external
dependencies such as the Android Native Sync file descriptors and/or
the Linux dma-buf synchronisation scheme.

If a suitable node is found then it is sent to execbuf_final() for
submission to the hardware. The in flight count is then re-checked and
a new node popped from the list if appropriate.

Note that this patch does not implement pre-emptive scheduling. Only
basic scheduling by re-ordering batch buffer submission is currently
implemented.

v2: Changed priority levels to +/-1023 due to feedback from Chris
Wilson.

Removed redundant index from scheduler node.

Changed time stamps to use jiffies instead of raw monotonic. This
provides lower resolution but improved compatibility with other i915
code.

Major re-write of completion tracking code due to struct fence
conversion. The scheduler no longer has its own private IRQ handler
but just lets the existing request code handle completion events.
Instead, the scheduler now hooks into the request notify code to be
told when a request has completed.

Reduced driver mutex locking scope. Removal of scheduler nodes no
longer grabs the mutex lock.

v3: Refactor of dependency generation to make the code more readable.
Also added in read-read optimisation support - i.e., don't treat a
shared read-only buffer as being a dependency.

Allowed the killing of queued nodes rather than only flying ones.

Change-Id: I1e08f59e650a3c2bbaaa9de7627da33849b06106
For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
---
 drivers/gpu/drm/i915/Makefile         |   1 +
 drivers/gpu/drm/i915/i915_drv.h       |   4 +
 drivers/gpu/drm/i915/i915_gem.c       |   5 +
 drivers/gpu/drm/i915/i915_scheduler.c | 763 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_scheduler.h |  91 ++++
 5 files changed, 864 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_scheduler.c
 create mode 100644 drivers/gpu/drm/i915/i915_scheduler.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 15398c5..79cb38b 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -10,6 +10,7 @@ ccflags-y := -Werror
 i915-y := i915_drv.o \
 	  i915_irq.o \
 	  i915_params.o \
+	  i915_scheduler.o \
           i915_suspend.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5d390d9..23aed32 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1695,6 +1695,8 @@ struct i915_execbuffer_params {
 	struct drm_i915_gem_request     *request;
 };
 
+struct i915_scheduler;
+
 /* used in computing the new watermarks state */
 struct intel_wm_config {
 	unsigned int num_pipes_active;
@@ -1947,6 +1949,8 @@ struct drm_i915_private {
 
 	struct i915_runtime_pm pm;
 
+	struct i915_scheduler *scheduler;
+
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
 	struct {
 		int (*execbuf_submit)(struct i915_execbuffer_params *params,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 280cff0..36a11d2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,6 +38,7 @@
 #include <linux/pci.h>
 #include <linux/dma-buf.h>
 #include <../drivers/android/sync.h>
+#include "i915_scheduler.h"
 
 #define RQ_BUG_ON(expr)
 
@@ -5278,6 +5279,10 @@ int i915_gem_init(struct drm_device *dev)
 	 */
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
+	ret = i915_scheduler_init(dev);
+	if (ret)
+		goto out_unlock;
+
 	ret = i915_gem_init_userptr(dev);
 	if (ret)
 		goto out_unlock;
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
new file mode 100644
index 0000000..2e2c590
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -0,0 +1,763 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_drv.h"
+#include "i915_scheduler.h"
+
+/*
+ * Forward declarations of file-local helpers that are referenced before
+ * their definitions further down in this file.
+ */
+static int         i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node);
+static int         i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
+						   struct i915_scheduler_queue_entry *remove);
+static int         i915_scheduler_submit(struct intel_engine_cs *ring,
+					 bool was_locked);
+static uint32_t    i915_scheduler_count_flying(struct i915_scheduler *scheduler,
+					       struct intel_engine_cs *ring);
+static void        i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler);
+static int         i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
+						struct i915_scheduler_queue_entry *target,
+						uint32_t bump);
+
+/*
+ * Create the per-device scheduler state and attach it to the driver's
+ * private data. Calling this again once a scheduler is attached is a no-op.
+ *
+ * Returns 0 on success (or if already initialised), -ENOMEM if the
+ * scheduler structure cannot be allocated.
+ */
+int i915_scheduler_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler   *sched;
+	int                     ring_idx;
+
+	/* Already set up, nothing more to do. */
+	if (dev_priv->scheduler)
+		return 0;
+
+	sched = kzalloc(sizeof(*sched), GFP_KERNEL);
+	if (!sched)
+		return -ENOMEM;
+
+	spin_lock_init(&sched->lock);
+
+	/* One (initially empty) node list per ring. */
+	for (ring_idx = 0; ring_idx < I915_NUM_RINGS; ring_idx++)
+		INIT_LIST_HEAD(&sched->node_queue[ring_idx]);
+
+	/* Default tuning values: */
+	sched->min_flying             = 2;
+	sched->priority_level_preempt = 900;
+	sched->priority_level_max     = 1023;
+	sched->priority_level_min     = -1023;
+
+	/* Publish only once the structure is fully initialised. */
+	dev_priv->scheduler = sched;
+
+	return 0;
+}
+
+/*
+ * Scan one ring's node list and record in node->dep_list every incomplete
+ * entry that 'node' must be ordered after. An entry is a dependency when:
+ *  - it belongs to the same context on the same ring as 'node', or
+ *  - it shares at least one object with 'node' and at least one of the two
+ *    users of that object is not read-only.
+ *
+ * No bounds check is made on dep_list here; the caller is responsible for
+ * having sized it to hold every incomplete entry.
+ */
+static void i915_generate_dependencies(struct i915_scheduler *scheduler,
+				       struct i915_scheduler_queue_entry *node,
+				       uint32_t ring)
+{
+	struct i915_scheduler_queue_entry *other;
+	struct i915_scheduler_obj_entry   *mine, *theirs;
+	bool                              depends;
+	int                               m, t;
+
+	list_for_each_entry(other, &scheduler->node_queue[ring], link) {
+		if (I915_SQS_IS_COMPLETE(other))
+			continue;
+
+		/*
+		 * Batches on the same ring for the same
+		 * context must be kept in order.
+		 */
+		depends = (node->params.ctx == other->params.ctx) &&
+			  (node->params.ring == other->params.ring);
+
+		/*
+		 * Otherwise, batches working on the same objects must be
+		 * kept in order. Stop at the first conflicting object.
+		 */
+		for (m = 0; !depends && (m < node->num_objs); m++) {
+			mine = &node->saved_objects[m];
+
+			for (t = 0; t < other->num_objs; t++) {
+				theirs = &other->saved_objects[t];
+
+				if (mine->obj != theirs->obj)
+					continue;
+
+				/* Two readers never conflict. */
+				if (mine->read_only && theirs->read_only)
+					continue;
+
+				depends = true;
+				break;
+			}
+		}
+
+		if (depends)
+			node->dep_list[node->num_deps++] = other;
+	}
+}
+
+/*
+ * Hand a prepared execbuffer over to the scheduler.
+ *
+ * @qe: description of the batch buffer to execute. The entry itself is
+ *      copied, so the caller keeps ownership of the structure.
+ *
+ * At present the function is hard-wired into 'pass through' mode: the batch
+ * is sent straight to gt.execbuf_final() and, on success, the resources
+ * owned by the entry (cliprects, secure batch object) are released here. On
+ * failure nothing is cleaned up because the caller does that.
+ *
+ * The code after the pass-through block is the real queueing path. It copies
+ * the entry into a freshly allocated node, takes a reference on the request,
+ * works out which incomplete nodes the new one depends on, bumps the priority
+ * of those dependencies, appends the node to its ring's list and kicks the
+ * submission code if too few nodes are in flight.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
+{
+	struct drm_i915_private *dev_priv = qe->params.dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	struct intel_engine_cs  *ring = qe->params.ring;
+	struct i915_scheduler_queue_entry  *node;
+	struct i915_scheduler_queue_entry  *test;
+	unsigned long       flags;
+	bool                not_flying;
+	int                 i, r;
+	int                 incomplete = 0;
+
+	BUG_ON(!scheduler);
+
+	if (1/*i915.scheduler_override & i915_so_direct_submit*/) {
+		int ret;
+
+		scheduler->flags[qe->params.ring->id] |= i915_sf_submitting;
+		ret = dev_priv->gt.execbuf_final(&qe->params);
+		scheduler->flags[qe->params.ring->id] &= ~i915_sf_submitting;
+
+		/*
+		 * Don't do any clean up on failure because the caller will
+		 * do it all anyway.
+		 */
+		if (ret)
+			return ret;
+
+		/* Free everything that is owned by the QE structure: */
+		kfree(qe->params.cliprects);
+		if (qe->params.dispatch_flags & I915_DISPATCH_SECURE)
+			i915_gem_execbuff_release_batch_obj(qe->params.batch_obj);
+
+		return 0;
+	}
+
+	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	*node = *qe;
+	INIT_LIST_HEAD(&node->link);
+	node->status = i915_sqs_queued;
+	node->stamp  = jiffies;
+	i915_gem_request_reference(node->params.request);
+
+	/* Need to determine the number of incomplete entries in the list as
+	 * that will be the maximum size of the dependency list.
+	 *
+	 * Note that the allocation must not be made with the spinlock acquired
+	 * as kmalloc can sleep. However, the unlock/relock is safe because no
+	 * new entries can be queued up during the unlock as the i915 driver
+	 * mutex is still held. Entries could be removed from the list but that
+	 * just means the dep_list will be over-allocated which is fine.
+	 */
+	spin_lock_irqsave(&scheduler->lock, flags);
+	for (r = 0; r < I915_NUM_RINGS; r++) {
+		list_for_each_entry(test, &scheduler->node_queue[r], link) {
+			if (I915_SQS_IS_COMPLETE(test))
+				continue;
+
+			incomplete++;
+		}
+	}
+
+	/* Temporarily unlock to allocate memory: */
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+	if (incomplete) {
+		/* kmalloc_array() guards the size multiplication. */
+		node->dep_list = kmalloc_array(incomplete,
+					       sizeof(node->dep_list[0]),
+					       GFP_KERNEL);
+		if (!node->dep_list) {
+			/*
+			 * Drop the request reference taken above, otherwise
+			 * it is leaked along with the abandoned node.
+			 */
+			i915_gem_request_unreference(node->params.request);
+			kfree(node);
+			return -ENOMEM;
+		}
+	} else {
+		node->dep_list = NULL;
+	}
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+	node->num_deps = 0;
+
+	if (node->dep_list) {
+		for (r = 0; r < I915_NUM_RINGS; r++)
+			i915_generate_dependencies(scheduler, node, r);
+
+		BUG_ON(node->num_deps > incomplete);
+	}
+
+	/* Clamp the requested priority to the permitted range. */
+	if (node->priority > scheduler->priority_level_max)
+		node->priority = scheduler->priority_level_max;
+	else if (node->priority < scheduler->priority_level_min)
+		node->priority = scheduler->priority_level_min;
+
+	/* A positive priority is passed on to everything this node waits on. */
+	if ((node->priority > 0) && node->num_deps) {
+		i915_scheduler_priority_bump_clear(scheduler);
+
+		for (i = 0; i < node->num_deps; i++)
+			i915_scheduler_priority_bump(scheduler,
+					node->dep_list[i], node->priority);
+	}
+
+	list_add_tail(&node->link, &scheduler->node_queue[ring->id]);
+
+	not_flying = i915_scheduler_count_flying(scheduler, ring) <
+						 scheduler->min_flying;
+
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	/*
+	 * The node is queued whatever happens next, so the result of the
+	 * submission attempt is deliberately not returned to the caller.
+	 */
+	if (not_flying)
+		i915_scheduler_submit(ring, true);
+
+	return 0;
+}
+
+/*
+ * Mark a popped node as in flight and put it back on its ring's list.
+ *
+ * The node must be in the 'popped' state. It is inserted at the head of the
+ * ring's node list and, if the scheduler has not yet done so for this ring,
+ * a ring interrupt reference is taken via ring->irq_get().
+ *
+ * Returns 0 on success or -EINVAL if the interrupt reference could not be
+ * obtained (the node is still left marked as flying in that case).
+ */
+static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node)
+{
+	struct drm_i915_private *dev_priv;
+	struct i915_scheduler   *scheduler;
+	struct intel_engine_cs  *ring;
+
+	/* Validate the node before anything is read through it. */
+	BUG_ON(!node);
+	BUG_ON(node->status != i915_sqs_popped);
+
+	dev_priv  = node->params.dev->dev_private;
+	scheduler = dev_priv->scheduler;
+	BUG_ON(!scheduler);
+
+	ring = node->params.ring;
+
+	/* Add the node (which must currently be in the popped state) to the
+	 * front of the queue. This ensures that flying nodes are always held
+	 * in hardware submission order. */
+	list_add(&node->link, &scheduler->node_queue[ring->id]);
+
+	node->status = i915_sqs_flying;
+
+	if (!(scheduler->flags[ring->id] & i915_sf_interrupts_enabled)) {
+		bool    success;
+
+		success = ring->irq_get(ring);
+		if (success)
+			scheduler->flags[ring->id] |= i915_sf_interrupts_enabled;
+		else
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether entry 'idx' of node's dependency list still blocks 'node'.
+ *
+ * A dependency counts if it is still queued (on any ring), or if it is in
+ * flight on a ring other than the node's own. In flight on the same ring is
+ * not interesting because the ring serialises execution. An empty (NULL)
+ * slot, or a dependency that is neither queued nor flying, does not count.
+ *
+ * No special handling for pre-empting nodes is implemented here.
+ */
+static inline bool i915_scheduler_is_dependency_valid(
+			struct i915_scheduler_queue_entry *node, uint32_t idx)
+{
+	struct i915_scheduler_queue_entry *dep = node->dep_list[idx];
+
+	if (!dep)
+		return false;
+
+	if (I915_SQS_IS_QUEUED(dep))
+		return true;
+
+	return I915_SQS_IS_FLYING(dep) &&
+	       (dep->params.ring != node->params.ring);
+}
+
+/*
+ * Count how many nodes on the given ring's list are currently marked as
+ * flying.
+ */
+static uint32_t i915_scheduler_count_flying(struct i915_scheduler *scheduler,
+					    struct intel_engine_cs *ring)
+{
+	struct i915_scheduler_queue_entry *entry;
+	uint32_t                          in_flight = 0;
+
+	list_for_each_entry(entry, &scheduler->node_queue[ring->id], link) {
+		if (entry->status == i915_sqs_flying)
+			in_flight++;
+	}
+
+	return in_flight;
+}
+
+/*
+ * Put a node back in to the queued state so that its submission can be
+ * retried later. For example, because the ring was hung when execfinal() was
+ * called. The node must currently be marked as flying; the seqno of its
+ * request is reset to zero.
+ */
+static void i915_scheduler_node_requeue(struct i915_scheduler_queue_entry *node)
+{
+	BUG_ON(!node);
+	BUG_ON(node->status != i915_sqs_flying);
+
+	/* Clear the request's seqno and return the node to the queue. */
+	node->params.request->seqno = 0;
+	node->status = i915_sqs_queued;
+}
+
+/*
+ * Abandon a node for good by marking it dead. For example, because it is
+ * causing the ring to hang or is using some resource that no longer exists.
+ * The node must not already be complete or dead.
+ */
+static void i915_scheduler_node_kill(struct i915_scheduler_queue_entry *node)
+{
+	BUG_ON(!node);
+	BUG_ON((node->status == i915_sqs_complete) ||
+	       (node->status == i915_sqs_dead));
+
+	node->status = i915_sqs_dead;
+}
+
+/*
+ * A sequence number has popped out of the hardware and the request handling
+ * code has mapped it back to a request and will mark that request complete.
+ * It also calls this function to notify the scheduler about the completion
+ * so the scheduler's node can be updated appropriately.
+ * Returns true if the request is scheduler managed, false if not.
+ *
+ * NOTE: the request-to-node lookup is not implemented yet, so 'node' is
+ * always NULL and the function currently always returns false. The code
+ * after the NULL check is a placeholder for when the lookup exists.
+ */
+bool i915_scheduler_notify_request(struct drm_i915_gem_request *req)
+{
+	struct drm_i915_private *dev_priv  = to_i915(req->ring->dev);
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	/* XXX: Need to map back from request to node */
+	struct i915_scheduler_queue_entry *node = NULL;
+	unsigned long       flags;
+
+	/* Not a scheduler managed request (always the case for now). */
+	if (!node)
+		return false;
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+
+	WARN_ON(!I915_SQS_IS_FLYING(node));
+
+	/* Node was in flight so mark it as complete. */
+	if (req->cancelled)
+		node->status = i915_sqs_dead;
+	else
+		node->status = i915_sqs_complete;
+
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	/*
+	 * XXX: If the in-flight list is now empty then new work should be
+	 * submitted. However, this function is called from interrupt context
+	 * and thus cannot acquire mutex locks and other such things that are
+	 * necessary for fresh submission.
+	 */
+
+	return true;
+}
+
+/*
+ * Clean completed nodes out of a ring's node list and, if there is room in
+ * flight, submit more queued work.
+ *
+ * Works in two phases:
+ *  1. Under the scheduler spinlock: count queued/flying nodes, work out the
+ *     lowest seqno still pending, move removable complete/dead nodes on to a
+ *     local list and strip them from other nodes' dependency lists.
+ *  2. Under the driver struct_mutex (taken here): optionally call the
+ *     submission code, then free the nodes collected in phase 1.
+ *
+ * Returns 0 if there was nothing to do; otherwise 0 or whatever
+ * i915_scheduler_submit() returned (which may be a positive count).
+ */
+int i915_scheduler_remove(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_queue_entry  *node, *node_next;
+	unsigned long       flags;
+	int                 flying = 0, queued = 0;
+	int                 ret = 0;
+	bool                do_submit;
+	uint32_t            min_seqno;
+	struct list_head    remove;
+
+	/* Quick exit (checked without the lock) when the ring has no nodes. */
+	if (list_empty(&scheduler->node_queue[ring->id]))
+		return 0;
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+
+	/* /i915_scheduler_dump_locked(ring, "remove/pre");/ */
+
+	/*
+	 * In the case where the system is idle, starting 'min_seqno' from a big
+	 * number will cause all nodes to be removed as they are now back to
+	 * being in-order. However, this will be a problem if the last one to
+	 * complete was actually out-of-order as the ring seqno value will be
+	 * lower than one or more completed buffers. Thus code looking for the
+	 * completion of said buffers will wait forever.
+	 * Instead, use the hardware seqno as the starting point. This means
+	 * that some buffers might be kept around even in a completely idle
+	 * system but it should guarantee that no-one ever gets confused when
+	 * waiting for buffer completion.
+	 */
+	min_seqno = ring->get_seqno(ring, true);
+
+	/*
+	 * First pass: count the queued and flying nodes and lower 'min_seqno'
+	 * to the smallest non-zero seqno among nodes that are not complete.
+	 */
+	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+		if (I915_SQS_IS_QUEUED(node))
+			queued++;
+		else if (I915_SQS_IS_FLYING(node))
+			flying++;
+		else if (I915_SQS_IS_COMPLETE(node))
+			continue;
+
+		/* A seqno of zero is skipped. */
+		if (node->params.request->seqno == 0)
+			continue;
+
+		if (!i915_seqno_passed(node->params.request->seqno, min_seqno))
+			min_seqno = node->params.request->seqno;
+	}
+
+	/* Second pass: collect the removable nodes on a local list. */
+	INIT_LIST_HEAD(&remove);
+	list_for_each_entry_safe(node, node_next, &scheduler->node_queue[ring->id], link) {
+		/*
+		 * Only remove completed nodes which have a lower seqno than
+		 * all pending nodes. While there is the possibility of the
+		 * ring's seqno counting backwards, all higher buffers must
+		 * be remembered so that the 'i915_seqno_passed()' test can
+		 * report that they have in fact passed.
+		 *
+		 * NB: This is not true for 'dead' nodes. The GPU reset causes
+		 * the software seqno to restart from its initial value. Thus
+		 * the dead nodes must be removed even though their seqno values
+		 * are potentially vastly greater than the current ring seqno.
+		 */
+		if (!I915_SQS_IS_COMPLETE(node))
+			continue;
+
+		if (node->status != i915_sqs_dead) {
+			/* Keep nodes whose seqno is strictly after min_seqno. */
+			if (i915_seqno_passed(node->params.request->seqno, min_seqno) &&
+			    (node->params.request->seqno != min_seqno))
+				continue;
+		}
+
+		list_del(&node->link);
+		list_add(&node->link, &remove);
+
+		/* Strip the dependency info while the scheduler spinlock is still held */
+		i915_scheduler_remove_dependent(scheduler, node);
+
+		continue;
+	}
+
+	/*
+	 * No idea why but this seems to cause problems occasionally.
+	 * Note that the 'irq_put' code is internally reference counted
+	 * and spin_locked so it should be safe to call.
+	 */
+	/*if ((scheduler->flags[ring->id] & i915_sf_interrupts_enabled) &&
+	    (first_flight[ring->id] == NULL)) {
+		ring->irq_put(ring);
+		scheduler->flags[ring->id] &= ~i915_sf_interrupts_enabled;
+	}*/
+
+	/* Launch more packets now? */
+	do_submit = (queued > 0) && (flying < scheduler->min_flying);
+
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	/* Nothing to submit and nothing to free: avoid taking the mutex. */
+	if (!do_submit && list_empty(&remove))
+		return ret;
+
+	mutex_lock(&ring->dev->struct_mutex);
+
+	if (do_submit)
+		ret = i915_scheduler_submit(ring, true);
+
+	/* The collected nodes are on a private list, so no spinlock is needed. */
+	while (!list_empty(&remove)) {
+		node = list_first_entry(&remove, typeof(*node), link);
+		list_del(&node->link);
+
+		/* The batch buffer must be unpinned before it is unreferenced
+		 * otherwise the unpin fails with a missing vma!? */
+		if (node->params.dispatch_flags & I915_DISPATCH_SECURE)
+			i915_gem_execbuff_release_batch_obj(node->params.batch_obj);
+
+		/* Free everything that is owned by the node: */
+		i915_gem_request_unreference(node->params.request);
+		kfree(node->params.cliprects);
+		kfree(node->dep_list);
+		kfree(node);
+	}
+
+	mutex_unlock(&ring->dev->struct_mutex);
+
+	return ret;
+}
+
+/*
+ * Reset the 'bumped' marker on every node of every ring. This is done before
+ * a round of priority bumping so that circular dependencies cannot cause
+ * trouble and each node is bumped at most once per round.
+ */
+static void i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler)
+{
+	struct i915_scheduler_queue_entry *entry;
+	int ring_idx;
+
+	for (ring_idx = 0; ring_idx < I915_NUM_RINGS; ring_idx++) {
+		struct list_head *queue = &scheduler->node_queue[ring_idx];
+
+		list_for_each_entry(entry, queue, link)
+			entry->bumped = false;
+	}
+}
+
+/*
+ * Raise the priority of 'target', and recursively of every node it depends
+ * on, by 'bump'. The result saturates at priority_level_max.
+ *
+ * The 'bumped' marker stops a node being raised twice in one round and
+ * terminates the recursion on circular dependencies; callers clear the
+ * markers with i915_scheduler_priority_bump_clear() first.
+ *
+ * Returns 1 if 'target' was already at the maximum priority, 0 if it had
+ * already been bumped this round, otherwise 1 plus the sum of the values
+ * returned for its dependencies.
+ */
+static int i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
+					struct i915_scheduler_queue_entry *target,
+					uint32_t bump)
+{
+	int32_t headroom;
+	int     i, count;
+
+	if (target->priority >= scheduler->priority_level_max)
+		return 1;
+
+	if (target->bumped)
+		return 0;
+
+	/*
+	 * Priorities are signed (they may be negative) whereas the bump is
+	 * unsigned. Doing the addition in unsigned arithmetic would make any
+	 * negative priority look like a huge value and force it straight to
+	 * the maximum. Instead, compare the bump against the (strictly
+	 * positive, see the check above) distance left to the maximum and
+	 * only add it when it fits. A zero bump leaves the priority as is.
+	 */
+	headroom = scheduler->priority_level_max - target->priority;
+	if (bump >= (uint32_t) headroom)
+		target->priority = scheduler->priority_level_max;
+	else
+		target->priority += (int32_t) bump;
+
+	count = 1;
+	target->bumped = true;
+
+	/* Pass the bump on to everything this node is waiting for. */
+	for (i = 0; i < target->num_deps; i++) {
+		if (!target->dep_list[i])
+			continue;
+
+		if (target->dep_list[i]->bumped)
+			continue;
+
+		count += i915_scheduler_priority_bump(scheduler,
+						      target->dep_list[i],
+						      bump);
+	}
+
+	return count;
+}
+
+/*
+ * Pick the highest priority queued node on 'ring' that has no outstanding
+ * dependencies, unlink it from the ring's list and mark it as popped.
+ *
+ * @ring:     the ring whose node list is searched
+ * @pop_node: receives the chosen node, or NULL if none was found
+ * @flags:    not used by this function
+ *
+ * Among equal priorities the node found first in the list wins.
+ *
+ * Returns 0 if a node was popped, -EAGAIN if nothing could be popped and at
+ * least one queued node is blocked solely by nodes on other rings, and
+ * -ENODATA otherwise (an error is logged if queued nodes exist).
+ */
+static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
+				    struct i915_scheduler_queue_entry **pop_node,
+				    unsigned long *flags)
+{
+	struct drm_i915_private            *dev_priv = ring->dev->dev_private;
+	struct i915_scheduler              *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_queue_entry  *candidate;
+	struct i915_scheduler_queue_entry  *winner = NULL;
+	bool	seen_queued = false;
+	bool	seen_remote_only = false;
+	bool	local_dep, remote_dep;
+	int     d;
+
+	*pop_node = NULL;
+
+	list_for_each_entry(candidate, &scheduler->node_queue[ring->id], link) {
+		if (!I915_SQS_IS_QUEUED(candidate))
+			continue;
+		seen_queued = true;
+
+		/* Classify the dependencies that are still outstanding. */
+		local_dep  = false;
+		remote_dep = false;
+		for (d = 0; d < candidate->num_deps; d++) {
+			if (!i915_scheduler_is_dependency_valid(candidate, d))
+				continue;
+
+			if (candidate->dep_list[d]->params.ring ==
+			    candidate->params.ring)
+				local_dep = true;
+			else
+				remote_dep = true;
+		}
+
+		if (local_dep)
+			continue;
+
+		if (remote_dep) {
+			seen_remote_only = true;
+			continue;
+		}
+
+		/* Unblocked: keep it if it beats the best seen so far. */
+		if (!winner || (candidate->priority > winner->priority))
+			winner = candidate;
+	}
+
+	/* i915_scheduler_dump_queue_pop(ring, winner); */
+
+	if (!winner) {
+		/* Can only get here if:
+		 * (a) there are no buffers in the queue
+		 * (b) all queued buffers are dependent on other buffers
+		 *     e.g. on a buffer that is in flight on a different ring
+		 */
+		if (seen_remote_only) {
+			/* The only dependent buffers are on another ring. */
+			return -EAGAIN;
+		}
+
+		if (seen_queued) {
+			/* It seems that something has gone horribly wrong! */
+			DRM_ERROR("Broken dependency tracking on ring %d!\n",
+				  (int) ring->id);
+		}
+
+		return -ENODATA;
+	}
+
+	list_del(&winner->link);
+	INIT_LIST_HEAD(&winner->link);
+	winner->status = i915_sqs_popped;
+
+	*pop_node = winner;
+	return 0;
+}
+
+/*
+ * Pop runnable nodes from a ring's queue and hand them to execbuf_final()
+ * until enough are in flight or nothing more can be popped.
+ *
+ * @ring:       the ring to submit work on
+ * @was_locked: true if the caller already holds the driver struct_mutex;
+ *              if false the mutex is taken (interruptibly) here and released
+ *              again before returning
+ *
+ * The scheduler spinlock is dropped around each call to execbuf_final() and
+ * re-taken afterwards.
+ *
+ * Returns a negative error code, or the number of nodes passed to
+ * execbuf_final() by this call. If the very first pop fails, its error code
+ * (including -ENODATA) is returned unchanged; -ENODATA from a later pop is
+ * not treated as an error.
+ */
+static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
+{
+	struct drm_device   *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_queue_entry  *node;
+	unsigned long       flags;
+	int                 ret = 0, count = 0;
+
+	if (!was_locked) {
+		ret = i915_mutex_lock_interruptible(dev);
+		if (ret)
+			return ret;
+	}
+
+	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	spin_lock_irqsave(&scheduler->lock, flags);
+
+	/* First time around, complain if anything unexpected occurs: */
+	ret = i915_scheduler_pop_from_queue_locked(ring, &node, &flags);
+	if (ret) {
+		spin_unlock_irqrestore(&scheduler->lock, flags);
+
+		if (!was_locked)
+			mutex_unlock(&dev->struct_mutex);
+
+		return ret;
+	}
+
+	do {
+		BUG_ON(!node);
+		BUG_ON(node->params.ring != ring);
+		BUG_ON(node->status != i915_sqs_popped);
+		count++;
+
+		/* The call to pop above will have removed the node from the
+		 * list. So add it back in and mark it as in flight. */
+		/* NB: the return value of fly_node is not checked here. */
+		i915_scheduler_fly_node(node);
+
+		/* Drop the spinlock for the duration of the hardware submission. */
+		scheduler->flags[ring->id] |= i915_sf_submitting;
+		spin_unlock_irqrestore(&scheduler->lock, flags);
+		ret = dev_priv->gt.execbuf_final(&node->params);
+		spin_lock_irqsave(&scheduler->lock, flags);
+		scheduler->flags[ring->id] &= ~i915_sf_submitting;
+
+		if (ret) {
+			/* 1 = requeue the node, -1 = kill it, 0 = leave it alone */
+			int requeue = 1;
+
+			/* Oh dear! Either the node is broken or the ring is
+			 * busy. So need to kill the node or requeue it and try
+			 * again later as appropriate. */
+
+			switch (-ret) {
+			case ENODEV:
+			case ENOENT:
+				/* Fatal errors. Kill the node. */
+				requeue = -1;
+			break;
+
+			case EAGAIN:
+			case EBUSY:
+			case EIO:
+			case ENOMEM:
+			case ERESTARTSYS:
+			case EINTR:
+				/* Supposedly recoverable errors. */
+			break;
+
+			default:
+				DRM_DEBUG_DRIVER("<%s> Got unexpected error from execfinal(): %d!\n",
+						 ring->name, ret);
+				/* Assume it is recoverable and hope for the best. */
+			break;
+			}
+
+			/* Check that the watchdog/reset code has not nuked
+			 * the node while we weren't looking: */
+			if (node->status == i915_sqs_dead)
+				requeue = 0;
+
+			if (requeue == 1) {
+				i915_scheduler_node_requeue(node);
+				/* No point spinning if the ring is currently
+				 * unavailable so just give up and come back
+				 * later. */
+				break;
+			} else if (requeue == -1)
+				i915_scheduler_node_kill(node);
+		}
+
+		/* Keep launching until the sky is sufficiently full. */
+		if (i915_scheduler_count_flying(scheduler, ring) >=
+						scheduler->min_flying)
+			break;
+
+		ret = i915_scheduler_pop_from_queue_locked(ring, &node, &flags);
+	} while (ret == 0);
+
+	spin_unlock_irqrestore(&scheduler->lock, flags);
+
+	if (!was_locked)
+		mutex_unlock(&dev->struct_mutex);
+
+	/* Don't complain about not being able to submit extra entries */
+	if (ret == -ENODATA)
+		ret = 0;
+
+	return (ret < 0) ? ret : count;
+}
+
+/*
+ * Erase every reference to 'remove' from the dependency lists of all nodes
+ * on all rings, replacing each matching slot with NULL.
+ *
+ * It is a fatal error (BUG) for 'remove' itself to still have a dependency
+ * that is not complete.
+ *
+ * Always returns 0.
+ */
+static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
+					   struct i915_scheduler_queue_entry *remove)
+{
+	struct i915_scheduler_queue_entry  *entry;
+	int     pending = 0;
+	int     ring_idx, d;
+
+	/* The node being removed must not be waiting on anything itself. */
+	for (d = 0; d < remove->num_deps; d++) {
+		struct i915_scheduler_queue_entry *dep = remove->dep_list[d];
+
+		if (dep && !I915_SQS_IS_COMPLETE(dep))
+			pending++;
+	}
+	BUG_ON(pending);
+
+	for (ring_idx = 0; ring_idx < I915_NUM_RINGS; ring_idx++) {
+		list_for_each_entry(entry, &scheduler->node_queue[ring_idx], link) {
+			for (d = 0; d < entry->num_deps; d++) {
+				if (entry->dep_list[d] == remove)
+					entry->dep_list[d] = NULL;
+			}
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
new file mode 100644
index 0000000..00dc7f3
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _I915_SCHEDULER_H_
+#define _I915_SCHEDULER_H_
+
+/*
+ * Life cycle state of a scheduler node. Both 'complete' and 'dead' count as
+ * finished for the purposes of I915_SQS_IS_COMPLETE().
+ */
+enum i915_scheduler_queue_status {
+	/* Limbo: */
+	i915_sqs_none = 0,
+	/* Not yet submitted to hardware: */
+	i915_sqs_queued,
+	/* Popped from queue, ready to fly: */
+	i915_sqs_popped,
+	/* Sent to hardware for processing: */
+	i915_sqs_flying,
+	/* Finished processing on the hardware: */
+	i915_sqs_complete,
+	/* Killed by watchdog or catastrophic submission failure: */
+	i915_sqs_dead,
+	/* Limit value for use with arrays/loops */
+	i915_sqs_MAX
+};
+
+/* State predicates for a scheduler node; 'dead' nodes count as complete. */
+#define I915_SQS_IS_QUEUED(node)	((node)->status == i915_sqs_queued)
+#define I915_SQS_IS_FLYING(node)	((node)->status == i915_sqs_flying)
+#define I915_SQS_IS_COMPLETE(node)	(((node)->status == i915_sqs_complete) || \
+					 ((node)->status == i915_sqs_dead))
+
+/* One object used by a batch buffer, as compared during dependency generation. */
+struct i915_scheduler_obj_entry {
+	/* The object itself; entries are matched by pointer equality. */
+	struct drm_i915_gem_object          *obj;
+	/* Two read-only users of the same object do not depend on each other. */
+	bool                                read_only;
+};
+
+/* A batch buffer submission as tracked by the scheduler (a 'node'). */
+struct i915_scheduler_queue_entry {
+	/* Everything that is passed to execbuf_final() to run the batch. */
+	struct i915_execbuffer_params       params;
+	/* -1023 = lowest priority, 0 = default, 1023 = highest */
+	int32_t                             priority;
+	/* Array of num_objs objects used by this batch. */
+	struct i915_scheduler_obj_entry     *saved_objects;
+	int                                 num_objs;
+	/* Set once the node has been raised during a priority bump round. */
+	bool                                bumped;
+	/*
+	 * Array of num_deps pointers to nodes this one depends on. A slot is
+	 * set to NULL when the node it referred to is removed.
+	 */
+	struct i915_scheduler_queue_entry   **dep_list;
+	int                                 num_deps;
+	enum i915_scheduler_queue_status    status;
+	/* Value of jiffies when the node was queued. */
+	unsigned long                       stamp;
+	/* Linkage in the per-ring node list (or a temporary removal list). */
+	struct list_head                    link;
+};
+
+/* Per-device scheduler state, pointed to by drm_i915_private::scheduler. */
+struct i915_scheduler {
+	/* One list of nodes per ring; flying nodes are added at the head. */
+	struct list_head    node_queue[I915_NUM_RINGS];
+	/* Per-ring i915_sf_* flag bits. */
+	uint32_t            flags[I915_NUM_RINGS];
+	/* Protects the node lists and the state of the nodes on them. */
+	spinlock_t          lock;
+
+	/* Tuning parameters: */
+	/* Node priorities are clamped to [priority_level_min, priority_level_max]. */
+	int32_t             priority_level_min;
+	int32_t             priority_level_max;
+	/* Given a default value at init; not otherwise used in this patch. */
+	int32_t             priority_level_preempt;
+	/* Submission continues until at least this many nodes fly on a ring. */
+	uint32_t            min_flying;
+};
+
+/* Flag bits for i915_scheduler::flags */
+enum {
+	/* The scheduler has taken a ring interrupt reference via irq_get(). */
+	i915_sf_interrupts_enabled  = (1 << 0),
+	/* Set for the duration of a call to execbuf_final(). */
+	i915_sf_submitting          = (1 << 1),
+};
+
+/*
+ * Public scheduler entry points. This header relies on i915_drv.h having
+ * been included first for the driver types used below.
+ */
+int         i915_scheduler_init(struct drm_device *dev);
+int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
+bool        i915_scheduler_notify_request(struct drm_i915_gem_request *req);
+/* Defined non-static in i915_scheduler.c, so it needs a prototype here. */
+int         i915_scheduler_remove(struct intel_engine_cs *ring);
+
+#endif  /* _I915_SCHEDULER_H_ */
-- 
1.9.1