[Intel-gfx] [RFC 07/39] drm/i915: Start of GPU scheduler

Daniel Vetter daniel at ffwll.ch
Tue Jul 21 02:40:10 PDT 2015


On Fri, Jul 17, 2015 at 03:33:16PM +0100, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
> 
> Initial creation of scheduler source files. Note that this patch implements most
> of the scheduler functionality but does not hook it into the driver yet. It
> also leaves the scheduler code in 'pass through' mode so that even when it is
> hooked in, it will not actually do very much. This allows the hooks to be added
> one at a time in byte-sized chunks, and only when the scheduler is finally
> enabled at the end does anything start happening.
> 
> The general theory of operation is that when batch buffers are submitted to the
> driver, the execbuffer() code assigns a unique request and then packages up all
> the information required to execute the batch buffer at a later time. This
> package is given over to the scheduler which adds it to an internal node list.
> The scheduler also scans the list of objects associated with the batch buffer
> and compares them against the objects already in use by other buffers in the
> node list. If matches are found then the new batch buffer node is marked as
> being dependent upon the matching node. The same is done for the context object.
> The scheduler also bumps up the priority of such matching nodes on the grounds
> that the more dependencies a given batch buffer has, the more important it is
> likely to be.
> 
> The scheduler aims to have a given (tuneable) number of batch buffers in flight
> on the hardware at any given time. If fewer than this are currently executing
> when a new node is queued, then the node is passed straight through to the
> submit function. Otherwise it is simply added to the queue and the driver
> returns to user land.
> 
> As each batch buffer completes, it raises an interrupt which wakes up the
> scheduler. Note that it is possible for multiple buffers to complete before the
> IRQ handler gets to run. Further, it is possible for the seqno values to arrive
> out of order (particularly once pre-emption is enabled). However, the scheduler
> keeps the list of executing buffers in order of hardware submission. Thus it can
> scan through the list until a matching seqno is found and then mark all in
> flight nodes from that point on as completed.
> 
> A deferred work queue is also poked by the interrupt handler. When this wakes up
> it can do more involved processing such as actually removing completed nodes
> from the queue and freeing up the resources associated with them (internal
> memory allocations, DRM object references, context reference, etc.). The work
> handler also checks the in flight count and calls the submission code if a new
> slot has appeared.
> 
> When the scheduler's submit code is called, it scans the queued node list for
> the highest priority node that has no unmet dependencies. Note that the
> dependency calculation is complex as it must take inter-ring dependencies and
> potential preemptions into account. Note also that in the future this will be
> extended to include external dependencies such as Android Native Sync file
> descriptors and/or the Linux dma-buf synchronisation scheme.
> 
> If a suitable node is found then it is sent to execbuf_final() for submission
> to the hardware. The in-flight count is then re-checked and a new node popped
> from the list if appropriate.
> 
> Note that this patch does not implement pre-emptive scheduling. Only basic
> scheduling by re-ordering batch buffer submission is currently implemented.
> 
> Change-Id: I1e08f59e650a3c2bbaaa9de7627da33849b06106
> For: VIZ-1587
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile         |   1 +
>  drivers/gpu/drm/i915/i915_drv.h       |   4 +
>  drivers/gpu/drm/i915/i915_gem.c       |   5 +
>  drivers/gpu/drm/i915/i915_scheduler.c | 776 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_scheduler.h |  91 ++++
>  5 files changed, 877 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/i915_scheduler.c
>  create mode 100644 drivers/gpu/drm/i915/i915_scheduler.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 47a74114..c367b39 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -9,6 +9,7 @@ ccflags-y := -Werror
>  # core driver code
>  i915-y := i915_drv.o \
>  	  i915_params.o \
> +	  i915_scheduler.o \
>            i915_suspend.o \
>  	  i915_sysfs.o \
>  	  intel_pm.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a680778..7d2a494 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1700,6 +1700,8 @@ struct i915_execbuffer_params {
>  	struct drm_i915_gem_request     *request;
>  };
>  
> +struct i915_scheduler;
> +
>  struct drm_i915_private {
>  	struct drm_device *dev;
>  	struct kmem_cache *objects;
> @@ -1932,6 +1934,8 @@ struct drm_i915_private {
>  
>  	struct i915_runtime_pm pm;
>  
> +	struct i915_scheduler *scheduler;
> +
>  	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
>  	struct {
>  		int (*execbuf_submit)(struct i915_execbuffer_params *params,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0c407ae..3fbc6ec 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -40,6 +40,7 @@
>  #ifdef CONFIG_SYNC
>  #include <../drivers/staging/android/sync.h>
>  #endif
> +#include "i915_scheduler.h"
>  
>  #define RQ_BUG_ON(expr)
>  
> @@ -5398,6 +5399,10 @@ i915_gem_init_hw(struct drm_device *dev)
>  
>  	i915_gem_init_swizzling(dev);
>  
> +	ret = i915_scheduler_init(dev);
> +	if (ret)
> +		return ret;
> +
>  	/*
>  	 * At least 830 can leave some of the unused rings
>  	 * "active" (ie. head != tail) after resume which
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
> new file mode 100644
> index 0000000..71d8df7
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -0,0 +1,776 @@
> +/*
> + * Copyright (c) 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_drv.h"
> +#include "i915_scheduler.h"
> +
> +static int         i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node);
> +static int         i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
> +						   struct i915_scheduler_queue_entry *remove);
> +static int         i915_scheduler_submit(struct intel_engine_cs *ring,
> +					 bool is_locked);
> +static uint32_t    i915_scheduler_count_flying(struct i915_scheduler *scheduler,
> +					       struct intel_engine_cs *ring);
> +static void        i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler);
> +static int         i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
> +						struct i915_scheduler_queue_entry *target,
> +						uint32_t bump);
> +
> +int i915_scheduler_init(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct i915_scheduler   *scheduler = dev_priv->scheduler;
> +	int                     r;
> +
> +	if (scheduler)
> +		return 0;
> +
> +	scheduler = kzalloc(sizeof(*scheduler), GFP_KERNEL);
> +	if (!scheduler)
> +		return -ENOMEM;
> +
> +	spin_lock_init(&scheduler->lock);
> +
> +	for (r = 0; r < I915_NUM_RINGS; r++)
> +		INIT_LIST_HEAD(&scheduler->node_queue[r]);
> +
> +	scheduler->index = 1;
> +
> +	/* Default tuning values: */
> +	scheduler->priority_level_max     = ~0U;
> +	scheduler->priority_level_preempt = 900;
> +	scheduler->min_flying             = 2;
> +
> +	dev_priv->scheduler = scheduler;
> +
> +	return 0;
> +}
> +
> +int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
> +{
> +	struct drm_i915_private *dev_priv = qe->params.dev->dev_private;
> +	struct i915_scheduler   *scheduler = dev_priv->scheduler;
> +	struct intel_engine_cs  *ring = qe->params.ring;
> +	struct i915_scheduler_queue_entry  *node;
> +	struct i915_scheduler_queue_entry  *test;
> +	struct timespec     stamp;
> +	unsigned long       flags;
> +	bool                not_flying, found;
> +	int                 i, j, r;
> +	int                 incomplete = 0;
> +
> +	BUG_ON(!scheduler);
> +
> +	if (1/*i915.scheduler_override & i915_so_direct_submit*/) {
> +		int ret;
> +
> +		qe->scheduler_index = scheduler->index++;
> +
> +		scheduler->flags[qe->params.ring->id] |= i915_sf_submitting;
> +		ret = dev_priv->gt.execbuf_final(&qe->params);
> +		scheduler->flags[qe->params.ring->id] &= ~i915_sf_submitting;
> +
> +		/*
> +		 * Don't do any clean up on failure because the caller will
> +		 * do it all anyway.
> +		 */
> +		if (ret)
> +			return ret;
> +
> +		/* Free everything that is owned by the QE structure: */
> +		kfree(qe->params.cliprects);
> +		if (qe->params.dispatch_flags & I915_DISPATCH_SECURE)
> +			i915_gem_execbuff_release_batch_obj(qe->params.batch_obj);
> +
> +		return 0;
> +	}
> +
> +	getrawmonotonic(&stamp);
> +
> +	node = kmalloc(sizeof(*node), GFP_KERNEL);
> +	if (!node)
> +		return -ENOMEM;
> +
> +	*node = *qe;
> +	INIT_LIST_HEAD(&node->link);
> +	node->status = i915_sqs_queued;
> +	node->stamp  = stamp;
> +	i915_gem_request_reference(node->params.request);
> +
> +	/* Need to determine the number of incomplete entries in the list as
> +	 * that will be the maximum size of the dependency list.
> +	 *
> +	 * Note that the allocation must not be made with the spinlock acquired
> +	 * as kmalloc can sleep. However, the unlock/relock is safe because no
> +	 * new entries can be queued up during the unlock as the i915 driver
> +	 * mutex is still held. Entries could be removed from the list but that
> +	 * just means the dep_list will be over-allocated which is fine.
> +	 */
> +	spin_lock_irqsave(&scheduler->lock, flags);
> +	for (r = 0; r < I915_NUM_RINGS; r++) {
> +		list_for_each_entry(test, &scheduler->node_queue[r], link) {
> +			if (I915_SQS_IS_COMPLETE(test))
> +				continue;
> +
> +			incomplete++;
> +		}
> +	}
> +
> +	/* Temporarily unlock to allocate memory: */
> +	spin_unlock_irqrestore(&scheduler->lock, flags);
> +	if (incomplete) {
> +		node->dep_list = kmalloc(sizeof(node->dep_list[0]) * incomplete,
> +					 GFP_KERNEL);
> +		if (!node->dep_list) {
> +			kfree(node);
> +			return -ENOMEM;
> +		}
> +	} else
> +		node->dep_list = NULL;
> +
> +	spin_lock_irqsave(&scheduler->lock, flags);
> +	node->num_deps = 0;
> +
> +	if (node->dep_list) {
> +		for (r = 0; r < I915_NUM_RINGS; r++) {
> +			list_for_each_entry(test, &scheduler->node_queue[r], link) {
> +				if (I915_SQS_IS_COMPLETE(test))
> +					continue;
> +
> +				/*
> +				 * Batches on the same ring for the same
> +				 * context must be kept in order.
> +				 */
> +				found = (node->params.ctx == test->params.ctx) &&
> +					(node->params.ring == test->params.ring);
> +
> +				/*
> +				 * Batches working on the same objects must
> +				 * be kept in order.
> +				 */
> +				for (i = 0; (i < node->num_objs) && !found; i++) {
> +					for (j = 0; j < test->num_objs; j++) {
> +						if (node->saved_objects[i].obj !=
> +							    test->saved_objects[j].obj)
> +							continue;
> +
> +						found = true;
> +						break;

I guess this should be a goto to break out of both loops?

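A minimal sketch of that pattern, purely for illustration (the label name is
made up, this is not the actual fix):

	for (i = 0; (i < node->num_objs) && !found; i++) {
		for (j = 0; j < test->num_objs; j++) {
			if (node->saved_objects[i].obj !=
				    test->saved_objects[j].obj)
				continue;

			found = true;
			goto deps_scanned;	/* leave both loops at once */
		}
	}
deps_scanned:
	if (found) {
		node->dep_list[node->num_deps] = test;
		node->num_deps++;
	}
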
> +					}
> +				}
> +
> +				if (found) {
> +					node->dep_list[node->num_deps] = test;
> +					node->num_deps++;
> +				}
> +			}
> +		}
> +
> +		BUG_ON(node->num_deps > incomplete);
> +	}

This seems to be O(pending_requests * obj_per_request ^ 2), which is a bit
excessive. Also since you only check deps at the object level this breaks
read-read.

Finally move_to_active does track all this already since it has to
exchange all the requests for new ones. We might want to move object_sync
in there too just because, but I think this would definitely fit better
in there.
-Daniel

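On the read-read point: two batches that only read a shared object do not need
to be ordered against each other; a dependency is only needed when at least one
of the two accesses is a write. A sketch of what the check could look like,
assuming a hypothetical read_only flag were recorded per saved object (no such
flag exists in this patch):

	for (i = 0; (i < node->num_objs) && !found; i++) {
		for (j = 0; j < test->num_objs; j++) {
			if (node->saved_objects[i].obj !=
				    test->saved_objects[j].obj)
				continue;

			/* Read vs read needs no serialisation. */
			if (node->saved_objects[i].read_only &&
			    test->saved_objects[j].read_only)
				continue;

			found = true;
			break;
		}
	}
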
> +
> +	if (node->priority && node->num_deps) {
> +		i915_scheduler_priority_bump_clear(scheduler);
> +
> +		for (i = 0; i < node->num_deps; i++)
> +			i915_scheduler_priority_bump(scheduler,
> +					node->dep_list[i], node->priority);
> +	}
> +
> +	node->scheduler_index = scheduler->index++;
> +
> +	list_add_tail(&node->link, &scheduler->node_queue[ring->id]);
> +
> +	not_flying = i915_scheduler_count_flying(scheduler, ring) <
> +						 scheduler->min_flying;
> +
> +	spin_unlock_irqrestore(&scheduler->lock, flags);
> +
> +	if (not_flying)
> +		i915_scheduler_submit(ring, true);
> +
> +	return 0;
> +}
> +
> +static int i915_scheduler_fly_node(struct i915_scheduler_queue_entry *node)
> +{
> +	struct drm_i915_private *dev_priv = node->params.dev->dev_private;
> +	struct i915_scheduler   *scheduler = dev_priv->scheduler;
> +	struct intel_engine_cs  *ring;
> +
> +	BUG_ON(!scheduler);
> +	BUG_ON(!node);
> +	BUG_ON(node->status != i915_sqs_popped);
> +
> +	ring = node->params.ring;
> +
> +	/* Add the node (which should currently be in state none) to the front
> +	 * of the queue. This ensures that flying nodes are always held in
> +	 * hardware submission order. */
> +	list_add(&node->link, &scheduler->node_queue[ring->id]);
> +
> +	node->status = i915_sqs_flying;
> +
> +	if (!(scheduler->flags[ring->id] & i915_sf_interrupts_enabled)) {
> +		bool    success = true;
> +
> +		success = ring->irq_get(ring);
> +		if (success)
> +			scheduler->flags[ring->id] |= i915_sf_interrupts_enabled;
> +		else
> +			return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Nodes are considered valid dependencies if they are queued on any ring or
> + * if they are in flight on a different ring. In flight on the same ring is no
> + * longer interesting for non-preemptive nodes as the ring serialises execution.
> + * For pre-empting nodes, all in flight dependencies are valid as they must not
> + * be jumped by the act of pre-empting.
> + *
> + * Anything that is neither queued nor flying is uninteresting.
> + */
> +static inline bool i915_scheduler_is_dependency_valid(
> +			struct i915_scheduler_queue_entry *node, uint32_t idx)
> +{
> +	struct i915_scheduler_queue_entry *dep;
> +
> +	dep = node->dep_list[idx];
> +	if (!dep)
> +		return false;
> +
> +	if (I915_SQS_IS_QUEUED(dep))
> +		return true;
> +
> +	if (I915_SQS_IS_FLYING(dep)) {
> +		if (node->params.ring != dep->params.ring)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +static uint32_t i915_scheduler_count_flying(struct i915_scheduler *scheduler,
> +					    struct intel_engine_cs *ring)
> +{
> +	struct i915_scheduler_queue_entry *node;
> +	uint32_t                          flying = 0;
> +
> +	list_for_each_entry(node, &scheduler->node_queue[ring->id], link)
> +		if (I915_SQS_IS_FLYING(node))
> +			flying++;
> +
> +	return flying;
> +}
> +
> +/* Add a popped node back into the queue. For example, because the ring was
> + * hung when execbuf_final() was called and thus the ring submission needs to be
> + * retried later. */
> +static void i915_scheduler_node_requeue(struct i915_scheduler_queue_entry *node)
> +{
> +	BUG_ON(!node);
> +	BUG_ON(!I915_SQS_IS_FLYING(node));
> +
> +	node->status = i915_sqs_queued;
> +}
> +
> +/* Give up on a popped node completely. For example, because it is causing the
> + * ring to hang or is using some resource that no longer exists. */
> +static void i915_scheduler_node_kill(struct i915_scheduler_queue_entry *node)
> +{
> +	BUG_ON(!node);
> +	BUG_ON(!I915_SQS_IS_FLYING(node));
> +
> +	node->status = i915_sqs_dead;
> +}
> +
> +/*
> + * The batch tagged with the indicated sequence number has completed.
> + * Search the queue for it, update its status and those of any batches
> + * submitted earlier, which must also have completed or been preempted
> + * as appropriate.
> + *
> + * Called with spinlock already held.
> + */
> +static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t seqno)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct i915_scheduler   *scheduler = dev_priv->scheduler;
> +	struct i915_scheduler_queue_entry *node;
> +	bool got_changes = false;
> +
> +	/*
> +	 * Batch buffers are added to the head of the list in execution order,
> +	 * thus seqno values, although not necessarily incrementing, will be
> +	 * met in completion order when scanning the list. So when a match is
> +	 * found, all subsequent entries must have also popped out. Conversely,
> +	 * if a completed entry is found then there is no need to scan further.
> +	 */
> +	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> +		if (I915_SQS_IS_COMPLETE(node))
> +			return;
> +
> +		if (seqno == node->params.request->seqno)
> +			break;
> +	}
> +
> +	/*
> +	 * NB: Lots of extra seqnos get added to the ring to track things
> +	 * like cache flushes and page flips. So don't complain if
> +	 * no node was found.
> +	 */
> +	if (&node->link == &scheduler->node_queue[ring->id])
> +		return;
> +
> +	WARN_ON(!I915_SQS_IS_FLYING(node));
> +
> +	/* Everything from here can be marked as done: */
> +	list_for_each_entry_from(node, &scheduler->node_queue[ring->id], link) {
> +		/* Check if the marking has already been done: */
> +		if (I915_SQS_IS_COMPLETE(node))
> +			break;
> +
> +		if (!I915_SQS_IS_FLYING(node))
> +			continue;
> +
> +		/* Node was in flight so mark it as complete. */
> +		node->status = i915_sqs_complete;
> +		got_changes = true;
> +	}
> +
> +	/* Should submit new work here if the flight list is empty, but the DRM
> +	 * mutex lock might not be available if a '__wait_request()' call is
> +	 * blocking the system. */
> +}
> +
> +int i915_scheduler_handle_irq(struct intel_engine_cs *ring)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct i915_scheduler   *scheduler = dev_priv->scheduler;
> +	unsigned long       flags;
> +	uint32_t            seqno;
> +
> +	seqno = ring->get_seqno(ring, false);
> +
> +	if (1/*i915.scheduler_override & i915_so_direct_submit*/)
> +		return 0;
> +
> +	if (seqno == scheduler->last_irq_seqno[ring->id]) {
> +		/* Why are there sometimes multiple interrupts per seqno? */
> +		return 0;
> +	}
> +	scheduler->last_irq_seqno[ring->id] = seqno;
> +
> +	spin_lock_irqsave(&scheduler->lock, flags);
> +	i915_scheduler_seqno_complete(ring, seqno);
> +	spin_unlock_irqrestore(&scheduler->lock, flags);
> +
> +	/* XXX: Need to also call i915_scheduler_remove() via work handler. */
> +
> +	return 0;
> +}
> +
> +int i915_scheduler_remove(struct intel_engine_cs *ring)
> +{
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	struct i915_scheduler   *scheduler = dev_priv->scheduler;
> +	struct i915_scheduler_queue_entry  *node, *node_next;
> +	unsigned long       flags;
> +	int                 flying = 0, queued = 0;
> +	int                 ret = 0;
> +	bool                do_submit;
> +	uint32_t            min_seqno;
> +	struct list_head    remove;
> +
> +	if (list_empty(&scheduler->node_queue[ring->id]))
> +		return 0;
> +
> +	spin_lock_irqsave(&scheduler->lock, flags);
> +
> +	/* /i915_scheduler_dump_locked(ring, "remove/pre");/ */
> +
> +	/*
> +	 * In the case where the system is idle, starting 'min_seqno' from a big
> +	 * number will cause all nodes to be removed as they are now back to
> +	 * being in-order. However, this will be a problem if the last one to
> +	 * complete was actually out-of-order as the ring seqno value will be
> +	 * lower than one or more completed buffers. Thus code looking for the
> +	 * completion of said buffers will wait forever.
> +	 * Instead, use the hardware seqno as the starting point. This means
> +	 * that some buffers might be kept around even in a completely idle
> +	 * system but it should guarantee that no-one ever gets confused when
> +	 * waiting for buffer completion.
> +	 */
> +	min_seqno = ring->get_seqno(ring, true);
> +
> +	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> +		if (I915_SQS_IS_QUEUED(node))
> +			queued++;
> +		else if (I915_SQS_IS_FLYING(node))
> +			flying++;
> +		else if (I915_SQS_IS_COMPLETE(node))
> +			continue;
> +
> +		if (node->params.request->seqno == 0)
> +			continue;
> +
> +		if (!i915_seqno_passed(node->params.request->seqno, min_seqno))
> +			min_seqno = node->params.request->seqno;
> +	}
> +
> +	INIT_LIST_HEAD(&remove);
> +	list_for_each_entry_safe(node, node_next, &scheduler->node_queue[ring->id], link) {
> +		/*
> +		 * Only remove completed nodes which have a lower seqno than
> +		 * all pending nodes. While there is the possibility of the
> +		 * ring's seqno counting backwards, all higher buffers must
> +		 * be remembered so that the 'i915_seqno_passed()' test can
> +		 * report that they have in fact passed.
> +		 *
> +		 * NB: This is not true for 'dead' nodes. The GPU reset causes
> +		 * the software seqno to restart from its initial value. Thus
> +		 * the dead nodes must be removed even though their seqno values
> +		 * are potentially vastly greater than the current ring seqno.
> +		 */
> +		if (!I915_SQS_IS_COMPLETE(node))
> +			continue;
> +
> +		if (node->status != i915_sqs_dead) {
> +			if (i915_seqno_passed(node->params.request->seqno, min_seqno) &&
> +			    (node->params.request->seqno != min_seqno))
> +				continue;
> +		}
> +
> +		list_del(&node->link);
> +		list_add(&node->link, &remove);
> +
> +		/* Strip the dependency info while the mutex is still locked */
> +		i915_scheduler_remove_dependent(scheduler, node);
> +
> +		continue;
> +	}
> +
> +	/*
> +	 * No idea why but this seems to cause problems occasionally.
> +	 * Note that the 'irq_put' code is internally reference counted
> +	 * and spin_locked so it should be safe to call.
> +	 */
> +	/*if ((scheduler->flags[ring->id] & i915_sf_interrupts_enabled) &&
> +	    (first_flight[ring->id] == NULL)) {
> +		ring->irq_put(ring);
> +		scheduler->flags[ring->id] &= ~i915_sf_interrupts_enabled;
> +	}*/
> +
> +	/* Launch more packets now? */
> +	do_submit = (queued > 0) && (flying < scheduler->min_flying);
> +
> +	spin_unlock_irqrestore(&scheduler->lock, flags);
> +
> +	if (do_submit)
> +		ret = i915_scheduler_submit(ring, true);
> +
> +	while (!list_empty(&remove)) {
> +		node = list_first_entry(&remove, typeof(*node), link);
> +		list_del(&node->link);
> +
> +		/* The batch buffer must be unpinned before it is unreferenced
> +		 * otherwise the unpin fails with a missing vma!? */
> +		if (node->params.dispatch_flags & I915_DISPATCH_SECURE)
> +			i915_gem_execbuff_release_batch_obj(node->params.batch_obj);
> +
> +		/* Free everything that is owned by the node: */
> +		i915_gem_request_unreference(node->params.request);
> +		kfree(node->params.cliprects);
> +		kfree(node->dep_list);
> +		kfree(node);
> +	}
> +
> +	return ret;
> +}
> +
> +static void i915_scheduler_priority_bump_clear(struct i915_scheduler *scheduler)
> +{
> +	struct i915_scheduler_queue_entry *node;
> +	int i;
> +
> +	/*
> +	 * Ensure circular dependencies don't cause problems and that a bump
> +	 * by object usage only bumps each using buffer once:
> +	 */
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		list_for_each_entry(node, &scheduler->node_queue[i], link)
> +			node->bumped = false;
> +	}
> +}
> +
> +static int i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
> +					struct i915_scheduler_queue_entry *target,
> +					uint32_t bump)
> +{
> +	uint32_t new_priority;
> +	int      i, count;
> +
> +	if (target->priority >= scheduler->priority_level_max)
> +		return 1;
> +
> +	if (target->bumped)
> +		return 0;
> +
> +	new_priority = target->priority + bump;
> +	if ((new_priority <= target->priority) ||
> +	    (new_priority > scheduler->priority_level_max))
> +		target->priority = scheduler->priority_level_max;
> +	else
> +		target->priority = new_priority;
> +
> +	count = 1;
> +	target->bumped = true;
> +
> +	for (i = 0; i < target->num_deps; i++) {
> +		if (!target->dep_list[i])
> +			continue;
> +
> +		if (target->dep_list[i]->bumped)
> +			continue;
> +
> +		count += i915_scheduler_priority_bump(scheduler,
> +						      target->dep_list[i],
> +						      bump);
> +	}
> +
> +	return count;
> +}
> +
> +static int i915_scheduler_pop_from_queue_locked(struct intel_engine_cs *ring,
> +				    struct i915_scheduler_queue_entry **pop_node,
> +				    unsigned long *flags)
> +{
> +	struct drm_i915_private            *dev_priv = ring->dev->dev_private;
> +	struct i915_scheduler              *scheduler = dev_priv->scheduler;
> +	struct i915_scheduler_queue_entry  *best;
> +	struct i915_scheduler_queue_entry  *node;
> +	int     ret;
> +	int     i;
> +	bool	any_queued;
> +	bool	has_local, has_remote, only_remote;
> +
> +	*pop_node = NULL;
> +	ret = -ENODATA;
> +
> +	any_queued = false;
> +	only_remote = false;
> +	best = NULL;
> +
> +	list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> +		if (!I915_SQS_IS_QUEUED(node))
> +			continue;
> +		any_queued = true;
> +
> +		has_local  = false;
> +		has_remote = false;
> +		for (i = 0; i < node->num_deps; i++) {
> +			if (!i915_scheduler_is_dependency_valid(node, i))
> +				continue;
> +
> +			if (node->dep_list[i]->params.ring == node->params.ring)
> +				has_local = true;
> +			else
> +				has_remote = true;
> +		}
> +
> +		if (has_remote && !has_local)
> +			only_remote = true;
> +
> +		if (!has_local && !has_remote) {
> +			if (!best ||
> +			    (node->priority > best->priority))
> +				best = node;
> +		}
> +	}
> +
> +	if (best) {
> +		list_del(&best->link);
> +
> +		INIT_LIST_HEAD(&best->link);
> +		best->status  = i915_sqs_popped;
> +
> +		ret = 0;
> +	} else {
> +		/* Can only get here if:
> +		 * (a) there are no buffers in the queue
> +		 * (b) all queued buffers are dependent on other buffers
> +		 *     e.g. on a buffer that is in flight on a different ring
> +		 */
> +		if (only_remote) {
> +			/* The only dependent buffers are on another ring. */
> +			ret = -EAGAIN;
> +		} else if (any_queued) {
> +			/* It seems that something has gone horribly wrong! */
> +			DRM_ERROR("Broken dependency tracking on ring %d!\n",
> +				  (int) ring->id);
> +		}
> +	}
> +
> +	/* i915_scheduler_dump_queue_pop(ring, best); */
> +
> +	*pop_node = best;
> +	return ret;
> +}
> +
> +static int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
> +{
> +	struct drm_device   *dev = ring->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct i915_scheduler   *scheduler = dev_priv->scheduler;
> +	struct i915_scheduler_queue_entry  *node;
> +	unsigned long       flags;
> +	int                 ret = 0, count = 0;
> +
> +	if (!was_locked) {
> +		ret = i915_mutex_lock_interruptible(dev);

Nah, the scheduler needs its own hw lock, otherwise hell will break loose with
locking inversions. We might need to add engine->hw_lock.


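To make that concrete, one possible shape (illustrative only: neither hw_lock
nor this helper exist in the patch) would be a per-engine lock that covers just
the hardware submission step, so this path never needs the global struct_mutex:

	/* Hypothetical new field in struct intel_engine_cs: */
	struct mutex	hw_lock;	/* serialises execbuf_final() per engine */

	/* Hypothetical helper: submit one popped node to the hardware. The
	 * scheduler spinlock must already have been dropped, exactly as it is
	 * around the execbuf_final() call below. */
	static int i915_scheduler_submit_one(struct intel_engine_cs *ring,
					     struct i915_scheduler_queue_entry *node)
	{
		struct drm_i915_private *dev_priv = ring->dev->dev_private;
		int ret;

		mutex_lock(&ring->hw_lock);
		ret = dev_priv->gt.execbuf_final(&node->params);
		mutex_unlock(&ring->hw_lock);

		return ret;
	}
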
> +		if (ret)
> +			return ret;
> +	}
> +
> +	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> +
> +	spin_lock_irqsave(&scheduler->lock, flags);
> +
> +	/* First time around, complain if anything unexpected occurs: */
> +	ret = i915_scheduler_pop_from_queue_locked(ring, &node, &flags);
> +	if (ret) {
> +		spin_unlock_irqrestore(&scheduler->lock, flags);
> +
> +		if (!was_locked)
> +			mutex_unlock(&dev->struct_mutex);
> +
> +		return ret;
> +	}
> +
> +	do {
> +		BUG_ON(!node);
> +		BUG_ON(node->params.ring != ring);
> +		BUG_ON(node->status != i915_sqs_popped);
> +		count++;
> +
> +		/* The call to pop above will have removed the node from the
> +		 * list. So add it back in and mark it as in flight. */
> +		i915_scheduler_fly_node(node);
> +
> +		scheduler->flags[ring->id] |= i915_sf_submitting;
> +		spin_unlock_irqrestore(&scheduler->lock, flags);
> +		ret = dev_priv->gt.execbuf_final(&node->params);
> +		spin_lock_irqsave(&scheduler->lock, flags);
> +		scheduler->flags[ring->id] &= ~i915_sf_submitting;
> +
> +		if (ret) {
> +			bool requeue = true;
> +
> +			/* Oh dear! Either the node is broken or the ring is
> +			 * busy. So need to kill the node or requeue it and try
> +			 * again later as appropriate. */
> +
> +			switch (-ret) {
> +			case ENODEV:
> +			case ENOENT:
> +				/* Fatal errors. Kill the node. */
> +				requeue = false;
> +			break;
> +
> +			case EAGAIN:
> +			case EBUSY:
> +			case EIO:
> +			case ENOMEM:
> +			case ERESTARTSYS:
> +			case EINTR:
> +				/* Supposedly recoverable errors. */
> +			break;
> +
> +			default:
> +				DRM_DEBUG_DRIVER("<%s> Got unexpected error from execfinal(): %d!\n",
> +						 ring->name, ret);
> +				/* Assume it is recoverable and hope for the best. */
> +			break;
> +			}
> +
> +			if (requeue) {
> +				i915_scheduler_node_requeue(node);
> +				/* No point spinning if the ring is currently
> +				 * unavailable so just give up and come back
> +				 * later. */
> +				break;
> +			} else
> +				i915_scheduler_node_kill(node);
> +		}
> +
> +		/* Keep launching until the sky is sufficiently full. */
> +		if (i915_scheduler_count_flying(scheduler, ring) >=
> +						scheduler->min_flying)
> +			break;
> +
> +		ret = i915_scheduler_pop_from_queue_locked(ring, &node, &flags);
> +	} while (ret == 0);
> +
> +	spin_unlock_irqrestore(&scheduler->lock, flags);
> +
> +	if (!was_locked)
> +		mutex_unlock(&dev->struct_mutex);
> +
> +	/* Don't complain about not being able to submit extra entries */
> +	if (ret == -ENODATA)
> +		ret = 0;
> +
> +	return (ret < 0) ? ret : count;
> +}
> +
> +static int i915_scheduler_remove_dependent(struct i915_scheduler *scheduler,
> +					   struct i915_scheduler_queue_entry *remove)
> +{
> +	struct i915_scheduler_queue_entry  *node;
> +	int     i, r;
> +	int     count = 0;
> +
> +	for (i = 0; i < remove->num_deps; i++)
> +		if ((remove->dep_list[i]) &&
> +		    (!I915_SQS_IS_COMPLETE(remove->dep_list[i])))
> +			count++;
> +	BUG_ON(count);
> +
> +	for (r = 0; r < I915_NUM_RINGS; r++) {
> +		list_for_each_entry(node, &scheduler->node_queue[r], link) {
> +			for (i = 0; i < node->num_deps; i++) {
> +				if (node->dep_list[i] != remove)
> +					continue;
> +
> +				node->dep_list[i] = NULL;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> new file mode 100644
> index 0000000..0c5fc7f
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -0,0 +1,91 @@
> +/*
> + * Copyright (c) 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef _I915_SCHEDULER_H_
> +#define _I915_SCHEDULER_H_
> +
> +enum i915_scheduler_queue_status {
> +	/* Limbo: */
> +	i915_sqs_none = 0,
> +	/* Not yet submitted to hardware: */
> +	i915_sqs_queued,
> +	/* Popped from queue, ready to fly: */
> +	i915_sqs_popped,
> +	/* Sent to hardware for processing: */
> +	i915_sqs_flying,
> +	/* Finished processing on the hardware: */
> +	i915_sqs_complete,
> +	/* Killed by catastrophic submission failure: */
> +	i915_sqs_dead,
> +	/* Limit value for use with arrays/loops */
> +	i915_sqs_MAX
> +};
> +
> +#define I915_SQS_IS_QUEUED(node)	(((node)->status == i915_sqs_queued))
> +#define I915_SQS_IS_FLYING(node)	(((node)->status == i915_sqs_flying))
> +#define I915_SQS_IS_COMPLETE(node)	(((node)->status == i915_sqs_complete) || \
> +					 ((node)->status == i915_sqs_dead))
> +
> +struct i915_scheduler_obj_entry {
> +	struct drm_i915_gem_object          *obj;
> +};
> +
> +struct i915_scheduler_queue_entry {
> +	struct i915_execbuffer_params       params;
> +	uint32_t                            priority;
> +	struct i915_scheduler_obj_entry     *saved_objects;
> +	int                                 num_objs;
> +	bool                                bumped;
> +	struct i915_scheduler_queue_entry   **dep_list;
> +	int                                 num_deps;
> +	enum i915_scheduler_queue_status    status;
> +	struct timespec                     stamp;
> +	struct list_head                    link;
> +	uint32_t                            scheduler_index;
> +};
> +
> +struct i915_scheduler {
> +	struct list_head    node_queue[I915_NUM_RINGS];
> +	uint32_t            flags[I915_NUM_RINGS];
> +	spinlock_t          lock;
> +	uint32_t            index;
> +	uint32_t            last_irq_seqno[I915_NUM_RINGS];
> +
> +	/* Tuning parameters: */
> +	uint32_t            priority_level_max;
> +	uint32_t            priority_level_preempt;
> +	uint32_t            min_flying;
> +};
> +
> +/* Flag bits for i915_scheduler::flags */
> +enum {
> +	i915_sf_interrupts_enabled  = (1 << 0),
> +	i915_sf_submitting          = (1 << 1),
> +};
> +
> +int         i915_scheduler_init(struct drm_device *dev);
> +int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
> +int         i915_scheduler_handle_irq(struct intel_engine_cs *ring);
> +
> +#endif  /* _I915_SCHEDULER_H_ */
> -- 
> 1.9.1
> 
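For orientation, later patches in the series are expected to hook this in from
the execbuffer path. Very roughly, the call would look something like the sketch
below (illustrative only: saved_objs, num_objs and params stand for whatever
execbuffer has already gathered; the real hook-up is not part of this patch):

	struct i915_scheduler_queue_entry qe;
	int ret;

	memset(&qe, 0, sizeof(qe));
	qe.params        = *params;	/* ring, ctx, batch object, request, ... */
	qe.priority      = 0;		/* default priority */
	qe.saved_objects = saved_objs;	/* objects referenced by this batch */
	qe.num_objs      = num_objs;

	/* The scheduler copies the entry, takes a reference on the request and
	 * either queues the node or submits it straight away if too few are
	 * in flight. */
	ret = i915_scheduler_queue_execbuffer(&qe);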

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

