[Intel-gfx] [PATCH 6/9] drm/i915: Split execlist priority queue into rbtree + linked list
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri May 5 13:19:07 UTC 2017
On 03/05/2017 12:37, Chris Wilson wrote:
> All the requests at the same priority are executed in FIFO order. They
> do not need to be stored in the rbtree themselves, as they are a simple
> list within a level. If we move the requests at one priority into a list,
> we can then reduce the rbtree to the set of priorities. This should keep
> the height of the rbtree small, as the number of active priorities cannot
> exceed the number of active requests and should typically be only a few.
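
Just to check I read the resulting shape right: the queue becomes an rbtree
keyed by priority, with a FIFO list of requests hanging off each node, and
dequeue walks the levels from highest to lowest priority. A simplified
sketch (using the names from the patch; rq submission is a stand-in, not
the real code):

    struct execlist_priolist {
            struct rb_node node;            /* keyed by ->priority */
            struct list_head requests;      /* FIFO within the level */
            int priority;
    };

    struct rb_node *rb;

    /* highest priority level is the leftmost node, i.e. rb_first() */
    for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
            struct execlist_priolist *plist =
                    rb_entry(rb, struct execlist_priolist, node);
            struct drm_i915_gem_request *rq;

            list_for_each_entry(rq, &plist->requests, priotree.link)
                    submit_one(rq);         /* stand-in for submission */
    }
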
>
> Currently, we have ~2k possible different priority levels, and that may
> increase to allow even more fine-grained selection. Allocating those in
> advance seems a waste (and may be impossible), so we opt for allocating
> a level upon first use and freeing it once its requests are depleted. To
> avoid the possibility of an allocation failure causing us to lose a
> request, we preallocate the default priority (0) and bump any request to
> that priority if we fail to allocate the appropriate plist for it. Having
> a request (that is ready to run, so not leading to corruption) execute
> out-of-order is better than leaking the request (and its dependency
> tree) entirely.
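
The fallback is easy to follow in insert_request() below; the relevant
error path is roughly (paraphrasing the hunk, not new code):

    plist = kmalloc(sizeof(*plist), GFP_ATOMIC);
    if (unlikely(!plist)) {
            /*
             * No memory for a new level: fall back to the preallocated
             * default level so the request is queued (possibly out of
             * order) instead of being lost.
             */
            prio = I915_PRIORITY_DFL;
            goto find_plist;        /* recurses just once */
    }
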
>
> There should be a benefit to reducing execlists_dequeue() to principally
> using a simple list (and to reducing the frequency of both rbtree
> iteration and rebalancing on erase), but for typical workloads request
> coalescing should be small enough that we don't notice any change. The
> main gain is from improving PI calls to schedule, and the explicit list
> within a level should make request unwinding simpler (we just need to
> insert at the head of the list rather than the tail, and we don't have
> to make the rbtree search more complicated).
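
And on the unwinding point: with an explicit FIFO list per level, I would
expect a future unwind/requeue path to reduce to something like (purely
hypothetical, not part of this patch):

    /* put a preempted request back ahead of its priority peers */
    list_add(&rq->priotree.link, &plist->requests);

i.e. a head insertion instead of the list_add_tail() used on the normal
submission path, with no extra ordering rules in the rbtree itself.
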
>
> v2: Avoid use-after-free when deleting a depleted priolist
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Michał Winiarski <michal.winiarski at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 12 ++-
> drivers/gpu/drm/i915/i915_gem_request.c | 4 +-
> drivers/gpu/drm/i915/i915_gem_request.h | 2 +-
> drivers/gpu/drm/i915/i915_guc_submission.c | 53 ++++++----
> drivers/gpu/drm/i915/i915_utils.h | 9 ++
> drivers/gpu/drm/i915/intel_lrc.c | 159 +++++++++++++++++++----------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 7 ++
> 7 files changed, 163 insertions(+), 83 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 0b5d7142d8d9..a8c7788d986e 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3314,7 +3314,6 @@ static int i915_engine_info(struct seq_file *m, void *unused)
>
> if (i915.enable_execlists) {
> u32 ptr, read, write;
> - struct rb_node *rb;
> unsigned int idx;
>
> seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
> @@ -3358,9 +3357,14 @@ static int i915_engine_info(struct seq_file *m, void *unused)
> rcu_read_unlock();
>
> spin_lock_irq(&engine->timeline->lock);
> - for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
> - rq = rb_entry(rb, typeof(*rq), priotree.node);
> - print_request(m, rq, "\t\tQ ");
> + for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
> + struct execlist_priolist *plist =
> + rb_entry(rb, typeof(*plist), node);
> +
> + list_for_each_entry(rq,
> + &plist->requests,
> + priotree.link)
> + print_request(m, rq, "\t\tQ ");
> }
> spin_unlock_irq(&engine->timeline->lock);
> } else if (INTEL_GEN(dev_priv) > 6) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index 8d2a5c8e5005..ad2be26923fb 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
> {
> struct i915_dependency *dep, *next;
>
> - GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
> + GEM_BUG_ON(!list_empty(&pt->link));
>
> /* Everyone we depended upon (the fences we wait to be signaled)
> * should retire before us and remove themselves from our list.
> @@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
> {
> INIT_LIST_HEAD(&pt->signalers_list);
> INIT_LIST_HEAD(&pt->waiters_list);
> - RB_CLEAR_NODE(&pt->node);
> + INIT_LIST_HEAD(&pt->link);
> pt->priority = INT_MIN;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index feb81463abc9..6c58f7d87746 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -67,7 +67,7 @@ struct i915_dependency {
> struct i915_priotree {
> struct list_head signalers_list; /* those before us, we depend upon */
> struct list_head waiters_list; /* those after us, they depend upon us */
> - struct rb_node node;
> + struct list_head link;
> int priority;
> #define I915_PRIORITY_MAX 1024
> #define I915_PRIORITY_DFL 0
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 62d3831a8a0d..f0d48bd508c9 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -674,32 +674,45 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
>
> spin_lock_irq(&engine->timeline->lock);
> rb = engine->execlist_first;
> + GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
> while (rb) {
> - struct drm_i915_gem_request *rq =
> - rb_entry(rb, typeof(*rq), priotree.node);
> -
> - if (last && rq->ctx != last->ctx) {
> - if (port != engine->execlist_port)
> - break;
> -
> - port_assign(port, last);
> - port++;
> + struct execlist_priolist *plist =
> + rb_entry(rb, typeof(*plist), node);
> + struct drm_i915_gem_request *rq, *rn;
> +
> + list_for_each_entry_safe(rq, rn,
> + &plist->requests, priotree.link) {
> + if (last && rq->ctx != last->ctx) {
> + if (port != engine->execlist_port) {
> + __list_del_many(&plist->requests,
> + &rq->priotree.link);
> + goto done;
> + }
> +
> + port_assign(port, last);
> + port++;
> + }
> +
> + INIT_LIST_HEAD(&rq->priotree.link);
> + rq->priotree.priority = INT_MAX;
> +
> + i915_guc_submit(rq);
> + trace_i915_gem_request_in(rq, port - engine->execlist_port);
> + last = rq;
> + submit = true;
> }
>
> rb = rb_next(rb);
> - rb_erase(&rq->priotree.node, &engine->execlist_queue);
> - RB_CLEAR_NODE(&rq->priotree.node);
> - rq->priotree.priority = INT_MAX;
> -
> - i915_guc_submit(rq);
> - trace_i915_gem_request_in(rq, port - engine->execlist_port);
> - last = rq;
> - submit = true;
> - }
> - if (submit) {
> - port_assign(port, last);
> engine->execlist_first = rb;
> +
> + rb_erase(&plist->node, &engine->execlist_queue);
> + INIT_LIST_HEAD(&plist->requests);
> + if (plist->priority != I915_PRIORITY_DFL)
> + kfree(plist);
> }
> +done:
> + if (submit)
> + port_assign(port, last);
> spin_unlock_irq(&engine->timeline->lock);
>
> return submit;
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index f0500c65726d..9c65f9ae2e97 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -99,4 +99,13 @@
> __T; \
> })
>
> +#include <linux/list.h>
> +
> +static inline void __list_del_many(struct list_head *head,
> + struct list_head *first)
> +{
> + head->next = first;
> + first->prev = head;
> +}
Hm hm hmm this is a bit unintuitive since the first element is not
deleted. Perhaps a more accurate name would be __list_set_head? I was
thinking __list_del_upto_prev but it was too long. :)
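
Alternatively (or additionally) a kerneldoc-style comment might be enough;
something like this is how I read it (just a sketch, not from the patch):

    /**
     * __list_del_many - detach every entry between @head and @first
     * @head:  the list head
     * @first: the first entry to *keep* on the list
     *
     * The unlinked entries keep stale link pointers, so the caller must
     * already own them - as the dequeue loops here do, having
     * reinitialised each submitted request's link before cutting the
     * list.
     */
    static inline void __list_del_many(struct list_head *head,
                                       struct list_head *first)
    {
            head->next = first;
            first->prev = head;
    }
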
> +
> #endif /* !__I915_UTILS_H */
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 9f7b6112c53a..fb0025627676 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -436,57 +436,79 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
>
> spin_lock_irq(&engine->timeline->lock);
> rb = engine->execlist_first;
> + GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
> while (rb) {
> - struct drm_i915_gem_request *cursor =
> - rb_entry(rb, typeof(*cursor), priotree.node);
> -
> - /* Can we combine this request with the current port? It has to
> - * be the same context/ringbuffer and not have any exceptions
> - * (e.g. GVT saying never to combine contexts).
> - *
> - * If we can combine the requests, we can execute both by
> - * updating the RING_TAIL to point to the end of the second
> - * request, and so we never need to tell the hardware about
> - * the first.
> - */
> - if (last && !can_merge_ctx(cursor->ctx, last->ctx)) {
> - /* If we are on the second port and cannot combine
> - * this request with the last, then we are done.
> - */
> - if (port != engine->execlist_port)
> - break;
> -
> - /* If GVT overrides us we only ever submit port[0],
> - * leaving port[1] empty. Note that we also have
> - * to be careful that we don't queue the same
> - * context (even though a different request) to
> - * the second port.
> + struct execlist_priolist *plist =
> + rb_entry(rb, typeof(*plist), node);
> + struct drm_i915_gem_request *rq, *rn;
> +
> + list_for_each_entry_safe(rq, rn,
> + &plist->requests, priotree.link) {
> + /*
> + * Can we combine this request with the current port?
> + * It has to be the same context/ringbuffer and not
> + * have any exceptions (e.g. GVT saying never to
> + * combine contexts).
> + *
> + * If we can combine the requests, we can execute both
> + * by updating the RING_TAIL to point to the end of the
> + * second request, and so we never need to tell the
> + * hardware about the first.
> */
> - if (ctx_single_port_submission(last->ctx) ||
> - ctx_single_port_submission(cursor->ctx))
> - break;
> + if (last && !can_merge_ctx(rq->ctx, last->ctx)) {
> + /*
> + * If we are on the second port and cannot
> + * combine this request with the last, then we
> + * are done.
> + */
> + if (port != engine->execlist_port) {
> + __list_del_many(&plist->requests,
> + &rq->priotree.link);
> + goto done;
> + }
> +
> + /*
> + * If GVT overrides us we only ever submit
> + * port[0], leaving port[1] empty. Note that we
> + * also have to be careful that we don't queue
> + * the same context (even though a different
> + * request) to the second port.
> + */
> + if (ctx_single_port_submission(last->ctx) ||
> + ctx_single_port_submission(rq->ctx)) {
> + __list_del_many(&plist->requests,
> + &rq->priotree.link);
> + goto done;
> + }
> +
> + GEM_BUG_ON(last->ctx == rq->ctx);
> +
> + if (submit)
> + port_assign(port, last);
> + port++;
> + }
>
> - GEM_BUG_ON(last->ctx == cursor->ctx);
> + INIT_LIST_HEAD(&rq->priotree.link);
> + rq->priotree.priority = INT_MAX;
>
> - if (submit)
> - port_assign(port, last);
> - port++;
> + __i915_gem_request_submit(rq);
> + trace_i915_gem_request_in(rq,
> + port - engine->execlist_port);
> + last = rq;
> + submit = true;
> }
>
> rb = rb_next(rb);
> - rb_erase(&cursor->priotree.node, &engine->execlist_queue);
> - RB_CLEAR_NODE(&cursor->priotree.node);
> - cursor->priotree.priority = INT_MAX;
> + engine->execlist_first = rb;
>
> - __i915_gem_request_submit(cursor);
> - trace_i915_gem_request_in(cursor, port - engine->execlist_port);
> - last = cursor;
> - submit = true;
> + rb_erase(&plist->node, &engine->execlist_queue);
> + INIT_LIST_HEAD(&plist->requests);
> + if (plist->priority != I915_PRIORITY_DFL)
> + kfree(plist);
> }
> - if (submit) {
> +done:
> + if (submit)
> port_assign(port, last);
> - engine->execlist_first = rb;
> - }
> spin_unlock_irq(&engine->timeline->lock);
>
> if (submit)
> @@ -610,28 +632,53 @@ static void intel_lrc_irq_handler(unsigned long data)
> intel_uncore_forcewake_put(dev_priv, engine->fw_domains);
> }
>
> -static bool insert_request(struct i915_priotree *pt, struct rb_root *root)
> +static bool
> +insert_request(struct intel_engine_cs *engine,
> + struct i915_priotree *pt,
> + int prio)
> {
> + struct execlist_priolist *plist;
> struct rb_node **p, *rb;
> bool first = true;
>
> +find_plist:
> /* most positive priority is scheduled first, equal priorities fifo */
> rb = NULL;
> - p = &root->rb_node;
> + p = &engine->execlist_queue.rb_node;
> while (*p) {
> - struct i915_priotree *pos;
> -
> rb = *p;
> - pos = rb_entry(rb, typeof(*pos), node);
> - if (pt->priority > pos->priority) {
> + plist = rb_entry(rb, typeof(*plist), node);
> + if (prio > plist->priority) {
> p = &rb->rb_left;
> - } else {
> + } else if (prio < plist->priority) {
> p = &rb->rb_right;
> first = false;
> + } else {
> + list_add_tail(&pt->link, &plist->requests);
> + return false;
> }
> }
> - rb_link_node(&pt->node, rb, p);
> - rb_insert_color(&pt->node, root);
> +
> + if (prio == I915_PRIORITY_DFL) {
> + plist = &engine->default_priolist;
> + } else {
> + plist = kmalloc(sizeof(*plist), GFP_ATOMIC);
> + /* Convert an allocation failure to a priority bump */
> + if (unlikely(!plist)) {
> + prio = I915_PRIORITY_DFL; /* recurses just once */
> + goto find_plist;
> + }
> + }
> +
> + plist->priority = prio;
> + rb_link_node(&plist->node, rb, p);
> + rb_insert_color(&plist->node, &engine->execlist_queue);
> +
> + INIT_LIST_HEAD(&plist->requests);
> + list_add_tail(&pt->link, &plist->requests);
> +
> + if (first)
> + engine->execlist_first = &plist->node;
>
> return first;
> }
> @@ -644,8 +691,9 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
> /* Will be called from irq-context when using foreign fences. */
> spin_lock_irqsave(&engine->timeline->lock, flags);
>
> - if (insert_request(&request->priotree, &engine->execlist_queue)) {
> - engine->execlist_first = &request->priotree.node;
> + if (insert_request(engine,
> + &request->priotree,
> + request->priotree.priority)) {
> if (execlists_elsp_ready(engine))
> tasklet_hi_schedule(&engine->irq_tasklet);
> }
> @@ -734,10 +782,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
> continue;
>
> pt->priority = prio;
> - if (!RB_EMPTY_NODE(&pt->node)) {
> - rb_erase(&pt->node, &engine->execlist_queue);
> - if (insert_request(pt, &engine->execlist_queue))
> - engine->execlist_first = &pt->node;
> + if (!list_empty(&pt->link)) {
> + __list_del_entry(&pt->link);
> + insert_request(engine, pt, prio);
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 68765d45116b..b8e01a9e2311 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -177,6 +177,12 @@ enum intel_engine_id {
> VECS
> };
>
> +struct execlist_priolist {
> + struct rb_node node;
> + struct list_head requests;
> + int priority;
> +};
> +
> #define INTEL_ENGINE_CS_MAX_NAME 8
>
> struct intel_engine_cs {
> @@ -367,6 +373,7 @@ struct intel_engine_cs {
>
> /* Execlists */
> struct tasklet_struct irq_tasklet;
> + struct execlist_priolist default_priolist;
> struct execlist_port {
> struct drm_i915_gem_request *request_count;
> #define EXECLIST_COUNT_BITS 2
>
I've pulled your tree to look at the control flow more easily.
Cross-checked back and forth a bit and it looks fine to me.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko