[Intel-gfx] [PATCH 09/37] drm/i915/execlists: Switch to rb_root_cached
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jul 11 13:20:24 UTC 2018
On 29/06/2018 08:53, Chris Wilson wrote:
> The kernel recently gained an augmented rbtree with the purpose of
> caching the leftmost element of the rbtree, a frequent optimisation to
> avoid calls to rb_first() that is also employed by the
> execlists->queue. Switch from our open-coded cache to the library.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/intel_engine_cs.c      |  7 ++---
> drivers/gpu/drm/i915/intel_guc_submission.c | 12 +++-----
> drivers/gpu/drm/i915/intel_lrc.c            | 32 +++++++--------------
> drivers/gpu/drm/i915/intel_ringbuffer.h     |  7 +----
> 4 files changed, 19 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 457003311b74..14d5b673fc27 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -464,8 +464,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
> GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
>
> execlists->queue_priority = INT_MIN;
> - execlists->queue = RB_ROOT;
> - execlists->first = NULL;
> + execlists->queue = RB_ROOT_CACHED;
> }
>
> /**
> @@ -1000,7 +999,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
> }
>
> /* ELSP is empty, but there are ready requests? E.g. after reset */
> - if (READ_ONCE(engine->execlists.first))
> + if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
> return false;
>
> /* Ring stopped? */
> @@ -1615,7 +1614,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> last = NULL;
> count = 0;
> drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
> - for (rb = execlists->first; rb; rb = rb_next(rb)) {
> + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
> struct i915_priolist *p =
> rb_entry(rb, typeof(*p), node);
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 05449f636d94..9a2c6856a71e 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -694,9 +694,6 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
>
> lockdep_assert_held(&engine->timeline.lock);
>
> - rb = execlists->first;
> - GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> -
> if (port_isset(port)) {
> if (intel_engine_has_preemption(engine)) {
> struct guc_preempt_work *preempt_work =
> @@ -718,7 +715,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
> }
> GEM_BUG_ON(port_isset(port));
>
> - while (rb) {
> + while ((rb = rb_first_cached(&execlists->queue))) {
> struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> @@ -743,15 +740,13 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
> submit = true;
> }
>
> - rb = rb_next(rb);
> - rb_erase(&p->node, &execlists->queue);
> + rb_erase_cached(&p->node, &execlists->queue);
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> }
> done:
> execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
> - execlists->first = rb;
> if (submit)
> port_assign(port, last);
> if (last)
> @@ -760,7 +755,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
> /* We must always keep the beast fed if we have work piled up */
> GEM_BUG_ON(port_isset(execlists->port) &&
> !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
> - GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> + GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
> + !port_isset(execlists->port));
>
> return submit;
> }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a6bc50d7195e..422753c8641f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -273,7 +273,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
> find_priolist:
> /* most positive priority is scheduled first, equal priorities fifo */
> rb = NULL;
> - parent = &execlists->queue.rb_node;
> + parent = &execlists->queue.rb_root.rb_node;
> while (*parent) {
> rb = *parent;
> p = to_priolist(rb);
> @@ -311,10 +311,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
> p->priority = prio;
> INIT_LIST_HEAD(&p->requests);
> rb_link_node(&p->node, rb, parent);
> - rb_insert_color(&p->node, &execlists->queue);
> -
> - if (first)
> - execlists->first = &p->node;
> + rb_insert_color_cached(&p->node, &execlists->queue, first);
>
> return p;
> }
> @@ -598,9 +595,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> * and context switches) submission.
> */
>
> - rb = execlists->first;
> - GEM_BUG_ON(rb_first(&execlists->queue) != rb);
> -
> if (last) {
> /*
> * Don't resubmit or switch until all outstanding
> @@ -662,7 +656,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> last->tail = last->wa_tail;
> }
>
> - while (rb) {
> + while ((rb = rb_first_cached(&execlists->queue))) {
> struct i915_priolist *p = to_priolist(rb);
> struct i915_request *rq, *rn;
>
> @@ -721,8 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> submit = true;
> }
>
> - rb = rb_next(rb);
> - rb_erase(&p->node, &execlists->queue);
> + rb_erase_cached(&p->node, &execlists->queue);
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> @@ -748,14 +741,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
> execlists->queue_priority =
> port != execlists->port ? rq_prio(last) : INT_MIN;
>
> - execlists->first = rb;
> if (submit) {
> port_assign(port, last);
> execlists_submit_ports(engine);
> }
>
> /* We must always keep the beast fed if we have work piled up */
> - GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
> + GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
> + !port_isset(execlists->port));
>
> /* Re-evaluate the executing context setup after each preemptive kick */
> if (last)
> @@ -915,8 +908,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> }
>
> /* Flush the queued requests to the timeline list (for retiring). */
> - rb = execlists->first;
> - while (rb) {
> + while ((rb = rb_first_cached(&execlists->queue))) {
> struct i915_priolist *p = to_priolist(rb);
>
> list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
> @@ -926,8 +918,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> __i915_request_submit(rq);
> }
>
> - rb = rb_next(rb);
> - rb_erase(&p->node, &execlists->queue);
> + rb_erase_cached(&p->node, &execlists->queue);
> INIT_LIST_HEAD(&p->requests);
> if (p->priority != I915_PRIORITY_NORMAL)
> kmem_cache_free(engine->i915->priorities, p);
> @@ -936,8 +927,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
> /* Remaining _unready_ requests will be nop'ed when submitted */
>
> execlists->queue_priority = INT_MIN;
> - execlists->queue = RB_ROOT;
> - execlists->first = NULL;
> + execlists->queue = RB_ROOT_CACHED;
> GEM_BUG_ON(port_isset(execlists->port));
>
> spin_unlock_irqrestore(&engine->timeline.lock, flags);
> @@ -1183,7 +1173,7 @@ static void execlists_submit_request(struct i915_request *request)
>
> queue_request(engine, &request->sched, rq_prio(request));
>
> - GEM_BUG_ON(!engine->execlists.first);
> + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
> GEM_BUG_ON(list_empty(&request->sched.link));
>
> submit_queue(engine, rq_prio(request));
> @@ -2036,7 +2026,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
> struct intel_engine_execlists * const execlists = &engine->execlists;
>
> /* After a GPU reset, we may have requests to replay */
> - if (execlists->first)
> + if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
> tasklet_schedule(&execlists->tasklet);
>
> /*
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index a1aff360d0ce..923875500828 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -292,12 +292,7 @@ struct intel_engine_execlists {
> /**
> * @queue: queue of requests, in priority lists
> */
> - struct rb_root queue;
> -
> - /**
> - * @first: leftmost level in priority @queue
> - */
> - struct rb_node *first;
> + struct rb_root_cached queue;
>
> /**
> * @csb_read: control register for Context Switch buffer
>
All checks out AFAICT. Nice that we can now simplify!
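
For anyone not yet familiar with the cached flavour, the pattern the
whole patch leans on is roughly the following (a minimal sketch against
include/linux/rbtree.h, with a made-up "item" structure rather than
i915_priolist; ordering here is plain min-first, whereas the driver
sorts most-positive priority to the left):

  #include <linux/rbtree.h>

  struct item {
          struct rb_node node;
          int key;
  };

  static struct rb_root_cached tree = RB_ROOT_CACHED;

  static void item_insert(struct item *new)
  {
          struct rb_node **p = &tree.rb_root.rb_node, *parent = NULL;
          bool leftmost = true;

          while (*p) {
                  parent = *p;
                  if (new->key < rb_entry(parent, struct item, node)->key) {
                          p = &parent->rb_left;
                  } else {
                          p = &parent->rb_right;
                          leftmost = false; /* descended right at least once */
                  }
          }

          rb_link_node(&new->node, parent, p);
          /* also updates the cached leftmost iff we never went right */
          rb_insert_color_cached(&new->node, &tree, leftmost);
  }

  static struct item *item_first(void)
  {
          /* O(1) read of rb_leftmost, no walk down the left spine */
          struct rb_node *rb = rb_first_cached(&tree);

          return rb ? rb_entry(rb, struct item, node) : NULL;
  }

  static void item_erase(struct item *victim)
  {
          /* fixes up the cached leftmost if victim was the minimum */
          rb_erase_cached(&victim->node, &tree);
  }

Which is exactly what the open-coded execlists->first pointer was
emulating by hand, so nothing of substance changes; we just stop
maintaining the cache ourselves.
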
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko