[Intel-gfx] [PATCH 07/10] drm/i915: Restructure priority inheritance
Andi Shyti
andi at etezian.org
Tue Jan 26 17:18:22 UTC 2021
Hi Chris,
> + local_bh_disable();
> + i915_request_set_priority(rq, prio);
> + local_bh_enable();
> +
> + i915_request_put(rq);
> + rq = ptr_mask_bits(rn, 1);
Why are you using ptr_mask_bits() here?
> + } while (rq);
> +}
> +
> +void i915_sched_init_ipi(struct i915_sched_ipi *ipi)
> +{
> + INIT_WORK(&ipi->work, ipi_schedule);
> + ipi->list = NULL;
> +}
> +
> +static void __ipi_add(struct i915_request *rq)
> +{
> +#define STUB ((struct i915_request *)1)
> + struct intel_engine_cs *engine = READ_ONCE(rq->engine);
> + struct i915_request *first;
> +
> + if (!i915_request_get_rcu(rq))
> + return;
> +
> + if (__i915_request_is_complete(rq) ||
> + cmpxchg(&rq->sched.ipi_link, NULL, STUB)) { /* already queued */
> + i915_request_put(rq);
> + return;
> + }
> +
> + first = READ_ONCE(engine->execlists.ipi.list);
> + do
> + rq->sched.ipi_link = ptr_pack_bits(first, 1, 1);
... and why ptr_pack_bits() here?
Do the mask/pack bit operations make any functional difference, or could plain pointers be used?
Andi
> @@ -265,66 +322,41 @@ static void __i915_schedule(struct i915_sched_node *node, int prio)
> * end result is a topological list of requests in reverse order, the
> * last element in the list is the request we must execute first.
> */
> - list_for_each_entry(dep, &dfs, dfs_link) {
> - struct i915_sched_node *node = dep->signaler;
> + list_for_each_entry(rq, &dfs, sched.dfs) {
> + struct i915_dependency *p;
>
> - /* If we are already flying, we know we have no signalers */
> - if (node_started(node))
> - continue;
> + /* Also release any children on this engine that are ready */
> + GEM_BUG_ON(rq->engine != engine);
>
> - /*
> - * Within an engine, there can be no cycle, but we may
> - * refer to the same dependency chain multiple times
> - * (redundant dependencies are not eliminated) and across
> - * engines.
> - */
> - list_for_each_entry(p, &node->signalers_list, signal_link) {
> - GEM_BUG_ON(p == dep); /* no cycles! */
> + for_each_signaler(p, rq) {
> + struct i915_request *s =
> + container_of(p->signaler, typeof(*s), sched);
>
> - if (node_signaled(p->signaler))
> + GEM_BUG_ON(s == rq);
> +
> + if (rq_prio(s) >= prio)
> continue;
>
> - if (prio > READ_ONCE(p->signaler->attr.priority))
> - list_move_tail(&p->dfs_link, &dfs);
> + if (__i915_request_is_complete(s))
> + continue;
> +
> + if (s->engine != rq->engine) {
> + ipi_priority(s, prio);
> + continue;
> + }
> +
> + list_move_tail(&s->sched.dfs, &dfs);
> }
> }
>
> - /*
> - * If we didn't need to bump any existing priorities, and we haven't
> - * yet submitted this request (i.e. there is no potential race with
> - * execlists_submit_request()), we can set our own priority and skip
> - * acquiring the engine locks.
> - */
> - if (node->attr.priority == I915_PRIORITY_INVALID) {
> - GEM_BUG_ON(!list_empty(&node->link));
> - node->attr.priority = prio;
> + plist = i915_sched_lookup_priolist(engine, prio);
>
> - if (stack.dfs_link.next == stack.dfs_link.prev)
> - return;
> + /* Fifo and depth-first replacement ensure our deps execute first */
> + list_for_each_entry_safe_reverse(rq, rn, &dfs, sched.dfs) {
> + GEM_BUG_ON(rq->engine != engine);
>
> - __list_del_entry(&stack.dfs_link);
> - }
> -
> - memset(&cache, 0, sizeof(cache));
> - engine = node_to_request(node)->engine;
> - spin_lock(&engine->active.lock);
> -
> - /* Fifo and depth-first replacement ensure our deps execute before us */
> - engine = sched_lock_engine(node, engine, &cache);
> - list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
> - INIT_LIST_HEAD(&dep->dfs_link);
> -
> - node = dep->signaler;
> - engine = sched_lock_engine(node, engine, &cache);
> - lockdep_assert_held(&engine->active.lock);
> -
> - /* Recheck after acquiring the engine->timeline.lock */
> - if (prio <= node->attr.priority || node_signaled(node))
> - continue;
> -
> - GEM_BUG_ON(node_to_request(node)->engine != engine);
> -
> - WRITE_ONCE(node->attr.priority, prio);
> + INIT_LIST_HEAD(&rq->sched.dfs);
> + WRITE_ONCE(rq->sched.attr.priority, prio);
>
> /*
> * Once the request is ready, it will be placed into the
> @@ -334,32 +366,75 @@ static void __i915_schedule(struct i915_sched_node *node, int prio)
> * any preemption required, be dealt with upon submission.
> * See engine->submit_request()
> */
> - if (list_empty(&node->link))
> + if (!i915_request_is_ready(rq))
> continue;
>
> - if (i915_request_in_priority_queue(node_to_request(node))) {
> - if (!cache.priolist)
> - cache.priolist =
> - i915_sched_lookup_priolist(engine,
> - prio);
> - list_move_tail(&node->link, cache.priolist);
> - }
> + if (i915_request_in_priority_queue(rq))
> + list_move_tail(&rq->sched.link, plist);
>
> - /* Defer (tasklet) submission until after all of our updates. */
> - kick_submission(engine, node_to_request(node), prio);
> + /* Defer (tasklet) submission until after all updates. */
> + kick_submission(engine, rq, prio);
> }
> -
> - spin_unlock(&engine->active.lock);
> }
>
> void i915_request_set_priority(struct i915_request *rq, int prio)
> {
> - if (!intel_engine_has_scheduler(rq->engine))
> + struct intel_engine_cs *engine;
> + unsigned long flags;
> +
> + if (prio <= rq_prio(rq))
> return;
>
> - spin_lock_irq(&schedule_lock);
> - __i915_schedule(&rq->sched, prio);
> - spin_unlock_irq(&schedule_lock);
> + /*
> + * If we are setting the priority before being submitted, see if we
> + * can quickly adjust our own priority in-situ and avoid taking
> + * the contended engine->active.lock. If we need priority inheritance,
> + * take the slow route.
> + */
> + if (rq_prio(rq) == I915_PRIORITY_INVALID) {
> + struct i915_dependency *p;
> +
> + rcu_read_lock();
> + for_each_signaler(p, rq) {
> + struct i915_request *s =
> + container_of(p->signaler, typeof(*s), sched);
> +
> + if (rq_prio(s) >= prio)
> + continue;
> +
> + if (__i915_request_is_complete(s))
> + continue;
> +
> + break;
> + }
> + rcu_read_unlock();
> +
> + if (&p->signal_link == &rq->sched.signalers_list &&
> + cmpxchg(&rq->sched.attr.priority,
> + I915_PRIORITY_INVALID,
> + prio) == I915_PRIORITY_INVALID)
> + return;
> + }
> +
> + engine = lock_engine_irqsave(rq, flags);
> + if (prio <= rq_prio(rq))
> + goto unlock;
> +
> + if (__i915_request_is_complete(rq))
> + goto unlock;
> +
> + if (!intel_engine_has_scheduler(engine)) {
> + rq->sched.attr.priority = prio;
> + goto unlock;
> + }
> +
> + rcu_read_lock();
> + __i915_request_set_priority(rq, prio);
> + rcu_read_unlock();
> + GEM_BUG_ON(rq_prio(rq) != prio);
> +
> +unlock:
> + spin_unlock_irqrestore(&engine->active.lock, flags);
> }
>
> void i915_sched_node_init(struct i915_sched_node *node)
> @@ -369,6 +444,9 @@ void i915_sched_node_init(struct i915_sched_node *node)
> INIT_LIST_HEAD(&node->signalers_list);
> INIT_LIST_HEAD(&node->waiters_list);
> INIT_LIST_HEAD(&node->link);
> + INIT_LIST_HEAD(&node->dfs);
> +
> + node->ipi_link = NULL;
>
> i915_sched_node_reinit(node);
> }
> @@ -379,6 +457,9 @@ void i915_sched_node_reinit(struct i915_sched_node *node)
> node->semaphores = 0;
> node->flags = 0;
>
> + GEM_BUG_ON(node->ipi_link);
> + node->ipi_priority = I915_PRIORITY_INVALID;
> +
> GEM_BUG_ON(!list_empty(&node->signalers_list));
> GEM_BUG_ON(!list_empty(&node->waiters_list));
> GEM_BUG_ON(!list_empty(&node->link));
> @@ -414,7 +495,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
> spin_lock(&signal->lock);
>
> if (!node_signaled(signal)) {
> - INIT_LIST_HEAD(&dep->dfs_link);
> dep->signaler = signal;
> dep->waiter = node_get(node);
> dep->flags = flags;
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
> index a045be784c67..5be7f90e7896 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> @@ -35,6 +35,8 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node,
>
> void i915_sched_node_retire(struct i915_sched_node *node);
>
> +void i915_sched_init_ipi(struct i915_sched_ipi *ipi);
> +
> void i915_request_set_priority(struct i915_request *request, int prio);
>
> struct list_head *
> diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h
> index 623bf41fcf35..5a84d59134ee 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler_types.h
> +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h
> @@ -8,8 +8,8 @@
> #define _I915_SCHEDULER_TYPES_H_
>
> #include <linux/list.h>
> +#include <linux/workqueue.h>
>
> -#include "gt/intel_engine_types.h"
> #include "i915_priolist_types.h"
>
> struct drm_i915_private;
> @@ -61,13 +61,23 @@ struct i915_sched_attr {
> */
> struct i915_sched_node {
> spinlock_t lock; /* protect the lists */
> +
> struct list_head signalers_list; /* those before us, we depend upon */
> struct list_head waiters_list; /* those after us, they depend upon us */
> - struct list_head link;
> + struct list_head link; /* guarded by engine->active.lock */
> + struct list_head dfs; /* guarded by engine->active.lock */
> struct i915_sched_attr attr;
> - unsigned int flags;
> + unsigned long flags;
> #define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0)
> - intel_engine_mask_t semaphores;
> + unsigned long semaphores;
> +
> + struct i915_request *ipi_link;
> + int ipi_priority;
> +};
> +
> +struct i915_sched_ipi {
> + struct i915_request *list;
> + struct work_struct work;
> };
>
> struct i915_dependency {
> @@ -75,7 +85,6 @@ struct i915_dependency {
> struct i915_sched_node *waiter;
> struct list_head signal_link;
> struct list_head wait_link;
> - struct list_head dfs_link;
> struct rcu_head rcu;
> unsigned long flags;
> #define I915_DEPENDENCY_ALLOC BIT(0)
> --
> 2.20.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list