[PATCH 26/32] drm/i915: Combine multiple internal plists into the same i915_priolist bucket
Chris Wilson
chris at chris-wilson.co.uk
Fri Jun 1 12:56:43 UTC 2018
As we are about to allow ourselves to slightly bump the user priority
into a few different sublevels, packthose internal priority lists
into the same i915_priolist to keep the rbtree compact and avoid having
to allocate the default user priority even after the internal bumping.
The downside to having an requests[] rather than a node per active list,
is that we then have to walk over the empty higher priority lists. To
compensate, we track the active buckets and use a small bitmap to skip
over any inactive ones.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/intel_engine_cs.c | 6 +--
drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++---
drivers/gpu/drm/i915/intel_lrc.c | 57 +++++++++++----------
drivers/gpu/drm/i915/intel_ringbuffer.h | 15 +++++-
4 files changed, 51 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index c279d72b441a..2bde6faab14d 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1561,10 +1561,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
count = 0;
drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
- struct i915_priolist *p =
- rb_entry(rb, typeof(*p), node);
+ struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+ int i;
- list_for_each_entry(rq, &p->requests, sched.link) {
+ priolist_for_each_request(rq, p, i) {
if (count++ < MAX_REQUESTS_TO_SHOW - 1)
print_request(m, rq, "\t\tQ ");
else
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 598c505e2894..9e173239af40 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -719,30 +719,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
+ int i;
- list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+ priolist_for_each_request_consume(rq, rn, p, i) {
if (last && rq->hw_context != last->hw_context) {
- if (port == last_port) {
- __list_del_many(&p->requests,
- &rq->sched.link);
+ if (port == last_port)
goto done;
- }
if (submit)
port_assign(port, last);
port++;
}
- INIT_LIST_HEAD(&rq->sched.link);
+ list_del_init(&rq->sched.link);
__i915_request_submit(rq);
trace_i915_request_in(rq, port_index(port, execlists));
+
last = rq;
submit = true;
}
rb_erase_cached(&p->node, &execlists->queue);
- INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b32349508c5d..839f8aee57c9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -254,14 +254,18 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
ce->lrc_desc = desc;
}
-static struct i915_priolist *
+static struct list_head *
lookup_priolist(struct intel_engine_cs *engine, int prio)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_priolist *p;
struct rb_node **parent, *rb;
bool first = true;
+ int idx, i;
+ /* buckets sorted from highest [in slot 0] to lowest priority */
+ idx = I915_PRIORITY_COUNT - (prio & ~I915_PRIORITY_MASK) - 1;
+ prio >>= I915_USER_PRIORITY_SHIFT;
if (unlikely(execlists->no_priolist))
prio = I915_PRIORITY_NORMAL;
@@ -278,7 +282,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
parent = &rb->rb_right;
first = false;
} else {
- return p;
+ goto out;
}
}
@@ -304,11 +308,15 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
}
p->priority = prio;
- INIT_LIST_HEAD(&p->requests);
+ for (i = 0; i < ARRAY_SIZE(p->requests); i++)
+ INIT_LIST_HEAD(&p->requests[i]);
rb_link_node(&p->node, rb, parent);
rb_insert_color_cached(&p->node, &execlists->queue, first);
+ p->used = 0;
- return p;
+out:
+ p->used |= BIT(idx);
+ return &p->requests[idx];
}
static void unwind_wa_tail(struct i915_request *rq)
@@ -320,7 +328,7 @@ static void unwind_wa_tail(struct i915_request *rq)
static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
{
struct i915_request *rq, *rn;
- struct i915_priolist *uninitialized_var(p);
+ struct list_head *uninitialized_var(plist);
int last_prio = I915_PRIORITY_INVALID;
lockdep_assert_held(&engine->timeline.lock);
@@ -337,11 +345,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != last_prio) {
last_prio = rq_prio(rq);
- p = lookup_priolist(engine, last_prio);
+ plist = lookup_priolist(engine, last_prio);
}
- GEM_BUG_ON(p->priority != rq_prio(rq));
- list_add(&rq->sched.link, &p->requests);
+ list_add(&rq->sched.link, plist);
}
}
@@ -659,8 +666,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
+ int i;
- list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+ priolist_for_each_request_consume(rq, rn, p, i) {
/*
* Can we combine this request with the current port?
* It has to be the same context/ringbuffer and not
@@ -679,11 +687,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* combine this request with the last, then we
* are done.
*/
- if (port == last_port) {
- __list_del_many(&p->requests,
- &rq->sched.link);
+ if (port == last_port)
goto done;
- }
/*
* If GVT overrides us we only ever submit
@@ -693,11 +698,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* request) to the second port.
*/
if (ctx_single_port_submission(last->hw_context) ||
- ctx_single_port_submission(rq->hw_context)) {
- __list_del_many(&p->requests,
- &rq->sched.link);
+ ctx_single_port_submission(rq->hw_context))
goto done;
- }
GEM_BUG_ON(last->hw_context == rq->hw_context);
@@ -708,15 +710,16 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(port_isset(port));
}
- INIT_LIST_HEAD(&rq->sched.link);
+ list_del_init(&rq->sched.link);
+
__i915_request_submit(rq);
trace_i915_request_in(rq, port_index(port, execlists));
+
last = rq;
submit = true;
}
rb_erase_cached(&p->node, &execlists->queue);
- INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
@@ -898,16 +901,16 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
+ int i;
- list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
- INIT_LIST_HEAD(&rq->sched.link);
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ list_del_init(&rq->sched.link);
dma_fence_set_error(&rq->fence, -EIO);
__i915_request_submit(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
- INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
@@ -1108,8 +1111,7 @@ static void queue_request(struct intel_engine_cs *engine,
struct i915_sched_node *node,
int prio)
{
- list_add_tail(&node->link,
- &lookup_priolist(engine, prio)->requests);
+ list_add_tail(&node->link, lookup_priolist(engine, prio));
}
static void __update_queue(struct intel_engine_cs *engine, int prio)
@@ -1179,7 +1181,7 @@ sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked)
static void execlists_schedule(struct i915_request *request,
const struct i915_sched_attr *attr)
{
- struct i915_priolist *uninitialized_var(pl);
+ struct list_head *uninitialized_var(plist);
struct intel_engine_cs *engine, *last;
struct i915_dependency *dep, *p;
struct i915_dependency stack;
@@ -1271,11 +1273,10 @@ static void execlists_schedule(struct i915_request *request,
node->attr.priority = prio;
if (!list_empty(&node->link)) {
if (last != engine) {
- pl = lookup_priolist(engine, prio);
+ plist = lookup_priolist(engine, prio);
last = engine;
}
- GEM_BUG_ON(pl->priority != prio);
- list_move_tail(&node->link, &pl->requests);
+ list_move_tail(&node->link, plist);
}
if (prio > engine->execlists.queue_priority &&
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index bd443cbe450c..224437a728c6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -187,11 +187,24 @@ enum intel_engine_id {
};
struct i915_priolist {
+ struct list_head requests[I915_PRIORITY_COUNT];
struct rb_node node;
- struct list_head requests;
+ unsigned long used;
int priority;
};
+#define priolist_for_each_request(it, plist, idx) \
+ for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \
+ list_for_each_entry(it, &(plist)->requests[idx], sched.link)
+
+#define priolist_for_each_request_consume(it, n, plist, idx) \
+ GEM_BUG_ON(!(plist)->used); \
+ for (; \
+ (plist)->used ? idx = ffs((plist)->used) - 1, 1 : 0; \
+ (plist)->used &= ~BIT(idx)) \
+ list_for_each_entry_safe(it, n, \
+ &(plist)->requests[idx], sched.link)
+
/**
* struct intel_engine_execlists - execlist submission queue and port state
*
--
2.17.1
More information about the Intel-gfx-trybot
mailing list