[PATCH 16/16] drm/i915/breadcrumbs: Reduce signaler rbtree to a sorted list
Chris Wilson
chris at chris-wilson.co.uk
Fri Jan 5 01:06:25 UTC 2018
The goal here is to try to reduce the latency of signaling additional
requests following the wakeup from interrupt, by converting the set of
to-be-signaled requests from an rbtree into a sorted linked list. The
original choice of an rbtree was to facilitate random insertions of
requests into the signaler while maintaining a sorted list. However,
if we assume that most new requests are added when they are submitted,
we see those new requests arrive in execution order, making an insertion
sort fast, and the reduction in overhead of each signaler iteration
significant.
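
To illustrate the argument, here is a minimal, self-contained sketch
(userspace C, not kernel code; the struct, helpers and names are
hypothetical stand-ins, not the i915 ones) of inserting into a
seqno-sorted list by scanning backwards from the tail, in the spirit of
the insert_signal() helper added below: when insertions arrive in
execution order, the scan terminates after a single comparison.

/*
 * Standalone illustration (userspace C, not kernel code): keep a list of
 * pending signals sorted by seqno, inserting by scanning backwards from
 * the tail.  When most insertions arrive in execution order the scan
 * stops after a single comparison, which is the property relied on when
 * replacing the rbtree with a sorted list.
 */
#include <stdint.h>
#include <stdio.h>

struct sig_node {                       /* hypothetical stand-in for a request */
	uint32_t seqno;
	struct sig_node *prev, *next;
};

struct sig_list {                       /* circular list with a sentinel head */
	struct sig_node head;
};

/* wraparound-safe "a is at or after b", in the style of i915_seqno_passed() */
static int seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

static void sig_list_init(struct sig_list *l)
{
	l->head.prev = l->head.next = &l->head;
}

/* walk backwards from the newest entry; stop at the first not-newer seqno */
static void insert_sorted(struct sig_list *l, struct sig_node *node)
{
	struct sig_node *pos = l->head.prev;

	while (pos != &l->head && !seqno_passed(node->seqno, pos->seqno))
		pos = pos->prev;

	node->prev = pos;
	node->next = pos->next;
	pos->next->prev = node;
	pos->next = node;
}

int main(void)
{
	uint32_t seqnos[] = { 1, 2, 4, 3, 5 }; /* mostly in order, one straggler */
	struct sig_node nodes[5];
	struct sig_list list;

	sig_list_init(&list);
	for (int i = 0; i < 5; i++) {
		nodes[i].seqno = seqnos[i];
		insert_sorted(&list, &nodes[i]);
	}

	for (struct sig_node *n = list.head.next; n != &list.head; n = n->next)
		printf("%u ", (unsigned)n->seqno); /* prints: 1 2 3 4 5 */
	printf("\n");

	return 0;
}

Only the out-of-order seqno (3) makes the scan step past a node; each
in-order insertion terminates after one comparison, so the per-insert
cost stays close to a plain list append.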
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 4 +-
drivers/gpu/drm/i915/i915_gem_request.h | 2 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 8 +-
drivers/gpu/drm/i915/intel_breadcrumbs.c | 263 +++++++++++--------------------
drivers/gpu/drm/i915/intel_engine_cs.c | 4 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-
6 files changed, 102 insertions(+), 186 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2bb63073d73f..e91b7c77e063 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1298,14 +1298,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
&dev_priv->gpu_error.missed_irq_rings)),
yesno(engine->hangcheck.stalled));
- spin_lock_irq(&b->rb_lock);
+ spin_lock_irq(&b->wait_lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);
seq_printf(m, "\t%s [%d] waiting for %x\n",
w->tsk->comm, w->tsk->pid, w->seqno);
}
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irq(&b->wait_lock);
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 6c607f8dbf92..a2dd3d71c0f8 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -44,8 +44,8 @@ struct intel_wait {
};
struct intel_signal_node {
- struct rb_node node;
struct intel_wait wait;
+ struct list_head link;
};
struct i915_dependency {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 944059322daa..ebc44d715107 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1125,7 +1125,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
if (RB_EMPTY_ROOT(&b->waiters))
return;
- if (!spin_trylock_irq(&b->rb_lock)) {
+ if (!spin_trylock_irq(&b->wait_lock)) {
ee->waiters = ERR_PTR(-EDEADLK);
return;
}
@@ -1133,7 +1133,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
count = 0;
for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
count++;
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irq(&b->wait_lock);
waiter = NULL;
if (count)
@@ -1143,7 +1143,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
if (!waiter)
return;
- if (!spin_trylock_irq(&b->rb_lock)) {
+ if (!spin_trylock_irq(&b->wait_lock)) {
kfree(waiter);
ee->waiters = ERR_PTR(-EDEADLK);
return;
@@ -1161,7 +1161,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
if (++ee->num_waiters == count)
break;
}
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irq(&b->wait_lock);
}
static void error_record_engine_registers(struct i915_gpu_state *error,
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index d5b40208690e..357800d79fa7 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -247,7 +247,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP)
missed_breadcrumb(engine);
- spin_lock_irq(&b->rb_lock);
+ spin_lock_irq(&b->wait_lock);
spin_lock(&b->irq_lock);
b->irq_wait = NULL;
@@ -261,7 +261,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
}
b->waiters = RB_ROOT;
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irq(&b->wait_lock);
}
static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -347,21 +347,23 @@ static inline struct intel_wait *to_wait(struct rb_node *node)
static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
struct intel_wait *wait)
{
- lockdep_assert_held(&b->rb_lock);
+ lockdep_assert_held(&b->wait_lock);
GEM_BUG_ON(b->irq_wait == wait);
- /* This request is completed, so remove it from the tree, mark it as
+ /*
+ * This request is completed, so remove it from the tree, mark it as
* complete, and *then* wake up the associated task. N.B. when the
* task wakes up, it will find the empty rb_node, discern that it
* has already been removed from the tree and skip the serialisation
- * of the b->rb_lock and b->irq_lock. This means that the destruction
+ * of the b->wait_lock and b->irq_lock. This means that the destruction
* of the intel_wait is not serialised with the interrupt handler
* by the waiter - it must instead be serialised by the caller.
*/
rb_erase(&wait->node, &b->waiters);
RB_CLEAR_NODE(&wait->node);
- wake_up_process(wait->tsk); /* implicit smp_wmb() */
+ if (wait->tsk->state != TASK_RUNNING)
+ wake_up_process(wait->tsk); /* implicit smp_wmb() */
}
static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
@@ -497,9 +499,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_breadcrumbs *b = &engine->breadcrumbs;
bool armed;
- spin_lock_irq(&b->rb_lock);
+ spin_lock_irq(&b->wait_lock);
armed = __intel_engine_add_wait(engine, wait);
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irq(&b->wait_lock);
if (armed)
return armed;
@@ -527,7 +529,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- lockdep_assert_held(&b->rb_lock);
+ lockdep_assert_held(&b->wait_lock);
if (RB_EMPTY_NODE(&wait->node))
goto out;
@@ -597,39 +599,9 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
return;
}
- spin_lock_irq(&b->rb_lock);
+ spin_lock_irq(&b->wait_lock);
__intel_engine_remove_wait(engine, wait);
- spin_unlock_irq(&b->rb_lock);
-}
-
-static bool signal_valid(const struct drm_i915_gem_request *request)
-{
- return intel_wait_check_request(&request->signaling.wait, request);
-}
-
-static bool signal_complete(const struct drm_i915_gem_request *request)
-{
- if (!request)
- return false;
-
- /* If another process served as the bottom-half it may have already
- * signalled that this wait is already completed.
- */
- if (intel_wait_complete(&request->signaling.wait))
- return signal_valid(request);
-
- /* Carefully check if the request is complete, giving time for the
- * seqno to be visible or if the GPU hung.
- */
- if (__i915_request_irq_complete(request))
- return true;
-
- return false;
-}
-
-static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
-{
- return rb_entry(rb, struct drm_i915_gem_request, signaling.node);
+ spin_unlock_irq(&b->wait_lock);
}
static void signaler_set_rtpriority(void)
@@ -639,75 +611,22 @@ static void signaler_set_rtpriority(void)
sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
}
-static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
- struct drm_i915_gem_request *request)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- lockdep_assert_held(&b->rb_lock);
-
- /*
- * Wake up all other completed waiters and select the
- * next bottom-half for the next user interrupt.
- */
- __intel_engine_remove_wait(engine, &request->signaling.wait);
-
- /*
- * Find the next oldest signal. Note that as we have
- * not been holding the lock, another client may
- * have installed an even older signal than the one
- * we just completed - so double check we are still
- * the oldest before picking the next one.
- */
- if (request->signaling.wait.seqno) {
- if (request == rcu_access_pointer(b->first_signal)) {
- struct rb_node *rb = rb_next(&request->signaling.node);
- rcu_assign_pointer(b->first_signal,
- rb ? to_signaler(rb) : NULL);
- }
-
- rb_erase(&request->signaling.node, &b->signals);
- request->signaling.wait.seqno = 0;
- }
-}
-
-static struct drm_i915_gem_request *first_signal(struct intel_breadcrumbs *b)
-{
- /*
- * See the big warnings for i915_gem_active_get_rcu() and similarly
- * for dma_fence_get_rcu_safe() that explain the intricacies involved
- * here with defeating CPU/compiler speculation and enforcing
- * the required memory barriers.
- */
- do {
- struct drm_i915_gem_request *request;
-
- request = rcu_dereference(b->first_signal);
- if (request)
- request = i915_gem_request_get_rcu(request);
-
- barrier();
-
- if (!request || request == rcu_access_pointer(b->first_signal))
- return rcu_pointer_handoff(request);
-
- i915_gem_request_put(request);
- } while (1);
-}
-
static int intel_breadcrumbs_signaler(void *arg)
{
struct intel_engine_cs *engine = arg;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct drm_i915_gem_request *request;
+ struct drm_i915_gem_request *request, *n;
/* Install ourselves with high priority to reduce signalling latency */
signaler_set_rtpriority();
do {
- bool do_schedule = true;
+ LIST_HEAD(signal);
+ u32 seqno;
set_current_state(TASK_INTERRUPTIBLE);
+ if (list_empty(&b->signals))
+ goto skip;
/* We are either woken up by the interrupt bottom-half,
* or by a client adding a new signaller. In both cases,
@@ -717,35 +636,46 @@ static int intel_breadcrumbs_signaler(void *arg)
* need to wait for a new interrupt from the GPU or for
* a new client.
*/
- rcu_read_lock();
- request = first_signal(b);
- rcu_read_unlock();
- if (signal_complete(request)) {
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &request->fence.flags)) {
- local_bh_disable();
- dma_fence_signal(&request->fence);
- local_bh_enable(); /* kick start the tasklets */
- }
+ seqno = intel_engine_get_seqno(engine);
+
+ spin_lock_irq(&b->wait_lock);
+ list_for_each_entry_safe(request, n, &b->signals, signaling.link) {
+ u32 this = request->signaling.wait.seqno;
+
+ if (!i915_seqno_passed(seqno, this))
+ break;
- if (request->signaling.wait.seqno) {
- spin_lock_irq(&b->rb_lock);
- __intel_engine_remove_signal(engine, request);
- spin_unlock_irq(&b->rb_lock);
+ if (this == i915_gem_request_global_seqno(request)) {
+ i915_gem_request_get(request);
+
+ list_move_tail(&request->signaling.link, &signal);
+ request->signaling.wait.seqno = 0;
}
+ }
+ spin_unlock_irq(&b->wait_lock);
+
+ local_bh_disable();
+ list_for_each_entry(request, &signal, signaling.link)
+ dma_fence_signal(&request->fence);
+ local_bh_enable(); /* kick start the tasklets */
+
+ spin_lock_irq(&b->wait_lock);
+ list_for_each_entry_safe(request, n, &signal, signaling.link) {
+ __intel_engine_remove_wait(engine,
+ &request->signaling.wait);
+ i915_gem_request_put(request);
+ }
+ spin_unlock_irq(&b->wait_lock);
- /* If the engine is saturated we may be continually
- * processing completed requests. This angers the
- * NMI watchdog if we never let anything else
- * have access to the CPU. Let's pretend to be nice
- * and relinquish the CPU if we burn through the
- * entire RT timeslice!
- */
- do_schedule = need_resched();
+ if (engine->irq_seqno_barrier &&
+ test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
+ &engine->irq_posted)) {
+ engine->irq_seqno_barrier(engine);
+ intel_engine_wakeup(engine);
}
- i915_gem_request_put(request);
- if (unlikely(do_schedule)) {
+skip:
+ if (list_empty(&signal) || need_resched()) {
if (kthread_should_park())
kthread_parkme();
@@ -760,6 +690,21 @@ static int intel_breadcrumbs_signaler(void *arg)
return 0;
}
+static void insert_signal(struct intel_breadcrumbs *b,
+ struct drm_i915_gem_request *request)
+{
+ struct drm_i915_gem_request *iter;
+
+ lockdep_assert_held(&b->wait_lock);
+
+ list_for_each_entry_reverse(iter, &b->signals, signaling.link)
+ if (i915_seqno_passed(request->signaling.wait.seqno,
+ iter->signaling.wait.seqno))
+ break;
+
+ list_add(&request->signaling.link, &iter->signaling.link);
+}
+
void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
bool wakeup)
{
@@ -770,7 +715,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
/* Note that we may be called from an interrupt handler on another
* device (e.g. nouveau signaling a fence completion causing us
* to submit a request, and so enable signaling). As such,
- * we need to make sure that all other users of b->rb_lock protect
+ * we need to make sure that all other users of b->wait_lock protect
* against interrupts, i.e. use spin_lock_irqsave.
*/
@@ -782,12 +727,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
if (!seqno)
return;
- spin_lock(&b->rb_lock);
+ spin_lock(&b->wait_lock);
GEM_BUG_ON(request->signaling.wait.seqno);
request->signaling.wait.tsk = b->signaler;
request->signaling.wait.request = request;
request->signaling.wait.seqno = seqno;
+ insert_signal(b, request);
/* First add ourselves into the list of waiters, but register our
* bottom-half as the signaller thread. As per usual, only the oldest
@@ -799,38 +745,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
*/
wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
- if (!__i915_gem_request_completed(request, seqno)) {
- struct rb_node *parent, **p;
- bool first;
-
- /* Now insert ourselves into the retirement ordered list of
- * signals on this engine. We track the oldest seqno as that
- * will be the first signal to complete.
- */
- parent = NULL;
- first = true;
- p = &b->signals.rb_node;
- while (*p) {
- parent = *p;
- if (i915_seqno_passed(seqno,
- to_signaler(parent)->signaling.wait.seqno)) {
- p = &parent->rb_right;
- first = false;
- } else {
- p = &parent->rb_left;
- }
- }
- rb_link_node(&request->signaling.node, parent, p);
- rb_insert_color(&request->signaling.node, &b->signals);
- if (first)
- rcu_assign_pointer(b->first_signal, request);
- } else {
- __intel_engine_remove_wait(engine, &request->signaling.wait);
- request->signaling.wait.seqno = 0;
- wakeup = false;
- }
-
- spin_unlock(&b->rb_lock);
+ spin_unlock(&b->wait_lock);
if (wakeup)
wake_up_process(b->signaler);
@@ -838,17 +753,22 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
{
+ struct intel_engine_cs *engine = request->engine;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&request->lock);
- if (request->signaling.wait.seqno) {
- struct intel_engine_cs *engine = request->engine;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ if (!request->signaling.wait.seqno)
+ return;
- spin_lock(&b->rb_lock);
- __intel_engine_remove_signal(engine, request);
- spin_unlock(&b->rb_lock);
+ spin_lock(&b->wait_lock);
+ __intel_engine_remove_wait(engine, &request->signaling.wait);
+ if (request->signaling.wait.seqno) {
+ list_del(&request->signaling.link);
+ request->signaling.wait.seqno = 0;
}
+ spin_unlock(&b->wait_lock);
}
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -856,8 +776,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
struct intel_breadcrumbs *b = &engine->breadcrumbs;
struct task_struct *tsk;
- spin_lock_init(&b->rb_lock);
- lockdep_set_name(&b->rb_lock, "intel_breadcrumbs->rb");
+ spin_lock_init(&b->wait_lock);
+ lockdep_set_name(&b->wait_lock, "intel_breadcrumbs->wait");
spin_lock_init(&b->irq_lock);
lockdep_set_name(&b->irq_lock, "intel_breadcrumbs->irq");
@@ -865,6 +785,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
+ INIT_LIST_HEAD(&b->signals);
+
/* Spawn a thread to provide a common bottom-half for all signals.
* As this is an asynchronous interface we cannot steal the current
* task for handling the bottom-half to the user interrupt, therefore
@@ -924,8 +846,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
/* The engines should be idle and all requests accounted for! */
WARN_ON(READ_ONCE(b->irq_wait));
WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
- WARN_ON(rcu_access_pointer(b->first_signal));
- WARN_ON(!RB_EMPTY_ROOT(&b->signals));
if (!IS_ERR_OR_NULL(b->signaler))
kthread_stop(b->signaler);
@@ -938,19 +858,14 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
struct intel_breadcrumbs *b = &engine->breadcrumbs;
bool busy = false;
- spin_lock_irq(&b->rb_lock);
+ spin_lock_irq(&b->irq_lock);
if (b->irq_wait) {
wake_up_process(b->irq_wait->tsk);
busy = true;
}
- if (rcu_access_pointer(b->first_signal)) {
- wake_up_process(b->signaler);
- busy = true;
- }
-
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irq(&b->irq_lock);
return busy;
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 5f6aa260bd4e..a4306f7065cd 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1878,14 +1878,14 @@ void intel_engine_dump(struct intel_engine_cs *engine,
}
spin_unlock_irq(&engine->timeline->lock);
- spin_lock_irq(&b->rb_lock);
+ spin_lock_irq(&b->wait_lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
struct intel_wait *w = rb_entry(rb, typeof(*w), node);
drm_printf(m, "\t%s [%d] waiting for %x\n",
w->tsk->comm, w->tsk->pid, w->seqno);
}
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irq(&b->wait_lock);
if (INTEL_GEN(dev_priv) >= 6) {
drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine));
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 720df8865680..286f8e6c774f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -324,11 +324,12 @@ struct intel_engine_cs {
spinlock_t irq_lock; /* protects irq_*; irqsafe */
struct intel_wait *irq_wait; /* oldest waiter by retirement */
- spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
+ spinlock_t wait_lock; /* protects the rb and wraps irq_lock */
struct rb_root waiters; /* sorted by retirement, priority */
- struct rb_root signals; /* sorted by retirement */
+
struct task_struct *signaler; /* used for fence signalling */
- struct drm_i915_gem_request __rcu *first_signal;
+ struct list_head signals;
+
struct timer_list fake_irq; /* used after a missed interrupt */
struct timer_list hangcheck; /* detect missed interrupts */
--
2.15.1