[Intel-gfx] [PATCH v2 2/3] drm/i915: Split breadcrumbs spinlock into two
Mika Kuoppala
mika.kuoppala at linux.intel.com
Fri Mar 3 17:26:34 UTC 2017
Chris Wilson <chris at chris-wilson.co.uk> writes:
> As we now take the breadcrumbs spinlock within the interrupt handler, we
> wish to minimise its hold time. During the interrupt we do not care
> about the state of the full rbtree, only that of the first element, so
> we can guard that with a separate lock.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 12 +++----
> drivers/gpu/drm/i915/i915_drv.h | 4 +--
> drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++---
> drivers/gpu/drm/i915/i915_irq.c | 4 +--
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 58 ++++++++++++++++++--------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++--
> 6 files changed, 51 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 95046822e8e0..aa2d726b4349 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -700,14 +700,14 @@ static void i915_ring_seqno_info(struct seq_file *m,
> seq_printf(m, "Current sequence (%s): %x\n",
> engine->name, intel_engine_get_seqno(engine));
>
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->rb_lock);
> for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
> struct intel_wait *w = rb_entry(rb, typeof(*w), node);
>
> seq_printf(m, "Waiting (%s): %s [%d] on %x\n",
> engine->name, w->tsk->comm, w->tsk->pid, w->seqno);
> }
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
> }
>
> static int i915_gem_seqno_info(struct seq_file *m, void *data)
> @@ -1354,14 +1354,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
> &dev_priv->gpu_error.missed_irq_rings)),
> yesno(engine->hangcheck.stalled));
>
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->rb_lock);
> for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
> struct intel_wait *w = rb_entry(rb, typeof(*w), node);
>
> seq_printf(m, "\t%s [%d] waiting for %x\n",
> w->tsk->comm, w->tsk->pid, w->seqno);
> }
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
>
> seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
> (long long)engine->hangcheck.acthd,
> @@ -3359,14 +3359,14 @@ static int i915_engine_info(struct seq_file *m, void *unused)
> I915_READ(RING_PP_DIR_DCLV(engine)));
> }
>
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->rb_lock);
> for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
> struct intel_wait *w = rb_entry(rb, typeof(*w), node);
>
> seq_printf(m, "\t%s [%d] waiting for %x\n",
> w->tsk->comm, w->tsk->pid, w->seqno);
> }
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
>
> seq_puts(m, "\n");
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cb760156bbc5..0da14c304771 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4100,7 +4100,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
> * the seqno before we believe it coherent since they see
> * irq_posted == false but we are still running).
> */
> - spin_lock_irqsave(&b->lock, flags);
> + spin_lock_irqsave(&b->irq_lock, flags);
> if (b->first_wait && b->first_wait->tsk != current)
> /* Note that if the bottom-half is changed as we
> * are sending the wake-up, the new bottom-half will
> @@ -4109,7 +4109,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
> * ourself.
> */
> wake_up_process(b->first_wait->tsk);
> - spin_unlock_irqrestore(&b->lock, flags);
> + spin_unlock_irqrestore(&b->irq_lock, flags);
>
> if (__i915_gem_request_completed(req, seqno))
> return true;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 061af8040498..8effc59f5cb5 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1111,7 +1111,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
> if (RB_EMPTY_ROOT(&b->waiters))
> return;
>
> - if (!spin_trylock_irq(&b->lock)) {
> + if (!spin_trylock_irq(&b->rb_lock)) {
> ee->waiters = ERR_PTR(-EDEADLK);
> return;
> }
> @@ -1119,7 +1119,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
> count = 0;
> for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
> count++;
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
>
> waiter = NULL;
> if (count)
> @@ -1129,7 +1129,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
> if (!waiter)
> return;
>
> - if (!spin_trylock_irq(&b->lock)) {
> + if (!spin_trylock_irq(&b->rb_lock)) {
> kfree(waiter);
> ee->waiters = ERR_PTR(-EDEADLK);
> return;
> @@ -1147,7 +1147,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine,
> if (++ee->num_waiters == count)
> break;
> }
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
> }
>
> static void error_record_engine_registers(struct i915_gpu_state *error,
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 5fa2c4c56b09..3f39e36fa566 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1041,7 +1041,7 @@ static void notify_ring(struct intel_engine_cs *engine)
>
> rcu_read_lock();
>
> - spin_lock(&engine->breadcrumbs.lock);
> + spin_lock(&engine->breadcrumbs.irq_lock);
> wait = engine->breadcrumbs.first_wait;
> if (wait) {
> /* We use a callback from the dma-fence to submit
> @@ -1063,7 +1063,7 @@ static void notify_ring(struct intel_engine_cs *engine)
> } else {
> __intel_engine_disarm_breadcrumbs(engine);
> }
> - spin_unlock(&engine->breadcrumbs.lock);
> + spin_unlock(&engine->breadcrumbs.irq_lock);
>
> if (rq)
> dma_fence_signal(&rq->fence);
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 1cc50304f824..34200f14bade 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -31,6 +31,8 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
> struct intel_wait *wait;
> unsigned int result = 0;
>
> + lockdep_assert_held(&b->irq_lock);
> +
> wait = b->first_wait;
> if (wait) {
> result = ENGINE_WAKEUP_WAITER;
> @@ -47,9 +49,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
> unsigned long flags;
> unsigned int result;
>
> - spin_lock_irqsave(&b->lock, flags);
> + spin_lock_irqsave(&b->irq_lock, flags);
> result = __intel_breadcrumbs_wakeup(b);
> - spin_unlock_irqrestore(&b->lock, flags);
> + spin_unlock_irqrestore(&b->irq_lock, flags);
>
> return result;
> }
> @@ -117,10 +119,10 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
> * coherent seqno check.
> */
>
> - spin_lock_irqsave(&b->lock, flags);
> + spin_lock_irqsave(&b->irq_lock, flags);
> if (!__intel_breadcrumbs_wakeup(b))
> __intel_engine_disarm_breadcrumbs(engine);
> - spin_unlock_irqrestore(&b->lock, flags);
> + spin_unlock_irqrestore(&b->irq_lock, flags);
> if (!b->irq_armed)
> return;
>
> @@ -164,7 +166,7 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> {
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> - lockdep_assert_held(&b->lock);
> + lockdep_assert_held(&b->irq_lock);
>
> if (b->irq_enabled) {
> irq_disable(engine);
> @@ -182,7 +184,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
> if (!b->irq_armed)
> return;
>
> - spin_lock_irqsave(&b->lock, flags);
> + spin_lock_irqsave(&b->irq_lock, flags);
>
> /* We only disarm the irq when we are idle (all requests completed),
> * so if there remains a sleeping waiter, it missed the request
> @@ -193,7 +195,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
>
> __intel_engine_disarm_breadcrumbs(engine);
>
> - spin_unlock_irqrestore(&b->lock, flags);
> + spin_unlock_irqrestore(&b->irq_lock, flags);
> }
>
> static bool use_fake_irq(const struct intel_breadcrumbs *b)
> @@ -228,7 +230,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> container_of(b, struct intel_engine_cs, breadcrumbs);
> struct drm_i915_private *i915 = engine->i915;
>
> - lockdep_assert_held(&b->lock);
> + lockdep_assert_held(&b->irq_lock);
> if (b->irq_armed)
> return;
>
> @@ -276,7 +278,7 @@ static inline struct intel_wait *to_wait(struct rb_node *node)
> static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
> struct intel_wait *wait)
> {
> - lockdep_assert_held(&b->lock);
> + lockdep_assert_held(&b->rb_lock);
>
> /* This request is completed, so remove it from the tree, mark it as
> * complete, and *then* wake up the associated task.
> @@ -292,8 +294,10 @@ static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
> {
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> + spin_lock(&b->irq_lock);
> GEM_BUG_ON(!b->irq_armed);
> b->first_wait = to_wait(next);
> + spin_unlock(&b->irq_lock);
>
> /* We always wake up the next waiter that takes over as the bottom-half
> * as we may delegate not only the irq-seqno barrier to the next waiter
> @@ -383,6 +387,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> }
>
> if (first) {
> + spin_lock(&b->irq_lock);
> GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
> b->first_wait = wait;
> /* After assigning ourselves as the new bottom-half, we must
> @@ -394,6 +399,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> * and so we miss the wake up.
> */
> __intel_breadcrumbs_enable_irq(b);
> + spin_unlock(&b->irq_lock);
> }
> GEM_BUG_ON(!b->first_wait);
> GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
> @@ -407,9 +413,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> bool first;
>
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->rb_lock);
> first = __intel_engine_add_wait(engine, wait);
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
>
> return first;
> }
> @@ -433,7 +439,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
> {
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> - lockdep_assert_held(&b->lock);
> + lockdep_assert_held(&b->rb_lock);
>
> if (RB_EMPTY_NODE(&wait->node))
> goto out;
> @@ -503,9 +509,9 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
> if (RB_EMPTY_NODE(&wait->node))
> return;
>
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->rb_lock);
> __intel_engine_remove_wait(engine, wait);
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
> }
>
> static bool signal_valid(const struct drm_i915_gem_request *request)
> @@ -575,7 +581,7 @@ static int intel_breadcrumbs_signaler(void *arg)
> dma_fence_signal(&request->fence);
> local_bh_enable(); /* kick start the tasklets */
>
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->rb_lock);
>
> /* Wake up all other completed waiters and select the
> * next bottom-half for the next user interrupt.
> @@ -598,7 +604,7 @@ static int intel_breadcrumbs_signaler(void *arg)
> rb_erase(&request->signaling.node, &b->signals);
> RB_CLEAR_NODE(&request->signaling.node);
>
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
>
> i915_gem_request_put(request);
> } else {
> @@ -655,7 +661,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> request->signaling.wait.seqno = seqno;
> i915_gem_request_get(request);
>
> - spin_lock(&b->lock);
> + spin_lock(&b->rb_lock);
>
> /* First add ourselves into the list of waiters, but register our
> * bottom-half as the signaller thread. As per usual, only the oldest
> @@ -689,7 +695,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> if (first)
> rcu_assign_pointer(b->first_signal, request);
>
> - spin_unlock(&b->lock);
> + spin_unlock(&b->rb_lock);
>
> if (wakeup)
> wake_up_process(b->signaler);
> @@ -704,7 +710,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
> lockdep_assert_held(&request->lock);
> GEM_BUG_ON(!request->signaling.wait.seqno);
>
> - spin_lock(&b->lock);
> + spin_lock(&b->rb_lock);
>
> if (!RB_EMPTY_NODE(&request->signaling.node)) {
> if (request == rcu_access_pointer(b->first_signal)) {
> @@ -720,7 +726,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request)
>
> __intel_engine_remove_wait(engine, &request->signaling.wait);
>
> - spin_unlock(&b->lock);
> + spin_unlock(&b->rb_lock);
>
> request->signaling.wait.seqno = 0;
> }
> @@ -730,7 +736,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> struct task_struct *tsk;
>
> - spin_lock_init(&b->lock);
> + spin_lock_init(&b->rb_lock);
> + spin_lock_init(&b->irq_lock);
> +
> setup_timer(&b->fake_irq,
> intel_breadcrumbs_fake_irq,
> (unsigned long)engine);
> @@ -768,7 +776,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
>
> cancel_fake_irq(engine);
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->irq_lock);
In here I was thinking that you want both locks help, but
then can't find a reason why. Perhaps just to ensure that
the wait tree stays still.
>
> if (b->irq_enabled)
> irq_enable(engine);
> @@ -787,7 +795,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
> if (b->irq_armed)
> enable_fake_irq(b);
>
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->irq_lock);
> }
>
> void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
> @@ -811,7 +819,7 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
> struct intel_breadcrumbs *b = &engine->breadcrumbs;
> bool busy = false;
>
> - spin_lock_irq(&b->lock);
> + spin_lock_irq(&b->rb_lock);
Wrong lock taken and relased in this function?
-Mika
>
> if (b->first_wait) {
> wake_up_process(b->first_wait->tsk);
> @@ -823,7 +831,7 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
> busy |= intel_engine_flag(engine);
> }
>
> - spin_unlock_irq(&b->lock);
> + spin_unlock_irq(&b->rb_lock);
>
> return busy;
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 55a6a3f8274c..621ac9998d16 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -235,10 +235,12 @@ struct intel_engine_cs {
> * the overhead of waking that client is much preferred.
> */
> struct intel_breadcrumbs {
> - spinlock_t lock; /* protects the lists of requests; irqsafe */
> + spinlock_t irq_lock; /* protects first_wait & irq_*; irqsafe */
> + struct intel_wait *first_wait; /* oldest waiter by retirement */
> +
> + spinlock_t rb_lock; /* protects the rbtrees; irqsafe */
> struct rb_root waiters; /* sorted by retirement, priority */
> struct rb_root signals; /* sorted by retirement */
> - struct intel_wait *first_wait; /* oldest waiter by retirement */
> struct task_struct *signaler; /* used for fence signalling */
> struct drm_i915_gem_request __rcu *first_signal;
> struct timer_list fake_irq; /* used after a missed interrupt */
> --
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list