[Intel-gfx] [PATCH 04/33] drm/i915: Use RCU to annotate and enforce protection for breadcrumb's bh
Daniel Vetter
daniel at ffwll.ch
Mon Aug 8 09:33:56 UTC 2016
On Sun, Aug 07, 2016 at 03:45:12PM +0100, Chris Wilson wrote:
> The bottom-half we use for processing the breadcrumb interrupt is a
> task, which is an RCU protected struct. When accessing this struct, we
> need to be holding the RCU read lock to prevent it disappearing beneath
> us. We can use the RCU annotation to mark our irq_seqno_bh pointer as
> being under RCU guard and then use the RCU accessors to both provide
> correct ordering of access through the pointer.
>
> Most notably, this fixes the access from hard irq context to use the RCU
> read lock, which both Daniel and Tvrtko complained about.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
I'll leave sparse-checking this to 0day and runtime lockdep checking to CI
;-)
> ---
> drivers/gpu/drm/i915/i915_drv.h | 2 +-
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++-------------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 --
> drivers/gpu/drm/i915/intel_ringbuffer.h | 21 ++++++++++++++-------
> 4 files changed, 24 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index feec00f769e1..3d546b5c2e4c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3848,7 +3848,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
> * is woken.
> */
> if (engine->irq_seqno_barrier &&
> - READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current &&
> + rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
> cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
> struct task_struct *tsk;
>
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 8ecb3b6538fc..7552bd039565 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -60,10 +60,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
> * every jiffie in order to kick the oldest waiter to do the
> * coherent seqno check.
> */
> - rcu_read_lock();
> if (intel_engine_wakeup(engine))
> mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> - rcu_read_unlock();
> }
>
> static void irq_enable(struct intel_engine_cs *engine)
> @@ -232,7 +230,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> }
> rb_link_node(&wait->node, parent, p);
> rb_insert_color(&wait->node, &b->waiters);
> - GEM_BUG_ON(!first && !b->irq_seqno_bh);
> + GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
Nit: reading through rcu docs I think the suggested accessor here is
rcu_dereference_protected for write-side access. That one allows the
compiler full freedom for reordering. otoh it's a bit more noise-y and meh
about optional debug checks anyway. So with or without that change:
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
>
> if (completed) {
> struct rb_node *next = rb_next(completed);
> @@ -242,7 +240,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> GEM_BUG_ON(first);
> b->timeout = wait_timeout();
> b->first_wait = to_wait(next);
> - smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
> + rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
> /* As there is a delay between reading the current
> * seqno, processing the completed tasks and selecting
> * the next waiter, we may have missed the interrupt
> @@ -269,7 +267,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
> b->timeout = wait_timeout();
> b->first_wait = wait;
> - smp_store_mb(b->irq_seqno_bh, wait->tsk);
> + rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
> /* After assigning ourselves as the new bottom-half, we must
> * perform a cursory check to prevent a missed interrupt.
> * Either we miss the interrupt whilst programming the hardware,
> @@ -280,7 +278,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> */
> __intel_breadcrumbs_enable_irq(b);
> }
> - GEM_BUG_ON(!b->irq_seqno_bh);
> + GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
> GEM_BUG_ON(!b->first_wait);
> GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
>
> @@ -335,7 +333,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
> const int priority = wakeup_priority(b, wait->tsk);
> struct rb_node *next;
>
> - GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);
> + GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
>
> /* We are the current bottom-half. Find the next candidate,
> * the first waiter in the queue on the remaining oldest
> @@ -379,13 +377,13 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
> */
> b->timeout = wait_timeout();
> b->first_wait = to_wait(next);
> - smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
> + rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
> if (b->first_wait->seqno != wait->seqno)
> __intel_breadcrumbs_enable_irq(b);
> - wake_up_process(b->irq_seqno_bh);
> + wake_up_process(b->first_wait->tsk);
> } else {
> b->first_wait = NULL;
> - WRITE_ONCE(b->irq_seqno_bh, NULL);
> + rcu_assign_pointer(b->irq_seqno_bh, NULL);
> __intel_breadcrumbs_disable_irq(b);
> }
> } else {
> @@ -399,7 +397,7 @@ out_unlock:
> GEM_BUG_ON(b->first_wait == wait);
> GEM_BUG_ON(rb_first(&b->waiters) !=
> (b->first_wait ? &b->first_wait->node : NULL));
> - GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
> + GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
> spin_unlock(&b->lock);
> }
>
> @@ -596,11 +594,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915)
> * RCU lock, i.e. as we call wake_up_process() we must be holding the
> * rcu_read_lock().
> */
> - rcu_read_lock();
> for_each_engine(engine, i915)
> if (unlikely(intel_engine_wakeup(engine)))
> mask |= intel_engine_flag(engine);
> - rcu_read_unlock();
>
> return mask;
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index e08a1e1b04e4..16b726fe33eb 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2410,9 +2410,7 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
> /* After manually advancing the seqno, fake the interrupt in case
> * there are any waiters for that seqno.
> */
> - rcu_read_lock();
> intel_engine_wakeup(engine);
> - rcu_read_unlock();
> }
>
> static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 4aed4586b0b6..66dc93469076 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -171,7 +171,7 @@ struct intel_engine_cs {
> * the overhead of waking that client is much preferred.
> */
> struct intel_breadcrumbs {
> - struct task_struct *irq_seqno_bh; /* bh for user interrupts */
> + struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
> bool irq_posted;
>
> spinlock_t lock; /* protects the lists of requests */
> @@ -541,23 +541,30 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
>
> static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
> {
> - return READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
> + return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
> }
>
> static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
> {
> bool wakeup = false;
> - struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
> +
> /* Note that for this not to dangerously chase a dangling pointer,
> - * the caller is responsible for ensure that the task remain valid for
> - * wake_up_process() i.e. that the RCU grace period cannot expire.
> + * we must hold the rcu_read_lock here.
> *
> * Also note that tsk is likely to be in !TASK_RUNNING state so an
> * early test for tsk->state != TASK_RUNNING before wake_up_process()
> * is unlikely to be beneficial.
> */
> - if (tsk)
> - wakeup = wake_up_process(tsk);
> + if (rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh)) {
> + struct task_struct *tsk;
> +
> + rcu_read_lock();
> + tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
> + if (tsk)
> + wakeup = wake_up_process(tsk);
> + rcu_read_unlock();
> + }
> +
> return wakeup;
> }
>
> --
> 2.8.1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list