[Intel-gfx] [PATCH v2 1/2] drm/i915: signal first fence from irq handler if complete
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Feb 24 10:04:23 UTC 2017
On 23/02/2017 15:42, Chris Wilson wrote:
> As execlists and other non-semaphore multi-engine devices coordinate
> between engines using interrupts, we can shave off a few tens of
> microseconds of scheduling latency by doing the fence signaling from the
> interrupt as opposed to an RT kthread. (Realistically the delay adds
> about 1% to an individual cross-engine workload.) We only signal the
> first fence in order to limit the amount of work we move into the
> interrupt handler. We also have to remember that our breadcrumbs may be
> unordered with respect to the interrupt and so we still require the
> waiter process to perform some heavyweight coherency fixups, as well as
> traversing the tree of waiters.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 13 ++++++------
> drivers/gpu/drm/i915/i915_gem_request.c | 2 +-
> drivers/gpu/drm/i915/i915_gem_request.h | 2 ++
> drivers/gpu/drm/i915/i915_irq.c | 35 +++++++++++++++++++++++++++++---
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 17 ++++++----------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 33 +++++++++++-------------------
> 6 files changed, 59 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index eed9ead1b592..07a69e1ff1c3 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -4065,9 +4065,9 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
> * is woken.
> */
> if (engine->irq_seqno_barrier &&
> - rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
> test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
> - struct task_struct *tsk;
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> + unsigned long flags;
>
> /* The ordering of irq_posted versus applying the barrier
> * is crucial. The clearing of the current irq_posted must
> @@ -4089,17 +4089,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
> * the seqno before we believe it coherent since they see
> * irq_posted == false but we are still running).
> */
> - rcu_read_lock();
> - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
> - if (tsk && tsk != current)
> + spin_lock_irqsave(&b->lock, flags);
> + if (b->first_wait && b->first_wait->tsk != current)
> /* Note that if the bottom-half is changed as we
> * are sending the wake-up, the new bottom-half will
> * be woken by whomever made the change. We only have
> * to worry about when we steal the irq-posted for
> * ourself.
> */
> - wake_up_process(tsk);
> - rcu_read_unlock();
> + wake_up_process(b->first_wait->tsk);
> + spin_unlock_irqrestore(&b->lock, flags);
>
> if (__i915_gem_request_completed(req, seqno))
> return true;
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
> index fbfeb5f8d069..77c3ee7a3fd0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.c
> +++ b/drivers/gpu/drm/i915/i915_gem_request.c
> @@ -1083,7 +1083,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
> if (flags & I915_WAIT_LOCKED)
> add_wait_queue(errq, &reset);
>
> - intel_wait_init(&wait);
> + intel_wait_init(&wait, req);
>
> restart:
> do {
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 5f73d8c0a38a..0efee879df23 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -32,10 +32,12 @@
>
> struct drm_file;
> struct drm_i915_gem_object;
> +struct drm_i915_gem_request;
>
> struct intel_wait {
> struct rb_node node;
> struct task_struct *tsk;
> + struct drm_i915_gem_request *request;
> u32 seqno;
Hm, do we now have a situation where both waiters and signalers hold a
reference to the request, so we could drop the seqno field from here?
> };
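I.e. could it become something like this (just a sketch, assuming the
seqno remains reachable via wait->request):

	struct intel_wait {
		struct rb_node node;
		struct task_struct *tsk;
		struct drm_i915_gem_request *request;
		/* u32 seqno dropped; look it up via wait->request instead */
	};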
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index bc70e2c451b2..3c79753edf0e 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1033,12 +1033,41 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
>
> static void notify_ring(struct intel_engine_cs *engine)
> {
> - bool waiters;
> + struct drm_i915_gem_request *rq = NULL;
> + struct intel_wait *wait;
> +
> + if (!intel_engine_has_waiter(engine))
> + return;
I think we need the tracepoint before the bail-out.
>
> atomic_inc(&engine->irq_count);
And the increment as well.
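I.e. something along these lines, perhaps (untested sketch, just
reordering the code from this hunk so the increment is unconditional and
the tracepoint fires even on the early bail-out):

	static void notify_ring(struct intel_engine_cs *engine)
	{
		struct drm_i915_gem_request *rq = NULL;
		struct intel_wait *wait = NULL;

		atomic_inc(&engine->irq_count);

		if (!intel_engine_has_waiter(engine))
			goto out;

		set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);

		spin_lock(&engine->breadcrumbs.lock);
		wait = engine->breadcrumbs.first_wait;
		if (wait) {
			if (i915_seqno_passed(intel_engine_get_seqno(engine),
					      wait->seqno))
				rq = wait->request;
			wake_up_process(wait->tsk);
		}
		spin_unlock(&engine->breadcrumbs.lock);

		if (rq)
			dma_fence_signal(&rq->fence);
	out:
		trace_intel_engine_notify(engine, wait);
	}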
> set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> - waiters = intel_engine_wakeup(engine);
> - trace_intel_engine_notify(engine, waiters);
> +
> + spin_lock(&engine->breadcrumbs.lock);
> + wait = engine->breadcrumbs.first_wait;
> + if (wait) {
> + /* We use a callback from the dma-fence to submit
> + * requests after waiting on our own requests. To
> + * ensure minimum delay in queuing the next request to
> + * hardware, signal the fence now rather than wait for
> + * the signaler to be woken up. We still wake up the
> + * waiter in order to handle the irq-seqno coherency
> + * issues (we may receive the interrupt before the
> + * seqno is written, see __i915_request_irq_complete())
> + * and to handle coalescing of multiple seqno updates
> + * and many waiters.
> + */
> + if (i915_seqno_passed(intel_engine_get_seqno(engine),
> + wait->seqno))
> + rq = wait->request;
> +
> + wake_up_process(wait->tsk);
> + }
> + spin_unlock(&engine->breadcrumbs.lock);
> +
> + if (rq) /* rq is protected by RCU, so safe from hard-irq context */
> + dma_fence_signal(&rq->fence);
> +
> + trace_intel_engine_notify(engine, wait);
> }
>
> static void vlv_c0_read(struct drm_i915_private *dev_priv,
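As an aside, for anyone following along: the latency win is on the
dma-fence callback path mentioned in the comment above. A sketch of that
path, with hypothetical names (execute_cb, queue_request_to_hw) rather
than the real i915 structures:

	struct execute_cb {
		struct dma_fence_cb cb;
		struct drm_i915_gem_request *rq;
	};

	/* Runs directly from dma_fence_signal(), i.e. with this patch from
	 * the irq handler itself instead of only after the signaler kthread
	 * has been scheduled in.
	 */
	static void submit_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
	{
		struct execute_cb *ecb = container_of(cb, struct execute_cb, cb);

		queue_request_to_hw(ecb->rq); /* hypothetical submission step */
	}

	/* At submission time, chain the next request onto the fence it
	 * depends on:
	 */
	dma_fence_add_callback(&prev->fence, &ecb->cb, submit_cb);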
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 027c93e34c97..5f2665aa817c 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -81,8 +81,11 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
> * every jiffie in order to kick the oldest waiter to do the
> * coherent seqno check.
> */
> - if (intel_engine_wakeup(engine))
> - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> + if (!intel_engine_has_waiter(engine))
> + return;
> +
> + intel_engine_wakeup(engine);
> + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
Not sure it is worth splitting this into two steps instead of just
leaving it as it was.
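That is, keeping the original two lines (exactly what the hunk removes):

	if (intel_engine_wakeup(engine))
		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);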
> }
>
> static void irq_enable(struct intel_engine_cs *engine)
> @@ -275,7 +278,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> }
> rb_link_node(&wait->node, parent, p);
> rb_insert_color(&wait->node, &b->waiters);
> - GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
>
> if (completed) {
> struct rb_node *next = rb_next(completed);
> @@ -284,7 +286,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> if (next && next != &wait->node) {
> GEM_BUG_ON(first);
> b->first_wait = to_wait(next);
> - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
> /* As there is a delay between reading the current
> * seqno, processing the completed tasks and selecting
> * the next waiter, we may have missed the interrupt
> @@ -311,7 +312,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> if (first) {
> GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
> b->first_wait = wait;
> - rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
> /* After assigning ourselves as the new bottom-half, we must
> * perform a cursory check to prevent a missed interrupt.
> * Either we miss the interrupt whilst programming the hardware,
> @@ -322,7 +322,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
> */
> __intel_breadcrumbs_enable_irq(b);
> }
> - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
> GEM_BUG_ON(!b->first_wait);
> GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
>
> @@ -370,8 +369,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
> const int priority = wakeup_priority(b, wait->tsk);
> struct rb_node *next;
>
> - GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
> -
> /* We are the current bottom-half. Find the next candidate,
> * the first waiter in the queue on the remaining oldest
> * request. As multiple seqnos may complete in the time it
> @@ -413,13 +410,11 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
> * exception rather than a seqno completion.
> */
> b->first_wait = to_wait(next);
> - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
> if (b->first_wait->seqno != wait->seqno)
> __intel_breadcrumbs_enable_irq(b);
> wake_up_process(b->first_wait->tsk);
> } else {
> b->first_wait = NULL;
> - rcu_assign_pointer(b->irq_seqno_bh, NULL);
> __intel_breadcrumbs_disable_irq(b);
> }
> } else {
> @@ -433,7 +428,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
> GEM_BUG_ON(b->first_wait == wait);
> GEM_BUG_ON(rb_first(&b->waiters) !=
> (b->first_wait ? &b->first_wait->node : NULL));
> - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
> }
>
> void intel_engine_remove_wait(struct intel_engine_cs *engine,
> @@ -595,6 +589,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
> return;
>
> request->signaling.wait.tsk = b->signaler;
> + request->signaling.wait.request = request;
> request->signaling.wait.seqno = seqno;
> i915_gem_request_get(request);
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0f29e07a9581..c90b60f47f25 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -235,8 +235,6 @@ struct intel_engine_cs {
> * the overhead of waking that client is much preferred.
> */
> struct intel_breadcrumbs {
> - struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
> -
> spinlock_t lock; /* protects the lists of requests; irqsafe */
> struct rb_root waiters; /* sorted by retirement, priority */
> struct rb_root signals; /* sorted by retirement */
> @@ -582,9 +580,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
> /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
> int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
>
> -static inline void intel_wait_init(struct intel_wait *wait)
> +static inline void intel_wait_init(struct intel_wait *wait,
> + struct drm_i915_gem_request *rq)
> {
> wait->tsk = current;
> + wait->request = rq;
> }
>
> static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
> @@ -639,29 +639,20 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request);
>
> static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
> {
> - return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
> + return READ_ONCE(engine->breadcrumbs.first_wait);
> }
>
> -static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
> +static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
> {
> + struct intel_wait *wait;
> + unsigned long flags;
> bool wakeup = false;
>
> - /* Note that for this not to dangerously chase a dangling pointer,
> - * we must hold the rcu_read_lock here.
> - *
> - * Also note that tsk is likely to be in !TASK_RUNNING state so an
> - * early test for tsk->state != TASK_RUNNING before wake_up_process()
> - * is unlikely to be beneficial.
> - */
> - if (intel_engine_has_waiter(engine)) {
> - struct task_struct *tsk;
> -
> - rcu_read_lock();
> - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
> - if (tsk)
> - wakeup = wake_up_process(tsk);
> - rcu_read_unlock();
> - }
> + spin_lock_irqsave(&engine->breadcrumbs.lock, flags);
> + wait = engine->breadcrumbs.first_wait;
> + if (wait)
> + wakeup = wake_up_process(wait->tsk);
> + spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags);
>
> return wakeup;
> }
>
Regards,
Tvrtko