[Intel-gfx] [PATCH 15/32] drm/i915: Slaughter the thundering i915_wait_request herd
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Dec 14 04:21:32 PST 2015
Hi,
On 11/12/15 11:33, Chris Wilson wrote:
> One particularly stressful scenario consists of many independent tasks
> all competing for GPU time and waiting upon the results (e.g. realtime
> transcoding of many, many streams). One bottleneck in particular is that
> each client waits on its own results, but every client is woken up after
> every batchbuffer - hence the thunder of hooves as then every client must
> do its heavyweight dance to read a coherent seqno to see if it is the
> lucky one.
>
> Ideally, we only want one client to wake up after the interrupt and
> check its request for completion. Since the requests must retire in
> order, we can select the first client on the oldest request to be woken.
> Once that client has completed his wait, we can then wake up the
> next client and so on. However, all clients then incur latency as every
> process in the chain may be delayed for scheduling - this may also then
> cause some priority inversion. To reduce the latency, when a client
> is added or removed from the list, we scan the tree for completed
> seqno and wake up all the completed waiters in parallel.
>
> v2: Convert from a kworker per engine into a dedicated kthread for the
> bottom-half.
> v3: Rename request members and tweak comments.
> v4: Use a per-engine spinlock in the breadcrumbs bottom-half.
> v5: Fix race in locklessly checking waiter status and kicking the task on
> adding a new waiter.
> v6: Fix deciding when to force the timer to hide missing interrupts.
> v7: Move the bottom-half from the kthread to the first client process.
> v8: Reword a few comments
> v9: Break the busy loop when the interrupt is unmasked or has fired.
> v10: Comments, unnecessary churn, better debugging from Tvrtko
> v11: Wake all completed waiters on removing the current bottom-half to
> reduce the latency of waking up a herd of clients all waiting on the
> same request.
> v12: Rearrange missed-interrupt fault injection so that it works with
> igt/drv_missed_irq_hang
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: "Rogozhkin, Dmitry V" <dmitry.v.rogozhkin at intel.com>
> Cc: "Gong, Zhipeng" <zhipeng.gong at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
> Cc: Dave Gordon <david.s.gordon at intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 1 +
> drivers/gpu/drm/i915/i915_debugfs.c | 19 ++-
> drivers/gpu/drm/i915/i915_drv.h | 3 +-
> drivers/gpu/drm/i915/i915_gem.c | 152 ++++++++---------
> drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
> drivers/gpu/drm/i915/i915_irq.c | 14 +-
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 274 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.c | 5 +-
> drivers/gpu/drm/i915/intel_ringbuffer.c | 5 +-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 63 ++++++-
> 10 files changed, 436 insertions(+), 102 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/intel_breadcrumbs.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 0851de07bd13..d3b9d3618719 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \
> i915_gem_userptr.o \
> i915_gpu_error.o \
> i915_trace_points.o \
> + intel_breadcrumbs.o \
> intel_lrc.o \
> intel_mocs.o \
> intel_ringbuffer.o \
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index d5f66bbdb160..48e574247a30 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -730,10 +730,22 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
> static void i915_ring_seqno_info(struct seq_file *m,
> struct intel_engine_cs *ring)
> {
> + struct rb_node *rb;
> +
> if (ring->get_seqno) {
> seq_printf(m, "Current sequence (%s): %x\n",
> ring->name, ring->get_seqno(ring, false));
> }
> +
> + spin_lock(&ring->breadcrumbs.lock);
> + for (rb = rb_first(&ring->breadcrumbs.requests);
> + rb != NULL;
> + rb = rb_next(rb)) {
> + struct intel_breadcrumb *b = container_of(rb, typeof(*b), node);
> + seq_printf(m, "Waiting (%s): %s [%d] on %x\n",
> + ring->name, b->task->comm, b->task->pid, b->seqno);
> + }
> + spin_unlock(&ring->breadcrumbs.lock);
> }
>
> static int i915_gem_seqno_info(struct seq_file *m, void *data)
> @@ -1356,8 +1368,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
>
> for_each_ring(ring, dev_priv, i) {
> seq_printf(m, "%s:\n", ring->name);
> - seq_printf(m, "\tseqno = %x [current %x]\n",
> - ring->hangcheck.seqno, seqno[i]);
> + seq_printf(m, "\tseqno = %x [current %x], waiters? %d\n",
> + ring->hangcheck.seqno, seqno[i],
> + intel_engine_has_waiter(ring));
> seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
> (long long)ring->hangcheck.acthd,
> (long long)acthd[i]);
> @@ -2322,7 +2335,7 @@ static int count_irq_waiters(struct drm_i915_private *i915)
> int i;
>
> for_each_ring(ring, i915, i)
> - count += ring->irq_refcount;
> + count += intel_engine_has_waiter(ring);
>
> return count;
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f82e8fb19c9b..830d760aa562 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1382,7 +1382,7 @@ struct i915_gpu_error {
> #define I915_STOP_RING_ALLOW_WARN (1 << 30)
>
> /* For missed irq/seqno simulation. */
> - unsigned int test_irq_rings;
> + unsigned long test_irq_rings;
> };
>
> enum modeset_restore {
> @@ -2825,7 +2825,6 @@ ibx_disable_display_interrupt(struct drm_i915_private *dev_priv, uint32_t bits)
> ibx_display_interrupt_update(dev_priv, bits, 0);
> }
>
> -
> /* i915_gem.c */
> int i915_gem_create_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 0340a5fe9cda..fa0cf6c9f4d0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1123,17 +1123,6 @@ i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
> return 0;
> }
>
> -static void fake_irq(unsigned long data)
> -{
> - wake_up_process((struct task_struct *)data);
> -}
> -
> -static bool missed_irq(struct drm_i915_private *dev_priv,
> - struct intel_engine_cs *ring)
> -{
> - return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
> -}
> -
> static unsigned long local_clock_us(unsigned *cpu)
> {
> unsigned long t;
> @@ -1166,7 +1155,9 @@ static bool busywait_stop(unsigned long timeout, unsigned cpu)
> return this_cpu != cpu;
> }
>
> -static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
> +static bool __i915_spin_request(struct drm_i915_gem_request *req,
> + struct intel_breadcrumb *wait,
> + int state)
> {
> unsigned long timeout;
> unsigned cpu;
> @@ -1181,31 +1172,30 @@ static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
> * takes to sleep on a request, on the order of a microsecond.
> */
>
> - if (req->ring->irq_refcount)
> - return -EBUSY;
> -
> /* Only spin if we know the GPU is processing this request */
> if (!i915_gem_request_started(req, true))
> - return -EAGAIN;
> + return false;
>
> timeout = local_clock_us(&cpu) + 5;
> - while (!need_resched()) {
> + do {
> if (i915_gem_request_completed(req, true))
> - return 0;
> + return true;
>
> - if (signal_pending_state(state, current))
> + if (signal_pending_state(state, wait->task))
> break;
>
> if (busywait_stop(timeout, cpu))
> break;
>
> cpu_relax_lowlatency();
> - }
>
> - if (i915_gem_request_completed(req, false))
> - return 0;
> + /* Break the loop if we have consumed the timeslice (or been
> + * preempted) or when either the background thread has
> + * enabled the interrupt, or the IRQ has fired.
> + */
> + } while (!need_resched() && wait->task->state == state);
>
> - return -EAGAIN;
> + return false;
> }
>
> /**
> @@ -1229,18 +1219,13 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
> s64 *timeout,
> struct intel_rps_client *rps)
> {
> - struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
> - struct drm_device *dev = ring->dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - const bool irq_test_in_progress =
> - ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
> int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
> - DEFINE_WAIT(wait);
> - unsigned long timeout_expire;
> + struct intel_breadcrumb wait;
> + unsigned long timeout_remain;
> s64 before, now;
> - int ret;
> + int ret = 0;
>
> - WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
> + might_sleep();
Why is this suddenly needed?
>
> if (list_empty(&req->list))
> return 0;
> @@ -1248,7 +1233,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
> if (i915_gem_request_completed(req, true))
> return 0;
>
> - timeout_expire = 0;
> + timeout_remain = MAX_SCHEDULE_TIMEOUT;
> if (timeout) {
> if (WARN_ON(*timeout < 0))
> return -EINVAL;
> @@ -1256,83 +1241,80 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
> if (*timeout == 0)
> return -ETIME;
>
> - timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
> + timeout_remain = nsecs_to_jiffies_timeout(*timeout);
> }
>
> - if (INTEL_INFO(dev_priv)->gen >= 6)
> - gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
> -
> /* Record current time in case interrupted by signal, or wedged */
> trace_i915_gem_request_wait_begin(req);
> before = ktime_get_raw_ns();
>
> - /* Optimistic spin for the next jiffie before touching IRQs */
> - ret = __i915_spin_request(req, state);
> - if (ret == 0)
> - goto out;
> + if (INTEL_INFO(req->i915)->gen >= 6)
> + gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
>
> - if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
> - ret = -ENODEV;
> - goto out;
> - }
> + intel_breadcrumb_init(&wait, req->seqno);
> + set_task_state(wait.task, state);
>
> - for (;;) {
> - struct timer_list timer;
> + /* Optimistic spin for the next ~jiffie before touching IRQs */
> + if (intel_engine_add_breadcrumb(req->ring, &wait)) {
> + if (__i915_spin_request(req, &wait, state))
> + goto out;
>
> - prepare_to_wait(&ring->irq_queue, &wait, state);
> + intel_engine_enable_breadcrumb_irq(req->ring);
>
> - /* We need to check whether any gpu reset happened in between
> - * the request being submitted and now. If a reset has occurred,
> - * the request is effectively complete (we either are in the
> - * process of or have discarded the rendering and completely
> - * reset the GPU. The results of the request are lost and we
> - * are free to continue on with the original operation.
> + /* In order to check that we haven't missed the interrupt
> + * as we enabled it, we need to kick ourselves to do a
> + * coherent check on the seqno before we sleep.
> */
> - if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
> - ret = 0;
> - break;
> - }
> -
> - if (i915_gem_request_completed(req, false)) {
> - ret = 0;
> - break;
> - }
> + goto wakeup;
> + }
>
> - if (signal_pending_state(state, current)) {
> + for (;;) {
> + if (signal_pending_state(state, wait.task)) {
> ret = -ERESTARTSYS;
> break;
> }
>
> - if (timeout && time_after_eq(jiffies, timeout_expire)) {
> + /* Ensure that even if the GPU hangs, we get woken up. */
> + i915_queue_hangcheck(req->i915);
> +
> + timeout_remain = io_schedule_timeout(timeout_remain);
> + if (timeout_remain == 0) {
> ret = -ETIME;
> break;
> }
>
> - /* Ensure that even if the GPU hangs, we get woken up. */
> - i915_queue_hangcheck(dev_priv);
> -
> - timer.function = NULL;
> - if (timeout || missed_irq(dev_priv, ring)) {
> - unsigned long expire;
> + if (intel_breadcrumb_complete(&wait))
> + break;
>
> - setup_timer_on_stack(&timer, fake_irq, (unsigned long)current);
> - expire = missed_irq(dev_priv, ring) ? jiffies + 1 : timeout_expire;
> - mod_timer(&timer, expire);
> - }
> +wakeup: set_task_state(wait.task, state);
>
> - io_schedule();
> + /* Ensure our read of the seqno is coherent so that we
> + * do not "miss an interrupt" (i.e. if this is the last
> + * request and the seqno write from the GPU is not visible
> + * by the time the interrupt fires, we will see that the
> + * request is incomplete and go back to sleep awaiting
> + * another interrupt that will never come.)
> + *
> + * Strictly, we only need to do this once after an interrupt,
> + * but it is easier and safer to do it every time the waiter
> + * is woken.
> + */
> + if (i915_gem_request_completed(req, false))
> + break;
>
> - if (timer.function) {
> - del_singleshot_timer_sync(&timer);
> - destroy_timer_on_stack(&timer);
> - }
> + /* We need to check whether any gpu reset happened in between
> + * the request being submitted and now. If a reset has occurred,
> + * the request is effectively complete (we either are in the
> + * process of or have discarded the rendering and completely
> + * reset the GPU. The results of the request are lost and we
> + * are free to continue on with the original operation.
> + */
> + if (req->reset_counter != i915_reset_counter(&req->i915->gpu_error))
> + break;
> }
> - if (!irq_test_in_progress)
> - ring->irq_put(ring);
> -
> - finish_wait(&ring->irq_queue, &wait);
> -
> out:
> + intel_engine_remove_breadcrumb(req->ring, &wait);
> + __set_task_state(wait.task, TASK_RUNNING);
> now = ktime_get_raw_ns();
> trace_i915_gem_request_wait_end(req);
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 06ca4082735b..f805d117f3d1 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -900,7 +900,7 @@ static void i915_record_ring_state(struct drm_device *dev,
> ering->instdone = I915_READ(GEN2_INSTDONE);
> }
>
> - ering->waiting = waitqueue_active(&ring->irq_queue);
> + ering->waiting = intel_engine_has_waiter(ring);
> ering->instpm = I915_READ(RING_INSTPM(ring->mmio_base));
> ering->seqno = ring->get_seqno(ring, false);
> ering->acthd = intel_ring_get_active_head(ring);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 5f88869e2207..d250b4721a6a 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1000,8 +1000,7 @@ static void notify_ring(struct intel_engine_cs *ring)
> return;
>
> trace_i915_gem_request_notify(ring);
> -
> - wake_up_all(&ring->irq_queue);
> + intel_engine_wakeup(ring);
> }
>
> static void vlv_c0_read(struct drm_i915_private *dev_priv,
> @@ -1083,7 +1082,7 @@ static bool any_waiters(struct drm_i915_private *dev_priv)
> int i;
>
> for_each_ring(ring, dev_priv, i)
> - if (ring->irq_refcount)
> + if (intel_engine_has_waiter(ring))
> return true;
>
> return false;
> @@ -2411,7 +2410,7 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv,
>
> /* Wake up __wait_seqno, potentially holding dev->struct_mutex. */
> for_each_ring(ring, dev_priv, i)
> - wake_up_all(&ring->irq_queue);
> + intel_engine_wakeup(ring);
This is called from process context without a lock or synchronize_rcu(). I'll
comment on it at the implementation site as well.
>
> /* Wake up intel_crtc_wait_for_pending_flips, holding crtc->mutex. */
> wake_up_all(&dev_priv->pending_flip_queue);
> @@ -2986,16 +2985,17 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
> if (ring_idle(ring, seqno)) {
> ring->hangcheck.action = HANGCHECK_IDLE;
>
> - if (waitqueue_active(&ring->irq_queue)) {
> + if (intel_engine_has_waiter(ring)) {
> /* Issue a wake-up to catch stuck h/w. */
> if (!test_and_set_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings)) {
> - if (!(dev_priv->gpu_error.test_irq_rings & intel_ring_flag(ring)))
> + if (!test_bit(ring->id, &dev_priv->gpu_error.test_irq_rings))
> DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
> ring->name);
> else
> DRM_INFO("Fake missed irq on %s\n",
> ring->name);
> - wake_up_all(&ring->irq_queue);
> +
> + intel_engine_enable_fake_irq(ring);
> }
> /* Safeguard against driver failure */
> ring->hangcheck.score += BUSY;
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> new file mode 100644
> index 000000000000..a9a199bca2c2
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -0,0 +1,274 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "i915_drv.h"
> +
> +static void intel_breadcrumbs_fake_irq(unsigned long data)
> +{
> + struct intel_breadcrumbs *b = (struct intel_breadcrumbs *)data;
> + struct task_struct *task;
> +
> + /*
> + * The timer persists in case we cannot enable interrupts,
> + * or if we have previously seen seqno/interrupt incoherency
> + * ("missed interrupt" syndrome). Here the worker will wake up
> + * every jiffie in order to kick the oldest waiter to do the
> + * coherent seqno check.
> + */
> +
> + task = READ_ONCE(b->first_waiter);
> + if (task) {
> + wake_up_process(task);
Could you put a comment here describing why the task cannot exit and become
invalid between sampling and wakeup?
Or we could afford a spinlock here, since this fires really infrequently?
Or even use intel_engine_wakeup()?
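
Something like this is what I have in mind for the locked version - just a
sketch, and ignoring for now whether b->lock would then need to become
irq/bh-safe for the process context users:

	static void intel_breadcrumbs_fake_irq(unsigned long data)
	{
		struct intel_breadcrumbs *b = (struct intel_breadcrumbs *)data;

		/* b->first_waiter only changes under b->lock, so holding it
		 * here means the waiter cannot exit between the check and
		 * the wake_up_process() below (sketch only).
		 */
		spin_lock(&b->lock);
		if (b->first_waiter) {
			wake_up_process(b->first_waiter);
			mod_timer(&b->fake_irq, jiffies + 1);
		}
		spin_unlock(&b->lock);
	}
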
> + mod_timer(&b->fake_irq, jiffies + 1);
> + }
> +}
> +
> +static void irq_enable(struct intel_engine_cs *engine)
> +{
> + WARN_ON(!engine->irq_get(engine));
> +}
> +
> +static void irq_disable(struct intel_engine_cs *engine)
> +{
> + engine->irq_put(engine);
> +}
These helpers don't do much and only have one call site each?
> +
> +static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
> +{
> + struct intel_engine_cs *engine =
> + container_of(b, struct intel_engine_cs, breadcrumbs);
> + bool noirq;
> +
> + if (b->rpm_wakelock)
> + return;
> +
> + /* Since we are waiting on a request, the GPU should be busy
> + * and should have its own rpm reference. For completeness,
> + * record an rpm reference for ourselves to cover the
> + * interrupt we unmask.
> + */
> + intel_runtime_pm_get_noresume(engine->i915);
> + b->rpm_wakelock = true;
> +
> + /* No interrupts? Kick the waiter every jiffie! */
> + noirq = true;
> + if (intel_irqs_enabled(engine->i915)) {
> + noirq = test_bit(engine->id,
> + &engine->i915->gpu_error.missed_irq_rings);
> + if (!test_bit(engine->id,
> + &engine->i915->gpu_error.test_irq_rings)) {
> + irq_enable(engine);
> + b->irq_enabled = true;
> + }
> + }
> + if (noirq)
> + mod_timer(&b->fake_irq, jiffies + 1);
> +}
> +
> +static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
> +{
> + struct intel_engine_cs *engine =
> + container_of(b, struct intel_engine_cs, breadcrumbs);
> +
> + if (!b->rpm_wakelock)
> + return;
> +
> + if (b->irq_enabled) {
> + irq_disable(engine);
> + b->irq_enabled = false;
> + }
> +
> + intel_runtime_pm_put(engine->i915);
> + b->rpm_wakelock = false;
> +}
Maybe put assert_spin_locked in the above two.
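I.e. just adding something like this at the top of both (sketch):

	assert_spin_locked(&b->lock);
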
> +
> +inline struct intel_breadcrumb *to_crumb(struct rb_node *node)
> +{
> + return container_of(node, struct intel_breadcrumb, node);
> +}
> +
> +bool intel_engine_add_breadcrumb(struct intel_engine_cs *engine,
> + struct intel_breadcrumb *wait)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> + u32 seqno = engine->get_seqno(engine, true);
> + struct rb_node **p, *parent, *completed;
> + bool first;
> +
> + spin_lock(&b->lock);
> +
> + /* Insert the request into the retirement ordered list
> + * of waiters by walking the rbtree. If we are the oldest
> + * seqno in the tree (the first to be retired), then
> + * set ourselves as the bottom-half.
> + *
> + * As we descend the tree, prune completed branches; since we hold the
> + * spinlock we know that the first_waiter must be delayed, and we can
> + * reduce some of the sequential wake-up latency if we take action
> + * ourselves and wake up the completed tasks in parallel.
> + */
Why is it interesting to do this from both the add and remove breadcrumb
paths? Wouldn't it be sufficient to do it only on remove?
> + first = true;
> + parent = NULL;
> + completed = NULL;
> + p = &b->requests.rb_node;
> + while (*p) {
> + parent = *p;
> + if (i915_seqno_passed(wait->seqno, to_crumb(parent)->seqno)) {
> + p = &parent->rb_right;
> + if (i915_seqno_passed(seqno, to_crumb(parent)->seqno))
> + completed = parent;
> + else
> + first = false;
> + } else
> + p = &parent->rb_left;
> + }
> + rb_link_node(&wait->node, parent, p);
> + rb_insert_color(&wait->node, &b->requests);
> +
> + if (completed != NULL) {
> + struct rb_node *next = rb_next(completed);
> +
> + if (next && next != &wait->node) {
> + smp_store_mb(b->first_waiter, to_crumb(next)->task);
> + __intel_breadcrumbs_enable_irq(b);
> + wake_up_process(to_crumb(next)->task);
I don't get this, why is it waking up the one after the completed node? Just
to anticipate that it might be completed soon?
> + }
> +
> + do {
> + struct intel_breadcrumb *crumb = to_crumb(completed);
> + completed = rb_prev(completed);
> +
> + rb_erase(&crumb->node, &b->requests);
> + RB_CLEAR_NODE(&crumb->node);
> + wake_up_process(crumb->task);
> + } while (completed != NULL);
> + }
> +
> + if (first)
> + smp_store_mb(b->first_waiter, wait->task);
> + BUG_ON(b->first_waiter == NULL);
I object, your honour! :) Let the user ssh in and reboot cleanly even if the
graphics stack is stuck.
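A WARN would still flag the problem without taking the machine down, e.g.
(sketch):

	if (first)
		smp_store_mb(b->first_waiter, wait->task);
	WARN_ON(b->first_waiter == NULL);
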
> +
> + spin_unlock(&b->lock);
> +
> + return first;
> +}
> +
> +void intel_engine_enable_breadcrumb_irq(struct intel_engine_cs *engine)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + spin_lock(&b->lock);
> + __intel_breadcrumbs_enable_irq(b);
> + spin_unlock(&b->lock);
> +}
> +
> +void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
> +{
> + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
> +}
> +
> +void intel_engine_remove_breadcrumb(struct intel_engine_cs *engine,
> + struct intel_breadcrumb *wait)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + /* Quick check to see if this waiter was already decoupled from
> + * the tree by the bottom-half to avoid contention on the spinlock
> + * by the herd.
> + */
> + if (RB_EMPTY_NODE(&wait->node))
> + return;
> +
> + spin_lock(&b->lock);
> +
> + if (b->first_waiter == wait->task) {
> + struct rb_node *next;
> + struct task_struct *task;
> +
> + /* We are the current bottom-half. Find the next candidate,
> + * the first waiter in the queue on the remaining oldest
> + * request. As multiple seqnos may complete in the time it
> + * takes us to wake up and find the next waiter, we have to
> + * wake up that waiter for it to perform its own coherent
> + * completion check.
> + */
> + next = rb_next(&wait->node);
> + if (next) {
> + /* If the next waiter is already complete,
> + * wake it up and continue onto the next waiter. So
> + * if we have a small herd, they will wake up in parallel
> + * rather than sequentially, which should reduce
> + * the overall latency in waking all the completed
> + * clients.
> + */
> + u32 seqno = engine->get_seqno(engine, true);
> + while (i915_seqno_passed(seqno,
> + to_crumb(next)->seqno)) {
> + struct rb_node *n = rb_next(next);
> +
> + rb_erase(next, &b->requests);
> + RB_CLEAR_NODE(next);
> + wake_up_process(to_crumb(next)->task);
> +
> + next = n;
> + if (next == NULL)
> + break;
> + }
> + }
> + task = next ? to_crumb(next)->task : NULL;
> +
> + smp_store_mb(b->first_waiter, task);
> + if (task) {
> + /* In our haste, we may have completed the first waiter
> + * before we enabled the interrupt. Do so now as we
> + * have a second waiter for a future seqno. Afterwards,
> + * we have to wake up that waiter in case we missed
> + * the interrupt, or if we have to handle an
> + * exception rather than a seqno completion.
> + */
> + __intel_breadcrumbs_enable_irq(b);
> + wake_up_process(task);
> + } else
> + __intel_breadcrumbs_disable_irq(b);
> + }
> +
> + if (!RB_EMPTY_NODE(&wait->node))
> + rb_erase(&wait->node, &b->requests);
> + spin_unlock(&b->lock);
> +}
> +
> +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + spin_lock_init(&b->lock);
> + setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)b);
> +}
> +
> +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
> +{
> + struct intel_breadcrumbs *b = &engine->breadcrumbs;
> +
> + del_timer_sync(&b->fake_irq);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 7030ff526941..780aa6d390aa 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1898,6 +1898,8 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
> i915_cmd_parser_fini_ring(ring);
> i915_gem_batch_pool_fini(&ring->batch_pool);
>
> + intel_engine_fini_breadcrumbs(ring);
> +
> if (ring->status_page.obj) {
> kunmap(sg_page(ring->status_page.obj->pages->sgl));
> ring->status_page.obj = NULL;
> @@ -1915,10 +1917,11 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
> ring->buffer = NULL;
>
> ring->dev = dev;
> + ring->i915 = to_i915(dev);
> INIT_LIST_HEAD(&ring->active_list);
> INIT_LIST_HEAD(&ring->request_list);
> i915_gem_batch_pool_init(dev, &ring->batch_pool);
> - init_waitqueue_head(&ring->irq_queue);
> + intel_engine_init_breadcrumbs(ring);
>
> INIT_LIST_HEAD(&ring->buffers);
> INIT_LIST_HEAD(&ring->execlist_queue);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 69dd69e46fa9..e90c28e2da31 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2152,6 +2152,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> WARN_ON(ring->buffer);
>
> ring->dev = dev;
> + ring->i915 = to_i915(dev);
> INIT_LIST_HEAD(&ring->active_list);
> INIT_LIST_HEAD(&ring->request_list);
> INIT_LIST_HEAD(&ring->execlist_queue);
> @@ -2159,7 +2160,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
> i915_gem_batch_pool_init(dev, &ring->batch_pool);
> memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
>
> - init_waitqueue_head(&ring->irq_queue);
> + intel_engine_init_breadcrumbs(ring);
>
> ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
> if (IS_ERR(ringbuf)) {
> @@ -2223,6 +2224,8 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
>
> i915_cmd_parser_fini_ring(ring);
> i915_gem_batch_pool_fini(&ring->batch_pool);
> + intel_engine_fini_breadcrumbs(ring);
> +
> ring->dev = NULL;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 49574ffe54bc..281ed2c92b91 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -157,9 +157,35 @@ struct intel_engine_cs {
> #define LAST_USER_RING (VECS + 1)
> u32 mmio_base;
> struct drm_device *dev;
> + struct drm_i915_private *i915;
> struct intel_ringbuffer *buffer;
> struct list_head buffers;
>
> + /* Rather than have every client wait upon all user interrupts,
> + * with the herd waking after every interrupt and each doing the
> + * heavyweight seqno dance, we delegate the task (of being the
> + * bottom-half of the user interrupt) to the first client. After
> + * every interrupt, we wake up one client, who does the heavyweight
> + * coherent seqno read and either goes back to sleep (if incomplete),
> + * or wakes up all the completed clients in parallel, before then
> + * transferring the bottom-half status to the next client in the queue.
> + *
> + * Compared to walking the entire list of waiters in a single dedicated
> + * bottom-half, we reduce the latency of the first waiter by avoiding
> + * a context switch, but incur additional coherent seqno reads when
> + * following the chain of request breadcrumbs. Since it is most likely
> + * that we have a single client waiting on each seqno, then reducing
> + * the overhead of waking that client is much preferred.
> + */
> + struct intel_breadcrumbs {
> + spinlock_t lock; /* protects the lists of requests */
> + struct rb_root requests; /* sorted by retirement */
> + struct task_struct *first_waiter; /* bh for user interrupts */
> + struct timer_list fake_irq; /* used after a missed interrupt */
> + bool irq_enabled;
> + bool rpm_wakelock;
> + } breadcrumbs;
> +
> /*
> * A pool of objects to use as shadow copies of client batch buffers
> * when the command parser is enabled. Prevents the client from
> @@ -303,8 +329,6 @@ struct intel_engine_cs {
>
> bool gpu_caches_dirty;
>
> - wait_queue_head_t irq_queue;
> -
> struct intel_context *default_context;
> struct intel_context *last_context;
>
> @@ -510,4 +534,39 @@ void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);
> /* Legacy ringbuffer specific portion of reservation code: */
> int intel_ring_reserve_space(struct drm_i915_gem_request *request);
>
> +/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
> +struct intel_breadcrumb {
> + struct rb_node node;
> + struct task_struct *task;
> + u32 seqno;
> +};
> +void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
> +static inline void intel_breadcrumb_init(struct intel_breadcrumb *wait,
> + u32 seqno)
> +{
> + wait->task = current;
> + wait->seqno = seqno;
> +}
> +static inline bool intel_breadcrumb_complete(struct intel_breadcrumb *wait)
> +{
> + return RB_EMPTY_NODE(&wait->node);
> +}
> +bool intel_engine_add_breadcrumb(struct intel_engine_cs *engine,
> + struct intel_breadcrumb *wait);
> +void intel_engine_remove_breadcrumb(struct intel_engine_cs *engine,
> + struct intel_breadcrumb *wait);
> +static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
> +{
> + return READ_ONCE(engine->breadcrumbs.first_waiter);
> +}
> +static inline void intel_engine_wakeup(struct intel_engine_cs *engine)
> +{
> + struct task_struct *task = READ_ONCE(engine->breadcrumbs.first_waiter);
> + if (task)
> + wake_up_process(task);
And here definitely put a comment saying why this is safe without the
spinlock.
Actually, seeing how it is called from both irq context and process context, I
think it will need a lock.
Maybe you can add an irq flavour if the lockless variant is really
beneficial from the user interrupt handler? See the sketch below.
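
Roughly what I have in mind, just a sketch (intel_engine_wakeup_irq() is a
made-up name for the lockless flavour, and this assumes b->lock is made
irq-safe everywhere else it is taken):

	static inline void intel_engine_wakeup(struct intel_engine_cs *engine)
	{
		struct intel_breadcrumbs *b = &engine->breadcrumbs;
		unsigned long flags;

		/* Taking the lock serialises against the waiter removing
		 * itself, so first_waiter cannot go away under us here.
		 */
		spin_lock_irqsave(&b->lock, flags);
		if (b->first_waiter)
			wake_up_process(b->first_waiter);
		spin_unlock_irqrestore(&b->lock, flags);
	}

and keep the current lockless READ_ONCE() version as intel_engine_wakeup_irq()
for notify_ring(), if the extra locking turns out to matter there.
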
> +}
> +void intel_engine_enable_breadcrumb_irq(struct intel_engine_cs *engine);
> +void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
> +void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
> +
> #endif /* _INTEL_RINGBUFFER_H_ */
>
Regards,
Tvrtko