[PATCH 59/60] breadcrumb
Chris Wilson
chris at chris-wilson.co.uk
Fri Dec 28 10:43:49 UTC 2018
---
drivers/gpu/drm/i915/i915_debugfs.c | 30 +-
drivers/gpu/drm/i915/i915_gem_context.c | 2 +
drivers/gpu/drm/i915/i915_gem_context.h | 2 +
drivers/gpu/drm/i915/i915_gpu_error.c | 73 --
drivers/gpu/drm/i915/i915_gpu_error.h | 8 -
drivers/gpu/drm/i915/i915_irq.c | 88 +-
drivers/gpu/drm/i915/i915_request.c | 127 +--
drivers/gpu/drm/i915/i915_request.h | 23 +-
drivers/gpu/drm/i915/i915_reset.c | 13 +-
drivers/gpu/drm/i915/intel_breadcrumbs.c | 768 ++++--------------
drivers/gpu/drm/i915/intel_engine_cs.c | 22 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 89 +-
.../drm/i915/selftests/i915_mock_selftests.h | 1 -
drivers/gpu/drm/i915/selftests/igt_spinner.c | 5 -
.../drm/i915/selftests/intel_breadcrumbs.c | 441 ----------
.../gpu/drm/i915/selftests/intel_hangcheck.c | 2 +-
17 files changed, 268 insertions(+), 1428 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8b18b0fd86b5..f3a3322a022a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1284,28 +1284,13 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
for_each_engine(engine, dev_priv, id) {
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct rb_node *rb;
-
seq_printf(m, "%s:\n", engine->name);
seq_printf(m, "\tseqno = %llx, emitted %dms ago\n",
engine->hangcheck ? engine->hangcheck->fence.seqno : 0,
engine->hangcheck ? jiffies_to_msecs(jiffies - engine->hangcheck->emitted_jiffies) : -1);
- seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
- yesno(intel_engine_has_waiter(engine)),
+ seq_printf(m, "\tfake irq active? %s\n",
yesno(test_bit(engine->id,
&dev_priv->gpu_error.missed_irq_rings)));
-
- spin_lock_irq(&b->rb_lock);
- for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
- struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
- seq_printf(m, "\t%s [%d] waiting for %d:%d\n",
- w->tsk->comm, w->tsk->pid,
- upper_32_bits(w->global_seqno),
- lower_32_bits(w->global_seqno));
- }
- spin_unlock_irq(&b->rb_lock);
}
return 0;
@@ -1975,18 +1960,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
return 0;
}
-static int count_irq_waiters(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int count = 0;
-
- for_each_engine(engine, i915, id)
- count += intel_engine_has_waiter(engine);
-
- return count;
-}
-
static const char *rps_power_to_str(unsigned int power)
{
static const char * const strings[] = {
@@ -2026,7 +1999,6 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
seq_printf(m, "RPS enabled? %d\n", rps->enabled);
seq_printf(m, "GPU busy? %s [%d requests]\n",
yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
- seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
seq_printf(m, "Boosts outstanding? %d\n",
atomic_read(&rps->num_waiters));
seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 22aec4fddff9..8dd5e8e43327 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -359,6 +359,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
ce->gem_context = ctx;
ce->owner = dev_priv->engine[n];
INIT_LIST_HEAD(&ce->active_link);
+ INIT_LIST_HEAD(&ce->signal_link);
+ INIT_LIST_HEAD(&ce->signals);
}
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 5c64e47b9f6a..0b4155fd8eee 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -173,6 +173,8 @@ struct i915_gem_context {
struct intel_engine_cs *active;
struct intel_engine_cs *owner;
struct list_head active_link;
+ struct list_head signal_link;
+ struct list_head signals;
struct i915_vma *state;
struct intel_ring *ring;
u32 *lrc_reg_state;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 75e978d21bc5..76a2f034a902 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -527,7 +527,6 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
}
err_printf(m, " seqno: 0x%08x\n", ee->seqno);
err_printf(m, " last_seqno: 0x%08x\n", ee->last_seqno);
- err_printf(m, " waiting: %s\n", yesno(ee->waiting));
err_printf(m, " ring->head: 0x%08x\n", ee->cpu_ring_head);
err_printf(m, " ring->tail: 0x%08x\n", ee->cpu_ring_tail);
err_printf(m, " engine reset count: %u\n", ee->reset_count);
@@ -783,21 +782,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
error->epoch);
}
- if (IS_ERR(ee->waiters)) {
- err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
- m->i915->engine[i]->name);
- } else if (ee->num_waiters) {
- err_printf(m, "%s --- %d waiters\n",
- m->i915->engine[i]->name,
- ee->num_waiters);
- for (j = 0; j < ee->num_waiters; j++) {
- err_printf(m, " seqno 0x%08x for %s [%d]\n",
- ee->waiters[j].seqno,
- ee->waiters[j].comm,
- ee->waiters[j].pid);
- }
- }
-
print_error_obj(m, m->i915->engine[i],
"ringbuffer", ee->ringbuffer);
@@ -985,8 +969,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
i915_error_object_free(ee->wa_ctx);
kfree(ee->requests);
- if (!IS_ERR_OR_NULL(ee->waiters))
- kfree(ee->waiters);
}
for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
@@ -1135,59 +1117,6 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
I915_READ(RING_SYNC_2(engine->mmio_base));
}
-static void error_record_engine_waiters(struct intel_engine_cs *engine,
- struct drm_i915_error_engine *ee)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct drm_i915_error_waiter *waiter;
- struct rb_node *rb;
- int count;
-
- ee->num_waiters = 0;
- ee->waiters = NULL;
-
- if (RB_EMPTY_ROOT(&b->waiters))
- return;
-
- if (!spin_trylock_irq(&b->rb_lock)) {
- ee->waiters = ERR_PTR(-EDEADLK);
- return;
- }
-
- count = 0;
- for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
- count++;
- spin_unlock_irq(&b->rb_lock);
-
- waiter = NULL;
- if (count)
- waiter = kmalloc_array(count,
- sizeof(struct drm_i915_error_waiter),
- GFP_ATOMIC);
- if (!waiter)
- return;
-
- if (!spin_trylock_irq(&b->rb_lock)) {
- kfree(waiter);
- ee->waiters = ERR_PTR(-EDEADLK);
- return;
- }
-
- ee->waiters = waiter;
- for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
- struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
- strcpy(waiter->comm, w->tsk->comm);
- waiter->pid = w->tsk->pid;
- waiter->seqno = w->global_seqno;
- waiter++;
-
- if (++ee->num_waiters == count)
- break;
- }
- spin_unlock_irq(&b->rb_lock);
-}
-
static void error_record_engine_registers(struct i915_gpu_state *error,
struct intel_engine_cs *engine,
struct drm_i915_error_engine *ee)
@@ -1223,7 +1152,6 @@ static void error_record_engine_registers(struct i915_gpu_state *error,
intel_engine_get_instdone(engine, &ee->instdone);
- ee->waiting = intel_engine_has_waiter(engine);
ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
ee->acthd = intel_engine_get_active_head(engine);
ee->seqno = intel_engine_get_seqno(engine);
@@ -1470,7 +1398,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
ee->engine_id = i;
error_record_engine_registers(error, engine, ee);
- error_record_engine_waiters(engine, ee);
error_record_engine_execlists(engine, ee);
request = i915_gem_find_active_request(engine);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 7f68db4d2f19..e48bb706e844 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -81,8 +81,6 @@ struct i915_gpu_state {
int engine_id;
/* Software tracked state */
bool idle;
- bool waiting;
- int num_waiters;
struct i915_address_space *vm;
int num_requests;
u32 reset_count;
@@ -157,12 +155,6 @@ struct i915_gpu_state {
} *requests, execlist[EXECLIST_MAX_PORTS];
unsigned int num_ports;
- struct drm_i915_error_waiter {
- char comm[TASK_COMM_LEN];
- pid_t pid;
- u32 seqno;
- } *waiters;
-
struct {
u32 gfx_mode;
union {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 19e7a81ef1b5..9e7bb5977829 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -28,9 +28,10 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/sysrq.h>
-#include <linux/slab.h>
#include <linux/circ_buf.h>
+#include <linux/slab.h>
+#include <linux/sysrq.h>
+
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
@@ -1152,67 +1153,6 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
return;
}
-static void notify_ring(struct intel_engine_cs *engine)
-{
- const u32 seqno = intel_engine_get_seqno(engine);
- struct i915_request *rq = NULL;
- struct task_struct *tsk = NULL;
- struct intel_wait *wait;
-
- if (unlikely(!engine->breadcrumbs.irq_armed))
- return;
-
- rcu_read_lock();
-
- spin_lock(&engine->breadcrumbs.irq_lock);
- wait = engine->breadcrumbs.irq_wait;
- if (wait) {
- /*
- * We use a callback from the dma-fence to submit
- * requests after waiting on our own requests. To
- * ensure minimum delay in queuing the next request to
- * hardware, signal the fence now rather than wait for
- * the signaler to be woken up. We still wake up the
- * waiter in order to handle the irq-seqno coherency
- * issues (we may receive the interrupt before the
- * seqno is written, see __i915_request_irq_complete())
- * and to handle coalescing of multiple seqno updates
- * and many waiters.
- */
- if (i915_seqno_passed(seqno, wait->global_seqno)) {
- struct i915_request *waiter = wait->request;
-
- if (waiter &&
- !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &waiter->fence.flags) &&
- intel_wait_check_request(wait, waiter))
- rq = i915_request_get(waiter);
-
- tsk = wait->tsk;
- }
-
- engine->breadcrumbs.irq_count++;
- } else {
- if (engine->breadcrumbs.irq_armed)
- __intel_engine_disarm_breadcrumbs(engine);
- }
- spin_unlock(&engine->breadcrumbs.irq_lock);
-
- if (rq) {
- spin_lock(&rq->lock);
- dma_fence_signal_locked(&rq->fence);
- GEM_BUG_ON(!i915_request_completed(rq));
- spin_unlock(&rq->lock);
-
- i915_request_put(rq);
- }
-
- if (tsk && tsk->state & TASK_NORMAL)
- wake_up_process(tsk);
-
- rcu_read_unlock();
-}
-
static void vlv_c0_read(struct drm_i915_private *dev_priv,
struct intel_rps_ei *ei)
{
@@ -1457,20 +1397,20 @@ static void ilk_gt_irq_handler(struct drm_i915_private *dev_priv,
u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
- notify_ring(dev_priv->engine[RCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (gt_iir & ILK_BSD_USER_INTERRUPT)
- notify_ring(dev_priv->engine[VCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
}
static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
u32 gt_iir)
{
if (gt_iir & GT_RENDER_USER_INTERRUPT)
- notify_ring(dev_priv->engine[RCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (gt_iir & GT_BSD_USER_INTERRUPT)
- notify_ring(dev_priv->engine[VCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
if (gt_iir & GT_BLT_USER_INTERRUPT)
- notify_ring(dev_priv->engine[BCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]);
if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT |
@@ -1490,7 +1430,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
tasklet = true;
if (iir & GT_RENDER_USER_INTERRUPT) {
- notify_ring(engine);
+ intel_engine_breadcrumbs_irq(engine);
tasklet |= USES_GUC_SUBMISSION(engine->i915);
}
@@ -1836,7 +1776,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
if (HAS_VEBOX(dev_priv)) {
if (pm_iir & PM_VEBOX_USER_INTERRUPT)
- notify_ring(dev_priv->engine[VECS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]);
if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
@@ -4262,7 +4202,7 @@ static irqreturn_t i8xx_irq_handler(int irq, void *arg)
I915_WRITE16(IIR, iir);
if (iir & I915_USER_INTERRUPT)
- notify_ring(dev_priv->engine[RCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4370,7 +4310,7 @@ static irqreturn_t i915_irq_handler(int irq, void *arg)
I915_WRITE(IIR, iir);
if (iir & I915_USER_INTERRUPT)
- notify_ring(dev_priv->engine[RCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
@@ -4515,10 +4455,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
I915_WRITE(IIR, iir);
if (iir & I915_USER_INTERRUPT)
- notify_ring(dev_priv->engine[RCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
if (iir & I915_BSD_USER_INTERRUPT)
- notify_ring(dev_priv->engine[VCS]);
+ intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
if (iir & I915_MASTER_ERROR_INTERRUPT)
i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index dc00e96615e7..fe9f62144087 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -60,7 +60,7 @@ static bool i915_fence_signaled(struct dma_fence *fence)
static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
- return intel_engine_enable_signaling(to_request(fence), true);
+ return intel_engine_enable_signaling(to_request(fence));
}
static signed long i915_fence_wait(struct dma_fence *fence,
@@ -356,9 +356,10 @@ void __i915_request_submit(struct i915_request *request)
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
- request->global_seqno = (u64)engine->id << 32 | seqno;
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
- intel_engine_enable_signaling(request, false);
+ set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+ !intel_engine_enable_signaling(request))
+ dma_fence_signal_locked(&request->fence);
spin_unlock(&request->lock);
engine->emit_breadcrumb(request,
@@ -368,8 +369,6 @@ void __i915_request_submit(struct i915_request *request)
move_to_timeline(request, &engine->timeline);
trace_i915_request_execute(request);
-
- wake_up_all(&request->execute);
}
void i915_request_submit(struct i915_request *request)
@@ -413,6 +412,7 @@ void __i915_request_unsubmit(struct i915_request *request)
request->global_seqno = 0;
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
intel_engine_cancel_signaling(request);
+ clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
spin_unlock(&request->lock);
/* Transfer back from the global per-engine timeline to per-context */
@@ -595,13 +595,11 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
/* We bump the ref for the fence chain */
i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
- init_waitqueue_head(&rq->execute);
i915_sched_node_init(&rq->sched);
/* No zalloc, must clear what we need by hand */
rq->global_seqno = 0;
- rq->signaling.wait.global_seqno = 0;
rq->file_priv = NULL;
rq->batch = NULL;
rq->capture_list = NULL;
@@ -1009,12 +1007,9 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu)
}
static bool __i915_spin_request(const struct i915_request * const rq,
- const struct intel_wait * const w,
int state, unsigned long timeout_us)
{
- unsigned int irq, cpu;
-
- GEM_BUG_ON(!w->global_seqno);
+ unsigned int cpu;
/*
* Only wait for the request if we know it is likely to complete.
@@ -1027,7 +1022,7 @@ static bool __i915_spin_request(const struct i915_request * const rq,
* it is a fair assumption that it will not complete within our
* relatively short timeout.
*/
- if (!intel_engine_has_started(w->engine, w->global_seqno))
+ if (!i915_request_started(rq))
return false;
/*
@@ -1041,20 +1036,10 @@ static bool __i915_spin_request(const struct i915_request * const rq,
* takes to sleep on a request, on the order of a microsecond.
*/
- irq = READ_ONCE(w->engine->breadcrumbs.irq_count);
timeout_us += local_clock_us(&cpu);
do {
- if (intel_engine_has_completed(w->engine, w->global_seqno))
- return w->global_seqno == i915_request_global_seqno(rq);
-
- /*
- * Seqno are meant to be ordered *before* the interrupt. If
- * we see an interrupt without a corresponding seqno advance,
- * assume we won't see one in the near future but require
- * the engine->seqno_barrier() to fixup coherency.
- */
- if (READ_ONCE(w->engine->breadcrumbs.irq_count) != irq)
- break;
+ if (i915_request_completed(rq))
+ return true;
if (signal_pending_state(state, current))
break;
@@ -1068,6 +1053,18 @@ static bool __i915_spin_request(const struct i915_request * const rq,
return false;
}
+struct request_wait {
+ struct dma_fence_cb cb;
+ struct task_struct *tsk;
+};
+
+static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct request_wait *wait = container_of(cb, typeof(*wait), cb);
+
+ wake_up_process(wait->tsk);
+}
+
/**
* i915_request_wait - wait until execution of request has finished
* @rq: the request to wait upon
@@ -1093,8 +1090,7 @@ long i915_request_wait(struct i915_request *rq,
{
const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
- DEFINE_WAIT_FUNC(exec, default_wake_function);
- struct intel_wait wait;
+ struct request_wait wait;
might_sleep();
GEM_BUG_ON(timeout < 0);
@@ -1106,48 +1102,28 @@ long i915_request_wait(struct i915_request *rq,
return -ETIME;
trace_i915_request_wait_begin(rq, flags);
- add_wait_queue(&rq->execute, &exec);
- intel_wait_init(&wait);
- if (flags & I915_WAIT_PRIORITY)
- i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-restart:
- do {
- set_current_state(state);
- if (intel_wait_update_request(&wait, rq))
- break;
-
- if (signal_pending_state(state, current)) {
- timeout = -ERESTARTSYS;
- goto complete;
- }
+ /* Optimistic short spin before touching IRQs */
+ if (__i915_spin_request(rq, state, 5))
+ goto out;
- if (!timeout) {
- timeout = -ETIME;
- goto complete;
- }
+ if (flags & I915_WAIT_PRIORITY)
+ i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
- timeout = io_schedule_timeout(timeout);
- } while (1);
+ wait.tsk = current;
+ if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
+ goto out;
- GEM_BUG_ON(!intel_wait_has_seqno(&wait));
- GEM_BUG_ON(intel_engine_is_virtual(wait.engine));
- GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+ for (;;) {
+ set_current_state(state);
- /* Optimistic short spin before touching IRQs */
- if (__i915_spin_request(rq, &wait, state, 5))
- goto complete;
+ if (i915_request_completed(rq))
+ break;
- set_current_state(state);
- if (intel_engine_add_wait(&wait))
- /*
- * In order to check that we haven't missed the interrupt
- * as we enabled it, we need to kick ourselves to do a
- * coherent check on the seqno before we sleep.
- */
- goto wakeup;
+ /* Only spins if we know the GPU is processing this request */
+ if (__i915_spin_request(rq, state, 2))
+ break;
- for (;;) {
if (signal_pending_state(state, current)) {
timeout = -ERESTARTSYS;
break;
@@ -1159,33 +1135,12 @@ long i915_request_wait(struct i915_request *rq,
}
timeout = io_schedule_timeout(timeout);
-
- if (intel_wait_complete(&wait) &&
- intel_wait_check_request(&wait, rq))
- break;
-
- set_current_state(state);
-
-wakeup:
- if (i915_request_completed(rq))
- break;
-
- /* Only spin if we know the GPU is processing this request */
- if (__i915_spin_request(rq, &wait, state, 2))
- break;
-
- if (!intel_wait_check_request(&wait, rq)) {
- intel_engine_remove_wait(&wait);
- goto restart;
- }
}
-
- intel_engine_remove_wait(&wait);
-complete:
+ dma_fence_remove_callback(&rq->fence, &wait.cb);
__set_current_state(TASK_RUNNING);
- remove_wait_queue(&rq->execute, &exec);
- trace_i915_request_wait_end(rq);
+out:
+ trace_i915_request_wait_end(rq);
return timeout;
}
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 85a242dcb77c..b946f23c94af 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -38,24 +38,16 @@ struct drm_i915_gem_object;
struct i915_request;
struct i915_timeline;
-struct intel_wait {
- struct rb_node node;
- struct task_struct *tsk;
- struct i915_request *request;
- struct intel_engine_cs *engine;
- u64 global_seqno;
-};
-
-struct intel_signal_node {
- struct intel_wait wait;
- struct list_head link;
-};
-
struct i915_capture_list {
struct i915_capture_list *next;
struct i915_vma *vma;
};
+enum {
+ I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
+ I915_FENCE_FLAG_SIGNAL,
+};
+
/**
* Request queue structure.
*
@@ -98,7 +90,7 @@ struct i915_request {
struct intel_context *hw_context;
struct intel_ring *ring;
struct i915_timeline *timeline;
- struct intel_signal_node signaling;
+ struct list_head signal_link;
/*
* The rcu epoch of when this request was allocated. Used to judiciously
@@ -120,7 +112,6 @@ struct i915_request {
wait_queue_entry_t submitq;
struct i915_sw_dma_fence_cb dmaq;
};
- wait_queue_head_t execute;
/*
* A list of everyone we wait upon, and everyone who waits upon us.
@@ -256,7 +247,7 @@ i915_request_put(struct i915_request *rq)
* preemption onto another ring, the global_seqno encodes a tag into its
* upper 32 bits to identify the execution engine.
*/
-static u64
+static inline u64
i915_request_global_seqno(const struct i915_request *request)
{
return READ_ONCE(request->global_seqno);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 45a0f00421f9..24db0d7bb2e3 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -747,18 +747,19 @@ static void reset_restart(struct drm_i915_private *i915)
static void nop_submit_request(struct i915_request *request)
{
+ struct intel_engine_cs *engine = request->engine;
unsigned long flags;
GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
- request->engine->name,
- request->fence.context, request->fence.seqno);
+ engine->name, request->fence.context, request->fence.seqno);
dma_fence_set_error(&request->fence, -EIO);
- spin_lock_irqsave(&request->engine->timeline.lock, flags);
+ spin_lock_irqsave(&engine->timeline.lock, flags);
__i915_request_submit(request);
i915_request_fake_complete(request);
- intel_engine_write_global_seqno(request->engine, request->global_seqno);
- spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
+ spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+ intel_engine_signal_breadcrumbs(engine);
}
void i915_gem_set_wedged(struct drm_i915_private *i915)
@@ -813,7 +814,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
for_each_engine(engine, i915, id) {
reset_finish_engine(engine);
- intel_engine_wakeup(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
smp_mb__before_atomic();
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 963f88aa9101..046b0181dd5b 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -29,44 +29,117 @@
#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq)
-static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
+static void irq_enable(struct intel_engine_cs *engine)
{
- struct intel_wait *wait;
- unsigned int result = 0;
+ /*
+ * FIXME: Ideally we want this on the API boundary, but for the
+ * sake of testing with mock breadcrumbs (no HW so unable to
+ * enable irqs) we place it deep within the bowels, at the point
+ * of no return.
+ */
+ GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
+
+ /* Caller disables interrupts */
+ if (engine->irq_enable) {
+ spin_lock(&engine->i915->irq_lock);
+ engine->irq_enable(engine);
+ spin_unlock(&engine->i915->irq_lock);
+ }
+}
+static void irq_disable(struct intel_engine_cs *engine)
+{
+ /* Caller disables interrupts */
+ if (engine->irq_disable) {
+ spin_lock(&engine->i915->irq_lock);
+ engine->irq_disable(engine);
+ spin_unlock(&engine->i915->irq_lock);
+ }
+}
+
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
lockdep_assert_held(&b->irq_lock);
- wait = b->irq_wait;
- if (wait) {
- /*
- * N.B. Since task_asleep() and ttwu are not atomic, the
- * waiter may actually go to sleep after the check, causing
- * us to suppress a valid wakeup. We prefer to reduce the
- * number of false positive missed_breadcrumb() warnings
- * at the expense of a few false negatives, as it it easy
- * to trigger a false positive under heavy load. Enough
- * signal should remain from genuine missed_breadcrumb()
- * for us to detect in CI.
- */
- bool was_asleep = task_asleep(wait->tsk);
+ GEM_BUG_ON(!b->irq_enabled);
+ if (!--b->irq_enabled)
+ irq_disable(container_of(b,
+ struct intel_engine_cs,
+ breadcrumbs));
- result = ENGINE_WAKEUP_WAITER;
- if (wake_up_process(wait->tsk) && was_asleep)
- result |= ENGINE_WAKEUP_ASLEEP;
- }
+ b->irq_armed = false;
+}
- return result;
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ if (!b->irq_armed)
+ return;
+
+ spin_lock_irq(&b->irq_lock);
+ if (b->irq_armed)
+ __intel_breadcrumbs_disarm_irq(b);
+ spin_unlock_irq(&b->irq_lock);
}
-unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
+bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- unsigned long flags;
- unsigned int result;
+ struct intel_context *ce, *cn;
+ struct i915_request *rq, *rn;
+ LIST_HEAD(signal);
- spin_lock_irqsave(&b->irq_lock, flags);
- result = __intel_breadcrumbs_wakeup(b);
- spin_unlock_irqrestore(&b->irq_lock, flags);
+ spin_lock(&b->irq_lock);
+
+ list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
+ list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
+ if (!i915_request_completed(rq))
+ break;
+
+ GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
+ &rq->fence.flags));
+ list_del(&rq->signal_link);
+ clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &rq->fence.flags))
+ continue;
+
+ /*
+ * Queue for execution after dropping the signaling
+ * spinlock as the callback chain may end adding
+ * more signalers to the same context or engine.
+ */
+ i915_request_get(rq);
+ list_add_tail(&rq->signal_link, &signal);
+ }
+
+ if (list_empty(&ce->signals))
+ list_del_init(&ce->signal_link);
+ }
+
+ if (b->irq_armed && list_empty(&b->signalers))
+ __intel_breadcrumbs_disarm_irq(b);
+ b->irq_count++;
+
+ spin_unlock(&b->irq_lock);
+
+ list_for_each_entry_safe(rq, rn, &signal, signal_link) {
+ dma_fence_signal(&rq->fence);
+ i915_request_put(rq);
+ }
+
+ return !list_empty(&signal);
+}
+
+bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
+{
+ bool result;
+
+ local_irq_disable();
+ result = intel_engine_breadcrumbs_irq(engine);
+ local_irq_enable();
return result;
}
@@ -118,7 +191,7 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t)
* but we still have a waiter. Assuming all batches complete within
* DRM_I915_HANGCHECK_JIFFIES [1.5s]!
*/
- if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) {
+ if (intel_engine_signal_breadcrumbs(engine)) {
missed_breadcrumb(engine);
mod_timer(&b->fake_irq, jiffies + 1);
} else {
@@ -140,11 +213,7 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t)
* oldest waiter to do the coherent seqno check.
*/
- spin_lock_irq(&b->irq_lock);
- if (b->irq_armed && !__intel_breadcrumbs_wakeup(b))
- __intel_engine_disarm_breadcrumbs(engine);
- spin_unlock_irq(&b->irq_lock);
- if (!b->irq_armed)
+ if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed)
return;
/* If the user has disabled the fake-irq, restore the hangchecking */
@@ -156,49 +225,6 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t)
mod_timer(&b->fake_irq, jiffies + 1);
}
-static void irq_enable(struct intel_engine_cs *engine)
-{
- /*
- * FIXME: Ideally we want this on the API boundary, but for the
- * sake of testing with mock breadcrumbs (no HW so unable to
- * enable irqs) we place it deep within the bowels, at the point
- * of no return.
- */
- GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
-
- /* Caller disables interrupts */
- if (engine->irq_enable) {
- spin_lock(&engine->i915->irq_lock);
- engine->irq_enable(engine);
- spin_unlock(&engine->i915->irq_lock);
- }
-}
-
-static void irq_disable(struct intel_engine_cs *engine)
-{
- /* Caller disables interrupts */
- if (engine->irq_disable) {
- spin_lock(&engine->i915->irq_lock);
- engine->irq_disable(engine);
- spin_unlock(&engine->i915->irq_lock);
- }
-}
-
-void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- lockdep_assert_held(&b->irq_lock);
- GEM_BUG_ON(b->irq_wait);
- GEM_BUG_ON(!b->irq_armed);
-
- GEM_BUG_ON(!b->irq_enabled);
- if (!--b->irq_enabled)
- irq_disable(engine);
-
- b->irq_armed = false;
-}
-
void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
@@ -221,40 +247,6 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)
spin_unlock_irq(&b->irq_lock);
}
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct intel_wait *wait, *n;
-
- if (!b->irq_armed)
- return;
-
- /*
- * We only disarm the irq when we are idle (all requests completed),
- * so if the bottom-half remains asleep, it missed the request
- * completion.
- */
- if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP)
- missed_breadcrumb(engine);
-
- spin_lock_irq(&b->rb_lock);
-
- spin_lock(&b->irq_lock);
- b->irq_wait = NULL;
- if (b->irq_armed)
- __intel_engine_disarm_breadcrumbs(engine);
- spin_unlock(&b->irq_lock);
-
- rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) {
- GEM_BUG_ON(!intel_engine_signaled(engine, wait->global_seqno));
- RB_CLEAR_NODE(&wait->node);
- wake_up_process(wait->tsk);
- }
- b->waiters = RB_ROOT;
-
- spin_unlock_irq(&b->rb_lock);
-}
-
static bool use_fake_irq(const struct intel_breadcrumbs *b)
{
const struct intel_engine_cs *engine =
@@ -282,7 +274,7 @@ static void enable_fake_irq(struct intel_breadcrumbs *b)
mod_timer(&b->hangcheck, wait_timeout());
}
-static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
+static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
@@ -330,536 +322,130 @@ static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
return enabled;
}
-static inline struct intel_wait *to_wait(struct rb_node *node)
-{
- return rb_entry(node, struct intel_wait, node);
-}
-
-static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
- struct intel_wait *wait)
-{
- lockdep_assert_held(&b->rb_lock);
- GEM_BUG_ON(b->irq_wait == wait);
-
- /*
- * This request is completed, so remove it from the tree, mark it as
- * complete, and *then* wake up the associated task. N.B. when the
- * task wakes up, it will find the empty rb_node, discern that it
- * has already been removed from the tree and skip the serialisation
- * of the b->rb_lock and b->irq_lock. This means that the destruction
- * of the intel_wait is not serialised with the interrupt handler
- * by the waiter - it must instead be serialised by the caller.
- */
- rb_erase(&wait->node, &b->waiters);
- RB_CLEAR_NODE(&wait->node);
-
- if (wait->tsk->state != TASK_RUNNING)
- wake_up_process(wait->tsk); /* implicit smp_wmb() */
-}
-
-static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
- struct rb_node *next)
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- spin_lock(&b->irq_lock);
- GEM_BUG_ON(!b->irq_armed);
- GEM_BUG_ON(!b->irq_wait);
- b->irq_wait = to_wait(next);
- spin_unlock(&b->irq_lock);
+ spin_lock_init(&b->irq_lock);
+ INIT_LIST_HEAD(&b->signalers);
- /* We always wake up the next waiter that takes over as the bottom-half
- * as we may delegate not only the irq-seqno barrier to the next waiter
- * but also the task of waking up concurrent waiters.
- */
- if (next)
- wake_up_process(to_wait(next)->tsk);
+ timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
+ timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
}
-static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
- struct intel_wait *wait)
+static void cancel_fake_irq(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct rb_node **p, *parent, *completed;
- bool first, armed;
- u32 seqno;
-
- GEM_BUG_ON(!wait->global_seqno);
- /* Insert the request into the retirement ordered list
- * of waiters by walking the rbtree. If we are the oldest
- * seqno in the tree (the first to be retired), then
- * set ourselves as the bottom-half.
- *
- * As we descend the tree, prune completed branches since we hold the
- * spinlock we know that the first_waiter must be delayed and can
- * reduce some of the sequential wake up latency if we take action
- * ourselves and wake up the completed tasks in parallel. Also, by
- * removing stale elements in the tree, we may be able to reduce the
- * ping-pong between the old bottom-half and ourselves as first-waiter.
- */
- armed = false;
- first = true;
- parent = NULL;
- completed = NULL;
- seqno = intel_engine_get_seqno(engine);
-
- /* If the request completed before we managed to grab the spinlock,
- * return now before adding ourselves to the rbtree. We let the
- * current bottom-half handle any pending wakeups and instead
- * try and get out of the way quickly.
- */
- if (i915_seqno_passed(seqno, wait->global_seqno)) {
- RB_CLEAR_NODE(&wait->node);
- return first;
- }
-
- p = &b->waiters.rb_node;
- while (*p) {
- parent = *p;
- if (wait->global_seqno == to_wait(parent)->global_seqno) {
- /* We have multiple waiters on the same seqno, select
- * the highest priority task (that with the smallest
- * task->prio) to serve as the bottom-half for this
- * group.
- */
- if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
- p = &parent->rb_right;
- first = false;
- } else {
- p = &parent->rb_left;
- }
- } else if (i915_seqno_passed(wait->global_seqno,
- to_wait(parent)->global_seqno)) {
- p = &parent->rb_right;
- if (i915_seqno_passed(seqno,
- to_wait(parent)->global_seqno))
- completed = parent;
- else
- first = false;
- } else {
- p = &parent->rb_left;
- }
- }
- rb_link_node(&wait->node, parent, p);
- rb_insert_color(&wait->node, &b->waiters);
-
- if (first) {
- spin_lock(&b->irq_lock);
- b->irq_wait = wait;
- /* After assigning ourselves as the new bottom-half, we must
- * perform a cursory check to prevent a missed interrupt.
- * Either we miss the interrupt whilst programming the hardware,
- * or if there was a previous waiter (for a later seqno) they
- * may be woken instead of us (due to the inherent race
- * in the unlocked read of b->irq_seqno_bh in the irq handler)
- * and so we miss the wake up.
- */
- armed = __intel_breadcrumbs_enable_irq(b);
- spin_unlock(&b->irq_lock);
- }
-
- if (completed) {
- /* Advance the bottom-half (b->irq_wait) before we wake up
- * the waiters who may scribble over their intel_wait
- * just as the interrupt handler is dereferencing it via
- * b->irq_wait.
- */
- if (!first) {
- struct rb_node *next = rb_next(completed);
- GEM_BUG_ON(next == &wait->node);
- __intel_breadcrumbs_next(engine, next);
- }
-
- do {
- struct intel_wait *crumb = to_wait(completed);
- completed = rb_prev(completed);
- __intel_breadcrumbs_finish(b, crumb);
- } while (completed);
- }
-
- GEM_BUG_ON(!b->irq_wait);
- GEM_BUG_ON(!b->irq_armed);
- GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node);
-
- return armed;
+ del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
+ del_timer_sync(&b->hangcheck);
+ clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}
-bool intel_engine_add_wait(struct intel_wait *wait)
+void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
{
- struct intel_engine_cs *engine = wait->engine;
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- bool armed;
-
- spin_lock_irq(&b->rb_lock);
- armed = __intel_engine_add_wait(engine, wait);
- spin_unlock_irq(&b->rb_lock);
- if (armed)
- return armed;
+ unsigned long flags;
- /* Make the caller recheck if its request has already started. */
- return intel_engine_has_started(engine, wait->global_seqno);
-}
+ spin_lock_irqsave(&b->irq_lock, flags);
-static inline bool chain_wakeup(struct rb_node *rb, int priority)
-{
- return rb && to_wait(rb)->tsk->prio <= priority;
-}
+ /*
+ * Leave the fake_irq timer enabled (if it is running), but clear the
+ * bit so that it turns itself off on its next wake up and goes back
+ * to the long hangcheck interval if still required.
+ */
+ clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
-static inline int wakeup_priority(struct intel_breadcrumbs *b,
- struct task_struct *tsk)
-{
- if (tsk == b->signaler)
- return INT_MIN;
+ if (b->irq_enabled)
+ irq_enable(engine);
else
- return tsk->prio;
-}
-
-static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
- struct intel_wait *wait)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- lockdep_assert_held(&b->rb_lock);
-
- if (RB_EMPTY_NODE(&wait->node))
- goto out;
-
- if (b->irq_wait == wait) {
- const int priority = wakeup_priority(b, wait->tsk);
- struct rb_node *next;
-
- /* We are the current bottom-half. Find the next candidate,
- * the first waiter in the queue on the remaining oldest
- * request. As multiple seqnos may complete in the time it
- * takes us to wake up and find the next waiter, we have to
- * wake up that waiter for it to perform its own coherent
- * completion check.
- */
- next = rb_next(&wait->node);
- if (chain_wakeup(next, priority)) {
- /* If the next waiter is already complete,
- * wake it up and continue onto the next waiter. So
- * if have a small herd, they will wake up in parallel
- * rather than sequentially, which should reduce
- * the overall latency in waking all the completed
- * clients.
- *
- * However, waking up a chain adds extra latency to
- * the first_waiter. This is undesirable if that
- * waiter is a high priority task.
- */
- u32 seqno = intel_engine_get_seqno(engine);
-
- while (i915_seqno_passed(seqno,
- to_wait(next)->global_seqno)) {
- struct rb_node *n = rb_next(next);
-
- __intel_breadcrumbs_finish(b, to_wait(next));
- next = n;
- if (!chain_wakeup(next, priority))
- break;
- }
- }
-
- __intel_breadcrumbs_next(engine, next);
- } else {
- GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
- }
-
- GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
- rb_erase(&wait->node, &b->waiters);
- RB_CLEAR_NODE(&wait->node);
-
-out:
- GEM_BUG_ON(b->irq_wait == wait);
- GEM_BUG_ON(rb_first(&b->waiters) !=
- (b->irq_wait ? &b->irq_wait->node : NULL));
-}
-
-void intel_engine_remove_wait(struct intel_wait *wait)
-{
- struct intel_engine_cs *engine = wait->engine;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- /* Quick check to see if this waiter was already decoupled from
- * the tree by the bottom-half to avoid contention on the spinlock
- * by the herd.
- */
- if (RB_EMPTY_NODE(&wait->node)) {
- GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait);
- return;
- }
+ irq_disable(engine);
- spin_lock_irq(&b->rb_lock);
- __intel_engine_remove_wait(engine, wait);
- spin_unlock_irq(&b->rb_lock);
+ spin_unlock_irqrestore(&b->irq_lock, flags);
}
-static void signaler_set_rtpriority(void)
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
- struct sched_param param = { .sched_priority = 1 };
-
- sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m);
+ cancel_fake_irq(engine);
}
-static int intel_breadcrumbs_signaler(void *arg)
+bool intel_engine_enable_signaling(struct i915_request *rq)
{
- struct intel_engine_cs *engine = arg;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct i915_request *rq, *n;
-
- /* Install ourselves with high priority to reduce signalling latency */
- signaler_set_rtpriority();
-
- do {
- bool do_schedule = true;
- LIST_HEAD(list);
- u32 seqno;
-
- set_current_state(TASK_INTERRUPTIBLE);
- if (list_empty(&b->signals))
- goto sleep;
-
- /*
- * We are either woken up by the interrupt bottom-half,
- * or by a client adding a new signaller. In both cases,
- * the GPU seqno may have advanced beyond our oldest signal.
- * If it has, propagate the signal, remove the waiter and
- * check again with the next oldest signal. Otherwise we
- * need to wait for a new interrupt from the GPU or for
- * a new client.
- */
- seqno = intel_engine_get_seqno(engine);
-
- spin_lock_irq(&b->rb_lock);
- list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
- u64 this = rq->signaling.wait.global_seqno;
+ struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- GEM_BUG_ON(!this);
- if (!i915_seqno_passed(seqno, this))
- break;
-
- if (likely(this == i915_request_global_seqno(rq))) {
- __intel_engine_remove_wait(engine,
- &rq->signaling.wait);
-
- rq->signaling.wait.global_seqno = 0;
- __list_del_entry(&rq->signaling.link);
-
- if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
- &rq->fence.flags)) {
- list_add_tail(&rq->signaling.link,
- &list);
- i915_request_get(rq);
- }
- }
- }
- spin_unlock_irq(&b->rb_lock);
-
- if (!list_empty(&list)) {
- local_bh_disable();
- list_for_each_entry_safe(rq, n, &list, signaling.link) {
- dma_fence_signal(&rq->fence);
- GEM_BUG_ON(!i915_request_completed(rq));
- i915_request_put(rq);
- }
- local_bh_enable(); /* kick start the tasklets */
+ if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+ return true;
- /*
- * If the engine is saturated we may be continually
- * processing completed requests. This angers the
- * NMI watchdog if we never let anything else
- * have access to the CPU. Let's pretend to be nice
- * and relinquish the CPU if we burn through the
- * entire RT timeslice!
- */
- do_schedule = need_resched();
- }
+ spin_lock(&b->irq_lock);
+ if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
+ struct intel_context *ce = rq->hw_context;
+ struct i915_request *pos;
- if (unlikely(do_schedule)) {
-sleep:
- if (kthread_should_park())
- kthread_parkme();
+ __intel_breadcrumbs_arm_irq(b);
- if (unlikely(kthread_should_stop()))
+ list_for_each_entry_reverse(pos, &ce->signals, signal_link) {
+ if (i915_seqno_passed(rq->fence.seqno,
+ pos->fence.seqno))
break;
-
- schedule();
}
- } while (1);
- __set_current_state(TASK_RUNNING);
-
- return 0;
-}
+ list_add(&rq->signal_link, &pos->signal_link);
-static void insert_signal(struct intel_breadcrumbs *b,
- struct i915_request *request,
- const u32 seqno)
-{
- struct i915_request *iter;
-
- lockdep_assert_held(&b->rb_lock);
-
- /*
- * A reasonable assumption is that we are called to add signals
- * in sequence, as the requests are submitted for execution and
- * assigned a global_seqno. This will be the case for the majority
- * of internally generated signals (inter-engine signaling).
- *
- * Out of order waiters triggering random signaling enabling will
- * be more problematic, but hopefully rare enough and the list
- * small enough that the O(N) insertion sort is not an issue.
- */
-
- list_for_each_entry_reverse(iter, &b->signals, signaling.link)
- if (i915_seqno_passed(seqno, iter->signaling.wait.global_seqno))
- break;
-
- list_add(&request->signaling.link, &iter->signaling.link);
-}
-
-bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
-{
- struct intel_engine_cs *engine = request->engine;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct intel_wait *wait = &request->signaling.wait;
- u64 seqno;
-
- /*
- * Note that we may be called from an interrupt handler on another
- * device (e.g. nouveau signaling a fence completion causing us
- * to submit a request, and so enable signaling). As such,
- * we need to make sure that all other users of b->rb_lock protect
- * against interrupts, i.e. use spin_lock_irqsave.
- */
+ list_move_tail(&ce->signal_link, &b->signalers);
- /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
- GEM_BUG_ON(!irqs_disabled());
- lockdep_assert_held(&request->lock);
-
- seqno = i915_request_global_seqno(request);
- if (!seqno) /* will be enabled later upon execution */
- return true;
-
- GEM_BUG_ON(wait->global_seqno);
- wait->tsk = b->signaler;
- wait->request = request;
- wait->global_seqno = seqno;
-
- /*
- * Add ourselves into the list of waiters, but registering our
- * bottom-half as the signaller thread. As per usual, only the oldest
- * waiter (not just signaller) is tasked as the bottom-half waking
- * up all completed waiters after the user interrupt.
- *
- * If we are the oldest waiter, enable the irq (after which we
- * must double check that the seqno did not complete).
- */
- spin_lock(&b->rb_lock);
- insert_signal(b, request, seqno);
- wakeup &= __intel_engine_add_wait(engine, wait);
- spin_unlock(&b->rb_lock);
-
- if (wakeup) {
- wake_up_process(b->signaler);
- return !intel_wait_complete(wait);
+ set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
}
+ spin_unlock(&b->irq_lock);
- return true;
+ return !i915_request_completed(rq);
}
-void intel_engine_cancel_signaling(struct i915_request *request)
+void intel_engine_cancel_signaling(struct i915_request *rq)
{
- struct intel_engine_cs *engine = request->engine;
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- GEM_BUG_ON(!irqs_disabled());
- lockdep_assert_held(&request->lock);
+ struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- if (!READ_ONCE(request->signaling.wait.global_seqno))
+ if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
return;
- spin_lock(&b->rb_lock);
- __intel_engine_remove_wait(engine, &request->signaling.wait);
- if (fetch_and_zero(&request->signaling.wait.global_seqno))
- __list_del_entry(&request->signaling.link);
- spin_unlock(&b->rb_lock);
-}
-
-int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct task_struct *tsk;
-
- spin_lock_init(&b->rb_lock);
- spin_lock_init(&b->irq_lock);
-
- timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
- timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
-
- INIT_LIST_HEAD(&b->signals);
-
- /* Spawn a thread to provide a common bottom-half for all signals.
- * As this is an asynchronous interface we cannot steal the current
- * task for handling the bottom-half to the user interrupt, therefore
- * we create a thread to do the coherent seqno dance after the
- * interrupt and then signal the waitqueue (via the dma-buf/fence).
- */
- tsk = kthread_run(intel_breadcrumbs_signaler, engine,
- "i915/signal:%d", engine->id);
- if (IS_ERR(tsk))
- return PTR_ERR(tsk);
-
- b->signaler = tsk;
-
- return 0;
-}
+ spin_lock(&b->irq_lock);
+ if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
+ struct intel_context *ce = rq->hw_context;
-static void cancel_fake_irq(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ list_del(&rq->signal_link);
+ if (list_empty(&ce->signals))
+ list_del_init(&ce->signal_link);
- del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
- del_timer_sync(&b->hangcheck);
- clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+ clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+ }
+ spin_unlock(&b->irq_lock);
}
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+ struct drm_printer *p)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
- unsigned long flags;
-
- spin_lock_irqsave(&b->irq_lock, flags);
-
- /*
- * Leave the fake_irq timer enabled (if it is running), but clear the
- * bit so that it turns itself off on its next wake up and goes back
- * to the long hangcheck interval if still required.
- */
- clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
-
- if (b->irq_enabled)
- irq_enable(engine);
- else
- irq_disable(engine);
-
- spin_unlock_irqrestore(&b->irq_lock, flags);
-}
+ struct intel_context *ce;
+ struct i915_request *rq;
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ if (list_empty(&b->signalers))
+ return;
- /* The engines should be idle and all requests accounted for! */
- WARN_ON(READ_ONCE(b->irq_wait));
- WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
- WARN_ON(!list_empty(&b->signals));
+ drm_printf(p, "Signals:\n");
- if (!IS_ERR_OR_NULL(b->signaler))
- kthread_stop(b->signaler);
+ spin_lock_irq(&b->irq_lock);
+ list_for_each_entry(ce, &b->signalers, signal_link) {
+ list_for_each_entry(rq, &ce->signals, signal_link) {
+ drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
+ rq->fence.context, rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+ }
+ }
+ spin_unlock_irq(&b->irq_lock);
- cancel_fake_irq(engine);
+ if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
+ drm_printf(p, "Fake irq active\n");
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index f12c343f9df7..c1332f349a82 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -452,7 +452,7 @@ void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
/* After manually advancing the seqno, fake the interrupt in case
* there are any waiters for that seqno.
*/
- intel_engine_wakeup(engine);
+ intel_engine_signal_breadcrumbs(engine);
GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
}
@@ -635,9 +635,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
}
}
- ret = intel_engine_init_breadcrumbs(engine);
- if (ret)
- goto err_unpin_preempt;
+ intel_engine_init_breadcrumbs(engine);
if (HWS_NEEDS_PHYSICAL(i915))
ret = init_phys_status_page(engine);
@@ -650,7 +648,6 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
err_breadcrumbs:
intel_engine_fini_breadcrumbs(engine);
-err_unpin_preempt:
if (i915->preempt_context)
context_unpin(i915->preempt_context, engine);
@@ -1363,12 +1360,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
const char *header, ...)
{
const int MAX_REQUESTS_TO_SHOW = 8;
- struct intel_breadcrumbs * const b = &engine->breadcrumbs;
struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct i915_request *rq;
intel_wakeref_t wakeref;
unsigned long flags;
- struct rb_node *rb;
if (header) {
va_list ap;
@@ -1437,23 +1432,14 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_execlists_show_requests(engine, m,
print_request, MAX_REQUESTS_TO_SHOW);
- spin_lock(&b->rb_lock);
- for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
- struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
- drm_printf(m, "\t%s [%d:%c] waiting for %d:%d\n",
- w->tsk->comm, w->tsk->pid,
- task_state_to_char(w->tsk),
- upper_32_bits(w->global_seqno),
- lower_32_bits(w->global_seqno));
- }
- spin_unlock(&b->rb_lock);
local_irq_restore(flags);
drm_printf(m, "HWSP:\n");
hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
+
+ intel_engine_print_breadcrumbs(engine, m);
}
static u8 user_class_map[] = {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 18b643587e62..53a161d33e86 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -724,7 +724,7 @@ static int init_ring_common(struct intel_engine_cs *engine)
}
/* Papering over lost _interrupts_ immediately following the restart */
- intel_engine_wakeup(engine);
+ intel_engine_signal_breadcrumbs(engine);
out:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ac02a52beb41..ff093bd4e5e6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -337,13 +337,8 @@ struct intel_engine_cs {
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
- spinlock_t irq_lock; /* protects irq_*; irqsafe */
- struct intel_wait *irq_wait; /* oldest waiter by retirement */
-
- spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
- struct rb_root waiters; /* sorted by retirement, priority */
- struct list_head signals; /* sorted by retirement */
- struct task_struct *signaler; /* used for fence signalling */
+ spinlock_t irq_lock;
+ struct list_head signalers;
struct timer_list fake_irq; /* used after a missed interrupt */
struct timer_list hangcheck; /* detect missed interrupts */
@@ -844,88 +839,26 @@ static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
}
-/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
-int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
-
-static inline void intel_wait_init(struct intel_wait *wait)
-{
- wait->tsk = current;
- wait->request = NULL;
-}
-
-static inline void intel_wait_init_for_seqno(struct intel_wait *wait,
- struct intel_engine_cs *engine,
- u64 seqno)
-{
- wait->tsk = current;
- wait->engine = engine;
- wait->global_seqno = seqno;
-}
-
-static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
-{
- return wait->global_seqno;
-}
-
-static inline bool
-intel_wait_update_request(struct intel_wait *wait,
- const struct i915_request *rq)
-{
- u64 seqno, check;
-
- seqno = i915_request_global_seqno(rq);
- do {
- check = seqno;
- wait->engine = rq->engine;
- barrier();
- } while ((seqno = i915_request_global_seqno(rq)) != check);
- wait->global_seqno = seqno;
-
- return seqno;
-}
-
-static inline bool
-intel_wait_check_seqno(const struct intel_wait *wait, u64 seqno)
-{
- return wait->global_seqno == seqno;
-}
-
-static inline bool
-intel_wait_check_request(const struct intel_wait *wait,
- const struct i915_request *rq)
-{
- return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
-}
-
-static inline bool intel_wait_complete(const struct intel_wait *wait)
-{
- return RB_EMPTY_NODE(&wait->node);
-}
-
-bool intel_engine_add_wait(struct intel_wait *wait);
-void intel_engine_remove_wait(struct intel_wait *wait);
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
+bool intel_engine_enable_signaling(struct i915_request *request);
void intel_engine_cancel_signaling(struct i915_request *request);
-static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
-{
- return READ_ONCE(engine->breadcrumbs.irq_wait);
-}
-
-unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
-#define ENGINE_WAKEUP_WAITER BIT(0)
-#define ENGINE_WAKEUP_ASLEEP BIT(1)
-
void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
-void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
+
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+ struct drm_printer *p);
+
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 1b70208eeea7..dff03c05ef42 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -15,7 +15,6 @@ selftest(scatterlist, scatterlist_mock_selftests)
selftest(syncmap, i915_syncmap_mock_selftests)
selftest(uncore, intel_uncore_mock_selftests)
selftest(engine, intel_engine_cs_mock_selftests)
-selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
selftest(timelines, i915_gem_timeline_mock_selftests)
selftest(requests, i915_request_mock_selftests)
selftest(objects, i915_gem_object_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 0e70df0230b8..9ebd9225684e 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -185,11 +185,6 @@ void igt_spinner_fini(struct igt_spinner *spin)
bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq)
{
- if (!wait_event_timeout(rq->execute,
- READ_ONCE(rq->global_seqno),
- msecs_to_jiffies(10)))
- return false;
-
return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq),
rq->fence.seqno),
10) &&
diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
index 083b0cb331e4..00efd5bafaa1 100644
--- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c
@@ -27,444 +27,3 @@
#include "mock_gem_device.h"
#include "mock_engine.h"
-
-static int check_rbtree(struct intel_engine_cs *engine,
- const unsigned long *bitmap,
- const struct intel_wait *waiters,
- const int count)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
- struct rb_node *rb;
- int n;
-
- if (&b->irq_wait->node != rb_first(&b->waiters)) {
- pr_err("First waiter does not match first element of wait-tree\n");
- return -EINVAL;
- }
-
- n = find_first_bit(bitmap, count);
- for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
- struct intel_wait *w = container_of(rb, typeof(*w), node);
- int idx = w - waiters;
-
- if (!test_bit(idx, bitmap)) {
- pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n",
- idx, lower_32_bits(w->global_seqno));
- return -EINVAL;
- }
-
- if (n != idx) {
- pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n",
- idx, lower_32_bits(w->global_seqno), n);
- return -EINVAL;
- }
-
- n = find_next_bit(bitmap, count, n + 1);
- }
-
- return 0;
-}
-
-static int check_completion(struct intel_engine_cs *engine,
- const unsigned long *bitmap,
- const struct intel_wait *waiters,
- const int count)
-{
- int n;
-
- for (n = 0; n < count; n++) {
- if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap))
- continue;
-
- pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n",
- n, lower_32_bits(waiters[n].global_seqno),
- intel_wait_complete(&waiters[n]) ? "complete" : "active",
- test_bit(n, bitmap) ? "active" : "complete");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int check_rbtree_empty(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- if (b->irq_wait) {
- pr_err("Empty breadcrumbs still has a waiter\n");
- return -EINVAL;
- }
-
- if (!RB_EMPTY_ROOT(&b->waiters)) {
- pr_err("Empty breadcrumbs, but wait-tree not empty\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int igt_random_insert_remove(void *arg)
-{
- const u32 seqno_bias = 0x1000;
- I915_RND_STATE(prng);
- struct intel_engine_cs *engine = arg;
- struct intel_wait *waiters;
- const int count = 4096;
- unsigned int *order;
- unsigned long *bitmap;
- int err = -ENOMEM;
- int n;
-
- mock_engine_reset(engine);
-
- waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
- if (!waiters)
- goto out_engines;
-
- bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
- GFP_KERNEL);
- if (!bitmap)
- goto out_waiters;
-
- order = i915_random_order(count, &prng);
- if (!order)
- goto out_bitmap;
-
- for (n = 0; n < count; n++)
- intel_wait_init_for_seqno(&waiters[n], engine, seqno_bias + n);
-
- err = check_rbtree(engine, bitmap, waiters, count);
- if (err)
- goto out_order;
-
- /* Add and remove waiters into the rbtree in random order. At each
- * step, we verify that the rbtree is correctly ordered.
- */
- for (n = 0; n < count; n++) {
- int i = order[n];
-
- intel_engine_add_wait(&waiters[i]);
- __set_bit(i, bitmap);
-
- err = check_rbtree(engine, bitmap, waiters, count);
- if (err)
- goto out_order;
- }
-
- i915_random_reorder(order, count, &prng);
- for (n = 0; n < count; n++) {
- int i = order[n];
-
- intel_engine_remove_wait(&waiters[i]);
- __clear_bit(i, bitmap);
-
- err = check_rbtree(engine, bitmap, waiters, count);
- if (err)
- goto out_order;
- }
-
- err = check_rbtree_empty(engine);
-out_order:
- kfree(order);
-out_bitmap:
- kfree(bitmap);
-out_waiters:
- kvfree(waiters);
-out_engines:
- mock_engine_flush(engine);
- return err;
-}
-
-static int igt_insert_complete(void *arg)
-{
- const u32 seqno_bias = 0x1000;
- struct intel_engine_cs *engine = arg;
- struct intel_wait *waiters;
- const int count = 4096;
- unsigned long *bitmap;
- int err = -ENOMEM;
- int n, m;
-
- mock_engine_reset(engine);
-
- waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
- if (!waiters)
- goto out_engines;
-
- bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
- GFP_KERNEL);
- if (!bitmap)
- goto out_waiters;
-
- for (n = 0; n < count; n++) {
- intel_wait_init_for_seqno(&waiters[n], engine, n + seqno_bias);
- intel_engine_add_wait(&waiters[n]);
- __set_bit(n, bitmap);
- }
- err = check_rbtree(engine, bitmap, waiters, count);
- if (err)
- goto out_bitmap;
-
- /* On each step, we advance the seqno so that several waiters are then
- * complete (we increase the seqno by increasingly larger values to
- * retire more and more waiters at once). All retired waiters should
- * be woken and removed from the rbtree, and so that we check.
- */
- for (n = 0; n < count; n = m) {
- int seqno = 2 * n;
-
- GEM_BUG_ON(find_first_bit(bitmap, count) != n);
-
- if (intel_wait_complete(&waiters[n])) {
- pr_err("waiter[%d, seqno=%d] completed too early\n",
- n, lower_32_bits(waiters[n].global_seqno));
- err = -EINVAL;
- goto out_bitmap;
- }
-
- /* complete the following waiters */
- mock_seqno_advance(engine, seqno + seqno_bias);
- for (m = n; m <= seqno; m++) {
- if (m == count)
- break;
-
- GEM_BUG_ON(!test_bit(m, bitmap));
- __clear_bit(m, bitmap);
- }
-
- intel_engine_remove_wait(&waiters[n]);
- RB_CLEAR_NODE(&waiters[n].node);
-
- err = check_rbtree(engine, bitmap, waiters, count);
- if (err) {
- pr_err("rbtree corrupt after seqno advance to %d\n",
- seqno + seqno_bias);
- goto out_bitmap;
- }
-
- err = check_completion(engine, bitmap, waiters, count);
- if (err) {
- pr_err("completions after seqno advance to %d failed\n",
- seqno + seqno_bias);
- goto out_bitmap;
- }
- }
-
- err = check_rbtree_empty(engine);
-out_bitmap:
- kfree(bitmap);
-out_waiters:
- kvfree(waiters);
-out_engines:
- mock_engine_flush(engine);
- return err;
-}
-
-struct igt_wakeup {
- struct task_struct *tsk;
- atomic_t *ready, *set, *done;
- struct intel_engine_cs *engine;
- unsigned long flags;
-#define STOP 0
-#define IDLE 1
- wait_queue_head_t *wq;
- u32 seqno;
-};
-
-static bool wait_for_ready(struct igt_wakeup *w)
-{
- DEFINE_WAIT(ready);
-
- set_bit(IDLE, &w->flags);
- if (atomic_dec_and_test(w->done))
- wake_up_var(w->done);
-
- if (test_bit(STOP, &w->flags))
- goto out;
-
- for (;;) {
- prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE);
- if (atomic_read(w->ready) == 0)
- break;
-
- schedule();
- }
- finish_wait(w->wq, &ready);
-
-out:
- clear_bit(IDLE, &w->flags);
- if (atomic_dec_and_test(w->set))
- wake_up_var(w->set);
-
- return !test_bit(STOP, &w->flags);
-}
-
-static int igt_wakeup_thread(void *arg)
-{
- struct igt_wakeup *w = arg;
- struct intel_wait wait;
-
- while (wait_for_ready(w)) {
- GEM_BUG_ON(kthread_should_stop());
-
- intel_wait_init_for_seqno(&wait, w->engine, w->seqno);
- intel_engine_add_wait(&wait);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (i915_seqno_passed(intel_engine_get_seqno(w->engine),
- w->seqno))
- break;
-
- if (test_bit(STOP, &w->flags)) /* emergency escape */
- break;
-
- schedule();
- }
- intel_engine_remove_wait(&wait);
- __set_current_state(TASK_RUNNING);
- }
-
- return 0;
-}
-
-static void igt_wake_all_sync(atomic_t *ready,
- atomic_t *set,
- atomic_t *done,
- wait_queue_head_t *wq,
- int count)
-{
- atomic_set(set, count);
- atomic_set(ready, 0);
- wake_up_all(wq);
-
- wait_var_event(set, !atomic_read(set));
- atomic_set(ready, count);
- atomic_set(done, count);
-}
-
-static int igt_wakeup(void *arg)
-{
- I915_RND_STATE(prng);
- struct intel_engine_cs *engine = arg;
- struct igt_wakeup *waiters;
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- const int count = 4096;
- const u32 max_seqno = count / 4;
- atomic_t ready, set, done;
- int err = -ENOMEM;
- int n, step;
-
- mock_engine_reset(engine);
-
- waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
- if (!waiters)
- goto out_engines;
-
- /* Create a large number of threads, each waiting on a random seqno.
- * Multiple waiters will be waiting for the same seqno.
- */
- atomic_set(&ready, count);
- for (n = 0; n < count; n++) {
- waiters[n].wq = &wq;
- waiters[n].ready = &ready;
- waiters[n].set = &set;
- waiters[n].done = &done;
- waiters[n].engine = engine;
- waiters[n].flags = BIT(IDLE);
-
- waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n],
- "i915/igt:%d", n);
- if (IS_ERR(waiters[n].tsk))
- goto out_waiters;
-
- get_task_struct(waiters[n].tsk);
- }
-
- for (step = 1; step <= max_seqno; step <<= 1) {
- u32 seqno;
-
- /* The waiter threads start paused as we assign them a random
- * seqno and reset the engine. Once the engine is reset,
- * we signal that the threads may begin their wait upon their
- * seqno.
- */
- for (n = 0; n < count; n++) {
- GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags));
- waiters[n].seqno =
- 1 + prandom_u32_state(&prng) % max_seqno;
- }
- mock_seqno_advance(engine, 0);
- igt_wake_all_sync(&ready, &set, &done, &wq, count);
-
- /* Simulate the GPU doing chunks of work, with one or more
- * seqno appearing to finish at the same time. A random number
- * of threads will be waiting upon the update and hopefully be
- * woken.
- */
- for (seqno = 1; seqno <= max_seqno + step; seqno += step) {
- usleep_range(50, 500);
- mock_seqno_advance(engine, seqno);
- }
- GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno);
-
- /* With the seqno now beyond any of the waiting threads, they
- * should all be woken, see that they are complete and signal
- * that they are ready for the next test. We wait until all
- * threads are complete and waiting for us (i.e. not a seqno).
- */
- if (!wait_var_event_timeout(&done,
- !atomic_read(&done), 10 * HZ)) {
- pr_err("Timed out waiting for %d remaining waiters\n",
- atomic_read(&done));
- err = -ETIMEDOUT;
- break;
- }
-
- err = check_rbtree_empty(engine);
- if (err)
- break;
- }
-
-out_waiters:
- for (n = 0; n < count; n++) {
- if (IS_ERR(waiters[n].tsk))
- break;
-
- set_bit(STOP, &waiters[n].flags);
- }
- mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */
- igt_wake_all_sync(&ready, &set, &done, &wq, n);
-
- for (n = 0; n < count; n++) {
- if (IS_ERR(waiters[n].tsk))
- break;
-
- kthread_stop(waiters[n].tsk);
- put_task_struct(waiters[n].tsk);
- }
-
- kvfree(waiters);
-out_engines:
- mock_engine_flush(engine);
- return err;
-}
-
-int intel_breadcrumbs_mock_selftests(void)
-{
- static const struct i915_subtest tests[] = {
- SUBTEST(igt_random_insert_remove),
- SUBTEST(igt_insert_complete),
- SUBTEST(igt_wakeup),
- };
- struct drm_i915_private *i915;
- int err;
-
- i915 = mock_gem_device();
- if (!i915)
- return -ENOMEM;
-
- err = i915_subtests(tests, i915->engine[RCS]);
- drm_dev_put(&i915->drm);
-
- return err;
-}
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 05cba4889ac8..3c2a7f54c24b 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -1137,7 +1137,7 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
wait_for_completion(&arg.completion);
- if (wait_for(waitqueue_active(&rq->execute), 10)) {
+ if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
struct drm_printer p = drm_info_printer(i915->drm.dev);
pr_err("igt/evict_vma kthread did not wait\n");
--
2.20.1
More information about the Intel-gfx-trybot
mailing list