[Intel-gfx] [PATCH 22/38] drm/i915: Drop fake breadcrumb irq
Chris Wilson
chris at chris-wilson.co.uk
Fri Jan 18 14:00:53 UTC 2019
Missed breadcrumb detection is defunct due to the tight coupling with
dma_fence signaling and the myriad ways we may signal fences from
everywhere but from an interrupt, i.e. we frequently signal a fence
before we even see its interrupt. This means that even if we miss an
interrupt for a fence, it still is signaled before our breadcrumb
hangcheck fires, so simplify the breadcrumb hangchecking by moving it
into the GPU hangcheck and forgo fake interrupts.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 93 -----------
drivers/gpu/drm/i915/i915_gpu_error.c | 2 -
drivers/gpu/drm/i915/i915_gpu_error.h | 5 -
drivers/gpu/drm/i915/intel_breadcrumbs.c | 147 +-----------------
drivers/gpu/drm/i915/intel_hangcheck.c | 2 +
drivers/gpu/drm/i915/intel_ringbuffer.h | 5 -
.../gpu/drm/i915/selftests/i915_gem_context.c | 7 -
drivers/gpu/drm/i915/selftests/i915_request.c | 7 -
8 files changed, 5 insertions(+), 263 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d7764e62e9b4..c2aaf010c3d1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1321,9 +1321,6 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
intel_engine_last_submit(engine),
jiffies_to_msecs(jiffies -
engine->hangcheck.action_timestamp));
- seq_printf(m, "\tfake irq active? %s\n",
- yesno(test_bit(engine->id,
- &dev_priv->gpu_error.missed_irq_rings)));
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
@@ -3874,94 +3871,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_wedged_fops,
i915_wedged_get, i915_wedged_set,
"%llu\n");
-static int
-fault_irq_set(struct drm_i915_private *i915,
- unsigned long *irq,
- unsigned long val)
-{
- int err;
-
- err = mutex_lock_interruptible(&i915->drm.struct_mutex);
- if (err)
- return err;
-
- err = i915_gem_wait_for_idle(i915,
- I915_WAIT_LOCKED |
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (err)
- goto err_unlock;
-
- *irq = val;
- mutex_unlock(&i915->drm.struct_mutex);
-
- /* Flush idle worker to disarm irq */
- drain_delayed_work(&i915->gt.idle_work);
-
- return 0;
-
-err_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
-}
-
-static int
-i915_ring_missed_irq_get(void *data, u64 *val)
-{
- struct drm_i915_private *dev_priv = data;
-
- *val = dev_priv->gpu_error.missed_irq_rings;
- return 0;
-}
-
-static int
-i915_ring_missed_irq_set(void *data, u64 val)
-{
- struct drm_i915_private *i915 = data;
-
- return fault_irq_set(i915, &i915->gpu_error.missed_irq_rings, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_ring_missed_irq_fops,
- i915_ring_missed_irq_get, i915_ring_missed_irq_set,
- "0x%08llx\n");
-
-static int
-i915_ring_test_irq_get(void *data, u64 *val)
-{
- struct drm_i915_private *dev_priv = data;
-
- *val = dev_priv->gpu_error.test_irq_rings;
-
- return 0;
-}
-
-static int
-i915_ring_test_irq_set(void *data, u64 val)
-{
- struct drm_i915_private *i915 = data;
-
- /* GuC keeps the user interrupt permanently enabled for submission */
- if (USES_GUC_SUBMISSION(i915))
- return -ENODEV;
-
- /*
- * From icl, we can no longer individually mask interrupt generation
- * from each engine.
- */
- if (INTEL_GEN(i915) >= 11)
- return -ENODEV;
-
- val &= INTEL_INFO(i915)->ring_mask;
- DRM_DEBUG_DRIVER("Masking interrupts on rings 0x%08llx\n", val);
-
- return fault_irq_set(i915, &i915->gpu_error.test_irq_rings, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops,
- i915_ring_test_irq_get, i915_ring_test_irq_set,
- "0x%08llx\n");
-
#define DROP_UNBOUND BIT(0)
#define DROP_BOUND BIT(1)
#define DROP_RETIRE BIT(2)
@@ -4724,8 +4633,6 @@ static const struct i915_debugfs_files {
} i915_debugfs_files[] = {
{"i915_wedged", &i915_wedged_fops},
{"i915_cache_sharing", &i915_cache_sharing_fops},
- {"i915_ring_missed_irq", &i915_ring_missed_irq_fops},
- {"i915_ring_test_irq", &i915_ring_test_irq_fops},
{"i915_gem_drop_caches", &i915_drop_caches_fops},
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
{"i915_error_state", &i915_error_state_fops},
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 7c2510ce81d2..7d26c25ee785 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -718,8 +718,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
err_printf(m, "CCID: 0x%08x\n", error->ccid);
- err_printf(m, "Missed interrupts: 0x%08lx\n",
- m->i915->gpu_error.missed_irq_rings);
for (i = 0; i < error->nfence; i++)
err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 0e184712cbcc..99a53c0cd6da 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -203,8 +203,6 @@ struct i915_gpu_error {
atomic_t pending_fb_pin;
- unsigned long missed_irq_rings;
-
/**
* State variable controlling the reset flow and count
*
@@ -273,9 +271,6 @@ struct i915_gpu_error {
*/
wait_queue_head_t reset_queue;
- /* For missed irq/seqno simulation. */
- unsigned long test_irq_rings;
-
struct i915_gpu_restart *restart;
};
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 1187d2edd330..6ae7f37cff35 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -86,7 +86,6 @@ bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
spin_lock(&b->irq_lock);
- b->irq_fired = true;
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
@@ -150,86 +149,6 @@ static void signal_irq_work(struct irq_work *work)
intel_engine_breadcrumbs_irq(engine);
}
-static unsigned long wait_timeout(void)
-{
- return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
-}
-
-static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
-{
- if (GEM_SHOW_DEBUG()) {
- struct drm_printer p = drm_debug_printer(__func__);
-
- intel_engine_dump(engine, &p,
- "%s missed breadcrumb at %pS\n",
- engine->name, __builtin_return_address(0));
- }
-
- set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
-}
-
-static void intel_breadcrumbs_hangcheck(struct timer_list *t)
-{
- struct intel_engine_cs *engine =
- from_timer(engine, t, breadcrumbs.hangcheck);
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- if (!b->irq_armed)
- return;
-
- if (b->irq_fired)
- goto rearm;
-
- /*
- * We keep the hangcheck timer alive until we disarm the irq, even
- * if there are no waiters at present.
- *
- * If the waiter was currently running, assume it hasn't had a chance
- * to process the pending interrupt (e.g, low priority task on a loaded
- * system) and wait until it sleeps before declaring a missed interrupt.
- *
- * If the waiter was asleep (and not even pending a wakeup), then we
- * must have missed an interrupt as the GPU has stopped advancing
- * but we still have a waiter. Assuming all batches complete within
- * DRM_I915_HANGCHECK_JIFFIES [1.5s]!
- */
- synchronize_hardirq(engine->i915->drm.irq);
- if (intel_engine_signal_breadcrumbs(engine)) {
- missed_breadcrumb(engine);
- mod_timer(&b->fake_irq, jiffies + 1);
- } else {
-rearm:
- b->irq_fired = false;
- mod_timer(&b->hangcheck, wait_timeout());
- }
-}
-
-static void intel_breadcrumbs_fake_irq(struct timer_list *t)
-{
- struct intel_engine_cs *engine =
- from_timer(engine, t, breadcrumbs.fake_irq);
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- /*
- * The timer persists in case we cannot enable interrupts,
- * or if we have previously seen seqno/interrupt incoherency
- * ("missed interrupt" syndrome, better known as a "missed breadcrumb").
- * Here the worker will wake up every jiffie in order to kick the
- * oldest waiter to do the coherent seqno check.
- */
-
- if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed)
- return;
-
- /* If the user has disabled the fake-irq, restore the hangchecking */
- if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) {
- mod_timer(&b->hangcheck, wait_timeout());
- return;
- }
-
- mod_timer(&b->fake_irq, jiffies + 1);
-}
-
void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
@@ -252,43 +171,14 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)
spin_unlock_irq(&b->irq_lock);
}
-static bool use_fake_irq(const struct intel_breadcrumbs *b)
-{
- const struct intel_engine_cs *engine =
- container_of(b, struct intel_engine_cs, breadcrumbs);
-
- if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
- return false;
-
- /*
- * Only start with the heavy weight fake irq timer if we have not
- * seen any interrupts since enabling it the first time. If the
- * interrupts are still arriving, it means we made a mistake in our
- * engine->seqno_barrier(), a timing error that should be transient
- * and unlikely to reoccur.
- */
- return !b->irq_fired;
-}
-
-static void enable_fake_irq(struct intel_breadcrumbs *b)
-{
- /* Ensure we never sleep indefinitely */
- if (!b->irq_enabled || use_fake_irq(b))
- mod_timer(&b->fake_irq, jiffies + 1);
- else
- mod_timer(&b->hangcheck, wait_timeout());
-}
-
-static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
- struct drm_i915_private *i915 = engine->i915;
- bool enabled;
lockdep_assert_held(&b->irq_lock);
if (b->irq_armed)
- return false;
+ return;
/*
* The breadcrumb irq will be disarmed on the interrupt after the
@@ -306,16 +196,8 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* the driver is idle) we disarm the breadcrumbs.
*/
- /* No interrupts? Kick the waiter every jiffie! */
- enabled = false;
- if (!b->irq_enabled++ &&
- !test_bit(engine->id, &i915->gpu_error.test_irq_rings)) {
+ if (!b->irq_enabled++)
irq_enable(engine);
- enabled = true;
- }
-
- enable_fake_irq(b);
- return enabled;
}
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -326,18 +208,6 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
INIT_LIST_HEAD(&b->signalers);
init_irq_work(&b->irq_work, signal_irq_work);
-
- timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
- timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
-}
-
-static void cancel_fake_irq(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
- del_timer_sync(&b->hangcheck);
- clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
@@ -347,13 +217,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
spin_lock_irqsave(&b->irq_lock, flags);
- /*
- * Leave the fake_irq timer enabled (if it is running), but clear the
- * bit so that it turns itself off on its next wake up and goes back
- * to the long hangcheck interval if still required.
- */
- clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
-
if (b->irq_enabled)
irq_enable(engine);
else
@@ -364,7 +227,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
- cancel_fake_irq(engine);
}
bool intel_engine_enable_signaling(struct i915_request *rq)
@@ -446,7 +308,4 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
}
}
spin_unlock_irq(&b->irq_lock);
-
- if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
- drm_printf(p, "Fake irq active\n");
}
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
index 925146dcc0f8..e92f48a40a59 100644
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -279,6 +279,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
for_each_engine(engine, dev_priv, id) {
struct hangcheck hc;
+ intel_engine_signal_breadcrumbs(engine);
+
hangcheck_load_sample(engine, &hc);
hangcheck_accumulate_sample(engine, &hc);
hangcheck_store_sample(engine, &hc);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 6d1485208342..3ab159529dc7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -390,14 +390,9 @@ struct intel_engine_cs {
struct irq_work irq_work;
- struct timer_list fake_irq; /* used after a missed interrupt */
- struct timer_list hangcheck; /* detect missed interrupts */
-
- unsigned int hangcheck_interrupts;
unsigned int irq_enabled;
bool irq_armed;
- bool irq_fired;
} breadcrumbs;
struct {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 4cba50679607..2cca234fd291 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -65,7 +65,6 @@ static int begin_live_test(struct live_test *t,
return err;
}
- i915->gpu_error.missed_irq_rings = 0;
t->reset_global = i915_reset_count(&i915->gpu_error);
for_each_engine(engine, i915, id)
@@ -103,12 +102,6 @@ static int end_live_test(struct live_test *t)
return -EIO;
}
- if (i915->gpu_error.missed_irq_rings) {
- pr_err("%s(%s): Missed interrupts on engines %lx\n",
- t->func, t->name, i915->gpu_error.missed_irq_rings);
- return -EIO;
- }
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 48b6159afe64..5953a47827a7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -535,7 +535,6 @@ static int begin_live_test(struct live_test *t,
return err;
}
- i915->gpu_error.missed_irq_rings = 0;
t->reset_count = i915_reset_count(&i915->gpu_error);
return 0;
@@ -559,12 +558,6 @@ static int end_live_test(struct live_test *t)
return -EIO;
}
- if (i915->gpu_error.missed_irq_rings) {
- pr_err("%s(%s): Missed interrupts on engines %lx\n",
- t->func, t->name, i915->gpu_error.missed_irq_rings);
- return -EIO;
- }
-
return 0;
}
--
2.20.1
More information about the Intel-gfx
mailing list