[PATCH 6/8] drm/i915/gt: Hook up CS_MASTER_ERROR_INTERRUPT
Chris Wilson
chris at chris-wilson.co.uk
Sat Jan 25 16:28:37 UTC 2020
Now that we have offline error capture and can reset an engine from
inside an atomic context while also preserving the GPU state for
post-mortem analysis, it is time to handle error interrupts thrown by
the command parser.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
drivers/gpu/drm/i915/gt/intel_gt.c | 5 ++
drivers/gpu/drm/i915/gt/intel_gt_irq.c | 77 +++++++++-----------
drivers/gpu/drm/i915/gt/intel_gt_irq.h | 3 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 42 ++++++++---
drivers/gpu/drm/i915/i915_irq.c | 10 +--
drivers/gpu/drm/i915/i915_reg.h | 5 +-
7 files changed, 79 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 58725024ffa4..d4544e9533c9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -409,6 +409,7 @@ struct intel_engine_cs {
int (*resume)(struct intel_engine_cs *engine);
struct {
+ void (*handler)(struct intel_engine_cs *engine);
void (*prepare)(struct intel_engine_cs *engine);
void (*rewind)(struct intel_engine_cs *engine, bool stalled);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index da2b6e2ae692..abc445f4d7a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -455,6 +455,11 @@ static int __engines_record_defaults(struct intel_gt *gt)
if (!rq)
continue;
+ if (rq->fence.error) {
+ err = -EIO;
+ goto out;
+ }
+
GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
state = rq->context->state;
if (!state)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
index 6ae64a224b02..586664bfc637 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -24,6 +24,9 @@ cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
{
bool tasklet = false;
+ if (unlikely(iir & GT_CS_MASTER_ERROR_INTERRUPT))
+ engine->reset.handler(engine);
+
if (iir & GT_WAIT_SEMAPHORE_INTERRUPT) {
WRITE_ONCE(engine->execlists.yield,
ENGINE_READ_FW(engine, EXECLIST_CCID));
@@ -218,6 +221,7 @@ void gen11_gt_irq_reset(struct intel_gt *gt)
void gen11_gt_irq_postinstall(struct intel_gt *gt)
{
const u32 irqs =
+ GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
GT_CONTEXT_SWITCH_INTERRUPT |
GT_WAIT_SEMAPHORE_INTERRUPT;
@@ -289,66 +293,56 @@ void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
GT_BSD_CS_ERROR_INTERRUPT |
- GT_RENDER_CS_MASTER_ERROR_INTERRUPT))
+ GT_CS_MASTER_ERROR_INTERRUPT))
DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir);
if (gt_iir & GT_PARITY_ERROR(gt->i915))
gen7_parity_error_irq_handler(gt, gt_iir);
}
-void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl)
{
void __iomem * const regs = gt->uncore->regs;
+ u32 iir;
if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
- gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0));
- if (likely(gt_iir[0]))
- raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]);
- }
-
- if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
- gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1));
- if (likely(gt_iir[1]))
- raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]);
- }
-
- if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
- gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
- if (likely(gt_iir[2]))
- raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]);
- }
-
- if (master_ctl & GEN8_GT_VECS_IRQ) {
- gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3));
- if (likely(gt_iir[3]))
- raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]);
- }
-}
-
-void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
-{
- if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
- cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
- gt_iir[0] >> GEN8_RCS_IRQ_SHIFT);
- cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
- gt_iir[0] >> GEN8_BCS_IRQ_SHIFT);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(0));
+ if (likely(iir)) {
+ cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
+ iir >> GEN8_RCS_IRQ_SHIFT);
+ cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
+ iir >> GEN8_BCS_IRQ_SHIFT);
+ raw_reg_write(regs, GEN8_GT_IIR(0), iir);
+ }
}
if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
- cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
- gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT);
- cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
- gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(1));
+ if (likely(iir)) {
+ cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
+ iir >> GEN8_VCS0_IRQ_SHIFT);
+ cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
+ iir >> GEN8_VCS1_IRQ_SHIFT);
+ raw_reg_write(regs, GEN8_GT_IIR(1), iir);
+ }
}
if (master_ctl & GEN8_GT_VECS_IRQ) {
- cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
- gt_iir[3] >> GEN8_VECS_IRQ_SHIFT);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(3));
+ if (likely(iir)) {
+ cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+ iir >> GEN8_VECS_IRQ_SHIFT);
+ raw_reg_write(regs, GEN8_GT_IIR(3), iir);
+ }
}
if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
- gen6_rps_irq_handler(>->rps, gt_iir[2]);
- guc_irq_handler(>->uc.guc, gt_iir[2] >> 16);
+ iir = raw_reg_read(regs, GEN8_GT_IIR(2));
+ if (likely(iir)) {
+ gen6_rps_irq_handler(>->rps, iir);
+ guc_irq_handler(>->uc.guc, iir >> 16);
+ raw_reg_write(regs, GEN8_GT_IIR(2), iir);
+ }
}
}
@@ -368,6 +362,7 @@ void gen8_gt_irq_postinstall(struct intel_gt *gt)
/* These are interrupts we'll toggle with the ring mask register */
const u32 irqs =
+ GT_CS_MASTER_ERROR_INTERRUPT |
GT_RENDER_USER_INTERRUPT |
GT_CONTEXT_SWITCH_INTERRUPT |
GT_WAIT_SEMAPHORE_INTERRUPT;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
index 8f37593712c9..886c5cf408a2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_irq.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
@@ -36,9 +36,8 @@ void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask);
void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir);
-void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl);
void gen8_gt_irq_reset(struct intel_gt *gt);
-void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
void gen8_gt_irq_postinstall(struct intel_gt *gt);
#endif /* INTEL_GT_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 6ba5a634c6e3..d7447cfa9c84 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2635,12 +2635,12 @@ static bool execlists_capture(struct intel_engine_cs *engine)
if (!cap)
return true;
- cap->rq = execlists_active(&engine->execlists);
- GEM_BUG_ON(!cap->rq);
-
rcu_read_lock();
- cap->rq = active_request(cap->rq->context->timeline, cap->rq);
- cap->rq = i915_request_get_rcu(cap->rq);
+ cap->rq = execlists_active(&engine->execlists);
+ if (cap->rq) {
+ cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+ cap->rq = i915_request_get_rcu(cap->rq);
+ }
rcu_read_unlock();
if (!cap->rq)
goto err_free;
@@ -2680,12 +2680,12 @@ static bool execlists_capture(struct intel_engine_cs *engine)
return false;
}
-static noinline void preempt_reset(struct intel_engine_cs *engine)
+static void reset_handler(struct intel_engine_cs *engine, const char *msg)
{
const unsigned int bit = I915_RESET_ENGINE + engine->id;
unsigned long *lock = &engine->gt->reset.flags;
- if (i915_modparams.reset < 3)
+ if (!intel_has_reset_engine(engine->gt))
return;
if (test_and_set_bit(bit, lock))
@@ -2694,13 +2694,9 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
/* Mark this tasklet as disabled to avoid waiting for it to complete */
tasklet_disable_nosync(&engine->execlists.tasklet);
- ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
- READ_ONCE(engine->props.preempt_timeout_ms),
- jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
-
ring_set_paused(engine, 1); /* Freeze the current request in place */
if (execlists_capture(engine))
- intel_engine_reset(engine, "preemption time out");
+ intel_engine_reset(engine, msg);
else
ring_set_paused(engine, 0);
@@ -2708,6 +2704,23 @@ static noinline void preempt_reset(struct intel_engine_cs *engine)
clear_and_wake_up_bit(bit, lock);
}
+static void execlists_reset_handler(struct intel_engine_cs *engine)
+{
+ ENGINE_TRACE(engine, "CS error\n");
+
+ reset_handler(engine, "CS error");
+ ENGINE_WRITE(engine, RING_EIR, ~0u);
+}
+
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+ ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+
+ reset_handler(engine, "preemption time out");
+}
+
static bool preempt_timeout(const struct intel_engine_cs *const engine)
{
const struct timer_list *t = &engine->execlists.preempt;
@@ -4222,6 +4235,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
engine->schedule = i915_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
+ engine->reset.handler = execlists_reset_handler;
engine->reset.prepare = execlists_reset_prepare;
engine->reset.rewind = execlists_reset_rewind;
engine->reset.cancel = execlists_reset_cancel;
@@ -4313,6 +4327,10 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift;
+
+ ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */
+ ENGINE_WRITE(engine, RING_EMR, ~0u << 8); /* enable reporting */
+ engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 94cb25ac504d..cc0b9f78990b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1609,7 +1609,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
u32 master_ctl, iir;
u32 pipe_stats[I915_MAX_PIPES] = {};
u32 hotplug_status = 0;
- u32 gt_iir[4];
u32 ier = 0;
master_ctl = I915_READ(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL;
@@ -1637,7 +1636,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
ier = I915_READ(VLV_IER);
I915_WRITE(VLV_IER, 0);
- gen8_gt_irq_ack(&dev_priv->gt, master_ctl, gt_iir);
+ gen8_gt_irq_handler(&dev_priv->gt, master_ctl);
if (iir & I915_DISPLAY_PORT_INTERRUPT)
hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -1661,8 +1660,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
I915_WRITE(VLV_IER, ier);
I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
- gen8_gt_irq_handler(&dev_priv->gt, master_ctl, gt_iir);
-
if (hotplug_status)
i9xx_hpd_irq_handler(dev_priv, hotplug_status);
@@ -2391,7 +2388,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
struct drm_i915_private *dev_priv = arg;
void __iomem * const regs = dev_priv->uncore.regs;
u32 master_ctl;
- u32 gt_iir[4];
if (!intel_irqs_enabled(dev_priv))
return IRQ_NONE;
@@ -2403,7 +2399,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
}
/* Find, clear, then process each source of interrupt */
- gen8_gt_irq_ack(&dev_priv->gt, master_ctl, gt_iir);
+ gen8_gt_irq_handler(&dev_priv->gt, master_ctl);
/* IRQs are synced during runtime_suspend, we don't require a wakeref */
if (master_ctl & ~GEN8_GT_IRQS) {
@@ -2414,8 +2410,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
gen8_master_intr_enable(regs);
- gen8_gt_irq_handler(&dev_priv->gt, master_ctl, gt_iir);
-
return IRQ_HANDLED;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 535ce7e0dc94..988b4a234f30 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2639,6 +2639,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7)
#define RING_IPEIR(base) _MMIO((base) + 0x64)
#define RING_IPEHR(base) _MMIO((base) + 0x68)
+#define RING_EIR(base) _MMIO((base) + 0xb0)
+#define RING_EMR(base) _MMIO((base) + 0xb4)
+#define RING_ESR(base) _MMIO((base) + 0xb8)
/*
* On GEN4, only the render ring INSTDONE exists and has a different
* layout than the GEN7+ version.
@@ -3089,7 +3092,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8)
#define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */
#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4)
-#define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3)
+#define GT_CS_MASTER_ERROR_INTERRUPT (1 << 3)
#define GT_RENDER_SYNC_STATUS_INTERRUPT (1 << 2)
#define GT_RENDER_DEBUG_INTERRUPT (1 << 1)
#define GT_RENDER_USER_INTERRUPT (1 << 0)
--
2.25.0
More information about the Intel-gfx-trybot
mailing list