[Intel-gfx] [RFC 5/8] drm/i915/execlists: Flush pending preemption events during reset
jeff.mcgee at intel.com
Wed Mar 21 17:26:22 UTC 2018
From: Chris Wilson <chris at chris-wilson.co.uk>
Catch up with the inflight CSB events after disabling the tasklet,
before deciding which request was truly guilty of hanging the GPU.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski at intel.com>
Cc: Michel Thierry <michel.thierry at intel.com>
Cc: Jeff McGee <jeff.mcgee at intel.com>
---
drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
1 file changed, 197 insertions(+), 158 deletions(-)
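
A minimal standalone sketch of the idea, using made-up names rather than the
i915 API: drain the pending completion events first, then walk backwards from
the newest request and keep the oldest one that has not completed. A request
whose completion event was already posted (but not yet read) is then pardoned
instead of being blamed for the hang.

/*
 * Illustrative, standalone model only -- not i915 code. The names below
 * (struct request, process_pending_events, find_guilty) are invented for
 * this sketch. It mirrors the shape of the change: flush the unread
 * context-status events, then pick the request to blame.
 */
#include <stdbool.h>
#include <stdio.h>

struct request {
	int seqno;
	bool completed;
};

/* Stand-in for draining the CSB: apply completion events the hardware has
 * posted but the driver has not yet consumed. */
static void process_pending_events(struct request *reqs, int nreqs,
				   const int *completed_seqnos, int nevents)
{
	for (int e = 0; e < nevents; e++)
		for (int i = 0; i < nreqs; i++)
			if (reqs[i].seqno == completed_seqnos[e])
				reqs[i].completed = true;
}

/* Stand-in for the backwards walk in reset-prepare: starting from the
 * newest request, remember each incomplete one and stop at the first
 * completed request; the survivor is the oldest still-running request. */
static struct request *find_guilty(struct request *reqs, int nreqs)
{
	struct request *guilty = NULL;

	for (int i = nreqs - 1; i >= 0; i--) {
		if (reqs[i].completed)
			break;
		guilty = &reqs[i];
	}
	return guilty;
}

int main(void)
{
	/* Three requests submitted in order; seqno 1 finished on the GPU,
	 * but its completion event is still sitting unread. */
	struct request reqs[] = { { 1, false }, { 2, false }, { 3, false } };
	const int pending[] = { 1 };

	/* Without this flush, find_guilty() would blame innocent seqno 1. */
	process_pending_events(reqs, 3, pending, 1);

	struct request *guilty = find_guilty(reqs, 3);
	printf("guilty seqno: %d\n", guilty ? guilty->seqno : -1);
	return 0;
}

With the flush, the model blames seqno 2 (the oldest still-incomplete
request); without it, seqno 1 would be blamed despite having completed.
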
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e5a3ffbc273a..beb81f13a3cc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
local_irq_restore(flags);
}
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void process_csb(struct intel_engine_cs *engine)
{
- struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port * const port = execlists->port;
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
+ unsigned int head, tail;
+ const u32 *buf;
bool fw = false;
- /* We can skip acquiring intel_runtime_pm_get() here as it was taken
- * on our behalf by the request (see i915_gem_mark_busy()) and it will
- * not be relinquished until the device is idle (see
- * i915_gem_idle_work_handler()). As a precaution, we make sure
- * that all ELSP are drained i.e. we have processed the CSB,
- * before allowing ourselves to idle and calling intel_runtime_pm_put().
- */
- GEM_BUG_ON(!dev_priv->gt.awake);
+ if (unlikely(execlists->csb_use_mmio)) {
+ buf = (u32 * __force)
+ (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+ execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+ } else {
+ /* The HWSP contains a (cacheable) mirror of the CSB */
+ buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ }
- /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
- * imposing the cost of a locked atomic transaction when submitting a
- * new request (outside of the context-switch interrupt).
+ /*
+ * The write will be ordered by the uncached read (itself
+ * a memory barrier), so we do not need another in the form
+ * of a locked instruction. The race between the interrupt
+ * handler and the split test/clear is harmless as we order
+ * our clear before the CSB read. If the interrupt arrived
+ * first between the test and the clear, we read the updated
+ * CSB and clear the bit. If the interrupt arrives as we read
+ * the CSB or later (i.e. after we had cleared the bit) the bit
+ * is set and we do a new loop.
*/
- while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
- /* The HWSP contains a (cacheable) mirror of the CSB */
- const u32 *buf =
- &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
- unsigned int head, tail;
-
- if (unlikely(execlists->csb_use_mmio)) {
- buf = (u32 * __force)
- (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
- execlists->csb_head = -1; /* force mmio read of CSB ptrs */
- }
+ __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+ intel_uncore_forcewake_get(i915, execlists->fw_domains);
+ fw = true;
+
+ head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ tail = GEN8_CSB_WRITE_PTR(head);
+ head = GEN8_CSB_READ_PTR(head);
+ execlists->csb_head = head;
+ } else {
+ const int write_idx =
+ intel_hws_csb_write_index(i915) -
+ I915_HWS_CSB_BUF0_INDEX;
+
+ head = execlists->csb_head;
+ tail = READ_ONCE(buf[write_idx]);
+ }
+ GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+ engine->name,
+ head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+ tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+
+ while (head != tail) {
+ struct i915_request *rq;
+ unsigned int status;
+ unsigned int count;
+
+ if (++head == GEN8_CSB_ENTRIES)
+ head = 0;
- /* The write will be ordered by the uncached read (itself
- * a memory barrier), so we do not need another in the form
- * of a locked instruction. The race between the interrupt
- * handler and the split test/clear is harmless as we order
- * our clear before the CSB read. If the interrupt arrived
- * first between the test and the clear, we read the updated
- * CSB and clear the bit. If the interrupt arrives as we read
- * the CSB or later (i.e. after we had cleared the bit) the bit
- * is set and we do a new loop.
+ /*
+ * We are flying near dragons again.
+ *
+ * We hold a reference to the request in execlist_port[]
+ * but no more than that. We are operating in softirq
+ * context and so cannot hold any mutex or sleep. That
+ * prevents us stopping the requests we are processing
+ * in port[] from being retired simultaneously (the
+ * breadcrumb will be complete before we see the
+ * context-switch). As we only hold the reference to the
+ * request, any pointer chasing underneath the request
+ * is subject to a potential use-after-free. Thus we
+ * store all of the bookkeeping within port[] as
+ * required, and avoid using unguarded pointers beneath
+ * request itself. The same applies to the atomic
+ * status notifier.
*/
- __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- if (unlikely(execlists->csb_head == -1)) { /* following a reset */
- if (!fw) {
- intel_uncore_forcewake_get(dev_priv,
- execlists->fw_domains);
- fw = true;
- }
- head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
- tail = GEN8_CSB_WRITE_PTR(head);
- head = GEN8_CSB_READ_PTR(head);
- execlists->csb_head = head;
- } else {
- const int write_idx =
- intel_hws_csb_write_index(dev_priv) -
- I915_HWS_CSB_BUF0_INDEX;
+ status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+ engine->name, head,
+ status, buf[2*head + 1],
+ execlists->active);
+
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
- head = execlists->csb_head;
- tail = READ_ONCE(buf[write_idx]);
- }
- GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
- engine->name,
- head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
- tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- while (head != tail) {
- struct i915_request *rq;
- unsigned int status;
- unsigned int count;
+ if (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status) {
+ GEM_TRACE("%s preempt-idle\n", engine->name);
+ complete_preempt_context(execlists);
+ continue;
+ }
- if (++head == GEN8_CSB_ENTRIES)
- head = 0;
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT))
+ continue;
- /* We are flying near dragons again.
- *
- * We hold a reference to the request in execlist_port[]
- * but no more than that. We are operating in softirq
- * context and so cannot hold any mutex or sleep. That
- * prevents us stopping the requests we are processing
- * in port[] from being retired simultaneously (the
- * breadcrumb will be complete before we see the
- * context-switch). As we only hold the reference to the
- * request, any pointer chasing underneath the request
- * is subject to a potential use-after-free. Thus we
- * store all of the bookkeeping within port[] as
- * required, and avoid using unguarded pointers beneath
- * request itself. The same applies to the atomic
- * status notifier.
- */
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_USER));
- status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
- engine->name, head,
- status, buf[2*head + 1],
- execlists->active);
-
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ rq = port_unpack(port, &count);
+ GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+ engine->name,
+ port->context_id, count,
+ rq ? rq->global_seqno : 0,
+ rq ? rq_prio(rq) : 0);
+
+ /* Check the context/desc id for this event matches */
+ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+ GEM_BUG_ON(count == 0);
+ if (--count == 0) {
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+ GEM_BUG_ON(port_isset(&port[1]) &&
+ !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+ GEM_BUG_ON(!i915_request_completed(rq));
+ execlists_context_schedule_out(rq);
+ trace_i915_request_out(rq);
+ i915_request_put(rq);
+
+ GEM_TRACE("%s completed ctx=%d\n",
+ engine->name, port->context_id);
+
+ execlists_port_complete(execlists, port);
+ } else {
+ port_set(port, port_pack(rq, count));
+ }
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /* After the final element, the hw should be idle */
+ GEM_BUG_ON(port_count(port) == 0 &&
+ !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ if (port_count(port) == 0)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_USER);
+ }
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
- GEM_TRACE("%s preempt-idle\n", engine->name);
- complete_preempt_context(execlists);
- continue;
- }
+ if (head != execlists->csb_head) {
+ execlists->csb_head = head;
+ writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+ i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ }
- if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT))
- continue;
+ if (unlikely(fw))
+ intel_uncore_forcewake_put(i915, execlists->fw_domains);
+}
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
-
- rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
- engine->name,
- port->context_id, count,
- rq ? rq->global_seqno : 0,
- rq ? rq_prio(rq) : 0);
-
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
- GEM_BUG_ON(count == 0);
- if (--count == 0) {
- GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!i915_request_completed(rq));
- execlists_context_schedule_out(rq);
- trace_i915_request_out(rq);
- i915_request_put(rq);
-
- GEM_TRACE("%s completed ctx=%d\n",
- engine->name, port->context_id);
-
- execlists_port_complete(execlists, port);
- } else {
- port_set(port, port_pack(rq, count));
- }
+/*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- /* After the final element, the hw should be idle */
- GEM_BUG_ON(port_count(port) == 0 &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
- if (port_count(port) == 0)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_USER);
- }
+ /*
+ * We can skip acquiring intel_runtime_pm_get() here as it was taken
+ * on our behalf by the request (see i915_gem_mark_busy()) and it will
+ * not be relinquished until the device is idle (see
+ * i915_gem_idle_work_handler()). As a precaution, we make sure
+ * that all ELSP are drained i.e. we have processed the CSB,
+ * before allowing ourselves to idle and calling intel_runtime_pm_put().
+ */
+ GEM_BUG_ON(!engine->i915->gt.awake);
- if (head != execlists->csb_head) {
- execlists->csb_head = head;
- writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
- dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
- }
- }
+ /*
+ * Prefer doing test_and_clear_bit() as a two stage operation to avoid
+ * imposing the cost of a locked atomic transaction when submitting a
+ * new request (outside of the context-switch interrupt).
+ */
+ if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+ process_csb(engine);
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
+ if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
-
- if (fw)
- intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
static void queue_request(struct intel_engine_cs *engine,
@@ -1667,6 +1673,7 @@ static struct i915_request *
execlists_reset_prepare(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *request, *active;
/*
* Prevent request submission to the hardware until we have
@@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
tasklet_kill(&execlists->tasklet);
tasklet_disable(&execlists->tasklet);
- return i915_gem_find_active_request(engine);
+ /*
+ * We want to flush the pending context switches; having disabled
+ * the tasklet above, we can assume exclusive access to the execlists.
+ * This allows us to catch up with an inflight preemption event, and
+ * avoid blaming an innocent request if the stall was due to the
+ * preemption itself.
+ */
+ process_csb(engine);
+
+ /*
+ * The last active request can then be no later than the last request
+ * now in ELSP[0]. So search backwards from there, so that if the GPU
+ * has advanced beyond the last CSB update, it will be pardoned.
+ */
+ active = NULL;
+ request = port_request(execlists->port);
+ if (request) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->timeline->lock, flags);
+ list_for_each_entry_from_reverse(request,
+ &engine->timeline->requests,
+ link) {
+ if (__i915_request_completed(request,
+ request->global_seqno))
+ break;
+
+ active = request;
+ }
+ spin_unlock_irqrestore(&engine->timeline->lock, flags);
+ }
+
+ return active;
}
static void reset_irq(struct intel_engine_cs *engine)
--
2.16.2