[PATCH 26/29] process-csb-from-irq
Chris Wilson
chris at chris-wilson.co.uk
Wed May 16 10:49:34 UTC 2018
---
drivers/gpu/drm/i915/i915_irq.c | 13 +-
drivers/gpu/drm/i915/intel_engine_cs.c | 8 +-
drivers/gpu/drm/i915/intel_lrc.c | 309 +++++++++++-------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
4 files changed, 146 insertions(+), 185 deletions(-)
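In summary: CSB (context status buffer) processing moves out of the submission tasklet and into the interrupt handler itself. gen8_cs_irq_handler() now drains the CSB directly and schedules the tasklet unconditionally, the ENGINE_IRQ_EXECLIST posted-bit machinery goes away, and the gen8/cherryview top halves are reordered so the GT handler runs before the master interrupt is re-enabled. A rough before/after sketch of the context-switch interrupt path, abbreviated from the diff below (illustration only, not code from the patch):

    static void cs_irq_old(struct intel_engine_cs *engine)
    {
            /* Latch a flag and let the tasklet drain the CSB in softirq. */
            if (READ_ONCE(engine->execlists.active) &&
                !test_and_set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
                    tasklet_hi_schedule(&engine->execlists.tasklet);
    }

    static void cs_irq_new(struct intel_engine_cs *engine)
    {
            /* Drain the CSB right here, in hardirq context... */
            execlists_process_csb(engine);
            /* ...and always kick the tasklet to run the dequeue step. */
            tasklet_hi_schedule(&engine->execlists.tasklet);
    }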
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 460878572515..b3daee06e0d6 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1459,6 +1459,8 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
ivybridge_parity_error_irq_handler(dev_priv, gt_iir);
}
+extern void execlists_process_csb(struct intel_engine_cs *engine);
+
static void
gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
{
@@ -1466,9 +1468,8 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
bool tasklet = false;
if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
- if (READ_ONCE(engine->execlists.active))
- tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
- &engine->irq_posted);
+ execlists_process_csb(engine);
+ tasklet = true;
}
if (iir & GT_RENDER_USER_INTERRUPT) {
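The test_and_set_bit() being deleted implemented interrupt coalescing: the tasklet was scheduled only on a 0->1 transition of ENGINE_IRQ_EXECLIST, and the consumer cleared the bit before draining so that a racing interrupt would re-arm another pass (see the do/while being removed from process_csb() in the intel_lrc.c hunk below). A generic sketch of that producer/consumer idiom, independent of i915:

    #include <linux/bitops.h>

    static unsigned long posted;

    /* Producer (hardirq): returns true only on the 0->1 transition,
     * i.e. when the consumer actually needs a kick. */
    static bool post_work(void)
    {
            return !test_and_set_bit(0, &posted);
    }

    /* Consumer (softirq): clear before draining, then re-check, so an
     * event posted mid-drain is never lost. */
    static void drain_work(void)
    {
            do {
                    clear_bit(0, &posted);
                    smp_mb__after_atomic(); /* order the clear vs. the drain */
                    /* ... consume everything currently available ... */
            } while (test_bit(0, &posted));
    }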
@@ -2165,6 +2166,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
I915_WRITE(VLV_IER, 0);
gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+ gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
if (iir & I915_DISPLAY_PORT_INTERRUPT)
hotplug_status = i9xx_hpd_irq_ack(dev_priv);
@@ -2189,8 +2191,6 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
POSTING_READ(GEN8_MASTER_IRQ);
- gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
if (hotplug_status)
i9xx_hpd_irq_handler(dev_priv, hotplug_status);
@@ -2761,6 +2761,7 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
/* Find, clear, then process each source of interrupt */
gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
+ gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
/* IRQs are synced during runtime_suspend, we don't require a wakeref */
if (master_ctl & ~GEN8_GT_IRQS) {
@@ -2771,8 +2772,6 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg)
I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
- gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
-
return IRQ_HANDLED;
}
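The cherryview and gen8 hunks above make the same reordering: gen8_gt_irq_handler() moves from after the master-interrupt re-enable to immediately after the ack. With CSB processing now done inline, the GT work runs while GEN8_MASTER_IRQ is still held off, so the handler is not re-entered for a context-switch event it is in the middle of consuming. The resulting shape of the top half, reduced to a skeleton (illustrative; the real handlers also ack and handle display/PCH sources):

    static irqreturn_t gen8_irq_skeleton(struct drm_i915_private *dev_priv)
    {
            u32 master_ctl, gt_iir[4];

            master_ctl = I915_READ_FW(GEN8_MASTER_IRQ) & ~GEN8_MASTER_IRQ_CONTROL;
            if (!master_ctl)
                    return IRQ_NONE;

            I915_WRITE_FW(GEN8_MASTER_IRQ, 0);              /* hold off new IRQs */

            gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);  /* find + clear */
            gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir); /* process, still masked */

            I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
            return IRQ_HANDLED;
    }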
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 9f606130eb9a..9110695d44dc 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1310,12 +1310,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
read = GEN8_CSB_READ_PTR(ptr);
write = GEN8_CSB_WRITE_PTR(ptr);
- drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
+ drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
read, execlists->csb_head,
write,
intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
- yesno(test_bit(ENGINE_IRQ_EXECLIST,
- &engine->irq_posted)),
yesno(test_bit(TASKLET_STATE_SCHED,
&engine->execlists.tasklet.state)),
enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
@@ -1496,11 +1494,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_unlock(&b->rb_lock);
local_irq_restore(flags);
- drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
+ drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
engine->irq_posted,
yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
- &engine->irq_posted)),
- yesno(test_bit(ENGINE_IRQ_EXECLIST,
&engine->irq_posted)));
drm_printf(m, "HWSP:\n");
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 81f92154caf7..28a2f3ddc6e8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -556,8 +556,10 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists)
{
GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+ __unwind_incomplete_requests(container_of(execlists,
+ typeof(struct intel_engine_cs),
+ execlists));
execlists_cancel_port_requests(execlists);
- execlists_unwind_incomplete_requests(execlists);
execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
}
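complete_preempt_context() only has the execlists pointer in hand, while __unwind_incomplete_requests() takes the engine, so the patch recovers the containing engine via container_of() on the embedded execlists member (typeof(struct intel_engine_cs) is simply struct intel_engine_cs; the typeof is redundant but harmless). The macro is plain offset arithmetic; a stand-alone approximation using hypothetical stand-in types:

    #include <stddef.h>

    struct execlists_demo { int state; };

    struct engine_demo {
            int id;
            struct execlists_demo execlists;        /* embedded member */
    };

    /* Simplified form of the kernel's container_of() */
    #define demo_container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    static struct engine_demo *to_engine(struct execlists_demo *el)
    {
            return demo_container_of(el, struct engine_demo, execlists);
    }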
@@ -874,14 +876,6 @@ static void reset_irq(struct intel_engine_cs *engine)
synchronize_hardirq(engine->i915->drm.irq);
clear_gtiir(engine);
-
- /*
- * The port is checked prior to scheduling a tasklet, but
- * just in case we have suspended the tasklet to do the
- * wedging make sure that when it wakes, it decides there
- * is no work to do by clearing the irq_posted bit.
- */
- clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
}
static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -954,171 +948,163 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
local_irq_restore(flags);
}
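The rewritten process_csb() below leans on port_unpack() and port_set(port, port_pack(rq, count)): each execlist port stores a request pointer and a small submission count packed into a single word, using the low bits that pointer alignment leaves free. A hand-rolled sketch of the packing idea (hypothetical names, not the i915 helpers):

    #include <stdint.h>

    #define COUNT_BITS 2                            /* counts 0..3 */
    #define COUNT_MASK ((1ul << COUNT_BITS) - 1)

    /* ptr must be at least (1 << COUNT_BITS)-byte aligned. */
    static inline unsigned long pack_port(void *ptr, unsigned int count)
    {
            return (unsigned long)ptr | (count & COUNT_MASK);
    }

    static inline void *unpack_port(unsigned long packed, unsigned int *count)
    {
            *count = packed & COUNT_MASK;
            return (void *)(packed & ~COUNT_MASK);
    }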
-static void process_csb(struct intel_engine_cs *engine)
+void execlists_process_csb(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port *port = execlists->port;
struct drm_i915_private *i915 = engine->i915;
bool fw = false;
- do {
- /* The HWSP contains a (cacheable) mirror of the CSB */
- const u32 *buf =
- &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
- unsigned int head, tail;
+ /* The HWSP contains a (cacheable) mirror of the CSB */
+ const u32 *buf =
+ &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ unsigned int head, tail;
- /* Clear before reading to catch new interrupts */
- clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- smp_mb__after_atomic();
+ if (unlikely(execlists->csb_use_mmio)) {
+ intel_uncore_forcewake_get(i915, execlists->fw_domains);
+ fw = true;
- if (unlikely(execlists->csb_use_mmio)) {
- if (!fw) {
- intel_uncore_forcewake_get(i915, execlists->fw_domains);
- fw = true;
- }
-
- buf = (u32 * __force)
- (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+ buf = (u32 * __force)
+ (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
- head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
- tail = GEN8_CSB_WRITE_PTR(head);
- head = GEN8_CSB_READ_PTR(head);
- execlists->csb_head = head;
- } else {
- const int write_idx =
- intel_hws_csb_write_index(i915) -
- I915_HWS_CSB_BUF0_INDEX;
+ head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ tail = GEN8_CSB_WRITE_PTR(head);
+ head = GEN8_CSB_READ_PTR(head);
+ execlists->csb_head = head;
+ } else {
+ const int write_idx =
+ intel_hws_csb_write_index(i915) -
+ I915_HWS_CSB_BUF0_INDEX;
- head = execlists->csb_head;
- tail = READ_ONCE(buf[write_idx]);
- rmb(); /* Hopefully paired with a wmb() in HW */
- }
- GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
- engine->name,
- head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
- tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+ head = execlists->csb_head;
+ tail = READ_ONCE(buf[write_idx]);
+ rmb(); /* Hopefully paired with a wmb() in HW */
+ }
+ GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+ engine->name,
+ head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+ tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
- while (head != tail) {
- struct i915_request *rq;
- unsigned int status;
- unsigned int count;
+ while (head != tail) {
+ struct i915_request *rq;
+ unsigned int status;
+ unsigned int count;
- if (++head == GEN8_CSB_ENTRIES)
- head = 0;
+ if (++head == GEN8_CSB_ENTRIES)
+ head = 0;
- /*
- * We are flying near dragons again.
- *
- * We hold a reference to the request in execlist_port[]
- * but no more than that. We are operating in softirq
- * context and so cannot hold any mutex or sleep. That
- * prevents us stopping the requests we are processing
- * in port[] from being retired simultaneously (the
- * breadcrumb will be complete before we see the
- * context-switch). As we only hold the reference to the
- * request, any pointer chasing underneath the request
- * is subject to a potential use-after-free. Thus we
- * store all of the bookkeeping within port[] as
- * required, and avoid using unguarded pointers beneath
- * request itself. The same applies to the atomic
- * status notifier.
- */
+ /*
+ * We are flying near dragons again.
+ *
+ * We hold a reference to the request in execlist_port[]
+ * but no more than that. We are operating in softirq
+ * context and so cannot hold any mutex or sleep. That
+ * prevents us stopping the requests we are processing
+ * in port[] from being retired simultaneously (the
+ * breadcrumb will be complete before we see the
+ * context-switch). As we only hold the reference to the
+ * request, any pointer chasing underneath the request
+ * is subject to a potential use-after-free. Thus we
+ * store all of the bookkeeping within port[] as
+ * required, and avoid using unguarded pointers beneath
+ * request itself. The same applies to the atomic
+ * status notifier.
+ */
- status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
- engine->name, head,
- status, buf[2*head + 1],
- execlists->active);
-
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
+ status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+ engine->name, head,
+ status, buf[2*head + 1],
+ execlists->active);
+
+ if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+ GEN8_CTX_STATUS_PREEMPTED))
+ execlists_set_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+ if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+ execlists_clear_active(execlists,
+ EXECLISTS_ACTIVE_HWACK);
+
+ if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+ continue;
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+ /* We should never get a COMPLETED | IDLE_ACTIVE! */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
- GEM_TRACE("%s preempt-idle\n", engine->name);
- complete_preempt_context(execlists);
- continue;
- }
+ if (status & GEN8_CTX_STATUS_COMPLETE &&
+ buf[2*head + 1] == execlists->preempt_complete_status) {
+ GEM_TRACE("%s preempt-idle\n", engine->name);
+ complete_preempt_context(execlists);
+ continue;
+ }
- if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT))
- continue;
+ if (status & GEN8_CTX_STATUS_PREEMPTED &&
+ execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_PREEMPT))
+ continue;
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
+ GEM_BUG_ON(!execlists_is_active(execlists,
+ EXECLISTS_ACTIVE_USER));
+
+ rq = port_unpack(port, &count);
+ GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
+ engine->name,
+ port->context_id, count,
+ rq ? rq->global_seqno : 0,
+ rq ? rq->fence.context : 0,
+ rq ? rq->fence.seqno : 0,
+ intel_engine_get_seqno(engine),
+ rq ? rq_prio(rq) : 0);
+
+ /* Check the context/desc id for this event matches */
+ GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+ GEM_BUG_ON(count == 0);
+ if (--count == 0) {
+ /*
+ * On the final event corresponding to the
+ * submission of this context, we expect either
+ * an element-switch event or a completion
+ * event (and on completion, the active-idle
+ * marker). No more preemptions, lite-restore
+ * or otherwise.
+ */
+ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+ GEM_BUG_ON(port_isset(&port[1]) &&
+ !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+ GEM_BUG_ON(!port_isset(&port[1]) &&
+ !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
- rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
- engine->name,
- port->context_id, count,
- rq ? rq->global_seqno : 0,
- rq ? rq->fence.context : 0,
- rq ? rq->fence.seqno : 0,
- intel_engine_get_seqno(engine),
- rq ? rq_prio(rq) : 0);
+ /*
+ * We rely on the hardware being strongly
+ * ordered, that the breadcrumb write is
+ * coherent (visible from the CPU) before the
+ * user interrupt and CSB is processed.
+ */
+ GEM_BUG_ON(!i915_request_completed(rq));
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+ execlists_context_schedule_out(rq,
+ INTEL_CONTEXT_SCHEDULE_OUT);
+ i915_request_put(rq);
- GEM_BUG_ON(count == 0);
- if (--count == 0) {
- /*
- * On the final event corresponding to the
- * submission of this context, we expect either
- * an element-switch event or a completion
- * event (and on completion, the active-idle
- * marker). No more preemptions, lite-restore
- * or otherwise.
- */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ GEM_TRACE("%s completed ctx=%d\n",
+ engine->name, port->context_id);
- /*
- * We rely on the hardware being strongly
- * ordered, that the breadcrumb write is
- * coherent (visible from the CPU) before the
- * user interrupt and CSB is processed.
- */
- GEM_BUG_ON(!i915_request_completed(rq));
-
- execlists_context_schedule_out(rq,
- INTEL_CONTEXT_SCHEDULE_OUT);
- i915_request_put(rq);
-
- GEM_TRACE("%s completed ctx=%d\n",
- engine->name, port->context_id);
-
- port = execlists_port_complete(execlists, port);
- if (port_isset(port))
- execlists_user_begin(execlists, port);
- else
- execlists_user_end(execlists);
- } else {
- port_set(port, port_pack(rq, count));
- }
+ port = execlists_port_complete(execlists, port);
+ if (port_isset(port))
+ execlists_user_begin(execlists, port);
+ else
+ execlists_user_end(execlists);
+ } else {
+ port_set(port, port_pack(rq, count));
}
+ }
- if (head != execlists->csb_head) {
- execlists->csb_head = head;
- writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
- i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
- }
- } while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+ if (head != execlists->csb_head) {
+ execlists->csb_head = head;
+ writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+ i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+ }
if (unlikely(fw))
intel_uncore_forcewake_put(i915, execlists->fw_domains);
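The body of the drain is otherwise unchanged: head chases tail around the six-entry CSB ring, each entry a (status, context-id) pair, and the hardware read pointer is written back only if something was consumed. That write-back uses a masked register: _MASKED_FIELD(mask, value) expands to (mask << 16 | value), the upper halfword telling the hardware which bits of the lower halfword to latch. The loop reduced to a skeleton (consume() is a placeholder):

    static void drain_csb(u32 *buf, unsigned int *csb_head, unsigned int tail,
                          void __iomem *csb_ptr_reg)
    {
            unsigned int head = *csb_head;

            while (head != tail) {
                    if (++head == GEN8_CSB_ENTRIES) /* 6 entries: wrap */
                            head = 0;
                    consume(buf[2 * head], buf[2 * head + 1]);
            }

            if (head != *csb_head) {
                    *csb_head = head;
                    /* The mask in the top 16 bits makes this write update
                     * only the read-pointer field of the register. */
                    writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
                           csb_ptr_reg);
            }
    }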
@@ -1132,11 +1118,10 @@ static void execlists_submission_tasklet(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
+ GEM_TRACE("%s awake?=%d, active=%x\n",
engine->name,
engine->i915->gt.awake,
- engine->execlists.active,
- test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
+ engine->execlists.active);
/*
* We can skip acquiring intel_runtime_pm_get() here as it was taken
@@ -1148,14 +1133,6 @@ static void execlists_submission_tasklet(unsigned long data)
*/
GEM_BUG_ON(!engine->i915->gt.awake);
- /*
- * Prefer doing test_and_clear_bit() as a two stage operation to avoid
- * imposing the cost of a locked atomic transaction when submitting a
- * new request (outside of the context-switch interrupt).
- */
- if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
- process_csb(engine);
-
if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
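The comment deleted here described a two-stage atomic idiom worth keeping in mind even though its user is gone: test the bit with a plain unlocked read first, so the common path (no work posted) never pays for a locked read-modify-write. Generically (do_pending_work() is a placeholder):

    #include <linux/bitops.h>

    #define WORK_PENDING 0

    static void maybe_do_work(unsigned long *flags)
    {
            /* Stage 1: cheap unlocked read; usually clear, so bail early. */
            if (!test_bit(WORK_PENDING, flags))
                    return;

            /* Stage 2: the locked RMW, paid only when work is likely. */
            if (test_and_clear_bit(WORK_PENDING, flags))
                    do_pending_work();
    }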
@@ -1862,16 +1839,6 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
*/
__tasklet_disable_once(&execlists->tasklet);
- /*
- * We want to flush the pending context switches, having disabled
- * the tasklet above, we can assume exclusive access to the execlists.
- * For this allows us to catch up with an inflight preemption event,
- * and avoid blaming an innocent request if the stall was due to the
- * preemption itself.
- */
- if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
- process_csb(engine);
-
/*
* The last active request can then be no later than the last request
* now in ELSP[0]. So search backwards from there, so that if the GPU
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 4ad9c5842575..8ac7ceb41ef1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -347,7 +347,6 @@ struct intel_engine_cs {
atomic_t irq_count;
unsigned long irq_posted;
#define ENGINE_IRQ_BREADCRUMB 0
-#define ENGINE_IRQ_EXECLIST 1
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
--
2.17.0