[Intel-gfx] [PATCH 3/6] drm/i915/execlists: Process one CSB update at a time
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Jun 27 12:58:00 UTC 2018
On 27/06/2018 12:38, Chris Wilson wrote:
> In the next patch, we will process the CSB events directly from the
> submission path, rather than only after a CS interrupt. Hence, we will
> no longer need to loop until the has-interrupt bit is clear, and in the
> meantime can remove that small optimisation.
>
> v2: Tvrtko pointed out it was safer to unconditionally kick the tasklet
> after each irq, since we assume the tasklet is called for each irq.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com> #v1
Okay, I can swallow it.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
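For the record, what I had to swallow is losing the 0 -> 1 edge filter:
with test_and_set_bit() a second irq that finds the bit already set skips
the kick, which is only safe while the tasklet is guaranteed to run (and
clear the bit) exactly once per posting. Once process_csb() can also run
from the submission path that guarantee is gone, hence the unconditional
kick. A stand-alone toy of the two behaviours using C11 atomics (names
and layout are mine, not the driver's):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag posted = ATOMIC_FLAG_INIT;

/* old behaviour: kick only on the 0 -> 1 transition of the posted bit */
static bool irq_kick_old(void)
{
	return !atomic_flag_test_and_set(&posted);
}

/* new behaviour: post the bit and kick unconditionally */
static bool irq_kick_new(void)
{
	atomic_flag_test_and_set(&posted);
	return true;
}

int main(void)
{
	printf("old: 1st irq kicks? %d\n", irq_kick_old()); /* 1 */
	printf("old: 2nd irq kicks? %d\n", irq_kick_old()); /* 0: relies on a pending tasklet */
	printf("new: 3rd irq kicks? %d\n", irq_kick_new()); /* 1: always */
	return 0;
}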
> ---
> drivers/gpu/drm/i915/i915_irq.c | 7 +-
> drivers/gpu/drm/i915/intel_lrc.c | 278 +++++++++++++++----------------
> 2 files changed, 141 insertions(+), 144 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 46aaef5c1851..d02f30591c0b 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1473,9 +1473,10 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> bool tasklet = false;
>
> if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> - if (READ_ONCE(engine->execlists.active))
> - tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST,
> - &engine->irq_posted);
> + if (READ_ONCE(engine->execlists.active)) {
> + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> + tasklet = true;
> + }
> }
>
> if (iir & GT_RENDER_USER_INTERRUPT) {
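The intel_lrc.c hunk below then reads as one linear pass over the CSB
instead of the old do { } while (irq_posted) retry. The consume idiom it
keeps is the standard ring walk, head chasing the hardware write pointer
around GEN8_CSB_ENTRIES slots. A toy model of just that walk, with my own
names and made-up values:

#include <stdio.h>

#define CSB_ENTRIES 6	/* mirrors GEN8_CSB_ENTRIES */

int main(void)
{
	const unsigned int csb[CSB_ENTRIES] = { 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5 };
	unsigned int head = 4;	/* last entry we consumed */
	unsigned int tail = 2;	/* last entry the hardware wrote, already wrapped */

	/* consume (head, tail], wrapping exactly as process_csb() does */
	while (head != tail) {
		if (++head == CSB_ENTRIES)
			head = 0;
		printf("csb[%u] = 0x%02x\n", head, csb[head]);
	}
	return 0;
}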
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 4b31e8f42aeb..91656eb2f2db 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -954,166 +954,162 @@ static void process_csb(struct intel_engine_cs *engine)
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct execlist_port *port = execlists->port;
> struct drm_i915_private *i915 = engine->i915;
> +
> + /* The HWSP contains a (cacheable) mirror of the CSB */
> + const u32 *buf =
> + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> + unsigned int head, tail;
> bool fw = false;
>
> - do {
> - /* The HWSP contains a (cacheable) mirror of the CSB */
> - const u32 *buf =
> - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
> - unsigned int head, tail;
> -
> - /* Clear before reading to catch new interrupts */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - smp_mb__after_atomic();
> -
> - if (unlikely(execlists->csb_use_mmio)) {
> - if (!fw) {
> - intel_uncore_forcewake_get(i915, execlists->fw_domains);
> - fw = true;
> - }
> + /* Clear before reading to catch new interrupts */
> + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> + smp_mb__after_atomic();
>
> - buf = (u32 * __force)
> - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
> + if (unlikely(execlists->csb_use_mmio)) {
> + intel_uncore_forcewake_get(i915, execlists->fw_domains);
> + fw = true;
>
> - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> - tail = GEN8_CSB_WRITE_PTR(head);
> - head = GEN8_CSB_READ_PTR(head);
> - execlists->csb_head = head;
> - } else {
> - const int write_idx =
> - intel_hws_csb_write_index(i915) -
> - I915_HWS_CSB_BUF0_INDEX;
> + buf = (u32 * __force)
> + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
>
> - head = execlists->csb_head;
> - tail = READ_ONCE(buf[write_idx]);
> - rmb(); /* Hopefully paired with a wmb() in HW */
> - }
> - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> - engine->name,
> - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
> + head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + tail = GEN8_CSB_WRITE_PTR(head);
> + head = GEN8_CSB_READ_PTR(head);
> + execlists->csb_head = head;
> + } else {
> + const int write_idx =
> + intel_hws_csb_write_index(i915) -
> + I915_HWS_CSB_BUF0_INDEX;
>
> - while (head != tail) {
> - struct i915_request *rq;
> - unsigned int status;
> - unsigned int count;
> + head = execlists->csb_head;
> + tail = READ_ONCE(buf[write_idx]);
> + rmb(); /* Hopefully paired with a wmb() in HW */
> + }
> + GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
> + engine->name,
> + head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
> + tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
>
> - if (++head == GEN8_CSB_ENTRIES)
> - head = 0;
> + while (head != tail) {
> + struct i915_request *rq;
> + unsigned int status;
> + unsigned int count;
>
> - /*
> - * We are flying near dragons again.
> - *
> - * We hold a reference to the request in execlist_port[]
> - * but no more than that. We are operating in softirq
> - * context and so cannot hold any mutex or sleep. That
> - * prevents us stopping the requests we are processing
> - * in port[] from being retired simultaneously (the
> - * breadcrumb will be complete before we see the
> - * context-switch). As we only hold the reference to the
> - * request, any pointer chasing underneath the request
> - * is subject to a potential use-after-free. Thus we
> - * store all of the bookkeeping within port[] as
> - * required, and avoid using unguarded pointers beneath
> - * request itself. The same applies to the atomic
> - * status notifier.
> - */
> + if (++head == GEN8_CSB_ENTRIES)
> + head = 0;
>
> - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> - GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> - engine->name, head,
> - status, buf[2*head + 1],
> - execlists->active);
> -
> - if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> - GEN8_CTX_STATUS_PREEMPTED))
> - execlists_set_active(execlists,
> - EXECLISTS_ACTIVE_HWACK);
> - if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> - execlists_clear_active(execlists,
> - EXECLISTS_ACTIVE_HWACK);
> -
> - if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> - continue;
> + /*
> + * We are flying near dragons again.
> + *
> + * We hold a reference to the request in execlist_port[]
> + * but no more than that. We are operating in softirq
> + * context and so cannot hold any mutex or sleep. That
> + * prevents us stopping the requests we are processing
> + * in port[] from being retired simultaneously (the
> + * breadcrumb will be complete before we see the
> + * context-switch). As we only hold the reference to the
> + * request, any pointer chasing underneath the request
> + * is subject to a potential use-after-free. Thus we
> + * store all of the bookkeeping within port[] as
> + * required, and avoid using unguarded pointers beneath
> + * request itself. The same applies to the atomic
> + * status notifier.
> + */
>
> - /* We should never get a COMPLETED | IDLE_ACTIVE! */
> - GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> + status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
> + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
> + engine->name, head,
> + status, buf[2*head + 1],
> + execlists->active);
> +
> + if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> + GEN8_CTX_STATUS_PREEMPTED))
> + execlists_set_active(execlists,
> + EXECLISTS_ACTIVE_HWACK);
> + if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> + execlists_clear_active(execlists,
> + EXECLISTS_ACTIVE_HWACK);
> +
> + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> + continue;
>
> - if (status & GEN8_CTX_STATUS_COMPLETE &&
> - buf[2*head + 1] == execlists->preempt_complete_status) {
> - GEM_TRACE("%s preempt-idle\n", engine->name);
> - complete_preempt_context(execlists);
> - continue;
> - }
> + /* We should never get a COMPLETED | IDLE_ACTIVE! */
> + GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>
> - if (status & GEN8_CTX_STATUS_PREEMPTED &&
> - execlists_is_active(execlists,
> - EXECLISTS_ACTIVE_PREEMPT))
> - continue;
> + if (status & GEN8_CTX_STATUS_COMPLETE &&
> + buf[2*head + 1] == execlists->preempt_complete_status) {
> + GEM_TRACE("%s preempt-idle\n", engine->name);
> + complete_preempt_context(execlists);
> + continue;
> + }
>
> - GEM_BUG_ON(!execlists_is_active(execlists,
> - EXECLISTS_ACTIVE_USER));
> + if (status & GEN8_CTX_STATUS_PREEMPTED &&
> + execlists_is_active(execlists,
> + EXECLISTS_ACTIVE_PREEMPT))
> + continue;
>
> - rq = port_unpack(port, &count);
> - GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
> - engine->name,
> - port->context_id, count,
> - rq ? rq->global_seqno : 0,
> - rq ? rq->fence.context : 0,
> - rq ? rq->fence.seqno : 0,
> - intel_engine_get_seqno(engine),
> - rq ? rq_prio(rq) : 0);
> + GEM_BUG_ON(!execlists_is_active(execlists,
> + EXECLISTS_ACTIVE_USER));
>
> - /* Check the context/desc id for this event matches */
> - GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> + rq = port_unpack(port, &count);
> + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
> + engine->name,
> + port->context_id, count,
> + rq ? rq->global_seqno : 0,
> + rq ? rq->fence.context : 0,
> + rq ? rq->fence.seqno : 0,
> + intel_engine_get_seqno(engine),
> + rq ? rq_prio(rq) : 0);
> +
> + /* Check the context/desc id for this event matches */
> + GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
> +
> + GEM_BUG_ON(count == 0);
> + if (--count == 0) {
> + /*
> + * On the final event corresponding to the
> + * submission of this context, we expect either
> + * an element-switch event or a completion
> + * event (and on completion, the active-idle
> + * marker). No more preemptions, lite-restore
> + * or otherwise.
> + */
> + GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> + GEM_BUG_ON(port_isset(&port[1]) &&
> + !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> + GEM_BUG_ON(!port_isset(&port[1]) &&
> + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
>
> - GEM_BUG_ON(count == 0);
> - if (--count == 0) {
> - /*
> - * On the final event corresponding to the
> - * submission of this context, we expect either
> - * an element-switch event or a completion
> - * event (and on completion, the active-idle
> - * marker). No more preemptions, lite-restore
> - * or otherwise.
> - */
> - GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
> - GEM_BUG_ON(port_isset(&port[1]) &&
> - !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
> - GEM_BUG_ON(!port_isset(&port[1]) &&
> - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
> + /*
> + * We rely on the hardware being strongly
> + * ordered, that the breadcrumb write is
> + * coherent (visible from the CPU) before the
> + * user interrupt and CSB is processed.
> + */
> + GEM_BUG_ON(!i915_request_completed(rq));
>
> - /*
> - * We rely on the hardware being strongly
> - * ordered, that the breadcrumb write is
> - * coherent (visible from the CPU) before the
> - * user interrupt and CSB is processed.
> - */
> - GEM_BUG_ON(!i915_request_completed(rq));
> -
> - execlists_context_schedule_out(rq,
> - INTEL_CONTEXT_SCHEDULE_OUT);
> - i915_request_put(rq);
> -
> - GEM_TRACE("%s completed ctx=%d\n",
> - engine->name, port->context_id);
> -
> - port = execlists_port_complete(execlists, port);
> - if (port_isset(port))
> - execlists_user_begin(execlists, port);
> - else
> - execlists_user_end(execlists);
> - } else {
> - port_set(port, port_pack(rq, count));
> - }
> - }
> + execlists_context_schedule_out(rq,
> + INTEL_CONTEXT_SCHEDULE_OUT);
> + i915_request_put(rq);
>
> - if (head != execlists->csb_head) {
> - execlists->csb_head = head;
> - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + GEM_TRACE("%s completed ctx=%d\n",
> + engine->name, port->context_id);
> +
> + port = execlists_port_complete(execlists, port);
> + if (port_isset(port))
> + execlists_user_begin(execlists, port);
> + else
> + execlists_user_end(execlists);
> + } else {
> + port_set(port, port_pack(rq, count));
> }
> - } while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> + }
> +
> + if (head != execlists->csb_head) {
> + execlists->csb_head = head;
> + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
> + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
> + }
>
> if (unlikely(fw))
> intel_uncore_forcewake_put(i915, execlists->fw_domains);
>
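One aside on the unchanged logic this hunk moves around: the count that
port_unpack()/port_pack() juggle is a submission count stashed in the low
bits of the (suitably aligned) request pointer, so a lite-restore
resubmission bumps the count instead of occupying a second port, and each
completion event decrements it. A minimal userspace sketch of that packing
idiom (my own names, not the driver's exact helpers):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define COUNT_MASK ((uintptr_t)3)	/* pointer assumed at least 4-byte aligned */

static uintptr_t toy_port_pack(void *rq, unsigned int count)
{
	assert(((uintptr_t)rq & COUNT_MASK) == 0 && count <= COUNT_MASK);
	return (uintptr_t)rq | count;
}

static void *toy_port_unpack(uintptr_t port, unsigned int *count)
{
	*count = port & COUNT_MASK;
	return (void *)(port & ~COUNT_MASK);
}

int main(void)
{
	static int rq;	/* stand-in for an i915_request */
	unsigned int count;
	uintptr_t port = toy_port_pack(&rq, 2);	/* submitted twice (lite-restore) */
	void *p = toy_port_unpack(port, &count);

	printf("rq=%p count=%u\n", p, count);
	return 0;
}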