[Intel-gfx] [PATCH 8/9] drm/i915/execlists: Trust the CSB
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jun 28 11:29:41 UTC 2018
On 27/06/2018 22:07, Chris Wilson wrote:
> Now that we use the CSB stored in the CPU-friendly HWSP, we no longer
> need to track interrupts to know when the mmio CSB registers are valid;
> we can simply check how far we last read in the cached HWSP. This means
> we can forgo the atomic bit tracking from the interrupt, and in the next
> patch it means we can check the CSB at any time.
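To restate the idea for readers following along: with the CSB copied into
the HWSP, the tasklet only has to compare the head it last consumed against
the write pointer the GPU publishes in the HWSP. Roughly (a paraphrased
sketch, not the exact kernel code; process_csb_entry() is a made-up
stand-in for the event handling inside process_csb()):

	u8 head = execlists->csb_head;		    /* last entry we consumed */
	u8 tail = READ_ONCE(*execlists->csb_write); /* published by the GPU */

	while (head != tail) {
		if (++head == GEN8_CSB_ENTRIES)	    /* 6-entry ring */
			head = 0;
		process_csb_entry(READ_ONCE(buf[head]));
	}
	execlists->csb_head = head;

A stale read of the write pointer is harmless, we just pick the events up
on the next tasklet run, so no irq_posted bookkeeping is needed.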
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_irq.c | 11 ++-----
> drivers/gpu/drm/i915/intel_engine_cs.c | 8 ++----
> drivers/gpu/drm/i915/intel_lrc.c | 38 ++++++-------------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
> 4 files changed, 14 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3702992f9f75..44fb11ca3cab 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1469,15 +1469,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
> static void
> gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> {
> - struct intel_engine_execlists * const execlists = &engine->execlists;
> bool tasklet = false;
>
> - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> - if (READ_ONCE(engine->execlists.active)) {
> - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - tasklet = true;
> - }
> - }
> + if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> + tasklet = true;
>
> if (iir & GT_RENDER_USER_INTERRUPT) {
> notify_ring(engine);
> @@ -1485,7 +1480,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> }
>
> if (tasklet)
> - tasklet_hi_schedule(&execlists->tasklet);
> + tasklet_hi_schedule(&engine->execlists.tasklet);
> }
>
> static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 7209c22798e6..ace93958689e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
> ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
> read = GEN8_CSB_READ_PTR(ptr);
> write = GEN8_CSB_WRITE_PTR(ptr);
> - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
> + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
> read, execlists->csb_head,
> write,
> intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
> - yesno(test_bit(ENGINE_IRQ_EXECLIST,
> - &engine->irq_posted)),
> yesno(test_bit(TASKLET_STATE_SCHED,
> &engine->execlists.tasklet.state)),
> enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
> @@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_unlock(&b->rb_lock);
> local_irq_restore(flags);
>
> - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
> + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
> engine->irq_posted,
> yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
> - &engine->irq_posted)),
> - yesno(test_bit(ENGINE_IRQ_EXECLIST,
> &engine->irq_posted)));
>
> drm_printf(m, "HWSP:\n");
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a6268103663f..87dd8ee117c8 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -874,14 +874,6 @@ static void reset_irq(struct intel_engine_cs *engine)
> smp_store_mb(engine->execlists.active, 0);
>
> clear_gtiir(engine);
> -
> - /*
> - * The port is checked prior to scheduling a tasklet, but
> - * just in case we have suspended the tasklet to do the
> - * wedging make sure that when it wakes, it decides there
> - * is no work to do by clearing the irq_posted bit.
> - */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> }
>
> static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> @@ -972,10 +964,6 @@ static void process_csb(struct intel_engine_cs *engine)
> const u32 * const buf = execlists->csb_status;
> u8 head, tail;
>
> - /* Clear before reading to catch new interrupts */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - smp_mb__after_atomic();
> -
> /* Note that csb_write, csb_status may be either in HWSP or mmio */
> head = execlists->csb_head;
> tail = READ_ONCE(*execlists->csb_write);
> @@ -1111,11 +1099,10 @@ static void execlists_submission_tasklet(unsigned long data)
> {
> struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>
> - GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
> + GEM_TRACE("%s awake?=%d, active=%x\n",
> engine->name,
> engine->i915->gt.awake,
> - engine->execlists.active,
> - test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> + engine->execlists.active);
>
> /*
> * We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -1127,14 +1114,7 @@ static void execlists_submission_tasklet(unsigned long data)
> */
> GEM_BUG_ON(!engine->i915->gt.awake);
>
> - /*
> - * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> - * imposing the cost of a locked atomic transaction when submitting a
> - * new request (outside of the context-switch interrupt).
> - */
> - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> - process_csb(engine);
> -
> + process_csb(engine);
> if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> execlists_dequeue(engine);
> }
> @@ -1881,6 +1861,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> {
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct i915_request *request, *active;
> + unsigned long flags;
>
> GEM_TRACE("%s\n", engine->name);
>
> @@ -1902,8 +1883,9 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> * and avoid blaming an innocent request if the stall was due to the
> * preemption itself.
> */
> - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> - process_csb(engine);
> + spin_lock_irqsave(&engine->timeline.lock, flags);
> +
> + process_csb(engine);
I think taking the lock over process_csb belongs in the following patch.
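i.e. for this patch I would expect the hunk to keep the existing scoping,
roughly (sketch reconstructed from the lines removed below):

	process_csb(engine);
	...
	if (request) {
		unsigned long flags;

		intel_engine_stop_cs(engine);

		spin_lock_irqsave(&engine->timeline.lock, flags);
		list_for_each_entry_from_reverse(request,
						 &engine->timeline.requests,
						 link) {
			...
		}
		spin_unlock_irqrestore(&engine->timeline.lock, flags);
	}

and leave widening the lock to cover process_csb() to the patch that
actually relies on it.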
>
> /*
> * The last active request can then be no later than the last request
> @@ -1913,15 +1895,12 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> active = NULL;
> request = port_request(execlists->port);
> if (request) {
> - unsigned long flags;
> -
> /*
> * Prevent the breadcrumb from advancing before we decide
> * which request is currently active.
> */
> intel_engine_stop_cs(engine);
>
> - spin_lock_irqsave(&engine->timeline.lock, flags);
> list_for_each_entry_from_reverse(request,
> &engine->timeline.requests,
> link) {
> @@ -1931,9 +1910,10 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>
> active = request;
> }
> - spin_unlock_irqrestore(&engine->timeline.lock, flags);
> }
>
> + spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> return active;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5b92c5f03e1d..381c243bfc6f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -359,7 +359,6 @@ struct intel_engine_cs {
> atomic_t irq_count;
> unsigned long irq_posted;
> #define ENGINE_IRQ_BREADCRUMB 0
> -#define ENGINE_IRQ_EXECLIST 1
>
> /* Rather than have every client wait upon all user interrupts,
> * with the herd waking after every interrupt and each doing the
>
Otherwise looks OK.
Regards,
Tvrtko