[Intel-gfx] [PATCH 8/9] drm/i915/execlists: Trust the CSB
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jun 28 13:04:51 UTC 2018
On 28/06/2018 13:33, Chris Wilson wrote:
> Now that we use the CSB stored in the CPU-friendly HWSP, we do not need
> to track interrupts for when the mmio CSB registers are valid and can
> just check where we read up to last from the cached HWSP. This means we
> can forgo the atomic bit tracking from interrupt, and in the next patch
> it means we can check the CSB at any time.
>
> v2: Change the splitting inside reset_prepare; we only want to lose
> testing the interrupt in this patch, as the next patch requires the
> change in locking
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_irq.c | 11 +++-------
> drivers/gpu/drm/i915/intel_engine_cs.c | 8 ++-----
> drivers/gpu/drm/i915/intel_lrc.c | 29 ++++---------------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
> 4 files changed, 9 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 97418efec719..cb91b213aa67 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1478,15 +1478,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
> static void
> gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> {
> - struct intel_engine_execlists * const execlists = &engine->execlists;
> bool tasklet = false;
>
> - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> - if (READ_ONCE(engine->execlists.active)) {
> - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - tasklet = true;
> - }
> - }
> + if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> + tasklet = true;
>
> if (iir & GT_RENDER_USER_INTERRUPT) {
> notify_ring(engine);
> @@ -1494,7 +1489,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> }
>
> if (tasklet)
> - tasklet_hi_schedule(&execlists->tasklet);
> + tasklet_hi_schedule(&engine->execlists.tasklet);
> }
>
> static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 7209c22798e6..ace93958689e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
> ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
> read = GEN8_CSB_READ_PTR(ptr);
> write = GEN8_CSB_WRITE_PTR(ptr);
> - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
> + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
> read, execlists->csb_head,
> write,
> intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
> - yesno(test_bit(ENGINE_IRQ_EXECLIST,
> - &engine->irq_posted)),
> yesno(test_bit(TASKLET_STATE_SCHED,
> &engine->execlists.tasklet.state)),
> enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
> @@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_unlock(&b->rb_lock);
> local_irq_restore(flags);
>
> - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
> + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
> engine->irq_posted,
> yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
> - &engine->irq_posted)),
> - yesno(test_bit(ENGINE_IRQ_EXECLIST,
> &engine->irq_posted)));
>
> drm_printf(m, "HWSP:\n");
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 14be53035610..7f8b29684d9d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -874,14 +874,6 @@ static void reset_irq(struct intel_engine_cs *engine)
> smp_store_mb(engine->execlists.active, 0);
>
> clear_gtiir(engine);
> -
> - /*
> - * The port is checked prior to scheduling a tasklet, but
> - * just in case we have suspended the tasklet to do the
> - * wedging make sure that when it wakes, it decides there
> - * is no work to do by clearing the irq_posted bit.
> - */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> }
>
> static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> @@ -972,10 +964,6 @@ static void process_csb(struct intel_engine_cs *engine)
> const u32 * const buf = execlists->csb_status;
> u8 head, tail;
>
> - /* Clear before reading to catch new interrupts */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - smp_mb__after_atomic();
> -
> /*
> * Note that csb_write, csb_status may be either in HWSP or mmio.
> * When reading from the csb_write mmio register, we have to be
> @@ -1128,11 +1116,10 @@ static void execlists_submission_tasklet(unsigned long data)
> {
> struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>
> - GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
> + GEM_TRACE("%s awake?=%d, active=%x\n",
> engine->name,
> engine->i915->gt.awake,
> - engine->execlists.active,
> - test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> + engine->execlists.active);
>
> /*
> * We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -1144,14 +1131,7 @@ static void execlists_submission_tasklet(unsigned long data)
> */
> GEM_BUG_ON(!engine->i915->gt.awake);
>
> - /*
> - * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> - * imposing the cost of a locked atomic transaction when submitting a
> - * new request (outside of the context-switch interrupt).
> - */
> - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> - process_csb(engine);
> -
> + process_csb(engine);
I'd probably add a newline here.
> if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> execlists_dequeue(engine);
> }
> @@ -1919,8 +1899,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> * and avoid blaming an innocent request if the stall was due to the
> * preemption itself.
> */
> - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> - process_csb(engine);
> + process_csb(engine);
>
> /*
> * The last active request can then be no later than the last request
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 62e3db6f6f8d..e1ee1ca9ac16 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -368,7 +368,6 @@ struct intel_engine_cs {
> atomic_t irq_count;
> unsigned long irq_posted;
> #define ENGINE_IRQ_BREADCRUMB 0
> -#define ENGINE_IRQ_EXECLIST 1
>
> /* Rather than have every client wait upon all user interrupts,
> * with the herd waking after every interrupt and each doing the
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list