[Intel-gfx] [PATCH 8/9] drm/i915/execlists: Trust the CSB
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jun 28 11:29:41 UTC 2018
On 27/06/2018 22:07, Chris Wilson wrote:
> Now that we use the CSB stored in the CPU-friendly HWSP, we no longer
> need to track interrupts to know when the mmio CSB registers are valid;
> we can simply check how far we last read in the cached HWSP. This means
> we can forgo the atomic bit tracking from the interrupt, and in the next
> patch it means we can check the CSB at any time.
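To restate the idea for readers following along: with the CSB copied into
the HWSP, the tasklet only has to compare the head it last consumed against
the write pointer the GPU publishes in the HWSP. Roughly (a paraphrased
sketch, not the exact kernel code; process_csb_entry() is a made-up
stand-in for the event handling inside process_csb()):

	u8 head = execlists->csb_head;		    /* last entry we consumed */
	u8 tail = READ_ONCE(*execlists->csb_write); /* published by the GPU */

	while (head != tail) {
		if (++head == GEN8_CSB_ENTRIES)	    /* 6-entry ring */
			head = 0;
		process_csb_entry(READ_ONCE(buf[head]));
	}
	execlists->csb_head = head;

A stale read of the write pointer is harmless, we just pick the events up
on the next tasklet run, so no irq_posted bookkeeping is needed.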
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_irq.c | 11 ++-----
> drivers/gpu/drm/i915/intel_engine_cs.c | 8 ++----
> drivers/gpu/drm/i915/intel_lrc.c | 38 ++++++-------------------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 -
> 4 files changed, 14 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 3702992f9f75..44fb11ca3cab 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1469,15 +1469,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
> static void
> gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> {
> - struct intel_engine_execlists * const execlists = &engine->execlists;
> bool tasklet = false;
>
> - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) {
> - if (READ_ONCE(engine->execlists.active)) {
> - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - tasklet = true;
> - }
> - }
> + if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
> + tasklet = true;
>
> if (iir & GT_RENDER_USER_INTERRUPT) {
> notify_ring(engine);
> @@ -1485,7 +1480,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
> }
>
> if (tasklet)
> - tasklet_hi_schedule(&execlists->tasklet);
> + tasklet_hi_schedule(&engine->execlists.tasklet);
> }
>
> static void gen8_gt_irq_ack(struct drm_i915_private *i915,
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 7209c22798e6..ace93958689e 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
> ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine));
> read = GEN8_CSB_READ_PTR(ptr);
> write = GEN8_CSB_WRITE_PTR(ptr);
> - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n",
> + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n",
> read, execlists->csb_head,
> write,
> intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)),
> - yesno(test_bit(ENGINE_IRQ_EXECLIST,
> - &engine->irq_posted)),
> yesno(test_bit(TASKLET_STATE_SCHED,
> &engine->execlists.tasklet.state)),
> enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
> @@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_unlock(&b->rb_lock);
> local_irq_restore(flags);
>
> - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n",
> + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
> engine->irq_posted,
> yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
> - &engine->irq_posted)),
> - yesno(test_bit(ENGINE_IRQ_EXECLIST,
> &engine->irq_posted)));
>
> drm_printf(m, "HWSP:\n");
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index a6268103663f..87dd8ee117c8 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -874,14 +874,6 @@ static void reset_irq(struct intel_engine_cs *engine)
> smp_store_mb(engine->execlists.active, 0);
>
> clear_gtiir(engine);
> -
> - /*
> - * The port is checked prior to scheduling a tasklet, but
> - * just in case we have suspended the tasklet to do the
> - * wedging make sure that when it wakes, it decides there
> - * is no work to do by clearing the irq_posted bit.
> - */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> }
>
> static void reset_csb_pointers(struct intel_engine_execlists *execlists)
> @@ -972,10 +964,6 @@ static void process_csb(struct intel_engine_cs *engine)
> const u32 * const buf = execlists->csb_status;
> u8 head, tail;
>
> - /* Clear before reading to catch new interrupts */
> - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
> - smp_mb__after_atomic();
> -
> /* Note that csb_write, csb_status may be either in HWSP or mmio */
> head = execlists->csb_head;
> tail = READ_ONCE(*execlists->csb_write);
> @@ -1111,11 +1099,10 @@ static void execlists_submission_tasklet(unsigned long data)
> {
> struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
>
> - GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n",
> + GEM_TRACE("%s awake?=%d, active=%x\n",
> engine->name,
> engine->i915->gt.awake,
> - engine->execlists.active,
> - test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted));
> + engine->execlists.active);
>
> /*
> * We can skip acquiring intel_runtime_pm_get() here as it was taken
> @@ -1127,14 +1114,7 @@ static void execlists_submission_tasklet(unsigned long data)
> */
> GEM_BUG_ON(!engine->i915->gt.awake);
>
> - /*
> - * Prefer doing test_and_clear_bit() as a two stage operation to avoid
> - * imposing the cost of a locked atomic transaction when submitting a
> - * new request (outside of the context-switch interrupt).
> - */
> - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> - process_csb(engine);
> -
> + process_csb(engine);
> if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
> execlists_dequeue(engine);
> }
> @@ -1881,6 +1861,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> {
> struct intel_engine_execlists * const execlists = &engine->execlists;
> struct i915_request *request, *active;
> + unsigned long flags;
>
> GEM_TRACE("%s\n", engine->name);
>
> @@ -1902,8 +1883,9 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> * and avoid blaming an innocent request if the stall was due to the
> * preemption itself.
> */
> - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
> - process_csb(engine);
> + spin_lock_irqsave(&engine->timeline.lock, flags);
> +
> + process_csb(engine);
I think taking the lock over process_csb belongs in the following patch.
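i.e. for this patch I would expect the hunk to keep the existing scoping,
roughly (sketch reconstructed from the lines removed below):

	process_csb(engine);
	...
	if (request) {
		unsigned long flags;

		intel_engine_stop_cs(engine);

		spin_lock_irqsave(&engine->timeline.lock, flags);
		list_for_each_entry_from_reverse(request,
						 &engine->timeline.requests,
						 link) {
			...
		}
		spin_unlock_irqrestore(&engine->timeline.lock, flags);
	}

and leave widening the lock to cover process_csb() to the patch that
actually relies on it.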
>
> /*
> * The last active request can then be no later than the last request
> @@ -1913,15 +1895,12 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
> active = NULL;
> request = port_request(execlists->port);
> if (request) {
> - unsigned long flags;
> -
> /*
> * Prevent the breadcrumb from advancing before we decide
> * which request is currently active.
> */
> intel_engine_stop_cs(engine);
>
> - spin_lock_irqsave(&engine->timeline.lock, flags);
> list_for_each_entry_from_reverse(request,
> &engine->timeline.requests,
> link) {
> @@ -1931,9 +1910,10 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
>
> active = request;
> }
> - spin_unlock_irqrestore(&engine->timeline.lock, flags);
> }
>
> + spin_unlock_irqrestore(&engine->timeline.lock, flags);
> +
> return active;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 5b92c5f03e1d..381c243bfc6f 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -359,7 +359,6 @@ struct intel_engine_cs {
> atomic_t irq_count;
> unsigned long irq_posted;
> #define ENGINE_IRQ_BREADCRUMB 0
> -#define ENGINE_IRQ_EXECLIST 1
>
> /* Rather than have every client wait upon all user interrupts,
> * with the herd waking after every interrupt and each doing the
>
Otherwise looks OK.
Regards,
Tvrtko