[Intel-gfx] [PATCH 6/6] drm/i915: Drop unused engine->irq_seqno_barrier w/a
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Dec 31 11:35:07 UTC 2018
On 28/12/2018 17:16, Chris Wilson wrote:
> Now that we have eliminated the CPU-side irq_seqno_barrier by moving the
> delays on the GPU before emitting the MI_USER_INTERRUPT, we can remove
> the engine->irq_seqno_barrier infrastructure. Though intentionally
> slowing down the GPU is nasty, so is the code we can now remove!
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 84 ------------------------
> drivers/gpu/drm/i915/i915_gem.c | 7 --
> drivers/gpu/drm/i915/i915_irq.c | 7 --
> drivers/gpu/drm/i915/i915_request.c | 8 +--
> drivers/gpu/drm/i915/intel_breadcrumbs.c | 26 --------
> drivers/gpu/drm/i915/intel_engine_cs.c | 6 --
> drivers/gpu/drm/i915/intel_hangcheck.c | 10 ---
> drivers/gpu/drm/i915/intel_ringbuffer.c | 4 --
> drivers/gpu/drm/i915/intel_ringbuffer.h | 10 ---
> 9 files changed, 1 insertion(+), 161 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 287f06b9e95a..01a3d77fed41 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3586,90 +3586,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
> }
> }
>
> -static inline bool
> -__i915_request_irq_complete(const struct i915_request *rq)
> -{
> - struct intel_engine_cs *engine = rq->engine;
> - u32 seqno;
> -
> - /* Note that the engine may have wrapped around the seqno, and
> - * so our request->global_seqno will be ahead of the hardware,
> - * even though it completed the request before wrapping. We catch
> - * this by kicking all the waiters before resetting the seqno
> - * in hardware, and also signal the fence.
> - */
> - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
> - return true;
> -
> - /* The request was dequeued before we were awoken. We check after
> - * inspecting the hw to confirm that this was the same request
> - * that generated the HWS update. The memory barriers within
> - * the request execution are sufficient to ensure that a check
> - * after reading the value from hw matches this request.
> - */
> - seqno = i915_request_global_seqno(rq);
> - if (!seqno)
> - return false;
> -
> - /* Before we do the heavier coherent read of the seqno,
> - * check the value (hopefully) in the CPU cacheline.
> - */
> - if (__i915_request_completed(rq, seqno))
> - return true;
> -
> - /* Ensure our read of the seqno is coherent so that we
> - * do not "miss an interrupt" (i.e. if this is the last
> - * request and the seqno write from the GPU is not visible
> - * by the time the interrupt fires, we will see that the
> - * request is incomplete and go back to sleep awaiting
> - * another interrupt that will never come.)
> - *
> - * Strictly, we only need to do this once after an interrupt,
> - * but it is easier and safer to do it every time the waiter
> - * is woken.
> - */
> - if (engine->irq_seqno_barrier &&
> - test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
> - struct intel_breadcrumbs *b = &engine->breadcrumbs;
> -
> - /* The ordering of irq_posted versus applying the barrier
> - * is crucial. The clearing of the current irq_posted must
> - * be visible before we perform the barrier operation,
> - * such that if a subsequent interrupt arrives, irq_posted
> - * is reasserted and our task rewoken (which causes us to
> - * do another __i915_request_irq_complete() immediately
> - * and reapply the barrier). Conversely, if the clear
> - * occurs after the barrier, then an interrupt that arrived
> - * whilst we waited on the barrier would not trigger a
> - * barrier on the next pass, and the read may not see the
> - * seqno update.
> - */
> - engine->irq_seqno_barrier(engine);
> -
> - /* If we consume the irq, but we are no longer the bottom-half,
> - * the real bottom-half may not have serialised their own
> - * seqno check with the irq-barrier (i.e. may have inspected
> - * the seqno before we believe it coherent since they see
> - * irq_posted == false but we are still running).
> - */
> - spin_lock_irq(&b->irq_lock);
> - if (b->irq_wait && b->irq_wait->tsk != current)
> - /* Note that if the bottom-half is changed as we
> - * are sending the wake-up, the new bottom-half will
> - * be woken by whomever made the change. We only have
> - * to worry about when we steal the irq-posted for
> - * ourself.
> - */
> - wake_up_process(b->irq_wait->tsk);
> - spin_unlock_irq(&b->irq_lock);
> -
> - if (__i915_request_completed(rq, seqno))
> - return true;
> - }
> -
> - return false;
> -}
> -
> void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
> bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f4254e012620..7ea87c7a0e8c 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3230,13 +3230,6 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
> struct i915_request *request,
> bool stalled)
> {
> - /*
> - * Make sure this write is visible before we re-enable the interrupt
> - * handlers on another CPU, as tasklet_enable() resolves to just
> - * a compiler barrier which is insufficient for our purpose here.
> - */
> - smp_store_mb(engine->irq_posted, 0);
> -
> if (request)
> request = i915_gem_reset_request(engine, request, stalled);
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 0c7fc9890891..fbb094ecf6c9 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1189,13 +1189,6 @@ static void notify_ring(struct intel_engine_cs *engine)
> rq = i915_request_get(waiter);
>
> tsk = wait->tsk;
> - } else {
> - if (engine->irq_seqno_barrier &&
> - i915_seqno_passed(seqno, wait->seqno - 1)) {
> - set_bit(ENGINE_IRQ_BREADCRUMB,
> - &engine->irq_posted);
> - tsk = wait->tsk;
> - }
> }
>
> engine->breadcrumbs.irq_count++;
> diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
> index ea4620f2ac9e..1e158eb8cb97 100644
> --- a/drivers/gpu/drm/i915/i915_request.c
> +++ b/drivers/gpu/drm/i915/i915_request.c
> @@ -1179,13 +1179,7 @@ long i915_request_wait(struct i915_request *rq,
> set_current_state(state);
>
> wakeup:
> - /*
> - * Carefully check if the request is complete, giving time
> - * for the seqno to be visible following the interrupt.
> - * We also have to check in case we are kicked by the GPU
> - * reset in order to drop the struct_mutex.
> - */
> - if (__i915_request_irq_complete(rq))
> + if (i915_request_completed(rq))
> break;
>
> /*
> diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> index 447c5256f63a..4ed7105d7ff5 100644
> --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
> +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
> @@ -166,12 +166,6 @@ static void irq_enable(struct intel_engine_cs *engine)
> */
> GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
>
> - /* Enabling the IRQ may miss the generation of the interrupt, but
> - * we still need to force the barrier before reading the seqno,
> - * just in case.
> - */
> - set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> -
> /* Caller disables interrupts */
> if (engine->irq_enable) {
> spin_lock(&engine->i915->irq_lock);
> @@ -683,16 +677,6 @@ static int intel_breadcrumbs_signaler(void *arg)
> }
>
> if (unlikely(do_schedule)) {
> - /* Before we sleep, check for a missed seqno */
> - if (current->state & TASK_NORMAL &&
> - !list_empty(&b->signals) &&
> - engine->irq_seqno_barrier &&
> - test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
> - &engine->irq_posted)) {
> - engine->irq_seqno_barrier(engine);
> - intel_engine_wakeup(engine);
> - }
> -
> sleep:
> if (kthread_should_park())
> kthread_parkme();
> @@ -859,16 +843,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
> else
> irq_disable(engine);
>
> - /*
> - * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
> - * GPU is active and may have already executed the MI_USER_INTERRUPT
> - * before the CPU is ready to receive. However, the engine is currently
> - * idle (we haven't started it yet), there is no possibility for a
> - * missed interrupt as we enabled the irq and so we can clear the
> - * immediate wakeup (until a real interrupt arrives for the waiter).
> - */
> - clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
> -
> spin_unlock_irqrestore(&b->irq_lock, flags);
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 1462bb49f420..189a934a63e9 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -457,7 +457,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
> void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
> {
> intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
> - clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
>
> /* After manually advancing the seqno, fake the interrupt in case
> * there are any waiters for that seqno.
> @@ -1536,11 +1535,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
> spin_unlock(&b->rb_lock);
> local_irq_restore(flags);
>
> - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
> - engine->irq_posted,
> - yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
> - &engine->irq_posted)));
> -
> drm_printf(m, "HWSP:\n");
> hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
>
> diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c
> index c3f929f59424..51e9efec5116 100644
> --- a/drivers/gpu/drm/i915/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/intel_hangcheck.c
> @@ -120,16 +120,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
> static void hangcheck_load_sample(struct intel_engine_cs *engine,
> struct intel_engine_hangcheck *hc)
> {
> - /* We don't strictly need an irq-barrier here, as we are not
> - * serving an interrupt request, be paranoid in case the
> - * barrier has side-effects (such as preventing a broken
> - * cacheline snoop) and so be sure that we can see the seqno
> - * advance. If the seqno should stick, due to a stale
> - * cacheline, we would erroneously declare the GPU hung.
> - */
> - if (engine->irq_seqno_barrier)
> - engine->irq_seqno_barrier(engine);
> -
> hc->acthd = intel_engine_get_active_head(engine);
> hc->seqno = intel_engine_get_seqno(engine);
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 13ac01b67ead..f8d3090ed193 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -711,10 +711,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
> static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
> {
> intel_engine_stop_cs(engine);
> -
> - if (engine->irq_seqno_barrier)
> - engine->irq_seqno_barrier(engine);
> -
> return i915_gem_find_active_request(engine);
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 99e2cb75d29a..91ef00d34e91 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -365,9 +365,6 @@ struct intel_engine_cs {
> struct drm_i915_gem_object *default_state;
> void *pinned_default_state;
>
> - unsigned long irq_posted;
> -#define ENGINE_IRQ_BREADCRUMB 0
> -
> /* Rather than have every client wait upon all user interrupts,
> * with the herd waking after every interrupt and each doing the
> * heavyweight seqno dance, we delegate the task (of being the
> @@ -501,13 +498,6 @@ struct intel_engine_cs {
> */
> void (*cancel_requests)(struct intel_engine_cs *engine);
>
> - /* Some chipsets are not quite as coherent as advertised and need
> - * an expensive kick to force a true read of the up-to-date seqno.
> - * However, the up-to-date seqno is not always required and the last
> - * seen value is good enough. Note that the seqno will always be
> - * monotonic, even if not coherent.
> - */
> - void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
> void (*cleanup)(struct intel_engine_cs *engine);
>
> struct intel_engine_execlists execlists;
>
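My understanding of the principle, for anyone reading the archive later: after the earlier patches in the series the breadcrumb emitted by the GPU itself pads between the seqno store and MI_USER_INTERRUPT, so the write is already visible when the CPU takes the interrupt and no CPU-side read-back barrier is needed. Roughly along these lines - an illustrative sketch only, not code from this series, reusing the identifiers of the existing i9xx breadcrumb; the store count and exact padding are per-platform and hypothetical here:

/*
 * Sketch: emit the breadcrumb from the GPU with enough padding that the
 * seqno store in the hardware status page has landed before the
 * interrupt fires, removing the need for irq_seqno_barrier() on the CPU.
 */
static void sketch_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	int i;

	*cs++ = MI_FLUSH;

	/* Hypothetical w/a: repeat the seqno store so that by the time the
	 * final interrupt is raised the value is coherent from the CPU.
	 */
	for (i = 0; i < 8; i++) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT;
		*cs++ = rq->global_seqno;
	}

	/* Only now poke the CPU; a plain seqno read after the IRQ suffices. */
	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);
}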
Series looks fine to me. A tiny bit more latency on the old platforms in
exchange for simpler code seems a fair trade. I trust this has been well
tested and that the approach holds up under stress.
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko