[Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all

Mika Kuoppala mika.kuoppala at linux.intel.com
Fri Aug 14 18:41:14 UTC 2020


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Since we expect to inline the csb_parse() routines, the w/a for the
> stale CSB data on Tigerlake will be pulled into process_csb(), and so we
> might as well simply reuse the logic for all, and so will hopefully
> avoid any strange behaviour on Icelake that was not covered by our
> previous w/a.
>
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Bruce Chang <yu.bruce.chang at intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 70 +++++++++++++++++------------
>  1 file changed, 42 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 3b8161c6b601..c176a029f27b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
>   *     bits 47-57: sw context id of the lrc the GT switched away from
>   *     bits 58-63: sw counter of the lrc the GT switched away from
>   */
> -static inline bool gen12_csb_parse(const u64 *csb)
> +static inline bool gen12_csb_parse(const u64 csb)
>  {
> -	bool ctx_away_valid;
> -	bool new_queue;
> -	u64 entry;
> -
> -	/* XXX HSD */
> -	entry = READ_ONCE(*csb);
> -	if (unlikely(entry == -1)) {
> -		preempt_disable();
> -		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))

If we get this deep into desperation, should we start to apply more
pressure. Ie, rmb instead of just instructing the compiler. And could also
start to invalidate the entry which obviously if of no use.

It could even be that the invalidate pays out as the correct value
bubbles throught hierarchy faster?

Weird.

*shrug*

-Mika

> -			GEM_WARN_ON("50us CSB timeout");
> -		preempt_enable();
> -	}
> -	WRITE_ONCE(*(u64 *)csb, -1);
> -
> -	ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> -	new_queue =
> -		lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> +	bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
> +	bool new_queue =
> +		lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>  
>  	/*
>  	 * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
>  	 * would require some extra handling, but we don't support that.
>  	 */
>  	if (!ctx_away_valid || new_queue) {
> -		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
> +		GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
>  		return true;
>  	}
>  
> @@ -2533,19 +2519,47 @@ static inline bool gen12_csb_parse(const u64 *csb)
>  	 * context switch on an unsuccessful wait instruction since we always
>  	 * use polling mode.
>  	 */
> -	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
> +	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
>  	return false;
>  }
>  
> -static inline bool gen8_csb_parse(const u64 *csb)
> +static inline bool gen8_csb_parse(const u64 csb)
> +{
> +	return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +}
> +
> +static inline u64 csb_read(u64 * const csb)
>  {
> -	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +	u64 entry = READ_ONCE(*csb);
> +
> +	/*
> +	 * Unfortunately, the GPU does not always serialise its write
> +	 * of the CSB entries before its write of the CSB pointer, at least
> +	 * from the perspective of the CPU, using what is known as a Global
> +	 * Observation Point. We may read a new CSB tail pointer, but then
> +	 * read the stale CSB entries, causing us to misinterpret the
> +	 * context-switch events, and eventually declare the GPU hung.
> +	 *
> +	 * icl:HSDES#:1806554093
> +	 * tgl:XXX?
> +	 */
> +	if (unlikely(entry == -1)) {
> +		preempt_disable();
> +		if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> +			GEM_WARN_ON("50us CSB timeout");
> +		preempt_enable();
> +	}
> +
> +	/* Consume this entry so that we can spot its future reuse. */
> +	WRITE_ONCE(*csb, -1);
> +
> +	return entry;
>  }
>  
>  static void process_csb(struct intel_engine_cs *engine)
>  {
>  	struct intel_engine_execlists * const execlists = &engine->execlists;
> -	const u64 * const buf = execlists->csb_status;
> +	u64 * const buf = execlists->csb_status;
>  	const u8 num_entries = execlists->csb_size;
>  	u8 head, tail;
>  
> @@ -2603,6 +2617,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  	rmb();
>  	do {
>  		bool promote;
> +		u64 csb;
>  
>  		if (++head == num_entries)
>  			head = 0;
> @@ -2625,15 +2640,14 @@ static void process_csb(struct intel_engine_cs *engine)
>  		 * status notifier.
>  		 */
>  
> +		csb = csb_read(buf + head);
>  		ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
> -			     head,
> -			     upper_32_bits(buf[head]),
> -			     lower_32_bits(buf[head]));
> +			     head, upper_32_bits(csb), lower_32_bits(csb));
>  
>  		if (INTEL_GEN(engine->i915) >= 12)
> -			promote = gen12_csb_parse(buf + head);
> +			promote = gen12_csb_parse(csb);
>  		else
> -			promote = gen8_csb_parse(buf + head);
> +			promote = gen8_csb_parse(csb);
>  		if (promote) {
>  			struct i915_request * const *old = execlists->active;
>  
> -- 
> 2.20.1


More information about the Intel-gfx mailing list