[Intel-gfx] [PATCH] drm/i915/execlists: Ignore lost completion events

Mika Kuoppala mika.kuoppala at linux.intel.com
Wed Sep 4 07:16:30 UTC 2019


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Icelake hit an issue where it missed reporting a completion event and
> instead jumped straight to a idle->active event (skipping over the
> active->idle and not even hitting the lite-restore preemption).
>
> 661497511us : process_csb: rcs0 cs-irq head=11, tail=0
> 661497512us : process_csb: rcs0 csb[0]: status=0x10008002:0x00000020 [lite-restore]
> 661497512us : trace_ports: rcs0: preempted { 28cc8:11052, 0:0 }
> 661497513us : trace_ports: rcs0: promote { 28cc8:11054, 0:0 }
> 661497514us : __i915_request_submit: rcs0 fence 28cc8:11056, current 11052
> 661497514us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes
> 661497515us : trace_ports: rcs0: submit { 28cc8:11056, 0:0 }
> 661497530us : process_csb: rcs0 cs-irq head=0, tail=1
> 661497530us : process_csb: rcs0 csb[1]: status=0x10008002:0x00000020 [lite-restore]
> 661497531us : trace_ports: rcs0: preempted { 28cc8:11054!, 0:0 }
> 661497535us : trace_ports: rcs0: promote { 28cc8:11056, 0:0 }
> 661497540us : __i915_request_submit: rcs0 fence 28cc8:11058, current 11054
> 661497544us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes
> 661497545us : trace_ports: rcs0: submit { 28cc8:11058, 0:0 }
> 661497553us : process_csb: rcs0 cs-irq head=1, tail=2
> 661497553us : process_csb: rcs0 csb[2]: status=0x10000001:0x00000000 [idle->active]
> 661497574us : process_csb: process_csb:1538 GEM_BUG_ON(*execlists->active)
>

In CI? (ref)

> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.c | 63 +++++++++--------------------
>  1 file changed, 18 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 0e28263c7b87..9d50fd3b896c 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -693,6 +693,9 @@ trace_ports(const struct intel_engine_execlists *execlists,
>  	const struct intel_engine_cs *engine =
>  		container_of(execlists, typeof(*engine), execlists);
>  
> +	if (!ports[0])
> +		return;
> +
>  	GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
>  		  engine->name, msg,
>  		  ports[0]->fence.context,
> @@ -1371,13 +1374,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
>  	return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
>  }
>  
> -enum csb_step {
> -	CSB_NOP,
> -	CSB_PROMOTE,
> -	CSB_PREEMPT,
> -	CSB_COMPLETE,
> -};
> -
>  /*
>   * Starting with Gen12, the status has a new format:
>   *
> @@ -1404,7 +1400,7 @@ enum csb_step {
>   *     bits 47-57: sw context id of the lrc the GT switched away from
>   *     bits 58-63: sw counter of the lrc the GT switched away from
>   */
> -static inline enum csb_step
> +static inline bool
>  gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)

should it be gen11_should_promote now?
-Mika

>  {
>  	u32 lower_dw = csb[0];
> @@ -1414,7 +1410,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  	bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>  
>  	if (!ctx_away_valid && ctx_to_valid)
> -		return CSB_PROMOTE;
> +		return true;
>  
>  	/*
>  	 * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -1424,7 +1420,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  	 * would require some extra handling, but we don't support that.
>  	 */
>  	if (new_queue && ctx_away_valid)
> -		return CSB_PREEMPT;
> +		return true;
>  
>  	/*
>  	 * switch detail = 5 is covered by the case above and we do not expect a
> @@ -1433,29 +1429,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  	 */
>  	GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
>  
> -	if (*execlists->active) {
> -		GEM_BUG_ON(!ctx_away_valid);
> -		return CSB_COMPLETE;
> -	}
> -
> -	return CSB_NOP;
> +	return false;
>  }
>  
> -static inline enum csb_step
> +static inline bool
>  gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
>  {
> -	unsigned int status = *csb;
> -
> -	if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
> -		return CSB_PROMOTE;
> -
> -	if (status & GEN8_CTX_STATUS_PREEMPTED)
> -		return CSB_PREEMPT;
> -
> -	if (*execlists->active)
> -		return CSB_COMPLETE;
> -
> -	return CSB_NOP;
> +	return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
>  }
>  
>  static void process_csb(struct intel_engine_cs *engine)
> @@ -1494,7 +1474,7 @@ static void process_csb(struct intel_engine_cs *engine)
>  	rmb();
>  
>  	do {
> -		enum csb_step csb_step;
> +		bool promote;
>  
>  		if (++head == num_entries)
>  			head = 0;
> @@ -1522,20 +1502,16 @@ static void process_csb(struct intel_engine_cs *engine)
>  			  buf[2 * head + 0], buf[2 * head + 1]);
>  
>  		if (INTEL_GEN(engine->i915) >= 12)
> -			csb_step = gen12_csb_parse(execlists, buf + 2 * head);
> +			promote = gen12_csb_parse(execlists, buf + 2 * head);
>  		else
> -			csb_step = gen8_csb_parse(execlists, buf + 2 * head);
> -
> -		switch (csb_step) {
> -		case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
> +			promote = gen8_csb_parse(execlists, buf + 2 * head);
> +		if (promote) {
> +			/* cancel old inflight, prepare for switch */
>  			trace_ports(execlists, "preempted", execlists->active);
> -
>  			while (*execlists->active)
>  				execlists_schedule_out(*execlists->active++);
>  
> -			/* fallthrough */
> -		case CSB_PROMOTE: /* switch pending to inflight */
> -			GEM_BUG_ON(*execlists->active);
> +			/* switch pending to inflight */
>  			GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
>  			execlists->active =
>  				memcpy(execlists->inflight,
> @@ -1550,9 +1526,10 @@ static void process_csb(struct intel_engine_cs *engine)
>  				ring_set_paused(engine, 0);
>  
>  			WRITE_ONCE(execlists->pending[0], NULL);
> -			break;
> +		} else {
> +			GEM_BUG_ON(!*execlists->active);
>  
> -		case CSB_COMPLETE: /* port0 completed, advanced to port1 */
> +			/* port0 completed, advanced to port1 */
>  			trace_ports(execlists, "completed", execlists->active);
>  
>  			/*
> @@ -1567,10 +1544,6 @@ static void process_csb(struct intel_engine_cs *engine)
>  
>  			GEM_BUG_ON(execlists->active - execlists->inflight >
>  				   execlists_num_ports(execlists));
> -			break;
> -
> -		case CSB_NOP:
> -			break;
>  		}
>  	} while (head != tail);
>  
> -- 
> 2.23.0


More information about the Intel-gfx mailing list