[Intel-gfx] [PATCH] drm/i915/execlists: Ignore lost completion events
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Sep 10 10:17:46 UTC 2019
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Icelake hit an issue where it missed reporting a completion event and
> instead jumped straight to a idle->active event (skipping over the
> active->idle and not even hitting the lite-restore preemption).
>
> 661497511us : process_csb: rcs0 cs-irq head=11, tail=0
> 661497512us : process_csb: rcs0 csb[0]: status=0x10008002:0x00000020 [lite-restore]
> 661497512us : trace_ports: rcs0: preempted { 28cc8:11052, 0:0 }
> 661497513us : trace_ports: rcs0: promote { 28cc8:11054, 0:0 }
> 661497514us : __i915_request_submit: rcs0 fence 28cc8:11056, current 11052
> 661497514us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes
> 661497515us : trace_ports: rcs0: submit { 28cc8:11056, 0:0 }
> 661497530us : process_csb: rcs0 cs-irq head=0, tail=1
> 661497530us : process_csb: rcs0 csb[1]: status=0x10008002:0x00000020 [lite-restore]
> 661497531us : trace_ports: rcs0: preempted { 28cc8:11054!, 0:0 }
> 661497535us : trace_ports: rcs0: promote { 28cc8:11056, 0:0 }
> 661497540us : __i915_request_submit: rcs0 fence 28cc8:11058, current 11054
> 661497544us : __execlists_submission_tasklet: rcs0: queue_priority_hint:-2147483648, submit:yes
> 661497545us : trace_ports: rcs0: submit { 28cc8:11058, 0:0 }
> 661497553us : process_csb: rcs0 cs-irq head=1, tail=2
> 661497553us : process_csb: rcs0 csb[2]: status=0x10000001:0x00000000 [idle->active]
> 661497574us : process_csb: process_csb:1538 GEM_BUG_ON(*execlists->active)
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Trace is convincing, tho always it feels bit uneasy
for the hardware to change the behaviour 'suddenly'.
And state folds to a binary so othing much to argue against.
Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 63 +++++++++--------------------
> 1 file changed, 18 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 0ddfbebbcbbc..3aad35b570d4 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -693,6 +693,9 @@ trace_ports(const struct intel_engine_execlists *execlists,
> const struct intel_engine_cs *engine =
> container_of(execlists, typeof(*engine), execlists);
>
> + if (!ports[0])
> + return;
> +
> GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
> engine->name, msg,
> ports[0]->fence.context,
> @@ -1381,13 +1384,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
> return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
> }
>
> -enum csb_step {
> - CSB_NOP,
> - CSB_PROMOTE,
> - CSB_PREEMPT,
> - CSB_COMPLETE,
> -};
> -
> /*
> * Starting with Gen12, the status has a new format:
> *
> @@ -1414,7 +1410,7 @@ enum csb_step {
> * bits 47-57: sw context id of the lrc the GT switched away from
> * bits 58-63: sw counter of the lrc the GT switched away from
> */
> -static inline enum csb_step
> +static inline bool
> gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> {
> u32 lower_dw = csb[0];
> @@ -1424,7 +1420,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>
> if (!ctx_away_valid && ctx_to_valid)
> - return CSB_PROMOTE;
> + return true;
>
> /*
> * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -1434,7 +1430,7 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> * would require some extra handling, but we don't support that.
> */
> if (new_queue && ctx_away_valid)
> - return CSB_PREEMPT;
> + return true;
>
> /*
> * switch detail = 5 is covered by the case above and we do not expect a
> @@ -1443,29 +1439,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> */
> GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
>
> - if (*execlists->active) {
> - GEM_BUG_ON(!ctx_away_valid);
> - return CSB_COMPLETE;
> - }
> -
> - return CSB_NOP;
> + return false;
> }
>
> -static inline enum csb_step
> +static inline bool
> gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
> {
> - unsigned int status = *csb;
> -
> - if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
> - return CSB_PROMOTE;
> -
> - if (status & GEN8_CTX_STATUS_PREEMPTED)
> - return CSB_PREEMPT;
> -
> - if (*execlists->active)
> - return CSB_COMPLETE;
> -
> - return CSB_NOP;
> + return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> }
>
> static void process_csb(struct intel_engine_cs *engine)
> @@ -1504,7 +1484,7 @@ static void process_csb(struct intel_engine_cs *engine)
> rmb();
>
> do {
> - enum csb_step csb_step;
> + bool promote;
>
> if (++head == num_entries)
> head = 0;
> @@ -1532,20 +1512,16 @@ static void process_csb(struct intel_engine_cs *engine)
> buf[2 * head + 0], buf[2 * head + 1]);
>
> if (INTEL_GEN(engine->i915) >= 12)
> - csb_step = gen12_csb_parse(execlists, buf + 2 * head);
> + promote = gen12_csb_parse(execlists, buf + 2 * head);
> else
> - csb_step = gen8_csb_parse(execlists, buf + 2 * head);
> -
> - switch (csb_step) {
> - case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
> + promote = gen8_csb_parse(execlists, buf + 2 * head);
> + if (promote) {
> + /* cancel old inflight, prepare for switch */
> trace_ports(execlists, "preempted", execlists->active);
> -
> while (*execlists->active)
> execlists_schedule_out(*execlists->active++);
>
> - /* fallthrough */
> - case CSB_PROMOTE: /* switch pending to inflight */
> - GEM_BUG_ON(*execlists->active);
> + /* switch pending to inflight */
> GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
> execlists->active =
> memcpy(execlists->inflight,
> @@ -1560,9 +1536,10 @@ static void process_csb(struct intel_engine_cs *engine)
> ring_set_paused(engine, 0);
>
> WRITE_ONCE(execlists->pending[0], NULL);
> - break;
> + } else {
> + GEM_BUG_ON(!*execlists->active);
>
> - case CSB_COMPLETE: /* port0 completed, advanced to port1 */
> + /* port0 completed, advanced to port1 */
> trace_ports(execlists, "completed", execlists->active);
>
> /*
> @@ -1577,10 +1554,6 @@ static void process_csb(struct intel_engine_cs *engine)
>
> GEM_BUG_ON(execlists->active - execlists->inflight >
> execlists_num_ports(execlists));
> - break;
> -
> - case CSB_NOP:
> - break;
> }
> } while (head != tail);
>
> --
> 2.23.0
More information about the Intel-gfx
mailing list