[Intel-gfx] [PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
Chang, Bruce
yu.bruce.chang at intel.com
Fri Aug 14 18:18:46 UTC 2020
On 8/14/2020 8:57 AM, Chris Wilson wrote:
> Since we expect to inline the csb_parse() routines, the w/a for the
> stale CSB data on Tigerlake will be pulled into process_csb(), and so we
> might as well simply reuse the logic for all, and so will hopefully
> avoid any strange behaviour on Icelake that was not covered by our
> previous w/a.
>
> References: d8f505311717 ("drm/i915/icl: Forcibly evict stale csb entries")
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Cc: Bruce Chang <yu.bruce.chang at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_lrc.c | 70 +++++++++++++++++------------
> 1 file changed, 42 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 3b8161c6b601..c176a029f27b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2496,25 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
> * bits 47-57: sw context id of the lrc the GT switched away from
> * bits 58-63: sw counter of the lrc the GT switched away from
> */
> -static inline bool gen12_csb_parse(const u64 *csb)
> +static inline bool gen12_csb_parse(const u64 csb)
> {
> - bool ctx_away_valid;
> - bool new_queue;
> - u64 entry;
> -
> - /* XXX HSD */
> - entry = READ_ONCE(*csb);
> - if (unlikely(entry == -1)) {
> - preempt_disable();
> - if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> - GEM_WARN_ON("50us CSB timeout");
> - preempt_enable();
> - }
> - WRITE_ONCE(*(u64 *)csb, -1);
> -
> - ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
> - new_queue =
> - lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
> + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
> + bool new_queue =
> + lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
>
> /*
> * The context switch detail is not guaranteed to be 5 when a preemption
> @@ -2524,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
> * would require some extra handling, but we don't support that.
> */
> if (!ctx_away_valid || new_queue) {
> - GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
> + GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
> return true;
> }
>
> @@ -2533,19 +2519,47 @@ static inline bool gen12_csb_parse(const u64 *csb)
> * context switch on an unsuccessful wait instruction since we always
> * use polling mode.
> */
> - GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
> + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
> return false;
> }
>
> -static inline bool gen8_csb_parse(const u64 *csb)
> +static inline bool gen8_csb_parse(const u64 csb)
> +{
> + return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> +}
> +
> +static inline u64 csb_read(u64 * const csb)
> {
> - return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
> + u64 entry = READ_ONCE(*csb);
> +
> + /*
> + * Unfortunately, the GPU does not always serialise its write
> + * of the CSB entries before its write of the CSB pointer, at least
> + * from the perspective of the CPU, using what is known as a Global
> + * Observation Point. We may read a new CSB tail pointer, but then
> + * read the stale CSB entries, causing us to misinterpret the
> + * context-switch events, and eventually declare the GPU hung.
> + *
> + * icl:HSDES#:1806554093
> + * tgl:XXX?
FYI: A HSD was also filed recently: HSD# 22011248461
> + */
> + if (unlikely(entry == -1)) {
> + preempt_disable();
> + if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
> + GEM_WARN_ON("50us CSB timeout");
> + preempt_enable();
> + }
> +
> + /* Consume this entry so that we can spot its future reuse. */
> + WRITE_ONCE(*csb, -1);
> +
> + return entry;
> }
>
> static void process_csb(struct intel_engine_cs *engine)
> {
> struct intel_engine_execlists * const execlists = &engine->execlists;
> - const u64 * const buf = execlists->csb_status;
> + u64 * const buf = execlists->csb_status;
> const u8 num_entries = execlists->csb_size;
> u8 head, tail;
>
> @@ -2603,6 +2617,7 @@ static void process_csb(struct intel_engine_cs *engine)
> rmb();
> do {
> bool promote;
> + u64 csb;
>
> if (++head == num_entries)
> head = 0;
> @@ -2625,15 +2640,14 @@ static void process_csb(struct intel_engine_cs *engine)
> * status notifier.
> */
>
> + csb = csb_read(buf + head);
> ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
> - head,
> - upper_32_bits(buf[head]),
> - lower_32_bits(buf[head]));
> + head, upper_32_bits(csb), lower_32_bits(csb));
Nice change! The original trace will actually read the CSB entry. So
when the trace was enabled, our issue will go away since one extra read
will fix our issue.
>
> if (INTEL_GEN(engine->i915) >= 12)
> - promote = gen12_csb_parse(buf + head);
> + promote = gen12_csb_parse(csb);
> else
> - promote = gen8_csb_parse(buf + head);
> + promote = gen8_csb_parse(csb);
> if (promote) {
> struct i915_request * const *old = execlists->active;
>
More information about the Intel-gfx
mailing list