[PATCH 3/3] drm/i915/gt: Apply the CSB w/a for all
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 14 14:30:41 UTC 2020
Since we expect to inline the csb_parse() routines, the w/a for the
stale CSB data on Tigerlake will be pulled into process_csb(), and so we
might as well simply reuse the logic for all, and so will hopefully
avoid any strange behaviour on Icelake that was not covered by our
previous w/a.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Bruce Chang <yu.bruce.chang at intel.com>
---
drivers/gpu/drm/i915/gt/intel_lrc.c | 64 ++++++++++++++++++-----------
1 file changed, 39 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 8b9722309f4e..710fa703cac6 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2496,22 +2496,11 @@ invalidate_csb_entries(const u64 *first, const u64 *last)
* bits 47-57: sw context id of the lrc the GT switched away from
* bits 58-63: sw counter of the lrc the GT switched away from
*/
-static inline bool gen12_csb_parse(const u64 *csb)
+static inline bool gen12_csb_parse(const u64 csb)
{
- bool ctx_away_valid;
- bool new_queue;
- u64 entry;
-
- /* XXX HSD */
- entry = READ_ONCE(*csb);
- if (unlikely(entry == -1) &&
- wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
- GEM_WARN_ON("50us CSB timeout");
- WRITE_ONCE(*(u64 *)csb, -1);
-
- ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(entry));
- new_queue =
- lower_32_bits(entry) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+ bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb));
+ bool new_queue =
+ lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
/*
* The context switch detail is not guaranteed to be 5 when a preemption
@@ -2521,7 +2510,7 @@ static inline bool gen12_csb_parse(const u64 *csb)
* would require some extra handling, but we don't support that.
*/
if (!ctx_away_valid || new_queue) {
- GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(entry)));
+ GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb)));
return true;
}
@@ -2530,19 +2519,44 @@ static inline bool gen12_csb_parse(const u64 *csb)
* context switch on an unsuccessful wait instruction since we always
* use polling mode.
*/
- GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(entry)));
+ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb)));
return false;
}
-static inline bool gen8_csb_parse(const u64 *csb)
+static inline bool gen8_csb_parse(const u64 csb)
{
- return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+ return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
+
+static inline u64 csb_read(u64 *csb)
+{
+ u64 entry = READ_ONCE(*csb);
+
+ /*
+ * Unfortunately, the GPU does not always serialise its write
+ * of the CSB entries before its write of the CSB pointer, at least
+ * from the perspective of the CPU, using what is known as a Global
+ * Observation Point. We may read a new CSB tail pointer, but then
+ * read the stale CSB entries, causing us to misinterpret the
+ * context-switch events, and eventually declare the GPU hung.
+ *
+ * icl:HSDES#:1806554093
+ * tgl:XXX?
+ */
+ if (unlikely(entry == -1) &&
+ wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 50))
+ GEM_WARN_ON("50us CSB timeout");
+
+ /* Consume this entry so that we can spot its future use */
+ WRITE_ONCE(*csb, -1);
+
+ return entry;
}
static void process_csb(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- const u64 * const buf = execlists->csb_status;
+ u64 * const buf = execlists->csb_status;
const u8 num_entries = execlists->csb_size;
u8 head, tail;
@@ -2600,6 +2614,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
bool promote;
+ u64 entry;
if (++head == num_entries)
head = 0;
@@ -2622,15 +2637,14 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
+ entry = csb_read(buf + head);
ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
- head,
- upper_32_bits(buf[head]),
- lower_32_bits(buf[head]));
+ head, upper_32_bits(entry), lower_32_bits(entry));
if (INTEL_GEN(engine->i915) >= 12)
- promote = gen12_csb_parse(buf + head);
+ promote = gen12_csb_parse(entry);
else
- promote = gen8_csb_parse(buf + head);
+ promote = gen8_csb_parse(entry);
if (promote) {
struct i915_request * const *old = execlists->active;
--
2.20.1
More information about the Intel-gfx-trybot
mailing list