[PATCH] drm/i915: Don't ignore STOP_RING failures during reset
Janusz Krzysztofik
janusz.krzysztofik at linux.intel.com
Mon Sep 9 16:11:52 UTC 2019
Otherwise kernel panic may occur when process_csb() calls trace_ports()
on an unsuccessfully reset and still non-idle engine.
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik at linux.intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +-
drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +-
drivers/gpu/drm/i915/gt/intel_reset.c | 37 +++++++++++--------
drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 14 +++++--
drivers/gpu/drm/i915/gt/mock_engine.c | 3 +-
drivers/gpu/drm/i915/gt/selftest_reset.c | 5 ++-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 +-
7 files changed, 45 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 943f0663837e..e488813275dc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -407,7 +407,7 @@ struct intel_engine_cs {
int (*resume)(struct intel_engine_cs *engine);
struct {
- void (*prepare)(struct intel_engine_cs *engine);
+ int (*prepare)(struct intel_engine_cs *engine);
void (*reset)(struct intel_engine_cs *engine, bool stalled);
void (*finish)(struct intel_engine_cs *engine);
} reset;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 0ddfbebbcbbc..c8fae411e0c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2287,7 +2287,7 @@ static int execlists_resume(struct intel_engine_cs *engine)
return 0;
}
-static void execlists_reset_prepare(struct intel_engine_cs *engine)
+static int execlists_reset_prepare(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned long flags;
@@ -2323,7 +2323,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
*
* FIXME: Wa for more modern gens needs to be validated
*/
- intel_engine_stop_cs(engine);
+ return intel_engine_stop_cs(engine);
}
static void reset_csb_pointers(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b9d84d52e986..387fe30c110b 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -609,7 +609,7 @@ int intel_reset_guc(struct intel_gt *gt)
* Ensure irq handler finishes, and not run again.
* Also return the active request so that we only search for it once.
*/
-static void reset_prepare_engine(struct intel_engine_cs *engine)
+static int reset_prepare_engine(struct intel_engine_cs *engine)
{
/*
* During the reset sequence, we must prevent the engine from
@@ -619,7 +619,7 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
* GPU state upon resume, i.e. fail to restart after a reset.
*/
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
- engine->reset.prepare(engine);
+ return engine->reset.prepare(engine);
}
static void revoke_mmaps(struct intel_gt *gt)
@@ -648,21 +648,25 @@ static void revoke_mmaps(struct intel_gt *gt)
}
}
-static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
+static int reset_prepare(struct intel_gt *gt, intel_engine_mask_t *awake)
{
struct intel_engine_cs *engine;
- intel_engine_mask_t awake = 0;
enum intel_engine_id id;
+ int ret, err = 0;
+
+ *awake = 0;
for_each_engine(engine, gt->i915, id) {
if (intel_engine_pm_get_if_awake(engine))
- awake |= engine->mask;
- reset_prepare_engine(engine);
+ *awake |= engine->mask;
+ ret = reset_prepare_engine(engine);
+ if (ret && !err)
+ err = ret;
}
intel_uc_reset_prepare(>->uc);
- return awake;
+ return err;
}
static void gt_revoke(struct intel_gt *gt)
@@ -752,7 +756,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
* rolling the global seqno forward (since this would complete requests
* for which we haven't set the fence error to EIO yet).
*/
- awake = reset_prepare(gt);
+ reset_prepare(gt, &awake);
/* Even if the GPU reset fails, it should still stop the engines */
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
@@ -932,16 +936,18 @@ void intel_gt_reset(struct intel_gt *gt,
GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, >->reset.flags));
mutex_lock(>->reset.mutex);
- /* Clear any previous failed attempts at recovery. Time to try again. */
- if (!__intel_gt_unset_wedged(gt))
- goto unlock;
-
if (reason)
dev_notice(gt->i915->drm.dev,
"Resetting chip for %s\n", reason);
atomic_inc(>->i915->gpu_error.reset_count);
- awake = reset_prepare(gt);
+ ret = reset_prepare(gt, &awake);
+ if (ret)
+ goto finish;
+
+ /* Clear any previous failed attempts at recovery. Time to try again. */
+ if (!__intel_gt_unset_wedged(gt))
+ goto finish;
if (!intel_has_gpu_reset(gt->i915)) {
if (i915_modparams.reset)
@@ -987,7 +993,6 @@ void intel_gt_reset(struct intel_gt *gt,
finish:
reset_finish(gt, awake);
-unlock:
mutex_unlock(>->reset.mutex);
return;
@@ -1039,7 +1044,9 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
if (!intel_engine_pm_get_if_awake(engine))
return 0;
- reset_prepare_engine(engine);
+ ret = reset_prepare_engine(engine);
+ if (ret)
+ goto out;
if (msg)
dev_notice(engine->i915->drm.dev,
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index ac55a0d054bd..1c1d4dbbed78 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -736,10 +736,11 @@ static int xcs_resume(struct intel_engine_cs *engine)
return ret;
}
-static void reset_prepare(struct intel_engine_cs *engine)
+static int reset_prepare(struct intel_engine_cs *engine)
{
struct intel_uncore *uncore = engine->uncore;
const u32 base = engine->mmio_base;
+ int err;
/*
* We stop engines, otherwise we might get failed reset and a
@@ -757,8 +758,11 @@ static void reset_prepare(struct intel_engine_cs *engine)
*/
GEM_TRACE("%s\n", engine->name);
- if (intel_engine_stop_cs(engine))
+ err = intel_engine_stop_cs(engine);
+ if (err) {
GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
+ return err;
+ }
intel_uncore_write_fw(uncore,
RING_HEAD(base),
@@ -773,10 +777,14 @@ static void reset_prepare(struct intel_engine_cs *engine)
intel_uncore_write_fw(uncore, RING_CTL(base), 0);
/* Check acts as a post */
- if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
+ if (intel_uncore_read_fw(uncore, RING_HEAD(base))) {
GEM_TRACE("%s: ring head [%x] not parked\n",
engine->name,
intel_uncore_read_fw(uncore, RING_HEAD(base)));
+ err = -EIO;
+ }
+
+ return err;
}
static void reset_ring(struct intel_engine_cs *engine, bool stalled)
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 5d43cbc3f345..a04e69c81fb6 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -202,8 +202,9 @@ static void mock_submit_request(struct i915_request *request)
spin_unlock_irqrestore(&engine->hw_lock, flags);
}
-static void mock_reset_prepare(struct intel_engine_cs *engine)
+static int mock_reset_prepare(struct intel_engine_cs *engine)
{
+ return 0;
}
static void mock_reset(struct intel_engine_cs *engine, bool stalled)
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 00a4f60cdfd5..fefdececd680 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -78,7 +78,10 @@ static int igt_atomic_reset(void *arg)
GEM_TRACE("__intel_gt_reset under %s\n", p->name);
- awake = reset_prepare(gt);
+ err = reset_prepare(gt, &awake);
+ if (err)
+ goto unlock;
+
p->critical_section_begin();
err = __intel_gt_reset(gt, ALL_ENGINES);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index f325d3dd564f..0ed8e89774f7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -620,7 +620,7 @@ static void guc_submission_tasklet(unsigned long data)
spin_unlock_irqrestore(&engine->active.lock, flags);
}
-static void guc_reset_prepare(struct intel_engine_cs *engine)
+static int guc_reset_prepare(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -636,6 +636,8 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
* prevents the race.
*/
__tasklet_disable_sync_once(&execlists->tasklet);
+
+ return 0;
}
static void
--
2.21.0
More information about the Intel-gfx-trybot
mailing list