[Intel-gfx] [PATCH 6/7] drm/i915: Detect a failed GPU reset+recovery
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Jan 17 15:59:06 UTC 2017
From: Chris Wilson <chris at chris-wilson.co.uk>
If we can't recover the GPU after the reset, mark it as wedged to cancel
the outstanding tasks and to prevent new users from trying to use the
broken GPU.
v2: Check the same ring is hung again before declaring the reset broken.
v3: use engine_stalled (Mika)
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk> (v2)
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 7 ++++++-
drivers/gpu/drm/i915/i915_drv.h | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++++++++--
3 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index bb747ae..5ee6297 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv)
pr_notice("drm/i915: Resetting chip after gpu hang\n");
disable_irq(dev_priv->drm.irq);
- i915_gem_reset_prepare(dev_priv);
+ ret = i915_gem_reset_prepare(dev_priv);
+ if (ret) {
+ DRM_ERROR("GPU recovery failed\n");
+ intel_gpu_reset(dev_priv, ALL_ENGINES);
+ goto error;
+ }
ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
if (ret) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f861418..3850950 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3328,7 +3328,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return READ_ONCE(error->reset_count);
}
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 832ad09..3e10e81 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2625,16 +2625,28 @@ static bool engine_stalled(struct intel_engine_cs *engine)
return true;
}
-void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ int err = 0;
/* Ensure irq handler finishes, and not run again. */
- for_each_engine(engine, dev_priv, id)
+ for_each_engine(engine, dev_priv, id) {
+ struct drm_i915_gem_request *request;
+
tasklet_kill(&engine->irq_tasklet);
+ if (engine_stalled(engine)) {
+ request = i915_gem_find_active_request(engine);
+ if (request && request->fence.error == -EIO)
+ err = -EIO; /* Previous reset failed! */
+ }
+ }
+
i915_gem_revoke_fences(dev_priv);
+
+ return err;
}
static void skip_request(struct drm_i915_gem_request *request)
--
2.7.4
More information about the Intel-gfx
mailing list