[Intel-gfx] [PATCH] drm/i915/hsw: Fix GPU hang during resume from S3-devices state

Wed Oct 12 14:46:37 UTC 2016

Currently resuming on HSW from S3 pm_test/devices state leads to an
unrecoverable GPU hang. Resetting the GPU during suspend fixes this. For
a full S3 cycle this change only means the reset happens earlier (before
reaching S3). For S4 the reset will happen now both during the freeze
and quiesce phases, which is a benefit since it will guarantee that the
GPU is idle before creating and loading the hibernation image.

Cc: Mika Kuoppala <mika.kuoppala at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Imre Deak <imre.deak at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 28 ++--------------------------
 drivers/gpu/drm/i915/i915_gem.c | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 89d3222..e9b3bfc 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -532,32 +532,6 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
 
 static void i915_gem_fini(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	/*
-	 * Neither the BIOS, ourselves or any other kernel
-	 * expects the system to be in execlists mode on startup,
-	 * so we need to reset the GPU back to legacy mode. And the only
-	 * known way to disable logical contexts is through a GPU reset.
-	 *
-	 * So in order to leave the system in a known default configuration,
-	 * always reset the GPU upon unload. Afterwards we then clean up the
-	 * GEM state tracking, flushing off the requests and leaving the
-	 * system in a known idle state.
-	 *
-	 * Note that is of the upmost importance that the GPU is idle and
-	 * all stray writes are flushed *before* we dismantle the backing
-	 * storage for the pinned objects.
-	 *
-	 * However, since we are uncertain that reseting the GPU on older
-	 * machines is a good idea, we don't - just in case it leaves the
-	 * machine in an unusable condition.
-	 */
-	if (HAS_HW_CONTEXTS(dev)) {
-		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
-		WARN_ON(reset && reset != -ENODEV);
-	}
-
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_engines(dev);
 	i915_gem_context_fini(dev);
@@ -636,6 +610,8 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	return 0;
 
 cleanup_gem:
+	if (i915_gem_suspend(dev))
+		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 	i915_gem_fini(dev);
 cleanup_irq:
 	intel_guc_fini(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fdd496e..a86bc8f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4273,6 +4273,30 @@ int i915_gem_suspend(struct drm_device *dev)
 	 */
 	WARN_ON(dev_priv->gt.awake);
 
+	/*
+	 * Neither the BIOS, ourselves or any other kernel
+	 * expects the system to be in execlists mode on startup,
+	 * so we need to reset the GPU back to legacy mode. And the only
+	 * known way to disable logical contexts is through a GPU reset.
+	 *
+	 * So in order to leave the system in a known default configuration,
+	 * always reset the GPU upon unload and suspend. Afterwards we then
+	 * clean up the GEM state tracking, flushing off the requests and
+	 * leaving the system in a known idle state.
+	 *
+	 * Note that is of the upmost importance that the GPU is idle and
+	 * all stray writes are flushed *before* we dismantle the backing
+	 * storage for the pinned objects.
+	 *
+	 * However, since we are uncertain that reseting the GPU on older
+	 * machines is a good idea, we don't - just in case it leaves the
+	 * machine in an unusable condition.
+	 */
+	if (HAS_HW_CONTEXTS(dev)) {
+		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
+		WARN_ON(reset && reset != -ENODEV);
+	}
+
 	return 0;
 
 err:
-- 
2.5.0