[Intel-gfx] [PATCH] drm/i915: Prevent machine hang from Broxton's vtd w/a and error capture
Chris Wilson
chris at chris-wilson.co.uk
Thu Oct 11 11:51:08 UTC 2018
Since capturing the error state requires fiddling around with the GGTT
to read arbitrary buffers and is itself run under stop_machine(), it
deadlocks the machine (effectively a hard hang) when run in conjunction
with Broxton's VTd workaround to serialize GGTT access.
v2: Store the ERR_PTR in first_error so that the error can be reported
to the user via sysfs.
Fixes: 0ef34ad6222a ("drm/i915: Serialize GTT/Aperture accesses on BXT")
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield at intel.com>
Cc: John Harrison <john.C.Harrison at intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++
drivers/gpu/drm/i915/i915_gpu_error.c | 15 ++++++++++++++-
drivers/gpu/drm/i915/i915_gpu_error.h | 8 +++++++-
3 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 29ca9007a704..47b003daa6f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3339,6 +3339,9 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
if (ggtt->vm.clear_range != nop_clear_range)
ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+
+ /* Prevent recursively calling stop_machine() and deadlocks. */
+ i915_disable_error_state(dev_priv, -ENODEV);
}
ggtt->invalidate = gen6_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index c8d8f79688a8..21b5c8765015 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -648,6 +648,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
return 0;
}
+ if (IS_ERR(error))
+ return PTR_ERR(error);
+
if (*error->error_msg)
err_printf(m, "%s\n", error->error_msg);
err_printf(m, "Kernel: " UTS_RELEASE "\n");
@@ -1867,6 +1870,7 @@ void i915_capture_error_state(struct drm_i915_private *i915,
error = i915_capture_gpu_state(i915);
if (!error) {
DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
+ i915_disable_error_state(i915, -ENOMEM);
return;
}
@@ -1922,5 +1926,14 @@ void i915_reset_error_state(struct drm_i915_private *i915)
i915->gpu_error.first_error = NULL;
spin_unlock_irq(&i915->gpu_error.lock);
- i915_gpu_state_put(error);
+ if (!IS_ERR(error))
+ i915_gpu_state_put(error);
+}
+
+void i915_disable_error_state(struct drm_i915_private *i915, int err)
+{
+ spin_lock_irq(&i915->gpu_error.lock);
+ if (!i915->gpu_error.first_error)
+ i915->gpu_error.first_error = ERR_PTR(err);
+ spin_unlock_irq(&i915->gpu_error.lock);
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 8710fb18ed74..3ec89a504de5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -343,6 +343,7 @@ static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
void i915_reset_error_state(struct drm_i915_private *i915);
+void i915_disable_error_state(struct drm_i915_private *i915, int err);
#else
@@ -355,13 +356,18 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
static inline struct i915_gpu_state *
i915_first_error_state(struct drm_i915_private *i915)
{
- return NULL;
+ return ERR_PTR(-ENODEV);
}
static inline void i915_reset_error_state(struct drm_i915_private *i915)
{
}
+static inline void i915_disable_error_state(struct drm_i915_private *i915,
+ int err)
+{
+}
+
#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
#endif /* _I915_GPU_ERROR_H_ */
--
2.19.1
More information about the Intel-gfx
mailing list