[PATCH] drm/i915/guc: Set wedged if enable guc communication failed
Zhanjun Dong
zhanjun.dong at intel.com
Wed Apr 26 16:11:33 UTC 2023
Add an error-code check for guc_enable_communication() on the resume path. If the resume fails, the GPU can no longer be used, so mark it as wedged.
Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
---
drivers/gpu/drm/i915/gt/intel_gt_pm.c | 7 ++++++-
drivers/gpu/drm/i915/gt/intel_reset.c | 19 ++++++++++++++++---
drivers/gpu/drm/i915/gt/intel_reset.h | 1 +
drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++++++--
4 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index e02cb90723ae..775ce511f810 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -373,8 +373,13 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
intel_ggtt_restore_fences(gt->ggtt);
ret = intel_uc_runtime_resume(&gt->uc);
- if (ret)
+ if (ret && intel_uc_uses_guc_submission(&gt->uc)) {
+ /* Resume failed on GuC submission, we can no longer use the GPU, marking the GPU
+ * as wedged.
+ */
+ intel_gt_set_wedged_flag(gt);
return ret;
+ }
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 195ff72d7a14..05142761770a 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -962,6 +962,20 @@ static void nop_submit_request(struct i915_request *request)
}
}
+void intel_gt_set_wedged_flag(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ if (test_bit(I915_WEDGED, &gt->reset.flags))
+ return;
+
+ for_each_engine(engine, gt, id)
+ engine->submit_request = nop_submit_request;
+
+ set_bit(I915_WEDGED, &gt->reset.flags);
+}
+
static void __intel_gt_set_wedged(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -984,8 +998,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
__intel_gt_reset(gt, ALL_ENGINES);
- for_each_engine(engine, gt, id)
- engine->submit_request = nop_submit_request;
+
+ intel_gt_set_wedged_flag(gt);
/*
* Make sure no request can slip through without getting completed by
@@ -993,7 +1007,6 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
* in nop_submit_request.
*/
synchronize_rcu_expedited();
- set_bit(I915_WEDGED, &gt->reset.flags);
/* Mark all executing requests as skipped */
local_bh_disable();
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 25c975b6e8fc..3796b8d877b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -42,6 +42,7 @@ int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
int __must_check intel_gt_reset_lock_interruptible(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
+void intel_gt_set_wedged_flag(struct intel_gt *gt);
void intel_gt_set_wedged(struct intel_gt *gt);
bool intel_gt_unset_wedged(struct intel_gt *gt);
int intel_gt_terminally_wedged(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 4ccb4be4c9cb..62c5a953991c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -700,8 +700,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
/* Make sure we enable communication if and only if it's disabled */
GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct));
- if (enable_communication)
- guc_enable_communication(guc);
+ if (enable_communication) {
+ err = guc_enable_communication(guc);
+ if (err) {
+ DRM_DEBUG_DRIVER("Failed to enable communication, %pe", ERR_PTR(err));
+ return err;
+ }
+ }
/* If we are only resuming GuC communication but not reloading
* GuC, we need to ensure the ARAT timer interrupt is enabled
--
2.34.1
More information about the dri-devel
mailing list