[Intel-gfx] [PATCH] drm/i915/gt: prepare reset based on reset domain

Tejas Upadhyay tejaskumarx.surendrakumar.upadhyay at intel.com
Thu Dec 9 12:01:34 UTC 2021


Most code paths do a full reset and prepare all
engines for reset, except the two below:

1. A single engine reset needs to prepare engines for
reset based on the engine's reset domain.
__intel_engine_reset_bh() needs to loop over and
prepare all engines that are in the same reset
domain before triggering the reset.

2. enable_error_interrupt() in
drivers/gpu/drm/i915/gt/intel_execlists_submission.c
needs a similar change.

Whenever a full reset is done, engine prepare is
already called for all engines before the actual
reset is triggered. Only the two scenarios above,
which request a single engine reset, are not covered.

Note: The need for this change arose recently
because whenever an engine is reset, all engines in
the same reset domain get reset, and if an engine is
reset before stopping its CS or applying the required
W/A, there is a high chance of a hang/crash.
reset_prepare_engine() takes care of this.

Signed-off-by: Tejas Upadhyay <tejaskumarx.surendrakumar.upadhyay at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_execlists_submission.c |  9 +++++++++
 drivers/gpu/drm/i915/gt/intel_reset.c                | 12 ++++++++++--
 drivers/gpu/drm/i915/gt/intel_reset.h                |  1 +
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index a69df5e9e77a..668e7ba5b254 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2806,6 +2806,15 @@ static void enable_error_interrupt(struct intel_engine_cs *engine)
 		drm_err(&engine->i915->drm,
 			"engine '%s' resumed still in error: %08x\n",
 			engine->name, status);
+		if (engine->reset_domain) {
+			struct intel_engine_cs *nengine;
+			enum intel_engine_id id;
+
+			for_each_engine(nengine, engine->gt, id)
+				if (nengine->reset_domain ==
+				    engine->reset_domain)
+					reset_prepare_engine(nengine);
+		}
 		__intel_gt_reset(engine->gt, engine->mask);
 	}
 
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 63199f0550e6..454d6ab1d9f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -705,7 +705,7 @@ int intel_reset_guc(struct intel_gt *gt)
  * Ensure irq handler finishes, and not run again.
  * Also return the active request so that we only search for it once.
  */
-static void reset_prepare_engine(struct intel_engine_cs *engine)
+void reset_prepare_engine(struct intel_engine_cs *engine)
 {
 	/*
 	 * During the reset sequence, we must prevent the engine from
@@ -1167,7 +1167,15 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
 	if (!intel_engine_pm_get_if_awake(engine))
 		return 0;
 
-	reset_prepare_engine(engine);
+	if (engine->reset_domain) {
+		struct intel_engine_cs *nengine;
+		enum intel_engine_id id;
+
+		for_each_engine(nengine, gt, id)
+			if (nengine->reset_domain ==
+			    engine->reset_domain)
+				reset_prepare_engine(nengine);
+	}
 
 	if (msg)
 		drm_notice(&engine->i915->drm,
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index adc734e67387..7abd5d49f0e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -28,6 +28,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
 			   const char *fmt, ...);
 #define I915_ERROR_CAPTURE BIT(0)
 
+void reset_prepare_engine(struct intel_engine_cs *engine);
 void intel_gt_reset(struct intel_gt *gt,
 		    intel_engine_mask_t stalled_mask,
 		    const char *reason);
-- 
2.31.1



More information about the Intel-gfx mailing list