[PATCH] drm/xe/guc: Fix uninitialised count in GuC load debug prints

John Harrison <John.C.Harrison at Intel.com>
Fri May 24 20:26:03 UTC 2024


From: John Harrison <John.C.Harrison at Intel.com>

The debug prints about how long the GuC load takes include a loop
counter. However, that counter was neither initialised nor
incremented! Also, counting raw loop iterations is no longer
meaningful given that the wait function returns early on any change
in the status value. So fix the counter to track only iterations that
were due to actual timeouts.

Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
Reported-by: kernel test robot <lkp at intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405250151.IbH0l8FG-lkp@intel.com/
Fixes: b0ac1b42dbdc ("drm/xe/guc: Port over the slow GuC loading support from i915")
Cc: John Harrison <John.C.Harrison at Intel.com>
Cc: Lucas De Marchi <lucas.demarchi at intel.com>
Cc: Oded Gabbay <ogabbay at kernel.org>
Cc: "Thomas Hellström" <thomas.hellstrom at linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Cc: Matt Roper <matthew.d.roper at intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Fei Yang <fei.yang at intel.com>
Cc: intel-xe at lists.freedesktop.org
---
 drivers/gpu/drm/xe/xe_guc.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
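
For reference, the counting change boils down to using the wait
helper's return value to distinguish a timeout from a status change.
Below is a minimal standalone sketch of that pattern in plain C (not
kernel code); wait_for_change() and poll_results[] are hypothetical
stand-ins for xe_mmio_wait32_not() and the GUC_STATUS reads, purely
for illustration:

  #include <errno.h>
  #include <stdio.h>

  /* Simulated poll outcomes standing in for GUC_STATUS reads: 0 means
   * the status changed, -ETIMEDOUT means the wait interval expired. */
  static const int poll_results[] = {
          0, -ETIMEDOUT, 0, -ETIMEDOUT, -ETIMEDOUT, 0,
  };

  static int wait_for_change(int i)
  {
          return poll_results[i];
  }

  int main(void)
  {
          int timeouts = 0;

          for (int i = 0; i < 6; i++) {
                  int ret = wait_for_change(i);

                  /* Count only timeouts; a status change means forward
                   * progress was made, so it is not interesting. */
                  if (ret < 0)
                          timeouts++;
          }

          printf("timeouts = %d\n", timeouts);  /* prints: timeouts = 3 */
          return 0;
  }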

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index f7886c00af01..086a048876ba 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -591,7 +591,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
 	ktime_t before, after, delta;
 	int load_done;
 	u32 status = 0;
-	int count;
+	int count = 0;
 	u64 delta_ms;
 	u32 before_freq;
 
@@ -604,6 +604,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
 	 */
 	do {
 		u32 last_status = status & (GS_UKERNEL_MASK | GS_BOOTROM_MASK);
+		int ret;
 
 		/*
 		 * Wait for any change (intermediate or terminal) in the status register.
@@ -612,9 +613,10 @@ static void guc_wait_ucode(struct xe_guc *guc)
 		 * timeouts rather than allowing a huge timeout each time. So basically, need
 		 * to treat a timeout no different to a value change.
 		 */
-		xe_mmio_wait32_not(gt, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
-				   last_status, 1000 * 1000, &status, false);
-
+		ret = xe_mmio_wait32_not(gt, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
+					 last_status, 1000 * 1000, &status, false);
+		if (ret < 0)
+			count++;
 		after = ktime_get();
 		delta = ktime_sub(after, before);
 		delta_ms = ktime_to_ms(delta);
@@ -626,7 +628,7 @@ static void guc_wait_ucode(struct xe_guc *guc)
 		if (delta_ms >= (GUC_LOAD_RETRY_LIMIT * 1000))
 			break;
 
-		xe_gt_dbg(gt, "load still in progress, count = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n",
+		xe_gt_dbg(gt, "load still in progress, timeouts = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n",
 			  count, xe_guc_pc_get_act_freq(guc_pc),
 			  guc_pc_get_cur_freq(guc_pc), status,
 			  REG_FIELD_GET(GS_BOOTROM_MASK, status),
@@ -678,13 +680,13 @@ static void guc_wait_ucode(struct xe_guc *guc)
 
 		xe_device_declare_wedged(gt_to_xe(gt));
 	} else if (delta_ms > GUC_LOAD_TIME_WARN_MS) {
-		xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, count = %d]\n",
+		xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n",
 			   delta_ms, status, count);
 		xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n",
 			   xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
 			   before_freq, xe_gt_throttle_get_limit_reasons(gt));
 	} else {
-		xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, count = %d\n",
+		xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, timeouts = %d\n",
 			  delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
 			  before_freq, status, count);
 	}
-- 
2.43.2


