[PATCH 3/4] drm/i915/uc: Inject probe errors into intel_uc_init_hw

Chris Wilson chris at chris-wilson.co.uk
Fri Aug 2 15:42:31 UTC 2019


From: Michal Wajdeczko <michal.wajdeczko at intel.com>

Inject probe errors into intel_uc_init_hw to make sure we
correctly handle any uC initialization failure.

To avoid complains from CI about injected errors use
i915_probe_error to lower message level.

v2: _sanitize instead _reset to correctly handle Gen9 retries
v3: reorder fw status codes as failed fw is still available
    add more failure points

Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk> #v1
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  4 ++++
 drivers/gpu/drm/i915/gt/uc/intel_huc.c        |  8 +++++---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c         | 20 +++++++++++++++----
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      | 18 +++++++++++------
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h      | 14 ++++++-------
 drivers/gpu/drm/i915/i915_gem.c               |  2 +-
 6 files changed, 45 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b4b508f19a1c..412892096daa 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1123,6 +1123,10 @@ int intel_guc_submission_enable(struct intel_guc *guc)
 	enum intel_engine_id id;
 	int err;
 
+	err = i915_inject_load_error(gt->i915, -ENXIO);
+	if (err)
+		return err;
+
 	/*
 	 * We're using GuC work items for submitting work through GuC. Since
 	 * we're coalescing multiple requests from a single context into a
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d642b167a389..ef54053c5ef9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -139,6 +139,10 @@ int intel_huc_auth(struct intel_huc *huc)
 	GEM_BUG_ON(!intel_uc_fw_is_loaded(&huc->fw));
 	GEM_BUG_ON(intel_huc_is_authenticated(huc));
 
+	ret = i915_inject_load_error(gt->i915, -ENXIO);
+	if (ret)
+		goto fail;
+
 	ret = intel_guc_auth_huc(guc,
 				 intel_guc_ggtt_offset(guc, huc->rsa_data));
 	if (ret) {
@@ -158,13 +162,11 @@ int intel_huc_auth(struct intel_huc *huc)
 	}
 
 	huc->fw.status = INTEL_UC_FIRMWARE_RUNNING;
-
 	return 0;
 
 fail:
+	i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
 	huc->fw.status = INTEL_UC_FIRMWARE_FAIL;
-
-	DRM_ERROR("HuC: Authentication failed %d\n", ret);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index d1b08b28b1ad..1d21c2646831 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -41,6 +41,10 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
 	int ret;
 	u32 guc_status;
 
+	ret = i915_inject_load_error(gt->i915, -ENXIO);
+	if (ret)
+		return ret;
+
 	ret = intel_reset_guc(gt);
 	if (ret) {
 		DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
@@ -209,6 +213,10 @@ static int guc_enable_communication(struct intel_guc *guc)
 
 	GEM_BUG_ON(guc_communication_enabled(guc));
 
+	ret = i915_inject_load_error(i915, -ENXIO);
+	if (ret)
+		return ret;
+
 	ret = intel_guc_ct_enable(&guc->ct);
 	if (ret)
 		return ret;
@@ -340,7 +348,7 @@ void intel_uc_fini(struct intel_uc *uc)
 	intel_guc_fini(guc);
 }
 
-static void __uc_sanitize(struct intel_uc *uc)
+static int __uc_sanitize(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 	struct intel_huc *huc = &uc->huc;
@@ -350,7 +358,7 @@ static void __uc_sanitize(struct intel_uc *uc)
 	intel_huc_sanitize(huc);
 	intel_guc_sanitize(guc);
 
-	__intel_uc_reset_hw(uc);
+	return __intel_uc_reset_hw(uc);
 }
 
 void intel_uc_sanitize(struct intel_uc *uc)
@@ -378,6 +386,10 @@ static int uc_init_wopcm(struct intel_uc *uc)
 	GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
 	GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
 
+	err = i915_inject_load_error(gt->i915, -ENXIO);
+	if (err)
+		return err;
+
 	mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
 	err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask,
 					    size | GUC_WOPCM_SIZE_LOCKED);
@@ -434,7 +446,7 @@ int intel_uc_init_hw(struct intel_uc *uc)
 		 * Always reset the GuC just before (re)loading, so
 		 * that the state and timing are fairly predictable
 		 */
-		ret = __intel_uc_reset_hw(uc);
+		ret = __uc_sanitize(uc);
 		if (ret)
 			goto err_out;
 
@@ -504,7 +516,7 @@ int intel_uc_init_hw(struct intel_uc *uc)
 	if (GEM_WARN_ON(ret == -EIO))
 		ret = -EINVAL;
 
-	dev_err(i915->drm.dev, "GuC initialization failed %d\n", ret);
+	i915_probe_error(i915, "GuC initialization failed %d\n", ret);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 650ad6037b74..a3a22a26016c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -383,6 +383,10 @@ static int uc_fw_xfer(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
 	u64 offset;
 	int ret;
 
+	ret = i915_inject_load_error(gt->i915, -ETIMEDOUT);
+	if (ret)
+		return ret;
+
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
 	/* Set the source address for the uCode */
@@ -443,8 +447,13 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
 	/* make sure the status was cleared the last time we reset the uc */
 	GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
 
+	err = i915_inject_load_error(gt->i915, -ENOEXEC);
+	if (err)
+		return err;
+
 	if (!intel_uc_fw_is_available(uc_fw))
 		return -ENOEXEC;
+
 	/* Call custom loader */
 	intel_uc_fw_ggtt_bind(uc_fw, gt);
 	err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags);
@@ -464,13 +473,10 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
 	return 0;
 
 fail:
+	i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
+			 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+			 err);
 	uc_fw->status = INTEL_UC_FIRMWARE_FAIL;
-	DRM_DEBUG_DRIVER("%s fw load failed\n",
-			 intel_uc_fw_type_repr(uc_fw->type));
-
-	DRM_WARN("%s: Failed to load firmware %s (error %d)\n",
-		 intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
-
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 6b64b8073703..bfe3614613b7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -37,12 +37,12 @@ struct intel_gt;
 #define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915"
 
 enum intel_uc_fw_status {
-	INTEL_UC_FIRMWARE_FAIL = -3, /* failed to xfer or init/auth the fw */
-	INTEL_UC_FIRMWARE_MISSING = -2, /* blob not found on the system */
-	INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+	INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW or disabled */
 	INTEL_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
 	INTEL_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+	INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
 	INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+	INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
 	INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
 	INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
 };
@@ -83,18 +83,18 @@ static inline
 const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
 {
 	switch (status) {
-	case INTEL_UC_FIRMWARE_FAIL:
-		return "FAIL";
-	case INTEL_UC_FIRMWARE_MISSING:
-		return "MISSING";
 	case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
 		return "N/A";
 	case INTEL_UC_FIRMWARE_UNINITIALIZED:
 		return "UNINITIALIZED";
 	case INTEL_UC_FIRMWARE_SELECTED:
 		return "SELECTED";
+	case INTEL_UC_FIRMWARE_MISSING:
+		return "MISSING";
 	case INTEL_UC_FIRMWARE_AVAILABLE:
 		return "AVAILABLE";
+	case INTEL_UC_FIRMWARE_FAIL:
+		return "FAIL";
 	case INTEL_UC_FIRMWARE_TRANSFERRED:
 		return "TRANSFERRED";
 	case INTEL_UC_FIRMWARE_RUNNING:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2ef0a00e9677..dbd00b11ab75 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1243,7 +1243,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915)
 	/* We can't enable contexts until all firmware is loaded */
 	ret = intel_uc_init_hw(&gt->uc);
 	if (ret) {
-		DRM_ERROR("Enabling uc failed (%d)\n", ret);
+		i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
 		goto out;
 	}
 
-- 
2.23.0.rc0



More information about the Intel-gfx-trybot mailing list