[PATCH v3 3/7] drm/xe/xe_survivability: Add support for Runtime survivability mode

Riana Tauro riana.tauro at intel.com
Wed Jul 2 14:11:13 UTC 2025


Certain runtime firmware errors can cause the device to be wedged,
requiring a firmware flash to restore normal operation.
Runtime Survivability Mode indicates that a firmware flash is necessary to
recover the device.

The sysfs attribute below indicates that the device is in survivability mode:

/sys/bus/pci/devices/<device>/survivability_mode

Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
 drivers/gpu/drm/xe/xe_device.c                |  2 +-
 drivers/gpu/drm/xe/xe_survivability_mode.c    | 26 ++++++++++++++++---
 drivers/gpu/drm/xe/xe_survivability_mode.h    |  4 ++-
 .../gpu/drm/xe/xe_survivability_mode_types.h  |  8 ++++++
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 4a38486dccc8..5defa54ccd26 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -716,7 +716,7 @@ int xe_device_probe_early(struct xe_device *xe)
 		 * possible, but still return the previous error for error
 		 * propagation
 		 */
-		err = xe_survivability_mode_enable(xe);
+		err = xe_survivability_mode_enable(xe, XE_SURVIVABILITY_TYPE_BOOT);
 		if (err)
 			return err;
 
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 1f710b3fc599..e1adcb33c9b0 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -129,7 +129,10 @@ static ssize_t survivability_mode_show(struct device *dev,
 	struct xe_survivability_info *info = survivability->info;
 	int index = 0, count = 0;
 
-	for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
+	count += sysfs_emit_at(buff, count, "Survivability mode: %s\n",
+			       survivability->type ? "Runtime" : "Boot");
+
+	for (index = 0; survivability->boot_status && index < MAX_SCRATCH_MMIO; index++) {
 		if (info[index].reg)
 			count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
 					       info[index].reg, info[index].value);
@@ -169,6 +172,10 @@ static int enable_survivability_mode(struct pci_dev *pdev)
 	if (ret)
 		return ret;
 
+	/* Only create sysfs for runtime survivability mode */
+	if (xe_survivability_mode_is_runtime(xe))
+		return 0;
+
 	/* Make sure xe_heci_gsc_init() knows about survivability mode */
 	survivability->mode = true;
 
@@ -189,6 +196,17 @@ static int enable_survivability_mode(struct pci_dev *pdev)
 	return 0;
 }
 
+/**
+ * xe_survivability_mode_is_runtime - check if survivability mode is runtime
+ * @xe: xe device instance
+ *
+ * Return: true if xe->survivability.type is XE_SURVIVABILITY_TYPE_RUNTIME, false otherwise
+ */
+bool xe_survivability_mode_is_runtime(struct xe_device *xe)
+{
+	return xe->survivability.type == XE_SURVIVABILITY_TYPE_RUNTIME;
+}
+
 /**
  * xe_survivability_mode_is_enabled - check if survivability mode is enabled
  * @xe: xe device instance
@@ -251,16 +269,18 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
  * Return: 0 if survivability mode is enabled or not requested; negative error
  * code otherwise.
  */
-int xe_survivability_mode_enable(struct xe_device *xe)
+int xe_survivability_mode_enable(struct xe_device *xe, const enum xe_survivability_type type)
 {
 	struct xe_survivability *survivability = &xe->survivability;
 	struct xe_survivability_info *info;
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
-	if (!xe_survivability_mode_is_requested(xe))
+	if (!xe_survivability_mode_is_requested(xe) &&
+	    type != XE_SURVIVABILITY_TYPE_RUNTIME)
 		return 0;
 
 	survivability->size = MAX_SCRATCH_MMIO;
+	survivability->type = type;
 
 	info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
 			    GFP_KERNEL);
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h
index 02231c2bf008..559d1e99b03a 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
@@ -9,9 +9,11 @@
 #include <linux/types.h>
 
 struct xe_device;
+enum xe_survivability_type;
 
-int xe_survivability_mode_enable(struct xe_device *xe);
+int xe_survivability_mode_enable(struct xe_device *xe, enum xe_survivability_type type);
 bool xe_survivability_mode_is_enabled(struct xe_device *xe);
+bool xe_survivability_mode_is_runtime(struct xe_device *xe);
 bool xe_survivability_mode_is_requested(struct xe_device *xe);
 
 #endif /* _XE_SURVIVABILITY_MODE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
index 19d433e253df..01f07d9c4124 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
@@ -9,6 +9,11 @@
 #include <linux/limits.h>
 #include <linux/types.h>
 
+enum xe_survivability_type {
+	XE_SURVIVABILITY_TYPE_BOOT,	/* 0: survivability_mode_show() prints "Boot" */
+	XE_SURVIVABILITY_TYPE_RUNTIME,	/* non-zero: survivability_mode_show() prints "Runtime" */
+};
+
 struct xe_survivability_info {
 	char name[NAME_MAX];
 	u32 reg;
@@ -30,6 +35,9 @@ struct xe_survivability {
 
 	/** @mode: boolean to indicate survivability mode */
 	bool mode;
+
+	/** @type: survivability mode type (boot or runtime) */
+	enum xe_survivability_type type;
 };
 
 #endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */
-- 
2.47.1



More information about the Intel-xe mailing list