[PATCH v3 3/7] drm/xe/xe_survivability: Add support for Runtime survivability mode
Riana Tauro
riana.tauro at intel.com
Wed Jul 2 14:11:13 UTC 2025
Certain runtime firmware errors can cause the device to be wedged,
requiring a firmware flash to restore normal operation.
Runtime Survivability Mode indicates that a firmware flash is necessary to
recover the device.
The sysfs entry below indicates that the device is in survivability mode:
/sys/bus/pci/devices/<device>/survivability_mode
Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
drivers/gpu/drm/xe/xe_device.c | 2 +-
drivers/gpu/drm/xe/xe_survivability_mode.c | 26 ++++++++++++++++---
drivers/gpu/drm/xe/xe_survivability_mode.h | 4 ++-
.../gpu/drm/xe/xe_survivability_mode_types.h | 8 ++++++
4 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 4a38486dccc8..5defa54ccd26 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -716,7 +716,7 @@ int xe_device_probe_early(struct xe_device *xe)
* possible, but still return the previous error for error
* propagation
*/
- err = xe_survivability_mode_enable(xe);
+ err = xe_survivability_mode_enable(xe, XE_SURVIVABILITY_TYPE_BOOT);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 1f710b3fc599..e1adcb33c9b0 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -129,7 +129,10 @@ static ssize_t survivability_mode_show(struct device *dev,
struct xe_survivability_info *info = survivability->info;
int index = 0, count = 0;
- for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
+ count += sysfs_emit_at(buff, count, "Survivability mode: %s\n",
+ survivability->type ? "Runtime" : "Boot");
+
+ for (index = 0; survivability->boot_status && index < MAX_SCRATCH_MMIO; index++) {
if (info[index].reg)
count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
info[index].reg, info[index].value);
@@ -169,6 +172,10 @@ static int enable_survivability_mode(struct pci_dev *pdev)
if (ret)
return ret;
+ /* Only create sysfs for runtime survivability mode */
+ if (xe_survivability_mode_is_runtime(xe))
+ return 0;
+
/* Make sure xe_heci_gsc_init() knows about survivability mode */
survivability->mode = true;
@@ -189,6 +196,17 @@ static int enable_survivability_mode(struct pci_dev *pdev)
return 0;
}
+/**
+ * xe_survivability_mode_is_runtime - check if survivability mode is runtime
+ * @xe: xe device instance
+ *
+ * Returns true if in runtime survivability mode, false otherwise
+ */
+bool xe_survivability_mode_is_runtime(struct xe_device *xe)
+{
+ return xe->survivability.type == XE_SURVIVABILITY_TYPE_RUNTIME;
+}
+
/**
* xe_survivability_mode_is_enabled - check if survivability mode is enabled
* @xe: xe device instance
@@ -251,16 +269,18 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
* Return: 0 if survivability mode is enabled or not requested; negative error
* code otherwise.
*/
-int xe_survivability_mode_enable(struct xe_device *xe)
+int xe_survivability_mode_enable(struct xe_device *xe, const enum xe_survivability_type type)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- if (!xe_survivability_mode_is_requested(xe))
+ if (!xe_survivability_mode_is_requested(xe) &&
+ type != XE_SURVIVABILITY_TYPE_RUNTIME)
return 0;
survivability->size = MAX_SCRATCH_MMIO;
+ survivability->type = type;
info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
GFP_KERNEL);
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h
index 02231c2bf008..559d1e99b03a 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
@@ -9,9 +9,11 @@
#include <linux/types.h>
struct xe_device;
+enum xe_survivability_type;
-int xe_survivability_mode_enable(struct xe_device *xe);
+int xe_survivability_mode_enable(struct xe_device *xe, const enum xe_survivability_type);
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
+bool xe_survivability_mode_is_runtime(struct xe_device *xe);
bool xe_survivability_mode_is_requested(struct xe_device *xe);
#endif /* _XE_SURVIVABILITY_MODE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
index 19d433e253df..01f07d9c4124 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
@@ -9,6 +9,11 @@
#include <linux/limits.h>
#include <linux/types.h>
+enum xe_survivability_type {
+ XE_SURVIVABILITY_TYPE_BOOT,
+ XE_SURVIVABILITY_TYPE_RUNTIME,
+};
+
struct xe_survivability_info {
char name[NAME_MAX];
u32 reg;
@@ -30,6 +35,9 @@ struct xe_survivability {
/** @mode: boolean to indicate survivability mode */
bool mode;
+
+ /** @type: survivability mode type (boot or runtime) */
+ enum xe_survivability_type type;
};
#endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */
--
2.47.1
More information about the Intel-xe
mailing list