[PATCH v4 3/3] drm/xe: Enable configfs support for survivability mode

Riana Tauro riana.tauro at intel.com
Mon Apr 7 05:14:13 UTC 2025


Enable survivability mode if supported and configfs attribute is set.
Enabling survivability mode manually is useful in cases where pcode does
not detect failure, validation and for IFR (in-field-repair).

To set configfs survivability mode attribute for a device

echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode

The card enters survivability mode if supported

v2: add a log if survivability mode is enabled for unsupported
    platforms (Rodrigo)

Signed-off-by: Riana Tauro <riana.tauro at intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
---
 drivers/gpu/drm/xe/xe_configfs.c           | 62 ++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_configfs.h           |  8 +++
 drivers/gpu/drm/xe/xe_device.c             |  2 +-
 drivers/gpu/drm/xe/xe_pci.c                | 19 ++++---
 drivers/gpu/drm/xe/xe_survivability_mode.c | 35 +++++++++---
 drivers/gpu/drm/xe/xe_survivability_mode.h |  1 +
 6 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index 48a9f428bda9..cb9f175c89a1 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -164,6 +164,68 @@ static struct configfs_subsystem xe_configfs = {
 	},
 };
 
+static struct xe_config_device *configfs_find_group(struct pci_dev *pdev)
+{
+	struct config_item *item;
+	char name[64];
+
+	snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus),
+		 pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+
+	mutex_lock(&xe_configfs.su_mutex);
+	item = config_group_find_item(&xe_configfs.su_group, name);
+	mutex_unlock(&xe_configfs.su_mutex);
+
+	if (!item)
+		return NULL;
+
+	return to_xe_config_device(item);
+}
+
+/**
+ * xe_configfs_get_survivability_mode - get configfs survivability mode attribute
+ * @pdev: pci device
+ *
+ * find the configfs group that belongs to the pci device and return
+ * the survivability mode attribute
+ *
+ * Return: survivability mode if config group is found, false otherwise
+ */
+bool xe_configfs_get_survivability_mode(struct pci_dev *pdev)
+{
+	struct xe_config_device *dev = configfs_find_group(pdev);
+	bool mode;
+
+	if (!dev)
+		return false;
+
+	mode = dev->survivability_mode;
+	config_item_put(&dev->group.cg_item);
+
+	return mode;
+}
+
+/**
+ * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute
+ * @pdev: pci device
+ *
+ * find the configfs group that belongs to the pci device and clear survivability
+ * mode attribute
+ */
+void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
+{
+	struct xe_config_device *dev = configfs_find_group(pdev);
+
+	if (!dev)
+		return;
+
+	mutex_lock(&dev->lock);
+	dev->survivability_mode = 0;
+	mutex_unlock(&dev->lock);
+
+	config_item_put(&dev->group.cg_item);
+}
+
 int __init xe_configfs_init(void)
 {
 	struct config_group *root = &xe_configfs.su_group;
diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
index 5532320818e4..d7d041ec2611 100644
--- a/drivers/gpu/drm/xe/xe_configfs.h
+++ b/drivers/gpu/drm/xe/xe_configfs.h
@@ -5,12 +5,20 @@
 #ifndef _XE_CONFIGFS_H_
 #define _XE_CONFIGFS_H_
 
+#include <linux/types.h>
+
+struct pci_dev;
+
 #if IS_ENABLED(CONFIG_CONFIGFS_FS)
 int xe_configfs_init(void);
 void xe_configfs_exit(void);
+bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
+void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
 #else
 static inline int xe_configfs_init(void) { return 0; };
 static inline void xe_configfs_exit(void) {};
+static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; };
+static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {};
 #endif
 
 #endif
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index d8e227ddf255..75e753e0a682 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -712,7 +712,7 @@ int xe_device_probe_early(struct xe_device *xe)
 	sriov_update_device_info(xe);
 
 	err = xe_pcode_probe_early(xe);
-	if (err) {
+	if (err || xe_survivability_mode_is_requested(xe)) {
 		int save_err = err;
 
 		/*
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 772b6c81672c..1d83d88cde90 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -812,18 +812,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return err;
 
 	err = xe_device_probe_early(xe);
-	if (err) {
-		/*
-		 * In Boot Survivability mode, no drm card is exposed and driver
-		 * is loaded with bare minimum to allow for firmware to be
-		 * flashed through mei. If early probe failed, but it managed to
-		 * enable survivability mode, return success.
-		 */
-		if (xe_survivability_mode_is_enabled(xe))
-			return 0;
+	/*
+	 * In Boot Survivability mode, no drm card is exposed and driver
+	 * is loaded with bare minimum to allow for firmware to be
+	 * flashed through mei. Return success, if survivability mode
+	 * is enabled due to pcode failure or configfs being set
+	 */
+	if (xe_survivability_mode_is_enabled(xe))
+		return 0;
 
+	if (err)
 		return err;
-	}
 
 	err = xe_info_init(xe, desc);
 	if (err)
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 399c06890b0b..1f710b3fc599 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -10,6 +10,7 @@
 #include <linux/pci.h>
 #include <linux/sysfs.h>
 
+#include "xe_configfs.h"
 #include "xe_device.h"
 #include "xe_gt.h"
 #include "xe_heci_gsc.h"
@@ -145,6 +146,7 @@ static void xe_survivability_mode_fini(void *arg)
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	struct device *dev = &pdev->dev;
 
+	xe_configfs_clear_survivability_mode(pdev);
 	sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
 }
 
@@ -198,23 +200,40 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe)
 	return xe->survivability.mode;
 }
 
-/*
- * survivability_mode_requested - check if it's possible to enable
- * survivability mode and that was requested by firmware
+/**
+ * xe_survivability_mode_is_requested - check if it's possible to enable survivability
+ *					mode that was requested by firmware or userspace
+ * @xe: xe device instance
  *
- * This function reads the boot status from Pcode.
+ * This function reads configfs and  boot status from Pcode.
  *
  * Return: true if platform support is available and boot status indicates
- * failure, false otherwise.
+ * failure or if survivability mode is requested, false otherwise.
  */
-static bool survivability_mode_requested(struct xe_device *xe)
+bool xe_survivability_mode_is_requested(struct xe_device *xe)
 {
 	struct xe_survivability *survivability = &xe->survivability;
 	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 	u32 data;
+	bool survivability_mode;
+
+	if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
+		return false;
+
+	survivability_mode = xe_configfs_get_survivability_mode(pdev);
 
-	if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe))
+	if (xe->info.platform < XE_BATTLEMAGE) {
+		if (survivability_mode) {
+			dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
+			xe_configfs_clear_survivability_mode(pdev);
+		}
 		return false;
+	}
+
+	/* Enable survivability mode if set via configfs */
+	if (survivability_mode)
+		return true;
 
 	data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
 	survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
@@ -238,7 +257,7 @@ int xe_survivability_mode_enable(struct xe_device *xe)
 	struct xe_survivability_info *info;
 	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
 
-	if (!survivability_mode_requested(xe))
+	if (!xe_survivability_mode_is_requested(xe))
 		return 0;
 
 	survivability->size = MAX_SCRATCH_MMIO;
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h
index d7e64885570d..02231c2bf008 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
@@ -12,5 +12,6 @@ struct xe_device;
 
 int xe_survivability_mode_enable(struct xe_device *xe);
 bool xe_survivability_mode_is_enabled(struct xe_device *xe);
+bool xe_survivability_mode_is_requested(struct xe_device *xe);
 
 #endif /* _XE_SURVIVABILITY_MODE_H_ */
-- 
2.47.1



More information about the Intel-xe mailing list