[PATCH 2/2] RFC drm/xe: Enable configfs support for survivability mode
Riana Tauro
riana.tauro at intel.com
Fri Mar 7 14:24:45 UTC 2025
Register a configfs subsystem called 'xe' that allows userspace to
modify the exposed attributes. Expose survivability mode as an
attribute that can be modified manually. This is useful if pcode
fails to detect survivability mode, and for validation.
To enable survivability mode for a card,
echo "domain:bus:dev.fn" > /sys/bus/pci/drivers/xe/unbind
echo "domain:bus:dev.fn" > /sys/kernel/config/xe/survivability_mode
echo "domain:bus:dev.fn" > /sys/bus/pci/drivers/xe/bind
Signed-off-by: Riana Tauro <riana.tauro at intel.com>
---
drivers/gpu/drm/xe/xe_configfs.c | 22 ++++++++++++++++++++++
drivers/gpu/drm/xe/xe_configfs.h | 6 +++++-
drivers/gpu/drm/xe/xe_pci.c | 8 ++++----
drivers/gpu/drm/xe/xe_survivability_mode.c | 21 +++++++++++++++++++--
4 files changed, 50 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index 8c5f248e466d..ce9f3757100f 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -6,6 +6,7 @@
#include <linux/configfs.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/pci.h>
#include "xe_configfs.h"
#include "xe_module.h"
@@ -28,6 +29,27 @@ void xe_configfs_clear_survivability_mode(void)
xe_modparam.survivability_mode = NULL;
}
+bool xe_configfs_survivability_mode_enabled(struct pci_dev *pdev)
+{ /* true only when xe_modparam.survivability_mode parses to @pdev's PCI address */
+ unsigned int domain, bus, slot, function; /* fields parsed from the stored string */
+ int ret = 0; /* number of fields sscanf() converted */
+
+ if (!xe_modparam.survivability_mode) /* nothing stored (NULL): no device selected */
+ goto err;
+
+ ret = sscanf(xe_modparam.survivability_mode, "%04x:%02x:%02x.%x", &domain, &bus,
+ &slot, &function); /* hex "domain:bus:slot.function"; NOTE(review): widths cap digits read - confirm */
+ if (ret != 4) /* all four fields must convert */
+ goto err;
+
+ if ((pci_domain_nr(pdev->bus) == domain) && (pdev->bus->number == bus) &&
+ (PCI_SLOT(pdev->devfn) == slot) && (PCI_FUNC(pdev->devfn) == function))
+ return true; /* domain, bus, slot and function all match @pdev */
+
+err:
+ return false; /* unset, unparsable, or a different device */
+}
+
static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len)
{
char *survivability_mode;
diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
index 491629a2ca53..b03f4c7d0f54 100644
--- a/drivers/gpu/drm/xe/xe_configfs.h
+++ b/drivers/gpu/drm/xe/xe_configfs.h
@@ -5,8 +5,12 @@
#ifndef _XE_CONFIGFS_H_
#define _XE_CONFIGFS_H_
+#include <linux/types.h>
+
+struct pci_dev;
+
int xe_configfs_init(void);
void xe_configfs_exit(void);
void xe_configfs_clear_survivability_mode(void);
-
+bool xe_configfs_survivability_mode_enabled(struct pci_dev *pdev);
#endif
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 4d982a5a4ffd..d0f66cc08aa5 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -17,6 +17,7 @@
#include "display/xe_display.h"
#include "regs/xe_gt_regs.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drv.h"
#include "xe_gt.h"
@@ -815,10 +816,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* mei. If early probe fails, check if survivability mode is flagged by
* HW to be enabled. In that case enable it and return success.
*/
- if (err) {
- if (xe_survivability_mode_required(xe) &&
- xe_survivability_mode_enable(xe))
- return 0;
+ if (xe_configfs_survivability_mode_enabled(pdev) || err) {
+ if (xe_survivability_mode_required(xe))
+ return xe_survivability_mode_enable(xe);
return err;
}
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index d939ce70e6fa..5b60cbf8f7cb 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/sysfs.h>
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_heci_gsc.h"
@@ -28,8 +29,10 @@
* This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware
* to be flashed through mei and collect telemetry. The driver's probe flow is modified
* such that it enters survivability mode when pcode initialization is incomplete and boot status
- * denotes a failure. The driver then populates the survivability_mode PCI sysfs indicating
- * survivability mode and provides additional information required for debug
+ * denotes a failure. Survivability mode can also be enabled manually by writing the PCI address of
+ * the card to the xe configfs attribute. This is useful in cases where pcode does not detect
+ * failure and for validation. The driver then populates the survivability_mode PCI sysfs
+ * indicating survivability mode and provides additional information required for debug
*
* KMD exposes below admin-only readable sysfs in survivability mode
*
@@ -42,6 +45,15 @@
* Overflow Information - Provides history of previous failures
* Auxiliary Information - Certain failures may have information in
* addition to postcode information
+ * Enable survivability mode through configfs
+ *
+ * Survivability mode is exposed as an attribute under the xe configfs subsystem. The user can
+ * specify the card that needs to enter survivability mode.
+ *
+ * echo "domain:bus:dev.fn" > /sys/bus/pci/drivers/xe/unbind
+ * echo "domain:bus:dev.fn" > /sys/kernel/config/xe/survivability_mode
+ * echo "domain:bus:dev.fn" > /sys/bus/pci/drivers/xe/bind
+ *
*/
static u32 aux_history_offset(u32 reg_value)
@@ -133,6 +145,7 @@ static void xe_survivability_mode_fini(void *arg)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
+ xe_configfs_clear_survivability_mode();
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
}
@@ -190,11 +203,15 @@ bool xe_survivability_mode_required(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u32 data;
if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe))
return false;
+ if (xe_configfs_survivability_mode_enabled(pdev))
+ return true;
+
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
--
2.47.1
More information about the Intel-xe
mailing list