[PATCH v1 1/2] drm/xe/debugfs: Expose PCIe Gen5 update telemetry

Raag Jadav raag.jadav at intel.com
Mon Mar 31 14:23:35 UTC 2025


Expose debugfs telemetry required for PCIe Gen5 firmware update for
discrete GPUs.

Signed-off-by: Raag Jadav <raag.jadav at intel.com>
---
 drivers/gpu/drm/xe/xe_debugfs.c   | 93 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_pcode_api.h |  4 ++
 2 files changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index d0503959a8ed..67c941abf4fe 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -17,6 +17,9 @@
 #include "xe_gt_debugfs.h"
 #include "xe_gt_printk.h"
 #include "xe_guc_ads.h"
+#include "xe_mmio.h"
+#include "xe_pcode_api.h"
+#include "xe_pcode.h"
 #include "xe_pm.h"
 #include "xe_pxp_debugfs.h"
 #include "xe_sriov.h"
@@ -191,6 +194,89 @@ static const struct file_operations wedged_mode_fops = {
 	.write = wedged_mode_set,
 };
 
+/**
+ * DOC: PCIe Gen5 Update Limitations
+ *
+ * Default link speed of discrete GPUs is determined by FIT parameters stored
+ * in their flash memory, which are subject to override through user initiated
+ * firmware updates. It has been observed that devices configured with PCIe
+ * Gen5 as their default speed can come across link quality issues due to host
+ * or motherboard limitations and may have to auto-downspeed to PCIe Gen4 when
+ * faced with unstable link at Gen5. The users are required to ensure that the
+ * device is capable of auto-downspeeding to PCIe Gen4 before pushing the image
+ * with Gen5 as default configuration. This can be done by reading
+ * ``pcie_gen4_downspeed_capable`` debugfs entry, which will denote PCIe Gen4
+ * auto-downspeed capability of the device with boolean output value of ``0``
+ * or ``1``, meaning `incapable` or `capable` respectively.
+ *
+ * .. code-block:: shell
+ *
+ *    $ cat /sys/kernel/debug/dri/<N>/pcie_gen4_downspeed_capable
+ *
+ * Pushing PCIe Gen5 update on an auto-downspeed incapable device and facing
+ * link instability due to host or motherboard limitations can result in driver
+ * not being able to successfully bind to the device, making further firmware
+ * updates impossible, with RMA being the last resort.
+ *
+ * Link downspeed status of auto-downspeed capable devices is available through
+ * ``pcie_gen4_downspeed_status`` debugfs entry with boolean output value of
+ * ``0`` or ``1``, with ``0`` meaning no downspeeding was required during link
+ * training (which is the optimal scenario) and ``1`` meaning the device has
+ * downsped to PCIe Gen4 due to unstable Gen5 link.
+ *
+ * .. code-block:: shell
+ *
+ *    $ cat /sys/kernel/debug/dri/<N>/pcie_gen4_downspeed_status
+ */
+
+static ssize_t pcie_gen4_downspeed_capable_show(struct file *f, char __user *ubuf,
+						size_t size, loff_t *pos)
+{	/* debugfs read handler: reports "1\n" or "0\n" for Gen4 auto-downspeed capability */
+	struct xe_device *xe = file_inode(f)->i_private; /* xe given to debugfs_create_file() */
+	struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+	char buf[16];
+	u32 len, val;
+
+	xe_pm_runtime_get(xe); /* get/put pair brackets only the register read */
+	val = xe_mmio_read32(mmio, PCODE_SCRATCH(16)); /* raw value of pcode scratch 16 */
+	xe_pm_runtime_put(xe);
+
+	len = scnprintf(buf, sizeof(buf), "%u\n", /* 1 only if field == DOWNGRADE_CAPABLE */
+			REG_FIELD_GET(PCIE_GEN4_DOWNGRADE, val) == DOWNGRADE_CAPABLE ? 1 : 0);
+
+	return simple_read_from_buffer(ubuf, size, pos, buf, len); /* copy out, honouring *pos */
+}
+
+static const struct file_operations pcie_gen4_downspeed_capable_fops = {
+	.owner = THIS_MODULE,
+	.read = pcie_gen4_downspeed_capable_show, /* read-only entry: no .write handler */
+};
+
+static ssize_t pcie_gen4_downspeed_status_show(struct file *f, char __user *ubuf,
+					       size_t size, loff_t *pos)
+{	/* debugfs read handler: reports bit 31 of the pcode init status as "0\n" or "1\n" */
+	struct xe_device *xe = file_inode(f)->i_private; /* xe given to debugfs_create_file() */
+	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+	char buf[16];
+	u32 len, val;
+	int ret;
+
+	xe_pm_runtime_get(xe); /* get/put pair brackets only the pcode mailbox read */
+	ret = xe_pcode_read(root_tile, PCODE_MBOX(DGFX_PCODE_STATUS,
+			    DGFX_GET_INIT_STATUS, 0), &val, NULL); /* second output unused */
+	xe_pm_runtime_put(xe);
+	if (ret)
+		return ret; /* pcode read failure is returned to the reader as-is */
+
+	len = scnprintf(buf, sizeof(buf), "%u\n", REG_FIELD_GET(REG_BIT(31), val));
+	return simple_read_from_buffer(ubuf, size, pos, buf, len); /* copy out, honouring *pos */
+}
+
+static const struct file_operations pcie_gen4_downspeed_status_fops = {
+	.owner = THIS_MODULE,
+	.read = pcie_gen4_downspeed_status_show, /* read-only entry: no .write handler */
+};
+
 void xe_debugfs_register(struct xe_device *xe)
 {
 	struct ttm_device *bdev = &xe->ttm;
@@ -211,6 +297,13 @@ void xe_debugfs_register(struct xe_device *xe)
 	debugfs_create_file("wedged_mode", 0600, root, xe,
 			    &wedged_mode_fops);
 
+	if (IS_DGFX(xe)) {
+		debugfs_create_file("pcie_gen4_downspeed_capable", 0400, root, xe,
+				    &pcie_gen4_downspeed_capable_fops);
+		debugfs_create_file("pcie_gen4_downspeed_status", 0400, root, xe,
+				    &pcie_gen4_downspeed_status_fops);
+	}
+
 	for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
 		man = ttm_manager_type(bdev, mem_type);
 
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index e622ae17f08d..1f802d9793ad 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -66,6 +66,10 @@
 /* Auxiliary info bits */
 #define   AUXINFO_HISTORY_OFFSET	REG_GENMASK(31, 29)
 
+/* PCIe Gen4 downgrade capability bits */
+#define   PCIE_GEN4_DOWNGRADE		REG_GENMASK(1, 0)
+#define      DOWNGRADE_CAPABLE		2
+
 struct pcode_err_decode {
 	int errno;
 	const char *str;
-- 
2.34.1



More information about the Intel-xe mailing list