[PATCH v2 2/3] drm/xe: Expose PCIe Gen4 downspeed attributes

Riana Tauro riana.tauro at intel.com
Wed Apr 16 10:06:55 UTC 2025


Hi Raag

On 4/3/2025 11:17 PM, Raag Jadav wrote:
> Expose sysfs attributes for PCIe Gen4 downspeed capability and status.
> 
> Signed-off-by: Raag Jadav <raag.jadav at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_device_sysfs.c | 86 ++++++++++++++++++++++++++++
>   drivers/gpu/drm/xe/xe_pcode_api.h    |  5 ++
>   2 files changed, 91 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
> index d4c73acea1cf..4e6175907832 100644
> --- a/drivers/gpu/drm/xe/xe_device_sysfs.c
> +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
> @@ -11,6 +11,9 @@
>   
>   #include "xe_device.h"
>   #include "xe_device_sysfs.h"
> +#include "xe_mmio.h"
> +#include "xe_pcode_api.h"
> +#include "xe_pcode.h"
>   #include "xe_pm.h"
>   
>   /**
> @@ -63,12 +66,85 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
>   
>   static DEVICE_ATTR_RW(vram_d3cold_threshold);
>   
> +/**
> + * DOC: PCIe Gen5 Update Limitations
> + *
> + * Default link speed of discrete GPUs is determined by FIT parameters stored
> + * in their flash memory, which are subject to override through user initiated
> + * firmware updates. It has been observed that devices configured with PCIe
> + * Gen5 as their default speed can come across link quality issues due to host
> + * or motherboard limitations and may have to auto-downspeed to PCIe Gen4 when
> + * faced with unstable link at Gen5. The users are required to ensure that the
> + * device is capable of auto-downspeeding to PCIe Gen4 link before pushing the
> + * image with Gen5 as default configuration. This can be done by reading
> + * ``pcie_gen4_downspeed_capable`` sysfs entry, which will denote PCIe Gen4
> + * downspeed capability of the device with boolean output value of ``0`` or
> + * ``1``, meaning `incapable` or `capable` respectively.
> + *
> + * .. code-block:: shell
> + *
> + *    $ cat /sys/bus/pci/devices/<bdf>/pcie_gen4_downspeed_capable
> + *
> + * Pushing PCIe Gen5 update on a downspeed incapable device and facing link
> + * instability due to host or motherboard limitations can result in driver not
> + * being able to successfully bind to the device, making further firmware
> + * updates impossible with RMA being the only last resort.
> + *
> + * Link downspeed status of PCIe Gen4 downspeed capable devices is available
> + * through ``pcie_gen4_downspeed_status`` sysfs entry with boolean output value
> + * of ``0`` or ``1``, where ``0`` means no auto-downspeeding was required during
> + * link training (which is the optimal scenario) and ``1`` means the device has
> + * auto-downsped to PCIe Gen4 due to unstable Gen5 link.
> + *
> + * .. code-block:: shell
> + *
> + *    $ cat /sys/bus/pci/devices/<bdf>/pcie_gen4_downspeed_status
Why downspeed? Shouldn't it be downgrade?> + */
> +
> +static ssize_t
> +pcie_gen4_downspeed_capable_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct xe_device *xe = pdev_to_xe_device(pdev);
> +	u32 cap, val;
> +
> +	xe_pm_runtime_get(xe);
> +	val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE4_CAP);
> +	xe_pm_runtime_put(xe);
> +
> +	cap = REG_FIELD_GET(PCIE4_DOWNSPEED, val);
> +	return sysfs_emit(buf, "%u\n", cap == DOWNSPEED_CAPABLE ? true : false);
> +}
> +static DEVICE_ATTR_RO(pcie_gen4_downspeed_capable);
> +
> +static ssize_t
> +pcie_gen4_downspeed_status_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct xe_device *xe = pdev_to_xe_device(pdev);
> +	u32 val;
> +	int ret;
> +
> +	xe_pm_runtime_get(xe);
> +	ret = xe_pcode_read(xe_device_get_root_tile(xe), PCODE_MBOX(DGFX_PCODE_STATUS,
> +			    DGFX_GET_INIT_STATUS, 0), &val, NULL);
indentation> +	xe_pm_runtime_put(xe);
> +
> +	return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_PCIE4_DOWNSPEED_STATUS, val));
> +}
> +static DEVICE_ATTR_RO(pcie_gen4_downspeed_status);
> +
>   static void xe_device_sysfs_fini(void *arg)
>   {
>   	struct xe_device *xe = arg;
>   
>   	if (xe->d3cold.capable)
>   		sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
> +
> +	if (xe->info.platform == XE_BATTLEMAGE) {
> +		sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_pcie_gen4_downspeed_capable.attr);
> +		sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_pcie_gen4_downspeed_status.attr);
> +	}
>   }
>   
>   int xe_device_sysfs_init(struct xe_device *xe)
> @@ -82,5 +158,15 @@ int xe_device_sysfs_init(struct xe_device *xe)
>   			return ret;
>   	}
>   
> +	if (xe->info.platform == XE_BATTLEMAGE) {
> +		ret = sysfs_create_file(&dev->kobj, &dev_attr_pcie_gen4_downspeed_capable.attr);
> +		if (ret)
> +			return ret;
> +
> +		ret = sysfs_create_file(&dev->kobj, &dev_attr_pcie_gen4_downspeed_status.attr);
> +		if (ret)
> +			return ret;
> +	}
This should be create_files. If second one fails, none of the other 
files are removed.> +
>   	return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe);
>   }
> diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
> index e622ae17f08d..1ea6b2a586ae 100644
> --- a/drivers/gpu/drm/xe/xe_pcode_api.h
> +++ b/drivers/gpu/drm/xe/xe_pcode_api.h
> @@ -34,6 +34,7 @@
>   #define   DGFX_PCODE_STATUS		0x7E
>   #define     DGFX_GET_INIT_STATUS	0x0
>   #define     DGFX_INIT_STATUS_COMPLETE	0x1
> +#define     DGFX_PCIE4_DOWNSPEED_STATUS	REG_BIT(31)PCIE_GEN4

Thanks
Riana>
>   #define   PCODE_POWER_SETUP			0x7C
>   #define     POWER_SETUP_SUBCOMMAND_READ_I1	0x4
> @@ -66,6 +67,10 @@
>   /* Auxiliary info bits */
>   #define   AUXINFO_HISTORY_OFFSET	REG_GENMASK(31, 29)
>   
> +#define BMG_PCIE4_CAP			XE_REG(0x138340)
> +#define   PCIE4_DOWNSPEED		REG_GENMASK(1, 0)
> +#define      DOWNSPEED_CAPABLE		2
> +
>   struct pcode_err_decode {
>   	int errno;
>   	const char *str;



More information about the Intel-xe mailing list