[PATCH v2] drm/xe: Add configfs to load with fewer engines
Matt Roper
matthew.d.roper at intel.com
Thu May 22 19:01:38 UTC 2025
On Tue, May 20, 2025 at 02:28:58PM -0700, Lucas De Marchi wrote:
> Sometimes it's useful to load the driver with a smaller set of engines
> to allow more targeted debugging, particularly on early enabling. Add a
> file in configfs that allows enable only a set of engines.
>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
> Changes in v2:
> - Fix build with CONFIG_CONFIGFS_FS=n
> - Link to v1: https://lore.kernel.org/r/20250520-engine-mask-v1-1-ce6daf16d23b@intel.com
> ---
> drivers/gpu/drm/xe/xe_configfs.c | 133 ++++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_configfs.h | 2 +
> drivers/gpu/drm/xe/xe_hw_engine.c | 20 ++++++
> 3 files changed, 155 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
> index cb9f175c89a1c..fb433503501c8 100644
> --- a/drivers/gpu/drm/xe/xe_configfs.c
> +++ b/drivers/gpu/drm/xe/xe_configfs.c
> @@ -3,14 +3,18 @@
> * Copyright © 2025 Intel Corporation
> */
>
> +#include <linux/bitops.h>
> #include <linux/configfs.h>
> #include <linux/init.h>
> #include <linux/module.h>
> #include <linux/pci.h>
> +#include <linux/string.h>
>
> #include "xe_configfs.h"
> #include "xe_module.h"
>
> +#include "xe_hw_engine_types.h"
> +
> /**
> * DOC: Xe Configfs
> *
> @@ -48,6 +52,23 @@
> * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
> * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported)
> *
> + * Allowed engine:
> + * ---------------
> + *
> + * Allow only a set of engine(s) to be available - this is the equivalent of
> + * fusing engines off in software. Examples:
> + *
> + * Allow only one render and one copy engines, nothing else::
> + *
> + * # echo 'rcs0,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engine_allowed
> + *
> + * Allow only compute engines and first copy engine::
> + *
> + * # echo 'ccs*,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engine_allowed
> + *
> + * The requested configuration may not be supported by the platform and driver
> + * may fail to probe - intended for debugging purposes.
> + *
> * Remove devices
> * ==============
> *
> @@ -60,6 +81,7 @@ struct xe_config_device {
> struct config_group group;
>
> bool survivability_mode;
> + u64 engine_allowed;
>
> /* protects attributes */
> struct mutex lock;
> @@ -94,10 +116,95 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa
> return len;
> }
>
> +static ssize_t engine_allowed_show(struct config_item *item, char *page)
> +{
> + struct xe_config_device *dev = to_xe_config_device(item);
> +
> + return sprintf(page, "%#llx\n", dev->engine_allowed);
> +}
> +
> +static bool lookup_engine_mask(const char *name, size_t namelen, u64 *mask)
> +{
> + static const struct {
> + const char *cls;
> + u64 mask;
> + } map[] = {
> + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK },
> + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK },
> + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK },
> + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK },
> + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK },
Is it intentional that this only controls userspace-accessible engines?
Would there ever be any value in disabling internal engines like the GSC
for testing purposes?
> + };
> + size_t i;
> +
> + for (i = 0; i < ARRAY_SIZE(map); i++) {
> + size_t clslen = strlen(map[i].cls);
> + size_t numlen = namelen - clslen;
> + char instancestr[4];
> + u8 instance;
> +
> + if (namelen < clslen + 1 || strncmp(name, map[i].cls, clslen))
Nitpick: "namelen <= clslen" seems like slightly more natural way to
write the first part of this condition?
> + continue;
> +
> + if (name[clslen] == '*' && numlen == 1) {
> + *mask = map[i].mask;
> + return true;
> + }
> +
> + if (numlen > sizeof(instancestr) - 1)
> + return false;
> +
> + memcpy(instancestr, name + clslen, numlen);
> + instancestr[numlen] = '\0';
> +
> + if (!kstrtou8(instancestr, 10, &instance)) {
> + u16 bit = __ffs64(map[i].mask) + instance;
> + u64 val;
> +
> + if (bit > sizeof(val) * BITS_PER_BYTE)
> + return false;
Would it be simpler to check that bit <= fls of the engine mask? That
will both guarantee that it isn't past the end of a u64, and also that
it isn't overflowing into a bit for some other engine type.
> +
> + val = BIT_ULL(bit);
> + if ((val | map[i].mask) == map[i].mask) {
> + *mask = val;
> + return true;
> + }
> + }
> +
> + return false;
> + }
> +
> + return false;
> +}
> +
> +static ssize_t engine_allowed_store(struct config_item *item, const char *page,
> + size_t len)
> +{
> + struct xe_config_device *dev = to_xe_config_device(item);
> + size_t namelen, p;
> + u64 mask, val = 0;
> +
> + for (p = 0; p < len; p += namelen + 1) {
> + namelen = strcspn(page + p, ",\n");
> + if (!lookup_engine_mask(page + p, namelen, &mask))
> + return -EINVAL;
> +
> + val |= mask;
> + }
> +
> + mutex_lock(&dev->lock);
> + dev->engine_allowed = val;
> + mutex_unlock(&dev->lock);
> +
> + return len;
> +}
> +
> CONFIGFS_ATTR(, survivability_mode);
> +CONFIGFS_ATTR(, engine_allowed);
>
> static struct configfs_attribute *xe_config_device_attrs[] = {
> &attr_survivability_mode,
> + &attr_engine_allowed,
> NULL,
> };
>
> @@ -139,6 +246,9 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro
> if (!dev)
> return ERR_PTR(-ENOMEM);
>
> + /* Default values */
> + dev->engine_allowed = U64_MAX;
> +
> config_group_init_type_name(&dev->group, name, &xe_config_device_type);
>
> mutex_init(&dev->lock);
> @@ -226,6 +336,29 @@ void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
> config_item_put(&dev->group.cg_item);
> }
>
> +/**
> + * xe_configfs_get_engine_allowed - get engine allowed mask from configfs
> + * @pdev: pci device
> + *
> + * Find the configfs group that belongs to the pci device and return
> + * the mask of engines allowed to be used.
> + *
> + * Return: engine mask with allowed engines
> + */
> +u64 xe_configfs_get_engine_allowed(struct pci_dev *pdev)
> +{
> + struct xe_config_device *dev = configfs_find_group(pdev);
> + u64 engine_allowed;
> +
> + if (!dev)
> + return U64_MAX;
> +
> + engine_allowed = dev->engine_allowed;
> + config_item_put(&dev->group.cg_item);
> +
> + return engine_allowed;
> +}
> +
> int __init xe_configfs_init(void)
> {
> struct config_group *root = &xe_configfs.su_group;
> diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
> index d7d041ec26117..c80131f994a71 100644
> --- a/drivers/gpu/drm/xe/xe_configfs.h
> +++ b/drivers/gpu/drm/xe/xe_configfs.h
> @@ -14,11 +14,13 @@ int xe_configfs_init(void);
> void xe_configfs_exit(void);
> bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
> void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
> +u64 xe_configfs_get_engine_allowed(struct pci_dev *pdev);
> #else
> static inline int xe_configfs_init(void) { return 0; };
> static inline void xe_configfs_exit(void) {};
> static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; };
> static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {};
> +static inline u64 xe_configfs_get_engine_allowed(struct pci_dev *pdev) { return U64_MAX; }
> #endif
>
> #endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 93241fd0a4ba3..af750ed8c0f00 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -17,6 +17,7 @@
> #include "regs/xe_irq_regs.h"
> #include "xe_assert.h"
> #include "xe_bo.h"
> +#include "xe_configfs.h"
> #include "xe_device.h"
> #include "xe_execlist.h"
> #include "xe_force_wake.h"
> @@ -810,6 +811,24 @@ static void check_gsc_availability(struct xe_gt *gt)
> }
> }
>
> +static void check_sw_fuses(struct xe_gt *gt)
> +{
> + struct xe_device *xe = gt_to_xe(gt);
> + u64 sw_allowed = xe_configfs_get_engine_allowed(to_pci_dev(xe->drm.dev));
> + enum xe_hw_engine_id id;
> +
> + for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
> + if (!(gt->info.engine_mask & BIT(id)))
> + continue;
> +
> + if (!(sw_allowed & BIT(id))) {
> + gt->info.engine_mask &= ~BIT(id);
> + drm_info(&xe->drm, "%s fused off (in sw)\n",
I'd avoid using the term 'fuse' here. Maybe just "%s disabled via
configfs" would be more accurate and helpful.
Matt
> + engine_infos[id].name);
> + }
> + }
> +}
> +
> int xe_hw_engines_init_early(struct xe_gt *gt)
> {
> int i;
> @@ -818,6 +837,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt)
> read_copy_fuses(gt);
> read_compute_fuses(gt);
> check_gsc_availability(gt);
> + check_sw_fuses(gt);
>
> BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
> BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
>
>
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list