[PATCH v3 3/3] drm/xe/configfs: Add attribute to disable engines
Matt Roper
matthew.d.roper at intel.com
Fri May 23 21:21:57 UTC 2025
On Fri, May 23, 2025 at 10:42:33AM -0700, Lucas De Marchi wrote:
> Add the userspace interface to load the driver with fewer engines.
> The syntax is to just echo the engine names to a file in configfs, like
> below:
>
> echo 'rcs0,bcs0' > /sys/kernel/config/xe/<bdf>/engine_allowed
>
> With that engines other than rcs0 and bcs0 will not be enabled. To
> enable all instances from a class, a '*' can be used.
>
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
> drivers/gpu/drm/xe/xe_configfs.c | 137 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 135 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
> index 11ca36f194bfc..b27bbe203eb46 100644
> --- a/drivers/gpu/drm/xe/xe_configfs.c
> +++ b/drivers/gpu/drm/xe/xe_configfs.c
> @@ -3,14 +3,19 @@
> * Copyright © 2025 Intel Corporation
> */
>
> +#include <linux/bitops.h>
> #include <linux/configfs.h>
> +#include <linux/find.h>
> #include <linux/init.h>
> #include <linux/module.h>
> #include <linux/pci.h>
> +#include <linux/string.h>
>
> #include "xe_configfs.h"
> #include "xe_module.h"
>
> +#include "xe_hw_engine_types.h"
> +
> /**
> * DOC: Xe Configfs
> *
> @@ -48,6 +53,23 @@
> * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
> * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported)
> *
> + * Allowed engine:
> + * ---------------
> + *
> + * Allow only a set of engine(s) to be available - this is the equivalent of
> + * fusing engines off in software. Examples:
> + *
> + * Allow only one render and one copy engines, nothing else::
> + *
> + * # echo 'rcs0,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engine_allowed
> + *
> + * Allow only compute engines and first copy engine::
> + *
> + * # echo 'ccs*,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engine_allowed
> + *
> + * The requested configuration may not be supported by the platform and driver
> + * may fail to probe - intended for debugging purposes.
We should add a note that the engine names here are the per-GT hardware
names for the engines, not the names that would be seen by userspace
(e.g., don't expect "ccs4" to refer to the first compute engine on the
second tile and such). Also worth noting that the mask provided will be
applied to the remaining engines (after hardware fuses are considered)
on every tile.
> + *
> * Remove devices
> * ==============
> *
> @@ -60,11 +82,26 @@ struct xe_config_device {
> struct config_group group;
>
> bool survivability_mode;
> + u64 engine_allowed;
>
> /* protects attributes */
> struct mutex lock;
> };
>
> +struct engine_info {
> + const char *cls;
> + u64 mask;
> +};
> +
> +static const struct engine_info engine_info[] = {
> + { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK },
> + { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK },
> + { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK },
> + { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK },
> + { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK },
> + { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK },
> +};
> +
> static struct xe_config_device *to_xe_config_device(struct config_item *item)
> {
> return container_of(to_config_group(item), struct xe_config_device, group);
> @@ -94,10 +131,95 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa
> return len;
> }
>
> +static ssize_t engine_allowed_show(struct config_item *item, char *page)
> +{
> + struct xe_config_device *dev = to_xe_config_device(item);
> + char *p = page;
> +
> + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
> + u64 mask = engine_info[i].mask;
> +
> + if ((dev->engine_allowed & mask) == mask) {
> + p += sprintf(p, "%s*\n", engine_info[i].cls);
> + } else if (mask & dev->engine_allowed) {
> + u16 bit0 = __ffs64(mask), bit;
> +
> + mask &= dev->engine_allowed;
> +
> + for_each_set_bit(bit, (const unsigned long *)&mask, 64)
> + p += sprintf(p, "%s%u\n", engine_info[i].cls,
> + bit - bit0);
I didn't trace back through the configfs code to figure out where the
buffer we write into originally comes from...I'm guessing if it's always
truly a full page as the name implies then we don't have to worry about
the sprintf ever overflowing the buffer?
> + }
> + }
> +
> + return p - page;
> +}
> +
> +static bool lookup_engine_mask(const char *name, size_t namelen, u64 *mask)
> +{
> + for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
> + size_t clslen = strlen(engine_info[i].cls);
> + size_t numlen = namelen - clslen;
> + char instancestr[4];
> + u8 instance;
> + u16 bit;
> +
> + if (namelen <= clslen || strncmp(name, engine_info[i].cls, clslen))
> + continue;
> +
> + if (name[clslen] == '*' && numlen == 1) {
> + *mask = engine_info[i].mask;
> + return true;
> + }
> +
> + if (numlen > sizeof(instancestr) - 1)
> + return false;
> +
> + memcpy(instancestr, name + clslen, numlen);
> + instancestr[numlen] = '\0';
> +
> + if (kstrtou8(instancestr, 10, &instance))
> + return false;
> +
> + bit = __ffs64(engine_info[i].mask) + instance;
> + if (bit > fls64(engine_info[i].mask))
Hmm, looks like there isn't an __fls64 defined in the kernel libraries?
fls64 uses libc semantics (i.e., returns 1-64 for the various bit
positions instead of 0-63), so I think this is going to be off by 1.
E.g., if I request "rcs1" then bit = 1. But
fls64(XE_HW_ENGINE_RCS_MASK) = 1 so we won't flag this as a problem. So
should this be a >= instead?
Matt
> + return false;
> +
> + *mask = BIT_ULL(bit);
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static ssize_t engine_allowed_store(struct config_item *item, const char *page,
> + size_t len)
> +{
> + struct xe_config_device *dev = to_xe_config_device(item);
> + size_t namelen, p;
> + u64 mask, val = 0;
> +
> + for (p = 0; p < len; p += namelen + 1) {
> + namelen = strcspn(page + p, ",\n");
> + if (!lookup_engine_mask(page + p, namelen, &mask))
> + return -EINVAL;
> +
> + val |= mask;
> + }
> +
> + mutex_lock(&dev->lock);
> + dev->engine_allowed = val;
> + mutex_unlock(&dev->lock);
> +
> + return len;
> +}
> +
> CONFIGFS_ATTR(, survivability_mode);
> +CONFIGFS_ATTR(, engine_allowed);
>
> static struct configfs_attribute *xe_config_device_attrs[] = {
> &attr_survivability_mode,
> + &attr_engine_allowed,
> NULL,
> };
>
> @@ -139,6 +261,9 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro
> if (!dev)
> return ERR_PTR(-ENOMEM);
>
> + /* Default values */
> + dev->engine_allowed = U64_MAX;
> +
> config_group_init_type_name(&dev->group, name, &xe_config_device_type);
>
> mutex_init(&dev->lock);
> @@ -237,8 +362,16 @@ void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
> */
> u64 xe_configfs_get_engine_allowed(struct pci_dev *pdev)
> {
> - /* dummy implementation */
> - return U64_MAX;
> + struct xe_config_device *dev = configfs_find_group(pdev);
> + u64 engine_allowed;
> +
> + if (!dev)
> + return U64_MAX;
> +
> + engine_allowed = dev->engine_allowed;
> + config_item_put(&dev->group.cg_item);
> +
> + return engine_allowed;
> }
>
> int __init xe_configfs_init(void)
>
> --
> 2.49.0
>
--
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation
More information about the Intel-xe
mailing list