[PATCH 1/2] RFC drm/xe: Add configfs to enable survivability mode

Riana Tauro riana.tauro at intel.com
Mon Mar 10 05:31:50 UTC 2025


Hi Lucas

On 3/7/2025 8:46 PM, Lucas De Marchi wrote:
> On Fri, Mar 07, 2025 at 07:54:44PM +0530, Riana Tauro wrote:
>> Registers a configfs subsystem called 'xe' to userspace. The user can
>> use this to modify exposed attributes.
>>
>> Add survivability mode attribute (config/xe/survivability_mode) to the
>> subsystem to allow the user to specify the card that should enter
>> survivability mode.
>>
>> Signed-off-by: Riana Tauro <riana.tauro at intel.com>
>> ---
>> drivers/gpu/drm/xe/Makefile      |  1 +
>> drivers/gpu/drm/xe/xe_configfs.c | 95 ++++++++++++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_configfs.h | 12 ++++
>> drivers/gpu/drm/xe/xe_module.c   |  5 ++
>> drivers/gpu/drm/xe/xe_module.h   |  1 +
>> 5 files changed, 114 insertions(+)
>> create mode 100644 drivers/gpu/drm/xe/xe_configfs.c
>> create mode 100644 drivers/gpu/drm/xe/xe_configfs.h
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 9699b08585f7..3f8c87292cee 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -28,6 +28,7 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/ 
>> %_wa_oob.h: $(obj)/xe_gen_wa_oob \
>> xe-y += xe_bb.o \
>>     xe_bo.o \
>>     xe_bo_evict.o \
>> +    xe_configfs.o \
>>     xe_devcoredump.o \
>>     xe_device.o \
>>     xe_device_sysfs.o \
>> diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/ 
>> xe_configfs.c
>> new file mode 100644
>> index 000000000000..8c5f248e466d
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_configfs.c
>> @@ -0,0 +1,95 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2025 Intel Corporation
>> + */
>> +
>> +#include <linux/configfs.h>
>> +#include <linux/init.h>
>> +#include <linux/module.h>
>> +
>> +#include "xe_configfs.h"
>> +#include "xe_module.h"
>> +
>> +/**
>> + * DOC: Xe Configfs
>> + *
>> + * XE KMD registers a configfs subsystem called 'xe'to userspace that 
>> allows users to modify
>> + * the exposed attributes.
>> + *
>> + * Attributes:
>> + *
>> + * config/xe/survivability_mode : Write only attribute that allows 
>> user to specify the PCI address
>> + *                  of the card that has to enter survivability mode
> 
> I think the desired interface is actually that the user mkdir the bdf in
> <configfs>/xe/. This populates the available config entries that the user
> writes to.

Initial thought was mkdir bdf, but since it was a single attribute and 
after a offline discussion with Rodrigo, did a simpler version to get 
comments on the RFC patch and if configfs is okay to use
for survivability mode

For survivability mode, below procedure needs to be followed

echo "0000:03:00.0" > /sys/bus/pci/drivers/xe/unbind
add bdf for survivability mode
echo "0000:03:00.0" > /sys/bus/pci/drivers/xe/bind

After the device is removed directory has to be created, the bdf has
to be checked if it belongs to xe driver and then attrs populated.
Even i think mkdir is better in case other attrs have to be added in 
future, but for unbind case the check of the driver might be tricky .

The attr is WO and any value can be written, only if the correct bdf is 
added in the attr then it'll be used on probe to enter survivability 
mode.The current patch only checks the format and does not check if bdf 
belongs to xe.
> 
>> + */
>> +
>> +void xe_configfs_clear_survivability_mode(void)
>> +{
>> +    kfree(xe_modparam.survivability_mode);
>> +    xe_modparam.survivability_mode = NULL;
>> +}
>> +
>> +static ssize_t survivability_mode_store(struct config_item *item, 
>> const char *page, size_t len)
> 
> once you handle the mkdir mentioned above, this should probably be
> a boolean attr like this:
> 
>      drivers/nvme/target/configfs.c:CONFIGFS_ATTR(nvmet_, param_pi_enable);
> 
>> +{
>> +    char *survivability_mode;
>> +    int ret;
>> +    unsigned int domain, bus, slot, function;
>> +
>> +    ret = sscanf(page, "%04x:%02x:%02x.%x", &domain, &bus, &slot, 
>> &function);
>> +    if (ret != 4)
>> +        return -EINVAL;
>> +
>> +    survivability_mode = kstrdup(page, GFP_KERNEL);
>> +    if (!survivability_mode)
>> +        return -ENOMEM;
>> +
>> +    xe_configfs_clear_survivability_mode();
>> +    xe_modparam.survivability_mode = survivability_mode;
>> +
>> +    return len;
>> +}
>> +
>> +CONFIGFS_ATTR_WO(, survivability_mode);
>> +
>> +static struct configfs_attribute *xe_configfs_attrs[] = {
>> +    &attr_survivability_mode,
>> +    NULL,
>> +};
>> +
>> +static const struct config_item_type xe_config_type = {
>> +    .ct_attrs    = xe_configfs_attrs,
>> +    .ct_owner    = THIS_MODULE,
>> +};
>> +
>> +static struct configfs_subsystem xe_config_subsys = {
>> +    .su_group = {
>> +        .cg_item = {
>> +            .ci_namebuf = "xe",
>> +            .ci_type = &xe_config_type,
>> +        },
>> +    },
>> +};
>>
> 
> so... it seems you are missing a configfs_group_operations.make_group.
> 
>> +int __init xe_configfs_init(void)
>> +{
>> +    int ret;
>> +
>> +    config_group_init(&xe_config_subsys.su_group);
>> +    mutex_init(&xe_config_subsys.su_mutex);
> 
> this mutex_init seems odd, but inline with other drivers
yeah it is not used anywhere but the sample and other drivers also 
initialize it
> 
>> +    ret = configfs_register_subsystem(&xe_config_subsys);
>> +    if (ret) {
>> +        pr_err("Error %d while registering subsystem %s\n",
>> +               ret, xe_config_subsys.su_group.cg_item.ci_namebuf);
>> +        mutex_destroy(&xe_config_subsys.su_mutex);
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +void __exit xe_configfs_exit(void)
>> +{
>> +    xe_configfs_clear_survivability_mode();
>> +    configfs_unregister_subsystem(&xe_config_subsys);
>> +    mutex_destroy(&xe_config_subsys.su_mutex);
>> +}
>> +
>> diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/ 
>> xe_configfs.h
>> new file mode 100644
>> index 000000000000..491629a2ca53
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_configfs.h
>> @@ -0,0 +1,12 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2025 Intel Corporation
>> + */
>> +#ifndef _XE_CONFIGFS_H_
>> +#define _XE_CONFIGFS_H_
>> +
>> +int xe_configfs_init(void);
>> +void xe_configfs_exit(void);
>> +void xe_configfs_clear_survivability_mode(void);
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/ 
>> xe_module.c
>> index 475acdba2b55..15b3cf22193c 100644
>> --- a/drivers/gpu/drm/xe/xe_module.c
>> +++ b/drivers/gpu/drm/xe/xe_module.c
>> @@ -11,6 +11,7 @@
>> #include <drm/drm_module.h>
>>
>> #include "xe_drv.h"
>> +#include "xe_configfs.h"
>> #include "xe_hw_fence.h"
>> #include "xe_pci.h"
>> #include "xe_pm.h"
>> @@ -91,6 +92,10 @@ static const struct init_funcs init_funcs[] = {
>>     {
>>         .init = xe_check_nomodeset,
>>     },
>> +    {
>> +        .init = xe_configfs_init,
>> +        .exit = xe_configfs_exit,
>> +    },
>>     {
>>         .init = xe_hw_fence_module_init,
>>         .exit = xe_hw_fence_module_exit,
>> diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/ 
>> xe_module.h
>> index 84339e509c80..c238dbee6bc7 100644
>> --- a/drivers/gpu/drm/xe/xe_module.h
>> +++ b/drivers/gpu/drm/xe/xe_module.h
>> @@ -24,6 +24,7 @@ struct xe_modparam {
>> #endif
>>     int wedged_mode;
>>     u32 svm_notifier_size;
>> +    char *survivability_mode;
> 
> drop this.. We want a  config struct in the xe_device. It's actually
> allocated by the mkdir in the configs and when we are probing, we should
> find the config instace based on pdev and set the pointer xe->configfs
> or something like that.

Will add a new struct

Thanks
Riana
> 
> thanks
> Lucas De Marchi
> 
>> };
>>
>> extern struct xe_modparam xe_modparam;
>> -- 
>> 2.47.1
>>



More information about the Intel-xe mailing list