[PATCH 1/2] RFC drm/xe: Add configfs to enable survivability mode

Rodrigo Vivi rodrigo.vivi at intel.com
Thu Mar 13 14:52:36 UTC 2025


On Thu, Mar 13, 2025 at 11:48:41AM +0530, Riana Tauro wrote:
> Hi Aravind
> 
> On 3/10/2025 12:44 PM, Aravind Iddamsetty wrote:
> > 
> > On 07-03-2025 19:54, Riana Tauro wrote:
> > 
> > Hi Riana,
> > 
> > I do think we can achieve the same functionality with module param and
> > we needn't reload the driver
> > if we are doing unbind, Since the driver will be loaded event after
> > unbind we can modify the module param
> 
> tried this.  We can modify the module param using sysfs and bind similar to
> configfs.
> 
> If we want to configure any other attributes or move existing module params
> to device specific then adding configfs seems better.
> 
> @Rodrigo thoughts?

Yeap, if we make the param write-able and document that it is only checked
at probe we could indeed use the flow

echo <bdf> > /sys/../xe/unbind
echo <bdf> > /sys/../xe/param/survivability_mode
echo <bdf> > /sys/../xe/bind

and accomplish the same.

on the good side:

So, this is the future prof case. Because if we start seeing cases where
the device fails at boot without the FW request for the survivability mode
we might be forced to have a parameter anyway. :/

on the bad side:

But we were actually aiming to reduce the parameters that we have
because that was getting indiscriminately used and even recommended by some
distros' doc and wikis.

Another problem with the write-able param is that people might expect to

echo <bdf> > /sys/../xe/param/survivability_mode

and immediately get the device in the survivability_mode for that device.
Then we are going to get questions and bug reports that this is not working.

But well, perhaps documenting the flow properly in the param description
might solve this concern.

Lucas, Thomas, thoughts?

> 
> Thanks
> Riana
> > and once we bind the device back it can check if the BDF belongs to this
> > driver instance and configure the mode accordingly.
> > 
> > Thanks,
> > Aravind.
> > > Registers a configfs subsystem called 'xe' to userspace. The user can
> > > use this to modify exposed attributes.
> > > 
> > > Add survivability mode attribute (config/xe/survivability_mode) to the
> > > subsystem to allow the user to specify the card that should enter
> > > survivability mode.
> > > 
> > > Signed-off-by: Riana Tauro <riana.tauro at intel.com>
> > > ---
> > >   drivers/gpu/drm/xe/Makefile      |  1 +
> > >   drivers/gpu/drm/xe/xe_configfs.c | 95 ++++++++++++++++++++++++++++++++
> > >   drivers/gpu/drm/xe/xe_configfs.h | 12 ++++
> > >   drivers/gpu/drm/xe/xe_module.c   |  5 ++
> > >   drivers/gpu/drm/xe/xe_module.h   |  1 +
> > >   5 files changed, 114 insertions(+)
> > >   create mode 100644 drivers/gpu/drm/xe/xe_configfs.c
> > >   create mode 100644 drivers/gpu/drm/xe/xe_configfs.h
> > > 
> > > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > > index 9699b08585f7..3f8c87292cee 100644
> > > --- a/drivers/gpu/drm/xe/Makefile
> > > +++ b/drivers/gpu/drm/xe/Makefile
> > > @@ -28,6 +28,7 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
> > >   xe-y += xe_bb.o \
> > >   	xe_bo.o \
> > >   	xe_bo_evict.o \
> > > +	xe_configfs.o \
> > >   	xe_devcoredump.o \
> > >   	xe_device.o \
> > >   	xe_device_sysfs.o \
> > > diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
> > > new file mode 100644
> > > index 000000000000..8c5f248e466d
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/xe/xe_configfs.c
> > > @@ -0,0 +1,95 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright © 2025 Intel Corporation
> > > + */
> > > +
> > > +#include <linux/configfs.h>
> > > +#include <linux/init.h>
> > > +#include <linux/module.h>
> > > +
> > > +#include "xe_configfs.h"
> > > +#include "xe_module.h"
> > > +
> > > +/**
> > > + * DOC: Xe Configfs
> > > + *
> > > + * XE KMD registers a configfs subsystem called 'xe'to userspace that allows users to modify
> > > + * the exposed attributes.
> > > + *
> > > + * Attributes:
> > > + *
> > > + * config/xe/survivability_mode : Write only attribute that allows user to specify the PCI address
> > > + *				  of the card that has to enter survivability mode
> > > + */
> > > +
> > > +void xe_configfs_clear_survivability_mode(void)
> > > +{
> > > +	kfree(xe_modparam.survivability_mode);
> > > +	xe_modparam.survivability_mode = NULL;
> > > +}
> > > +
> > > +static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len)
> > > +{
> > > +	char *survivability_mode;
> > > +	int ret;
> > > +	unsigned int domain, bus, slot, function;
> > > +
> > > +	ret = sscanf(page, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function);
> > > +	if (ret != 4)
> > > +		return -EINVAL;
> > > +
> > > +	survivability_mode = kstrdup(page, GFP_KERNEL);
> > > +	if (!survivability_mode)
> > > +		return -ENOMEM;
> > > +
> > > +	xe_configfs_clear_survivability_mode();
> > > +	xe_modparam.survivability_mode = survivability_mode;
> > > +
> > > +	return len;
> > > +}
> > > +
> > > +CONFIGFS_ATTR_WO(, survivability_mode);
> > > +
> > > +static struct configfs_attribute *xe_configfs_attrs[] = {
> > > +	&attr_survivability_mode,
> > > +	NULL,
> > > +};
> > > +
> > > +static const struct config_item_type xe_config_type = {
> > > +	.ct_attrs	= xe_configfs_attrs,
> > > +	.ct_owner	= THIS_MODULE,
> > > +};
> > > +
> > > +static struct configfs_subsystem xe_config_subsys = {
> > > +	.su_group = {
> > > +		.cg_item = {
> > > +			.ci_namebuf = "xe",
> > > +			.ci_type = &xe_config_type,
> > > +		},
> > > +	},
> > > +};
> > > +
> > > +int __init xe_configfs_init(void)
> > > +{
> > > +	int ret;
> > > +
> > > +	config_group_init(&xe_config_subsys.su_group);
> > > +	mutex_init(&xe_config_subsys.su_mutex);
> > > +	ret = configfs_register_subsystem(&xe_config_subsys);
> > > +	if (ret) {
> > > +		pr_err("Error %d while registering subsystem %s\n",
> > > +		       ret, xe_config_subsys.su_group.cg_item.ci_namebuf);
> > > +		mutex_destroy(&xe_config_subsys.su_mutex);
> > > +		return ret;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +void __exit xe_configfs_exit(void)
> > > +{
> > > +	xe_configfs_clear_survivability_mode();
> > > +	configfs_unregister_subsystem(&xe_config_subsys);
> > > +	mutex_destroy(&xe_config_subsys.su_mutex);
> > > +}
> > > +
> > > diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
> > > new file mode 100644
> > > index 000000000000..491629a2ca53
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/xe/xe_configfs.h
> > > @@ -0,0 +1,12 @@
> > > +/* SPDX-License-Identifier: MIT */
> > > +/*
> > > + * Copyright © 2025 Intel Corporation
> > > + */
> > > +#ifndef _XE_CONFIGFS_H_
> > > +#define _XE_CONFIGFS_H_
> > > +
> > > +int xe_configfs_init(void);
> > > +void xe_configfs_exit(void);
> > > +void xe_configfs_clear_survivability_mode(void);
> > > +
> > > +#endif
> > > diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
> > > index 475acdba2b55..15b3cf22193c 100644
> > > --- a/drivers/gpu/drm/xe/xe_module.c
> > > +++ b/drivers/gpu/drm/xe/xe_module.c
> > > @@ -11,6 +11,7 @@
> > >   #include <drm/drm_module.h>
> > >   #include "xe_drv.h"
> > > +#include "xe_configfs.h"
> > >   #include "xe_hw_fence.h"
> > >   #include "xe_pci.h"
> > >   #include "xe_pm.h"
> > > @@ -91,6 +92,10 @@ static const struct init_funcs init_funcs[] = {
> > >   	{
> > >   		.init = xe_check_nomodeset,
> > >   	},
> > > +	{
> > > +		.init = xe_configfs_init,
> > > +		.exit = xe_configfs_exit,
> > > +	},
> > >   	{
> > >   		.init = xe_hw_fence_module_init,
> > >   		.exit = xe_hw_fence_module_exit,
> > > diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
> > > index 84339e509c80..c238dbee6bc7 100644
> > > --- a/drivers/gpu/drm/xe/xe_module.h
> > > +++ b/drivers/gpu/drm/xe/xe_module.h
> > > @@ -24,6 +24,7 @@ struct xe_modparam {
> > >   #endif
> > >   	int wedged_mode;
> > >   	u32 svm_notifier_size;
> > > +	char *survivability_mode;
> > >   };
> > >   extern struct xe_modparam xe_modparam;
> 


More information about the Intel-xe mailing list