[PATCH v3 2/3] drm/xe: Allow to disable engines

Matt Roper matthew.d.roper at intel.com
Fri May 23 21:02:11 UTC 2025


On Fri, May 23, 2025 at 10:42:32AM -0700, Lucas De Marchi wrote:
> Sometimes it's useful to load the driver with a smaller set of engines
> to allow more targeted debugging, particularly on early enabling.
> 
> Besides checking what is fused off in hardware, add similar logic to
> disable engines in software. This will use configfs to allow users
> to set which engines to disable, so prepare for that already. The
> exact configfs interface will be added later.
> 
> Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_configfs.c  | 15 +++++++++++++++
>  drivers/gpu/drm/xe/xe_configfs.h  |  2 ++
>  drivers/gpu/drm/xe/xe_hw_engine.c | 20 ++++++++++++++++++++
>  3 files changed, 37 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
> index cb9f175c89a1c..11ca36f194bfc 100644
> --- a/drivers/gpu/drm/xe/xe_configfs.c
> +++ b/drivers/gpu/drm/xe/xe_configfs.c
> @@ -226,6 +226,21 @@ void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
>  	config_item_put(&dev->group.cg_item);
>  }
>  
> +/**
> + * xe_configfs_get_engine_allowed - get engine allowed mask from configfs
> + * @pdev: pci device
> + *
> + * Find the configfs group that belongs to the pci device and return
> + * the mask of engines allowed to be used.
> + *
> + * Return: engine mask with allowed engines
> + */
> +u64 xe_configfs_get_engine_allowed(struct pci_dev *pdev)

Nitpick:  should this be plural engine*s*_allowed?  Alternatively,
engine_mask_allowed, although the function name is starting to get a bit
long...

> +{
> +	/* dummy implementation */
> +	return U64_MAX;
> +}
> +
>  int __init xe_configfs_init(void)
>  {
>  	struct config_group *root = &xe_configfs.su_group;
> diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
> index ef6d231b3024b..050da4689d653 100644
> --- a/drivers/gpu/drm/xe/xe_configfs.h
> +++ b/drivers/gpu/drm/xe/xe_configfs.h
> @@ -14,11 +14,13 @@ int xe_configfs_init(void);
>  void xe_configfs_exit(void);
>  bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
>  void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
> +u64 xe_configfs_get_engine_allowed(struct pci_dev *pdev);
>  #else
>  static inline int xe_configfs_init(void) { return 0; }
>  static inline void xe_configfs_exit(void) { }
>  static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }
>  static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) { }
> +static inline u64 xe_configfs_get_engine_allowed(struct pci_dev *pdev) { return U64_MAX; }
>  #endif
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
> index 93241fd0a4ba3..8e7f580db86d8 100644
> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
> @@ -17,6 +17,7 @@
>  #include "regs/xe_irq_regs.h"
>  #include "xe_assert.h"
>  #include "xe_bo.h"
> +#include "xe_configfs.h"
>  #include "xe_device.h"
>  #include "xe_execlist.h"
>  #include "xe_force_wake.h"
> @@ -810,6 +811,24 @@ static void check_gsc_availability(struct xe_gt *gt)
>  	}
>  }
>  
> +static void check_sw_disable(struct xe_gt *gt)
> +{
> +	struct xe_device *xe = gt_to_xe(gt);
> +	u64 sw_allowed = xe_configfs_get_engine_allowed(to_pci_dev(xe->drm.dev));
> +	enum xe_hw_engine_id id;
> +
> +	for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
> +		if (!(gt->info.engine_mask & BIT(id)))
> +			continue;
> +
> +		if (!(sw_allowed & BIT(id))) {
> +			gt->info.engine_mask &= ~BIT(id);
> +			drm_info(&xe->drm, "%s disabled via configfs\n",

We should probably use xe_gt_info() so that it's clear which GT these
messages apply to, especially on a multi-tile platform where the
different tiles may be fused differently, leaving a different set of
remaining engines that could potentially be disabled by software.


Matt

> +				 engine_infos[id].name);
> +		}
> +	}
> +}
> +
>  int xe_hw_engines_init_early(struct xe_gt *gt)
>  {
>  	int i;
> @@ -818,6 +837,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt)
>  	read_copy_fuses(gt);
>  	read_compute_fuses(gt);
>  	check_gsc_availability(gt);
> +	check_sw_disable(gt);
>  
>  	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
>  	BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
> 
> -- 
> 2.49.0
> 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list