[PATCH] drm/xe: Allow to trigger GT resets using debugfs writes
Rodrigo Vivi
rodrigo.vivi at intel.com
Mon May 19 21:59:45 UTC 2025
On Mon, May 19, 2025 at 10:09:14PM +0200, Michal Wajdeczko wrote:
> Today we allow triggering GT resets by reading the dedicated debugfs
> files "force_reset" and "force_reset_sync", which we expose
> using drm_info_list[] and drm_debugfs_create_files().
>
> To avoid triggering potentially disruptive actions during otherwise
> "safe" read operations, expose those two attributes using a debugfs
> function where we can specify file permissions and provide a custom
> "write" handler, so that the GT resets can also be triggered by writes.
>
> This step will allow us to later drop triggering GT resets during read
> operations, which we keep for now just to give users more time to switch.
>
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
Cc: Peter Senna Tschudin <peter.senna at linux.intel.com>
I like the idea, and it simplifies the error handling that Peter had to do
in the new IGT test case.
Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> ---
> drivers/gpu/drm/xe/xe_gt_debugfs.c | 96 +++++++++++++++++++++++-------
> 1 file changed, 76 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> index 119a55bb7580..848618acdca8 100644
> --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
> +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
> @@ -122,24 +122,6 @@ static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
> return ret;
> }
>
> -static int force_reset(struct xe_gt *gt, struct drm_printer *p)
> -{
> - xe_pm_runtime_get(gt_to_xe(gt));
> - xe_gt_reset_async(gt);
> - xe_pm_runtime_put(gt_to_xe(gt));
> -
> - return 0;
> -}
> -
> -static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p)
> -{
> - xe_pm_runtime_get(gt_to_xe(gt));
> - xe_gt_reset(gt);
> - xe_pm_runtime_put(gt_to_xe(gt));
> -
> - return 0;
> -}
> -
> static int sa_info(struct xe_gt *gt, struct drm_printer *p)
> {
> struct xe_tile *tile = gt_to_tile(gt);
> @@ -306,8 +288,6 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
> * - without access to the PF specific data
> */
> static const struct drm_info_list vf_safe_debugfs_list[] = {
> - {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset},
> - {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync},
> {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
> {"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
> {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
> @@ -332,6 +312,78 @@ static const struct drm_info_list pf_only_debugfs_list[] = {
> {"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
> };
>
> +static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos,
> + void (*call)(struct xe_gt *), struct xe_gt *gt)
> +{
> + bool yes;
> + int ret;
> +
> + if (*ppos)
> + return -EINVAL;
> + ret = kstrtobool_from_user(userbuf, count, &yes);
> + if (ret < 0)
> + return ret;
> + if (yes)
> + call(gt);
> + return count;
> +}
> +
> +static void force_reset(struct xe_gt *gt)
> +{
> + struct xe_device *xe = gt_to_xe(gt);
> +
> + xe_pm_runtime_get(xe);
> + xe_gt_reset_async(gt);
> + xe_pm_runtime_put(xe);
> +}
> +
> +static ssize_t force_reset_write(struct file *file,
> + const char __user *userbuf,
> + size_t count, loff_t *ppos)
> +{
> + struct seq_file *s = file->private_data;
> + struct xe_gt *gt = s->private;
> +
> + return write_to_gt_call(userbuf, count, ppos, force_reset, gt);
> +}
> +
> +static int force_reset_show(struct seq_file *s, void *unused)
> +{
> + struct xe_gt *gt = s->private;
> +
> + force_reset(gt); /* to be deprecated! */
> + return 0;
> +}
> +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset);
> +
> +static void force_reset_sync(struct xe_gt *gt)
> +{
> + struct xe_device *xe = gt_to_xe(gt);
> +
> + xe_pm_runtime_get(xe);
> + xe_gt_reset(gt);
> + xe_pm_runtime_put(xe);
> +}
> +
> +static ssize_t force_reset_sync_write(struct file *file,
> + const char __user *userbuf,
> + size_t count, loff_t *ppos)
> +{
> + struct seq_file *s = file->private_data;
> + struct xe_gt *gt = s->private;
> +
> + return write_to_gt_call(userbuf, count, ppos, force_reset_sync, gt);
> +}
> +
> +static int force_reset_sync_show(struct seq_file *s, void *unused)
> +{
> + struct xe_gt *gt = s->private;
> +
> + force_reset_sync(gt); /* to be deprecated! */
> + return 0;
> +}
> +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset_sync);
> +
> void xe_gt_debugfs_register(struct xe_gt *gt)
> {
> struct xe_device *xe = gt_to_xe(gt);
> @@ -355,6 +407,10 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
> */
> root->d_inode->i_private = gt;
>
> + /* VF safe */
> + debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops);
> + debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops);
> +
> drm_debugfs_create_files(vf_safe_debugfs_list,
> ARRAY_SIZE(vf_safe_debugfs_list),
> root, minor);
> --
> 2.47.1
>
More information about the Intel-xe
mailing list