[PATCH] drm/xe: Allow to trigger GT resets using debugfs writes
Michal Wajdeczko
michal.wajdeczko at intel.com
Tue May 20 09:12:04 UTC 2025
On 20.05.2025 01:44, John Harrison wrote:
> On 5/19/2025 1:09 PM, Michal Wajdeczko wrote:
>> Today we allow to trigger GT resest by reading dedicated debugfs
>> files "force_reset" and "force_reset_sync" that we are exposing
>> using drm_info_list[] and drm_debugfs_create_files().
>>
>> To avoid triggering potentially disruptive actions during otherwise
>> "safe" read operations, expose those two attributes using debugfs
>> function where we can specify file permissions and provide custom
>> "write" handler to trigger the GT resets also from there.
> Would you like to extend this to the GuC log dump-via-dmesg trigger
> entry as well? Dumping that during the IGT read debugfs test also causes
> issues due to the huge amount of output it produces.
yes, that was my plan as I was expecting more discussion there so I
wanted to put that into a separate series
but since you already asked here, my question would be:
do you prefer keeping separate write-only "guc_log_dmesg" file, or keep
single read-write "guc_log" file that would print encoded log on read
and dump it to dmesg on write?
also do we want to keep 0444 mode for "guc_log" or change it to 0400
mode (or 0600 if it would be dual-purpose)
>
>>
>> This step would allow us to drop triggering GT resets during read
>> operations, which we leave just to give users more time to switch.
> I'm not following this sentence. Give users more time to switch what?
switch to use "echo 1 > force_reset" instead of "cat force_reset"
>
> Also, would it be more accurate to call the patch set something like
> 'require a write to trigger GT resets via debugfs'. Saying 'allow'
> implies you are just extending the existing interface rather than
> removing the trigger-via-read ability.
this patch extends the existing interface, the legacy way to trigger a
reset by read is still there (due to IGT) - see force_reset_show()
once the IGT will be updated, we would make those files write-only
I didn't want to introduce any CI regression right now, hence two steps
>
> John.
>
>>
>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_gt_debugfs.c | 96 +++++++++++++++++++++++-------
>> 1 file changed, 76 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/
>> xe_gt_debugfs.c
>> index 119a55bb7580..848618acdca8 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
>> @@ -122,24 +122,6 @@ static int powergate_info(struct xe_gt *gt,
>> struct drm_printer *p)
>> return ret;
>> }
>> -static int force_reset(struct xe_gt *gt, struct drm_printer *p)
>> -{
>> - xe_pm_runtime_get(gt_to_xe(gt));
>> - xe_gt_reset_async(gt);
>> - xe_pm_runtime_put(gt_to_xe(gt));
>> -
>> - return 0;
>> -}
>> -
>> -static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p)
>> -{
>> - xe_pm_runtime_get(gt_to_xe(gt));
>> - xe_gt_reset(gt);
>> - xe_pm_runtime_put(gt_to_xe(gt));
>> -
>> - return 0;
>> -}
>> -
>> static int sa_info(struct xe_gt *gt, struct drm_printer *p)
>> {
>> struct xe_tile *tile = gt_to_tile(gt);
>> @@ -306,8 +288,6 @@ static int hwconfig(struct xe_gt *gt, struct
>> drm_printer *p)
>> * - without access to the PF specific data
>> */
>> static const struct drm_info_list vf_safe_debugfs_list[] = {
>> - {"force_reset", .show = xe_gt_debugfs_simple_show, .data =
>> force_reset},
>> - {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data =
>> force_reset_sync},
>> {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
>> {"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
>> {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
>> @@ -332,6 +312,78 @@ static const struct drm_info_list
>> pf_only_debugfs_list[] = {
>> {"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
>> };
>> +static ssize_t write_to_gt_call(const char __user *userbuf, size_t
>> count, loff_t *ppos,
>> + void (*call)(struct xe_gt *), struct xe_gt *gt)
>> +{
>> + bool yes;
>> + int ret;
>> +
>> + if (*ppos)
>> + return -EINVAL;
>> + ret = kstrtobool_from_user(userbuf, count, &yes);
>> + if (ret < 0)
>> + return ret;
>> + if (yes)
>> + call(gt);
>> + return count;
>> +}
>> +
>> +static void force_reset(struct xe_gt *gt)
>> +{
>> + struct xe_device *xe = gt_to_xe(gt);
>> +
>> + xe_pm_runtime_get(xe);
>> + xe_gt_reset_async(gt);
>> + xe_pm_runtime_put(xe);
>> +}
>> +
>> +static ssize_t force_reset_write(struct file *file,
>> + const char __user *userbuf,
>> + size_t count, loff_t *ppos)
>> +{
>> + struct seq_file *s = file->private_data;
>> + struct xe_gt *gt = s->private;
>> +
>> + return write_to_gt_call(userbuf, count, ppos, force_reset, gt);
>> +}
>> +
>> +static int force_reset_show(struct seq_file *s, void *unused)
>> +{
>> + struct xe_gt *gt = s->private;
>> +
>> + force_reset(gt); /* to be deprecated! */
>> + return 0;
>> +}
>> +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset);
>> +
>> +static void force_reset_sync(struct xe_gt *gt)
>> +{
>> + struct xe_device *xe = gt_to_xe(gt);
>> +
>> + xe_pm_runtime_get(xe);
>> + xe_gt_reset(gt);
>> + xe_pm_runtime_put(xe);
>> +}
>> +
>> +static ssize_t force_reset_sync_write(struct file *file,
>> + const char __user *userbuf,
>> + size_t count, loff_t *ppos)
>> +{
>> + struct seq_file *s = file->private_data;
>> + struct xe_gt *gt = s->private;
>> +
>> + return write_to_gt_call(userbuf, count, ppos, force_reset_sync, gt);
>> +}
>> +
>> +static int force_reset_sync_show(struct seq_file *s, void *unused)
>> +{
>> + struct xe_gt *gt = s->private;
>> +
>> + force_reset_sync(gt); /* to be deprecated! */
>> + return 0;
>> +}
>> +DEFINE_SHOW_STORE_ATTRIBUTE(force_reset_sync);
>> +
>> void xe_gt_debugfs_register(struct xe_gt *gt)
>> {
>> struct xe_device *xe = gt_to_xe(gt);
>> @@ -355,6 +407,10 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
>> */
>> root->d_inode->i_private = gt;
>> + /* VF safe */
>> + debugfs_create_file("force_reset", 0600, root, gt,
>> &force_reset_fops);
>> + debugfs_create_file("force_reset_sync", 0600, root, gt,
>> &force_reset_sync_fops);
>> +
>> drm_debugfs_create_files(vf_safe_debugfs_list,
>> ARRAY_SIZE(vf_safe_debugfs_list),
>> root, minor);
>
More information about the Intel-xe
mailing list