[PATCH 1/2] drm/xe: Add a reason string to the devcoredump

Thu Nov 21 16:44:37 UTC 2024

On 11/20/2024 21:36, Matthew Brost wrote:
> On Wed, Nov 20, 2024 at 07:38:30PM -0800, Matthew Brost wrote:
>> On Wed, Nov 20, 2024 at 03:22:56PM -0800, John.C.Harrison at Intel.com wrote:
>>> From: John Harrison <John.C.Harrison at Intel.com>
>>>
>>> There are debug level prints giving more information about the cause
>>> of the hang immediately before core dumps are created. However, not
>>> everyone has debug level prints enabled or saves the dmesg log at all.
>>> So include that information in the dump file itself. Also, at least
>>> one of those prints included the pid as well as the process name. So
>>> include that in the capture too.
>>>
>> This looks helpful.
>>
>> A couple of nits below.
>>
>>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>>> ---
>>>   drivers/gpu/drm/xe/xe_devcoredump.c       | 20 +++++++++++++++++---
>>>   drivers/gpu/drm/xe/xe_devcoredump.h       |  5 +++--
>>>   drivers/gpu/drm/xe/xe_devcoredump_types.h |  4 ++++
>>>   drivers/gpu/drm/xe/xe_guc_submit.c        | 14 ++++++++++----
>>>   4 files changed, 34 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
>>> index 0e5edf14a241..b5aebe8fb06d 100644
>>> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
>>> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
>>> @@ -99,6 +99,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
>>>   	p = drm_coredump_printer(&iter);
>>>   
>>>   	drm_puts(&p, "**** Xe Device Coredump ****\n");
>>> +	drm_printf(&p, "Reason: %s\n", ss->reason);
>>>   	drm_puts(&p, "kernel: " UTS_RELEASE "\n");
>>>   	drm_puts(&p, "module: " KBUILD_MODNAME "\n");
>>>   
>>> @@ -106,7 +107,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
>>>   	drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
>>>   	ts = ktime_to_timespec64(ss->boot_time);
>>>   	drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
>>> -	drm_printf(&p, "Process: %s\n", ss->process_name);
>>> +	drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid);
>>>   	xe_device_snapshot_print(xe, &p);
>>>   
>>>   	drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id);
>>> @@ -138,6 +139,9 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
>>>   {
>>>   	int i;
>>>   
>>> +	kvfree(ss->reason);
>> Do we need kvalloc / kvfree here? Seems like this would be small in size.
The type of free used is dependent upon the alloc itself. But I think 
this should actually be kfree after all, see below...

>>
>>> +	ss->reason = NULL;
>>> +
>>>   	xe_guc_log_snapshot_free(ss->guc.log);
>>>   	ss->guc.log = NULL;
>>>   
>>> @@ -253,8 +257,11 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
>>>   	ss->snapshot_time = ktime_get_real();
>>>   	ss->boot_time = ktime_get_boottime();
>>>   
>>> -	if (q->vm && q->vm->xef)
>>> +	if (q->vm && q->vm->xef) {
>>>   		process_name = q->vm->xef->process_name;
>>> +		ss->pid = q->vm->xef->pid;
>>> +	}
>>> +
>>>   	strscpy(ss->process_name, process_name);
>>>   
>>>   	ss->gt = q->gt;
>>> @@ -297,10 +304,12 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
>>>    * gt_reset. It is skipped if we still have the core dump device available
>>>    * with the information of the 'first' snapshot.
>>>    */
>> Seems like kernel doc should be updated.
>>
>>> -void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job)
>>> +__printf(3, 4)
>>> +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...)
>>>   {
>>>   	struct xe_device *xe = gt_to_xe(q->gt);
>>>   	struct xe_devcoredump *coredump = &xe->devcoredump;
>>> +	va_list varg;
>>>   
>>>   	if (coredump->captured) {
>>>   		drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n");
>>> @@ -308,6 +317,11 @@ void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job)
>>>   	}
>>>   
>>>   	coredump->captured = true;
>>> +
>>> +	va_start(varg, fmt);
>>> +	coredump->snapshot.reason = kvasprintf(GFP_KERNEL, fmt, varg);
>> See above. Off the top my head idk if there is a non-kv* version of this
>> function but if there is, maybe use that.
> Ok, scratch this comment. I do see these functions defined:
>
> __printf(2, 3) __malloc char *kasprintf(gfp_t gfp, const char *fmt, ...);
> __printf(2, 0) __malloc char *kvasprintf(gfp_t gfp, const char *fmt, va_list args);
>
> The former call doesn't work here as va_start is only safe to call once
> in a call chain [1]. The former internally needs to call va_start again.
> Unsure why this seemly equivalent functions have different signatures
> but I think your usage here is correct.
>
> Matt
>
> [1] https://stackoverflow.com/questions/59608636/can-we-call-va-start-twice-without-calling-va-end-in-between
Yeah, that was the conclusion I came to. But note that the v in the 
above is for va_list not for virtual. From looking at other bits of the 
kernel, it seems that even with kvasprintf, the correct free function is 
just kfree. I thought I had checked that earlier but evidently I managed 
to confuse myself again!

>
>> Nits aside, LGTM:
>> Reviewed-by: Matthew Brost <matthew.brost at intel.com>
>>
>>> +	va_end(varg);
>>> +
>>>   	devcoredump_snapshot(coredump, q, job);
>>>   
>>>   	drm_info(&xe->drm, "Xe device coredump has been created\n");
>>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h
>>> index c04a534e3384..6a17e6d60102 100644
>>> --- a/drivers/gpu/drm/xe/xe_devcoredump.h
>>> +++ b/drivers/gpu/drm/xe/xe_devcoredump.h
>>> @@ -14,11 +14,12 @@ struct xe_exec_queue;
>>>   struct xe_sched_job;
>>>   
>>>   #ifdef CONFIG_DEV_COREDUMP
>>> -void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job);
>>> +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...);
>>>   int xe_devcoredump_init(struct xe_device *xe);
>>>   #else
>>>   static inline void xe_devcoredump(struct xe_exec_queue *q,
>>> -				  struct xe_sched_job *job)
>>> +				  struct xe_sched_job *job,
>>> +				  const char *fmt, ...)
>>>   {
>>>   }
>>>   
>>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
>>> index be4d59ea9ac8..e6234e887102 100644
>>> --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h
>>> @@ -28,6 +28,10 @@ struct xe_devcoredump_snapshot {
>>>   	ktime_t boot_time;
>>>   	/** @process_name: Name of process that triggered this gpu hang */
>>>   	char process_name[TASK_COMM_LEN];
>>> +	/** @pid: Process id of process that triggered this gpu hang */
>>> +	pid_t pid;
>>> +	/** @reason: The reason the coredump was triggered */
>>> +	char *reason;
>>>   
>>>   	/** @gt: Affected GT, used by forcewake for delayed capture */
>>>   	struct xe_gt *gt;
>>> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
>>> index e132294fde51..7d0e7cb977ad 100644
>>> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
>>> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
>>> @@ -898,7 +898,8 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
>>>   		if (!ret) {
>>>   			xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
>>>   				   q->guc->id);
>>> -			xe_devcoredump(q, NULL);
>>> +			xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
>>> +				       q->guc->id);
>>>   			xe_sched_submission_start(sched);
>>>   			xe_gt_reset_async(q->gt);
>>>   			return;
>>> @@ -906,7 +907,7 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
>>>   	}
>>>   
>>>   	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
>>> -		xe_devcoredump(q, NULL);
>>> +		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);
>>>   
>>>   	xe_sched_submission_start(sched);
>>>   }
>>> @@ -1132,7 +1133,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
>>>   				xe_gt_warn(guc_to_gt(guc),
>>>   					   "Schedule disable failed to respond, guc_id=%d",
>>>   					   q->guc->id);
>>> -			xe_devcoredump(q, job);
>>> +			xe_devcoredump(q, job,
>>> +				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
>>> +				       q->guc->id, ret, xe_guc_read_stopped(guc));
>>>   			set_exec_queue_extra_ref(q);
>>>   			xe_exec_queue_get(q);	/* GT reset owns this */
>>>   			set_exec_queue_banned(q);
>>> @@ -1162,7 +1165,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
>>>   	trace_xe_sched_job_timedout(job);
>>>   
>>>   	if (!exec_queue_killed(q))
>>> -		xe_devcoredump(q, job);
>>> +		xe_devcoredump(q, job,
>>> +			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
>>> +			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
>>> +			       q->guc->id, q->flags);
>>>   
>>>   	/*
>>>   	 * Kernel jobs should never fail, nor should VM jobs if they do
>>> -- 
>>> 2.47.0
>>>