[PATCH v2 2/2] drm/xe/guc: Only add GuC crash dump if available
John Harrison
john.c.harrison at intel.com
Thu Apr 3 21:46:57 UTC 2025
On 3/27/2025 4:40 PM, Zhanjun Dong wrote:
> Add a flag recording that a GuC crash dump has been received. The LFD only
> includes the crash dump section when a crash dump is available.
>
> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
> ---
> drivers/gpu/drm/xe/xe_guc_ct.c | 13 +++++++-----
> drivers/gpu/drm/xe/xe_guc_log.c | 30 +++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_guc_log_types.h | 2 ++
> 3 files changed, 40 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 72ad576fc18e..44c11ec662e5 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -1127,12 +1127,15 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action)
> {
> struct xe_gt *gt = ct_to_gt(ct);
>
> - if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED)
> + if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) {
> xe_gt_err(gt, "GuC Crash dump notification\n");
> - else if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION)
> - xe_gt_err(gt, "GuC Exception notification\n");
> - else
> - xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action);
> + ct_to_guc(ct)->log.crash_dumped = true;
This flag will also need to be cleared in the GuC reset path. There is no
guarantee that the log will be saved via the LFD system before a reset
wipes it out, and a subsequent save would then act on a stale crash dump flag.
> + } else {
> + if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION)
You can use "} else if (...) {" to avoid the unnecessary extra level of
indentation.
> + xe_gt_err(gt, "GuC Exception notification\n");
> + else
> + xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action);
> + }
>
> CT_DEAD(ct, NULL, CRASH);
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
> index 5659d60e41ab..29684393a62d 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log.c
> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
> @@ -536,6 +536,36 @@ static uint xe_guc_log_save_to_lfd_buf(char *buf, int size, u32 *guc_log_bin,
> return len;
> index += len;
>
> + /* For Crash dump, rd/wr ptr has no effect, only add if crash_dumped is true */
> + if (log->crash_dumped) {
> + struct guc_log_buffer_entry_list *entry;
> +
> + entry = &entry_list[GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CRASH];
> + if (entry->buf_size) {
> + int i;
> + u32 *buf32 = (u32 *)&bin[entry->offset];
> +
> + /* Check if crash dump section are all zero */
> + for (i = 0; i < entry->buf_size / 4; i++)
> + if (buf32[i])
> + break;
> +
> + /* Buffer has non-zero data */
> + if (i < entry->buf_size / 4) {
> + len = xe_guc_log_add_typed_payload(&buf[index], size - index,
> + GUC_LFD_TYPE_FW_CRASH_DUMP,
> + entry->buf_size,
> + &bin[entry->offset]);
> + if (len < 0)
> + return len;
> + index += len;
> +
> + /* Clear flag */
> + log->crash_dumped = false;
> + }
> + }
> + }
> +
> return index;
> }
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
> index b3d5c72ac752..d351f639727b 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
> @@ -46,6 +46,8 @@ struct xe_guc_log {
> u32 level;
> /** @bo: XE BO for GuC log */
> struct xe_bo *bo;
> + /** @crash_dumped: Indicate if crash dumped */
> + bool crash_dumped;
> /** @stats: logging related stats */
> struct {
> u32 sampled_overflow;
More information about the Intel-xe
mailing list