[PATCH v1] drm/xe/guc: Add LFD format output for guc log
Dong, Zhanjun
zhanjun.dong at intel.com
Fri Mar 21 01:03:59 UTC 2025
See my inline comments below.
Regards,
Zhanjun Dong
On 2025-03-20 12:33 p.m., Michal Wajdeczko wrote:
>
>
> On 20.03.2025 16:03, Zhanjun Dong wrote:
>> Add new debugfs entry "guc_log_lfd", which supports output guc log
>> in LFD format.
>
> can you describe what LFD stands for ?
LFD is “Log Format Descriptors", decsribed in the reference link below.>
>> Add flag of GuC crash dump received from GuC, LFD only include crash dump
>> section when this flag is set.
>
> can we move it to a separate patch?
> this patch is already quite large
Sure, will do that way in next rev.>
>>
>> Reference:
>> https://coredocs.intel.com/InterfaceDocs/sphinx/core/debug.html#streaming-log-file-format-lfds
>
> this seems to be internal link, we don't use them here
Emm, let's discuss offline. As I know some API made public is in progress.
>
>>
>> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>> drivers/gpu/drm/xe/abi/guc_log_lfd_abi.h | 494 +++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_guc_ct.c | 13 +-
>> drivers/gpu/drm/xe/xe_guc_debugfs.c | 14 +
>> drivers/gpu/drm/xe/xe_guc_log.c | 327 +++++++++++++++
>> drivers/gpu/drm/xe/xe_guc_log.h | 1 +
>> drivers/gpu/drm/xe/xe_guc_log_types.h | 2 +
>> 6 files changed, 846 insertions(+), 5 deletions(-)
>> create mode 100644 drivers/gpu/drm/xe/abi/guc_log_lfd_abi.h
>>
>> diff --git a/drivers/gpu/drm/xe/abi/guc_log_lfd_abi.h b/drivers/gpu/drm/xe/abi/guc_log_lfd_abi.h
>> new file mode 100644
>> index 000000000000..3e056ddc55be
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/abi/guc_log_lfd_abi.h
>> @@ -0,0 +1,494 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2025 Intel Corporation
>> + */
>> +
>> +#ifndef _INTEL_GUC_LFD_H_
>> +#define _INTEL_GUC_LFD_H_
>
> this guard name does not match file name
Sure, to be fixed.>
>> +
>> +#include <linux/types.h>
>> +
>> +/* The current major version of GuC-Log-File format. */
>> +#define GUC_LOG_FILE_FORMAT_VERSION_MAJOR 0x0001
>> +
>> +/* The current minor version of GuC-Log-File format. */
>> +#define GUC_LOG_FILE_FORMAT_VERSION_MINOR 0x0000
>> +
>> +/* Magic keys define */
>> +#define GUC_LOGFILE_LFD_MAGIC 0x8086
>> +#define LFD_DRIVER_KEY_1 0X808655CC
>
> please use 0x notation, not 0X
Sure, to be fixed.>
>> +#define LFD_DRIVER_KEY_1 0X808655CC
>
> duplicate
>
>> +#define LFD_DRIVER_KEY_1V2 0X808655DD
>> +#define LFD_DRIVER_KEY_2 0X8086EEAA
>> +#define LFD_DRIVER_KEY_2V2 0X8086EEBB
>> +#define LFD_DRIVER_KEY_STREAMING1 0X474C5346
>> +#define LFD_DRIVER_KEY_STREAMING2 0X8086AAAA
>
> do we need them? one can use:
>
> upper_32_bits(LFD_DRIVER_KEY_STREAMING)
> lower_32_bits(LFD_DRIVER_KEY_STREAMING)
Good idea.>
>> +#define LFD_DRIVER_KEY_STREAMING 0X8086AAAA474C5346
>> +#define LFD_LOG_BUFFER_MARKER_1 0XCABBA9E5
>> +#define LFD_LOG_BUFFER_MARKER_2 0XDEADFEED
>> +#define LFD_CRASH_DUMP_BUFFER_MARKER_1 0XCABBA9E5
>> +#define LFD_CRASH_DUMP_BUFFER_MARKER_2 0X8086DEAD
>> +#define LFD_STATE_CAPTURE_BUFFER_MARKER_1 0XCABBA9F6
>> +#define LFD_STATE_CAPTURE_BUFFER_MARKER_2 0XBEEFFEED
>> +#define LFD_LOG_BUFFER_MARKER_1V2 0XCABBA9E6
>> +#define LFD_CRASH_DUMP_BUFFER_MARKER_1V2 0XCABBA9E6
>> +#define LFD_STATE_CAPTURE_BUFFER_MARKER_1V2 0XCABBA9F7
>> +
>> +#define GUC_LOG_BUFFER_STATE_HEADER_LENGTH 4096
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT 4
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_LOG 0
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CRASH 1
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CAPTURE 2
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT 3
>> +
>> +#define GUC_LOG_INIT_CONFIG_LIC_MAGIC 0x8086900D
>> +#define GUC_LOG_INIT_CONFIG_FORMAT_VERSION_MAJOR 0x0001
>> +#define GUC_LOG_INIT_CONFIG_FORMAT_VERSION_MINOR 0x0000
>> +
>> +#define GUC_LOG_EVENT_ENTRY_FORMAT_VERSION 2
>> +
>> +#define LFD_MAGIC_SIM_KEY 0X900DFEED
>> +
>> +/* Log format descriptor type. */
>
> can you convert all these comments to kernel-doc style?
Ok, let me try.>
>> +enum guc_lfd_type_t {
>
> do we need those "_t" suffixes everywhere ?
All enum/struct is coming from API spec, let's align with spec document.
As some spec changes is in progress, I will make a script to generate
this _abi.h from spec doc, so align with spec is the safe way.
>
>> + /* Start of range for required LFDs from GuC. */
>> + GUC_LFD_TYPE_FW_REQUIRED_RANGE_START = 0x1,
>> +
>> + /*
>> + * GuC Firmware Version structure. LFDs payload is
>> + * guc_lfd_data_fw_version_t
>> + */
>> + GUC_LFD_TYPE_FW_VERSION = 0x1,
>> +
>> + /*
>> + * GuC microcontroller device ID. LFDs payload is
>> + * guc_lfd_data_guc_devid_t
>> + */
>> + GUC_LFD_TYPE_GUC_DEVICE_ID = 0x2,
>> +
>> + /*
>> + * Frequency of GuC timestamps. LFDs payload is
>> + * guc_lfd_data_tsc_freq_t.
>> + */
>> + GUC_LFD_TYPE_TSC_FREQUENCY = 0x3,
>> +
>> + /* End of this range. */
>> + GUC_LFD_TYPE_FW_REQUIRED_RANGE_END = 0x1FFF,
>> +
>> + /* Start of range for required LFDs from GuC. */
>> + GUC_LFD_TYPE_FW_OPTIONAL_RANGE_START = 0x2000,
>> +
>> + /*
>> + * Log-event-entries buffer. LFDs payload is
>> + * guc_lfd_data_log_events_buf_t
>> + */
>> + GUC_LFD_TYPE_LOG_EVENTS_BUFFER = 0x2000,
>> +
>> + /*
>> + * GuC generated crash-dump blob. LFDs payload is
>> + * guc_lfd_data_fw_crashdump_t
>> + */
>> + GUC_LFD_TYPE_FW_CRASH_DUMP = 0x2001,
>> +
>> + /* End of this range. */
>> + GUC_LFD_TYPE_FW_OPTIONAL_RANGE_END = 0x3FFF,
>> +
>> + /* Start of range for required KMD LFDs. */
>> + GUC_LFD_TYPE_KMD_REQUIRED_RANGE_START = 0x4000,
>> +
>> + /* An identifier for the OS. LFDs payload is guc_lfd_data_os_id_t. */
>> + GUC_LFD_TYPE_OS_ID = 0x4000,
>> +
>> + /* End of this range. */
>> + GUC_LFD_TYPE_KMD_REQUIRED_RANGE_END = 0x5FFF,
>> +
>> + /* Start of range for optional KMD LFDs. */
>> + GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_START = 0x6000,
>> +
>> + /*
>> + * Binary representation of GuC log-events schema.
>> + * LFDs TLV payload is guc_lfd_data_binary_schema_t
>> + */
>> + GUC_LFD_TYPE_BINARY_SCHEMA_FORMAT = 0x6000,
>> +
>> + /*
>> + * ASCII string containing comments from the host/KMD.
>> + * LFDs TLV payload is guc_lfd_data_host_comment_t
>> + */
>> + GUC_LFD_TYPE_HOST_COMMENT = 0x6001,
>> +
>> + /* End of this range. */
>> + GUC_LFD_TYPE_KMD_OPTIONAL_RANGE_END = 0x7FFF,
>> +
>> + /* Start of reserved range. */
>> + GUC_LFD_TYPE_RESERVED_RANGE_START = 0x8000,
>> +
>> + /* End of this range. */
>> + GUC_LFD_TYPE_RESERVED_RANGE_END = 0xFFFF,
>> +};
>> +
>> +/* OS Type LFD-ID. */
>> +enum guc_lfd_os_type_t {
>> + /* Windows OS. */
>> + GUC_LFD_OS_TYPE_OSID_WIN = 0x1,
>> +
>> + /* Linux OS. */
>> + GUC_LFD_OS_TYPE_OSID_LIN = 0x2,
>> +
>> + /* VMWare OS. */
>> + GUC_LFD_OS_TYPE_OSID_VMW = 0x3,
>> +
>> + /* Other. */
>> + GUC_LFD_OS_TYPE_OSID_OTHER = 0x4,
>> +};
>> +
>> +/*
>> + * Log format descriptor (LFD). A type of KLV with custom
>> + * field-sizes + magic numbers.
>> + */
>> +struct guc_logfile_lfd_t {
>> + /* BR[15:0] Expected value: 0x8086. Helpful in detecting file errors. */
>
> what is "BR" ?
I beleive is "bit range">
>> + u32 magic : 16;
>
> is there any relation of this magic bits to upper bits that are already
> part some of the defines like LFD_DRIVER_KEY?
Expected to be GUC_LOGFILE_LFD_MAGIC
Yes, it is high 16 bit of LFD_DRIVER_KEY >
>> +
>> + /*
>> + * BR[31:16] File descriptor type (the ‘T’ in TLV) is used to identify
>> + * how to interpret data below. For the range of types,
>> + * see guc_lfd_type_t
>> + */
>> + u32 desc_type : 16;
>> +
>> + /* Number of dwords the data field contains. */
>> + u32 desc_dw_size;
>> +
>> + /* Data defined by File descriptor type. */
>> + u32 data[];
>> +};
>
> likely __packed is missing for all these ABI structs
Yes, to be fixed.>
>> +
>> +/*
>> + * GuC Log File Format Version. Major-Minor is not a fractional number
>> + * (i.e. Ver 1.3 would be older than 1.12)
>> + */
>> +struct guc_logfile_fmt_ver_t {
>> + /*
>> + * BR[15: 0] Guc-Log-File Format minor version.
>> + * Must be GUC_LOG_FILE_FORMAT_VERSION_MINOR
>> + */
>> + u32 minor_version : 16;
>> +
>> + /*
>> + * BR[31:16] Guc-Log-File Format major version.
>> + * Must be GUC_LOG_FILE_FORMAT_VERSION_MAJOR
>> + */
>> + u32 major_version : 16;
>> +};
>> +
>> +/*
>> + * GuC Log Streaming-LFD-File Format. This structure encapsulates the layout of
>> + * the guc-log-file format.
>> + */
>> +struct guc_logfile_t {
>> + /*
>> + * A magic number set by producer of a GuC log file to identify that
>> + * file is a valid guc-log-file containing a stream of LFDs:
>> + * Expected value: 0x8086AAAA474C5346.
>> + */
>> + u64 magic;
>> +
>> + /* Version of this file format layout as per guc_logfile_fmt_ver_t. */
>> + struct guc_logfile_fmt_ver_t version;
>> +
>> + /* A stream of one or more guc_logfile_lfd_t LFD data. */
>> + struct guc_logfile_lfd_t lfd_stream[];
>> +};
>> +
>> +/* This structure describes the full version of a software component. */
>> +struct guc_sw_version_t {
>> + /* BR[ 7: 0] Patch version. */
>> + u32 patch_version : 8;
>> +
>> + /* BR[15: 8] Minor version. */
>> + u32 minor_version : 8;
>> +
>> + /* BR[23:16] Major version. */
>> + u32 major_version : 8;
>> +
>> + /* BR[31:24] Branch ID. */
>> + u32 branch_id : 8;
>> +};
>> +
>> +/* GuC FW Version. This is mandatory fw LFD data. */
>> +struct guc_lfd_data_fw_version_t {
>> + /* The full version of the GuC microkernel that generated the logs. */
>> + struct guc_sw_version_t guc_sw_version;
>
> maybe this struct could be defined here in place?
This file will be generated from spec, let's follow spec document.>
>> +};
>> +
>> +/*
>> + * Log Init Config Types.
>> + * The first word of the each TLV payload of guc_log_init_config_t contains one
>> + * of the following enum values. This value determines how subsequent bytes are
>> + * parsed to obtain the TLV information.
>> + */
>> +enum guc_log_lic_type_t {
>> + /*
>> + * GuC firmware version.
>> + * Value is a 32 bit number represented by guc_sw_version_t.
>> + */
>> + GUC_LOG_LIC_TYPE_GUC_SW_VERSION = 0x1,
>> +
>> + /*
>> + * GuC device id.
>> + * Value is a 32 bit. Refer BSpec symbol GUC_DEVICEID
>> + * https://gfxspecs.intel.com/Predator/Home/Index/60513 for more
>> + * details.
>> + */
>> + GUC_LOG_LIC_TYPE_GUC_DEVICE_ID = 0x2,
>> +
>> + /* The first u32 KLV type */
>> + GUC_LOG_LIC_TYPE_FIRST = GUC_LOG_LIC_TYPE_GUC_DEVICE_ID,
>> +
>> + /*
>> + * GuC timestamp counter frequency.
>> + * Value is a 32 bit number representing frequency in kilohertz.
>> + * This timestamp is utilized in log entries, timer and for engine
>> + * utilization tracking.
>> + */
>> + GUC_LOG_LIC_TYPE_TSC_FREQUENCY = 0x3,
>> +
>> + /*
>> + * HW GMD ID.
>> + * Value is a 32 bit number representing graphics, media and display
>> + * HW architecture IDs.
>> + */
>> + GUC_LOG_LIC_TYPE_GMD_ID = 0x4,
>> +
>> + /* GuC build platform ID. Value is 32 bits. */
>> + GUC_LOG_LIC_TYPE_BUILD_PLATFORM_ID = 0x5,
>> +
>> + /* The last LIC TYPE */
>> + GUC_LOG_LIC_TYPE_LAST,
>> +};
>> +
>> +/* Log Init Config: GUC software version. */
>> +struct guc_lic_guc_sw_version_t {
>> + /* BR[31:16] Number of dwords in proceeding payload = 1. */
>> + u32 size : 16;
>> +
>> + /*
>> + * BR[15: 0] Log-init-config
>> + * TLV type enum = GUC_LOG_LIC_TYPE_GUC_SW_VERSION.
>> + */
>> + u32 type : 16;
>> +
>> + /* 32 bit number representing the Guc’s software version. */
>> + struct guc_sw_version_t guc_sw_version;
>> +};
>> +
>> +/*
>> + * Log Init Config Structure Version.
>> + * Major-Minor is not a fractional number
>> + * (i.e. Ver 1.3 would be older than 1.12)
>> + */
>> +struct guc_lic_format_version_t {
>> + /*
>> + * BR[15: 0] Log-Init-Config structure minor version.
>> + * Must be GUC_LOG_FILE_FORMAT_VERSION_MINOR
>> + */
>> + u32 minor_version : 16;
>> +
>> + /*
>> + * BR[31:16] Log-Init-Config structure major version.
>> + * Must be GUC_LOG_FILE_FORMAT_VERSION_MAJOR
>> + */
>> + u32 major_version : 16;
>> +};
>> +
>> +/* KLV with multiple DWs in an array. */
>> +struct guc_klv_generic_t {
>> + /* Length in Dwords of data. */
>> + u16 length;
>> +
>> + /* Key value. */
>> + u16 key;
>> +
>> + /* Value for this key. */
>> + u32 value[];
>> +};
>> +
>> +/*
>> + * GuC Log-Init-Config structure.
>> + * This is populated by the GUC at log init time and is located in the log
>> + * buffer as per the Log Buffer Layout (In Memory). The array of guc log
>> + * buffer states plus this structure must not exceed 4KB.
>> + */
>> +struct guc_log_init_config_t {
>> + /*
>> + * A magic number set by GuC to identify that this structure
>> + * contains valid information: lic_magic = 0x8086900D.
>> + * Used to verify the information in this structure is valid.
>> + */
>> + u32 lic_magic;
>> +
>> + /*
>> + * The version of the this structure.
>> + * Detail description is guc_lic_format_version_t
>> + */
>> + struct guc_lic_format_version_t lic_ver;
>> +
>> + /* Number of Dws the lic_data array contains. */
>> + u32 lic_dw_size;
>> +
>> + /*
>> + * Array of dwords representing a sequence of LIC TLVs types.
>> + * See guc_log_lic_type_t enums
>> + */
>> + u32 lic_data[];
>> +};
>> +
>> +/* GuC Log entry format Version 2. */
>> +struct guc_log_format_version_2_t {
>> + /* BR[31: 0] The GuC timestamp of the log event. */
>> + u32 timestamp : 32;
>> +
>> + /* BR[19: 0] The Vf ID the log entry has been generated for. */
>> + u32 vf_id : 20;
>> +
>> + /* BR[31:20] Reserved. */
>> + u32 reserved_1 : 12;
>> +
>> + /* BR[14: 0] The GuC Event ID value for the log event. */
>> + u32 event_id : 15;
>> +
>> + /* BR[15:15] Indicates if Event should be visible to Vf Host driver. */
>> + u32 vf_visible : 1;
>> +
>> + /*
>> + * BR[17:16] This field contains the verbosity level of the log entry,
>> + * see the Log Verbosity section. This can then be used by the PF driver
>> + * to filter the logs copied to a VF based on the verbosity requested by
>> + * a VF.
>> + */
>> + u32 verbosity : 2;
>> +
>> + /* BR[31:18] Reserved. */
>> + u32 reserved_2 : 14;
>> +
>> + /* BR[31: 0] The value of Parameter 1 of the log event. */
>> + u32 parameter_1 : 32;
>> +
>> + /* BR[31: 0] The value of Parameter 2 of the log event. */
>> + u32 parameter_2 : 32;
>> +};
>> +
>> +/* GuC Log Buffer State. */
>> +struct guc_log_buffer_state_t {
>
> this strut looks like existing raw layout of the GuC log
> can we place it separately from LFD definitions?
> (maybe on the top of the file)
Let me check, might be put into seperate file.
>
>> + /*
>> + * A marker set by the ukernel to identify the buffer state start
>> + * location in a binary file containing the GuC log. This is used
>> + * by log parsing tools. The Current values are:
>> + *
>> + * Log buffer
>> + * Marker[0] 0XCABBA9E6,
>> + * Marker[1] 0XDEADFEED
>> + * Crashdump
>> + * Marker[0] 0XCABBA9E6,
>> + * Marker[1] 0x8086DEAD
>> + * Error State Capture
>> + * Marker[0] 0XCABBA9F7,
>> + * Marker[1] 0XBEEFFEED
>> + * Schema
>> + * Marker[0] 0x808655DD,
>> + * Marker[1] 0x8086EEBB.
>> + * Note: The schema is never written to the memory by ukernel.
>> + * Its an optional feature for KMD to embed schema in guclog file.
>> + */
>> + u32 Marker[2];
>> +
>> + /*
>> + * BR[31: 0] This is the Last Byte Offset Location that was read by
>> + * KMD. KMD will write to this and uKernel will read this.
>> + */
>> + u32 log_buf_rd_ptr : 32;
>> +
>> + /*
>> + * BR[31: 0] This is the Byte Offset Location that will be written
>> + * by uKernel.
>> + */
>> + u32 log_guc_wr_ptr : 32;
>> +
>> + /*
>> + * BR[31: 0] Log buffer size.
>> + * This is written by the KMD and specifies the size of the buffer
>> + * in bytes
>> + */
>> + u32 log_buf_size : 32;
>> +
>> + /*
>> + * BR[31: 0] This is written by ukernel to the byte offset of the
>> + * next free entry in the buffer on log buffer half full or state
>> + * capture notification.
>> + */
>> + u32 sampled_log_buf_wr_ptr : 32;
>> +
>> + /*
>> + * BR[31: 0] This is the byte offset of location 1 byte after last
>> + * valid guc log event entry written by Guc firmware before there
>> + * was a wraparound.
>> + * This field is updated by guc firmware and should be used by Host
>> + * when copying buffer contents to file.
>> + */
>> + u32 log_guc_buff_wrap_offset : 32;
>> +
>> + /*
>> + * BR[ 0: 0] uKernel sets this when log buffer is half full or when
>> + * a flush has been requested by KMD through host2guc.
>> + * uKernel will send GUC2HOST only if this bit is cleared. This is
>> + * to avoid unnecessary interrupts from GuC.
>> + */
>> + u32 log_buf_flush_to_file : 1;
>> +
>> + /*
>> + * BR[ 4: 1] uKernel increments this when log buffer overflows.
>> + * This is initialized to 0 by KMD.
>> + */
>> + u32 buffer_full_cnt : 4;
>> +
>> + /* BR[31: 5] Reserved. */
>> + u32 reserved : 27;
>> +
>> + /*
>> + * BR[31: 0] The Guc-Log-Entry format version, a single integer.
>> + * Current version is GUC_LOG_EVENT_ENTRY_FORMAT_VERSION
>> + */
>> + u32 version : 32;
>> +};
>> +
>> +/* GuC Log Events Buffer. This is optional fw LFD data. */
>> +struct guc_lfd_data_log_events_buf_t {
>> + /* version of GuC log format of buffer */
>> + u32 log_events_format_version;
>> +
>> + /*
>> + * If log_format_version == 1, array of guc_log_format_version_1_t.
>> + * If log_format_version == 2, array of guc_log_format_version_2_t.
>> + * Dword size determined by guc_logfile_lfd_t.desc_dw_size - 1
>> + */
>> + u32 log_format_buf[];
>> +};
>> +
>> +/* OS Version Information. This is mandatory host LFD data. */
>> +struct guc_lfd_data_os_id_t {
>> + /*
>> + * enum values to identify the OS brand (1=Windows, 2=Linux, etc..).
>> + * See guc_lfd_os_type_t for the range of types
>> + */
>> + u32 os_id;
>> +
>> + /*
>> + * ASCII string containing OS build version information based on os_id.
>> + * String is padded with null characters to ensure its DWORD aligned.
>> + * Dword size determined by guc_logfile_lfd_t.desc_dw_size - 1
>> + */
>> + char build_version[];
>> +};
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
>> index 72ad576fc18e..44c11ec662e5 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
>> @@ -1127,12 +1127,15 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action)
>> {
>> struct xe_gt *gt = ct_to_gt(ct);
>>
>> - if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED)
>> + if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) {
>> xe_gt_err(gt, "GuC Crash dump notification\n");
>> - else if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION)
>> - xe_gt_err(gt, "GuC Exception notification\n");
>> - else
>> - xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action);
>> + ct_to_guc(ct)->log.crash_dumped = true;
>> + } else {
>> + if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION)
>> + xe_gt_err(gt, "GuC Exception notification\n");
>> + else
>> + xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action);
>> + }
>>
>> CT_DEAD(ct, NULL, CRASH);
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_debugfs.c b/drivers/gpu/drm/xe/xe_guc_debugfs.c
>> index c569ff456e74..6449c9a69b8a 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_debugfs.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_debugfs.c
>> @@ -48,6 +48,19 @@ static int guc_log(struct seq_file *m, void *data)
>> return 0;
>> }
>>
>> +static int guc_log_lfd(struct seq_file *m, void *data)
>> +{
>> + struct xe_guc *guc = node_to_guc(m->private);
>> + struct xe_device *xe = guc_to_xe(guc);
>> + struct drm_printer p = drm_seq_file_printer(m);
>> +
>> + xe_pm_runtime_get(xe);
>
> just as side note:
>
> if device is already suspended and we are late trying to read the
> debugfs, does it make sense to wake up the device just to get fresh GuC
> log, with all previous data already lost/overwritten?
>
> this applies to legacy "guc_log" entry as well
>
> @John ?
>
>> + xe_guc_log_print_lfd(&guc->log, &p);
>> + xe_pm_runtime_put(xe);
>> +
>> + return 0;
>> +}
>> +
>> static int guc_log_dmesg(struct seq_file *m, void *data)
>> {
>> struct xe_guc *guc = node_to_guc(m->private);
>> @@ -89,6 +102,7 @@ static int guc_pc(struct seq_file *m, void *data)
>> static const struct drm_info_list debugfs_list[] = {
>> {"guc_info", guc_info, 0},
>> {"guc_log", guc_log, 0},
>> + {"guc_log_lfd", guc_log_lfd, 0},
>> {"guc_log_dmesg", guc_log_dmesg, 0},
>> {"guc_ctb", guc_ctb, 0},
>> {"guc_pc", guc_pc, 0},
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
>> index 80514a446ba2..447ee3c3b713 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
>> @@ -7,8 +7,12 @@
>>
>> #include <linux/fault-inject.h>
>>
>> +#include <generated/utsversion.h>
>> +#include <generated/utsrelease.h>
>> +
>> #include <drm/drm_managed.h>
>>
>> +#include "abi/guc_log_lfd_abi.h"
>> #include "regs/xe_guc_regs.h"
>> #include "xe_bo.h"
>> #include "xe_devcoredump.h"
>> @@ -19,6 +23,63 @@
>> #include "xe_mmio.h"
>> #include "xe_module.h"
>>
>> +static struct guc_log_buffer_entry_list {
>
> static const?
>
> or if non-const is required than it can't be static
non-const, "static" here is to prevent global access.
Emm, the structure here is a bit confused as key is const, but others
are non-const, will change it in next rev.>
>> + u32 key[2];
>> + u32 offset;
>> + u32 rd_ptr;
>> + u32 wr_ptr;
>> + u32 buf_size;
>> +} entry_list[GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT] = {
>> + {
>> + {
>> + LFD_LOG_BUFFER_MARKER_1V2,
>> + LFD_LOG_BUFFER_MARKER_2
>
> please use designated initializers
>
>> + }
>> + },
>> + {
>> + {
>> + LFD_LOG_BUFFER_MARKER_1V2,
>> + LFD_CRASH_DUMP_BUFFER_MARKER_2
>> + }
>> + },
>> + {
>> + {
>> + LFD_STATE_CAPTURE_BUFFER_MARKER_1V2,
>> + LFD_STATE_CAPTURE_BUFFER_MARKER_2
>> + }
>> + },
>> + {
>> + {
>> + GUC_LOG_INIT_CONFIG_LIC_MAGIC,
>> + ((GUC_LOG_INIT_CONFIG_FORMAT_VERSION_MAJOR << 16)
>> + | GUC_LOG_INIT_CONFIG_FORMAT_VERSION_MINOR)
>> + }
>> + },
>> +};
>> +
>> +static const struct guc_logfile_lfd_t default_guc_logfile_lfd = {
>> + .magic = GUC_LOGFILE_LFD_MAGIC,
>> +};
>> +
>> +static const struct guc_logfile_t default_guc_logfile = {
>> + .magic = LFD_DRIVER_KEY_STREAMING,
>> + .version = {
>> + .minor_version = GUC_LOG_FILE_FORMAT_VERSION_MINOR,
>> + .major_version = GUC_LOG_FILE_FORMAT_VERSION_MAJOR
>> + }
>> +};
>> +
>> +struct guc_log_init_config_save_t {
>> + struct guc_sw_version_t fw_ver;
>> + /*
>> + * Array of init config KLVs.
>> + * Range from GUC_LOG_LIC_TYPE_FIRST to
>> + * GUC_LOG_LIC_TYPE_LAST
>> + */
>> + u32 KLV[GUC_LOG_LIC_TYPE_LAST - GUC_LOG_LIC_TYPE_FIRST];
>> + struct guc_log_buffer_state_t log_buf_state;
>> +};
>> +
>> static struct xe_guc *
>> log_to_guc(struct xe_guc_log *log)
>> {
>> @@ -216,6 +277,258 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_
>> }
>> }
>>
>> +static int xe_guc_log_add_lfd_header(char *buf)
>
> what about available "buf" size?
Good point, will add it>
>> +{
>> + int len;
>> +
>> + len = sizeof(default_guc_logfile_lfd);
>> + memcpy(buf, &default_guc_logfile_lfd, len);
>> +
>> + return len;
>> +}
>> +
>> +static int xe_guc_log_add_payload(char *buf, u32 data_len, void *data)
>> +{
>> + struct guc_logfile_lfd_t *lfd = (struct guc_logfile_lfd_t *)buf;
>> +
>> + /* make length DW aligned */
>> + lfd->desc_dw_size = data_len / 4;
>> + if (data_len % 4)
>> + lfd->desc_dw_size++;
>> +
>> + if (lfd->data != data)
>> + memcpy(lfd->data, data, data_len);
>> +
>> + return lfd->desc_dw_size * 4;
>> +}
>> +
>> +static int xe_guc_log_add_typed_payload(char *buf, u32 type, u32 data_len, void *data)
>> +{
>> + int len;
>> + struct guc_logfile_lfd_t *lfd = (struct guc_logfile_lfd_t *)buf;
>> +
>> + len = xe_guc_log_add_lfd_header(buf);
>> + lfd->desc_type = type;
>> + len += xe_guc_log_add_payload(buf, data_len, data);
>> +
>> + return len;
>> +}
>> +
>> +static int xe_guc_log_add_os_id(char *buf, u32 id)
>> +{
>> + int info_len;
>> + int len = xe_guc_log_add_lfd_header(buf);
>> + struct guc_logfile_lfd_t *lfd = (struct guc_logfile_lfd_t *)buf;
>> + struct guc_lfd_data_os_id_t *os_id = (struct guc_lfd_data_os_id_t *)lfd->data;
>> +
>> + os_id->os_id = id;
>> + info_len = strlen(UTS_RELEASE) + 1;
>> + strscpy(os_id->build_version, UTS_RELEASE, info_len);
>> + len = xe_guc_log_add_typed_payload(buf, GUC_LFD_TYPE_OS_ID,
>> + info_len + sizeof(*os_id), os_id);
>> +
>> + return len;
>> +}
>> +
>> +static int
>> +xe_guc_log_add_log_event(char *buf, char *log_bin, struct guc_log_init_config_save_t *config)
>> +{
>> + int len;
>> + char *data;
>> + u32 data_len;
>> + struct guc_lfd_data_log_events_buf_t *events_buf;
>> + struct guc_logfile_lfd_t *lfd = (struct guc_logfile_lfd_t *)buf;
>> + struct guc_log_buffer_entry_list *entry;
>> +
>> + entry = &entry_list[GUC_LOG_BUFFER_STATE_HEADER_ENTRY_LOG];
>> +
>> + /* Skip empty log */
>> + if (entry->rd_ptr == entry->wr_ptr)
>> + return 0;
>> +
>> + len = xe_guc_log_add_lfd_header(buf);
>> + lfd = (struct guc_logfile_lfd_t *)buf;
>> +
>> + lfd->desc_type = GUC_LFD_TYPE_LOG_EVENTS_BUFFER;
>> + events_buf = (struct guc_lfd_data_log_events_buf_t *)&lfd->data;
>> + events_buf->log_events_format_version = config->log_buf_state.version;
>> +
>> + data = log_bin + entry->offset + entry->rd_ptr;
>> + if (entry->rd_ptr < entry->wr_ptr) {
>> + data_len = entry->wr_ptr - entry->rd_ptr;
>> + memcpy(events_buf->log_format_buf, data, data_len);
>> + } else {
>> + /* Copy rd to buf end 1st */
>> + data_len = entry->buf_size - entry->rd_ptr;
>> + memcpy(events_buf->log_format_buf, data, data_len);
>> +
>> + /* Copy buf start to wr */
>> + data = &log_bin[entry->offset];
>> + memcpy(&events_buf->log_format_buf[data_len / 4], data, entry->wr_ptr);
>> + data_len += entry->wr_ptr;
>> + }
>> +
>> + /* make length DW aligned */
>> + lfd->desc_dw_size = data_len / 4;
>> + if (data_len % 4)
>> + lfd->desc_dw_size++;
>> +
>> + /* Addup log_events_format_version size */
>> + lfd->desc_dw_size++;
>> +
>> + return len + lfd->desc_dw_size * 4;
>> +}
>> +
>> +static inline int lic_type_to_KLV_index(u32 lic_type)
>> +{
>> + XE_WARN_ON(lic_type < GUC_LOG_LIC_TYPE_FIRST || lic_type >= GUC_LOG_LIC_TYPE_LAST);
>> +
>> + return lic_type - GUC_LOG_LIC_TYPE_FIRST;
>> +}
>> +
>> +static int xe_guc_log_add_klv(char *buf, u32 lic_type, struct guc_log_init_config_save_t *config)
>> +{
>> + return xe_guc_log_add_typed_payload(buf, lic_type, sizeof(u32),
>> + &config->KLV[lic_type_to_KLV_index(lic_type)]);
>> +}
>> +
>> +static void xe_guc_log_loop_log_init(struct guc_log_init_config_t *init,
>> + struct guc_log_init_config_save_t *config)
>> +{
>> + int i;
>> + struct guc_klv_generic_t *p = (struct guc_klv_generic_t *)init->lic_data;
>> +
>> + for (i = 0; i < init->lic_dw_size;) {
>> + if (p->key < GUC_LOG_LIC_TYPE_GUC_SW_VERSION || p->key >= GUC_LOG_LIC_TYPE_LAST)
>> + break;
>> + if (p->key == GUC_LOG_LIC_TYPE_GUC_SW_VERSION)
>> + config->fw_ver = ((struct guc_lic_guc_sw_version_t *)p)->guc_sw_version;
>> + else
>> + config->KLV[lic_type_to_KLV_index(p->key)] = p->value[0];
>> + i += p->length + 1; /* +1DW to include kev(u16) and len(u16) */
>> + p = (struct guc_klv_generic_t *)((u32 *)p + p->length + 1);
>> + }
>> +}
>> +
>> +static void xe_guc_log_load_log_buffer(u32 *guc_log, int len,
>> + struct guc_log_init_config_save_t *config)
>> +{
>> + int i = 0;
>> + u32 offset = GUC_LOG_BUFFER_STATE_HEADER_LENGTH;
>> + struct guc_log_buffer_state_t *p = (struct guc_log_buffer_state_t *)guc_log;
>> +
>> + while (p) {
>> + for (i = 0; i < GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT; i++) {
>> + if (p->Marker[0] == entry_list[i].key[0] &&
>> + p->Marker[1] == entry_list[i].key[1]) {
>> + entry_list[i].offset = offset;
>> + entry_list[i].rd_ptr = p->log_buf_rd_ptr;
>> + entry_list[i].wr_ptr = p->log_guc_wr_ptr;
>> + entry_list[i].buf_size = p->log_buf_size;
>> +
>> + if (i == GUC_LOG_BUFFER_STATE_HEADER_ENTRY_LOG)
>> + config->log_buf_state = *p;
>> +
>> + if (i != GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT) {
>> + offset += p->log_buf_size;
>> + p++;
>> + } else {
>> + /* Load log init config */
>> + xe_guc_log_loop_log_init((struct guc_log_init_config_t *)p,
>> + config);
>> +
>> + /* Init config is the last */
>> + return;
>> + }
>> + }
>> + }
>> + if (i >= GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT)
>> + break;
>> + }
>> +}
>> +
>> +static uint xe_guc_log_save_to_lfd_buf(char *buf, u32 *guc_log_bin, struct xe_guc_log *log,
>> + struct guc_log_init_config_save_t *config)
>> +{
>> + u32 index = 0;
>> + char *bin = (char *)guc_log_bin;
>> + struct guc_logfile_t *guc_logfile;
>> + struct guc_log_buffer_entry_list *entry;
>> +
>> + guc_logfile = (struct guc_logfile_t *)buf;
>> + *guc_logfile = default_guc_logfile;
>> +
>> + index = offsetof(struct guc_logfile_t, lfd_stream);
>> + index += xe_guc_log_add_typed_payload(&buf[index], GUC_LFD_TYPE_FW_VERSION,
>> + sizeof(config->fw_ver), &config->fw_ver);
>> + index += xe_guc_log_add_klv(&buf[index], GUC_LFD_TYPE_GUC_DEVICE_ID, config);
>> + index += xe_guc_log_add_klv(&buf[index], GUC_LFD_TYPE_TSC_FREQUENCY, config);
>> + index += xe_guc_log_add_klv(&buf[index], GUC_LOG_LIC_TYPE_GMD_ID, config);
>> + index += xe_guc_log_add_klv(&buf[index], GUC_LOG_LIC_TYPE_BUILD_PLATFORM_ID, config);
>> + index += xe_guc_log_add_os_id(&buf[index], GUC_LFD_OS_TYPE_OSID_LIN);
>> + index += xe_guc_log_add_log_event(&buf[index], bin, config);
>> +
>> + /* For Crash dump, rd/wr ptr has no effect, only add if crash_dumped is true */
>> + if (log->crash_dumped) {
>> + entry = &entry_list[GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CRASH];
>> + if (entry->buf_size) {
>> + int i;
>> + u32 *buf32 = (u32 *)&bin[entry->offset];
>> +
>> + /* Is crash dump section has non zero data? */
>> + for (i = 0; i < entry->buf_size / 4; i++)
>> + if (buf32[i])
>> + break;
>> + /* Buffer has non-zero data */
>> + if (i < entry->buf_size / 4)
>> + index += xe_guc_log_add_typed_payload(&buf[index],
>> + GUC_LFD_TYPE_FW_CRASH_DUMP,
>> + entry->buf_size,
>> + &bin[entry->offset]);
>> + /* Clear flag */
>> + log->crash_dumped = false;
>> + }
>> + }
>> +
>> + return index;
>> +}
>> +
>> +/**
>> + * xe_guc_log_snapshot_print - dump a previously saved copy of the GuC log to some useful location
>> + * @snapshot: a snapshot of the GuC log
>> + * @p: the printer object to output to
>> + */
>
> this function is static - so it doesn't require kernel-doc (and this one
> doesn't bring any value, and is incomplete anyway)
Yes, unneccesary kernel doc.>
>> +static void
>> +xe_guc_log_snapshot_print_lfd(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p,
>> + struct xe_guc_log *log)
>> +{
>> + size_t remain;
>> + int i;
>> + struct guc_log_init_config_save_t init_config;
>> +
>> + if (!snapshot) {
>> + drm_printf(p, "GuC log snapshot not allocated!\n");
>
> maybe empty output will be better than random text that doesn't look as
> expected ascii85 data dump
Good point, to be removed.>
>> + return;
>> + }
>> +
>> + remain = snapshot->size;
>> + for (i = 0; i < snapshot->num_chunks; i++) {
>> + uint len = 0;
>> + size_t size = min(GUC_LOG_CHUNK_SIZE, remain);
>> + char *lfd_buf = kzalloc(size, GFP_KERNEL);
>> +
>> + /* load from log bin */
>> + xe_guc_log_load_log_buffer(snapshot->copy[i], size, &init_config);
>> + /* Conver to LFD format */
>> + len = xe_guc_log_save_to_lfd_buf(lfd_buf, snapshot->copy[i], log, &init_config);
>> +
>> + xe_print_blob_ascii85(p, "[LOG].data", 0, lfd_buf, 0, len);
>> + kfree(lfd_buf);
>> +
>> + remain -= size;
>> + }
>> +}
>> +
>> /**
>> * xe_guc_log_print_dmesg - dump a copy of the GuC log to dmesg
>> * @log: GuC log structure
>> @@ -251,6 +564,20 @@ void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p)
>> xe_guc_log_snapshot_free(snapshot);
>> }
>>
>> +/**
>> + * xe_guc_log_print_lfd - dump a copy of the GuC log to some useful location
>> + * @log: GuC log structure
>> + * @p: the printer object to output to
>> + */
>> +void xe_guc_log_print_lfd(struct xe_guc_log *log, struct drm_printer *p)
>> +{
>> + struct xe_guc_log_snapshot *snapshot;
>> +
>> + snapshot = xe_guc_log_snapshot_capture(log, false);
>> + xe_guc_log_snapshot_print_lfd(snapshot, p, log);
>> + xe_guc_log_snapshot_free(snapshot);
>> +}
>> +
>> int xe_guc_log_init(struct xe_guc_log *log)
>> {
>> struct xe_device *xe = log_to_xe(log);
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
>> index 5b896f5fafaf..37ff4d11e6cf 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.h
>> @@ -40,6 +40,7 @@ struct xe_device;
>>
>> int xe_guc_log_init(struct xe_guc_log *log);
>> void xe_guc_log_print(struct xe_guc_log *log, struct drm_printer *p);
>> +void xe_guc_log_print_lfd(struct xe_guc_log *log, struct drm_printer *p);
>> void xe_guc_log_print_dmesg(struct xe_guc_log *log);
>> struct xe_guc_log_snapshot *xe_guc_log_snapshot_capture(struct xe_guc_log *log, bool atomic);
>> void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p);
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> index b3d5c72ac752..d351f639727b 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log_types.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
>> @@ -46,6 +46,8 @@ struct xe_guc_log {
>> u32 level;
>> /** @bo: XE BO for GuC log */
>> struct xe_bo *bo;
>> + /** @crash_dumped: Indicate if crash dumped */
>> + bool crash_dumped;
>> /** @stats: logging related stats */
>> struct {
>> u32 sampled_overflow;
>
More information about the Intel-xe
mailing list