[PATCH v6 4/6] drm/xe/guc: Add GuC log init config in LFD format

Wed Jul 30 00:27:01 UTC 2025

On 7/21/2025 6:35 PM, Zhanjun Dong wrote:
> Add support to output GuC log init config (LIC) in LFD format.
>
> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
> ---
>   drivers/gpu/drm/xe/xe_guc_log.c | 204 ++++++++++++++++++++++++++++++++
>   1 file changed, 204 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
> index d2256773aaf3..609a9fd5ba0c 100644
> --- a/drivers/gpu/drm/xe/xe_guc_log.c
> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
> @@ -7,6 +7,7 @@
>   
>   #include <linux/fault-inject.h>
>   
> +#include <linux/utsname.h>
>   #include <drm/drm_managed.h>
>   
>   #include "abi/guc_lfd_abi.h"
> @@ -20,12 +21,67 @@
>   #include "xe_mmio.h"
>   #include "xe_module.h"
>   
> +/* LFD supported LIC type range */
> +#define GUC_LOG_LIC_TYPE_FIRST	(GUC_LIC_TYPE_GUC_SW_VERSION)
> +#define GUC_LOG_LIC_TYPE_LAST	(GUC_LIC_TYPE_BUILD_PLATFORM_ID)
> +
> +#define GUC_LOG_BUFFER_STATE_HEADER_LENGTH		4096
> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_LOG		0
> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CRASH		1
> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CAPTURE	2
These are already defined in guc_log_abi.h.

> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT		3
And the LIC is not/does not have a guc_log_buffer_state header.

> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT		4
> +
> +#define GUC_LFD_BUFFER_SIZE	SZ_1K
> +
> +struct guc_log_buffer_entry_list {
> +	u32 offset;
> +	u32 rd_ptr;
> +	u32 wr_ptr;
> +	u32 buf_size;
> +};
> +
> +struct guc_lic_save {
> +	u32 version;
> +	/*
> +	 * Array of init config KLVs.
> +	 * Range from GUC_LOG_LIC_TYPE_FIRST to GUC_LOG_LIC_TYPE_LAST
> +	 */
> +	u32 KLV[GUC_LOG_LIC_TYPE_LAST - GUC_LOG_LIC_TYPE_FIRST + 1];
> +	struct guc_log_buffer_entry_list entry[GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT];
> +};
> +
> +static const struct guc_lfd_data default_guc_lfd_data = {
> +	.dw0 = FIELD_PREP_CONST(GUC_LFD_DATA_MAGIC, GUC_LFD_DATA_HEADER_MAGIC)
> +};
> +
>   static const struct guc_lfd_file_header default_guc_lfd_file_header = {
>   	.magic = GUC_LFD_DRIVER_KEY_STREAMING,
>   	.version.dw0 = FIELD_PREP_CONST(GUC_LFD_VERSION_MINOR, GUC_LFD_FORMAT_VERSION_MINOR) |
>   		       FIELD_PREP_CONST(GUC_LFD_VERSION_MAJOR, GUC_LFD_FORMAT_VERSION_MAJOR)
>   };
>   
> +static struct guc_log_buffer_entry_markers {
> +	u32 key[2];
> +} const entry_markers[4] = {
> +	{{
> +		GUC_LFD_LOG_BUFFER_MARKER_1V2,
> +		GUC_LFD_LOG_BUFFER_MARKER_2
> +	}},
> +	{{
> +		GUC_LFD_LOG_BUFFER_MARKER_1V2,
> +		GUC_LFD_CRASH_DUMP_BUFFER_MARKER_2
> +	}},
> +	{{
> +		GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_1V2,
> +		GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_2
> +	}},
> +	{{
> +		GUC_LIC_MAGIC,
> +		((GUC_LIC_FORMAT_VERSION_MAJOR << 16) | GUC_LIC_FORMAT_VERSION_MINOR)
> +	}}
> +};
> +
>   static struct xe_guc *
>   log_to_guc(struct xe_guc_log *log)
>   {
> @@ -223,15 +279,163 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_
>   	}
>   }
>   
> +static int xe_guc_log_add_lfd_header(void *buf, int buf_size)
> +{
> +	int len = sizeof(default_guc_lfd_data);
> +
> +	memcpy(buf, &default_guc_lfd_data, len);
> +	return len;
> +}
For the sake of adding one dword in one place in the code, this seems 
massively over-engineered.

I would just do "buf[0] = FIELD_PREP(MAGIC)".

> +
> +static int xe_guc_log_add_payload(void *buf, int buf_size, u32 data_len, void *data)
> +{
> +	struct guc_lfd_data *lfd = buf;
> +
> +	/* make length DW aligned */
> +	lfd->dw_size = DIV_ROUND_UP(data_len, sizeof(u32));
> +	memcpy(lfd->data, data, data_len);
> +	return lfd->dw_size * sizeof(u32);
> +}
Again, why split this out into a separate function? For such a small 
amount of code that called only once, it is simpler/clearer to just do 
it inline.

> +
> +static int xe_guc_log_add_typed_payload(void *buf, int buf_size, u32 type,
> +					u32 data_len, void *data)
> +{
> +	struct guc_lfd_data *lfd = buf;
> +	int index;
> +
> +	index = xe_guc_log_add_lfd_header(buf, buf_size);
> +	lfd->dw0 |= FIELD_PREP(GUC_LFD_DATA_DESC_TYPE, type);
> +	index += xe_guc_log_add_payload(buf, buf_size, data_len, data);
I'm not seeing any checks that the pre-allocated size is sufficient.

> +
> +	return index;
> +}
> +
> +static inline int lic_type_to_KLV_index(u32 lic_type)
> +{
> +	XE_WARN_ON(lic_type < GUC_LOG_LIC_TYPE_FIRST || lic_type > GUC_LOG_LIC_TYPE_LAST);
> +
> +	return lic_type - GUC_LOG_LIC_TYPE_FIRST;
> +}
> +
> +static int xe_guc_log_add_klv(void *buf, int size, u32 lic_type,
> +			      struct guc_lic_save *config)
> +{
> +	int klv_index = lic_type_to_KLV_index(lic_type);
> +
> +	return xe_guc_log_add_typed_payload(buf, size, lic_type, sizeof(u32),
> +					    &config->KLV[klv_index]);
> +}
> +
> +static int xe_guc_log_add_os_id(void *buf, int buf_size, u32 id)
> +{
> +	struct guc_lfd_data *lfd = buf;
> +	struct guc_lfd_data_os_info *os_id;
> +	char *version;
> +	int info_len;
> +
> +	os_id = (void *)lfd->data;
> +	os_id->os_id = id;
> +
> +	version = init_utsname()->release;
> +	info_len = strnlen(version, buf_size) + 1;
> +
> +	if (buf_size < sizeof(struct guc_lfd_data) + info_len)
> +		return -ENOMEM;
> +
> +	strscpy(os_id->build_version, version, info_len);
> +	return xe_guc_log_add_typed_payload(buf, buf_size, GUC_LFD_TYPE_OS_ID,
> +					    info_len + sizeof(*os_id), os_id);
> +}
> +
> +static void xe_guc_log_loop_log_init(struct guc_lic *init,
> +				     struct guc_lic_save *config)
> +{
> +	struct guc_klv_generic_dw_t *p = (void *)init->data;
> +	int i;
> +
> +	for (i = 0; i < init->dw_size;) {
> +		int klv_len = FIELD_GET(GUC_KLV_0_LEN, p->kl) + 1;
> +		int key = FIELD_GET(GUC_KLV_0_KEY, p->kl);
> +
> +		if (key < GUC_LOG_LIC_TYPE_FIRST || key > GUC_LOG_LIC_TYPE_LAST)
> +			break;
Error message?

> +		config->KLV[lic_type_to_KLV_index(key)] = p->value;
> +		i += klv_len;
> +		p = (void *)((u32 *)p + klv_len);
> +	}
> +}
> +
> +static void xe_guc_log_load_lic(void *guc_log, struct guc_lic_save *config)
> +{
> +	u32 offset = GUC_LOG_BUFFER_STATE_HEADER_LENGTH;
> +	struct guc_log_buffer_state *p = guc_log;
> +	int i = 0;
> +
> +	config->version = p->version;
> +	while (p) {
> +		for (i = 0; i < GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT; i++) {
> +			if (p->marker[0] == entry_markers[i].key[0] &&
> +			    p->marker[1] == entry_markers[i].key[1]) {
> +				config->entry[i].offset = offset;
> +				config->entry[i].rd_ptr = p->read_ptr;
> +				config->entry[i].wr_ptr = p->write_ptr;
> +				config->entry[i].buf_size = p->size;
> +
> +				if (i != GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT) {
> +					offset += p->size;
> +					p++;
> +				} else {
> +					/* Load log init config */
> +					xe_guc_log_loop_log_init((void *)p, config);
> +
> +					/* Init config is the last */
> +					return;
> +				}
> +			}
> +		}
> +		if (i >= GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT)
> +			break;
> +	}
This does not seem like the correct way to decode this data. Not all 
these items are the same structure. While the crash dump section does 
technically have a standard log buffer state header, it does not 
actually use it. And of the register capture buffer is not interesting 
at all to LFD dumping, which is the only other section to have a valid 
header.

I would flip this around and have a helper function which finds the 
structure in the buffer that starts with the given magic header and 
simply returns a pointer to it (or the offset from base). Then the 
processing code for each section (which would be very different 
implementations and be called at different times) can retrieve the bit 
it is interested in and use it and not worry about having to have a 
generic function that processes all sections at once.

> +}
> +
> +static int
> +xe_guc_log_output_lfd_init(struct drm_printer *p, struct xe_guc_log_snapshot *snapshot,
> +			   struct guc_lic_save *config)
> +{
> +	int type, len;
> +	size_t size = 0;
> +	char buf[GUC_LFD_BUFFER_SIZE] = {0};
This will add 1KB of zeros as a global static object. You should never 
give large arrays an empty initialisation like that. I recall someone 
once doing that with a 16MB array and it took us ages to work out why 
the driver was so huge in debug builds!  Also, this is putting a 1KB 
array on the stack. Kernel stack space is limited and you should never 
put large objects on the stack unless absolutely necessary. Just use 
kzalloc() for this.

> +
> +	/* FW required types */
> +	for (type = GUC_LOG_LIC_TYPE_FIRST; type <= GUC_LOG_LIC_TYPE_LAST; type++)
> +		size += xe_guc_log_add_klv(&buf[size], GUC_LFD_BUFFER_SIZE - size, type, config);
I really don't like this idea of effectively cloning the LIC blocks into 
LFD blocks. There is no requirement for any cross-compatibility of 
structures, enums, etc. Also, the LIC information is potentially useful 
data for the driver to include in other places, e.g. devcoredump output.

My preference would be to decode the LIC blocks as part of the GuC 
initialisation and save the info in the xe_guc structure. Then the 
devcoredump code can print it out and the LFD code can create LFD blocks 
from it. But no longer with any assumptions or connections to LIC 
formatting.

Also, no buffer overflow checking?

> +
> +	/* KMD required type(s) */
> +	len = xe_guc_log_add_os_id(&buf[size], GUC_LFD_BUFFER_SIZE - size,
&buf[size] -> buf + size

> +				   GUC_LFD_OS_TYPE_OSID_LIN);
> +	if (len < 0)
> +		return len;
Aborting on error means that if the given LFD block didn't fit then you 
get no output at all. Would it not be better to print a warning message 
and just skip adding that block?

> +	size += len;
> +
> +	xe_print_blob_ascii85(p, NULL, 0, buf, 0, size);
Why use ASCII85 encoding? The LFD stream is supposed to just be raw 
binary data, isn't it?

Hmm. For small logs, it might be useful to output in ASCII, but then do 
we need some kind of file format on top of the LFD format? E.g. the 
"[LOG].data" tag below is part of the devcoredump format and has nothing 
to do with LFDs. Do we need any kind of extra formatting? The LFD 
already contains all the meta data we currently need and can support 
adding arbitrary meta data anyway. So there is no need for extra data 
around the side of the main payload such as we have in the current 
debugfs log dump file.

However, the main purpose of a streaming output file is for capturing 
very large logs. And those are going to need to be compressed to move 
them around. In which case, there is no point in doing ASCII encoding 
because it is going to become a non-ASCII zip file anyway.

John.

> +	return size;
> +}
> +
>   static void
>   xe_guc_log_snapshot_print_lfd(struct xe_guc_log_snapshot *snapshot, struct drm_printer *p)
>   {
> +	struct guc_lic_save config;
> +
>   	if (!snapshot || !snapshot->size)
>   		return;
>   
>   	/* Output LFD file header */
>   	xe_print_blob_ascii85(p, "[LOG].data", 0, &default_guc_lfd_file_header, 0,
>   			      offsetof(struct guc_lfd_file_header, lfd_stream));
> +
> +	/* Output LFD stream */
> +	xe_guc_log_load_lic(snapshot->copy[0], &config);
> +	xe_guc_log_output_lfd_init(p, snapshot, &config);
>   }
>   
>   /**