[PATCH v6 4/6] drm/xe/guc: Add GuC log init config in LFD format
Dong, Zhanjun
zhanjun.dong at intel.com
Fri Aug 22 21:59:03 UTC 2025
See my comments inline below.
Regards,
Zhanjun Dong
On 2025-07-29 8:27 p.m., John Harrison wrote:
> On 7/21/2025 6:35 PM, Zhanjun Dong wrote:
>> Add support to output GuC log init config (LIC) in LFD format.
>>
>> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_guc_log.c | 204 ++++++++++++++++++++++++++++++++
>> 1 file changed, 204 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/
>> xe_guc_log.c
>> index d2256773aaf3..609a9fd5ba0c 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
>> @@ -7,6 +7,7 @@
>> #include <linux/fault-inject.h>
>> +#include <linux/utsname.h>
>> #include <drm/drm_managed.h>
>> #include "abi/guc_lfd_abi.h"
>> @@ -20,12 +21,67 @@
>> #include "xe_mmio.h"
>> #include "xe_module.h"
>> +/* LFD supported LIC type range */
>> +#define GUC_LOG_LIC_TYPE_FIRST (GUC_LIC_TYPE_GUC_SW_VERSION)
>> +#define GUC_LOG_LIC_TYPE_LAST (GUC_LIC_TYPE_BUILD_PLATFORM_ID)
>> +
>> +#define GUC_LOG_BUFFER_STATE_HEADER_LENGTH 4096
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_LOG 0
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CRASH 1
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CAPTURE 2
> These are already defined in guc_log_abi.h.
Will use the existing guc_log_buffer_type enum.
>
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT 3
> And the LIC is not/does not have a guc_log_buffer_state header.
>
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT 4
>> +
>> +#define GUC_LFD_BUFFER_SIZE SZ_1K
>> +
>> +struct guc_log_buffer_entry_list {
>> + u32 offset;
>> + u32 rd_ptr;
>> + u32 wr_ptr;
>> + u32 buf_size;
>> +};
>> +
>> +struct guc_lic_save {
>> + u32 version;
>> + /*
>> + * Array of init config KLVs.
>> + * Range from GUC_LOG_LIC_TYPE_FIRST to GUC_LOG_LIC_TYPE_LAST
>> + */
>> + u32 KLV[GUC_LOG_LIC_TYPE_LAST - GUC_LOG_LIC_TYPE_FIRST + 1];
>> + struct guc_log_buffer_entry_list
>> entry[GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT];
>> +};
>> +
>> +static const struct guc_lfd_data default_guc_lfd_data = {
>> + .dw0 = FIELD_PREP_CONST(GUC_LFD_DATA_MAGIC,
>> GUC_LFD_DATA_HEADER_MAGIC)
>> +};
>> +
>> static const struct guc_lfd_file_header default_guc_lfd_file_header = {
>> .magic = GUC_LFD_DRIVER_KEY_STREAMING,
>> .version.dw0 = FIELD_PREP_CONST(GUC_LFD_VERSION_MINOR,
>> GUC_LFD_FORMAT_VERSION_MINOR) |
>> FIELD_PREP_CONST(GUC_LFD_VERSION_MAJOR,
>> GUC_LFD_FORMAT_VERSION_MAJOR)
>> };
>> +static struct guc_log_buffer_entry_markers {
>> + u32 key[2];
>> +} const entry_markers[4] = {
>> + {{
>> + GUC_LFD_LOG_BUFFER_MARKER_1V2,
>> + GUC_LFD_LOG_BUFFER_MARKER_2
>> + }},
>> + {{
>> + GUC_LFD_LOG_BUFFER_MARKER_1V2,
>> + GUC_LFD_CRASH_DUMP_BUFFER_MARKER_2
>> + }},
>> + {{
>> + GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_1V2,
>> + GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_2
>> + }},
>> + {{
>> + GUC_LIC_MAGIC,
>> + ((GUC_LIC_FORMAT_VERSION_MAJOR << 16) |
>> GUC_LIC_FORMAT_VERSION_MINOR)
>> + }}
>> +};
>> +
>> static struct xe_guc *
>> log_to_guc(struct xe_guc_log *log)
>> {
>> @@ -223,15 +279,163 @@ void xe_guc_log_snapshot_print(struct
>> xe_guc_log_snapshot *snapshot, struct drm_
>> }
>> }
>> +static int xe_guc_log_add_lfd_header(void *buf, int buf_size)
>> +{
>> + int len = sizeof(default_guc_lfd_data);
>> +
>> + memcpy(buf, &default_guc_lfd_data, len);
>> + return len;
>> +}
> For the sake of adding one dword in one place in the code, this seems
> massively over-engineered.
>
> I would just do "buf[0] = FIELD_PREP(MAGIC)".
Sure, will do>
>> +
>> +static int xe_guc_log_add_payload(void *buf, int buf_size, u32
>> data_len, void *data)
>> +{
>> + struct guc_lfd_data *lfd = buf;
>> +
>> + /* make length DW aligned */
>> + lfd->dw_size = DIV_ROUND_UP(data_len, sizeof(u32));
>> + memcpy(lfd->data, data, data_len);
>> + return lfd->dw_size * sizeof(u32);
>> +}
> Again, why split this out into a separate function? For such a small
> amount of code that called only once, it is simpler/clearer to just do
> it inline.
To be merged with next function>
>> +
>> +static int xe_guc_log_add_typed_payload(void *buf, int buf_size, u32
>> type,
>> + u32 data_len, void *data)
>> +{
>> + struct guc_lfd_data *lfd = buf;
>> + int index;
>> +
>> + index = xe_guc_log_add_lfd_header(buf, buf_size);
>> + lfd->dw0 |= FIELD_PREP(GUC_LFD_DATA_DESC_TYPE, type);
>> + index += xe_guc_log_add_payload(buf, buf_size, data_len, data);
> I'm not seeing any checks that the pre-allocated size is sufficient.
Yes, will change to either use local header or add size check>
>> +
>> + return index;
>> +}
>> +
>> +static inline int lic_type_to_KLV_index(u32 lic_type)
>> +{
>> + XE_WARN_ON(lic_type < GUC_LOG_LIC_TYPE_FIRST || lic_type >
>> GUC_LOG_LIC_TYPE_LAST);
>> +
>> + return lic_type - GUC_LOG_LIC_TYPE_FIRST;
>> +}
>> +
>> +static int xe_guc_log_add_klv(void *buf, int size, u32 lic_type,
>> + struct guc_lic_save *config)
>> +{
>> + int klv_index = lic_type_to_KLV_index(lic_type);
>> +
>> + return xe_guc_log_add_typed_payload(buf, size, lic_type,
>> sizeof(u32),
>> + &config->KLV[klv_index]);
>> +}
>> +
>> +static int xe_guc_log_add_os_id(void *buf, int buf_size, u32 id)
>> +{
>> + struct guc_lfd_data *lfd = buf;
>> + struct guc_lfd_data_os_info *os_id;
>> + char *version;
>> + int info_len;
>> +
>> + os_id = (void *)lfd->data;
>> + os_id->os_id = id;
>> +
>> + version = init_utsname()->release;
>> + info_len = strnlen(version, buf_size) + 1;
>> +
>> + if (buf_size < sizeof(struct guc_lfd_data) + info_len)
>> + return -ENOMEM;
>> +
>> + strscpy(os_id->build_version, version, info_len);
>> + return xe_guc_log_add_typed_payload(buf, buf_size,
>> GUC_LFD_TYPE_OS_ID,
>> + info_len + sizeof(*os_id), os_id);
>> +}
>> +
>> +static void xe_guc_log_loop_log_init(struct guc_lic *init,
>> + struct guc_lic_save *config)
>> +{
>> + struct guc_klv_generic_dw_t *p = (void *)init->data;
>> + int i;
>> +
>> + for (i = 0; i < init->dw_size;) {
>> + int klv_len = FIELD_GET(GUC_KLV_0_LEN, p->kl) + 1;
>> + int key = FIELD_GET(GUC_KLV_0_KEY, p->kl);
>> +
>> + if (key < GUC_LOG_LIC_TYPE_FIRST || key > GUC_LOG_LIC_TYPE_LAST)
>> + break;
> Error message?
Will added>
>> + config->KLV[lic_type_to_KLV_index(key)] = p->value;
>> + i += klv_len;
>> + p = (void *)((u32 *)p + klv_len);
>> + }
>> +}
>> +
>> +static void xe_guc_log_load_lic(void *guc_log, struct guc_lic_save
>> *config)
>> +{
>> + u32 offset = GUC_LOG_BUFFER_STATE_HEADER_LENGTH;
>> + struct guc_log_buffer_state *p = guc_log;
>> + int i = 0;
>> +
>> + config->version = p->version;
>> + while (p) {
>> + for (i = 0; i < GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT; i++) {
>> + if (p->marker[0] == entry_markers[i].key[0] &&
>> + p->marker[1] == entry_markers[i].key[1]) {
>> + config->entry[i].offset = offset;
>> + config->entry[i].rd_ptr = p->read_ptr;
>> + config->entry[i].wr_ptr = p->write_ptr;
>> + config->entry[i].buf_size = p->size;
>> +
>> + if (i != GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT) {
>> + offset += p->size;
>> + p++;
>> + } else {
>> + /* Load log init config */
>> + xe_guc_log_loop_log_init((void *)p, config);
>> +
>> + /* Init config is the last */
>> + return;
>> + }
>> + }
>> + }
>> + if (i >= GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT)
>> + break;
>> + }
> This does not seem like the correct way to decode this data. Not all
> these items are the same structure. While the crash dump section does
> technically have a standard log buffer state header, it does not
> actually use it. And of the register capture buffer is not interesting
> at all to LFD dumping, which is the only other section to have a valid
> header.
>
> I would flip this around and have a helper function which finds the
> structure in the buffer that starts with the given magic header and
> simply returns a pointer to it (or the offset from base). Then the
> processing code for each section (which would be very different
> implementations and be called at different times) can retrieve the bit
> it is interested in and use it and not worry about having to have a
> generic function that processes all sections at once.
Good idea, will changed to use marker to find out structure for it>
>> +}
>> +
>> +static int
>> +xe_guc_log_output_lfd_init(struct drm_printer *p, struct
>> xe_guc_log_snapshot *snapshot,
>> + struct guc_lic_save *config)
>> +{
>> + int type, len;
>> + size_t size = 0;
>> + char buf[GUC_LFD_BUFFER_SIZE] = {0};
> This will add 1KB of zeros as a global static object. You should never
> give large arrays an empty initialisation like that. I recall someone
> once doing that with a 16MB array and it took us ages to work out why
> the driver was so huge in debug builds! Also, this is putting a 1KB
> array on the stack. Kernel stack space is limited and you should never
> put large objects on the stack unless absolutely necessary. Just use
> kzalloc() for this.
>
>> +
>> + /* FW required types */
>> + for (type = GUC_LOG_LIC_TYPE_FIRST; type <=
>> GUC_LOG_LIC_TYPE_LAST; type++)
>> + size += xe_guc_log_add_klv(&buf[size], GUC_LFD_BUFFER_SIZE -
>> size, type, config);
> I really don't like this idea of effectively cloning the LIC blocks into
> LFD blocks. There is no requirement for any cross-compatibility of
> structures, enums, etc. Also, the LIC information is potentially useful
> data for the driver to include in other places, e.g. devcoredump output.
>
> My preference would be to decode the LIC blocks as part of the GuC
> initialisation and save the info in the xe_guc structure. Then the
> devcoredump code can print it out and the LFD code can create LFD blocks
> from it. But no longer with any assumptions or connections to LIC
> formatting.
>
> Also, no buffer overflow checking?
Will avoid use LIC_TYPE_XXX
To be changed to add type from GUC_LFD_TYPE_FW_VERSION to
GUC_LFD_TYPE_BUILD_PLATFORM_ID>
>> +
>> + /* KMD required type(s) */
>> + len = xe_guc_log_add_os_id(&buf[size], GUC_LFD_BUFFER_SIZE - size,
> &buf[size] -> buf + size
>
>> + GUC_LFD_OS_TYPE_OSID_LIN);
>> + if (len < 0)
>> + return len;
> Aborting on error means that if the given LFD block didn't fit then you
> get no output at all. Would it not be better to print a warning message
> and just skip adding that block?
To be optimized>
>> + size += len;
>> +
>> + xe_print_blob_ascii85(p, NULL, 0, buf, 0, size);
> Why use ASCII85 encoding? The LFD stream is supposed to just be raw
> binary data, isn't it?
>
> Hmm. For small logs, it might be useful to output in ASCII, but then do
> we need some kind of file format on top of the LFD format? E.g. the
> "[LOG].data" tag below is part of the devcoredump format and has nothing
> to do with LFDs. Do we need any kind of extra formatting? The LFD
> already contains all the meta data we currently need and can support
> adding arbitrary meta data anyway. So there is no need for extra data
> around the side of the main payload such as we have in the current
> debugfs log dump file.
>
> However, the main purpose of a streaming output file is for capturing
> very large logs. And those are going to need to be compressed to move
> them around. In which case, there is no point in doing ASCII encoding
> because it is going to become a non-ASCII zip file anyway.
Sure, will output LFD in binary format.
>
> John.
>
>> + return size;
>> +}
>> +
>> static void
>> xe_guc_log_snapshot_print_lfd(struct xe_guc_log_snapshot *snapshot,
>> struct drm_printer *p)
>> {
>> + struct guc_lic_save config;
>> +
>> if (!snapshot || !snapshot->size)
>> return;
>> /* Output LFD file header */
>> xe_print_blob_ascii85(p, "[LOG].data", 0,
>> &default_guc_lfd_file_header, 0,
>> offsetof(struct guc_lfd_file_header, lfd_stream));
>> +
>> + /* Output LFD stream */
>> + xe_guc_log_load_lic(snapshot->copy[0], &config);
>> + xe_guc_log_output_lfd_init(p, snapshot, &config);
>> }
>> /**
>
More information about the Intel-xe
mailing list