[PATCH v6 4/6] drm/xe/guc: Add GuC log init config in LFD format

Fri Aug 22 21:59:03 UTC 2025

See my comments inline below.

Regards,
Zhanjun Dong

On 2025-07-29 8:27 p.m., John Harrison wrote:
> On 7/21/2025 6:35 PM, Zhanjun Dong wrote:
>> Add support to output GuC log init config (LIC) in LFD format.
>>
>> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_guc_log.c | 204 ++++++++++++++++++++++++++++++++
>>   1 file changed, 204 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/ 
>> xe_guc_log.c
>> index d2256773aaf3..609a9fd5ba0c 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_log.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_log.c
>> @@ -7,6 +7,7 @@
>>   #include <linux/fault-inject.h>
>> +#include <linux/utsname.h>
>>   #include <drm/drm_managed.h>
>>   #include "abi/guc_lfd_abi.h"
>> @@ -20,12 +21,67 @@
>>   #include "xe_mmio.h"
>>   #include "xe_module.h"
>> +/* LFD supported LIC type range */
>> +#define GUC_LOG_LIC_TYPE_FIRST    (GUC_LIC_TYPE_GUC_SW_VERSION)
>> +#define GUC_LOG_LIC_TYPE_LAST    (GUC_LIC_TYPE_BUILD_PLATFORM_ID)
>> +
>> +#define GUC_LOG_BUFFER_STATE_HEADER_LENGTH        4096
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_LOG        0
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CRASH        1
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_CAPTURE    2
> These are already defined in guc_log_abi.h.
Will use the existing guc_log_buffer_type enum.

> 
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT        3
> And the LIC is not/does not have a guc_log_buffer_state header.
> 
>> +#define GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT        4
>> +
>> +#define GUC_LFD_BUFFER_SIZE    SZ_1K
>> +
>> +struct guc_log_buffer_entry_list {
>> +    u32 offset;
>> +    u32 rd_ptr;
>> +    u32 wr_ptr;
>> +    u32 buf_size;
>> +};
>> +
>> +struct guc_lic_save {
>> +    u32 version;
>> +    /*
>> +     * Array of init config KLVs.
>> +     * Range from GUC_LOG_LIC_TYPE_FIRST to GUC_LOG_LIC_TYPE_LAST
>> +     */
>> +    u32 KLV[GUC_LOG_LIC_TYPE_LAST - GUC_LOG_LIC_TYPE_FIRST + 1];
>> +    struct guc_log_buffer_entry_list 
>> entry[GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT];
>> +};
>> +
>> +static const struct guc_lfd_data default_guc_lfd_data = {
>> +    .dw0 = FIELD_PREP_CONST(GUC_LFD_DATA_MAGIC, 
>> GUC_LFD_DATA_HEADER_MAGIC)
>> +};
>> +
>>   static const struct guc_lfd_file_header default_guc_lfd_file_header = {
>>       .magic = GUC_LFD_DRIVER_KEY_STREAMING,
>>       .version.dw0 = FIELD_PREP_CONST(GUC_LFD_VERSION_MINOR, 
>> GUC_LFD_FORMAT_VERSION_MINOR) |
>>                  FIELD_PREP_CONST(GUC_LFD_VERSION_MAJOR, 
>> GUC_LFD_FORMAT_VERSION_MAJOR)
>>   };
>> +static struct guc_log_buffer_entry_markers {
>> +    u32 key[2];
>> +} const entry_markers[4] = {
>> +    {{
>> +        GUC_LFD_LOG_BUFFER_MARKER_1V2,
>> +        GUC_LFD_LOG_BUFFER_MARKER_2
>> +    }},
>> +    {{
>> +        GUC_LFD_LOG_BUFFER_MARKER_1V2,
>> +        GUC_LFD_CRASH_DUMP_BUFFER_MARKER_2
>> +    }},
>> +    {{
>> +        GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_1V2,
>> +        GUC_LFD_STATE_CAPTURE_BUFFER_MARKER_2
>> +    }},
>> +    {{
>> +        GUC_LIC_MAGIC,
>> +        ((GUC_LIC_FORMAT_VERSION_MAJOR << 16) | 
>> GUC_LIC_FORMAT_VERSION_MINOR)
>> +    }}
>> +};
>> +
>>   static struct xe_guc *
>>   log_to_guc(struct xe_guc_log *log)
>>   {
>> @@ -223,15 +279,163 @@ void xe_guc_log_snapshot_print(struct 
>> xe_guc_log_snapshot *snapshot, struct drm_
>>       }
>>   }
>> +static int xe_guc_log_add_lfd_header(void *buf, int buf_size)
>> +{
>> +    int len = sizeof(default_guc_lfd_data);
>> +
>> +    memcpy(buf, &default_guc_lfd_data, len);
>> +    return len;
>> +}
> For the sake of adding one dword in one place in the code, this seems 
> massively over-engineered.
> 
> I would just do "buf[0] = FIELD_PREP(MAGIC)".
Sure, will do>
>> +
>> +static int xe_guc_log_add_payload(void *buf, int buf_size, u32 
>> data_len, void *data)
>> +{
>> +    struct guc_lfd_data *lfd = buf;
>> +
>> +    /* make length DW aligned */
>> +    lfd->dw_size = DIV_ROUND_UP(data_len, sizeof(u32));
>> +    memcpy(lfd->data, data, data_len);
>> +    return lfd->dw_size * sizeof(u32);
>> +}
> Again, why split this out into a separate function? For such a small 
> amount of code that called only once, it is simpler/clearer to just do 
> it inline.
To be merged with next function>
>> +
>> +static int xe_guc_log_add_typed_payload(void *buf, int buf_size, u32 
>> type,
>> +                    u32 data_len, void *data)
>> +{
>> +    struct guc_lfd_data *lfd = buf;
>> +    int index;
>> +
>> +    index = xe_guc_log_add_lfd_header(buf, buf_size);
>> +    lfd->dw0 |= FIELD_PREP(GUC_LFD_DATA_DESC_TYPE, type);
>> +    index += xe_guc_log_add_payload(buf, buf_size, data_len, data);
> I'm not seeing any checks that the pre-allocated size is sufficient.
Yes, will change to either use local header or add size check>
>> +
>> +    return index;
>> +}
>> +
>> +static inline int lic_type_to_KLV_index(u32 lic_type)
>> +{
>> +    XE_WARN_ON(lic_type < GUC_LOG_LIC_TYPE_FIRST || lic_type > 
>> GUC_LOG_LIC_TYPE_LAST);
>> +
>> +    return lic_type - GUC_LOG_LIC_TYPE_FIRST;
>> +}
>> +
>> +static int xe_guc_log_add_klv(void *buf, int size, u32 lic_type,
>> +                  struct guc_lic_save *config)
>> +{
>> +    int klv_index = lic_type_to_KLV_index(lic_type);
>> +
>> +    return xe_guc_log_add_typed_payload(buf, size, lic_type, 
>> sizeof(u32),
>> +                        &config->KLV[klv_index]);
>> +}
>> +
>> +static int xe_guc_log_add_os_id(void *buf, int buf_size, u32 id)
>> +{
>> +    struct guc_lfd_data *lfd = buf;
>> +    struct guc_lfd_data_os_info *os_id;
>> +    char *version;
>> +    int info_len;
>> +
>> +    os_id = (void *)lfd->data;
>> +    os_id->os_id = id;
>> +
>> +    version = init_utsname()->release;
>> +    info_len = strnlen(version, buf_size) + 1;
>> +
>> +    if (buf_size < sizeof(struct guc_lfd_data) + info_len)
>> +        return -ENOMEM;
>> +
>> +    strscpy(os_id->build_version, version, info_len);
>> +    return xe_guc_log_add_typed_payload(buf, buf_size, 
>> GUC_LFD_TYPE_OS_ID,
>> +                        info_len + sizeof(*os_id), os_id);
>> +}
>> +
>> +static void xe_guc_log_loop_log_init(struct guc_lic *init,
>> +                     struct guc_lic_save *config)
>> +{
>> +    struct guc_klv_generic_dw_t *p = (void *)init->data;
>> +    int i;
>> +
>> +    for (i = 0; i < init->dw_size;) {
>> +        int klv_len = FIELD_GET(GUC_KLV_0_LEN, p->kl) + 1;
>> +        int key = FIELD_GET(GUC_KLV_0_KEY, p->kl);
>> +
>> +        if (key < GUC_LOG_LIC_TYPE_FIRST || key > GUC_LOG_LIC_TYPE_LAST)
>> +            break;
> Error message?
Will added>
>> +        config->KLV[lic_type_to_KLV_index(key)] = p->value;
>> +        i += klv_len;
>> +        p = (void *)((u32 *)p + klv_len);
>> +    }
>> +}
>> +
>> +static void xe_guc_log_load_lic(void *guc_log, struct guc_lic_save 
>> *config)
>> +{
>> +    u32 offset = GUC_LOG_BUFFER_STATE_HEADER_LENGTH;
>> +    struct guc_log_buffer_state *p = guc_log;
>> +    int i = 0;
>> +
>> +    config->version = p->version;
>> +    while (p) {
>> +        for (i = 0; i < GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT; i++) {
>> +            if (p->marker[0] == entry_markers[i].key[0] &&
>> +                p->marker[1] == entry_markers[i].key[1]) {
>> +                config->entry[i].offset = offset;
>> +                config->entry[i].rd_ptr = p->read_ptr;
>> +                config->entry[i].wr_ptr = p->write_ptr;
>> +                config->entry[i].buf_size = p->size;
>> +
>> +                if (i != GUC_LOG_BUFFER_STATE_HEADER_ENTRY_INIT) {
>> +                    offset += p->size;
>> +                    p++;
>> +                } else {
>> +                    /* Load log init config */
>> +                    xe_guc_log_loop_log_init((void *)p, config);
>> +
>> +                    /* Init config is the last */
>> +                    return;
>> +                }
>> +            }
>> +        }
>> +        if (i >= GUC_LOG_BUFFER_STATE_HEADER_ENTRY_COUNT)
>> +            break;
>> +    }
> This does not seem like the correct way to decode this data. Not all 
> these items are the same structure. While the crash dump section does 
> technically have a standard log buffer state header, it does not 
> actually use it. And of the register capture buffer is not interesting 
> at all to LFD dumping, which is the only other section to have a valid 
> header.
> 
> I would flip this around and have a helper function which finds the 
> structure in the buffer that starts with the given magic header and 
> simply returns a pointer to it (or the offset from base). Then the 
> processing code for each section (which would be very different 
> implementations and be called at different times) can retrieve the bit 
> it is interested in and use it and not worry about having to have a 
> generic function that processes all sections at once.
Good idea, will changed to use marker to find out structure for it>
>> +}
>> +
>> +static int
>> +xe_guc_log_output_lfd_init(struct drm_printer *p, struct 
>> xe_guc_log_snapshot *snapshot,
>> +               struct guc_lic_save *config)
>> +{
>> +    int type, len;
>> +    size_t size = 0;
>> +    char buf[GUC_LFD_BUFFER_SIZE] = {0};
> This will add 1KB of zeros as a global static object. You should never 
> give large arrays an empty initialisation like that. I recall someone 
> once doing that with a 16MB array and it took us ages to work out why 
> the driver was so huge in debug builds!  Also, this is putting a 1KB 
> array on the stack. Kernel stack space is limited and you should never 
> put large objects on the stack unless absolutely necessary. Just use 
> kzalloc() for this.
> 
>> +
>> +    /* FW required types */
>> +    for (type = GUC_LOG_LIC_TYPE_FIRST; type <= 
>> GUC_LOG_LIC_TYPE_LAST; type++)
>> +        size += xe_guc_log_add_klv(&buf[size], GUC_LFD_BUFFER_SIZE - 
>> size, type, config);
> I really don't like this idea of effectively cloning the LIC blocks into 
> LFD blocks. There is no requirement for any cross-compatibility of 
> structures, enums, etc. Also, the LIC information is potentially useful 
> data for the driver to include in other places, e.g. devcoredump output.
> 
> My preference would be to decode the LIC blocks as part of the GuC 
> initialisation and save the info in the xe_guc structure. Then the 
> devcoredump code can print it out and the LFD code can create LFD blocks 
> from it. But no longer with any assumptions or connections to LIC 
> formatting.
> 
> Also, no buffer overflow checking?
Will avoid use LIC_TYPE_XXX
To be changed to add type from GUC_LFD_TYPE_FW_VERSION to 
GUC_LFD_TYPE_BUILD_PLATFORM_ID>
>> +
>> +    /* KMD required type(s) */
>> +    len = xe_guc_log_add_os_id(&buf[size], GUC_LFD_BUFFER_SIZE - size,
> &buf[size] -> buf + size
> 
>> +                   GUC_LFD_OS_TYPE_OSID_LIN);
>> +    if (len < 0)
>> +        return len;
> Aborting on error means that if the given LFD block didn't fit then you 
> get no output at all. Would it not be better to print a warning message 
> and just skip adding that block?
To be optimized>
>> +    size += len;
>> +
>> +    xe_print_blob_ascii85(p, NULL, 0, buf, 0, size);
> Why use ASCII85 encoding? The LFD stream is supposed to just be raw 
> binary data, isn't it?
> 
> Hmm. For small logs, it might be useful to output in ASCII, but then do 
> we need some kind of file format on top of the LFD format? E.g. the 
> "[LOG].data" tag below is part of the devcoredump format and has nothing 
> to do with LFDs. Do we need any kind of extra formatting? The LFD 
> already contains all the meta data we currently need and can support 
> adding arbitrary meta data anyway. So there is no need for extra data 
> around the side of the main payload such as we have in the current 
> debugfs log dump file.
> 
> However, the main purpose of a streaming output file is for capturing 
> very large logs. And those are going to need to be compressed to move 
> them around. In which case, there is no point in doing ASCII encoding 
> because it is going to become a non-ASCII zip file anyway.
Sure, will output LFD in binary format.

> 
> John.
> 
>> +    return size;
>> +}
>> +
>>   static void
>>   xe_guc_log_snapshot_print_lfd(struct xe_guc_log_snapshot *snapshot, 
>> struct drm_printer *p)
>>   {
>> +    struct guc_lic_save config;
>> +
>>       if (!snapshot || !snapshot->size)
>>           return;
>>       /* Output LFD file header */
>>       xe_print_blob_ascii85(p, "[LOG].data", 0, 
>> &default_guc_lfd_file_header, 0,
>>                     offsetof(struct guc_lfd_file_header, lfd_stream));
>> +
>> +    /* Output LFD stream */
>> +    xe_guc_log_load_lic(snapshot->copy[0], &config);
>> +    xe_guc_log_output_lfd_init(p, snapshot, &config);
>>   }
>>   /**
>