[PATCH 1/2] lib/xe/intel_error_decode_xe: error decode support for xe driver

Souza, Jose jose.souza at intel.com
Thu Feb 6 19:04:11 UTC 2025


On Thu, 2025-02-06 at 13:48 -0500, Rodrigo Vivi wrote:
> On Fri, Jan 31, 2025 at 08:29:39PM +0000, sai.gowtham.ch at intel.com wrote:
> > From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > 
> > Adding error decode support for xe driver, this lib support helps us to decode
> > the errors generated in the dumps, this lib is enabled in the existing intel_error_decode tool
> > to extend them to work for xe dev core dumps.
> > 
> 
> Cc: Jose
> 
> I'd like to get Jose perspective since he implemented the Mesa decode tool.

Most of this and the next patch is a copy of parts of the Mesa decoder... so in general it looks good, but it is missing the parsing of the VM section and the hw
context.
I know that IGT will not be able to decode it into human-readable instructions, but it should at least parse it and make sure it exists and, if not, print an
error or fail a test.

Also, in my opinion this should be converted into a test: something that doesn't parse batch buffers doesn't make much sense as a tool, so it makes more sense
as an IGT test that fails if the devcoredump "ABI" contract breaks.


> 
> 
> > Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > ---
> >  lib/meson.build                    |   1 +
> >  lib/xe/intel_error_decode_xe.c     | 287 +++++++++++++++++++++++++++++
> >  lib/xe/intel_error_decode_xe_lib.h |  26 +++
> >  3 files changed, 314 insertions(+)
> >  create mode 100644 lib/xe/intel_error_decode_xe.c
> >  create mode 100644 lib/xe/intel_error_decode_xe_lib.h
> > 
> > diff --git a/lib/meson.build b/lib/meson.build
> > index 9fffdd3c6..c48a64a2c 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -112,6 +112,7 @@ lib_sources = [
> >  	'igt_msm.c',
> >  	'igt_dsc.c',
> >  	'igt_hook.c',
> > +        'xe/intel_error_decode_xe.c',
> >  	'xe/xe_gt.c',
> >  	'xe/xe_ioctl.c',
> >  	'xe/xe_mmio.c',
> > diff --git a/lib/xe/intel_error_decode_xe.c b/lib/xe/intel_error_decode_xe.c
> > new file mode 100644
> > index 000000000..8da06775d
> > --- /dev/null
> > +++ b/lib/xe/intel_error_decode_xe.c
> 
> oh, so you are already in the lib/xe dir, sorry for missunderstanding the other patch.
> but my comment about the name suggestion is still valid: devcoredump_decode.h ?!
> or something like that...
> 
> > @@ -0,0 +1,287 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > +* Copyright © 2025 Intel Corporation
> > +*
> > +* Authors:
> > +*    Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > +*/
> > +
> > +#include <stdbool.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <xe_drm.h>
> > +
> > +#include "drmtest.h"
> > +#include "instdone.h"
> > +#include "intel_chipset.h"
> > +#include "intel_reg.h"
> > +#include "i915/intel_decode.h"
> 
> hmmm... I really don't like that...
> If we need something in common we do need to have a separate lib
> at the lower level...
> 
> > +#include "xe/intel_error_decode_xe_lib.h"
> > +
> > +static uint32_t
> > +xe_print_head(unsigned int reg)
> > +{
> > +	printf("    head = 0x%08x, wraps = %d\n", reg & (0x7ffff<<2), reg >> 21);
> > +	return reg & (0x7ffff<<2);
> > +}
> > +
> > +static uint32_t
> > +xe_print_ctl(unsigned int reg)
> > +{
> > +        uint32_t ring_length =  (((reg & (0x1ff << 12)) >> 12) + 1) * 4096;
> > +
> > +#define BIT_STR(reg, x, on, off) ((1 << (x)) & reg) ? on : off
> > +
> > +        printf("    len=%d%s%s%s\n", ring_length,
> > +               BIT_STR(reg, 0, ", enabled", ", disabled"),
> > +               BIT_STR(reg, 10, ", semaphore wait ", ""),
> > +               BIT_STR(reg, 11, ", rb wait ", "")
> > +                );
> > +#undef BIT_STR
> > +        return ring_length;
> > +}
> > +
> > +static void
> > +xe_print_acthd(unsigned int reg, unsigned int ring_length)
> > +{
> > +        if ((reg & (0x7ffff << 2)) < ring_length)
> > +                printf("    at ring: 0x%08x\n", reg & (0x7ffff << 2));
> > +        else
> > +                printf("    at batch: 0x%08x\n", reg);
> > +}
> > +
> > +static void
> > +xe_print_instdone(uint32_t devid, unsigned int instdone, unsigned int instdone1)
> > +{
> > +        int i;
> > +        static int once;
> > +
> > +        if (!once) {
> > +                if (!init_instdone_definitions(devid))
> > +                        return;
> > +                once = 1;
> > +        }
> > +
> > +        for (i = 0; i < num_instdone_bits; i++) {
> > +                int busy = 0;
> > +
> > +                if (instdone_bits[i].reg == INSTDONE_1) {
> > +                        if (!(instdone1 & instdone_bits[i].bit))
> > +                                busy = 1;
> > +                } else {
> > +                        if (!(instdone & instdone_bits[i].bit))
> > +                                busy = 1;
> > +                }
> > +
> > +                if (busy)
> > +                        printf("    busy: %s\n", instdone_bits[i].name);
> > +        }
> > +}
> > +
> > +static uint16_t xe_get_engine_class(char *name)
> > +{
> > +        uint16_t class;
> > +
> > +        if (strcmp(name, "rcs") == 0) {
> > +                class = DRM_XE_ENGINE_CLASS_RENDER;
> > +        } else if (strcmp(name, "bcs") == 0) {
> > +                class = DRM_XE_ENGINE_CLASS_COPY;
> > +        } else if (strcmp(name, "vcs") == 0) {
> > +                class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> > +        } else if (strcmp(name, "vecs") == 0) {
> > +                class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> > +        } else if (strcmp(name, "ccs") == 0) {
> > +                class = DRM_XE_ENGINE_CLASS_COMPUTE;
> > +        }
> > +
> > +        return class;
> > +}
> > +
> > +static const char *
> > +read_param(const char *line, const char *param)
> > +{
> > +	if (!(strstr(line, param)))
> > +		return NULL;
> > +
> > +	while (*line != ':')
> > +		line++;
> > +	line += 2;
> > +
> > +	return line;
> > +}
> > +
> > +/* parse lines like 'batch_addr[0]: 0x0000effeffff5000 */
> > +bool
> > +read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value)
> > +{
> > +   line = read_param(line, parameter);
> > +   if (!line)
> > +      return false;
> > +
> > +   *value = (uint64_t)strtoull(line, NULL, 0);
> > +   return true;
> > +}
> > +
> > +/* parse lines like 'PCI ID: 0x9a49' */
> > +bool
> > +read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value)
> > +{
> > +   line = read_param(line, parameter);
> > +   if (!line)
> > +      return false;
> > +
> > +   *value = (int)strtoul(line, NULL, 0);
> > +   return true;
> > +}
> > +
> > +/* parse lines like 'rcs0 (physical), logical instance=0' */
> > +bool
> > +read_error_decode_xe_engine_name(const char *line, char *ring_name)
> > +{
> > +   int i;
> > +
> > +   if (!strstr(line, " (physical), logical instance="))
> > +      return false;
> > +
> > +   i = 0;
> > +   for (i = 0; *line != ' '; i++, line++)
> > +      ring_name[i] = *line;
> > +
> > +   ring_name[i] = 0;
> > +   return true;
> > +}
> > +
> > +bool
> > +read_error_decode_topic(const char *line, enum xe_topic *new_topic)
> > +{
> > +   static const char *xe_topic_strings[] = {
> > +      "**** Xe Device Coredump ****",
> > +      "**** GuC CT ****",
> > +      "**** Job ****",
> > +      "**** HW Engines ****",
> > +      "**** VM state ****",
> > +   };
> > +   bool topic_changed = false;
> > +
> > +   for (int i = 0; i < ARRAY_SIZE(xe_topic_strings); i++) {
> > +      if (strncmp(xe_topic_strings[i], line, strlen(xe_topic_strings[i])) == 0) {
> > +         topic_changed = true;
> > +         *new_topic = i;
> > +         break;
> > +      }
> > +   }
> > +
> > +   return topic_changed;
> > +}
> > +
> > +void read_xe_data_file(FILE *file)
> > +{
> > +	struct {
> > +		uint64_t *addrs;
> > +		uint8_t len;
> > +	} batch_buffers = { .addrs = NULL, .len = 0 };
> > +
> > +	unsigned int reg;
> > +	uint32_t devid, ring_length = 0;
> > +	char  *line = NULL;
> > +	size_t line_size;
> > +	enum xe_topic xe_topic = XE_TOPIC_INVALID;
> > +
> > +	while(getline(&line, &line_size, file) > 0) {
> > +		bool topic_changed = false;
> > +		bool print_line = true;
> > +
> > +		topic_changed = read_error_decode_topic(line, &xe_topic);
> > +		if(topic_changed) {
> > +			print_line = (xe_topic != XE_TOPIC_VM);
> > +			if(print_line)
> > +				fputs(line, stdout);
> > +			continue;
> > +		}
> > +
> > +		switch (xe_topic) {
> > +			case XE_TOPIC_DEVICE: {
> > +				uint32_t value;
> > +
> > +				if (read_error_decode_xe_hex(line, "PCI ID", &value)) {
> > +					devid = value;
> > +					printf("Detected GEN%i chipset\n", intel_gen(devid));
> > +				}
> > +
> > +				break;
> > +			}
> > +			case XE_TOPIC_HW_ENGINES: {
> > +				char engine_name[64];
> > +				uint64_t u64_reg;
> > +
> > +				if (read_error_decode_xe_engine_name(line, engine_name)) {
> > +					xe_get_engine_class(engine_name);
> > +					break;
> > +				}
> > +
> > +				if (read_error_decode_xe_hex(line, "RING_HEAD", &reg)) {
> > +					xe_print_head(reg);
> > +					break;
> > +				}
> > +
> > +				if (read_error_decode_xe_hex(line, "RING_CTL", &reg))
> > +					ring_length = xe_print_ctl(reg);
> > +
> > +				if (read_error_decode_xe_hex(line, "RING_INSTDONE", &reg)) {
> > +					fputs(line, stdout);
> > +					xe_print_instdone(devid, reg, -1);
> > +					break;
> > +				}
> > +
> > +				if (read_error_decode_xe_u64_hex(line, "ACTHD", &u64_reg)) {
> > +					fputs(line, stdout);
> > +					xe_print_acthd(u64_reg, ring_length);
> > +					break;
> > +				}
> > +
> > +				if (read_error_decode_xe_hex(line, "SC_INSTDONE", &reg)) {
> > +					fputs(line, stdout);
> > +					xe_print_instdone(devid, reg, -1);
> > +					break;
> > +				}
> > +
> > +				if (read_error_decode_xe_hex(line, "SC_INSTDONE_EXTRA", &reg)) {
> > +					fputs(line, stdout);
> > +					xe_print_instdone(devid, -1, reg);
> > +					break;
> > +				}
> > +
> > +				if (read_error_decode_xe_hex(line, "SAMPLER_INSTDONE", &reg)) {
> > +					fputs(line, stdout);
> > +					xe_print_instdone(devid, reg, -1);
> > +					break;
> > +				}
> > +
> > +				if (read_error_decode_xe_hex(line, "ROW_INSTDONE", &reg)) {
> > +					fputs(line, stdout);
> > +					xe_print_instdone(devid, reg, -1);
> > +					break;
> > +				}
> > +
> > +				break;
> > +			}
> > +			case XE_TOPIC_JOB: {
> > +				uint64_t u64_value;
> > +
> > +				if (read_error_decode_xe_u64_hex(line, "batch_addr[", &u64_value)) {
> > +					batch_buffers.addrs = realloc(batch_buffers.addrs, sizeof(uint64_t) * (batch_buffers.len + 1));
> > +					batch_buffers.addrs[batch_buffers.len] = u64_value;
> > +					batch_buffers.len++;
> > +				}
> > +
> > +				break;
> > +			}
> > +			default:
> > +				break;
> > +		}
> > +	}
> > +
> > +	free(batch_buffers.addrs);
> > +	free(line);
> > +}
> > diff --git a/lib/xe/intel_error_decode_xe_lib.h b/lib/xe/intel_error_decode_xe_lib.h
> > new file mode 100644
> > index 000000000..fc69f7cce
> > --- /dev/null
> > +++ b/lib/xe/intel_error_decode_xe_lib.h
> > @@ -0,0 +1,26 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > +* Copyright © 2025 Intel Corporation
> > +*
> > +* Authors:
> > +*    Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > +*/
> > +
> > +#include <stdbool.h>
> > +#include <stdint.h>
> > +
> > +enum xe_topic {
> > +   XE_TOPIC_DEVICE = 0,
> > +   XE_TOPIC_GUC_CT,
> > +   XE_TOPIC_JOB,
> > +   XE_TOPIC_HW_ENGINES,
> > +   XE_TOPIC_VM,
> > +   XE_TOPIC_INVALID,
> > +};
> > +
> > +void read_xe_data_file(FILE *file);
> > +bool read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value);
> > +bool read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value);
> > +bool read_error_decode_xe_engine_name(const char *line, char *ring_name);
> > +
> > +bool read_error_decode_topic(const char *line, enum xe_topic *new_topic);
> > -- 
> > 2.34.1
> > 



More information about the igt-dev mailing list