[PATCH 1/2] lib/xe/intel_error_decode_xe: error decode support for xe driver

Rodrigo Vivi rodrigo.vivi at intel.com
Thu Feb 6 18:48:32 UTC 2025


On Fri, Jan 31, 2025 at 08:29:39PM +0000, sai.gowtham.ch at intel.com wrote:
> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> 
> Adding error decode support for xe driver, this lib support helps us to decode
> the errors generated in the dumps, this lib is enabled in the existing intel_error_decode tool
> to extend them to work for xe dev core dumps.
> 

Cc: Jose

I'd like to get Jose's perspective, since he implemented the Mesa decode tool.


> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> ---
>  lib/meson.build                    |   1 +
>  lib/xe/intel_error_decode_xe.c     | 287 +++++++++++++++++++++++++++++
>  lib/xe/intel_error_decode_xe_lib.h |  26 +++
>  3 files changed, 314 insertions(+)
>  create mode 100644 lib/xe/intel_error_decode_xe.c
>  create mode 100644 lib/xe/intel_error_decode_xe_lib.h
> 
> diff --git a/lib/meson.build b/lib/meson.build
> index 9fffdd3c6..c48a64a2c 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -112,6 +112,7 @@ lib_sources = [
>  	'igt_msm.c',
>  	'igt_dsc.c',
>  	'igt_hook.c',
> +        'xe/intel_error_decode_xe.c',
>  	'xe/xe_gt.c',
>  	'xe/xe_ioctl.c',
>  	'xe/xe_mmio.c',
> diff --git a/lib/xe/intel_error_decode_xe.c b/lib/xe/intel_error_decode_xe.c
> new file mode 100644
> index 000000000..8da06775d
> --- /dev/null
> +++ b/lib/xe/intel_error_decode_xe.c

Oh, so you are already in the lib/xe dir; sorry for misunderstanding the other patch.
But my naming suggestion from that review is still valid: devcoredump_decode.h?
Or something like that...

> @@ -0,0 +1,287 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright © 2025 Intel Corporation
> +*
> +* Authors:
> +*    Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> +*/
> +
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <xe_drm.h>
> +
> +#include "drmtest.h"
> +#include "instdone.h"
> +#include "intel_chipset.h"
> +#include "intel_reg.h"
> +#include "i915/intel_decode.h"

Hmm... I really don't like this include.
If we need something in common, we need to have a separate lib
at a lower level...

> +#include "xe/intel_error_decode_xe_lib.h"
> +
> +static uint32_t
> +xe_print_head(unsigned int reg)
> +{
> +	printf("    head = 0x%08x, wraps = %d\n", reg & (0x7ffff<<2), reg >> 21);
> +	return reg & (0x7ffff<<2);
> +}
> +
> +static uint32_t
> +xe_print_ctl(unsigned int reg)
> +{
> +        uint32_t ring_length =  (((reg & (0x1ff << 12)) >> 12) + 1) * 4096;
> +
> +#define BIT_STR(reg, x, on, off) ((1 << (x)) & reg) ? on : off
> +
> +        printf("    len=%d%s%s%s\n", ring_length,
> +               BIT_STR(reg, 0, ", enabled", ", disabled"),
> +               BIT_STR(reg, 10, ", semaphore wait ", ""),
> +               BIT_STR(reg, 11, ", rb wait ", "")
> +                );
> +#undef BIT_STR
> +        return ring_length;
> +}
> +
> +static void
> +xe_print_acthd(unsigned int reg, unsigned int ring_length)
> +{
> +        if ((reg & (0x7ffff << 2)) < ring_length)
> +                printf("    at ring: 0x%08x\n", reg & (0x7ffff << 2));
> +        else
> +                printf("    at batch: 0x%08x\n", reg);
> +}
> +
> +static void
> +xe_print_instdone(uint32_t devid, unsigned int instdone, unsigned int instdone1)
> +{
> +        int i;
> +        static int once;
> +
> +        if (!once) {
> +                if (!init_instdone_definitions(devid))
> +                        return;
> +                once = 1;
> +        }
> +
> +        for (i = 0; i < num_instdone_bits; i++) {
> +                int busy = 0;
> +
> +                if (instdone_bits[i].reg == INSTDONE_1) {
> +                        if (!(instdone1 & instdone_bits[i].bit))
> +                                busy = 1;
> +                } else {
> +                        if (!(instdone & instdone_bits[i].bit))
> +                                busy = 1;
> +                }
> +
> +                if (busy)
> +                        printf("    busy: %s\n", instdone_bits[i].name);
> +        }
> +}
> +
> +static uint16_t xe_get_engine_class(char *name)
> +{
> +        uint16_t class;
> +
> +        if (strcmp(name, "rcs") == 0) {
> +                class = DRM_XE_ENGINE_CLASS_RENDER;
> +        } else if (strcmp(name, "bcs") == 0) {
> +                class = DRM_XE_ENGINE_CLASS_COPY;
> +        } else if (strcmp(name, "vcs") == 0) {
> +                class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> +        } else if (strcmp(name, "vecs") == 0) {
> +                class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> +        } else if (strcmp(name, "ccs") == 0) {
> +                class = DRM_XE_ENGINE_CLASS_COMPUTE;
> +        }
> +
> +        return class;
> +}
> +
> +static const char *
> +read_param(const char *line, const char *param)
> +{
> +	if (!(strstr(line, param)))
> +		return NULL;
> +
> +	while (*line != ':')
> +		line++;
> +	line += 2;
> +
> +	return line;
> +}
> +
> +/* parse lines like 'batch_addr[0]: 0x0000effeffff5000 */
> +bool
> +read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value)
> +{
> +   line = read_param(line, parameter);
> +   if (!line)
> +      return false;
> +
> +   *value = (uint64_t)strtoull(line, NULL, 0);
> +   return true;
> +}
> +
> +/* parse lines like 'PCI ID: 0x9a49' */
> +bool
> +read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value)
> +{
> +   line = read_param(line, parameter);
> +   if (!line)
> +      return false;
> +
> +   *value = (int)strtoul(line, NULL, 0);
> +   return true;
> +}
> +
> +/* parse lines like 'rcs0 (physical), logical instance=0' */
> +bool
> +read_error_decode_xe_engine_name(const char *line, char *ring_name)
> +{
> +   int i;
> +
> +   if (!strstr(line, " (physical), logical instance="))
> +      return false;
> +
> +   i = 0;
> +   for (i = 0; *line != ' '; i++, line++)
> +      ring_name[i] = *line;
> +
> +   ring_name[i] = 0;
> +   return true;
> +}
> +
> +bool
> +read_error_decode_topic(const char *line, enum xe_topic *new_topic)
> +{
> +   static const char *xe_topic_strings[] = {
> +      "**** Xe Device Coredump ****",
> +      "**** GuC CT ****",
> +      "**** Job ****",
> +      "**** HW Engines ****",
> +      "**** VM state ****",
> +   };
> +   bool topic_changed = false;
> +
> +   for (int i = 0; i < ARRAY_SIZE(xe_topic_strings); i++) {
> +      if (strncmp(xe_topic_strings[i], line, strlen(xe_topic_strings[i])) == 0) {
> +         topic_changed = true;
> +         *new_topic = i;
> +         break;
> +      }
> +   }
> +
> +   return topic_changed;
> +}
> +
> +void read_xe_data_file(FILE *file)
> +{
> +	struct {
> +		uint64_t *addrs;
> +		uint8_t len;
> +	} batch_buffers = { .addrs = NULL, .len = 0 };
> +
> +	unsigned int reg;
> +	uint32_t devid, ring_length = 0;
> +	char  *line = NULL;
> +	size_t line_size;
> +	enum xe_topic xe_topic = XE_TOPIC_INVALID;
> +
> +	while(getline(&line, &line_size, file) > 0) {
> +		bool topic_changed = false;
> +		bool print_line = true;
> +
> +		topic_changed = read_error_decode_topic(line, &xe_topic);
> +		if(topic_changed) {
> +			print_line = (xe_topic != XE_TOPIC_VM);
> +			if(print_line)
> +				fputs(line, stdout);
> +			continue;
> +		}
> +
> +		switch (xe_topic) {
> +			case XE_TOPIC_DEVICE: {
> +				uint32_t value;
> +
> +				if (read_error_decode_xe_hex(line, "PCI ID", &value)) {
> +					devid = value;
> +					printf("Detected GEN%i chipset\n", intel_gen(devid));
> +				}
> +
> +				break;
> +			}
> +			case XE_TOPIC_HW_ENGINES: {
> +				char engine_name[64];
> +				uint64_t u64_reg;
> +
> +				if (read_error_decode_xe_engine_name(line, engine_name)) {
> +					xe_get_engine_class(engine_name);
> +					break;
> +				}
> +
> +				if (read_error_decode_xe_hex(line, "RING_HEAD", &reg)) {
> +					xe_print_head(reg);
> +					break;
> +				}
> +
> +				if (read_error_decode_xe_hex(line, "RING_CTL", &reg))
> +					ring_length = xe_print_ctl(reg);
> +
> +				if (read_error_decode_xe_hex(line, "RING_INSTDONE", &reg)) {
> +					fputs(line, stdout);
> +					xe_print_instdone(devid, reg, -1);
> +					break;
> +				}
> +
> +				if (read_error_decode_xe_u64_hex(line, "ACTHD", &u64_reg)) {
> +					fputs(line, stdout);
> +					xe_print_acthd(u64_reg, ring_length);
> +					break;
> +				}
> +
> +				if (read_error_decode_xe_hex(line, "SC_INSTDONE", &reg)) {
> +					fputs(line, stdout);
> +					xe_print_instdone(devid, reg, -1);
> +					break;
> +				}
> +
> +				if (read_error_decode_xe_hex(line, "SC_INSTDONE_EXTRA", &reg)) {
> +					fputs(line, stdout);
> +					xe_print_instdone(devid, -1, reg);
> +					break;
> +				}
> +
> +				if (read_error_decode_xe_hex(line, "SAMPLER_INSTDONE", &reg)) {
> +					fputs(line, stdout);
> +					xe_print_instdone(devid, reg, -1);
> +					break;
> +				}
> +
> +				if (read_error_decode_xe_hex(line, "ROW_INSTDONE", &reg)) {
> +					fputs(line, stdout);
> +					xe_print_instdone(devid, reg, -1);
> +					break;
> +				}
> +
> +				break;
> +			}
> +			case XE_TOPIC_JOB: {
> +				uint64_t u64_value;
> +
> +				if (read_error_decode_xe_u64_hex(line, "batch_addr[", &u64_value)) {
> +					batch_buffers.addrs = realloc(batch_buffers.addrs, sizeof(uint64_t) * (batch_buffers.len + 1));
> +					batch_buffers.addrs[batch_buffers.len] = u64_value;
> +					batch_buffers.len++;
> +				}
> +
> +				break;
> +			}
> +			default:
> +				break;
> +		}
> +	}
> +
> +	free(batch_buffers.addrs);
> +	free(line);
> +}
> diff --git a/lib/xe/intel_error_decode_xe_lib.h b/lib/xe/intel_error_decode_xe_lib.h
> new file mode 100644
> index 000000000..fc69f7cce
> --- /dev/null
> +++ b/lib/xe/intel_error_decode_xe_lib.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright © 2025 Intel Corporation
> +*
> +* Authors:
> +*    Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> +*/
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +
> +enum xe_topic {
> +   XE_TOPIC_DEVICE = 0,
> +   XE_TOPIC_GUC_CT,
> +   XE_TOPIC_JOB,
> +   XE_TOPIC_HW_ENGINES,
> +   XE_TOPIC_VM,
> +   XE_TOPIC_INVALID,
> +};
> +
> +void read_xe_data_file(FILE *file);
> +bool read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value);
> +bool read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value);
> +bool read_error_decode_xe_engine_name(const char *line, char *ring_name);
> +
> +bool read_error_decode_topic(const char *line, enum xe_topic *new_topic);
> -- 
> 2.34.1
> 


More information about the igt-dev mailing list