[PATCH 1/2] lib/xe/intel_error_decode_xe: error decode support for xe driver
Souza, Jose
jose.souza at intel.com
Thu Feb 6 19:04:11 UTC 2025
On Thu, 2025-02-06 at 13:48 -0500, Rodrigo Vivi wrote:
> On Fri, Jan 31, 2025 at 08:29:39PM +0000, sai.gowtham.ch at intel.com wrote:
> > From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> >
> > Adding error decode support for xe driver, this lib support helps us to decode
> > the errors generated in the dumps, this lib is enabled in the existing intel_error_decode tool
> > to extend them to work for xe dev core dumps.
> >
>
> Cc: Jose
>
> I'd like to get Jose perspective since he implemented the Mesa decode tool.
Most of this and the next patch is a copy of parts of Mesa decoder... so in general looks good but it misses the parse of the VM section and hw
context.
I know that IGT will not be able to decode it into human readable instructions but it should at least parse it and make sure it exist, if not print a
error or fail a test.
Also in my opinion this should be converted into a test, don't make much sense as tool something that don't parse batch buffers so it make more sense
as a IGT test that fails if devcoredump "ABI" contract breaks.
>
>
> > Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > ---
> > lib/meson.build | 1 +
> > lib/xe/intel_error_decode_xe.c | 287 +++++++++++++++++++++++++++++
> > lib/xe/intel_error_decode_xe_lib.h | 26 +++
> > 3 files changed, 314 insertions(+)
> > create mode 100644 lib/xe/intel_error_decode_xe.c
> > create mode 100644 lib/xe/intel_error_decode_xe_lib.h
> >
> > diff --git a/lib/meson.build b/lib/meson.build
> > index 9fffdd3c6..c48a64a2c 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -112,6 +112,7 @@ lib_sources = [
> > 'igt_msm.c',
> > 'igt_dsc.c',
> > 'igt_hook.c',
> > + 'xe/intel_error_decode_xe.c',
> > 'xe/xe_gt.c',
> > 'xe/xe_ioctl.c',
> > 'xe/xe_mmio.c',
> > diff --git a/lib/xe/intel_error_decode_xe.c b/lib/xe/intel_error_decode_xe.c
> > new file mode 100644
> > index 000000000..8da06775d
> > --- /dev/null
> > +++ b/lib/xe/intel_error_decode_xe.c
>
> oh, so you are already in the lib/xe dir, sorry for missunderstanding the other patch.
> but my comment about the name suggestion is still valid: devcoredump_decode.h ?!
> or something like that...
>
> > @@ -0,0 +1,287 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > +* Copyright © 2025 Intel Corporation
> > +*
> > +* Authors:
> > +* Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > +*/
> > +
> > +#include <stdbool.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <xe_drm.h>
> > +
> > +#include "drmtest.h"
> > +#include "instdone.h"
> > +#include "intel_chipset.h"
> > +#include "intel_reg.h"
> > +#include "i915/intel_decode.h"
>
> hmmm... I really don't like that...
> If we need something in common we do need to have a separate lib
> at the lower level...
>
> > +#include "xe/intel_error_decode_xe_lib.h"
> > +
> > +static uint32_t
> > +xe_print_head(unsigned int reg)
> > +{
> > + printf(" head = 0x%08x, wraps = %d\n", reg & (0x7ffff<<2), reg >> 21);
> > + return reg & (0x7ffff<<2);
> > +}
> > +
> > +static uint32_t
> > +xe_print_ctl(unsigned int reg)
> > +{
> > + uint32_t ring_length = (((reg & (0x1ff << 12)) >> 12) + 1) * 4096;
> > +
> > +#define BIT_STR(reg, x, on, off) ((1 << (x)) & reg) ? on : off
> > +
> > + printf(" len=%d%s%s%s\n", ring_length,
> > + BIT_STR(reg, 0, ", enabled", ", disabled"),
> > + BIT_STR(reg, 10, ", semaphore wait ", ""),
> > + BIT_STR(reg, 11, ", rb wait ", "")
> > + );
> > +#undef BIT_STR
> > + return ring_length;
> > +}
> > +
> > +static void
> > +xe_print_acthd(unsigned int reg, unsigned int ring_length)
> > +{
> > + if ((reg & (0x7ffff << 2)) < ring_length)
> > + printf(" at ring: 0x%08x\n", reg & (0x7ffff << 2));
> > + else
> > + printf(" at batch: 0x%08x\n", reg);
> > +}
> > +
> > +static void
> > +xe_print_instdone(uint32_t devid, unsigned int instdone, unsigned int instdone1)
> > +{
> > + int i;
> > + static int once;
> > +
> > + if (!once) {
> > + if (!init_instdone_definitions(devid))
> > + return;
> > + once = 1;
> > + }
> > +
> > + for (i = 0; i < num_instdone_bits; i++) {
> > + int busy = 0;
> > +
> > + if (instdone_bits[i].reg == INSTDONE_1) {
> > + if (!(instdone1 & instdone_bits[i].bit))
> > + busy = 1;
> > + } else {
> > + if (!(instdone & instdone_bits[i].bit))
> > + busy = 1;
> > + }
> > +
> > + if (busy)
> > + printf(" busy: %s\n", instdone_bits[i].name);
> > + }
> > +}
> > +
> > +static uint16_t xe_get_engine_class(char *name)
> > +{
> > + uint16_t class;
> > +
> > + if (strcmp(name, "rcs") == 0) {
> > + class = DRM_XE_ENGINE_CLASS_RENDER;
> > + } else if (strcmp(name, "bcs") == 0) {
> > + class = DRM_XE_ENGINE_CLASS_COPY;
> > + } else if (strcmp(name, "vcs") == 0) {
> > + class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> > + } else if (strcmp(name, "vecs") == 0) {
> > + class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> > + } else if (strcmp(name, "ccs") == 0) {
> > + class = DRM_XE_ENGINE_CLASS_COMPUTE;
> > + }
> > +
> > + return class;
> > +}
> > +
> > +static const char *
> > +read_param(const char *line, const char *param)
> > +{
> > + if (!(strstr(line, param)))
> > + return NULL;
> > +
> > + while (*line != ':')
> > + line++;
> > + line += 2;
> > +
> > + return line;
> > +}
> > +
> > +/* parse lines like 'batch_addr[0]: 0x0000effeffff5000 */
> > +bool
> > +read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value)
> > +{
> > + line = read_param(line, parameter);
> > + if (!line)
> > + return false;
> > +
> > + *value = (uint64_t)strtoull(line, NULL, 0);
> > + return true;
> > +}
> > +
> > +/* parse lines like 'PCI ID: 0x9a49' */
> > +bool
> > +read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value)
> > +{
> > + line = read_param(line, parameter);
> > + if (!line)
> > + return false;
> > +
> > + *value = (int)strtoul(line, NULL, 0);
> > + return true;
> > +}
> > +
> > +/* parse lines like 'rcs0 (physical), logical instance=0' */
> > +bool
> > +read_error_decode_xe_engine_name(const char *line, char *ring_name)
> > +{
> > + int i;
> > +
> > + if (!strstr(line, " (physical), logical instance="))
> > + return false;
> > +
> > + i = 0;
> > + for (i = 0; *line != ' '; i++, line++)
> > + ring_name[i] = *line;
> > +
> > + ring_name[i] = 0;
> > + return true;
> > +}
> > +
> > +bool
> > +read_error_decode_topic(const char *line, enum xe_topic *new_topic)
> > +{
> > + static const char *xe_topic_strings[] = {
> > + "**** Xe Device Coredump ****",
> > + "**** GuC CT ****",
> > + "**** Job ****",
> > + "**** HW Engines ****",
> > + "**** VM state ****",
> > + };
> > + bool topic_changed = false;
> > +
> > + for (int i = 0; i < ARRAY_SIZE(xe_topic_strings); i++) {
> > + if (strncmp(xe_topic_strings[i], line, strlen(xe_topic_strings[i])) == 0) {
> > + topic_changed = true;
> > + *new_topic = i;
> > + break;
> > + }
> > + }
> > +
> > + return topic_changed;
> > +}
> > +
> > +void read_xe_data_file(FILE *file)
> > +{
> > + struct {
> > + uint64_t *addrs;
> > + uint8_t len;
> > + } batch_buffers = { .addrs = NULL, .len = 0 };
> > +
> > + unsigned int reg;
> > + uint32_t devid, ring_length = 0;
> > + char *line = NULL;
> > + size_t line_size;
> > + enum xe_topic xe_topic = XE_TOPIC_INVALID;
> > +
> > + while(getline(&line, &line_size, file) > 0) {
> > + bool topic_changed = false;
> > + bool print_line = true;
> > +
> > + topic_changed = read_error_decode_topic(line, &xe_topic);
> > + if(topic_changed) {
> > + print_line = (xe_topic != XE_TOPIC_VM);
> > + if(print_line)
> > + fputs(line, stdout);
> > + continue;
> > + }
> > +
> > + switch (xe_topic) {
> > + case XE_TOPIC_DEVICE: {
> > + uint32_t value;
> > +
> > + if (read_error_decode_xe_hex(line, "PCI ID", &value)) {
> > + devid = value;
> > + printf("Detected GEN%i chipset\n", intel_gen(devid));
> > + }
> > +
> > + break;
> > + }
> > + case XE_TOPIC_HW_ENGINES: {
> > + char engine_name[64];
> > + uint64_t u64_reg;
> > +
> > + if (read_error_decode_xe_engine_name(line, engine_name)) {
> > + xe_get_engine_class(engine_name);
> > + break;
> > + }
> > +
> > + if (read_error_decode_xe_hex(line, "RING_HEAD", ®)) {
> > + xe_print_head(reg);
> > + break;
> > + }
> > +
> > + if (read_error_decode_xe_hex(line, "RING_CTL", ®))
> > + ring_length = xe_print_ctl(reg);
> > +
> > + if (read_error_decode_xe_hex(line, "RING_INSTDONE", ®)) {
> > + fputs(line, stdout);
> > + xe_print_instdone(devid, reg, -1);
> > + break;
> > + }
> > +
> > + if (read_error_decode_xe_u64_hex(line, "ACTHD", &u64_reg)) {
> > + fputs(line, stdout);
> > + xe_print_acthd(u64_reg, ring_length);
> > + break;
> > + }
> > +
> > + if (read_error_decode_xe_hex(line, "SC_INSTDONE", ®)) {
> > + fputs(line, stdout);
> > + xe_print_instdone(devid, reg, -1);
> > + break;
> > + }
> > +
> > + if (read_error_decode_xe_hex(line, "SC_INSTDONE_EXTRA", ®)) {
> > + fputs(line, stdout);
> > + xe_print_instdone(devid, -1, reg);
> > + break;
> > + }
> > +
> > + if (read_error_decode_xe_hex(line, "SAMPLER_INSTDONE", ®)) {
> > + fputs(line, stdout);
> > + xe_print_instdone(devid, reg, -1);
> > + break;
> > + }
> > +
> > + if (read_error_decode_xe_hex(line, "ROW_INSTDONE", ®)) {
> > + fputs(line, stdout);
> > + xe_print_instdone(devid, reg, -1);
> > + break;
> > + }
> > +
> > + break;
> > + }
> > + case XE_TOPIC_JOB: {
> > + uint64_t u64_value;
> > +
> > + if (read_error_decode_xe_u64_hex(line, "batch_addr[", &u64_value)) {
> > + batch_buffers.addrs = realloc(batch_buffers.addrs, sizeof(uint64_t) * (batch_buffers.len + 1));
> > + batch_buffers.addrs[batch_buffers.len] = u64_value;
> > + batch_buffers.len++;
> > + }
> > +
> > + break;
> > + }
> > + default:
> > + break;
> > + }
> > + }
> > +
> > + free(batch_buffers.addrs);
> > + free(line);
> > +}
> > diff --git a/lib/xe/intel_error_decode_xe_lib.h b/lib/xe/intel_error_decode_xe_lib.h
> > new file mode 100644
> > index 000000000..fc69f7cce
> > --- /dev/null
> > +++ b/lib/xe/intel_error_decode_xe_lib.h
> > @@ -0,0 +1,26 @@
> > +/* SPDX-License-Identifier: MIT */
> > +/*
> > +* Copyright © 2025 Intel Corporation
> > +*
> > +* Authors:
> > +* Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > +*/
> > +
> > +#include <stdbool.h>
> > +#include <stdint.h>
> > +
> > +enum xe_topic {
> > + XE_TOPIC_DEVICE = 0,
> > + XE_TOPIC_GUC_CT,
> > + XE_TOPIC_JOB,
> > + XE_TOPIC_HW_ENGINES,
> > + XE_TOPIC_VM,
> > + XE_TOPIC_INVALID,
> > +};
> > +
> > +void read_xe_data_file(FILE *file);
> > +bool read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value);
> > +bool read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value);
> > +bool read_error_decode_xe_engine_name(const char *line, char *ring_name);
> > +
> > +bool read_error_decode_topic(const char *line, enum xe_topic *new_topic);
> > --
> > 2.34.1
> >
More information about the igt-dev
mailing list