[PATCH 1/2] lib/xe/intel_error_decode_xe: error decode support for xe driver
Souza, Jose
jose.souza at intel.com
Tue Feb 11 13:21:44 UTC 2025
On Tue, 2025-02-11 at 05:35 +0000, Ch, Sai Gowtham wrote:
>
> > -----Original Message-----
> > From: Souza, Jose <jose.souza at intel.com>
> > Sent: Friday, February 7, 2025 12:34 AM
> > To: Vivi, Rodrigo <rodrigo.vivi at intel.com>; Ch, Sai Gowtham
> > <sai.gowtham.ch at intel.com>
> > Cc: igt-dev at lists.freedesktop.org
> > Subject: Re: [PATCH 1/2] lib/xe/intel_error_decode_xe: error decode support for xe
> > driver
> >
> > On Thu, 2025-02-06 at 13:48 -0500, Rodrigo Vivi wrote:
> > > On Fri, Jan 31, 2025 at 08:29:39PM +0000, sai.gowtham.ch at intel.com wrote:
> > > > From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > > >
> > > > Adding error decode support for xe driver, this lib support helps us
> > > > to decode the errors generated in the dumps, this lib is enabled in
> > > > the existing intel_error_decode tool to extend them to work for xe dev core
> > dumps.
> > > >
> > >
> > > Cc: Jose
> > >
> > > I'd like to get Jose perspective since he implemented the Mesa decode tool.
> >
> > Most of this and the next patch is a copy of parts of Mesa decoder... so in general
> > looks good but it misses the parse of the VM section and hw context.
> > I know that IGT will not be able to decode it into human readable instructions but
> > it should at least parse it and make sure it exist, if not print a error or fail a test.
> >
> Thanks for your comments,
> VM Section and HW context, Will be part of enhancement to this tool, there is plan to implement it soon.
>
> > Also in my opinion this should be converted into a test, don't make much sense as
> > tool something that don't parse batch buffers so it make more sense as a IGT test
> > that fails if devcoredump "ABI" contract breaks.
> >
> Intension is to use this tool seamlessly in decoding xe dumps and I feel converting this to a test
> Would not be much handy whenever we want to decode any sort of dumps, tbh having xe driver extension to the
> existing i915 tool would make sense to me.
To that tool to be useful it would need to decode the batch buffers and to do that you would need to have information about all the instructions.
Unless you volunteer to add and maintain all the instructions in IGT.
>
> Thanks,
> Gowtham
> >
> > >
> > >
> > > > Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > > > ---
> > > > lib/meson.build | 1 +
> > > > lib/xe/intel_error_decode_xe.c | 287 +++++++++++++++++++++++++++++
> > > > lib/xe/intel_error_decode_xe_lib.h | 26 +++
> > > > 3 files changed, 314 insertions(+)
> > > > create mode 100644 lib/xe/intel_error_decode_xe.c create mode
> > > > 100644 lib/xe/intel_error_decode_xe_lib.h
> > > >
> > > > diff --git a/lib/meson.build b/lib/meson.build index
> > > > 9fffdd3c6..c48a64a2c 100644
> > > > --- a/lib/meson.build
> > > > +++ b/lib/meson.build
> > > > @@ -112,6 +112,7 @@ lib_sources = [
> > > > 'igt_msm.c',
> > > > 'igt_dsc.c',
> > > > 'igt_hook.c',
> > > > + 'xe/intel_error_decode_xe.c',
> > > > 'xe/xe_gt.c',
> > > > 'xe/xe_ioctl.c',
> > > > 'xe/xe_mmio.c',
> > > > diff --git a/lib/xe/intel_error_decode_xe.c
> > > > b/lib/xe/intel_error_decode_xe.c new file mode 100644 index
> > > > 000000000..8da06775d
> > > > --- /dev/null
> > > > +++ b/lib/xe/intel_error_decode_xe.c
> > >
> > > oh, so you are already in the lib/xe dir, sorry for missunderstanding the other
> > patch.
> > > but my comment about the name suggestion is still valid:
> > devcoredump_decode.h ?!
> > > or something like that...
> > >
> > > > @@ -0,0 +1,287 @@
> > > > +/* SPDX-License-Identifier: MIT */
> > > > +/*
> > > > +* Copyright © 2025 Intel Corporation
> > > > +*
> > > > +* Authors:
> > > > +* Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > > > +*/
> > > > +
> > > > +#include <stdbool.h>
> > > > +#include <stdio.h>
> > > > +#include <stdlib.h>
> > > > +#include <string.h>
> > > > +#include <xe_drm.h>
> > > > +
> > > > +#include "drmtest.h"
> > > > +#include "instdone.h"
> > > > +#include "intel_chipset.h"
> > > > +#include "intel_reg.h"
> > > > +#include "i915/intel_decode.h"
> > >
> > > hmmm... I really don't like that...
> > > If we need something in common we do need to have a separate lib at
> > > the lower level...
> > >
> > > > +#include "xe/intel_error_decode_xe_lib.h"
> > > > +
> > > > +static uint32_t
> > > > +xe_print_head(unsigned int reg)
> > > > +{
> > > > + printf(" head = 0x%08x, wraps = %d\n", reg & (0x7ffff<<2), reg >> 21);
> > > > + return reg & (0x7ffff<<2);
> > > > +}
> > > > +
> > > > +static uint32_t
> > > > +xe_print_ctl(unsigned int reg)
> > > > +{
> > > > + uint32_t ring_length = (((reg & (0x1ff << 12)) >> 12) + 1)
> > > > +* 4096;
> > > > +
> > > > +#define BIT_STR(reg, x, on, off) ((1 << (x)) & reg) ? on : off
> > > > +
> > > > + printf(" len=%d%s%s%s\n", ring_length,
> > > > + BIT_STR(reg, 0, ", enabled", ", disabled"),
> > > > + BIT_STR(reg, 10, ", semaphore wait ", ""),
> > > > + BIT_STR(reg, 11, ", rb wait ", "")
> > > > + );
> > > > +#undef BIT_STR
> > > > + return ring_length;
> > > > +}
> > > > +
> > > > +static void
> > > > +xe_print_acthd(unsigned int reg, unsigned int ring_length) {
> > > > + if ((reg & (0x7ffff << 2)) < ring_length)
> > > > + printf(" at ring: 0x%08x\n", reg & (0x7ffff << 2));
> > > > + else
> > > > + printf(" at batch: 0x%08x\n", reg);
> > > > +}
> > > > +
> > > > +static void
> > > > +xe_print_instdone(uint32_t devid, unsigned int instdone, unsigned
> > > > +int instdone1) {
> > > > + int i;
> > > > + static int once;
> > > > +
> > > > + if (!once) {
> > > > + if (!init_instdone_definitions(devid))
> > > > + return;
> > > > + once = 1;
> > > > + }
> > > > +
> > > > + for (i = 0; i < num_instdone_bits; i++) {
> > > > + int busy = 0;
> > > > +
> > > > + if (instdone_bits[i].reg == INSTDONE_1) {
> > > > + if (!(instdone1 & instdone_bits[i].bit))
> > > > + busy = 1;
> > > > + } else {
> > > > + if (!(instdone & instdone_bits[i].bit))
> > > > + busy = 1;
> > > > + }
> > > > +
> > > > + if (busy)
> > > > + printf(" busy: %s\n", instdone_bits[i].name);
> > > > + }
> > > > +}
> > > > +
> > > > +static uint16_t xe_get_engine_class(char *name) {
> > > > + uint16_t class;
> > > > +
> > > > + if (strcmp(name, "rcs") == 0) {
> > > > + class = DRM_XE_ENGINE_CLASS_RENDER;
> > > > + } else if (strcmp(name, "bcs") == 0) {
> > > > + class = DRM_XE_ENGINE_CLASS_COPY;
> > > > + } else if (strcmp(name, "vcs") == 0) {
> > > > + class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> > > > + } else if (strcmp(name, "vecs") == 0) {
> > > > + class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> > > > + } else if (strcmp(name, "ccs") == 0) {
> > > > + class = DRM_XE_ENGINE_CLASS_COMPUTE;
> > > > + }
> > > > +
> > > > + return class;
> > > > +}
> > > > +
> > > > +static const char *
> > > > +read_param(const char *line, const char *param) {
> > > > + if (!(strstr(line, param)))
> > > > + return NULL;
> > > > +
> > > > + while (*line != ':')
> > > > + line++;
> > > > + line += 2;
> > > > +
> > > > + return line;
> > > > +}
> > > > +
> > > > +/* parse lines like 'batch_addr[0]: 0x0000effeffff5000 */ bool
> > > > +read_error_decode_xe_u64_hex(const char *line, const char
> > > > +*parameter, uint64_t *value) {
> > > > + line = read_param(line, parameter);
> > > > + if (!line)
> > > > + return false;
> > > > +
> > > > + *value = (uint64_t)strtoull(line, NULL, 0);
> > > > + return true;
> > > > +}
> > > > +
> > > > +/* parse lines like 'PCI ID: 0x9a49' */ bool
> > > > +read_error_decode_xe_hex(const char *line, const char *parameter,
> > > > +uint32_t *value) {
> > > > + line = read_param(line, parameter);
> > > > + if (!line)
> > > > + return false;
> > > > +
> > > > + *value = (int)strtoul(line, NULL, 0);
> > > > + return true;
> > > > +}
> > > > +
> > > > +/* parse lines like 'rcs0 (physical), logical instance=0' */ bool
> > > > +read_error_decode_xe_engine_name(const char *line, char *ring_name)
> > > > +{
> > > > + int i;
> > > > +
> > > > + if (!strstr(line, " (physical), logical instance="))
> > > > + return false;
> > > > +
> > > > + i = 0;
> > > > + for (i = 0; *line != ' '; i++, line++)
> > > > + ring_name[i] = *line;
> > > > +
> > > > + ring_name[i] = 0;
> > > > + return true;
> > > > +}
> > > > +
> > > > +bool
> > > > +read_error_decode_topic(const char *line, enum xe_topic *new_topic)
> > > > +{
> > > > + static const char *xe_topic_strings[] = {
> > > > + "**** Xe Device Coredump ****",
> > > > + "**** GuC CT ****",
> > > > + "**** Job ****",
> > > > + "**** HW Engines ****",
> > > > + "**** VM state ****",
> > > > + };
> > > > + bool topic_changed = false;
> > > > +
> > > > + for (int i = 0; i < ARRAY_SIZE(xe_topic_strings); i++) {
> > > > + if (strncmp(xe_topic_strings[i], line, strlen(xe_topic_strings[i])) == 0) {
> > > > + topic_changed = true;
> > > > + *new_topic = i;
> > > > + break;
> > > > + }
> > > > + }
> > > > +
> > > > + return topic_changed;
> > > > +}
> > > > +
> > > > +void read_xe_data_file(FILE *file)
> > > > +{
> > > > + struct {
> > > > + uint64_t *addrs;
> > > > + uint8_t len;
> > > > + } batch_buffers = { .addrs = NULL, .len = 0 };
> > > > +
> > > > + unsigned int reg;
> > > > + uint32_t devid, ring_length = 0;
> > > > + char *line = NULL;
> > > > + size_t line_size;
> > > > + enum xe_topic xe_topic = XE_TOPIC_INVALID;
> > > > +
> > > > + while(getline(&line, &line_size, file) > 0) {
> > > > + bool topic_changed = false;
> > > > + bool print_line = true;
> > > > +
> > > > + topic_changed = read_error_decode_topic(line, &xe_topic);
> > > > + if(topic_changed) {
> > > > + print_line = (xe_topic != XE_TOPIC_VM);
> > > > + if(print_line)
> > > > + fputs(line, stdout);
> > > > + continue;
> > > > + }
> > > > +
> > > > + switch (xe_topic) {
> > > > + case XE_TOPIC_DEVICE: {
> > > > + uint32_t value;
> > > > +
> > > > + if (read_error_decode_xe_hex(line, "PCI ID",
> > &value)) {
> > > > + devid = value;
> > > > + printf("Detected GEN%i chipset\n",
> > intel_gen(devid));
> > > > + }
> > > > +
> > > > + break;
> > > > + }
> > > > + case XE_TOPIC_HW_ENGINES: {
> > > > + char engine_name[64];
> > > > + uint64_t u64_reg;
> > > > +
> > > > + if (read_error_decode_xe_engine_name(line,
> > engine_name)) {
> > > > + xe_get_engine_class(engine_name);
> > > > + break;
> > > > + }
> > > > +
> > > > + if (read_error_decode_xe_hex(line,
> > "RING_HEAD", ®)) {
> > > > + xe_print_head(reg);
> > > > + break;
> > > > + }
> > > > +
> > > > + if (read_error_decode_xe_hex(line, "RING_CTL",
> > ®))
> > > > + ring_length = xe_print_ctl(reg);
> > > > +
> > > > + if (read_error_decode_xe_hex(line,
> > "RING_INSTDONE", ®)) {
> > > > + fputs(line, stdout);
> > > > + xe_print_instdone(devid, reg, -1);
> > > > + break;
> > > > + }
> > > > +
> > > > + if (read_error_decode_xe_u64_hex(line,
> > "ACTHD", &u64_reg)) {
> > > > + fputs(line, stdout);
> > > > + xe_print_acthd(u64_reg, ring_length);
> > > > + break;
> > > > + }
> > > > +
> > > > + if (read_error_decode_xe_hex(line,
> > "SC_INSTDONE", ®)) {
> > > > + fputs(line, stdout);
> > > > + xe_print_instdone(devid, reg, -1);
> > > > + break;
> > > > + }
> > > > +
> > > > + if (read_error_decode_xe_hex(line,
> > "SC_INSTDONE_EXTRA", ®)) {
> > > > + fputs(line, stdout);
> > > > + xe_print_instdone(devid, -1, reg);
> > > > + break;
> > > > + }
> > > > +
> > > > + if (read_error_decode_xe_hex(line,
> > "SAMPLER_INSTDONE", ®)) {
> > > > + fputs(line, stdout);
> > > > + xe_print_instdone(devid, reg, -1);
> > > > + break;
> > > > + }
> > > > +
> > > > + if (read_error_decode_xe_hex(line,
> > "ROW_INSTDONE", ®)) {
> > > > + fputs(line, stdout);
> > > > + xe_print_instdone(devid, reg, -1);
> > > > + break;
> > > > + }
> > > > +
> > > > + break;
> > > > + }
> > > > + case XE_TOPIC_JOB: {
> > > > + uint64_t u64_value;
> > > > +
> > > > + if (read_error_decode_xe_u64_hex(line,
> > "batch_addr[", &u64_value)) {
> > > > + batch_buffers.addrs =
> > realloc(batch_buffers.addrs, sizeof(uint64_t) * (batch_buffers.len + 1));
> > > > + batch_buffers.addrs[batch_buffers.len] =
> > u64_value;
> > > > + batch_buffers.len++;
> > > > + }
> > > > +
> > > > + break;
> > > > + }
> > > > + default:
> > > > + break;
> > > > + }
> > > > + }
> > > > +
> > > > + free(batch_buffers.addrs);
> > > > + free(line);
> > > > +}
> > > > diff --git a/lib/xe/intel_error_decode_xe_lib.h
> > > > b/lib/xe/intel_error_decode_xe_lib.h
> > > > new file mode 100644
> > > > index 000000000..fc69f7cce
> > > > --- /dev/null
> > > > +++ b/lib/xe/intel_error_decode_xe_lib.h
> > > > @@ -0,0 +1,26 @@
> > > > +/* SPDX-License-Identifier: MIT */
> > > > +/*
> > > > +* Copyright © 2025 Intel Corporation
> > > > +*
> > > > +* Authors:
> > > > +* Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> > > > +*/
> > > > +
> > > > +#include <stdbool.h>
> > > > +#include <stdint.h>
> > > > +
> > > > +enum xe_topic {
> > > > + XE_TOPIC_DEVICE = 0,
> > > > + XE_TOPIC_GUC_CT,
> > > > + XE_TOPIC_JOB,
> > > > + XE_TOPIC_HW_ENGINES,
> > > > + XE_TOPIC_VM,
> > > > + XE_TOPIC_INVALID,
> > > > +};
> > > > +
> > > > +void read_xe_data_file(FILE *file); bool
> > > > +read_error_decode_xe_u64_hex(const char *line, const char
> > > > +*parameter, uint64_t *value); bool read_error_decode_xe_hex(const
> > > > +char *line, const char *parameter, uint32_t *value); bool
> > > > +read_error_decode_xe_engine_name(const char *line, char
> > > > +*ring_name);
> > > > +
> > > > +bool read_error_decode_topic(const char *line, enum xe_topic
> > > > +*new_topic);
> > > > --
> > > > 2.34.1
> > > >
>
More information about the igt-dev
mailing list