[PATCH 1/2] lib/xe/intel_error_decode_xe: error decode support for xe driver
Rodrigo Vivi
rodrigo.vivi at intel.com
Thu Feb 6 18:48:32 UTC 2025
On Fri, Jan 31, 2025 at 08:29:39PM +0000, sai.gowtham.ch at intel.com wrote:
> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>
> Add error decode support for the xe driver. This lib helps us decode
> the errors captured in the dumps, and it is enabled in the existing intel_error_decode tool
> to extend that tool to work with xe devcoredumps.
>
Cc: Jose
I'd like to get Jose's perspective, since he implemented the Mesa decode tool.
> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> ---
> lib/meson.build | 1 +
> lib/xe/intel_error_decode_xe.c | 287 +++++++++++++++++++++++++++++
> lib/xe/intel_error_decode_xe_lib.h | 26 +++
> 3 files changed, 314 insertions(+)
> create mode 100644 lib/xe/intel_error_decode_xe.c
> create mode 100644 lib/xe/intel_error_decode_xe_lib.h
>
> diff --git a/lib/meson.build b/lib/meson.build
> index 9fffdd3c6..c48a64a2c 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -112,6 +112,7 @@ lib_sources = [
> 'igt_msm.c',
> 'igt_dsc.c',
> 'igt_hook.c',
> + 'xe/intel_error_decode_xe.c',
> 'xe/xe_gt.c',
> 'xe/xe_ioctl.c',
> 'xe/xe_mmio.c',
> diff --git a/lib/xe/intel_error_decode_xe.c b/lib/xe/intel_error_decode_xe.c
> new file mode 100644
> index 000000000..8da06775d
> --- /dev/null
> +++ b/lib/xe/intel_error_decode_xe.c
Oh, so you are already in the lib/xe dir; sorry for misunderstanding the other patch.
But my comment about the name suggestion is still valid: devcoredump_decode.h ?!
Or something like that...
> @@ -0,0 +1,287 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright © 2025 Intel Corporation
> +*
> +* Authors:
> +* Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> +*/
> +
> +#include <stdbool.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <xe_drm.h>
> +
> +#include "drmtest.h"
> +#include "instdone.h"
> +#include "intel_chipset.h"
> +#include "intel_reg.h"
> +#include "i915/intel_decode.h"
hmmm... I really don't like that...
If we need something in common, we need to have a separate lib
at a lower level...
> +#include "xe/intel_error_decode_xe_lib.h"
> +
> +static uint32_t
> +xe_print_head(unsigned int reg)
> +{
> + printf(" head = 0x%08x, wraps = %d\n", reg & (0x7ffff<<2), reg >> 21);
> + return reg & (0x7ffff<<2);
> +}
> +
> +static uint32_t
> +xe_print_ctl(unsigned int reg)
> +{
> + uint32_t ring_length = (((reg & (0x1ff << 12)) >> 12) + 1) * 4096;
> +
> +#define BIT_STR(reg, x, on, off) ((1 << (x)) & reg) ? on : off
> +
> + printf(" len=%d%s%s%s\n", ring_length,
> + BIT_STR(reg, 0, ", enabled", ", disabled"),
> + BIT_STR(reg, 10, ", semaphore wait ", ""),
> + BIT_STR(reg, 11, ", rb wait ", "")
> + );
> +#undef BIT_STR
> + return ring_length;
> +}
> +
> +static void
> +xe_print_acthd(unsigned int reg, unsigned int ring_length)
> +{
> + if ((reg & (0x7ffff << 2)) < ring_length)
> + printf(" at ring: 0x%08x\n", reg & (0x7ffff << 2));
> + else
> + printf(" at batch: 0x%08x\n", reg);
> +}
> +
> +static void
> +xe_print_instdone(uint32_t devid, unsigned int instdone, unsigned int instdone1)
> +{
> + int i;
> + static int once;
> +
> + if (!once) {
> + if (!init_instdone_definitions(devid))
> + return;
> + once = 1;
> + }
> +
> + for (i = 0; i < num_instdone_bits; i++) {
> + int busy = 0;
> +
> + if (instdone_bits[i].reg == INSTDONE_1) {
> + if (!(instdone1 & instdone_bits[i].bit))
> + busy = 1;
> + } else {
> + if (!(instdone & instdone_bits[i].bit))
> + busy = 1;
> + }
> +
> + if (busy)
> + printf(" busy: %s\n", instdone_bits[i].name);
> + }
> +}
> +
> +static uint16_t xe_get_engine_class(char *name)
> +{
> + uint16_t class;
> +
> + if (strcmp(name, "rcs") == 0) {
> + class = DRM_XE_ENGINE_CLASS_RENDER;
> + } else if (strcmp(name, "bcs") == 0) {
> + class = DRM_XE_ENGINE_CLASS_COPY;
> + } else if (strcmp(name, "vcs") == 0) {
> + class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE;
> + } else if (strcmp(name, "vecs") == 0) {
> + class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE;
> + } else if (strcmp(name, "ccs") == 0) {
> + class = DRM_XE_ENGINE_CLASS_COMPUTE;
> + }
> +
> + return class;
> +}
> +
> +static const char *
> +read_param(const char *line, const char *param)
> +{
> + if (!(strstr(line, param)))
> + return NULL;
> +
> + while (*line != ':')
> + line++;
> + line += 2;
> +
> + return line;
> +}
> +
> +/* parse lines like 'batch_addr[0]: 0x0000effeffff5000' */
> +bool
> +read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value)
> +{
> + line = read_param(line, parameter);
> + if (!line)
> + return false;
> +
> + *value = (uint64_t)strtoull(line, NULL, 0);
> + return true;
> +}
> +
> +/* parse lines like 'PCI ID: 0x9a49' */
> +bool
> +read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value)
> +{
> + line = read_param(line, parameter);
> + if (!line)
> + return false;
> +
> + *value = (int)strtoul(line, NULL, 0);
> + return true;
> +}
> +
> +/* parse lines like 'rcs0 (physical), logical instance=0' */
> +bool
> +read_error_decode_xe_engine_name(const char *line, char *ring_name)
> +{
> + int i;
> +
> + if (!strstr(line, " (physical), logical instance="))
> + return false;
> +
> + i = 0;
> + for (i = 0; *line != ' '; i++, line++)
> + ring_name[i] = *line;
> +
> + ring_name[i] = 0;
> + return true;
> +}
> +
> +bool
> +read_error_decode_topic(const char *line, enum xe_topic *new_topic)
> +{
> + static const char *xe_topic_strings[] = {
> + "**** Xe Device Coredump ****",
> + "**** GuC CT ****",
> + "**** Job ****",
> + "**** HW Engines ****",
> + "**** VM state ****",
> + };
> + bool topic_changed = false;
> +
> + for (int i = 0; i < ARRAY_SIZE(xe_topic_strings); i++) {
> + if (strncmp(xe_topic_strings[i], line, strlen(xe_topic_strings[i])) == 0) {
> + topic_changed = true;
> + *new_topic = i;
> + break;
> + }
> + }
> +
> + return topic_changed;
> +}
> +
> +void read_xe_data_file(FILE *file)
> +{
> + struct {
> + uint64_t *addrs;
> + uint8_t len;
> + } batch_buffers = { .addrs = NULL, .len = 0 };
> +
> + unsigned int reg;
> + uint32_t devid, ring_length = 0;
> + char *line = NULL;
> + size_t line_size;
> + enum xe_topic xe_topic = XE_TOPIC_INVALID;
> +
> + while(getline(&line, &line_size, file) > 0) {
> + bool topic_changed = false;
> + bool print_line = true;
> +
> + topic_changed = read_error_decode_topic(line, &xe_topic);
> + if(topic_changed) {
> + print_line = (xe_topic != XE_TOPIC_VM);
> + if(print_line)
> + fputs(line, stdout);
> + continue;
> + }
> +
> + switch (xe_topic) {
> + case XE_TOPIC_DEVICE: {
> + uint32_t value;
> +
> + if (read_error_decode_xe_hex(line, "PCI ID", &value)) {
> + devid = value;
> + printf("Detected GEN%i chipset\n", intel_gen(devid));
> + }
> +
> + break;
> + }
> + case XE_TOPIC_HW_ENGINES: {
> + char engine_name[64];
> + uint64_t u64_reg;
> +
> + if (read_error_decode_xe_engine_name(line, engine_name)) {
> + xe_get_engine_class(engine_name);
> + break;
> + }
> +
> + if (read_error_decode_xe_hex(line, "RING_HEAD", &reg)) {
> + xe_print_head(reg);
> + break;
> + }
> +
> + if (read_error_decode_xe_hex(line, "RING_CTL", &reg))
> + ring_length = xe_print_ctl(reg);
> +
> + if (read_error_decode_xe_hex(line, "RING_INSTDONE", &reg)) {
> + fputs(line, stdout);
> + xe_print_instdone(devid, reg, -1);
> + break;
> + }
> +
> + if (read_error_decode_xe_u64_hex(line, "ACTHD", &u64_reg)) {
> + fputs(line, stdout);
> + xe_print_acthd(u64_reg, ring_length);
> + break;
> + }
> +
> + if (read_error_decode_xe_hex(line, "SC_INSTDONE", &reg)) {
> + fputs(line, stdout);
> + xe_print_instdone(devid, reg, -1);
> + break;
> + }
> +
> + if (read_error_decode_xe_hex(line, "SC_INSTDONE_EXTRA", &reg)) {
> + fputs(line, stdout);
> + xe_print_instdone(devid, -1, reg);
> + break;
> + }
> +
> + if (read_error_decode_xe_hex(line, "SAMPLER_INSTDONE", &reg)) {
> + fputs(line, stdout);
> + xe_print_instdone(devid, reg, -1);
> + break;
> + }
> +
> + if (read_error_decode_xe_hex(line, "ROW_INSTDONE", &reg)) {
> + fputs(line, stdout);
> + xe_print_instdone(devid, reg, -1);
> + break;
> + }
> +
> + break;
> + }
> + case XE_TOPIC_JOB: {
> + uint64_t u64_value;
> +
> + if (read_error_decode_xe_u64_hex(line, "batch_addr[", &u64_value)) {
> + batch_buffers.addrs = realloc(batch_buffers.addrs, sizeof(uint64_t) * (batch_buffers.len + 1));
> + batch_buffers.addrs[batch_buffers.len] = u64_value;
> + batch_buffers.len++;
> + }
> +
> + break;
> + }
> + default:
> + break;
> + }
> + }
> +
> + free(batch_buffers.addrs);
> + free(line);
> +}
> diff --git a/lib/xe/intel_error_decode_xe_lib.h b/lib/xe/intel_error_decode_xe_lib.h
> new file mode 100644
> index 000000000..fc69f7cce
> --- /dev/null
> +++ b/lib/xe/intel_error_decode_xe_lib.h
> @@ -0,0 +1,26 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> +* Copyright © 2025 Intel Corporation
> +*
> +* Authors:
> +* Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> +*/
> +
> +#include <stdbool.h>
> +#include <stdint.h>
> +
> +enum xe_topic {
> + XE_TOPIC_DEVICE = 0,
> + XE_TOPIC_GUC_CT,
> + XE_TOPIC_JOB,
> + XE_TOPIC_HW_ENGINES,
> + XE_TOPIC_VM,
> + XE_TOPIC_INVALID,
> +};
> +
> +void read_xe_data_file(FILE *file);
> +bool read_error_decode_xe_u64_hex(const char *line, const char *parameter, uint64_t *value);
> +bool read_error_decode_xe_hex(const char *line, const char *parameter, uint32_t *value);
> +bool read_error_decode_xe_engine_name(const char *line, char *ring_name);
> +
> +bool read_error_decode_topic(const char *line, enum xe_topic *new_topic);
> --
> 2.34.1
>
More information about the igt-dev
mailing list