[PATCH <i-g-t> v4 3/4] tools/gputop/xe_gputop: Add gputop support for xe specific devices
Riana Tauro
riana.tauro at intel.com
Fri Mar 14 04:57:24 UTC 2025
Hi Soham
Replace busy-ticks with engine-active-ticks and total-ticks with
engine-total-ticks to keep it consistent.
Replace all occurences of busy with active in the series.
On 3/13/2025 11:51 AM, Soham Purkait wrote:
> Add gputop support for xe-specific devices. Separate
> driver-specific code into respective source files.
>
> v2 : fix for refactoring GPUTOP into a
> vendor-agnostic tool (Lucas)
>
> v3 : Separate commit (Kamil)
>
> v4 : Headers in alphabetical order
> Engines memory allocation at
> the beginning all at once
> Removed PMU normalization (Riana)
>
> Signed-off-by: Soham Purkait <soham.purkait at intel.com>
> ---
> tools/gputop/xe_gputop.c | 372 +++++++++++++++++++++++++++++++++++++++
> tools/gputop/xe_gputop.h | 73 ++++++++
> 2 files changed, 445 insertions(+)
> create mode 100644 tools/gputop/xe_gputop.c
> create mode 100644 tools/gputop/xe_gputop.h
>
> diff --git a/tools/gputop/xe_gputop.c b/tools/gputop/xe_gputop.c
> new file mode 100644
> index 000000000..4137bb771
> --- /dev/null
> +++ b/tools/gputop/xe_gputop.c
> @@ -0,0 +1,372 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#include "xe_gputop.h"
> +
> +#define engine_ptr(engines, n) (&(engines)->engine + (n))
> +
> +static void __update_sample(struct xe_pmu_counter *counter, uint64_t val)
> +{
> + counter->val.prev = counter->val.cur;
> + counter->val.cur = val;
> +}
> +
> +static void update_sample(struct xe_pmu_counter *counter, uint64_t *val)
> +{
> + if (counter->present)
presence of fd should be enough. present is not required.
The __update contents can be moved to same function
> + __update_sample(counter, val[counter->idx]);
> +}
> +
> +static const char *class_display_name(unsigned int class)
> +{
> + switch (class) {
> + case DRM_XE_ENGINE_CLASS_RENDER:
> + return "Render/3D";
> + case DRM_XE_ENGINE_CLASS_COPY:
> + return "Blitter";
> + case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
> + return "Video";
> + case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
> + return "VideoEnhance";
> + case DRM_XE_ENGINE_CLASS_COMPUTE:
> + return "Compute";
> + default:
> + return "[unknown]";
> + }
> +}
> +
> +static inline void *clean_up(void *engines)
> +{
> + if (engines)
> + free(engines);
> +
> + return NULL;
> +}
> +
> +static char *pmu_name(struct igt_device_card *card)
> +{
> + int card_fd;
> + char device[30];
> + char *path;
> +
> + if (strlen(card->card))
> + card_fd = igt_open_card(card);
> + else if (strlen(card->render))
> + card_fd = igt_open_render(card);
> +
> + if (card_fd == -1)
> + return NULL;
> +
> + xe_perf_device(card_fd, device, sizeof(device));
> + path = strdup(device);
needs a free while cleanup
> + close(card_fd);
> + return path;
> +}
> +
> +static int _open_pmu(uint64_t type, unsigned int *cnt, struct xe_pmu_counter *pmu, int *fd)
> +{
> + int fd__ = igt_perf_open_group(type, pmu->config, *fd);
> +
> + if (fd__ >= 0) {
> + if (*fd == -1)
> + *fd = fd__;
> + pmu->present = true;
> + pmu->idx = (*cnt)++;
> + }
> +
> + return fd__;
> +}
> +
> +void xe_gputop_init(struct xe_gputop *obj,
> + struct igt_device_card *card)
> +{
> + obj->pmu_device = pmu_name(card);
> + if (!obj->pmu_device) {
> + fprintf(stderr, "%s : pmu_device path returned NULL", card->pci_slot_name);
> + exit(EXIT_FAILURE);
> + }
> + obj->card = card;
> +}
> +
> +static int pmu_format_shift(int xe, const char *name)
> +{
> + uint32_t start;
> + int format;
> + char device[80];
> +
> + format = perf_event_format(xe_perf_device(xe, device, sizeof(device)),
> + name, &start);
> + if (format)
> + return 0;
> +
> + return start;
> +}
> +
> +static int engine_cmp(const void *__a, const void *__b)
> +{
> + const struct xe_engine *a = (struct xe_engine *)__a;
> + const struct xe_engine *b = (struct xe_engine *)__b;
> +
> + if (a->drm_xe_engine.engine_class != b->drm_xe_engine.engine_class)
> + return a->drm_xe_engine.engine_class - b->drm_xe_engine.engine_class;
> + else
> + return a->drm_xe_engine.engine_instance - b->drm_xe_engine.engine_instance;
> +}
> +
> +void *xe_discover_engines(const void *obj)
> +{
> + struct igt_device_card *card = ((struct xe_gputop *)obj)->card;
> + struct xe_engines *engines;
> + int ret = 0;
> + char device[30];
> + struct drm_xe_engine_class_instance *hwe;
> + int card_fd;
> +
> + if (!card || !strlen(card->card) || !strlen(card->render))
> + return NULL;
> +
> + if (strlen(card->card)) {
> + card_fd = igt_open_card(card);
> + } else if (strlen(card->render)) {
> + card_fd = igt_open_render(card);
> + } else {
> + fprintf(stderr, "Failed to detect device!\n");
> + return clean_up(engines);
> + }
> + xe_device_get(card_fd);
> + engines = malloc(sizeof(struct xe_engines) +
> + xe_number_engines(card_fd) * sizeof(struct xe_engine));
why additional?
> + if (!engines)
> + return NULL;
> +
> + memset(engines, 0, sizeof(struct xe_engines) +
> + xe_number_engines(card_fd) * sizeof(struct xe_engine));
> +
> + engines->num_engines = 0;
> + engines->device = ((struct xe_gputop *)obj)->pmu_device;
> + xe_for_each_engine(card_fd, hwe) {
> + uint64_t engine_class, engine_instance, gt_shift, param_config;
> + struct xe_engine *engine;
> +
> + engine = engine_ptr(engines, engines->num_engines);
> + gt_shift = pmu_format_shift(card_fd, "gt");
> + engine_class = pmu_format_shift(card_fd, "engine_class");
> + engine_instance = pmu_format_shift(card_fd, "engine_instance");
> + param_config = (uint64_t)hwe->gt_id << gt_shift | hwe->engine_class << engine_class
> + | hwe->engine_instance << engine_instance;
This function does pmu specific initialization too. discover_engines is
not the right name.
you could move pmu specific initialization to pmu_init or rename this
function
> +
> + engine->drm_xe_engine = *hwe;
> +
> + ret = perf_event_config(xe_perf_device(card_fd, device, sizeof(device)),
> + "engine-active-ticks", &engine->busy_ticks.config);
> + if (ret < 0)
> + break;
> +
> + engine->busy_ticks.config |= param_config;
> +
> + ret = perf_event_config(xe_perf_device(card_fd, device, sizeof(device)),
> + "engine-total-ticks", &engine->total_ticks.config);
> + if (ret < 0)
> + break;
> +
> + engine->total_ticks.config |= param_config;
> +
> + if (engine->busy_ticks.config == -1 || engine->total_ticks.config == -1) {
> + ret = ENOENT;
> + break;
> + }
> +
> + ret = asprintf(&engine->display_name, "%s/%u",
> + class_display_name(engine->drm_xe_engine.engine_class),
> + engine->drm_xe_engine.engine_instance);
> +
> + if (ret <= 0) {
> + ret = errno;
> + break;
> + }
> + ret = asprintf(&engine->short_name, "%s/%u",
> + xe_engine_class_short_string(engine->drm_xe_engine.engine_class),
> + engine->drm_xe_engine.engine_instance);
> +
> + if (ret <= 0) {
> + ret = errno;
> + break;
> + }
> +
> + engines->num_engines++;
> + }
> +
> + if (!ret) {
> + errno = ret;
> + return clean_up(engines);
> + }
> +
> + qsort(engine_ptr(engines, 0), engines->num_engines,
> + sizeof(struct xe_engine), engine_cmp);
> +
> + ((struct xe_gputop *)obj)->eng_obj = engines;
> +
> + return engines;
> +}
> +
> +static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> +{
> + uint64_t buf[2 + num];
> + unsigned int i;
> + ssize_t len;
> +
> + memset(buf, 0, sizeof(buf));
> +
> + len = read(fd, buf, sizeof(buf));
> + assert(len == sizeof(buf));
> +
> + for (i = 0; i < num; i++)
> + val[i] = buf[2 + i];
> +
> + return buf[1];
> +}
> +
> +void xe_pmu_sample(const void *obj)
> +{
> + struct xe_engines *engines = ((struct xe_gputop *)obj)->eng_obj;
> + const int num_val = engines->num_counters;
this can be num_engines * 2.
can remove this
> + uint64_t val[2 + num_val];
> + unsigned int i;
> +
> + engines->ts.prev = engines->ts.cur;
> + engines->ts.cur = pmu_read_multi(engines->fd, num_val, val);
This is unnecessary
> +
> + for (i = 0; i < engines->num_engines; i++) {
> + struct xe_engine *engine = engine_ptr(engines, i);
> +
> + update_sample(&engine->busy_ticks, val);
> + update_sample(&engine->total_ticks, val);
> + }
> +}
> +
> +int xe_pmu_init(const void *obj)
> +{
> + struct xe_engines *engines = ((struct xe_gputop *)obj)->eng_obj;
> + unsigned int i;
> + int fd;
> + struct xe_engine *engine;
> + uint64_t type = igt_perf_type_id(engines->device);
> +
> + engines->fd = -1;
> + engines->num_counters = 0;
you can use a local variable here
> +
why is 0 outside, Can't it be moved in the for loop
> + engine = engine_ptr(engines, 0);
> + fd = _open_pmu(type, &engines->num_counters, &engine->busy_ticks, &engines->fd);
> + if (fd < 0)
> + return -1;
> + fd = _open_pmu(type, &engines->num_counters, &engine->total_ticks, &engines->fd);
> + if (fd < 0)
> + return -1;
> +
> + for (i = 1; i < engines->num_engines; i++) {
> + engine = engine_ptr(engines, i);
> + fd = _open_pmu(type, &engines->num_counters, &engine->busy_ticks, &engines->fd);
> + if (fd < 0)
> + return -1;
> + fd = _open_pmu(type, &engines->num_counters, &engine->total_ticks, &engines->fd);
> + if (fd < 0)
> + return -1;
> + }
> + return 0;
> +}
> +
> +static double pmu_calc_total(struct xe_pmu_pair *p)
%s/pmu_calc_total/pmu_total_ticks
or calc_total_ticks
> +{
> + double v;
> +
> + v = p->cur - p->prev;
> + return v;
> +}
> +
> +static double pmu_calc(struct xe_pmu_pair *p, double total_tick)
%s/total tick/total ticks
> +{
> + double bz = p->cur - p->prev;
use friendly names
active ticks
I think you can move total ticks also here. pass two pairs and calculate
in same function
> + double total;
> +
> + total = (bz * 100) / total_tick;
> + return total;
> +}
> +
> +static int
> +print_device_description(const void *obj, int lines, int w, int h)
> +{
> + char *desc;
> + int len;
> +
> + len = asprintf(&desc, "DRIVER: %s || BDF: %s",
> + ((struct xe_gputop *)obj)->card->driver,
> + ((struct xe_gputop *)obj)->card->pci_slot_name);
> +
> + printf("\033[7m%s%*s\033[0m\n",
> + desc,
> + (int)(w - len), " ");
> + lines++;
> + free(desc);
> + return lines;
> +}
> +
> +static int
> +print_engines_header(struct xe_engines *engines,
> + int lines, int con_w, int con_h)
> +{
> + const char *a;
> +
> + for (unsigned int i = 0;
> + i < engines->num_engines && lines < con_h;
> + i++) {
> + struct xe_engine *engine = engine_ptr(engines, i);
> +
> + if (!engine->num_counters)
> + continue;
> +
> + a = " ENGINES BUSY ";
> +
> + printf("\033[7m%s%*s\033[0m\n",
> + a,
> + (int)(con_w - strlen(a)), " ");
> + lines++;
> +
> + break;
> + }
> +
> + return lines;
> +}
> +
> +static int
> +print_engine(struct xe_engines *engines, unsigned int i,
> + int lines, int con_w, int con_h)
> +{
> + struct xe_engine *engine = engine_ptr(engines, i);
> + double total_tick = pmu_calc_total(&engine->total_ticks.val);
> + double percentage = pmu_calc(&engine->busy_ticks.val, total_tick);
> +
> + printf("%*s", (int)(strlen(" ENGINES")), engine->display_name);
> + print_percentage_bar(percentage, con_w - strlen(" ENGINES"));
> + printf("\n");
> +
> + return ++lines;
> +}
> +
> +int xe_print_engines(const void *obj, int lines, int w, int h)
> +{
> + struct xe_engines *show = ((struct xe_gputop *)obj)->eng_obj;
> +
> + lines = print_device_description(obj, lines, w, h);
> +
> + lines = print_engines_header(show, lines, w, h);
> +
> + for (unsigned int i = 0; i < show->num_engines && lines < h; i++)
> + lines = print_engine(show, i, lines, w, h);
> +
> + lines = print_engines_footer(lines, w, h);
> +
> + return lines;
> +}
> +
> diff --git a/tools/gputop/xe_gputop.h b/tools/gputop/xe_gputop.h
> new file mode 100644
> index 000000000..1f23b2ed0
> --- /dev/null
> +++ b/tools/gputop/xe_gputop.h
> @@ -0,0 +1,73 @@
> +/* SPDX-License-Identifier: MIT
> + *
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#ifndef __XE_GPUTOP_H__
> +#define __XE_GPUTOP_H__
> +
> +#include <dirent.h>
> +
> +#include "igt_device_scan.h"
> +#include "igt_perf.h"
> +#include "utils.h"
> +#include "xe/xe_query.h"
> +
> +struct xe_pmu_pair {
> + uint64_t cur;
> + uint64_t prev;
> +};
> +
> +struct xe_pmu_counter {
> + uint64_t type;
> + uint64_t config;
> + unsigned int idx;
> + struct xe_pmu_pair val;
> + bool present;
> +};
> +
> +struct xe_engine {
> + const char *name;
> + char *display_name;
> + char *short_name;
> + struct drm_xe_engine_class_instance drm_xe_engine;
> + unsigned int num_counters;
didn't find initialization. needed?
> + struct xe_pmu_counter busy_ticks;
> + struct xe_pmu_counter total_ticks;
> +};
> +
> +struct xe_engines {
> + unsigned int num_engines;
> + unsigned int num_classes;
not used
> + unsigned int num_counters;
> + DIR *root;
> + int fd;
> + struct xe_pmu_pair ts;
can be removed
> + bool discrete;
not used
> + char *device;
> + int num_gts;
not used
> +
> + /* Do not edit below this line.
> + * This structure is reallocated every time a new engine is
> + * found and size is increased by sizeof (engine).
> + */
This comment can be removed
Thanks
Riana
> + struct xe_engine engine;
> +
> +};
> +
> +struct xe_gputop {
> + char *pmu_device;
> + struct igt_device_card *card;
> + struct xe_engines *eng_obj;
> +};
> +
> +void xe_gputop_init(struct xe_gputop *obj,
> + struct igt_device_card *card);
> +
> +void xe_populate_device_instances(struct gputop_device *dv);
> +void *xe_discover_engines(const void *obj);
> +void xe_pmu_sample(const void *obj);
> +int xe_pmu_init(const void *obj);
> +int xe_print_engines(const void *obj, int lines, int w, int h);
> +
> +#endif /* __XE_GPUTOP_H__ */
More information about the igt-dev
mailing list