[PATCH <i-g-t> v4 3/4] tools/gputop/xe_gputop: Add gputop support for xe specific devices
Purkait, Soham
soham.purkait at intel.com
Fri Mar 21 03:26:37 UTC 2025
Hi Riana,
On 14-03-2025 10:27, Riana Tauro wrote:
> Hi Soham
>
> Replace busy-ticks with engine-active-ticks and total-ticks with
> engine-total-ticks to keep it consistent.
>
> Replace all occurences of busy with active in the series.
>
> On 3/13/2025 11:51 AM, Soham Purkait wrote:
>> Add gputop support for xe-specific devices. Separate
>> driver-specific code into respective source files.
>>
>> v2 : fix for refactoring GPUTOP into a
>> vendor-agnostic tool (Lucas)
>>
>> v3 : Separate commit (Kamil)
>>
>> v4 : Headers in alphabetical order
>> Engines memory allocation at
>> the beginning all at once
>> Removed PMU normalization (Riana)
>>
>> Signed-off-by: Soham Purkait <soham.purkait at intel.com>
>> ---
>> tools/gputop/xe_gputop.c | 372 +++++++++++++++++++++++++++++++++++++++
>> tools/gputop/xe_gputop.h | 73 ++++++++
>> 2 files changed, 445 insertions(+)
>> create mode 100644 tools/gputop/xe_gputop.c
>> create mode 100644 tools/gputop/xe_gputop.h
>>
>> diff --git a/tools/gputop/xe_gputop.c b/tools/gputop/xe_gputop.c
>> new file mode 100644
>> index 000000000..4137bb771
>> --- /dev/null
>> +++ b/tools/gputop/xe_gputop.c
>> @@ -0,0 +1,372 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2025 Intel Corporation
>> + */
>> +
>> +#include "xe_gputop.h"
>> +
>> +#define engine_ptr(engines, n) (&(engines)->engine + (n))
>> +
>> +static void __update_sample(struct xe_pmu_counter *counter, uint64_t
>> val)
>> +{
>> + counter->val.prev = counter->val.cur;
>> + counter->val.cur = val;
>> +}
>> +
>> +static void update_sample(struct xe_pmu_counter *counter, uint64_t
>> *val)
>> +{
>> + if (counter->present)
> presence of fd should be enough. present is not required.
> The __update contents can be moved to same function
In order to maintain the sanctity, I believe this approach is more
aligned with the IGT implementation.
>> + __update_sample(counter, val[counter->idx]);
>> +}
>> +
>> +static const char *class_display_name(unsigned int class)
>> +{
>> + switch (class) {
>> + case DRM_XE_ENGINE_CLASS_RENDER:
>> + return "Render/3D";
>> + case DRM_XE_ENGINE_CLASS_COPY:
>> + return "Blitter";
>> + case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
>> + return "Video";
>> + case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
>> + return "VideoEnhance";
>> + case DRM_XE_ENGINE_CLASS_COMPUTE:
>> + return "Compute";
>> + default:
>> + return "[unknown]";
>> + }
>> +}
>> +
>> +static inline void *clean_up(void *engines)
>> +{
>> + if (engines)
>> + free(engines);
>> +
>> + return NULL;
>> +}
>> +
>> +static char *pmu_name(struct igt_device_card *card)
>> +{
>> + int card_fd;
>> + char device[30];
>> + char *path;
>> +
>> + if (strlen(card->card))
>> + card_fd = igt_open_card(card);
>> + else if (strlen(card->render))
>> + card_fd = igt_open_render(card);
>> +
>> + if (card_fd == -1)
>> + return NULL;
>> +
>> + xe_perf_device(card_fd, device, sizeof(device));
>> + path = strdup(device);
> needs a free while cleanup
>> + close(card_fd);
>> + return path;
>> +}
>> +
>> +static int _open_pmu(uint64_t type, unsigned int *cnt, struct
>> xe_pmu_counter *pmu, int *fd)
>> +{
>> + int fd__ = igt_perf_open_group(type, pmu->config, *fd);
>> +
>> + if (fd__ >= 0) {
>> + if (*fd == -1)
>> + *fd = fd__;
>> + pmu->present = true;
>> + pmu->idx = (*cnt)++;
>> + }
>> +
>> + return fd__;
>> +}
>> +
>> +void xe_gputop_init(struct xe_gputop *obj,
>> + struct igt_device_card *card)
>> +{
>> + obj->pmu_device = pmu_name(card);
>> + if (!obj->pmu_device) {
>> + fprintf(stderr, "%s : pmu_device path returned NULL",
>> card->pci_slot_name);
>> + exit(EXIT_FAILURE);
>> + }
>> + obj->card = card;
>> +}
>> +
>> +static int pmu_format_shift(int xe, const char *name)
>> +{
>> + uint32_t start;
>> + int format;
>> + char device[80];
>> +
>> + format = perf_event_format(xe_perf_device(xe, device,
>> sizeof(device)),
>> + name, &start);
>> + if (format)
>> + return 0;
>> +
>> + return start;
>> +}
>> +
>> +static int engine_cmp(const void *__a, const void *__b)
>> +{
>> + const struct xe_engine *a = (struct xe_engine *)__a;
>> + const struct xe_engine *b = (struct xe_engine *)__b;
>> +
>> + if (a->drm_xe_engine.engine_class != b->drm_xe_engine.engine_class)
>> + return a->drm_xe_engine.engine_class -
>> b->drm_xe_engine.engine_class;
>> + else
>> + return a->drm_xe_engine.engine_instance -
>> b->drm_xe_engine.engine_instance;
>> +}
>> +
>> +void *xe_discover_engines(const void *obj)
>> +{
>> + struct igt_device_card *card = ((struct xe_gputop *)obj)->card;
>> + struct xe_engines *engines;
>> + int ret = 0;
>> + char device[30];
>> + struct drm_xe_engine_class_instance *hwe;
>> + int card_fd;
>> +
>> + if (!card || !strlen(card->card) || !strlen(card->render))
>> + return NULL;
>> +
>> + if (strlen(card->card)) {
>> + card_fd = igt_open_card(card);
>> + } else if (strlen(card->render)) {
>> + card_fd = igt_open_render(card);
>> + } else {
>> + fprintf(stderr, "Failed to detect device!\n");
>> + return clean_up(engines);
>> + }
>> + xe_device_get(card_fd);
>> + engines = malloc(sizeof(struct xe_engines) +
>> + xe_number_engines(card_fd) * sizeof(struct xe_engine));
> why additional?
While allocating xe_engines, xe_engine space also needs to get allocated.
>
>> + if (!engines)
>> + return NULL;
>> +
>> + memset(engines, 0, sizeof(struct xe_engines) +
>> + xe_number_engines(card_fd) * sizeof(struct xe_engine));
>> +
>> + engines->num_engines = 0;
>> + engines->device = ((struct xe_gputop *)obj)->pmu_device;
>> + xe_for_each_engine(card_fd, hwe) {
>> + uint64_t engine_class, engine_instance, gt_shift, param_config;
>> + struct xe_engine *engine;
>> +
>> + engine = engine_ptr(engines, engines->num_engines);
>> + gt_shift = pmu_format_shift(card_fd, "gt");
>> + engine_class = pmu_format_shift(card_fd, "engine_class");
>> + engine_instance = pmu_format_shift(card_fd, "engine_instance");
>> + param_config = (uint64_t)hwe->gt_id << gt_shift |
>> hwe->engine_class << engine_class
>> + | hwe->engine_instance << engine_instance;
> This function does pmu specific initialization too. discover_engines
> is not the right name.
> you could move pmu specific initialization to pmu_init or rename this
> function
Here engine specific discoveries is being performed so the name, while
strictly maintaining PMU specific initialization is not possible
due to the underline implementation But It could encourage other driver
to maintain the same structure, while keeping
it vendor agnostic.
>> +
>> + engine->drm_xe_engine = *hwe;
>> +
>> + ret = perf_event_config(xe_perf_device(card_fd, device,
>> sizeof(device)),
>> + "engine-active-ticks", &engine->busy_ticks.config);
>> + if (ret < 0)
>> + break;
>> +
>> + engine->busy_ticks.config |= param_config;
>> +
>> + ret = perf_event_config(xe_perf_device(card_fd, device,
>> sizeof(device)),
>> + "engine-total-ticks", &engine->total_ticks.config);
>> + if (ret < 0)
>> + break;
>> +
>> + engine->total_ticks.config |= param_config;
>> +
>> + if (engine->busy_ticks.config == -1 ||
>> engine->total_ticks.config == -1) {
>> + ret = ENOENT;
>> + break;
>> + }
>> +
>> + ret = asprintf(&engine->display_name, "%s/%u",
>> + class_display_name(engine->drm_xe_engine.engine_class),
>> + engine->drm_xe_engine.engine_instance);
>> +
>> + if (ret <= 0) {
>> + ret = errno;
>> + break;
>> + }
>> + ret = asprintf(&engine->short_name, "%s/%u",
>> + xe_engine_class_short_string(engine->drm_xe_engine.engine_class),
>> + engine->drm_xe_engine.engine_instance);
>> +
>> + if (ret <= 0) {
>> + ret = errno;
>> + break;
>> + }
>> +
>> + engines->num_engines++;
>> + }
>> +
>> + if (!ret) {
>> + errno = ret;
>> + return clean_up(engines);
>> + }
>> +
>> + qsort(engine_ptr(engines, 0), engines->num_engines,
>> + sizeof(struct xe_engine), engine_cmp);
>> +
>> + ((struct xe_gputop *)obj)->eng_obj = engines;
>> +
>> + return engines;
>> +}
>> +
>> +static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>> +{
>> + uint64_t buf[2 + num];
>> + unsigned int i;
>> + ssize_t len;
>> +
>> + memset(buf, 0, sizeof(buf));
>> +
>> + len = read(fd, buf, sizeof(buf));
>> + assert(len == sizeof(buf));
>> +
>> + for (i = 0; i < num; i++)
>> + val[i] = buf[2 + i];
>> +
>> + return buf[1];
>> +}
>> +
>> +void xe_pmu_sample(const void *obj)
>> +{
>> + struct xe_engines *engines = ((struct xe_gputop *)obj)->eng_obj;
>> + const int num_val = engines->num_counters;
> this can be num_engines * 2.
> can remove this
>> + uint64_t val[2 + num_val];
>> + unsigned int i;
>> +
>> + engines->ts.prev = engines->ts.cur;
>> + engines->ts.cur = pmu_read_multi(engines->fd, num_val, val);
> This is unnecessary
>> +
>> + for (i = 0; i < engines->num_engines; i++) {
>> + struct xe_engine *engine = engine_ptr(engines, i);
>> +
>> + update_sample(&engine->busy_ticks, val);
>> + update_sample(&engine->total_ticks, val);
>> + }
>> +}
>> +
>> +int xe_pmu_init(const void *obj)
>> +{
>> + struct xe_engines *engines = ((struct xe_gputop *)obj)->eng_obj;
>> + unsigned int i;
>> + int fd;
>> + struct xe_engine *engine;
>> + uint64_t type = igt_perf_type_id(engines->device);
>> +
>> + engines->fd = -1;
>> + engines->num_counters = 0;
> you can use a local variable here
It is being used in xe_pmu_sample as well.
Thanks,
Soham
>> +
> why is 0 outside, Can't it be moved in the for loop
>> + engine = engine_ptr(engines, 0);
>> + fd = _open_pmu(type, &engines->num_counters,
>> &engine->busy_ticks, &engines->fd);
>> + if (fd < 0)
>> + return -1;
>> + fd = _open_pmu(type, &engines->num_counters,
>> &engine->total_ticks, &engines->fd);
>> + if (fd < 0)
>> + return -1;
>> +
>> + for (i = 1; i < engines->num_engines; i++) {
>> + engine = engine_ptr(engines, i);
>> + fd = _open_pmu(type, &engines->num_counters,
>> &engine->busy_ticks, &engines->fd);
>> + if (fd < 0)
>> + return -1;
>> + fd = _open_pmu(type, &engines->num_counters,
>> &engine->total_ticks, &engines->fd);
>> + if (fd < 0)
>> + return -1;
>> + }
>> + return 0;
>> +}
>> +
>> +static double pmu_calc_total(struct xe_pmu_pair *p)
> %s/pmu_calc_total/pmu_total_ticks
> or calc_total_ticks
>> +{
>> + double v;
>> +
>> + v = p->cur - p->prev;
>> + return v;
>> +}
>> +
>> +static double pmu_calc(struct xe_pmu_pair *p, double total_tick)
> %s/total tick/total ticks
>> +{
>> + double bz = p->cur - p->prev;
> use friendly names
> active ticks
>
> I think you can move total ticks also here. pass two pairs and
> calculate in same function
>> + double total;
>> +
>> + total = (bz * 100) / total_tick;
>> + return total;
>> +}
>> +
>> +static int
>> +print_device_description(const void *obj, int lines, int w, int h)
>> +{
>> + char *desc;
>> + int len;
>> +
>> + len = asprintf(&desc, "DRIVER: %s || BDF: %s",
>> + ((struct xe_gputop *)obj)->card->driver,
>> + ((struct xe_gputop *)obj)->card->pci_slot_name);
>> +
>> + printf("\033[7m%s%*s\033[0m\n",
>> + desc,
>> + (int)(w - len), " ");
>> + lines++;
>> + free(desc);
>> + return lines;
>> +}
>> +
>> +static int
>> +print_engines_header(struct xe_engines *engines,
>> + int lines, int con_w, int con_h)
>> +{
>> + const char *a;
>> +
>> + for (unsigned int i = 0;
>> + i < engines->num_engines && lines < con_h;
>> + i++) {
>> + struct xe_engine *engine = engine_ptr(engines, i);
>> +
>> + if (!engine->num_counters)
>> + continue;
>> +
>> + a = " ENGINES BUSY ";
>> +
>> + printf("\033[7m%s%*s\033[0m\n",
>> + a,
>> + (int)(con_w - strlen(a)), " ");
>> + lines++;
>> +
>> + break;
>> + }
>> +
>> + return lines;
>> +}
>> +
>> +static int
>> +print_engine(struct xe_engines *engines, unsigned int i,
>> + int lines, int con_w, int con_h)
>> +{
>> + struct xe_engine *engine = engine_ptr(engines, i);
>> + double total_tick = pmu_calc_total(&engine->total_ticks.val);
>> + double percentage = pmu_calc(&engine->busy_ticks.val, total_tick);
>> +
>> + printf("%*s", (int)(strlen(" ENGINES")),
>> engine->display_name);
>> + print_percentage_bar(percentage, con_w - strlen("
>> ENGINES"));
>> + printf("\n");
>> +
>> + return ++lines;
>> +}
>> +
>> +int xe_print_engines(const void *obj, int lines, int w, int h)
>> +{
>> + struct xe_engines *show = ((struct xe_gputop *)obj)->eng_obj;
>> +
>> + lines = print_device_description(obj, lines, w, h);
>> +
>> + lines = print_engines_header(show, lines, w, h);
>> +
>> + for (unsigned int i = 0; i < show->num_engines && lines < h; i++)
>> + lines = print_engine(show, i, lines, w, h);
>> +
>> + lines = print_engines_footer(lines, w, h);
>> +
>> + return lines;
>> +}
>> +
>> diff --git a/tools/gputop/xe_gputop.h b/tools/gputop/xe_gputop.h
>> new file mode 100644
>> index 000000000..1f23b2ed0
>> --- /dev/null
>> +++ b/tools/gputop/xe_gputop.h
>> @@ -0,0 +1,73 @@
>> +/* SPDX-License-Identifier: MIT
>> + *
>> + * Copyright © 2025 Intel Corporation
>> + */
>> +
>> +#ifndef __XE_GPUTOP_H__
>> +#define __XE_GPUTOP_H__
>> +
>> +#include <dirent.h>
>> +
>> +#include "igt_device_scan.h"
>> +#include "igt_perf.h"
>> +#include "utils.h"
>> +#include "xe/xe_query.h"
>> +
>> +struct xe_pmu_pair {
>> + uint64_t cur;
>> + uint64_t prev;
>> +};
>> +
>> +struct xe_pmu_counter {
>> + uint64_t type;
>> + uint64_t config;
>> + unsigned int idx;
>> + struct xe_pmu_pair val;
>> + bool present;
>> +};
>> +
>> +struct xe_engine {
>> + const char *name;
>> + char *display_name;
>> + char *short_name;
>> + struct drm_xe_engine_class_instance drm_xe_engine;
>> + unsigned int num_counters;
> didn't find initialization. needed?
>> + struct xe_pmu_counter busy_ticks;
>> + struct xe_pmu_counter total_ticks;
>> +};
>> +
>> +struct xe_engines {
>> + unsigned int num_engines;
>> + unsigned int num_classes;
> not used
>> + unsigned int num_counters;
>> + DIR *root;
>> + int fd;
>> + struct xe_pmu_pair ts;
> can be removed
>> + bool discrete;
> not used
>> + char *device;
>> + int num_gts;
> not used
>> +
>> + /* Do not edit below this line.
>> + * This structure is reallocated every time a new engine is
>> + * found and size is increased by sizeof (engine).
>> + */
> This comment can be removed
>
> Thanks
> Riana
>> + struct xe_engine engine;
>> +
>> +};
>> +
>> +struct xe_gputop {
>> + char *pmu_device;
>> + struct igt_device_card *card;
>> + struct xe_engines *eng_obj;
>> +};
>> +
>> +void xe_gputop_init(struct xe_gputop *obj,
>> + struct igt_device_card *card);
>> +
>> +void xe_populate_device_instances(struct gputop_device *dv);
>> +void *xe_discover_engines(const void *obj);
>> +void xe_pmu_sample(const void *obj);
>> +int xe_pmu_init(const void *obj);
>> +int xe_print_engines(const void *obj, int lines, int w, int h);
>> +
>> +#endif /* __XE_GPUTOP_H__ */
>
More information about the igt-dev
mailing list