[PATCH <i-g-t> v4 3/4] tools/gputop/xe_gputop: Add gputop support for xe specific devices
Riana Tauro
riana.tauro at intel.com
Fri Mar 21 06:20:24 UTC 2025
Hi Soham
On 3/21/2025 8:56 AM, Purkait, Soham wrote:
> Hi Riana,
>
> On 14-03-2025 10:27, Riana Tauro wrote:
>> Hi Soham
>>
>> Replace busy-ticks with engine-active-ticks and total-ticks with
>> engine-total-ticks to keep it consistent.
>>
>> Replace all occurences of busy with active in the series.
>>
>> On 3/13/2025 11:51 AM, Soham Purkait wrote:
>>> Add gputop support for xe-specific devices. Separate
>>> driver-specific code into respective source files.
>>>
>>> v2 : fix for refactoring GPUTOP into a
>>> vendor-agnostic tool (Lucas)
>>>
>>> v3 : Separate commit (Kamil)
>>>
>>> v4 : Headers in alphabetical order
>>> Engines memory allocation at
>>> the beginning all at once
>>> Removed PMU normalization (Riana)
>>>
>>> Signed-off-by: Soham Purkait <soham.purkait at intel.com>
>>> ---
>>> tools/gputop/xe_gputop.c | 372 +++++++++++++++++++++++++++++++++++++++
>>> tools/gputop/xe_gputop.h | 73 ++++++++
>>> 2 files changed, 445 insertions(+)
>>> create mode 100644 tools/gputop/xe_gputop.c
>>> create mode 100644 tools/gputop/xe_gputop.h
>>>
>>> diff --git a/tools/gputop/xe_gputop.c b/tools/gputop/xe_gputop.c
>>> new file mode 100644
>>> index 000000000..4137bb771
>>> --- /dev/null
>>> +++ b/tools/gputop/xe_gputop.c
>>> @@ -0,0 +1,372 @@
>>> +/* SPDX-License-Identifier: MIT */
>>> +/*
>>> + * Copyright © 2025 Intel Corporation
>>> + */
>>> +
>>> +#include "xe_gputop.h"
>>> +
>>> +#define engine_ptr(engines, n) (&(engines)->engine + (n))
>>> +
>>> +static void __update_sample(struct xe_pmu_counter *counter, uint64_t
>>> val)
>>> +{
>>> + counter->val.prev = counter->val.cur;
>>> + counter->val.cur = val;
>>> +}
>>> +
>>> +static void update_sample(struct xe_pmu_counter *counter, uint64_t
>>> *val)
>>> +{
>>> + if (counter->present)
>> presence of fd should be enough. present is not required.
>> The __update contents can be moved to same function
> In order to maintain the sanctity, I believe this approach is more
> aligned with the IGT implementation.
Don't think igt is doing this. do you mean xe pmu igt? >>> +
__update_sample(counter, val[counter->idx]);
>>> +}
>>> +
>>> +static const char *class_display_name(unsigned int class)
>>> +{
>>> + switch (class) {
>>> + case DRM_XE_ENGINE_CLASS_RENDER:
>>> + return "Render/3D";
>>> + case DRM_XE_ENGINE_CLASS_COPY:
>>> + return "Blitter";
>>> + case DRM_XE_ENGINE_CLASS_VIDEO_DECODE:
>>> + return "Video";
>>> + case DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE:
>>> + return "VideoEnhance";
>>> + case DRM_XE_ENGINE_CLASS_COMPUTE:
>>> + return "Compute";
>>> + default:
>>> + return "[unknown]";
>>> + }
>>> +}
>>> +
>>> +static inline void *clean_up(void *engines)
>>> +{
>>> + if (engines)
>>> + free(engines);
>>> +
>>> + return NULL;
>>> +}
>>> +
>>> +static char *pmu_name(struct igt_device_card *card)
>>> +{
>>> + int card_fd;
>>> + char device[30];
>>> + char *path;
>>> +
>>> + if (strlen(card->card))
>>> + card_fd = igt_open_card(card);
>>> + else if (strlen(card->render))
>>> + card_fd = igt_open_render(card);
>>> +
>>> + if (card_fd == -1)
>>> + return NULL;
>>> +
>>> + xe_perf_device(card_fd, device, sizeof(device));
>>> + path = strdup(device);
>> needs a free while cleanup
>>> + close(card_fd);
>>> + return path;
>>> +}
>>> +
>>> +static int _open_pmu(uint64_t type, unsigned int *cnt, struct
>>> xe_pmu_counter *pmu, int *fd)
>>> +{
>>> + int fd__ = igt_perf_open_group(type, pmu->config, *fd);
>>> +
>>> + if (fd__ >= 0) {
>>> + if (*fd == -1)
>>> + *fd = fd__;
>>> + pmu->present = true;
>>> + pmu->idx = (*cnt)++;
>>> + }
>>> +
>>> + return fd__;
>>> +}
>>> +
>>> +void xe_gputop_init(struct xe_gputop *obj,
>>> + struct igt_device_card *card)
>>> +{
>>> + obj->pmu_device = pmu_name(card);
>>> + if (!obj->pmu_device) {
>>> + fprintf(stderr, "%s : pmu_device path returned NULL", card-
>>> >pci_slot_name);
>>> + exit(EXIT_FAILURE);
>>> + }
>>> + obj->card = card;
>>> +}
>>> +
>>> +static int pmu_format_shift(int xe, const char *name)
>>> +{
>>> + uint32_t start;
>>> + int format;
>>> + char device[80];
>>> +
>>> + format = perf_event_format(xe_perf_device(xe, device,
>>> sizeof(device)),
>>> + name, &start);
>>> + if (format)
>>> + return 0;
>>> +
>>> + return start;
>>> +}
>>> +
>>> +static int engine_cmp(const void *__a, const void *__b)
>>> +{
>>> + const struct xe_engine *a = (struct xe_engine *)__a;
>>> + const struct xe_engine *b = (struct xe_engine *)__b;
>>> +
>>> + if (a->drm_xe_engine.engine_class != b->drm_xe_engine.engine_class)
>>> + return a->drm_xe_engine.engine_class - b-
>>> >drm_xe_engine.engine_class;
>>> + else
>>> + return a->drm_xe_engine.engine_instance - b-
>>> >drm_xe_engine.engine_instance;
>>> +}
>>> +
>>> +void *xe_discover_engines(const void *obj)
>>> +{
>>> + struct igt_device_card *card = ((struct xe_gputop *)obj)->card;
>>> + struct xe_engines *engines;
>>> + int ret = 0;
>>> + char device[30];
>>> + struct drm_xe_engine_class_instance *hwe;
>>> + int card_fd;
>>> +
>>> + if (!card || !strlen(card->card) || !strlen(card->render))
>>> + return NULL;
>>> +
>>> + if (strlen(card->card)) {
>>> + card_fd = igt_open_card(card);
>>> + } else if (strlen(card->render)) {
>>> + card_fd = igt_open_render(card);
>>> + } else {
>>> + fprintf(stderr, "Failed to detect device!\n");
>>> + return clean_up(engines);
>>> + }
>>> + xe_device_get(card_fd);
>>> + engines = malloc(sizeof(struct xe_engines) +
>>> + xe_number_engines(card_fd) * sizeof(struct xe_engine));
>> why additional?
> While allocating xe_engines, xe_engine space also needs to get allocated.
my bad. got confused because of naming>>
>>> + if (!engines)
>>> + return NULL;
>>> +
>>> + memset(engines, 0, sizeof(struct xe_engines) +
>>> + xe_number_engines(card_fd) * sizeof(struct xe_engine));
>>> +
>>> + engines->num_engines = 0;
>>> + engines->device = ((struct xe_gputop *)obj)->pmu_device;
>>> + xe_for_each_engine(card_fd, hwe) {
>>> + uint64_t engine_class, engine_instance, gt_shift, param_config;
>>> + struct xe_engine *engine;
>>> +
>>> + engine = engine_ptr(engines, engines->num_engines);
>>> + gt_shift = pmu_format_shift(card_fd, "gt");
>>> + engine_class = pmu_format_shift(card_fd, "engine_class");
>>> + engine_instance = pmu_format_shift(card_fd, "engine_instance");
>>> + param_config = (uint64_t)hwe->gt_id << gt_shift | hwe-
>>> >engine_class << engine_class
>>> + | hwe->engine_instance << engine_instance;
>> This function does pmu specific initialization too. discover_engines
>> is not the right name.
>> you could move pmu specific initialization to pmu_init or rename this
>> function
> Here engine specific discoveries is being performed so the name,
This is mostly populating the engine pmu configs. querying engines is
done in xe_query.c while
> strictly maintaining PMU specific initialization is not possible
From your implementation i see pmu init is done just after this,
Why can't it be a single function?> due to the underline implementation
But It could encourage other driver
> to maintain the same structure, while keeping
> it vendor agnostic.>>> +
>>> + engine->drm_xe_engine = *hwe;
>>> +
>>> + ret = perf_event_config(xe_perf_device(card_fd, device,
>>> sizeof(device)),
>>> + "engine-active-ticks", &engine->busy_ticks.config);
>>> + if (ret < 0)
>>> + break;
>>> +
>>> + engine->busy_ticks.config |= param_config;
>>> +
>>> + ret = perf_event_config(xe_perf_device(card_fd, device,
>>> sizeof(device)),
>>> + "engine-total-ticks", &engine->total_ticks.config);
>>> + if (ret < 0)
>>> + break;
>>> +
>>> + engine->total_ticks.config |= param_config;
>>> +
>>> + if (engine->busy_ticks.config == -1 || engine-
>>> >total_ticks.config == -1) {
>>> + ret = ENOENT;
>>> + break;
>>> + }
>>> +
>>> + ret = asprintf(&engine->display_name, "%s/%u",
>>> + class_display_name(engine->drm_xe_engine.engine_class),
>>> + engine->drm_xe_engine.engine_instance);
>>> +
>>> + if (ret <= 0) {
>>> + ret = errno;
>>> + break;
>>> + }
>>> + ret = asprintf(&engine->short_name, "%s/%u",
>>> + xe_engine_class_short_string(engine->drm_xe_engine.engine_class),
>>> + engine->drm_xe_engine.engine_instance);
>>> +
>>> + if (ret <= 0) {
>>> + ret = errno;
>>> + break;
>>> + }
>>> +
>>> + engines->num_engines++;
>>> + }
>>> +
>>> + if (!ret) {
>>> + errno = ret;
>>> + return clean_up(engines);
>>> + }
>>> +
>>> + qsort(engine_ptr(engines, 0), engines->num_engines,
>>> + sizeof(struct xe_engine), engine_cmp);
>>> +
>>> + ((struct xe_gputop *)obj)->eng_obj = engines;
>>> +
>>> + return engines;
>>> +}
>>> +
>>> +static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>>> +{
>>> + uint64_t buf[2 + num];
>>> + unsigned int i;
>>> + ssize_t len;
>>> +
>>> + memset(buf, 0, sizeof(buf));
>>> +
>>> + len = read(fd, buf, sizeof(buf));
>>> + assert(len == sizeof(buf));
>>> +
>>> + for (i = 0; i < num; i++)
>>> + val[i] = buf[2 + i];
>>> +
>>> + return buf[1];
>>> +}
>>> +
>>> +void xe_pmu_sample(const void *obj)
>>> +{
>>> + struct xe_engines *engines = ((struct xe_gputop *)obj)->eng_obj;
>>> + const int num_val = engines->num_counters;
>> this can be num_engines * 2.
>> can remove this
>>> + uint64_t val[2 + num_val];
>>> + unsigned int i;
>>> +
>>> + engines->ts.prev = engines->ts.cur;
>>> + engines->ts.cur = pmu_read_multi(engines->fd, num_val, val);
>> This is unnecessary
>>> +
>>> + for (i = 0; i < engines->num_engines; i++) {
>>> + struct xe_engine *engine = engine_ptr(engines, i);
>>> +
>>> + update_sample(&engine->busy_ticks, val);
>>> + update_sample(&engine->total_ticks, val);
>>> + }
>>> +}
>>> +
>>> +int xe_pmu_init(const void *obj)
>>> +{
>>> + struct xe_engines *engines = ((struct xe_gputop *)obj)->eng_obj;
>>> + unsigned int i;
>>> + int fd;
>>> + struct xe_engine *engine;
>>> + uint64_t type = igt_perf_type_id(engines->device);
>>> +
>>> + engines->fd = -1;
>>> + engines->num_counters = 0;
>> you can use a local variable here
> It is being used in xe_pmu_sample as well.>
see my comment in xe_pmu_sample
Thanks
Riana>
> Thanks,
> Soham
>
>>> +
>> why is 0 outside, Can't it be moved in the for loop
>>> + engine = engine_ptr(engines, 0);
>>> + fd = _open_pmu(type, &engines->num_counters, &engine-
>>> >busy_ticks, &engines->fd);
>>> + if (fd < 0)
>>> + return -1;
>>> + fd = _open_pmu(type, &engines->num_counters, &engine-
>>> >total_ticks, &engines->fd);
>>> + if (fd < 0)
>>> + return -1;
>>> +
>>> + for (i = 1; i < engines->num_engines; i++) {
>>> + engine = engine_ptr(engines, i);
>>> + fd = _open_pmu(type, &engines->num_counters, &engine-
>>> >busy_ticks, &engines->fd);
>>> + if (fd < 0)
>>> + return -1;
>>> + fd = _open_pmu(type, &engines->num_counters, &engine-
>>> >total_ticks, &engines->fd);
>>> + if (fd < 0)
>>> + return -1;
>>> + }
>>> + return 0;
>>> +}
>>> +
>>> +static double pmu_calc_total(struct xe_pmu_pair *p)
>> %s/pmu_calc_total/pmu_total_ticks
>> or calc_total_ticks
>>> +{
>>> + double v;
>>> +
>>> + v = p->cur - p->prev;
>>> + return v;
>>> +}
>>> +
>>> +static double pmu_calc(struct xe_pmu_pair *p, double total_tick)
>> %s/total tick/total ticks
>>> +{
>>> + double bz = p->cur - p->prev;
>> use friendly names
>> active ticks
>>
>> I think you can move total ticks also here. pass two pairs and
>> calculate in same function
>>> + double total;
>>> +
>>> + total = (bz * 100) / total_tick;
>>> + return total;
>>> +}
>>> +
>>> +static int
>>> +print_device_description(const void *obj, int lines, int w, int h)
>>> +{
>>> + char *desc;
>>> + int len;
>>> +
>>> + len = asprintf(&desc, "DRIVER: %s || BDF: %s",
>>> + ((struct xe_gputop *)obj)->card->driver,
>>> + ((struct xe_gputop *)obj)->card->pci_slot_name);
>>> +
>>> + printf("\033[7m%s%*s\033[0m\n",
>>> + desc,
>>> + (int)(w - len), " ");
>>> + lines++;
>>> + free(desc);
>>> + return lines;
>>> +}
>>> +
>>> +static int
>>> +print_engines_header(struct xe_engines *engines,
>>> + int lines, int con_w, int con_h)
>>> +{
>>> + const char *a;
>>> +
>>> + for (unsigned int i = 0;
>>> + i < engines->num_engines && lines < con_h;
>>> + i++) {
>>> + struct xe_engine *engine = engine_ptr(engines, i);
>>> +
>>> + if (!engine->num_counters)
>>> + continue;
>>> +
>>> + a = " ENGINES BUSY ";
>>> +
>>> + printf("\033[7m%s%*s\033[0m\n",
>>> + a,
>>> + (int)(con_w - strlen(a)), " ");
>>> + lines++;
>>> +
>>> + break;
>>> + }
>>> +
>>> + return lines;
>>> +}
>>> +
>>> +static int
>>> +print_engine(struct xe_engines *engines, unsigned int i,
>>> + int lines, int con_w, int con_h)
>>> +{
>>> + struct xe_engine *engine = engine_ptr(engines, i);
>>> + double total_tick = pmu_calc_total(&engine->total_ticks.val);
>>> + double percentage = pmu_calc(&engine->busy_ticks.val, total_tick);
>>> +
>>> + printf("%*s", (int)(strlen(" ENGINES")), engine-
>>> >display_name);
>>> + print_percentage_bar(percentage, con_w - strlen(" ENGINES"));
>>> + printf("\n");
>>> +
>>> + return ++lines;
>>> +}
>>> +
>>> +int xe_print_engines(const void *obj, int lines, int w, int h)
>>> +{
>>> + struct xe_engines *show = ((struct xe_gputop *)obj)->eng_obj;
>>> +
>>> + lines = print_device_description(obj, lines, w, h);
>>> +
>>> + lines = print_engines_header(show, lines, w, h);
>>> +
>>> + for (unsigned int i = 0; i < show->num_engines && lines < h; i++)
>>> + lines = print_engine(show, i, lines, w, h);
>>> +
>>> + lines = print_engines_footer(lines, w, h);
>>> +
>>> + return lines;
>>> +}
>>> +
>>> diff --git a/tools/gputop/xe_gputop.h b/tools/gputop/xe_gputop.h
>>> new file mode 100644
>>> index 000000000..1f23b2ed0
>>> --- /dev/null
>>> +++ b/tools/gputop/xe_gputop.h
>>> @@ -0,0 +1,73 @@
>>> +/* SPDX-License-Identifier: MIT
>>> + *
>>> + * Copyright © 2025 Intel Corporation
>>> + */
>>> +
>>> +#ifndef __XE_GPUTOP_H__
>>> +#define __XE_GPUTOP_H__
>>> +
>>> +#include <dirent.h>
>>> +
>>> +#include "igt_device_scan.h"
>>> +#include "igt_perf.h"
>>> +#include "utils.h"
>>> +#include "xe/xe_query.h"
>>> +
>>> +struct xe_pmu_pair {
>>> + uint64_t cur;
>>> + uint64_t prev;
>>> +};
>>> +
>>> +struct xe_pmu_counter {
>>> + uint64_t type;
>>> + uint64_t config;
>>> + unsigned int idx;
>>> + struct xe_pmu_pair val;
>>> + bool present;
>>> +};
>>> +
>>> +struct xe_engine {
>>> + const char *name;
>>> + char *display_name;
>>> + char *short_name;
>>> + struct drm_xe_engine_class_instance drm_xe_engine;
>>> + unsigned int num_counters;
>> didn't find initialization. needed?
>>> + struct xe_pmu_counter busy_ticks;
>>> + struct xe_pmu_counter total_ticks;
>>> +};
>>> +
>>> +struct xe_engines {
>>> + unsigned int num_engines;
>>> + unsigned int num_classes;
>> not used
>>> + unsigned int num_counters;
>>> + DIR *root;
>>> + int fd;
>>> + struct xe_pmu_pair ts;
>> can be removed
>>> + bool discrete;
>> not used
>>> + char *device;
>>> + int num_gts;
>> not used
>>> +
>>> + /* Do not edit below this line.
>>> + * This structure is reallocated every time a new engine is
>>> + * found and size is increased by sizeof (engine).
>>> + */
>> This comment can be removed
>>
>> Thanks
>> Riana
>>> + struct xe_engine engine;
>>> +
>>> +};
>>> +
>>> +struct xe_gputop {
>>> + char *pmu_device;
>>> + struct igt_device_card *card;
>>> + struct xe_engines *eng_obj;
>>> +};
>>> +
>>> +void xe_gputop_init(struct xe_gputop *obj,
>>> + struct igt_device_card *card);
>>> +
>>> +void xe_populate_device_instances(struct gputop_device *dv);
>>> +void *xe_discover_engines(const void *obj);
>>> +void xe_pmu_sample(const void *obj);
>>> +int xe_pmu_init(const void *obj);
>>> +int xe_print_engines(const void *obj, int lines, int w, int h);
>>> +
>>> +#endif /* __XE_GPUTOP_H__ */
>>
More information about the igt-dev
mailing list