[PATCH i-g-t 1/2] tools/gputop/gputop: Add per-device engine activity stats in GPUTOP
nishit.sharma at intel.com
nishit.sharma at intel.com
Tue Jun 10 13:53:17 UTC 2025
From: Nishit Sharma <nishit.sharma at intel.com>
This patch adds per-device engine activity statistics to GPUTOP. It uses the
PMU interface to display the activity of every engine instance, either for a
requested set of devices or for all devices.
Signed-off-by: Nishit Sharma <nishit.sharma at intel.com>
---
lib/igt_device_scan.c | 128 ++++++++
lib/igt_device_scan.h | 1 +
tools/gputop/gputop.c | 638 +++++++++++++++++++++++++++++++++++++++
tools/gputop/meson.build | 6 +
tools/gputop/utils.c | 51 ++++
tools/gputop/utils.h | 64 ++++
tools/gputop/xe_gputop.c | 378 +++++++++++++++++++++++
tools/gputop/xe_gputop.h | 62 ++++
tools/meson.build | 6 +-
9 files changed, 1329 insertions(+), 5 deletions(-)
create mode 100644 tools/gputop/gputop.c
create mode 100644 tools/gputop/meson.build
create mode 100644 tools/gputop/utils.c
create mode 100644 tools/gputop/utils.h
create mode 100644 tools/gputop/xe_gputop.c
create mode 100644 tools/gputop/xe_gputop.h
diff --git a/lib/igt_device_scan.c b/lib/igt_device_scan.c
index e1cebfaed..0ab7b7a0a 100644
--- a/lib/igt_device_scan.c
+++ b/lib/igt_device_scan.c
@@ -1436,6 +1436,7 @@ struct filter {
char *driver;
char *pf;
char *vf;
+ char *subsystem;
} data;
};
@@ -1455,6 +1456,7 @@ static void fill_filter_data(struct filter *filter, const char *key, const char
__fill_key(driver);
__fill_key(pf);
__fill_key(vf);
+ __fill_key(subsystem);
#undef __fill_key
}
@@ -1711,6 +1713,77 @@ static struct igt_list_head *filter_sriov(const struct filter_class *fcls,
return &igt_devs.filtered;
}
+/*
+ * Select GPU devices by driver name, device type, subsystem and card index.
+ *
+ * The "card" key accepts either a non-negative index, in which case the n-th
+ * matching device is selected, or the literal "all", in which case every
+ * matching device is selected. A missing "card" key behaves like card=0.
+ * Any other "card" value leaves the filtered list untouched.
+ */
+static struct igt_list_head *filter_device(const struct filter_class *fcls,
+					   const struct filter *filter)
+{
+	const char *card_spec = filter->data.card;
+	const char *subsys = filter->data.subsystem;
+	struct igt_device *dev;
+	bool allcards = false;
+	int card = 0;
+	(void)fcls;
+
+	DBG("filter device\n");
+	if (card_spec && sscanf(card_spec, "%d", &card) == 1) {
+		/* Numeric index: negative values never match. */
+		if (card < 0)
+			return &igt_devs.filtered;
+	} else if (card_spec) {
+		char word[5] = {0};
+
+		/* Not a number: the only accepted keyword is "all". */
+		card = 0;
+		if (sscanf(card_spec, "%4s", word) != 1 || strcmp(word, "all"))
+			return &igt_devs.filtered;
+		allcards = true;
+	}
+
+	igt_list_for_each_entry(dev, &igt_devs.all, link) {
+		struct igt_device *dup;
+
+		/* Skip if 'driver' doesn't match */
+		if (filter->data.driver && !strequal(filter->data.driver, dev->driver))
+			continue;
+
+		/* Skip if 'device' doesn't match */
+		if (filter->data.device && !is_device_matched(dev, filter->data.device))
+			continue;
+
+		/* Skip if 'subsystem' is given, is not "all", and doesn't match */
+		if (subsys && strcmp(subsys, "all") &&
+		    strcmp(subsys, get_prop_subsystem(dev)))
+			continue;
+
+		/* Include all the cards */
+		if (allcards) {
+			dup = duplicate_device(dev);
+			igt_list_add(&dup->link, &igt_devs.filtered);
+			continue;
+		}
+
+		/* Still counting down to the requested card */
+		if (card) {
+			card--;
+			continue;
+		}
+
+		/* We get n-th card */
+		dup = duplicate_device(dev);
+		igt_list_add_tail(&dup->link, &igt_devs.filtered);
+		break;
+	}
+
+	DBG("Filter device filtered size: %d\n", igt_list_length(&igt_devs.filtered));
+
+	return &igt_devs.filtered;
+}
+
+
static bool sys_path_valid(const struct filter_class *fcls,
const struct filter *filter)
{
@@ -1752,6 +1825,13 @@ static struct filter_class filter_definition_list[] = {
.help = "sriov:[vendor=%04x/name][,device=%04x][,card=%d][,pf=%d][,vf=%d]",
.detail = "find pf or vf\n",
},
+ {
+ .name = "device",
+ .filter_function = filter_device,
+ .help =
+ "device:[driver=name][,subsystem=all|<subsystem>][,device=type][,card=%d|all]",
+ .detail = "find device by driver name, subsystem, device type and card number\n",
+ },
{
.name = NULL,
},
@@ -2065,6 +2145,54 @@ bool igt_device_card_match_pci(const char *filter,
return __igt_device_card_match(filter, card, true);
}
+/**
+ * igt_device_card_match_all
+ * @filter: filter string.
+ * @card: output; on success receives a newly allocated array of
+ *        igt_device_card structures, one per matched device.
+ *
+ * Rescans devices, applies @filter and copies every device left in the
+ * filtered list into a freshly allocated array.
+ *
+ * Returns: the number of cards found. When 0 is returned (filter rejected,
+ * nothing matched, or allocation failed) @card is left unmodified.
+ *
+ * Note: the caller owns the returned array and must free() it.
+ */
+int igt_device_card_match_all(const char *filter, struct igt_device_card **card)
+{
+	struct igt_device_card *matched;
+	struct igt_device *dev = NULL;
+	int total, filled = 0;
+
+	igt_devices_scan();
+
+	if (!igt_device_filter_apply(filter))
+		return 0;
+
+	if (igt_list_empty(&igt_devs.filtered))
+		return 0;
+
+	/* First pass: size the array. */
+	total = 0;
+	igt_list_for_each_entry(dev, &igt_devs.filtered, link)
+		total++;
+
+	matched = calloc(total, sizeof(*matched));
+	if (!matched)
+		return 0;
+
+	/* Second pass: copy each filtered device into its slot. */
+	igt_list_for_each_entry(dev, &igt_devs.filtered, link) {
+		__copy_dev_to_card(dev, &matched[filled]);
+		filled++;
+	}
+
+	if (filled)
+		*card = matched;
+
+	return filled;
+}
+
+
/**
* igt_device_get_pretty_name
* @card: pointer to igt_device_card struct
diff --git a/lib/igt_device_scan.h b/lib/igt_device_scan.h
index f1cd3b1e9..e6e31e799 100644
--- a/lib/igt_device_scan.h
+++ b/lib/igt_device_scan.h
@@ -89,6 +89,7 @@ int igt_device_filter_pci(void);
bool igt_device_card_match(const char *filter, struct igt_device_card *card);
bool igt_device_card_match_pci(const char *filter,
struct igt_device_card *card);
+int igt_device_card_match_all(const char *filter, struct igt_device_card **card);
bool igt_device_find_first_i915_discrete_card(struct igt_device_card *card);
bool igt_device_find_integrated_card(struct igt_device_card *card);
bool igt_device_find_first_xe_discrete_card(struct igt_device_card *card);
diff --git a/tools/gputop/gputop.c b/tools/gputop/gputop.c
new file mode 100644
index 000000000..678ae7935
--- /dev/null
+++ b/tools/gputop/gputop.c
@@ -0,0 +1,638 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2025 Intel Corporation
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <math.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sysmacros.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "drmtest.h"
+#include "igt_core.h"
+#include "igt_drm_clients.h"
+#include "igt_drm_fdinfo.h"
+#include "igt_perf.h"
+#include "igt_profiling.h"
+#include "xe_gputop.h"
+#include "xe/xe_query.h"
+
+/**
+ * Supported Drivers
+ *
+ * Adhere to the following requirements when implementing support for a
+ * new driver:
+ * @drivers: Add the driver name string to drivers[], before the NULL
+ * terminator.
+ * @total_count: Update NUM_DRIVER with the total number of supported drivers.
+ * @operations: Add the operations of the new driver to oprs[], at the same
+ * index as its drivers[] entry:
+ * gputop_init,
+ * init_engines,
+ * pmu_init,
+ * pmu_sample,
+ * print_engines,
+ * clean_up
+ * @devices: Add an entry to the devices[] array of type
+ * "struct gputop_device" with the initial values.
+ */
+static const char * const drivers[] = {
+ "xe",
+ /* Keep the last one as NULL */
+ NULL
+};
+
+/**
+ * Number of supported drivers, i.e. the number of entries in drivers[]
+ * without counting its NULL terminator. Keep both in sync.
+ */
+#define NUM_DRIVER 1
+
+/**
+ * Per-driver operation tables. Entry N holds the callbacks of the driver
+ * named by drivers[N], so the order of both arrays must match.
+ */
+struct device_operations oprs[NUM_DRIVER] = {
+	{
+		.gputop_init = xe_gputop_init,
+		.init_engines = xe_populate_engines,
+		.pmu_init = xe_pmu_init,
+		.pmu_sample = xe_pmu_sample,
+		.print_engines = xe_print_engines,
+		.clean_up = xe_clean_up,
+	}
+};
+
+/*
+ * Per-driver device bookkeeping; entry N belongs to drivers[N]. Every entry
+ * starts out empty and is filled in while devices are discovered.
+ */
+struct gputop_device devices[] = {
+	{ .driver_present = false, .len = 0, .instances = NULL }
+};
+
+/*
+ * Which per-client counters are used to compute engine utilization:
+ * engine time deltas over the sampling period, or cycles over total cycles.
+ */
+enum utilization_type {
+ UTILIZATION_TYPE_ENGINE_TIME,
+ UTILIZATION_TYPE_TOTAL_CYCLES,
+};
+
+/*
+ * Release everything held by devices[]: run the driver's clean_up hook over
+ * its instance array, free the array itself and reset the entry to its
+ * initial empty state so no dangling pointer is left behind.
+ */
+static void gputop_clean_up(void)
+{
+	for (int i = 0; drivers[i]; i++) {
+		oprs[i].clean_up(devices[i].instances, devices[i].len);
+		free(devices[i].instances);
+		devices[i].instances = NULL;
+		devices[i].driver_present = false;
+		devices[i].len = 0;
+	}
+}
+
+/*
+ * Look up the driver of @card in drivers[].
+ *
+ * Returns the index of the matching entry, or -1 if the driver is not
+ * supported.
+ */
+static int find_driver(struct igt_device_card *card)
+{
+	int idx = 0;
+
+	while (drivers[idx]) {
+		if (!strcmp(drivers[idx], card->driver))
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * Discover the devices selected by @filter and create one driver instance
+ * for every PCI device whose driver is listed in drivers[].
+ *
+ * Each instance receives its own heap copy of the matching igt_device_card;
+ * the copy is handed to the driver's gputop_init hook.
+ *
+ * Returns the number of instances created. Allocation failures are fatal.
+ */
+static int populate_device_instances(const char *filter)
+{
+	struct igt_device_card *cards = NULL;
+	int count, final_count = 0;
+
+	count = igt_device_card_match_all(filter, &cards);
+	for (int j = 0; j < count; j++) {
+		struct igt_device_card *card_inplace;
+		struct xe_gputop *instances, *slot;
+		struct gputop_device *dev;
+		int driver_no;
+
+		if (strcmp(cards[j].subsystem, "pci") != 0)
+			continue;
+
+		driver_no = find_driver(&cards[j]);
+		if (driver_no < 0)
+			continue;
+
+		dev = &devices[driver_no];
+
+		/* Grow through a temporary so the old array is not lost on failure. */
+		instances = realloc(dev->instances,
+				    (dev->len + 1) * sizeof(*instances));
+		if (!instances) {
+			fprintf(stderr,
+				"Device instance realloc failed (%s)\n",
+				strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+		dev->instances = instances;
+
+		/*
+		 * Address the new slot through a typed pointer: arithmetic on
+		 * the void pointer itself would advance by bytes, not elements.
+		 * realloc() does not clear the new slot, so zero it here.
+		 */
+		slot = &instances[dev->len];
+		memset(slot, 0, sizeof(*slot));
+
+		card_inplace = malloc(sizeof(*card_inplace));
+		if (!card_inplace) {
+			fprintf(stderr,
+				"Device card allocation failed (%s)\n",
+				strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+		*card_inplace = cards[j];
+
+		dev->driver_present = true;
+		dev->len++;
+		oprs[driver_no].gputop_init(slot, card_inplace);
+		final_count++;
+	}
+
+	free(cards);
+	return final_count;
+}
+
+/*
+ * Print the two-line header that precedes the clients of one DRM minor:
+ * a "DRM minor N" banner and a column title row (PID, optional memory
+ * columns, one centered column per engine, NAME).
+ *
+ * @engine_w receives the width given to each engine column.
+ *
+ * Returns the updated line count; nothing more is printed once it reaches
+ * @con_h.
+ */
+static int
+print_client_header(struct igt_drm_client *c, int lines, int con_w, int con_h,
+		    int *engine_w)
+{
+	int ret, len;
+
+	if (lines++ >= con_h)
+		return lines;
+
+	printf(ANSI_HEADER);
+	ret = printf("DRM minor %u", c->drm_minor);
+	/* n_spaces() takes an unsigned count; never hand it a negative one. */
+	if (con_w > ret)
+		n_spaces(con_w - ret);
+
+	if (lines++ >= con_h)
+		return lines;
+
+	putchar('\n');
+	if (c->regions->num_regions)
+		len = printf("%*s MEM RSS ",
+			     c->clients->max_pid_len, "PID");
+	else
+		len = printf("%*s ", c->clients->max_pid_len, "PID");
+
+	if (c->engines->num_engines) {
+		unsigned int i;
+		int width;
+
+		*engine_w = width =
+			(con_w - len - c->clients->max_name_len - 1) /
+			c->engines->num_engines;
+
+		for (i = 0; i <= c->engines->max_engine_id; i++) {
+			const char *name = c->engines->names[i];
+			int name_len, pad, spaces;
+
+			/* Unused engine ids have no name; test before strlen(). */
+			if (!name)
+				continue;
+
+			name_len = strlen(name);
+			pad = (width - name_len) / 2;
+			spaces = width - pad - name_len;
+
+			/* Column too narrow for this name: leave it out. */
+			if (pad < 0 || spaces < 0)
+				continue;
+
+			n_spaces(pad);
+			printf("%s", name);
+			n_spaces(spaces);
+			len += pad + name_len + spaces;
+		}
+	}
+
+	printf(" %-*s" ANSI_RESET "\n", con_w - len - 1, "NAME");
+
+	return lines;
+}
+
+/*
+ * Tell whether two clients expose the same set of engines: same engine
+ * count, same highest engine id and, for every id, the same capacity and
+ * the same name (or no name on both sides).
+ */
+static bool
+engines_identical(const struct igt_drm_client *c,
+		  const struct igt_drm_client *pc)
+{
+	unsigned int i;
+
+	if (c->engines->num_engines != pc->engines->num_engines ||
+	    c->engines->max_engine_id != pc->engines->max_engine_id)
+		return false;
+
+	for (i = 0; i <= c->engines->max_engine_id; i++) {
+		const char *name = c->engines->names[i];
+		const char *prev_name = pc->engines->names[i];
+
+		if (c->engines->capacity[i] != pc->engines->capacity[i])
+			return false;
+
+		/* One side has a name and the other does not. */
+		if (!name != !prev_name)
+			return false;
+
+		/* Only compare when both names exist; strcmp(NULL, NULL) is UB. */
+		if (name && strcmp(name, prev_name))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Decide whether a new header has to be printed before client @c, given the
+ * previously printed client @pc (NULL if there was none).
+ */
+static bool
+newheader(const struct igt_drm_client *c, const struct igt_drm_client *pc)
+{
+	if (!pc)
+		return true;
+
+	if (c->drm_minor != pc->drm_minor)
+		return true;
+
+	/*
+	 * Below is a hack for drivers like amdgpu which omit listing
+	 * unused engines. Simply treat them as separate minors which
+	 * will ensure the per-engine columns are correctly sized in all
+	 * cases.
+	 */
+	return !engines_identical(c, pc);
+}
+
+/*
+ * Print @sz scaled down by factors of 1024 to the largest unit (B, K, M, G)
+ * that keeps the value below 1024, or to G when it never gets that small.
+ *
+ * Returns the number of characters printed.
+ */
+static int
+print_size(uint64_t sz)
+{
+	char units[] = {'B', 'K', 'M', 'G'};
+	unsigned int u = 0;
+
+	while (u < ARRAY_SIZE(units) - 1 && sz >= 1024) {
+		sz /= 1024;
+		u++;
+	}
+
+	return printf("%7"PRIu64"%c ", sz, units[u]);
+}
+
+/*
+ * Print one row for client @c: PID, optional memory totals, one utilization
+ * bar per engine and the client name. A header is printed first whenever
+ * newheader() reports that @c differs from the previous client in *@prevc.
+ *
+ * Clients that expose no usable utilization data, have fewer than two
+ * samples, or were idle are skipped and 0 is returned for them. Otherwise
+ * the updated line count is returned.
+ *
+ * @t is accepted for interface compatibility and is not used.
+ */
+static int
+print_client(struct igt_drm_client *c, struct igt_drm_client **prevc,
+	     double t, int lines, int con_w, int con_h,
+	     unsigned int period_us, int *engine_w)
+{
+	enum utilization_type utilization_type;
+	unsigned int i;
+	uint64_t sz;
+	int len;
+
+	(void)t;
+
+	if (c->utilization_mask & IGT_DRM_CLIENT_UTILIZATION_TOTAL_CYCLES &&
+	    c->utilization_mask & IGT_DRM_CLIENT_UTILIZATION_CYCLES)
+		utilization_type = UTILIZATION_TYPE_TOTAL_CYCLES;
+	else if (c->utilization_mask & IGT_DRM_CLIENT_UTILIZATION_ENGINE_TIME)
+		utilization_type = UTILIZATION_TYPE_ENGINE_TIME;
+	else
+		return 0;
+
+	if (c->samples < 2)
+		return 0;
+
+	/* Filter out idle clients. */
+	switch (utilization_type) {
+	case UTILIZATION_TYPE_ENGINE_TIME:
+		if (!c->total_engine_time)
+			return 0;
+		break;
+	case UTILIZATION_TYPE_TOTAL_CYCLES:
+		if (!c->total_total_cycles)
+			return 0;
+		break;
+	}
+
+	/* Print header when moving to a different DRM card. */
+	if (newheader(c, *prevc)) {
+		lines = print_client_header(c, lines, con_w, con_h, engine_w);
+		if (lines >= con_h)
+			return lines;
+	}
+
+	*prevc = c;
+
+	len = printf("%*s ", c->clients->max_pid_len, c->pid_str);
+
+	if (c->regions->num_regions) {
+		for (sz = 0, i = 0; i <= c->regions->max_region_id; i++)
+			sz += c->memory[i].total;
+		len += print_size(sz);
+
+		for (sz = 0, i = 0; i <= c->regions->max_region_id; i++)
+			sz += c->memory[i].resident;
+		len += print_size(sz);
+	}
+
+	lines++;
+
+	for (i = 0; c->samples > 1 && i <= c->engines->max_engine_id; i++) {
+		double pct = 0.0;
+
+		if (!c->engines->capacity[i])
+			continue;
+
+		switch (utilization_type) {
+		case UTILIZATION_TYPE_ENGINE_TIME:
+			pct = (double)c->utilization[i].delta_engine_time / period_us / 1e3 * 100 /
+			      c->engines->capacity[i];
+			break;
+		case UTILIZATION_TYPE_TOTAL_CYCLES:
+			/* No elapsed cycles in this period: report 0% instead of NaN/inf. */
+			if (c->utilization[i].delta_total_cycles)
+				pct = (double)c->utilization[i].delta_cycles /
+				      c->utilization[i].delta_total_cycles * 100 /
+				      c->engines->capacity[i];
+			break;
+		}
+
+		/*
+		 * Guard against fluctuations between our scanning period and
+		 * GPU times as exported by the kernel in fdinfo.
+		 */
+		if (pct > 100.0)
+			pct = 100.0;
+
+		print_percentage_bar(pct, *engine_w);
+		len += *engine_w;
+	}
+
+	printf(" %-*s\n", con_w - len - 1, c->print_name);
+
+	return lines;
+}
+
+/* Three-way comparison of two clients by id: 1, -1 or 0. */
+static int
+__client_id_cmp(const struct igt_drm_client *a,
+		const struct igt_drm_client *b)
+{
+	return (a->id > b->id) - (a->id < b->id);
+}
+
+/*
+ * Sort callback for clients: ascending DRM minor first, then descending
+ * aggregated engine time of the last sampling period, then ascending
+ * client id. @unused is ignored.
+ */
+static int client_cmp(const void *_a, const void *_b, void *unused)
+{
+	const struct igt_drm_client *a = _a;
+	const struct igt_drm_client *b = _b;
+	long key_a, key_b;
+
+	/* DRM cards into consecutive buckets first. */
+	key_a = a->drm_minor;
+	key_b = b->drm_minor;
+	if (key_a != key_b)
+		return key_a > key_b ? 1 : -1;
+
+	/*
+	 * Within buckets sort by last sampling period aggregated runtime, with
+	 * client id as a tie-breaker.
+	 */
+	key_a = a->agg_delta_engine_time;
+	key_b = b->agg_delta_engine_time;
+	if (key_a != key_b)
+		return key_b > key_a ? 1 : -1;
+
+	return __client_id_cmp(a, b);
+}
+
+/*
+ * Query the terminal size of file descriptor 0 and store it in @w and @h.
+ * If the query fails both are left untouched; if the terminal reports 0x0
+ * (serial console) 80x24 is assumed.
+ */
+static void update_console_size(int *w, int *h)
+{
+	struct winsize size = {};
+	bool unknown;
+
+	if (ioctl(0, TIOCGWINSZ, &size) == -1)
+		return;
+
+	/* Serial console. */
+	unknown = !size.ws_col && !size.ws_row;
+
+	*w = unknown ? 80 : size.ws_col;
+	*h = unknown ? 24 : size.ws_row;
+}
+
+/* Move the cursor to the top-left corner and erase to the end of the screen. */
+static void clrscr(void)
+{
+	fputs("\033[H\033[J", stdout);
+}
+
+/*
+ * Command line settings.
+ */
+struct gputop_args {
+ /* Number of refresh iterations; parse_args() defaults it to -1. */
+ long n_iter;
+ /* Delay between refreshes, in microseconds. */
+ unsigned long delay_usec;
+ /* Device filter string; NULL makes main() use its built-in default filter. */
+ char *device;
+};
+
+/*
+ * Print the usage text. The program name shown is @full_path with any
+ * leading directory components removed.
+ */
+static void help(char *full_path)
+{
+	const char *slash = strrchr(full_path, '/');
+	const char *short_program_name = slash ? slash + 1 : full_path;
+
+	printf("Usage:\n"
+	       "\t%s [options]\n\n"
+	       "Options:\n"
+	       "\t-h, --help show this help\n"
+	       "\t-d, --delay =SEC[.TENTHS] iterative delay as SECS [.TENTHS]\n"
+	       "\t-n, --iterations =NUMBER number of executions\n"
+	       , short_program_name);
+}
+
+/*
+ * Parse the command line into @args.
+ *
+ * Defaults: unlimited iterations (n_iter = -1), a two second delay and no
+ * device filter.
+ *
+ * Returns 1 when the program should continue, 0 when help was printed and
+ * the program should exit successfully, and -1 on an invalid option or
+ * value.
+ */
+static int parse_args(int argc, char * const argv[], struct gputop_args *args)
+{
+	static const char cmdopts_s[] = "hn:d:";
+	static const struct option cmdopts[] = {
+		{"help", no_argument, 0, 'h'},
+		{"delay", required_argument, 0, 'd'},
+		{"iterations", required_argument, 0, 'n'},
+		{ }
+	};
+
+	/* defaults */
+	memset(args, 0, sizeof(*args));
+	args->n_iter = -1;
+	args->delay_usec = 2 * USEC_PER_SEC;
+	args->device = NULL;
+
+	for (;;) {
+		int c, idx = 0;
+		char *end_ptr = NULL;
+
+		c = getopt_long(argc, argv, cmdopts_s, cmdopts, &idx);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'n':
+			/* Reject empty, non-numeric and out-of-range counts. */
+			errno = 0;
+			args->n_iter = strtol(optarg, &end_ptr, 10);
+			if (end_ptr == optarg || *end_ptr != '\0' ||
+			    errno == ERANGE) {
+				fprintf(stderr, "Invalid iterations value: %s\n", optarg);
+				return -1;
+			}
+			break;
+		case 'd':
+			/* Whole seconds, optionally followed by ".TENTHS". */
+			args->delay_usec = strtoul(optarg, &end_ptr, 10) * USEC_PER_SEC;
+			if (*end_ptr == '.')
+				args->delay_usec += strtoul(end_ptr + 1, &end_ptr, 10) * USEC_PER_DECISEC;
+
+			if (!args->delay_usec) {
+				fprintf(stderr, "Invalid delay value: %s\n", optarg);
+				return -1;
+			}
+			break;
+		case 'h':
+			help(argv[0]);
+			return 0;
+		default:
+			fprintf(stderr, "Unknown option '%c'.\n", c);
+			return -1;
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Set by the SIGINT handler to ask the main loop to stop. The type is
+ * volatile sig_atomic_t so that writing it from a signal handler is well
+ * defined.
+ */
+static volatile sig_atomic_t stop_top;
+
+/* SIGINT handler: only raises the stop flag. */
+static void sigint_handler(int sig)
+{
+	(void) sig;
+	stop_top = 1;
+}
+
+/*
+ * Return instance @idx of driver @drv.
+ *
+ * devices[].instances is a void pointer to an array of struct xe_gputop
+ * (that is the element size used when the array is allocated), so it has to
+ * be indexed through a typed pointer: arithmetic on the void pointer itself
+ * would advance by bytes instead of elements.
+ */
+static void *device_instance(int drv, int idx)
+{
+	return (struct xe_gputop *)devices[drv].instances + idx;
+}
+
+/*
+ * Entry point: parse options, discover devices, set up engines and PMU
+ * counters for each of them, then periodically print per-device engine
+ * activity followed by per-client utilization until the iteration count is
+ * exhausted or SIGINT is received.
+ */
+int main(int argc, char **argv)
+{
+	struct gputop_args args;
+	unsigned int period_us;
+	struct igt_profiled_device *profiled_devices = NULL;
+	struct igt_drm_clients *clients = NULL;
+	int con_w = -1, con_h = -1;
+	int ret;
+	long n;
+
+	ret = parse_args(argc, argv, &args);
+	if (ret < 0)
+		return EXIT_FAILURE;
+	if (!ret)
+		return EXIT_SUCCESS;
+
+	n = args.n_iter;
+	period_us = args.delay_usec;
+
+	if (!populate_device_instances(args.device ? args.device
+				       : "device:subsystem=pci,card=all")) {
+		printf("No device found.\n");
+		gputop_clean_up();
+		exit(1);
+	}
+
+	for (int i = 0; drivers[i]; i++) {
+		if (!devices[i].driver_present)
+			continue;
+
+		for (int j = 0; j < devices[i].len; j++) {
+			void *inst = device_instance(i, j);
+
+			if (!oprs[i].init_engines(inst)) {
+				fprintf(stderr,
+					"Failed to initialize engines! (%s)\n",
+					strerror(errno));
+				gputop_clean_up();
+				return EXIT_FAILURE;
+			}
+			ret = oprs[i].pmu_init(inst);
+
+			if (ret) {
+				fprintf(stderr,
+					"Failed to initialize PMU! (%s)\n",
+					strerror(errno));
+				if (errno == EACCES && geteuid())
+					fprintf(stderr,
+						"\n"
+						"When running as a normal user CAP_PERFMON is required to access performance\n"
+						"monitoring. See \"man 7 capabilities\", \"man 8 setcap\", or contact your\n"
+						"distribution vendor for assistance.\n"
+						"\n"
+						"More information can be found at 'Perf events and tool security' document:\n"
+						"https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n");
+
+				igt_devices_free();
+				gputop_clean_up();
+				return EXIT_FAILURE;
+			}
+		}
+	}
+
+	/* Take a first sample so the first printed values are deltas. */
+	for (int i = 0; drivers[i]; i++) {
+		for (int j = 0; devices[i].driver_present && j < devices[i].len; j++)
+			oprs[i].pmu_sample(device_instance(i, j));
+	}
+
+	clients = igt_drm_clients_init(NULL);
+	if (!clients) {
+		gputop_clean_up();
+		exit(1);
+	}
+
+	profiled_devices = igt_devices_profiled();
+	if (profiled_devices != NULL) {
+		igt_devices_configure_profiling(profiled_devices, true);
+
+		if (signal(SIGINT, sigint_handler) == SIG_ERR) {
+			fprintf(stderr, "Failed to install signal handler!\n");
+			igt_devices_configure_profiling(profiled_devices, false);
+			igt_devices_free_profiling(profiled_devices);
+			profiled_devices = NULL;
+		}
+	}
+
+	igt_drm_clients_scan(clients, NULL, NULL, 0, NULL, 0);
+
+	while ((n != 0) && !stop_top) {
+		struct igt_drm_client *c, *prevc = NULL;
+		int k, engine_w = 0, lines = 0;
+
+		igt_drm_clients_scan(clients, NULL, NULL, 0, NULL, 0);
+
+		for (int i = 0; drivers[i]; i++) {
+			for (int j = 0; devices[i].driver_present && j < devices[i].len; j++)
+				oprs[i].pmu_sample(device_instance(i, j));
+		}
+
+		igt_drm_clients_sort(clients, client_cmp);
+
+		update_console_size(&con_w, &con_h);
+		clrscr();
+
+		for (int i = 0; drivers[i]; i++) {
+			for (int j = 0; devices[i].driver_present && j < devices[i].len; j++) {
+				lines = oprs[i].print_engines(device_instance(i, j),
+							      lines, con_w, con_h);
+			}
+		}
+
+		if (!clients->num_clients) {
+			const char *msg = " (No GPU clients yet. Start workload to see stats)";
+
+			printf(ANSI_HEADER "%-*s" ANSI_RESET "\n",
+			       (int)(con_w - strlen(msg) - 1), msg);
+		}
+
+		igt_for_each_drm_client(clients, c, k) {
+			assert(c->status != IGT_DRM_CLIENT_PROBE);
+			if (c->status != IGT_DRM_CLIENT_ALIVE)
+				break; /* Active clients are first in the array. */
+
+			lines = print_client(c, &prevc, (double)period_us / 1e6,
+					     lines, con_w, con_h, period_us,
+					     &engine_w);
+			if (lines >= con_h)
+				break;
+		}
+
+		if (lines++ < con_h)
+			printf("\n");
+
+		usleep(period_us);
+		if (n > 0)
+			n--;
+
+		if (profiled_devices != NULL)
+			igt_devices_update_original_profiling_state(profiled_devices);
+	}
+
+	igt_drm_clients_free(clients);
+	gputop_clean_up();
+
+	if (profiled_devices != NULL) {
+		igt_devices_configure_profiling(profiled_devices, false);
+		igt_devices_free_profiling(profiled_devices);
+	}
+	return 0;
+}
diff --git a/tools/gputop/meson.build b/tools/gputop/meson.build
new file mode 100644
index 000000000..4766d8496
--- /dev/null
+++ b/tools/gputop/meson.build
@@ -0,0 +1,6 @@
+# gputop: per-device engine activity and per-client GPU utilization viewer.
+gputop_src = [ 'gputop.c', 'utils.c', 'xe_gputop.c']
+executable('gputop', sources : gputop_src,
+ install : true,
+ install_rpath : bindir_rpathdir,
+ dependencies : [igt_deps,lib_igt_perf,lib_igt_drm_clients,lib_igt_drm_fdinfo,lib_igt_profiling,math])
diff --git a/tools/gputop/utils.c b/tools/gputop/utils.c
new file mode 100644
index 000000000..7f260dc05
--- /dev/null
+++ b/tools/gputop/utils.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+#include <assert.h>
+
+#include "utils.h"
+
+/* Bar glyphs indexed by fill level, from empty (0) to a full cell (8). */
+static const char * const bars[] = { " ", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█" };
+
+/* Write @n space characters to stdout. */
+void n_spaces(const unsigned int n)
+{
+	unsigned int left = n;
+
+	while (left) {
+		putchar(' ');
+		left--;
+	}
+}
+
+/*
+ * Print "|<percent>% <bar>|" where the numeric part uses the "%5.1f%%"
+ * format and the bar fills whatever remains of @max_len characters.
+ *
+ * The bar is drawn with a resolution of PERCLIENT_ENGINE_WIDTH steps per
+ * character cell: full cells first, then one partially filled cell, then
+ * spaces up to the closing '|'.
+ */
+void print_percentage_bar(double percent, int max_len)
+{
+ /* One column of max_len is reserved for the closing '|'. */
+ int bar_len, i, len = max_len - 1;
+ const int w = PERCLIENT_ENGINE_WIDTH;
+
+ /* Whatever the numeric prefix consumed is not available for the bar. */
+ len -= printf("|%5.1f%% ", percent);
+
+ /* no space left for bars, do what we can */
+ if (len < 0)
+ len = 0;
+
+ /* Bar length in sub-cell steps, rounded up and capped at the full width. */
+ bar_len = ceil(w * percent * len / 100.0);
+ if (bar_len > w * len)
+ bar_len = w * len;
+
+ /* Full cells, then the remainder as a partial glyph. */
+ for (i = bar_len; i >= w; i -= w)
+ printf("%s", bars[w]);
+ if (i)
+ printf("%s", bars[i]);
+
+ /* Pad the cells not used by the bar (partial cell counts as used). */
+ len -= (bar_len + (w - 1)) / w;
+ n_spaces(len);
+
+ putchar('|');
+}
+
+/*
+ * Print an empty line after an engine listing if there is still room on
+ * the console. The line counter is advanced either way and returned.
+ * @con_w is not used.
+ */
+int print_engines_footer(int lines, int con_w, int con_h)
+{
+	const bool room_left = lines < con_h;
+
+	if (room_left)
+		printf("\n");
+
+	return lines + 1;
+}
diff --git a/tools/gputop/utils.h b/tools/gputop/utils.h
new file mode 100644
index 000000000..3c62f1c47
--- /dev/null
+++ b/tools/gputop/utils.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef COMMON_GPUTOP_H
+#define COMMON_GPUTOP_H
+
+#include <glib.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "igt_device_scan.h"
+
+#define ANSI_HEADER "\033[7m"
+#define ANSI_RESET "\033[0m"
+
+#define PERCLIENT_ENGINE_WIDTH 8
+
+/**
+ * struct gputop_device - devices discovered for one driver
+ *
+ * @driver_present: Set once at least one device of the
+ * respective driver has been found.
+ * @len: Number of devices discovered for the
+ * respective driver, i.e. the number of elements
+ * in @instances.
+ * @instances: Pointer to the array of discovered
+ * device instances of this driver. The element type
+ * is driver specific, hence the void pointer.
+ */
+struct gputop_device {
+ bool driver_present;
+ int len;
+ void *instances;
+};
+
+/**
+ * struct device_operations - Structure to hold function
+ * pointers for device specific operations for each individual driver.
+ * @gputop_init: Function to initialize the GPUTOP object at @ptr for @card.
+ * @init_engines: Function to initialize engines for the respective driver.
+ * Callers treat a NULL return as failure.
+ * @pmu_init: Function to initialize the PMU (Performance Monitoring Unit).
+ * Callers treat a non-zero return as failure.
+ * @pmu_sample: Function to sample PMU data.
+ * @print_engines: Function to print engine busyness. Takes the current line
+ * count and the console width and height, returns the new line count.
+ * @clean_up: Function to release the resources of an array of @len objects.
+ */
+struct device_operations {
+ void (*gputop_init)(void *ptr,
+ struct igt_device_card *card);
+ void *(*init_engines)(const void *obj);
+ int (*pmu_init)(const void *obj);
+ void (*pmu_sample)(const void *obj);
+ int (*print_engines)(const void *obj, int lines, int w, int h);
+ void (*clean_up)(void *obj, int len);
+};
+
+void print_percentage_bar(double percent, int max_len);
+int print_engines_footer(int lines, int con_w, int con_h);
+void n_spaces(const unsigned int n);
+
+#endif /* COMMON_GPUTOP_H */
diff --git a/tools/gputop/xe_gputop.c b/tools/gputop/xe_gputop.c
new file mode 100644
index 000000000..ac3ed76e6
--- /dev/null
+++ b/tools/gputop/xe_gputop.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_gputop.h"
+
+/*
+ * Address of the n-th engine of a struct xe_pmu_device. The engines are
+ * stored as an array that starts at the structure's 'engine' member.
+ */
+#define engine_ptr(engines, n) (&(engines)->engine + (n))
+
+/* Shift the current value of @counter to 'prev' and store @val as current. */
+static void __update_sample(struct xe_pmu_counter *counter, uint64_t val)
+{
+	struct xe_pmu_pair *pair = &counter->val;
+
+	pair->prev = pair->cur;
+	pair->cur = val;
+}
+
+/*
+ * Feed @counter with its value from the group read buffer @val, which is
+ * indexed by the counter's idx. Counters that are not present are skipped.
+ */
+static void update_sample(struct xe_pmu_counter *counter, uint64_t *val)
+{
+	if (!counter->present)
+		return;
+
+	__update_sample(counter, val[counter->idx]);
+}
+
+/*
+ * Human readable name of an Xe engine class, or "[unknown]" for classes
+ * without a dedicated name.
+ */
+static const char *class_display_name(unsigned int class)
+{
+	const char *label = "[unknown]";
+
+	if (class == DRM_XE_ENGINE_CLASS_RENDER)
+		label = "Render/3D";
+	else if (class == DRM_XE_ENGINE_CLASS_COPY)
+		label = "Blitter";
+	else if (class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE)
+		label = "Video";
+	else if (class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE)
+		label = "VideoEnhance";
+	else if (class == DRM_XE_ENGINE_CLASS_COMPUTE)
+		label = "Compute";
+
+	return label;
+}
+
+/*
+ * Release the resources of @len Xe instances stored in the array @obj:
+ * the card copy, every engine's display name and open PMU counter fds, the
+ * engine array and the PMU device name. Freed pointers are reset to NULL.
+ * The array itself is not freed.
+ */
+void xe_clean_up(void *obj, int len)
+{
+	struct xe_gputop *devs = (struct xe_gputop *)obj;
+
+	for (int i = 0; i < len; i++) {
+		/* Always work on element i, never on the first element. */
+		struct xe_gputop *dev = &devs[i];
+		struct xe_pmu_device *engines = dev->eng_obj;
+
+		free(dev->card);
+		dev->card = NULL;
+
+		if (engines) {
+			for (unsigned int j = 0; j < engines->num_engines; j++) {
+				struct xe_engine *eng = engine_ptr(engines, j);
+
+				free(eng->display_name);
+
+				if (eng->engine_active_ticks.present)
+					close(eng->engine_active_ticks.fd);
+
+				if (eng->engine_total_ticks.present)
+					close(eng->engine_total_ticks.fd);
+			}
+			free(engines);
+			dev->eng_obj = NULL;
+		}
+
+		free(dev->pmu_device);
+		dev->pmu_device = NULL;
+	}
+}
+
+/*
+ * Build the PMU device name for @card.
+ *
+ * The device is opened through its card node if it has one, otherwise
+ * through its render node, and xe_perf_device() fills in the name.
+ *
+ * Returns a heap copy of the name that the caller must free(), or NULL if
+ * the card has neither node or could not be opened.
+ */
+static char *pmu_name(struct igt_device_card *card)
+{
+	/* Stay at -1 when the card has neither a card nor a render node. */
+	int card_fd = -1;
+	char device[30];
+	char *path;
+
+	if (strlen(card->card))
+		card_fd = igt_open_card(card);
+	else if (strlen(card->render))
+		card_fd = igt_open_render(card);
+
+	if (card_fd == -1)
+		return NULL;
+
+	xe_perf_device(card_fd, device, sizeof(device));
+	path = strdup(device);
+	close(card_fd);
+	return path;
+}
+
+/*
+ * Open the perf event described by @pmu->config as a member of the group
+ * led by *@fd. The first event opened successfully becomes the group
+ * leader (*@fd is -1 until then). On success the counter is marked
+ * present and gets the next index from *@cnt.
+ *
+ * Returns the new event fd, or a negative value on failure.
+ */
+static int _open_pmu(uint64_t type, unsigned int *cnt, struct xe_pmu_counter *pmu, int *fd)
+{
+	const int event_fd = igt_perf_open_group(type, pmu->config, *fd);
+
+	if (event_fd < 0)
+		return event_fd;
+
+	if (*fd == -1)
+		*fd = event_fd;
+
+	pmu->present = true;
+	pmu->idx = *cnt;
+	*cnt += 1;
+	pmu->fd = event_fd;
+
+	return event_fd;
+}
+
+/*
+ * Initialize the struct xe_gputop at @ptr for @card: resolve the PMU device
+ * name, remember @card and start without an engine list. Exits the program
+ * if the PMU device name cannot be determined.
+ */
+void xe_gputop_init(void *ptr,
+		    struct igt_device_card *card)
+{
+	struct xe_gputop *obj = (struct xe_gputop *)ptr;
+
+	obj->pmu_device = pmu_name(card);
+	if (!obj->pmu_device) {
+		fprintf(stderr, "%s : pmu_device path returned NULL\n", card->pci_slot_name);
+		exit(EXIT_FAILURE);
+	}
+	obj->card = card;
+	/* Clean-up inspects eng_obj, so it must never be left uninitialized. */
+	obj->eng_obj = NULL;
+}
+
+/*
+ * Look up the bit position of the PMU format field @name for the device
+ * behind the fd @xe. Returns 0 if perf_event_format() reports a non-zero
+ * result.
+ */
+static int pmu_format_shift(int xe, const char *name)
+{
+	char device[80];
+	uint32_t start;
+
+	if (perf_event_format(xe_perf_device(xe, device, sizeof(device)),
+			      name, &start))
+		return 0;
+
+	return start;
+}
+
+/* qsort() callback: order engines by class, then by instance. */
+static int engine_cmp(const void *__a, const void *__b)
+{
+	const struct xe_engine *lhs = (struct xe_engine *)__a;
+	const struct xe_engine *rhs = (struct xe_engine *)__b;
+
+	if (lhs->drm_xe_engine.engine_class == rhs->drm_xe_engine.engine_class)
+		return lhs->drm_xe_engine.engine_instance -
+		       rhs->drm_xe_engine.engine_instance;
+
+	return lhs->drm_xe_engine.engine_class - rhs->drm_xe_engine.engine_class;
+}
+
+/*
+ * Discover the engines of the device behind @obj (a struct xe_gputop) and
+ * prepare the PMU configuration of their "engine-active-ticks" and
+ * "engine-total-ticks" counters.
+ *
+ * The device is opened through its card node if it has one, otherwise
+ * through its render node, and is closed again before returning.
+ *
+ * Returns the new engine list, which is also stored in the object's
+ * eng_obj, sorted by class and instance. On failure NULL is returned,
+ * errno is set and nothing is leaked.
+ */
+void *xe_populate_engines(const void *obj)
+{
+	struct xe_gputop *gputop = (struct xe_gputop *)obj;
+	struct igt_device_card *card = gputop->card;
+	struct xe_pmu_device *engines = NULL;
+	struct drm_xe_engine_class_instance *hwe;
+	uint64_t engine_class, engine_instance, gt_shift;
+	uint64_t engine_active_config, engine_total_config;
+	char device[30];
+	int card_fd;
+	int err = 0;
+	int ret;
+
+	/* Either node is enough; fail only when the card has neither. */
+	if (!card || (!strlen(card->card) && !strlen(card->render))) {
+		fprintf(stderr, "Failed to detect device!\n");
+		errno = ENODEV;
+		return NULL;
+	}
+
+	if (strlen(card->card))
+		card_fd = igt_open_card(card);
+	else
+		card_fd = igt_open_render(card);
+
+	if (card_fd < 0) {
+		errno = ENODEV;
+		return NULL;
+	}
+
+	xe_device_get(card_fd);
+
+	/* calloc() so that every counter starts out as "not present". */
+	engines = calloc(1, sizeof(struct xe_pmu_device) +
+			 xe_number_engines(card_fd) * sizeof(struct xe_engine));
+	if (!engines) {
+		err = ENOMEM;
+		goto out;
+	}
+
+	engines->num_engines = 0;
+	engines->device = gputop->pmu_device;
+	gt_shift = pmu_format_shift(card_fd, "gt");
+	engine_class = pmu_format_shift(card_fd, "engine_class");
+	engine_instance = pmu_format_shift(card_fd, "engine_instance");
+	xe_perf_device(card_fd, device, sizeof(device));
+
+	ret = perf_event_config(device,
+				"engine-active-ticks",
+				&engine_active_config);
+	if (ret < 0) {
+		err = ENOENT;
+		goto out;
+	}
+
+	ret = perf_event_config(device,
+				"engine-total-ticks",
+				&engine_total_config);
+	if (ret < 0) {
+		err = ENOENT;
+		goto out;
+	}
+
+	xe_for_each_engine(card_fd, hwe) {
+		uint64_t param_config;
+		struct xe_engine *engine;
+
+		engine = engine_ptr(engines, engines->num_engines);
+		/* Widen before shifting so the fields cannot overflow an int. */
+		param_config = (uint64_t)hwe->gt_id << gt_shift |
+			       (uint64_t)hwe->engine_class << engine_class |
+			       (uint64_t)hwe->engine_instance << engine_instance;
+		engine->drm_xe_engine = *hwe;
+		engine->engine_active_ticks.config = engine_active_config | param_config;
+		engine->engine_total_ticks.config = engine_total_config | param_config;
+
+		if (engine->engine_active_ticks.config == (uint64_t)-1 ||
+		    engine->engine_total_ticks.config == (uint64_t)-1) {
+			err = ENOENT;
+			break;
+		}
+
+		ret = asprintf(&engine->display_name, "%s/%u",
+			       class_display_name(engine->drm_xe_engine.engine_class),
+			       engine->drm_xe_engine.engine_instance);
+		if (ret <= 0) {
+			/* The pointer is unspecified after a failed asprintf(). */
+			engine->display_name = NULL;
+			err = ENOMEM;
+			break;
+		}
+
+		engines->num_engines++;
+	}
+
+	if (err)
+		goto out;
+
+	qsort(engine_ptr(engines, 0), engines->num_engines,
+	      sizeof(struct xe_engine), engine_cmp);
+
+	gputop->eng_obj = engines;
+
+out:
+	/* Engine data was copied by value; the device is not needed any more. */
+	xe_device_put(card_fd);
+	close(card_fd);
+
+	if (err) {
+		if (engines) {
+			for (unsigned int i = 0; i < engines->num_engines; i++)
+				free(engine_ptr(engines, i)->display_name);
+			free(engines);
+		}
+		errno = err;
+		return NULL;
+	}
+
+	return engines;
+}
+
+/*
+ * Read a group of @num counters from @fd in one go. The read buffer holds
+ * two leading words followed by one value per counter; the values are
+ * copied to @val and the second leading word is returned. A short read
+ * trips an assertion.
+ */
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t raw[2 + num];
+	ssize_t nread;
+
+	memset(raw, 0, sizeof(raw));
+
+	nread = read(fd, raw, sizeof(raw));
+	assert(nread == sizeof(raw));
+
+	for (unsigned int k = 0; k < num; k++)
+		val[k] = raw[k + 2];
+
+	return raw[1];
+}
+
+/*
+ * Read all PMU counters of the device behind @obj in one group read and
+ * update the active/total tick samples of every engine.
+ */
+void xe_pmu_sample(const void *obj)
+{
+	struct xe_pmu_device *engines = ((struct xe_gputop *)obj)->eng_obj;
+	const int num_val = engines->num_counters;
+	uint64_t val[2 + num_val];
+	unsigned int idx = 0;
+
+	pmu_read_multi(engines->fd, num_val, val);
+
+	while (idx < engines->num_engines) {
+		struct xe_engine *engine = engine_ptr(engines, idx);
+
+		update_sample(&engine->engine_active_ticks, val);
+		update_sample(&engine->engine_total_ticks, val);
+		idx++;
+	}
+}
+
+/*
+ * Open the active-ticks and total-ticks PMU counters of every engine of the
+ * device behind @obj as one event group.
+ *
+ * Returns 0 on success, -1 as soon as one counter cannot be opened.
+ */
+int xe_pmu_init(const void *obj)
+{
+	struct xe_pmu_device *engines = ((struct xe_gputop *)obj)->eng_obj;
+	uint64_t type = igt_perf_type_id(engines->device);
+	unsigned int i;
+
+	engines->fd = -1;
+	engines->num_counters = 0;
+
+	for (i = 0; i < engines->num_engines; i++) {
+		struct xe_engine *engine = engine_ptr(engines, i);
+		struct xe_pmu_counter *counters[] = {
+			&engine->engine_active_ticks,
+			&engine->engine_total_ticks,
+		};
+
+		for (unsigned int k = 0; k < 2; k++) {
+			if (_open_pmu(type, &engines->num_counters,
+				      counters[k], &engines->fd) < 0)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Activity of @engine over the last sampling period, as the percentage of
+ * active ticks in total ticks. Returns 0 when no total ticks elapsed, so
+ * callers never see NaN or infinity.
+ */
+static double pmu_active_percentage(struct xe_engine *engine)
+{
+	double pmu_active_ticks = engine->engine_active_ticks.val.cur -
+				  engine->engine_active_ticks.val.prev;
+	double pmu_total_ticks = engine->engine_total_ticks.val.cur -
+				 engine->engine_total_ticks.val.prev;
+
+	if (pmu_total_ticks == 0.0)
+		return 0.0;
+
+	return (pmu_active_ticks * 100) / pmu_total_ticks;
+}
+
+/*
+ * Print a highlighted line with the driver name and PCI slot of the device
+ * behind @obj, padded to the console width @w.
+ *
+ * Returns the updated line count; if the description cannot be allocated
+ * nothing is printed and @lines is returned unchanged. @h is not used.
+ */
+static int
+print_device_description(const void *obj, int lines, int w, int h)
+{
+	const struct igt_device_card *card = ((struct xe_gputop *)obj)->card;
+	char *desc;
+	int len, pad;
+
+	(void)h;
+
+	len = asprintf(&desc, "DRIVER: %s || BDF: %s",
+		       card->driver, card->pci_slot_name);
+	/* On failure the contents of desc are unspecified: do not touch it. */
+	if (len < 0)
+		return lines;
+
+	/* A negative width would make printf() left-justify and pad anyway. */
+	pad = w > len ? w - len : 0;
+
+	printf("\033[7m%s%*s\033[0m\n", desc, pad, " ");
+	lines++;
+	free(desc);
+	return lines;
+}
+
+/*
+ * Print the highlighted " ENGINES ACTIVITY " title line, padded to the
+ * console width, if the device has engines with open PMU counters and
+ * there is still room on the console.
+ *
+ * The device-level counter total is tested; the per-engine num_counters
+ * field is never assigned anywhere in this file.
+ *
+ * Returns the updated line count.
+ */
+static int
+print_engines_header(struct xe_pmu_device *engines,
+		     int lines, int con_w, int con_h)
+{
+	const char *a = " ENGINES ACTIVITY ";
+
+	if (!engines->num_engines || !engines->num_counters || lines >= con_h)
+		return lines;
+
+	printf("\033[7m%s%*s\033[0m\n",
+	       a,
+	       (int)(con_w - strlen(a)), " ");
+
+	return lines + 1;
+}
+
+/*
+ * Print one line for engine @i of @engines: its display name right-aligned
+ * in a column as wide as " ENGINES", followed by its activity bar.
+ *
+ * Returns the updated line count. @con_h is not used.
+ */
+static int
+print_engine(struct xe_pmu_device *engines, unsigned int i,
+	     int lines, int con_w, int con_h)
+{
+	const size_t label_w = strlen(" ENGINES");
+	struct xe_engine *engine = engine_ptr(engines, i);
+	const double busy = pmu_active_percentage(engine);
+
+	printf("%*s", (int)label_w, engine->display_name);
+	print_percentage_bar(busy, con_w - label_w);
+	printf("\n");
+
+	return lines + 1;
+}
+
+/*
+ * Print the engine activity section of the device behind @obj: device
+ * description, section title, one line per engine while there is room on
+ * the console, and the footer.
+ *
+ * Returns the updated line count.
+ */
+int xe_print_engines(const void *obj, int lines, int w, int h)
+{
+	struct xe_pmu_device *show = ((struct xe_gputop *)obj)->eng_obj;
+	unsigned int idx = 0;
+
+	lines = print_device_description(obj, lines, w, h);
+	lines = print_engines_header(show, lines, w, h);
+
+	while (idx < show->num_engines && lines < h) {
+		lines = print_engine(show, idx, lines, w, h);
+		idx++;
+	}
+
+	return print_engines_footer(lines, w, h);
+}
+
+
diff --git a/tools/gputop/xe_gputop.h b/tools/gputop/xe_gputop.h
new file mode 100644
index 000000000..825ac7e34
--- /dev/null
+++ b/tools/gputop/xe_gputop.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef __XE_GPUTOP_H__
+#define __XE_GPUTOP_H__
+
+#include <dirent.h>
+
+#include "igt_device_scan.h"
+#include "igt_perf.h"
+#include "utils.h"
+#include "xe/xe_query.h"
+
+/* Two consecutive readings of one counter. */
+struct xe_pmu_pair {
+ /* Most recent reading. */
+ uint64_t cur;
+ /* Reading before the most recent one. */
+ uint64_t prev;
+};
+
+/* One PMU counter of an engine. */
+struct xe_pmu_counter {
+ uint64_t type;
+ /* Event configuration value passed when the event is opened. */
+ uint64_t config;
+ /* Position of this counter's value within a group read. */
+ unsigned int idx;
+ /* Current and previous reading. */
+ struct xe_pmu_pair val;
+ /* Event file descriptor; only meaningful while 'present' is set. */
+ int fd;
+ /* Set once the event has been opened successfully. */
+ bool present;
+};
+
+/* One engine instance together with its two PMU counters. */
+struct xe_engine {
+ const char *name;
+ /* Heap allocated "<class name>/<instance>" label used for printing. */
+ char *display_name;
+ /* Class, instance and GT of the engine as reported by the driver. */
+ struct drm_xe_engine_class_instance drm_xe_engine;
+ unsigned int num_counters;
+ /* Counter of the ticks the engine was active. */
+ struct xe_pmu_counter engine_active_ticks;
+ /* Counter of the total ticks. */
+ struct xe_pmu_counter engine_total_ticks;
+};
+
+/* Engines and PMU state of one device. */
+struct xe_pmu_device {
+ /* Number of valid entries in the engine array. */
+ unsigned int num_engines;
+ /* Number of PMU counters opened for this device. */
+ unsigned int num_counters;
+ /* File descriptor of the PMU event group leader, -1 while none is open. */
+ int fd;
+ /* PMU device name. */
+ char *device;
+ /* First element of the engine array; further engines follow in memory. */
+ struct xe_engine engine;
+};
+
+/* Per-device state of the Xe gputop backend. */
+struct xe_gputop {
+ /* Heap allocated PMU device name. */
+ char *pmu_device;
+ /* Heap allocated description of the device. */
+ struct igt_device_card *card;
+ /* Engine list of the device. */
+ struct xe_pmu_device *eng_obj;
+};
+
+void xe_gputop_init(void *ptr,
+ struct igt_device_card *card);
+void xe_populate_device_instances(struct gputop_device *dv);
+void *xe_populate_engines(const void *obj);
+void xe_pmu_sample(const void *obj);
+int xe_pmu_init(const void *obj);
+int xe_print_engines(const void *obj, int lines, int w, int h);
+void xe_clean_up(void *obj, int len);
+
+#endif /* __XE_GPUTOP_H__ */
diff --git a/tools/meson.build b/tools/meson.build
index 8185ba160..99a732942 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -70,11 +70,6 @@ if libudev.found()
install : true)
endif
-executable('gputop', 'gputop.c',
- install : true,
- install_rpath : bindir_rpathdir,
- dependencies : [lib_igt_drm_clients,lib_igt_drm_fdinfo,lib_igt_profiling,math])
-
intel_l3_parity_src = [ 'intel_l3_parity.c', 'intel_l3_udev_listener.c' ]
executable('intel_l3_parity', sources : intel_l3_parity_src,
dependencies : tool_deps,
@@ -123,3 +118,4 @@ endif
subdir('i915-perf')
subdir('xe-perf')
subdir('null_state_gen')
+subdir('gputop')
--
2.43.0
More information about the igt-dev
mailing list