[Intel-gfx] [igt-dev] [PATCH i-g-t 03/11] intel-gpu-top: Add support for per client stats
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Thu Mar 31 22:08:50 UTC 2022
lgtm, I just have a few nits and questions below:
Regardless, this is
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
Umesh
On Tue, Feb 22, 2022 at 01:55:57PM +0000, Tvrtko Ursulin wrote:
>From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
>Use the i915 exported data in /proc/<pid>/fdinfo to show GPU utilization
>per DRM client.
>
>Example of the output:
>
>intel-gpu-top: Intel Tigerlake (Gen12) @ /dev/dri/card0 - 220/ 221 MHz
> 70% RC6; 0.62/ 7.08 W; 760 irqs/s
>
> ENGINES BUSY MI_SEMA MI_WAIT
> Render/3D 23.06% |██████▊ | 0% 0%
> Blitter 0.00% | | 0% 0%
> Video 5.40% |█▋ | 0% 0%
> VideoEnhance 20.67% |██████ | 0% 0%
>
> PID NAME Render/3D Blitter Video VideoEnhance
> 3082 mpv | || ||▌ ||██ |
> 3117 neverball |█▉ || || || |
> 1 systemd |▍ || || || |
> 2338 gnome-shell | || || || |
>
>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>---
> man/intel_gpu_top.rst | 4 +
> tools/intel_gpu_top.c | 801 +++++++++++++++++++++++++++++++++++++++++-
> tools/meson.build | 2 +-
> 3 files changed, 804 insertions(+), 3 deletions(-)
>
>diff --git a/man/intel_gpu_top.rst b/man/intel_gpu_top.rst
>index b3b765b05feb..f4dbfc5b44d9 100644
>--- a/man/intel_gpu_top.rst
>+++ b/man/intel_gpu_top.rst
>@@ -56,6 +56,10 @@ Supported keys:
> 'q' Exit from the tool.
> 'h' Show interactive help.
> '1' Toggle between aggregated engine class and physical engine mode.
>+ 'n' Toggle display of numeric client busyness overlay.
>+ 's' Toggle between sort modes (runtime, total runtime, pid, client id).
>+ 'i' Toggle display of clients which used no GPU time.
>+ 'H' Toggle between per PID aggregation and individual clients.
>
> DEVICE SELECTION
> ================
>diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
>index bc11fce2bb1e..73815cdea8aa 100644
>--- a/tools/intel_gpu_top.c
>+++ b/tools/intel_gpu_top.c
>@@ -43,8 +43,10 @@
> #include <sys/types.h>
> #include <unistd.h>
> #include <termios.h>
>+#include <sys/sysmacros.h>
>
> #include "igt_perf.h"
>+#include "igt_drm_fdinfo.h"
>
> #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
>
>@@ -311,7 +313,8 @@ static int engine_cmp(const void *__a, const void *__b)
> return a->instance - b->instance;
> }
>
>-#define is_igpu_pci(x) (strcmp(x, "0000:00:02.0") == 0)
>+#define IGPU_PCI "0000:00:02.0"
>+#define is_igpu_pci(x) (strcmp(x, IGPU_PCI) == 0)
> #define is_igpu(x) (strcmp(x, "i915") == 0)
>
> static struct engines *discover_engines(char *device)
>@@ -635,6 +638,547 @@ static void pmu_sample(struct engines *engines)
> }
> }
>
>+enum client_status {
>+ FREE = 0, /* mbz */
>+ ALIVE,
>+ PROBE
>+};
>+
>+struct clients;
>+
>+struct client {
>+ struct clients *clients;
>+
>+ enum client_status status;
>+ unsigned int id;
>+ unsigned int pid;
>+ char name[24];
>+ char print_name[24];
>+ unsigned int samples;
>+ unsigned long total_runtime;
>+ unsigned long last_runtime;
>+ unsigned long *val;
>+ uint64_t *last;
>+};
>+
>+struct clients {
>+ unsigned int num_clients;
>+ unsigned int active_clients;
>+
>+ unsigned int num_classes;
>+ struct engine_class *class;
>+
>+ char pci_slot[64];
>+
>+ struct client *client;
>+};
>+
>+#define for_each_client(clients, c, tmp) \
>+ for ((tmp) = (clients)->num_clients, c = (clients)->client; \
>+ (tmp > 0); (tmp)--, (c)++)
>+
>+static struct clients *init_clients(const char *pci_slot)
>+{
>+ struct clients *clients;
>+
>+ clients = malloc(sizeof(*clients));
>+ if (!clients)
>+ return NULL;
>+
>+ memset(clients, 0, sizeof(*clients));
>+
>+ strncpy(clients->pci_slot, pci_slot, sizeof(clients->pci_slot));
>+
>+ return clients;
>+}
>+
>+static struct client *
>+find_client(struct clients *clients, enum client_status status, unsigned int id)
>+{
>+ unsigned int start, num;
>+ struct client *c;
>+
>+ start = status == FREE ? clients->active_clients : 0; /* Free block at the end. */
>+ num = clients->num_clients - start;
>+
>+ for (c = &clients->client[start]; num; c++, num--) {
>+ if (status != c->status)
>+ continue;
>+
>+ if (status == FREE || c->id == id)
>+ return c;
>+ }
>+
>+ return NULL;
>+}
>+
>+static void
>+update_client(struct client *c, unsigned int pid, char *name, uint64_t val[16])
>+{
>+ unsigned int i;
>+
>+ if (c->pid != pid)
>+ c->pid = pid;
>+
>+ if (strcmp(c->name, name)) {
>+ char *p;
>+
>+ strncpy(c->name, name, sizeof(c->name) - 1);
>+ strncpy(c->print_name, name, sizeof(c->print_name) - 1);
>+
>+ p = c->print_name;
>+ while (*p) {
>+ if (!isprint(*p))
>+ *p = '*';
>+ p++;
>+ }
>+ }
>+
>+ c->last_runtime = 0;
>+ c->total_runtime = 0;
>+
assert(c->clients->num_classes <= 16);
>+ for (i = 0; i < c->clients->num_classes; i++) {
>+ if (val[i] < c->last[i])
>+ continue; /* It will catch up soon. */
>+
>+ c->total_runtime += val[i];
As far as busyness is concerned, c->total_runtime += val[i] should be all that is needed.
>+ c->val[i] = val[i] - c->last[i];
>+ c->last_runtime += c->val[i];
>+ c->last[i] = val[i];
Can you describe what these 3 lines are doing?
>+ }
>+
>+ c->samples++;
>+ c->status = ALIVE;
>+}
>+
>+static void
>+add_client(struct clients *clients, unsigned int id, unsigned int pid,
>+ char *name, uint64_t busy[16])
>+{
>+ struct client *c;
>+
>+ assert(!find_client(clients, ALIVE, id));
>+
>+ c = find_client(clients, FREE, 0);
>+ if (!c) {
>+ unsigned int idx = clients->num_clients;
>+
>+ clients->num_clients += (clients->num_clients + 2) / 2;
>+ clients->client = realloc(clients->client,
>+ clients->num_clients * sizeof(*c));
>+ assert(clients->client);
>+
>+ c = &clients->client[idx];
>+ memset(c, 0, (clients->num_clients - idx) * sizeof(*c));
>+ }
>+
>+ c->id = id;
>+ c->clients = clients;
>+ c->val = calloc(clients->num_classes, sizeof(c->val));
>+ c->last = calloc(clients->num_classes, sizeof(c->last));
>+ assert(c->val && c->last);
>+
>+ update_client(c, pid, name, busy);
>+}
>+
>+static void free_client(struct client *c)
>+{
>+ free(c->val);
>+ free(c->last);
>+ memset(c, 0, sizeof(*c));
>+}
>+
>+static int client_last_cmp(const void *_a, const void *_b)
>+{
>+ const struct client *a = _a;
>+ const struct client *b = _b;
>+ long tot_a, tot_b;
>+
>+ /*
>+ * Sort clients in descending order of runtime in the previous sampling
>+ * period for active ones, followed by inactive. Tie-breaker is client
>+ * id.
>+ */
>+
>+ tot_a = a->status == ALIVE ? a->last_runtime : -1;
>+ tot_b = b->status == ALIVE ? b->last_runtime : -1;
>+
>+ tot_b -= tot_a;
>+ if (tot_b > 0)
>+ return 1;
>+ if (tot_b < 0)
>+ return -1;
>+
>+ return (int)b->id - a->id;
nit: each of the compare functions could use a single if, provided you are
fine with returning tot_b directly:
if (!tot_b)
return (int)b->id - a->id;
return tot_b;
>+}
>+
>+static int client_total_cmp(const void *_a, const void *_b)
>+{
>+ const struct client *a = _a;
>+ const struct client *b = _b;
>+ long tot_a, tot_b;
>+
>+ tot_a = a->status == ALIVE ? a->total_runtime : -1;
>+ tot_b = b->status == ALIVE ? b->total_runtime : -1;
>+
>+ tot_b -= tot_a;
>+ if (tot_b > 0)
>+ return 1;
>+ if (tot_b < 0)
>+ return -1;
>+
>+ return (int)b->id - a->id;
>+}
>+
>+static int client_id_cmp(const void *_a, const void *_b)
>+{
>+ const struct client *a = _a;
>+ const struct client *b = _b;
>+ int id_a, id_b;
>+
>+ id_a = a->status == ALIVE ? a->id : -1;
>+ id_b = b->status == ALIVE ? b->id : -1;
>+
>+ id_b -= id_a;
>+ if (id_b > 0)
>+ return 1;
>+ if (id_b < 0)
>+ return -1;
>+
>+ return (int)b->id - a->id;
>+}
>+
>+static int client_pid_cmp(const void *_a, const void *_b)
>+{
>+ const struct client *a = _a;
>+ const struct client *b = _b;
>+ int pid_a, pid_b;
>+
>+ pid_a = a->status == ALIVE ? a->pid : INT_MAX;
>+ pid_b = b->status == ALIVE ? b->pid : INT_MAX;
>+
>+ pid_b -= pid_a;
>+ if (pid_b > 0)
>+ return -1;
>+ if (pid_b < 0)
>+ return 1;
>+
>+ return (int)a->id - b->id;
>+}
>+
>+static int (*client_cmp)(const void *, const void *) = client_last_cmp;
>+
>+static struct clients *sort_clients(struct clients *clients,
>+ int (*cmp)(const void *, const void *))
>+{
>+ unsigned int active, free;
>+ struct client *c;
>+ int tmp;
>+
>+ if (!clients)
>+ return clients;
>+
>+ qsort(clients->client, clients->num_clients, sizeof(*clients->client),
>+ cmp);
>+
>+ /* Trim excessive array space. */
>+ active = 0;
>+ for_each_client(clients, c, tmp) {
>+ if (c->status != ALIVE)
>+ break; /* Active clients are first in the array. */
>+ active++;
>+ }
>+
>+ clients->active_clients = active;
>+
>+ free = clients->num_clients - active;
>+ if (free > clients->num_clients / 2) {
>+ active = clients->num_clients - free / 2;
>+ if (active != clients->num_clients) {
>+ clients->num_clients = active;
>+ clients->client = realloc(clients->client,
>+ clients->num_clients *
>+ sizeof(*c));
>+ }
>+ }
>+
>+ return clients;
>+}
>+
>+static bool aggregate_pids = true;
>+
>+static struct clients *display_clients(struct clients *clients)
>+{
>+ struct client *ac, *c, *cp = NULL;
>+ struct clients *aggregated;
>+ int tmp, num = 0;
>+
>+ if (!aggregate_pids)
>+ goto out;
>+
>+ /* Sort by pid first to make it easy to aggregate while walking. */
>+ sort_clients(clients, client_pid_cmp);
>+
>+ aggregated = calloc(1, sizeof(*clients));
>+ assert(aggregated);
>+
>+ ac = calloc(clients->num_clients, sizeof(*c));
>+ assert(ac);
>+
>+ aggregated->num_classes = clients->num_classes;
>+ aggregated->class = clients->class;
>+ aggregated->client = ac;
>+
>+ for_each_client(clients, c, tmp) {
>+ unsigned int i;
>+
>+ if (c->status == FREE)
>+ break;
>+
>+ assert(c->status == ALIVE);
>+
>+ if ((cp && c->pid != cp->pid) || !cp) {
This is the same as: if (!cp || c->pid != cp->pid). Fine either way.
>+ ac = &aggregated->client[num++];
>+
>+ /* New pid. */
>+ ac->clients = aggregated;
>+ ac->status = ALIVE;
>+ ac->id = -c->pid;
>+ ac->pid = c->pid;
>+ strcpy(ac->name, c->name);
>+ strcpy(ac->print_name, c->print_name);
>+ ac->val = calloc(clients->num_classes,
>+ sizeof(ac->val[0]));
>+ assert(ac->val);
>+ ac->samples = 1;
>+ }
>+
>+ cp = c;
>+
>+ if (c->samples < 2)
>+ continue;
>+
>+ ac->samples = 2; /* All what matters for display. */
>+ ac->total_runtime += c->total_runtime;
>+ ac->last_runtime += c->last_runtime;
>+
>+ for (i = 0; i < clients->num_classes; i++)
>+ ac->val[i] += c->val[i];
>+ }
>+
>+ aggregated->num_clients = num;
>+ aggregated->active_clients = num;
>+
>+ clients = aggregated;
>+
>+out:
>+ return sort_clients(clients, client_cmp);
>+}
>+
>+static void free_clients(struct clients *clients)
>+{
>+ struct client *c;
>+ unsigned int tmp;
>+
>+ for_each_client(clients, c, tmp) {
>+ free(c->val);
>+ free(c->last);
>+ }
>+
>+ free(clients->client);
>+ free(clients);
>+}
>+
>+static bool is_drm_fd(DIR *fd_dir, const char *name)
>+{
>+ struct stat stat;
>+ int ret;
>+
>+ ret = fstatat(dirfd(fd_dir), name, &stat, 0);
>+
>+ return ret == 0 &&
>+ (stat.st_mode & S_IFMT) == S_IFCHR &&
>+ major(stat.st_rdev) == 226;
>+}
>+
>+static bool get_task_name(const char *buffer, char *out, unsigned long sz)
>+{
>+ char *s = index(buffer, '(');
>+ char *e = rindex(buffer, ')');
>+ unsigned int len;
>+
>+ if (!s || !e)
>+ return false;
Maybe add assert(e > s); here?
>+
>+ len = --e - ++s + 1;
nit: this could be simplified to: len = e - ++s;
Thanks,
Umesh
More information about the Intel-gfx
mailing list