[Intel-gfx] [igt-dev] [RFC i-g-t 1/1] intel-gpu-top: Support for client stats
Chris Wilson
chris at chris-wilson.co.uk
Fri May 10 15:33:18 UTC 2019
Quoting Tvrtko Ursulin (2019-05-10 14:23:12)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> Adds support for per-client engine busyness stats i915 exports in sysfs
> and produces output like the below:
>
> ==========================================================================
> intel-gpu-top - 935/ 935 MHz; 0% RC6; 14.73 Watts; 1097 irqs/s
>
> IMC reads: 1401 MiB/s
> IMC writes: 4 MiB/s
>
> ENGINE BUSY MI_SEMA MI_WAIT
> Render/3D/0 63.73% |███████████████████ | 3% 0%
> Blitter/0 9.53% |██▊ | 6% 0%
> Video/0 39.32% |███████████▊ | 16% 0%
> Video/1 15.62% |████▋ | 0% 0%
> VideoEnhance/0 0.00% | | 0% 0%
>
> PID NAME RCS BCS VCS VECS
> 4084 gem_wsim |█████▌ ||█ || || |
> 4086 gem_wsim |█▌ || ||███ || |
> ==========================================================================
>
> Apart from the existing physical engine utilization it now also shows
> utilization per client and per engine class.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> tools/intel_gpu_top.c | 590 +++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 584 insertions(+), 6 deletions(-)
>
> diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
> index cc8db7c539ed..88e1ad52d17c 100644
> --- a/tools/intel_gpu_top.c
> +++ b/tools/intel_gpu_top.c
> @@ -659,8 +659,403 @@ static void pmu_sample(struct engines *engines)
> }
> }
>
> +enum client_status {
> + FREE = 0, /* mbz */
> + ALIVE,
> + PROBE
> +};
> +
> +struct clients;
> +
> +struct client {
> + struct clients *clients;
> +
> + enum client_status status;
> + unsigned int id;
> + unsigned int pid;
> + char name[128];
> + unsigned int samples;
> + unsigned long total;
> + struct engines *engines;
> + unsigned long *val;
> + uint64_t *last;
> +};
> +
> +struct engine_class {
> + unsigned int class;
> + const char *name;
> + unsigned int num_engines;
> +};
> +
> +struct clients {
> + unsigned int num_classes;
> + struct engine_class *class;
> +
> + unsigned int num_clients;
> + struct client *client;
> +};
> +
> +#define for_each_client(clients, c, tmp) \
> + for ((tmp) = (clients)->num_clients, c = (clients)->client; \
> + (tmp > 0); (tmp)--, (c)++)
> +
> +#define SYSFS_ENABLE "/sys/class/drm/card0/clients/enable_stats"
> +
> +bool __stats_enabled;
> +
> +static int __set_stats(bool val)
> +{
> + int fd, ret;
> +
> + fd = open(SYSFS_ENABLE, O_WRONLY);
> + if (fd < 0)
> + return -errno;
> +
> + ret = write(fd, val ? "1" : "0", 2);
close(fd);
Might as well still be tidy on error when it's trivial to do so.
> + if (ret < 0)
> + return -errno;
> + else if (ret < 2)
> + return 1;
> +
> + close(fd);
> +
> + return 0;
> +}
> +
> +static void __restore_stats(void)
> +{
> + int ret;
> +
> + if (__stats_enabled)
> + return;
> +
> + ret = __set_stats(false);
> + if (ret)
> + fprintf(stderr, "Failed to disable per-client stats! (%d)\n",
> + ret);
> +}
> +
> +static void __restore_stats_signal(int sig)
> +{
> + exit(0);
> +}
> +
> +static int enable_stats(void)
> +{
> + int fd, ret;
> +
> + fd = open(SYSFS_ENABLE, O_RDONLY);
> + if (fd < 0)
> + return -errno;
> +
> + close(fd);
> +
> + __stats_enabled = filename_to_u64(SYSFS_ENABLE, 10);
> + if (__stats_enabled)
> + return 0;
> +
> + ret = __set_stats(true);
> + if (!ret) {
> + if (atexit(__restore_stats))
> + fprintf(stderr, "Failed to register exit handler!");
> +
> + if (signal(SIGINT, __restore_stats_signal))
> + fprintf(stderr, "Failed to register signal handler!");
That really suggests an alternative mechanism where the stats are only
active for as long as the open(sysfs/stats) fd is held. However, iirc, sysfs
doesn't allow us to hook into the open!
In which case we could hook into the write, and keep stats enabled from
the user's write("1") until the fd is closed. Food for thought; I
hope you convince me we don't need optional stats in the first place :)
> + } else {
> + fprintf(stderr, "Failed to enable per-client stats! (%d)\n",
> + ret);
> + }
> +
> + return ret;
> +}
> +
> +static struct clients *init_clients(void)
> +{
> + struct clients *clients = malloc(sizeof(*clients));
We purport this to be a user-facing tool, so it should generally avoid
nasty errors.
> +
> + if (enable_stats()) {
> + free(clients);
> + return NULL;
> + }
> +
> + return memset(clients, 0, sizeof(*clients));
> +}
> +
> +#define SYSFS_CLIENTS "/sys/class/drm/card0/clients"
We need to detect which cardN to use, which turns into scanning all
possible dirents.
> +static uint64_t read_client_busy(unsigned int id, unsigned int class)
> +{
> + char buf[256];
> + ssize_t ret;
> +
> + ret = snprintf(buf, sizeof(buf),
> + SYSFS_CLIENTS "/%u/busy/%u",
> + id, class);
> + assert(ret > 0 && ret < sizeof(buf));
> + if (ret <= 0 || ret == sizeof(buf))
> + return 0;
> +
> + return filename_to_u64(buf, 10);
> +}
> +
> +static struct client *
> +find_client(struct clients *clients, enum client_status status, unsigned int id)
> +{
> + struct client *c;
> + int tmp;
> +
> + for_each_client(clients, c, tmp) {
> + if ((status == FREE && c->status == FREE) ||
> + (status == c->status && c->id == id))
> + return c;
if (status != c->status)
continue;
if (status == FREE || c->id == id)
return c;
> + }
> +
> + return NULL;
> +}
> +
> +static void update_client(struct client *c, unsigned int pid, char *name)
> +{
> + uint64_t val[c->clients->num_classes];
> + unsigned int i;
> +
> + if (c->pid != pid)
> + c->pid = pid;
> +
> + if (strncmp(c->name, name, sizeof(c->name)))
> + strncpy(c->name, name, sizeof(c->name));
> +
> + for (i = 0; i < c->clients->num_classes; i++)
> + val[i] = read_client_busy(c->id, c->clients->class[i].class);
> +
> + c->total = 0;
> +
> + for (i = 0; i < c->clients->num_classes; i++) {
> + assert(val[i] >= c->last[i]);
> + c->val[i] = val[i] - c->last[i];
> + c->total += c->val[i];
> + c->last[i] = val[i];
Where's the normalisation for capacity? Ok, it comes later on, but only
for the interactive conversion to %. What about the json output: how do
consumers know the capacity? Wait... json is using % as well, without
scaling for num_engines.
Should we not, say, timestamp the sampling and compute the % here?
-Chris
More information about the Intel-gfx
mailing list