[PATCH i-g-t] tools/gputop: Add GT freq and c6 stats

Belgaumkar, Vinay vinay.belgaumkar at intel.com
Fri Dec 20 00:33:03 UTC 2024


On 12/18/2024 11:27 AM, Kamil Konieczny wrote:
> Hi Vinay,
> On 2024-12-15 at 16:32:38 -0800, Vinay Belgaumkar wrote:
>> Add GT C6 and Frequency support. These will use the PMU interface
>> and are displayed per GT/device in the header.
>>
>> GT: 0, c6:  94.54% req_freq:  750.63 MHz act_freq:    0.00 MHz
>> GT: 1, c6:   2.75% req_freq: 1200.71 MHz act_freq: 1112.66 MHz
> Will it work for other drivers? Without pmu?
Yes, If it doesn't find the necessary PMU paths, it will skip PMU part 
and go back to printing whatever it did earlier. The pmu_info is also 
stored as a void*, so other vendors can add their own pmu_info.
>
>> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> ---
>>   lib/igt_drm_clients.c |  17 +++++
>>   lib/igt_drm_clients.h |  25 +++++++
>>   lib/igt_perf.c        |  57 +++++++++++++-
>>   lib/igt_perf.h        |   2 +
> Please split lib changes into separate patches as they
> could be used by xe_drm_fdinfo test.
ok.
>
>>   tools/gputop.c        | 168 +++++++++++++++++++++++++++++++++++++++++-
>>   tools/intel_gpu_top.c |  18 +----
> imho here also it could be splitted into two patches each for
> separate tool.
ok.
>
>>   tools/meson.build     |   2 +-
>>   7 files changed, 271 insertions(+), 18 deletions(-)
>>
>> diff --git a/lib/igt_drm_clients.c b/lib/igt_drm_clients.c
>> index 858cd3645..add2696fa 100644
>> --- a/lib/igt_drm_clients.c
>> +++ b/lib/igt_drm_clients.c
>> @@ -19,6 +19,7 @@
>>   
>>   #include "igt_drm_clients.h"
>>   #include "igt_drm_fdinfo.h"
>> +#include "igt_perf.h"
>>   
>>   #ifndef ARRAY_SIZE
>>   #define ARRAY_SIZE(array) (sizeof(array) / sizeof(array[0]))
>> @@ -164,6 +165,7 @@ igt_drm_client_add(struct igt_drm_clients *clients,
>>   {
>>   	struct igt_drm_client *c;
>>   	unsigned int i;
>> +	char *path;
>>   
>>   	assert(!igt_drm_clients_find(clients, IGT_DRM_CLIENT_ALIVE,
>>   				     drm_minor, info->id));
>> @@ -190,6 +192,18 @@ igt_drm_client_add(struct igt_drm_clients *clients,
>>   	c->drm_minor = drm_minor;
>>   	c->clients = clients;
>>   
>> +	if (info->driver && info->pdev) {
>> +		snprintf(c->device_events_path, sizeof(c->device_events_path) - 1,
>> +			 "%s_%s", info->driver, info->pdev);
>> +		path = c->device_events_path;
>> +		for (; *path; ++path)
>> +			if (*path == ':')
>> +				*path = '_';
>> +		c->pmu_fd = -1;
>> +		c->num_gts = 0;
>> +		c->num_pmu_counters = 0;
>> +	}
>> +
>>   	/* Engines */
>>   	c->engines = calloc(1, sizeof(*c->engines));
>>   	assert(c->engines);
>> @@ -262,6 +276,9 @@ void igt_drm_client_free(struct igt_drm_client *c, bool clear)
>>   
>>   	free(c->memory);
>>   
>> +	if (c->pmu_fd != -1)
>> +		close(c->pmu_fd);
>> +
>>   	if (clear)
>>   		memset(c, 0, sizeof(*c));
>>   }
>> diff --git a/lib/igt_drm_clients.h b/lib/igt_drm_clients.h
>> index 946d709de..9d5b966ee 100644
>> --- a/lib/igt_drm_clients.h
>> +++ b/lib/igt_drm_clients.h
>> @@ -56,6 +56,22 @@ struct igt_drm_client_regions {
>>   
>>   struct igt_drm_clients;
>>   
>> +struct pmu_pair {
>> +	uint64_t cur;
>> +	uint64_t prev;
>> +};
>> +
>> +struct pmu_counter {
>> +	uint64_t type;
>> +	uint64_t config;
>> +	unsigned int idx;
>> +	struct pmu_pair val;
>> +	double scale;
>> +	const char *units;
>> +	bool present;
>> +};
>> +
>> +#define MAX_GTS 4
>>   struct igt_drm_client {
>>   	struct igt_drm_clients *clients; /* Owning list. */
>>   
>> @@ -86,6 +102,15 @@ struct igt_drm_client {
>>   		uint64_t last_total_cycles; /* Engine total cycles data as parsed from fdinfo. */
>>   	} *utilization; /* Array of engine utilization */
>>   
>> +	char device_events_path[300];
>> +	int num_gts;
>> +	int pmu_fd;
>> +	int num_pmu_counters;
>> +	struct pmu_counter freq_req_gt[MAX_GTS];
>> +	struct pmu_counter freq_act_gt[MAX_GTS];
>> +	struct pmu_counter c6_gt[MAX_GTS];
>> +	uint64_t ts_cur, ts_prev;
>> +
>>   	struct drm_client_meminfo *memory; /* Array of region memory utilisation as parsed from fdinfo. */
>>   };
>>   
>> diff --git a/lib/igt_perf.c b/lib/igt_perf.c
>> index 3866c6d77..14c362515 100644
>> --- a/lib/igt_perf.c
>> +++ b/lib/igt_perf.c
>> @@ -129,6 +129,61 @@ uint64_t igt_perf_type_id(const char *device)
>>   	return strtoull(buf, NULL, 0);
>>   }
>>   
> Add description to each new library function.
ok.
>
>> +int perf_xe_format_gt_id(const char *device)
>> +{
>> +	char buf[150];
>> +	ssize_t ret;
>> +	int fd, start, end;
>> +
>> +	snprintf(buf, sizeof(buf),
>> +		 "/sys/bus/event_source/devices/%s/format/gt_id",
>> +		 device);
>> +
>> +	fd = open(buf, O_RDONLY);
>> +	if (fd < 0)
>> +		return -EINVAL;
>> +
>> +	ret = read(fd, buf, sizeof(buf) - 1);
>> +	close(fd);
>> +	if (ret < 1)
>> +		return ret;
>> +
>> +	buf[ret] = '\0';
>> +	ret = sscanf(buf, "config:%d-%d", &start, &end);
>> +	if (ret != 2)
>> +		return -EINVAL;
>> +
> Why only start?
we want just the number of bits to shift. Changing the name of the 
function to reflect that.
>
>> +	return start;
>> +}
>> +
> Same here, add description.
ok.
>
>> +int perf_xe_event_config(const char *device, const char *event, uint64_t *config)
>> +{
>> +	char buf[150];
>> +	ssize_t ret;
>> +	int fd;
>> +
>> +	snprintf(buf, sizeof(buf),
>> +		 "/sys/bus/event_source/devices/%s/events/%s",
>> +		 device,
>> +		 event);
>> +
>> +	fd = open(buf, O_RDONLY);
>> +	if (fd < 0)
>> +		return -EINVAL;
>> +
>> +	ret = read(fd, buf, sizeof(buf) - 1);
>> +	close(fd);
>> +	if (ret < 1)
>> +		return ret;
>> +
>> +	buf[ret] = '\0';
>> +	ret = sscanf(buf, "config=0x%lx", config);
>> +	if (ret != 1)
>> +		return -EINVAL;
>> +
>> +	return ret;
>> +}
>> +
>>   int igt_perf_events_dir(int i915)
>>   {
>>   	char buf[80];
>> @@ -180,7 +235,7 @@ int perf_igfx_open_group(uint64_t config, int group)
>>   int perf_xe_open(int xe, uint64_t config)
>>   {
>>   	return _perf_open(xe_perf_type_id(xe), config, -1,
>> -			PERF_FORMAT_TOTAL_TIME_ENABLED);
>> +			  PERF_FORMAT_TOTAL_TIME_ENABLED);
> Please do not make style corrections in a bigger patch.
ok.
>
>>   }
>>   
>>   int perf_i915_open(int i915, uint64_t config)
>> diff --git a/lib/igt_perf.h b/lib/igt_perf.h
>> index 3d9ba2917..f1c433657 100644
>> --- a/lib/igt_perf.h
>> +++ b/lib/igt_perf.h
>> @@ -71,5 +71,7 @@ int perf_i915_open(int i915, uint64_t config);
>>   int perf_i915_open_group(int i915, uint64_t config, int group);
>>   
>>   int perf_xe_open(int xe, uint64_t config);
>> +int perf_xe_event_config(const char *device, const char *event, uint64_t *config);
>> +int perf_xe_format_gt_id(const char *device);
>>   
>>   #endif /* I915_PERF_H */
>> diff --git a/tools/gputop.c b/tools/gputop.c
>> index 43b01f566..2c2f2f471 100644
>> --- a/tools/gputop.c
>> +++ b/tools/gputop.c
>> @@ -29,6 +29,7 @@
>>   #include "igt_core.h"
>>   #include "igt_drm_clients.h"
>>   #include "igt_drm_fdinfo.h"
>> +#include "igt_perf.h"
>>   #include "igt_profiling.h"
>>   #include "drmtest.h"
>>   
>> @@ -76,6 +77,170 @@ static void print_percentage_bar(double percent, int max_len)
>>   	putchar('|');
>>   }
>>   
>> +static int
>> +get_num_gts(uint64_t type, uint64_t config, int gt_shift)
>> +{
>> +	int fd, gt_id;
>> +
>> +	errno = 0;
>> +	for (gt_id = 0; gt_id < MAX_GTS; gt_id++) {
>> +		config |= (uint64_t)gt_id << gt_shift;
>> +		fd = igt_perf_open(type, config);
>> +		if (fd < 0)
>> +			break;
>> +		close(fd);
>> +	}
>> +
>> +	if (!gt_id || (errno && errno != ENOENT))
>> +		gt_id = -errno;
>> +
>> +	return gt_id;
>> +}
>> +
>> +#define _open_pmu(type, cnt, pmu, fd) \
>> +({ \
>> +	int fd__; \
>> +\
>> +	fd__ = igt_perf_open_group((type), (pmu)->config, (fd)); \
>> +	if (fd__ >= 0) { \
>> +		if ((fd) == -1) \
>> +			(fd) = fd__; \
>> +		(pmu)->present = true; \
>> +		(pmu)->idx = (cnt)++; \
>> +	} \
>> +\
>> +	fd__; \
>> +})
>> +
>> +static int pmu_init(struct igt_drm_client *c)
>> +{
>> +	unsigned int i, num_cntr = 0;
>> +	int fd = -1, ret;
>> +	uint64_t type = igt_perf_type_id(c->device_events_path);
>> +	uint64_t config;
>> +	int gt_shift;
>> +	char event_str[100];
>> +
>> +	/* Get a sample event config which can be used to find num_gts */
>> +	ret = perf_xe_event_config(c->device_events_path, "actual-frequency", &config);
>> +	assert(ret >= 0);
> Please do not assert in a tool, handle error gracefully.
ok.
>
>> +	gt_shift = perf_xe_format_gt_id(c->device_events_path);
>> +	assert(ret > 0);
> Same here.
ok.
>
>> +
>> +	c->num_gts = get_num_gts(type, config, gt_shift);
>> +
>> +	for (i = 0; i < c->num_gts; i++) {
>> +		snprintf(event_str, sizeof(event_str), "c6-residency");
>> +		ret = perf_xe_event_config(c->device_events_path, event_str,
>> +					   &c->c6_gt[i].config);
>> +		assert(ret >= 0);
> Same here and in this for loop.
Yup, changing all of these to return a value instead.
>
>> +		c->c6_gt[i].config |= (uint64_t)i << gt_shift;
>> +		_open_pmu(type, num_cntr, &c->c6_gt[i], fd);
>> +
>> +		snprintf(event_str, sizeof(event_str), "actual-frequency");
>> +		ret = perf_xe_event_config(c->device_events_path, event_str,
>> +					   &c->freq_act_gt[i].config);
>> +		assert(ret >= 0);
>> +		c->freq_act_gt[i].config |= (uint64_t)i << gt_shift;
>> +		_open_pmu(type, num_cntr, &c->freq_act_gt[i], fd);
>> +
>> +		snprintf(event_str, sizeof(event_str), "requested-frequency");
>> +		ret = perf_xe_event_config(c->device_events_path, event_str,
>> +					   &c->freq_req_gt[i].config);
>> +		assert(ret >= 0);
>> +		c->freq_req_gt[i].config |= (uint64_t)i << gt_shift;
>> +		_open_pmu(type, num_cntr, &c->freq_req_gt[i], fd);
>> +	}
>> +
>> +	/* Saved the pmu fd */
>> +	assert(fd > 0);
> Same here, handle errors gracefully.
>
>> +	c->pmu_fd = fd;
>> +	c->num_pmu_counters = num_cntr;
>> +
>> +	return fd;
>> +}
>> +
>> +static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>> +{
>> +	uint64_t buf[2 + num];
>> +	unsigned int i;
>> +	ssize_t len;
>> +
>> +	memset(buf, 0, sizeof(buf));
>> +
>> +	len = read(fd, buf, sizeof(buf));
>> +	assert(len == sizeof(buf));
> Same here.
>
>> +
>> +	for (i = 0; i < num; i++)
>> +		val[i] = buf[2 + i];
>> +
>> +	return buf[1];
>> +}
>> +
>> +static void __update_sample(struct pmu_counter *counter, uint64_t val)
>> +{
>> +	counter->val.prev = counter->val.cur;
>> +	counter->val.cur = val;
>> +}
>> +
>> +static void update_sample(struct pmu_counter *counter, uint64_t *val)
>> +{
>> +	if (counter->present)
>> +		__update_sample(counter, val[counter->idx]);
>> +}
>> +
>> +static void
>> +calc_c6_pct(struct igt_drm_client *c, unsigned int gt, unsigned long t)
>> +{
>> +	unsigned long c6_diff = c->c6_gt[gt].val.cur - c->c6_gt[gt].val.prev;
>> +
>> +	printf("GT: %d, c6: %6.2lf%%", gt, 100 * (double)((1e6 * c6_diff) / (double)t));
>> +}
>> +
>> +static void
>> +calc_freq(struct igt_drm_client *c, uint8_t gt, uint64_t t)
>> +{
>> +	uint64_t req_freq_diff = 1e9 * (c->freq_req_gt[gt].val.cur - c->freq_req_gt[gt].val.prev);
>> +	uint64_t act_freq_diff = 1e9 * (c->freq_act_gt[gt].val.cur - c->freq_act_gt[gt].val.prev);
>> +
>> +	printf(" req_freq: %7.2lf MHz", (double)req_freq_diff / (double)t);
>> +	printf(" act_freq: %7.2lf MHz", (double)act_freq_diff / (double)t);
>> +}
>> +
>> +static int
>> +print_pmu_stats(struct igt_drm_client *c, int *lines)
>> +{
>> +	int ret = 0;
>> +	int i;
>> +	uint64_t *val;
>> +	uint64_t ts_diff;
>> +
>> +	if (c->pmu_fd == -1)
>> +		pmu_init(c);
>> +
>> +	assert(c->num_pmu_counters > 0);
> Same here.
>
>> +
>> +	val = (uint64_t *)malloc(c->num_pmu_counters * sizeof(uint64_t));
>> +
>> +	c->ts_prev = c->ts_cur;
>> +	c->ts_cur = pmu_read_multi(c->pmu_fd, c->num_pmu_counters, val);
>> +	ts_diff = c->ts_cur - c->ts_prev;
>> +
>> +	for (i = 0; i < c->num_gts; i++) {
>> +		update_sample(&c->c6_gt[i], val);
>> +		update_sample(&c->freq_req_gt[i], val);
>> +		update_sample(&c->freq_act_gt[i], val);
>> +		calc_c6_pct(c, i, ts_diff);
>> +		calc_freq(c, i, ts_diff);
>> +		putchar('\n');
>> +	}
>> +
>> +	if (val)
>> +		free(val);
>> +
>> +	return ret;
>> +}
>> +
>>   static int
>>   print_client_header(struct igt_drm_client *c, int lines, int con_w, int con_h,
>>   		    int *engine_w)
>> @@ -92,6 +257,8 @@ print_client_header(struct igt_drm_client *c, int lines, int con_w, int con_h,
>>   	if (lines++ >= con_h)
>>   		return lines;
>>   
>> +	ret += print_pmu_stats(c, &lines);
>> +
>>   	putchar('\n');
>>   	if (c->regions->num_regions)
>>   		len = printf("%*s      MEM      RSS ",
>> @@ -219,7 +386,6 @@ print_client(struct igt_drm_client *c, struct igt_drm_client **prevc,
>>   	}
>>   
>>   	*prevc = c;
>> -
> Do not make style changes in a big patch.

Ok, thanks for the review!

Vinay.

>
> Regards,
> Kamil
>
>>   	len = printf("%*s ", c->clients->max_pid_len, c->pid_str);
>>   
>>   	if (c->regions->num_regions) {
>> diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
>> index a608b894d..585e2acec 100644
>> --- a/tools/intel_gpu_top.c
>> +++ b/tools/intel_gpu_top.c
>> @@ -52,21 +52,6 @@
>>   
>>   #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
>>   
>> -struct pmu_pair {
>> -	uint64_t cur;
>> -	uint64_t prev;
>> -};
>> -
>> -struct pmu_counter {
>> -	uint64_t type;
>> -	uint64_t config;
>> -	unsigned int idx;
>> -	struct pmu_pair val;
>> -	double scale;
>> -	const char *units;
>> -	bool present;
>> -};
>> -
>>   struct engine_class {
>>   	unsigned int engine_class;
>>   	const char *name;
>> @@ -724,6 +709,8 @@ static void pmu_sample(struct engines *engines)
>>   	uint64_t val[2 + num_val];
>>   	unsigned int i;
>>   
>> +	printf("\n num counters: %d", num_val);
>> +
>>   	engines->ts.prev = engines->ts.cur;
>>   	engines->ts.cur = pmu_read_multi(engines->fd, num_val, val);
>>   
>> @@ -735,6 +722,7 @@ static void pmu_sample(struct engines *engines)
>>   		update_sample(&engines->freq_req_gt[i], val);
>>   		engines->freq_req.val.cur += engines->freq_req_gt[i].val.cur;
>>   		engines->freq_req.val.prev += engines->freq_req_gt[i].val.prev;
>> +		printf("\n GT: %d, ctr idx: %d", i, engines->freq_req_gt[i].idx);
>>   
>>   		update_sample(&engines->freq_act_gt[i], val);
>>   		engines->freq_act.val.cur += engines->freq_act_gt[i].val.cur;
>> diff --git a/tools/meson.build b/tools/meson.build
>> index 38b04851c..9e6c8546a 100644
>> --- a/tools/meson.build
>> +++ b/tools/meson.build
>> @@ -70,7 +70,7 @@ endif
>>   executable('gputop', 'gputop.c',
>>              install : true,
>>              install_rpath : bindir_rpathdir,
>> -           dependencies : [lib_igt_drm_clients,lib_igt_drm_fdinfo,lib_igt_profiling,math])
>> +           dependencies : [lib_igt_perf,lib_igt_drm_clients,lib_igt_drm_fdinfo,lib_igt_profiling,math])
>>   
>>   intel_l3_parity_src = [ 'intel_l3_parity.c', 'intel_l3_udev_listener.c' ]
>>   executable('intel_l3_parity', sources : intel_l3_parity_src,
>> -- 
>> 2.38.1
>>


More information about the igt-dev mailing list