[Intel-gfx] [RFC i-g-t 5/6] intel-gpu-top: Add queue depths and load average

Tvrtko Ursulin tursulin at ursulin.net
Wed Oct 3 12:07:17 UTC 2018


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

With the driver now exporting various request queue depths for each
engine, we can display this information here, both as raw counters
and as load-average-like metrics composed from the number of runnable
and running requests in a given time period. 1s, 30s and 15m periods
are used for the load averages.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 tools/Makefile.am     |   2 +-
 tools/intel_gpu_top.c | 122 +++++++++++++++++++++++++++++++++++++-----
 tools/meson.build     |   2 +-
 3 files changed, 110 insertions(+), 16 deletions(-)

diff --git a/tools/Makefile.am b/tools/Makefile.am
index e7de4d90241c..e03842afce8d 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -29,7 +29,7 @@ intel_aubdump_la_LDFLAGS = -module -avoid-version -no-undefined
 intel_aubdump_la_SOURCES = aubdump.c
 intel_aubdump_la_LIBADD = $(top_builddir)/lib/libintel_tools.la -ldl
 
-intel_gpu_top_LDADD = $(top_builddir)/lib/libigt_perf.la
+intel_gpu_top_LDADD = $(top_builddir)/lib/libigt_perf.la -lm
 
 bin_SCRIPTS = intel_aubdump
 CLEANFILES = $(bin_SCRIPTS)
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c
index b923c3cfbe97..8990ef17b771 100644
--- a/tools/intel_gpu_top.c
+++ b/tools/intel_gpu_top.c
@@ -56,6 +56,10 @@ struct pmu_counter {
 	struct pmu_pair val;
 };
 
+#define NUM_QUEUES (3)
+
+#define NUM_LOADS (3)
+
 struct engine {
 	const char *name;
 	const char *display_name;
@@ -65,9 +69,13 @@ struct engine {
 
 	unsigned int num_counters;
 
+	double qd[3];
+	double load_avg[NUM_LOADS];
+
 	struct pmu_counter busy;
 	struct pmu_counter wait;
 	struct pmu_counter sema;
+	struct pmu_counter queue[NUM_QUEUES];
 };
 
 struct engines {
@@ -95,6 +103,11 @@ struct engines {
 	struct pmu_counter imc_reads;
 	struct pmu_counter imc_writes;
 
+	double qd_scale;
+
+	double load_exp[NUM_LOADS];
+	double load_avg[NUM_LOADS];
+
 	struct engine engine;
 };
 
@@ -320,6 +333,13 @@ static double filename_to_double(const char *filename)
 	return v;
 }
 
+#define I915_EVENT "/sys/devices/i915/events/"
+
+static double i915_qd_scale(void)
+{
+	return filename_to_double(I915_EVENT "rcs0-queued.scale");
+}
+
 #define RAPL_ROOT "/sys/devices/power/"
 #define RAPL_EVENT "/sys/devices/power/events/"
 
@@ -453,6 +473,8 @@ static int pmu_init(struct engines *engines)
 	engines->rc6.config = I915_PMU_RC6_RESIDENCY;
 	_open_pmu(engines->num_counters, &engines->rc6, engines->fd);
 
+	engines->qd_scale = i915_qd_scale();
+
 	for (i = 0; i < engines->num_engines; i++) {
 		struct engine *engine = engine_ptr(engines, i);
 		struct {
@@ -462,6 +484,9 @@ static int pmu_init(struct engines *engines)
 			{ .pmu = &engine->busy, .counter = "busy" },
 			{ .pmu = &engine->wait, .counter = "wait" },
 			{ .pmu = &engine->sema, .counter = "sema" },
+			{ .pmu = &engine->queue[0], .counter = "queued" },
+			{ .pmu = &engine->queue[1], .counter = "runnable" },
+			{ .pmu = &engine->queue[2], .counter = "running" },
 			{ .pmu = NULL, .counter = NULL },
 		};
 
@@ -576,12 +601,11 @@ static void fill_str(char *buf, unsigned int bufsz, char c, unsigned int num)
 	*buf = 0;
 }
 
-static void pmu_calc(struct pmu_counter *cnt,
-		     char *buf, unsigned int bufsz,
-		     unsigned int width, unsigned width_dec,
-		     double d, double t, double s)
+static void _pmu_calc(struct pmu_counter *cnt,
+		      char *buf, unsigned int bufsz,
+		      unsigned int width, unsigned width_dec,
+		      double val)
 {
-	double val;
 	int len;
 
 	assert(bufsz >= (width + width_dec + 1));
@@ -591,8 +615,6 @@ static void pmu_calc(struct pmu_counter *cnt,
 		return;
 	}
 
-	val = __pmu_calc(&cnt->val, d, t, s);
-
 	len = snprintf(buf, bufsz, "%*.*f", width + width_dec, width_dec, val);
 	if (len < 0 || len == bufsz) {
 		fill_str(buf, bufsz, 'X', width + width_dec);
@@ -600,6 +622,16 @@ static void pmu_calc(struct pmu_counter *cnt,
 	}
 }
 
+static void pmu_calc(struct pmu_counter *cnt,
+		     char *buf, unsigned int bufsz,
+		     unsigned int width, unsigned width_dec,
+		     double d, double t, double s)
+{
+	double val = __pmu_calc(&cnt->val, d, t, s);
+
+	_pmu_calc(cnt, buf, bufsz, width, width_dec, val);
+}
+
 static uint64_t __pmu_read_single(int fd, uint64_t *ts)
 {
 	uint64_t data[2] = { };
@@ -658,10 +690,14 @@ static void pmu_sample(struct engines *engines)
 
 	for (i = 0; i < engines->num_engines; i++) {
 		struct engine *engine = engine_ptr(engines, i);
+		unsigned int j;
 
 		update_sample(&engine->busy, val);
 		update_sample(&engine->sema, val);
 		update_sample(&engine->wait, val);
+
+		for (j = 0; j < NUM_QUEUES; j++)
+			update_sample(&engine->queue[j], val);
 	}
 }
 
@@ -702,12 +738,19 @@ usage(const char *appname)
 		appname, DEFAULT_PERIOD_MS);
 }
 
+static double update_load(double load, double exp, double val)
+{
+	return val + exp * (load - val);
+}
+
 int main(int argc, char **argv)
 {
 	unsigned int period_us = DEFAULT_PERIOD_MS * 1000;
+	const double load_period[NUM_LOADS] = { 1.0, 30.0, 900.0 };
 	int con_w = -1, con_h = -1;
 	struct engines *engines;
 	unsigned int i;
+	double period;
 	int ret, ch;
 
 	/* Parse options */
@@ -741,10 +784,15 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
+	/* Load average setup. */
+	period = (double)period_us / 1e6;
+	for (i = 0; i < NUM_LOADS; i++)
+		engines->load_exp[i] = exp(-period / load_period[i]);
+
 	pmu_sample(engines);
 
 	for (;;) {
-		double t;
+		double t, qd = 0;
 #define BUFSZ 16
 		char freq[BUFSZ];
 		char fact[BUFSZ];
@@ -754,6 +802,7 @@ int main(int argc, char **argv)
 		char reads[BUFSZ];
 		char writes[BUFSZ];
 		struct winsize ws;
+		unsigned int j;
 		int lines = 0;
 
 		/* Update terminal size. */
@@ -778,8 +827,44 @@ int main(int argc, char **argv)
 		pmu_calc(&engines->imc_writes, writes, BUFSZ, 6, 0, 1.0, t,
 			 engines->imc_writes_scale);
 
+               for (i = 0; i < engines->num_engines; i++) {
+                       struct engine *engine = engine_ptr(engines, i);
+
+			if (!engine->num_counters)
+				continue;
+
+			for (j = 0; j < NUM_QUEUES; j++) {
+				if (!engine->queue[j].present)
+					continue;
+
+				engine->qd[j] =
+					__pmu_calc(&engine->queue[j].val, 1, t,
+						   engines->qd_scale);
+			}
+
+			qd += engine->qd[1] + engine->qd[2];
+
+			for (j = 0; j < NUM_LOADS; j++) {
+				engine->load_avg[j] =
+					update_load(engine->load_avg[j],
+						    engines->load_exp[j],
+						    engine->qd[1] +
+						    engine->qd[2]);
+			}
+               }
+
+		for (j = 0; j < NUM_LOADS; j++) {
+			engines->load_avg[j] =
+				update_load(engines->load_avg[j],
+					    engines->load_exp[j],
+					    qd);
+		}
+
 		if (lines++ < con_h)
-			printf("intel-gpu-top - %s/%s MHz;  %s%% RC6; %s %s; %s irqs/s\n",
+			printf("intel-gpu-top - load avg %5.2f, %5.2f, %5.2f; %s/%s MHz;  %s%% RC6; %s %s; %s irqs/s\n",
+			       engines->load_avg[0],
+			       engines->load_avg[1],
+			       engines->load_avg[2],
 			       fact, freq, rc6, power, engines->rapl_unit, irq);
 
 		if (lines++ < con_h)
@@ -803,7 +888,7 @@ int main(int argc, char **argv)
 
 			if (engine->num_counters && lines < con_h) {
 				const char *a = "          ENGINE      BUSY ";
-				const char *b = " MI_SEMA MI_WAIT";
+				const char *b = "Q   r   R MI_SEMA MI_WAIT";
 
 				printf("\033[7m%s%*s%s\033[0m\n",
 				       a,
@@ -817,6 +902,7 @@ int main(int argc, char **argv)
 		for (i = 0; i < engines->num_engines && lines < con_h; i++) {
 			struct engine *engine = engine_ptr(engines, i);
 			unsigned int max_w = con_w - 1;
+			char qdbuf[NUM_LOADS][BUFSZ];
 			unsigned int len;
 			char sema[BUFSZ];
 			char wait[BUFSZ];
@@ -827,14 +913,22 @@ int main(int argc, char **argv)
 			if (!engine->num_counters)
 				continue;
 
+			for (j = 0; j < NUM_QUEUES; j++)
+				_pmu_calc(&engine->queue[j], qdbuf[j], BUFSZ,
+					  3, 0, engine->qd[j]);
+
 			pmu_calc(&engine->sema, sema, BUFSZ, 3, 0, 1e9, t, 100);
 			pmu_calc(&engine->wait, wait, BUFSZ, 3, 0, 1e9, t, 100);
-			len = snprintf(buf, sizeof(buf), "    %s%%    %s%%",
+
+			len = snprintf(buf, sizeof(buf),
+				       " %s %s %s    %s%%    %s%%",
+				       qdbuf[0], qdbuf[1], qdbuf[2],
 				       sema, wait);
 
-			pmu_calc(&engine->busy, busy, BUFSZ, 6, 2, 1e9, t,
-				 100);
-			len += printf("%16s %s%% ", engine->display_name, busy);
+			pmu_calc(&engine->busy, busy, BUFSZ, 6, 2, 1e9, t, 100);
+
+			len += printf("%16s %s%% ",
+				      engine->display_name, busy);
 
 			val = __pmu_calc(&engine->busy.val, 1e9, t, 100);
 			print_percentage_bar(val, max_w - len);
diff --git a/tools/meson.build b/tools/meson.build
index e4517d667299..c1dd71fada79 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -97,7 +97,7 @@ shared_library('intel_aubdump', 'aubdump.c',
 executable('intel_gpu_top', 'intel_gpu_top.c',
 	   install : true,
 	   install_rpath : bindir_rpathdir,
-	   dependencies : tool_deps + [ lib_igt_perf ])
+	   dependencies : tool_deps + [ lib_igt_perf, math ])
 
 conf_data = configuration_data()
 conf_data.set('prefix', prefix)
-- 
2.17.1



More information about the Intel-gfx mailing list