[igt-dev] [RFC i-g-t] tests/perf_pmu: Verify engine busyness accuracy

Tvrtko Ursulin tursulin at ursulin.net
Tue Jan 30 18:11:04 UTC 2018


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

A subtest to verify that the engine busyness is reported with expected
accuracy on platforms where the feature is available.

We test three patterns: 2%, 50% and 98% load per engine.

v2:
 * Use spin batch instead of nop calibration.
 * Various tweaks.

v3:
 * Change loops to be time based.
 * Use __igt_spin_batch_new inside timing sensitive loops.
 * Fixed PWM sleep handling.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 tests/perf_pmu.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 2f7d33414a53..c6083d7102bd 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -35,6 +35,7 @@
 #include <dirent.h>
 #include <time.h>
 #include <poll.h>
+#include <sched.h>
 
 #include "igt.h"
 #include "igt_core.h"
@@ -1169,6 +1170,140 @@ test_rc6(int gem_fd)
 	assert_within_epsilon(busy - prev, 0.0, tolerance);
 }
 
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+
+	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+
+	*ts = data[1];
+
+	return data[0];
+}
+
+static double __error(double val, double ref)
+{
+	return (100.0 * val / ref) - 100.0;
+}
+
+static void debug_error(const char *str, double val, double ref)
+{
+	igt_debug("%s=%.2f%% (%.2f/%.2f)\n", str, __error(val, ref), val, ref);
+}
+
+static void log_error(const char *str, double val, double ref)
+{
+	debug_error(str, val, ref);
+	igt_info("%s=%.2f%%\n", str, __error(val, ref));
+}
+
+#define div_round_up(a, b) (((a) + (b) - 1) / (b))
+
+static void
+accuracy(int gem_fd, const struct intel_execution_engine2 *e,
+	 unsigned long target_busy_pct)
+{
+	const unsigned int test_us = 1e6;
+	const unsigned int pwm_calibration_us = 500e3;
+	unsigned long busy_us = 2500;
+	unsigned long idle_us = 100 * (busy_us - target_busy_pct *
+				busy_us / 100) / target_busy_pct;
+	double busy_r;
+	uint64_t val[2];
+	uint64_t ts[2];
+	int fd;
+
+	/* Sampling platforms cannot reach the high accuracy criteria. */
+	igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
+
+	while (idle_us < 2500) {
+		busy_us *= 2;
+		idle_us *= 2;
+	}
+
+	assert_within_epsilon((double)busy_us / (busy_us + idle_us),
+				(double)target_busy_pct / 100.0, tolerance);
+
+	/* Emit PWM pattern on the engine from a child. */
+	igt_fork(child, 1) {
+		struct sched_param rt = { .sched_priority = 99 };
+		struct timespec test_start = { };
+		unsigned long overhead_ns = 0;
+		unsigned long loops = 0;
+
+		/* We need the best sleep accuracy we can get. */
+		igt_require(sched_setscheduler(0,
+					       SCHED_FIFO | SCHED_RESET_ON_FORK,
+					       &rt) == 0);
+
+		/* Measure setup overhead. */
+		igt_nsec_elapsed(&test_start);
+		do {
+			struct timespec start = { };
+			igt_spin_t *spin;
+			unsigned int ns;
+
+			igt_nsec_elapsed(&start);
+			spin = __igt_spin_batch_new(gem_fd, 0,
+						    e2ring(gem_fd, e), 0);
+			igt_spin_batch_end(spin);
+			ns = igt_nsec_elapsed(&start);
+			gem_sync(gem_fd, spin->handle);
+			igt_spin_batch_free(gem_fd, spin);
+			overhead_ns += ns;
+			loops++;
+			usleep(1000);
+		} while (igt_nsec_elapsed(&test_start) <
+			 pwm_calibration_us * 1000);
+
+		overhead_ns = div_round_up(overhead_ns, loops);
+		igt_debug("spin setup overhead = %luus\n", overhead_ns / 1000);
+		igt_assert(overhead_ns < busy_us * 1000);
+
+		/* Emit PWM busy signal. */
+		busy_us -= overhead_ns / 1000;
+		memset(&test_start, 0, sizeof(test_start));
+		igt_nsec_elapsed(&test_start);
+		do {
+			struct timespec start = { };
+			igt_spin_t *spin;
+			unsigned int ns, sleep;
+
+			spin = __igt_spin_batch_new(gem_fd, 0,
+						    e2ring(gem_fd, e), 0);
+			ns = measured_usleep(busy_us);
+			igt_spin_batch_end(spin);
+			debug_error("busy sleep error", ns, busy_us * 1000);
+			gem_sync(gem_fd, spin->handle);
+			igt_nsec_elapsed(&start);
+			igt_spin_batch_free(gem_fd, spin);
+			ns = igt_nsec_elapsed(&start);
+			igt_assert_lt(ns, idle_us * 1000);
+			sleep = idle_us * 1000 - ns;
+			ns = measured_usleep(sleep / 1000);
+			debug_error("idle sleep error", ns, sleep);
+		} while (igt_nsec_elapsed(&test_start) < test_us * 2 * 1000);
+	}
+
+	/* Let the child run. */
+	usleep(pwm_calibration_us * 2);
+
+	/* Collect engine busyness for a subset of child runtime. */
+	fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
+	val[0] = __pmu_read_single(fd, &ts[0]);
+	usleep(test_us / 2);
+	val[1] = __pmu_read_single(fd, &ts[1]);
+	close(fd);
+
+	igt_waitchildren();
+
+	busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
+
+	log_error("error", busy_r, target_busy_pct / 100.0);
+
+	assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.15);
+}
+
 igt_main
 {
 	const unsigned int num_other_metrics =
@@ -1197,6 +1332,8 @@ igt_main
 		invalid_init();
 
 	for_each_engine_class_instance(fd, e) {
+		const unsigned int pct[] = { 2, 50, 98 };
+
 		/**
 		 * Test that a single engine metric can be initialized.
 		 */
@@ -1277,6 +1414,14 @@ igt_main
 		 */
 		igt_subtest_f("busy-double-start-%s", e->name)
 			busy_double_start(fd, e);
+
+		/**
+		 * Check engine busyness accuracy is as expected.
+		 */
+		for (i = 0; i < ARRAY_SIZE(pct); i++) {
+			igt_subtest_f("busy-accuracy-%u-%s", pct[i], e->name)
+				accuracy(fd, e, pct[i]);
+		}
 	}
 
 	/**
-- 
2.14.1



More information about the igt-dev mailing list