[igt-dev] [PATCH i-g-t] tests/perf_pmu: Verify engine busyness accuracy
Tvrtko Ursulin
tursulin at ursulin.net
Thu Feb 15 11:53:44 UTC 2018
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
A subtest to verify that the engine busyness is reported with expected
accuracy on platforms where the feature is available.
We test three patterns: 2%, 50% and 98% load per engine.
v2:
* Use spin batch instead of nop calibration.
* Various tweaks.
v3:
* Change loops to be time based.
* Use __igt_spin_batch_new inside timing sensitive loops.
* Fixed PWM sleep handling.
v4:
* Use restarting spin batch.
* Calibrate more carefully by looking at the real PWM loop.
v5:
* Made standalone.
* Better info messages.
* Tweak sleep compensation.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
tests/perf_pmu.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 174 insertions(+), 18 deletions(-)
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index a7501ca5f7a4..fa9b54793a52 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -35,6 +35,7 @@
#include <dirent.h>
#include <time.h>
#include <poll.h>
+#include <sched.h>
#include "igt.h"
#include "igt_core.h"
@@ -385,6 +386,22 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
gem_quiescent_gpu(gem_fd);
}
+static void
+__submit_spin_batch(int gem_fd, igt_spin_t *spin,
+ const struct intel_execution_engine2 *e)
+{
+ struct drm_i915_gem_exec_object2 obj = {
+ .handle = spin->handle
+ };
+ struct drm_i915_gem_execbuffer2 eb = {
+ .buffer_count = 1,
+ .buffers_ptr = to_user_pointer(&obj),
+ .flags = e2ring(gem_fd, e),
+ };
+
+ gem_execbuf(gem_fd, &eb);
+}
+
static void
most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
const unsigned int num_engines, unsigned int flags)
@@ -405,15 +422,7 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
if (e == e_) {
idle_idx = i;
} else if (spin) {
- struct drm_i915_gem_exec_object2 obj = {
- .handle = spin->handle
- };
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(&obj),
- .flags = e2ring(gem_fd, e_),
- };
- gem_execbuf(gem_fd, &eb);
+ __submit_spin_batch(gem_fd, spin, e_);
} else {
spin = igt_spin_batch_new(gem_fd, 0,
e2ring(gem_fd, e_), 0);
@@ -469,15 +478,7 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
continue;
if (spin) {
- struct drm_i915_gem_exec_object2 obj = {
- .handle = spin->handle
- };
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(&obj),
- .flags = e2ring(gem_fd, e),
- };
- gem_execbuf(gem_fd, &eb);
+ __submit_spin_batch(gem_fd, spin, e);
} else {
spin = igt_spin_batch_new(gem_fd, 0,
e2ring(gem_fd, e), 0);
@@ -1390,6 +1391,150 @@ test_enable_race(int gem_fd, const struct intel_execution_engine2 *e)
gem_quiescent_gpu(gem_fd);
}
+static double __error(double val, double ref)
+{
+ igt_assert(ref != 0.0);
+ return (100.0 * val / ref) - 100.0;
+}
+
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+ const uint32_t mi_arb_chk = 0x5 << 23;
+
+ *spin->batch = mi_arb_chk;
+ __sync_synchronize();
+}
+
+#define div_round_up(a, b) (((a) + (b) - 1) / (b))
+
+static void
+accuracy(int gem_fd, const struct intel_execution_engine2 *e,
+ unsigned long target_busy_pct)
+{
+ const unsigned int min_test_loops = 7;
+ const unsigned long min_test_us = 1e6;
+ unsigned long busy_us = 2500;
+ unsigned long idle_us = 100 * (busy_us - target_busy_pct *
+ busy_us / 100) / target_busy_pct;
+ unsigned long pwm_calibration_us;
+ unsigned long test_us;
+ double busy_r;
+ uint64_t val[2];
+ uint64_t ts[2];
+ int fd;
+
+ /* Sampling platforms cannot reach the high accuracy criteria. */
+ igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
+
+ while (idle_us < 2500) {
+ busy_us *= 2;
+ idle_us *= 2;
+ }
+
+ pwm_calibration_us = min_test_loops * (busy_us + idle_us);
+ while (pwm_calibration_us < min_test_us)
+ pwm_calibration_us += busy_us + idle_us;
+ test_us = min_test_loops * (idle_us + busy_us);
+ while (test_us < min_test_us)
+ test_us += busy_us + idle_us;
+
+ igt_info("calibration=%luus, test=%luus; busy=%luus, idle=%luus\n",
+ pwm_calibration_us, test_us, busy_us, idle_us);
+
+ assert_within_epsilon((double)busy_us / (busy_us + idle_us),
+ (double)target_busy_pct / 100.0, tolerance);
+
+ /* Emit PWM pattern on the engine from a child. */
+ igt_fork(child, 1) {
+ struct sched_param rt = { .sched_priority = 99 };
+ const unsigned long timeout[] = { pwm_calibration_us * 1000,
+ test_us * 2 * 1000 };
+ unsigned long sleep_busy = busy_us;
+ unsigned long sleep_idle = idle_us;
+ igt_spin_t *spin;
+
+ /* We need the best sleep accuracy we can get. */
+ igt_require(sched_setscheduler(0,
+ SCHED_FIFO | SCHED_RESET_ON_FORK,
+ &rt) == 0);
+
+ /* Allocate our spin batch and idle it. */
+ spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ igt_spin_batch_end(spin);
+ gem_sync(gem_fd, spin->handle);
+
+ /* 1st pass is calibration, second pass is the test. */
+ for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
+ unsigned long busy_ns = 0, idle_ns = 0;
+ struct timespec test_start = { };
+ unsigned long loops = 0;
+ double err_busy, err_idle;
+
+ igt_nsec_elapsed(&test_start);
+ do {
+ struct timespec t_busy = { };
+
+ igt_nsec_elapsed(&t_busy);
+
+ /* Restart the spinbatch. */
+ __rearm_spin_batch(spin);
+ __submit_spin_batch(gem_fd, spin, e);
+ measured_usleep(sleep_busy);
+ igt_spin_batch_end(spin);
+ gem_sync(gem_fd, spin->handle);
+
+ busy_ns += igt_nsec_elapsed(&t_busy);
+
+ idle_ns += measured_usleep(sleep_idle);
+
+ loops++;
+ } while (igt_nsec_elapsed(&test_start) < timeout[pass]);
+
+ busy_ns = div_round_up(busy_ns, loops);
+ idle_ns = div_round_up(idle_ns, loops);
+
+ err_busy = __error(busy_ns / 1000, busy_us);
+ err_idle = __error(idle_ns / 1000, idle_us);
+
+ igt_info("%u: busy %lu/%lu %.2f%%, idle %lu/%lu %.2f%%\n",
+ pass,
+ busy_ns / 1000, busy_us, err_busy,
+ idle_ns / 1000, idle_us, err_idle);
+
+ if (pass == 0) {
+ sleep_busy = (double)busy_us -
+ (double)busy_us * err_busy / 100.0;
+ sleep_idle = (double)idle_us -
+ (double)idle_us * err_idle / 100.0;
+ igt_info("calibrated sleeps: busy=%lu, idle=%lu\n",
+ sleep_busy, sleep_idle);
+ }
+ }
+
+ igt_spin_batch_free(gem_fd, spin);
+ }
+
+ /* Let the child run. */
+ usleep(pwm_calibration_us * 2);
+
+ /* Collect engine busyness for an interesting part of child runtime. */
+ fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
+ val[0] = __pmu_read_single(fd, &ts[0]);
+ usleep(test_us / 2);
+ val[1] = __pmu_read_single(fd, &ts[1]);
+ close(fd);
+
+ igt_waitchildren();
+
+ busy_r = (double)(val[1] - val[0]) / (ts[1] - ts[0]);
+
+ igt_info("error=%.2f%% (%.2f%% vs %lu%%)\n",
+ __error(busy_r, target_busy_pct / 100.0),
+ busy_r * 100.0, target_busy_pct);
+
+ assert_within_epsilon(busy_r, (double)target_busy_pct / 100.0, 0.15);
+}
+
igt_main
{
const unsigned int num_other_metrics =
@@ -1418,6 +1563,8 @@ igt_main
invalid_init();
for_each_engine_class_instance(fd, e) {
+ const unsigned int pct[] = { 2, 50, 98 };
+
/**
* Test that a single engine metric can be initialized or it
* is correctly rejected.
@@ -1524,6 +1671,15 @@ igt_main
*/
igt_subtest_f("enable-race-%s", e->name)
test_enable_race(fd, e);
+
+ /**
+ * Check engine busyness accuracy is as expected.
+ */
+ for (i = 0; i < ARRAY_SIZE(pct); i++) {
+ igt_subtest_f("busy-accuracy-%u-%s",
+ pct[i], e->name)
+ accuracy(fd, e, pct[i]);
+ }
}
/**
--
2.14.1
More information about the igt-dev
mailing list