[Intel-gfx] [PATCH i-g-t] i915/perf_pmu: Compare semaphore and busy measurements
Chris Wilson
chris at chris-wilson.co.uk
Tue Sep 24 22:01:37 UTC 2019
Our semaphore time is measured by sampling a ring register, whereas our
busy time is measured exactly. This leaves a window of discrepancy between
the two measurements that we wish to keep small (at least within the
sampling tolerance).
References: https://bugs.freedesktop.org/show_bug.cgi?id=111788
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
tests/perf_pmu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 72 insertions(+), 1 deletion(-)
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 8a06e5d44..2fcaf88de 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -130,7 +130,7 @@ static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
#define __assert_within_epsilon(x, ref, tol_up, tol_down) \
igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
(double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
- "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
+ "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
#x, #ref, (double)(x), \
(tol_up) * 100.0, (tol_down) * 100.0, \
(double)(ref))
@@ -744,6 +744,74 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
assert_within_epsilon(val[1] - val[0], slept, tolerance);
}
+static void
+__sema_busy(int gem_fd, int pmu,
+ const struct intel_execution_engine2 *e,
+ const struct intel_execution_engine2 *signal,
+ int sema_pct,
+ int busy_pct)
+{
+ uint64_t total, sema, busy;
+ uint64_t start[2], end[2];
+ igt_spin_t *spin[2];
+
+ spin[0] = igt_spin_new(gem_fd,
+ .engine = signal->flags,
+ .flags = IGT_SPIN_FENCE_OUT);
+ spin[1] = igt_spin_new(gem_fd,
+ .engine = e->flags,
+ .fence = spin[0]->out_fence,
+ .flags = IGT_SPIN_FENCE_IN);
+
+ total = pmu_read_multi(pmu, 2, start);
+
+ sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
+ igt_spin_end(spin[0]);
+ busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);
+ igt_spin_end(spin[1]);
+ measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);
+
+ total = pmu_read_multi(pmu, 2, end) - total;
+
+ igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured {%.1f%%, %.1f%%}\n",
+ e->name, signal->name,
+ sema * 100. / total, sema_pct,
+ (sema + busy) * 100. / total, busy_pct,
+ (end[0] - start[0]) * 100. / total,
+ (end[1] - start[1]) * 100. / total);
+
+ assert_within_epsilon(end[0] - start[0], sema, tolerance);
+ assert_within_epsilon(end[1] - start[1], sema + busy, tolerance);
+ igt_assert((end[0] - start[0]) < (end[1] - start[1]) * (1 + tolerance));
+
+ igt_spin_free(gem_fd, spin[1]);
+ igt_spin_free(gem_fd, spin[0]);
+}
+
+static void
+sema_busy(int gem_fd,
+ const struct intel_execution_engine2 *e,
+ unsigned int flags)
+{
+ const struct intel_execution_engine2 *signal;
+ int fd;
+
+ fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
+ open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
+
+ __for_each_physical_engine(gem_fd, signal) {
+ if (e->class == signal->class &&
+ e->instance == signal->instance)
+ continue;
+
+ __sema_busy(gem_fd, fd, e, signal, 50, 100);
+ __sema_busy(gem_fd, fd, e, signal, 25, 50);
+ __sema_busy(gem_fd, fd, e, signal, 75, 75);
+ }
+
+ close(fd);
+}
+
#define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
#define MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
#define MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
@@ -1774,6 +1842,9 @@ igt_main
sema_wait(fd, e,
TEST_BUSY | TEST_TRAILING_IDLE);
+ igt_subtest_f("semaphore-busy-%s", e->name)
+ sema_busy(fd, e, 0);
+
/**
* Check that two perf clients do not influence each
* others observations.
--
2.23.0
More information about the Intel-gfx mailing list