[Intel-gfx] [PATCH i-g-t v2] i915/perf_pmu: Compare semaphore and busy measurements
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Sep 25 09:27:58 UTC 2019
On 25/09/2019 10:23, Chris Wilson wrote:
> Our semaphore time is measured by sampling a ring register, whereas our
> busy time is measured exactly. This leaves a window of discrepancy that
> we wish to keep small (at least within sample tolerance).
>
> v2: Explain the sema <= busy assert, and lots of other Tvrtko tweaks
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=111788
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> tests/perf_pmu.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 92 insertions(+), 1 deletion(-)
>
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 8a06e5d44..2aed3381f 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -130,7 +130,7 @@ static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> #define __assert_within_epsilon(x, ref, tol_up, tol_down) \
> igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
> (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
> - "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
> + "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
> #x, #ref, (double)(x), \
> (tol_up) * 100.0, (tol_down) * 100.0, \
> (double)(ref))
> @@ -744,6 +744,94 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
> assert_within_epsilon(val[1] - val[0], slept, tolerance);
> }
>
> +static void
> +__sema_busy(int gem_fd, int pmu,
> + const struct intel_execution_engine2 *e,
> + const struct intel_execution_engine2 *signal,
> + int sema_pct,
> + int busy_pct)
> +{
> + enum {
> + SEMA = 0,
> + BUSY,
> + };
> + uint64_t total, sema, busy;
> + uint64_t start[2], val[2];
> + igt_spin_t *spin[2];
> +
> + /* Time spent being busy includes time waiting on semaphores */
> + igt_assert(busy_pct >= sema_pct);
> +
> + gem_quiescent_gpu(gem_fd);
> +
> + spin[0] = igt_spin_new(gem_fd,
> + .engine = signal->flags,
> + .flags = IGT_SPIN_FENCE_OUT | IGT_SPIN_POLL_RUN);
> + spin[1] = igt_spin_new(gem_fd,
> + .engine = e->flags,
> + .fence = spin[0]->out_fence,
> + .flags = IGT_SPIN_FENCE_IN);
> +
> + igt_spin_busywait_until_started(spin[0]);
> +
> + total = pmu_read_multi(pmu, 2, start);
> +
> + sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
> + igt_spin_end(spin[0]);
> + busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);
> + igt_spin_end(spin[1]);
> + measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);
> +
> + total = pmu_read_multi(pmu, 2, val) - total;
> +
> + busy += sema;
> + val[SEMA] -= start[SEMA];
> + val[BUSY] -= start[BUSY];
> +
> + igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured: {%.1f%%, %.1f%%}\n",
> + e->name, signal->name,
> + sema * 100. / total, sema_pct,
> + busy * 100. / total, busy_pct,
> + val[SEMA] * 100. / total,
> + val[BUSY] * 100. / total);
> +
> + assert_within_epsilon(val[SEMA], sema, tolerance);
> + assert_within_epsilon(val[BUSY], busy, tolerance);
> + igt_assert_f(val[SEMA] < val[BUSY] * (1 + tolerance),
> + "Semaphore time (%.3fus, %1.f%%) greater than total time busy (%.3fus, %1.f%%)!\n",
> + val[SEMA] * 1e-3, val[SEMA] * 100. / total,
> + val[BUSY] * 1e-3, val[BUSY] * 100. / total);
> +
> + igt_spin_free(gem_fd, spin[1]);
> + igt_spin_free(gem_fd, spin[0]);
> +}
> +
> +static void
> +sema_busy(int gem_fd,
> + const struct intel_execution_engine2 *e,
> + unsigned int flags)
> +{
> + const struct intel_execution_engine2 *signal;
> + int fd;
> +
> + igt_require(gem_scheduler_has_semaphores(gem_fd));
> +
> + fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
> + open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
> +
> + __for_each_physical_engine(gem_fd, signal) {
> + if (e->class == signal->class &&
> + e->instance == signal->instance)
> + continue;
> +
> + __sema_busy(gem_fd, fd, e, signal, 50, 100);
> + __sema_busy(gem_fd, fd, e, signal, 25, 50);
> + __sema_busy(gem_fd, fd, e, signal, 75, 75);
> + }
> +
> + close(fd);
> +}
> +
> #define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
> #define MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
> #define MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
> @@ -1774,6 +1862,9 @@ igt_main
> sema_wait(fd, e,
> TEST_BUSY | TEST_TRAILING_IDLE);
>
> + igt_subtest_f("semaphore-busy-%s", e->name)
> + sema_busy(fd, e, 0);
> +
> /**
> * Check that two perf clients do not influence each
> * others observations.
>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx mailing list