[Intel-gfx] [PATCH i-g-t] i915/perf_pmu: Compare semaphore and busy measurements
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Sep 25 08:36:10 UTC 2019
On 24/09/2019 23:01, Chris Wilson wrote:
> Our semaphore time is measured by sampling a ring register, whereas our
> busy time is measured exactly. This leaves a window of discrepancy that
> we wish to keep small (at least within sample tolerance).
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=111788
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> tests/perf_pmu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 72 insertions(+), 1 deletion(-)
>
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 8a06e5d44..2fcaf88de 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -130,7 +130,7 @@ static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> #define __assert_within_epsilon(x, ref, tol_up, tol_down) \
> igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
> (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
> - "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
> + "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
> #x, #ref, (double)(x), \
> (tol_up) * 100.0, (tol_down) * 100.0, \
> (double)(ref))
> @@ -744,6 +744,74 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
> assert_within_epsilon(val[1] - val[0], slept, tolerance);
> }
>
> +static void
> +__sema_busy(int gem_fd, int pmu,
> + const struct intel_execution_engine2 *e,
> + const struct intel_execution_engine2 *signal,
> + int sema_pct,
> + int busy_pct)
> +{
> + uint64_t total, sema, busy;
> + uint64_t start[2], end[2];
> + igt_spin_t *spin[2];
> +
> + spin[0] = igt_spin_new(gem_fd,
> + .engine = signal->flags,
> + .flags = IGT_SPIN_FENCE_OUT);
> + spin[1] = igt_spin_new(gem_fd,
> + .engine = e->flags,
> + .fence = spin[0]->out_fence,
> + .flags = IGT_SPIN_FENCE_IN);
> +
> + total = pmu_read_multi(pmu, 2, start);
For result stability, it might be worth having the signaler use
IGT_SPIN_POLL_RUN and waiting for it to start running here before
proceeding with the sleeps.
> +
> + sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
> + igt_spin_end(spin[0]);
> + busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);
busy is only ever used together with sema so it may make sense for
clarity to add sema to it straight away and then it would directly
correspond with the metric semantics.
> + igt_spin_end(spin[1]);
> + measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);
The required relationship between the sema_pct and busy_pct inputs is a
bit non-obvious. Perhaps make it explicit:
igt_assert(busy_pct >= sema_pct)?
> +
> + total = pmu_read_multi(pmu, 2, end) - total;
> +
> + igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured {%.1f%%, %.1f%%}\n",
> + e->name, signal->name,
> + sema * 100. / total, sema_pct,
> + (sema + busy) * 100. / total, busy_pct,
> + (end[0] - start[0]) * 100. / total,
> + (end[1] - start[1]) * 100. / total);
> +
> + assert_within_epsilon(end[0] - start[0], sema, tolerance);
> + assert_within_epsilon(end[1] - start[1], sema + busy, tolerance);
> + igt_assert((end[0] - start[0]) < (end[1] - start[1]) * (1 + tolerance));
Could this use __assert_within_epsilon with one of the tolerances set to zero?
> +
> + igt_spin_free(gem_fd, spin[1]);
> + igt_spin_free(gem_fd, spin[0]);
> +}
> +
> +static void
> +sema_busy(int gem_fd,
> + const struct intel_execution_engine2 *e,
> + unsigned int flags)
> +{
> + const struct intel_execution_engine2 *signal;
> + int fd;
> +
> + fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
> + open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
> +
> + __for_each_physical_engine(gem_fd, signal) {
> + if (e->class == signal->class &&
> + e->instance == signal->instance)
> + continue;
> +
> + __sema_busy(gem_fd, fd, e, signal, 50, 100);
> + __sema_busy(gem_fd, fd, e, signal, 25, 50);
> + __sema_busy(gem_fd, fd, e, signal, 75, 75);
> + }
> +
> + close(fd);
> +}
> +
> #define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
> #define MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
> #define MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
> @@ -1774,6 +1842,9 @@ igt_main
> sema_wait(fd, e,
> TEST_BUSY | TEST_TRAILING_IDLE);
>
> + igt_subtest_f("semaphore-busy-%s", e->name)
> + sema_busy(fd, e, 0);
> +
> /**
> * Check that two perf clients do not influence each
> * others observations.
>
Looking forward to results!
Regards,
Tvrtko
More information about the Intel-gfx
mailing list