[Intel-gfx] [PATCH i-g-t v2] i915/perf_pmu: Compare semaphore and busy measurements

Wed Sep 25 09:27:58 UTC 2019

On 25/09/2019 10:23, Chris Wilson wrote:
> Our semaphore time is measured by sampling a ring register, whereas our
> busy time is measured exactly. This leaves a window of discrepancy that
> we wish to keep small (at least within sample tolerance).
> 
> v2: Explain the sema <= busy assert, and lots of other Tvrtko tweaks
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=111788
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   tests/perf_pmu.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 92 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 8a06e5d44..2aed3381f 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -130,7 +130,7 @@ static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>   #define __assert_within_epsilon(x, ref, tol_up, tol_down) \
>   	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
>   		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
> -		     "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
> +		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
>   		     #x, #ref, (double)(x), \
>   		     (tol_up) * 100.0, (tol_down) * 100.0, \
>   		     (double)(ref))
> @@ -744,6 +744,94 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
>   	assert_within_epsilon(val[1] - val[0], slept, tolerance);
>   }
>   
> +static void
> +__sema_busy(int gem_fd, int pmu,
> +	    const struct intel_execution_engine2 *e,
> +	    const struct intel_execution_engine2 *signal,
> +	    int sema_pct,
> +	    int busy_pct)
> +{
> +	enum {
> +		SEMA = 0,
> +		BUSY,
> +	};
> +	uint64_t total, sema, busy;
> +	uint64_t start[2], val[2];
> +	igt_spin_t *spin[2];
> +
> +	/* Time spent being busy includes time waiting on semaphores */
> +	igt_assert(busy_pct >= sema_pct);
> +
> +	gem_quiescent_gpu(gem_fd);
> +
> +	spin[0] = igt_spin_new(gem_fd,
> +			       .engine = signal->flags,
> +			       .flags = IGT_SPIN_FENCE_OUT | IGT_SPIN_POLL_RUN);
> +	spin[1] = igt_spin_new(gem_fd,
> +			       .engine = e->flags,
> +			       .fence = spin[0]->out_fence,
> +			       .flags = IGT_SPIN_FENCE_IN);
> +
> +	igt_spin_busywait_until_started(spin[0]);
> +
> +	total = pmu_read_multi(pmu, 2, start);
> +
> +	sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
> +	igt_spin_end(spin[0]);
> +	busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);
> +	igt_spin_end(spin[1]);
> +	measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);
> +
> +	total = pmu_read_multi(pmu, 2, val) - total;
> +
> +	busy += sema;
> +	val[SEMA] -= start[SEMA];
> +	val[BUSY] -= start[BUSY];
> +
> +	igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured: {%.1f%%, %.1f%%}\n",
> +		 e->name, signal->name,
> +		 sema * 100. / total, sema_pct,
> +		 busy * 100. / total, busy_pct,
> +		 val[SEMA] * 100. / total,
> +		 val[BUSY] * 100. / total);
> +
> +	assert_within_epsilon(val[SEMA], sema, tolerance);
> +	assert_within_epsilon(val[BUSY], busy, tolerance);
> +	igt_assert_f(val[SEMA] < val[BUSY] * (1 + tolerance),
> +		     "Semaphore time (%.3fus, %1.f%%) greater than total time busy (%.3fus, %1.f%%)!\n",
> +		     val[SEMA] * 1e-3, val[SEMA] * 100. / total,
> +		     val[BUSY] * 1e-3, val[BUSY] * 100. / total);
> +
> +	igt_spin_free(gem_fd, spin[1]);
> +	igt_spin_free(gem_fd, spin[0]);
> +}
> +
> +static void
> +sema_busy(int gem_fd,
> +	  const struct intel_execution_engine2 *e,
> +	  unsigned int flags)
> +{
> +	const struct intel_execution_engine2 *signal;
> +	int fd;
> +
> +	igt_require(gem_scheduler_has_semaphores(gem_fd));
> +
> +	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
> +	open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
> +
> +	__for_each_physical_engine(gem_fd, signal) {
> +		if (e->class == signal->class &&
> +		    e->instance == signal->instance)
> +			continue;
> +
> +		__sema_busy(gem_fd, fd, e, signal, 50, 100);
> +		__sema_busy(gem_fd, fd, e, signal, 25, 50);
> +		__sema_busy(gem_fd, fd, e, signal, 75, 75);
> +	}
> +
> +	close(fd);
> +}
> +
>   #define   MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
>   #define   MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
>   #define   MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
> @@ -1774,6 +1862,9 @@ igt_main
>   			sema_wait(fd, e,
>   				  TEST_BUSY | TEST_TRAILING_IDLE);
>   
> +		igt_subtest_f("semaphore-busy-%s", e->name)
> +			sema_busy(fd, e, 0);
> +
>   		/**
>   		 * Check that two perf clients do not influence each
>   		 * others observations.
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko