[Intel-gfx] [PATCH i-g-t] i915/perf_pmu: Compare semaphore and busy measurements

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Sep 25 08:36:10 UTC 2019


On 24/09/2019 23:01, Chris Wilson wrote:
> Our semaphore time is measured by sampling a ring register, whereas our
> busy time is measured exactly. This leaves a window of discrepancy that
> we wish to keep small (at least within sample tolerance).
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=111788
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
>   tests/perf_pmu.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 72 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 8a06e5d44..2fcaf88de 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -130,7 +130,7 @@ static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
>   #define __assert_within_epsilon(x, ref, tol_up, tol_down) \
>   	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
>   		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
> -		     "'%s' != '%s' (%f not within +%f%%/-%f%% tolerance of %f)\n",\
> +		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n",\
>   		     #x, #ref, (double)(x), \
>   		     (tol_up) * 100.0, (tol_down) * 100.0, \
>   		     (double)(ref))
> @@ -744,6 +744,74 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
>   	assert_within_epsilon(val[1] - val[0], slept, tolerance);
>   }
>   
> +static void
> +__sema_busy(int gem_fd, int pmu,
> +	    const struct intel_execution_engine2 *e,
> +	    const struct intel_execution_engine2 *signal,
> +	    int sema_pct,
> +	    int busy_pct)
> +{
> +	uint64_t total, sema, busy;
> +	uint64_t start[2], end[2];
> +	igt_spin_t *spin[2];
> +
> +	spin[0] = igt_spin_new(gem_fd,
> +			       .engine = signal->flags,
> +			       .flags = IGT_SPIN_FENCE_OUT);
> +	spin[1] = igt_spin_new(gem_fd,
> +			       .engine = e->flags,
> +			       .fence = spin[0]->out_fence,
> +			       .flags = IGT_SPIN_FENCE_IN);
> +
> +	total = pmu_read_multi(pmu, 2, start);

For result stability, it might be worth creating the signaler with
IGT_SPIN_POLL_RUN and waiting for it to start running here before
proceeding with the sleeps.

> +
> +	sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
> +	igt_spin_end(spin[0]);
> +	busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);

busy is only ever used together with sema, so for clarity it may make
sense to add sema to it straight away; it would then correspond directly
to the semantics of the busy metric.

> +	igt_spin_end(spin[1]);
> +	measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);

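The required relationship between the sema_pct and busy_pct inputs is a
bit non-obvious: the sleep above uses (busy_pct - sema_pct), so busy_pct
must not be smaller than sema_pct.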

igt_assert(busy_pct >= sema_pct)?

> +
> +	total = pmu_read_multi(pmu, 2, end) - total;
> +
> +	igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured {%.1f%%, %.1f%%}\n",
> +		 e->name, signal->name,
> +		 sema * 100. / total, sema_pct,
> +		 (sema + busy) * 100. / total, busy_pct,
> +		 (end[0] - start[0]) * 100. / total,
> +		 (end[1] - start[1]) * 100. / total);
> +
> +	assert_within_epsilon(end[0] - start[0], sema, tolerance);
> +	assert_within_epsilon(end[1] - start[1], sema + busy, tolerance);
> +	igt_assert((end[0] - start[0]) < (end[1] - start[1]) * (1 + tolerance));

Could this use __assert_within_epsilon with one of the tolerances set to zero?

> +
> +	igt_spin_free(gem_fd, spin[1]);
> +	igt_spin_free(gem_fd, spin[0]);
> +}
> +
> +static void
> +sema_busy(int gem_fd,
> +	  const struct intel_execution_engine2 *e,
> +	  unsigned int flags)
> +{
> +	const struct intel_execution_engine2 *signal;
> +	int fd;
> +
> +	fd = open_group(I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
> +	open_group(I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
> +
> +	__for_each_physical_engine(gem_fd, signal) {
> +		if (e->class == signal->class &&
> +		    e->instance == signal->instance)
> +			continue;
> +
> +		__sema_busy(gem_fd, fd, e, signal, 50, 100);
> +		__sema_busy(gem_fd, fd, e, signal, 25, 50);
> +		__sema_busy(gem_fd, fd, e, signal, 75, 75);
> +	}
> +
> +	close(fd);
> +}
> +
>   #define   MI_WAIT_FOR_PIPE_C_VBLANK (1<<21)
>   #define   MI_WAIT_FOR_PIPE_B_VBLANK (1<<11)
>   #define   MI_WAIT_FOR_PIPE_A_VBLANK (1<<3)
> @@ -1774,6 +1842,9 @@ igt_main
>   			sema_wait(fd, e,
>   				  TEST_BUSY | TEST_TRAILING_IDLE);
>   
> +		igt_subtest_f("semaphore-busy-%s", e->name)
> +			sema_busy(fd, e, 0);
> +
>   		/**
>   		 * Check that two perf clients do not influence each
>   		 * others observations.
> 

Looking forward to results!

Regards,

Tvrtko


More information about the Intel-gfx mailing list