[Intel-gfx] [PATCH igt] igt/perf_pmu: Recalibrate interrupt loop.

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Nov 24 09:11:06 UTC 2017


On 23/11/2017 15:03, Chris Wilson wrote:
> We have to be careful in our calibration loop: too slow and we time out,
> too fast and we don't emit an interrupt! On fast legacy devices, we
> would overflow the calibration calculation...
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
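
For reference, a minimal sketch of the kind of overflow being fixed here,
assuming the calibration scales the batch size by a time ratio (the exact
expression is not visible in this hunk, so the numbers are purely
illustrative):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A fast device may need a nop batch tens of MiB long just to
	 * run for the calibration period. */
	unsigned int size_bytes = 64 << 20;
	unsigned int calibration_us = 250000;

	/* 32-bit intermediates: 64MiB * 250000 wraps before the divide. */
	unsigned int wrapped = size_bytes * calibration_us / 1000000;

	/* Promoting the arithmetic to 64 bits, as the patch does by
	 * switching to uint64_t, keeps the product in range. */
	uint64_t ok = (uint64_t)size_bytes * calibration_us / 1000000;

	printf("32-bit: %u vs 64-bit: %" PRIu64 "\n", wrapped, ok);
	return 0;
}
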
> ---
>   tests/perf_pmu.c | 112 ++++++++++++++++++++++++++-----------------------------
>   1 file changed, 52 insertions(+), 60 deletions(-)
> 
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index c699fb00..b8800e61 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -40,6 +40,7 @@
>   #include "igt_core.h"
>   #include "igt_perf.h"
>   #include "igt_sysfs.h"
> +#include "sw_sync.h"
>   
>   IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
>   
> @@ -802,18 +803,18 @@ static void cpu_hotplug(int gem_fd)
>   	assert_within_epsilon(val, ref, tolerance);
>   }
>   
> -static unsigned long calibrate_nop(int fd, const unsigned int calibration_us)
> +static unsigned long calibrate_nop(int fd, const uint64_t calibration_us)
>   {
> -	const unsigned int cal_min_us = calibration_us * 3;
> +	const uint64_t cal_min_us = calibration_us * 3;
>   	const unsigned int tolerance_pct = 10;
>   	const uint32_t bbe = MI_BATCH_BUFFER_END;
>   	const unsigned int loops = 17;
>   	struct drm_i915_gem_exec_object2 obj = {};
> -	struct drm_i915_gem_execbuffer2 eb =
> -		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffer_count = 1, .buffers_ptr = to_user_pointer(&obj),
> +	};
>   	struct timespec t_begin = { };
> -	long size, last_size;
> -	unsigned long ns;
> +	uint64_t size, last_size, ns;
>   
>   	igt_nsec_elapsed(&t_begin);
>   
> @@ -843,81 +844,72 @@ static unsigned long calibrate_nop(int fd, const unsigned int calibration_us)
>   	} while (igt_nsec_elapsed(&t_begin) / 1000 < cal_min_us ||
>   		 abs(size - last_size) > (size * tolerance_pct / 100));
>   
> -	return size / sizeof(uint32_t);
> +	return size;

Ah.. I did not spot this change (the return value is now in bytes rather
than dwords) at all.. strange..

>   }
>   
> -static void exec_nop(int gem_fd, unsigned long sz)
> +static void
> +test_interrupts(int gem_fd)
>   {
> -	struct drm_i915_gem_exec_object2 obj = {};
> -	struct drm_i915_gem_execbuffer2 eb =
> -		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
>   	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	struct drm_i915_gem_exec_object2 obj = { };
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffers_ptr = to_user_pointer(&obj),
> +		.buffer_count = 1,
> +		.flags = I915_EXEC_FENCE_OUT,
> +	};
> +	unsigned long sz;
> +	igt_spin_t *spin;
> +	const int target = 30;
>   	struct pollfd pfd;
> -	int fence;
> +	uint64_t idle, busy;
> +	int fd;
>   
> -	sz = ALIGN(sz, sizeof(uint32_t));
> +	sz = calibrate_nop(gem_fd, 1e6 / target);
> +	gem_quiescent_gpu(gem_fd);
> +
> +	fd = open_pmu(I915_PMU_INTERRUPTS);
> +	spin = igt_spin_batch_new(gem_fd, 0, 0, 0);

What's the spin batch for?

>   
>   	obj.handle = gem_create(gem_fd, sz);
>   	gem_write(gem_fd, obj.handle, sz - sizeof(bbe), &bbe, sizeof(bbe));
>   
> -	eb.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;
> -
> -	gem_execbuf_wr(gem_fd, &eb);
> -	fence = eb.rsvd2 >> 32;
> +	pfd.fd = -1;
> +	for (int i = 0; i < target; i++) {
> +		int new;
>   
> -	/*
> -	 * Poll on the output fence to ensure user interrupts will be
> -	 * generated and listened to.
> -	 */
> -	pfd.fd = fence;
> -	pfd.events = POLLIN;
> -	igt_assert_eq(poll(&pfd, 1, -1), 1);
> -
> -	close(fence);
> -	gem_close(gem_fd, obj.handle);
> -}
> -
> -static void
> -test_interrupts(int gem_fd)
> -{
> -	const unsigned int calibration_us = 250000;
> -	const unsigned int batch_len_us = 100000;
> -	const unsigned int batch_count = 3e6 / batch_len_us;
> -	uint64_t idle, busy, prev;
> -	unsigned long cal, sz;
> -	unsigned int i;
> -	int fd;
> -
> -	fd = open_pmu(I915_PMU_INTERRUPTS);
> -
> -	cal = calibrate_nop(gem_fd, calibration_us);
> -	sz = batch_len_us * cal / calibration_us;
> -
> -	gem_quiescent_gpu(gem_fd);
> +		/* Merge all the fences together so we can wait on them all */
> +		gem_execbuf_wr(gem_fd, &eb);
> +		new = eb.rsvd2 >> 32;
> +		if (pfd.fd == -1) {
> +			pfd.fd = new;
> +		} else {
> +			int old = pfd.fd;
> +			pfd.fd = sync_fence_merge(old, new);
> +			close(old);
> +			close(new);
> +		}
> +	}
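
The fence merging is a nice trick. For anyone reading along, a minimal
sketch of the semantics, assuming the sw_sync helpers from lib/sw_sync.h
behave as exercised in tests/sw_sync.c (my understanding: the merged fd
only signals once every component fence has signalled, which is why a
single poll() on pfd.fd later in the test covers all of the submissions
merged above):

#include <poll.h>
#include <unistd.h>
#include "igt.h"
#include "sw_sync.h"

static void merged_fence_sketch(void)
{
	int timeline = sw_sync_timeline_create();
	int a = sw_sync_timeline_create_fence(timeline, 1);
	int b = sw_sync_timeline_create_fence(timeline, 2);
	struct pollfd pfd = {
		.fd = sync_fence_merge(a, b),
		.events = POLLIN,
	};

	sw_sync_timeline_inc(timeline, 1);	/* signals a only */
	igt_assert_eq(poll(&pfd, 1, 0), 0);	/* merged fence still pending */

	sw_sync_timeline_inc(timeline, 1);	/* signals b, and so the merge */
	igt_assert_eq(poll(&pfd, 1, 0), 1);

	close(pfd.fd);
	close(a);
	close(b);
	close(timeline);
}
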
>   
>   	/* Wait for idle state. */
> -	prev = pmu_read_single(fd);
> -	idle = prev + 1;
> -	while (idle != prev) {
> -		usleep(1e6);
> -		prev = idle;
> +	idle = pmu_read_single(fd);
> +	do {
> +		busy = idle;
> +		usleep(1e3);
>   		idle = pmu_read_single(fd);
> -	}
> +	} while (idle != busy);
>   
> -	igt_assert_eq(idle - prev, 0);
> +	pfd.events = POLLIN;
> +	igt_assert_eq(poll(&pfd, 1, 10), 0);
>   
> -	/*
> -	 * Send some no-op batches waiting on output fences to
> -	 * ensure interrupts.
> -	 */
> -	for (i = 0; i < batch_count; i++)
> -		exec_nop(gem_fd, sz);
> +	igt_spin_batch_free(gem_fd, spin);
> +	igt_assert_eq(poll(&pfd, 1, 2000), 1);
> +	close(pfd.fd);
>   
>   	/* Check at least as many interrupts has been generated. */
>   	busy = pmu_read_single(fd) - idle;
>   	close(fd);
>   
> -	igt_assert(busy >= batch_count);
> +	igt_assert_lte(target, busy);
>   }
>   
>   static void
> 
Rest looks good.

Regards,

Tvrtko

