[Intel-gfx] [PATCH igt] igt/perf_pmu: Recalibrate interrupt loop.
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Nov 24 09:11:06 UTC 2017
On 23/11/2017 15:03, Chris Wilson wrote:
> We have to be careful in our calibration loop, too slow and we timeout,
> too fast and we don't emit an interrupt! On fast legacy devices, we
> would overflow the calibration calculation...
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> tests/perf_pmu.c | 112 ++++++++++++++++++++++++++-----------------------------
> 1 file changed, 52 insertions(+), 60 deletions(-)
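(As an aside for the archives: the hunks below don't show exactly where the 32-bit maths wrapped on fast devices, but a stand-alone, purely hypothetical sketch of that kind of wrap, with made-up values rather than anything from the test itself, would be:)

/* Hypothetical, stand-alone sketch (not the test code): once a fast
 * device pushes the calibrated size high enough, a 32-bit
 * intermediate product wraps, which is the sort of thing promoting
 * the calibration variables to uint64_t avoids. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	unsigned int size = 3u << 29;		/* large calibrated size */
	unsigned int tolerance_pct = 10;

	/* 32-bit product wraps before the divide... */
	printf("32-bit: %u\n", size * tolerance_pct / 100);

	/* ...while a 64-bit promotion keeps the arithmetic intact. */
	printf("64-bit: %" PRIu64 "\n",
	       (uint64_t)size * tolerance_pct / 100);

	return 0;
}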
>
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index c699fb00..b8800e61 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -40,6 +40,7 @@
> #include "igt_core.h"
> #include "igt_perf.h"
> #include "igt_sysfs.h"
> +#include "sw_sync.h"
>
> IGT_TEST_DESCRIPTION("Test the i915 pmu perf interface");
>
> @@ -802,18 +803,18 @@ static void cpu_hotplug(int gem_fd)
> assert_within_epsilon(val, ref, tolerance);
> }
>
> -static unsigned long calibrate_nop(int fd, const unsigned int calibration_us)
> +static unsigned long calibrate_nop(int fd, const uint64_t calibration_us)
> {
> - const unsigned int cal_min_us = calibration_us * 3;
> + const uint64_t cal_min_us = calibration_us * 3;
> const unsigned int tolerance_pct = 10;
> const uint32_t bbe = MI_BATCH_BUFFER_END;
> const unsigned int loops = 17;
> struct drm_i915_gem_exec_object2 obj = {};
> - struct drm_i915_gem_execbuffer2 eb =
> - { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffer_count = 1, .buffers_ptr = to_user_pointer(&obj),
> + };
> struct timespec t_begin = { };
> - long size, last_size;
> - unsigned long ns;
> + uint64_t size, last_size, ns;
>
> igt_nsec_elapsed(&t_begin);
>
> @@ -843,81 +844,72 @@ static unsigned long calibrate_nop(int fd, const unsigned int calibration_us)
> } while (igt_nsec_elapsed(&t_begin) / 1000 < cal_min_us ||
> abs(size - last_size) > (size * tolerance_pct / 100));
>
> - return size / sizeof(uint32_t);
> + return size;
Ah, I did not spot this at all: the old code returned the size in dwords, but exec_nop() then passed it straight to gem_create() as a byte size, so the batches ended up only a quarter of the calibrated length if I am reading it right. Strange that I missed it.
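To spell out the unit mix-up with made-up numbers (again hypothetical, not the test code):

/* Hypothetical numbers only: if calibration reports N dwords but the
 * caller creates an object of N bytes, the resulting batch is a
 * quarter of the calibrated length. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t calibrated_dwords = 1ull << 20;	/* old calibrate_nop() return */
	uint64_t bytes_intended = calibrated_dwords * sizeof(uint32_t);
	uint64_t bytes_created = calibrated_dwords;	/* old exec_nop() gem_create() size */

	printf("batch is %.0f%% of the intended length\n",
	       100.0 * bytes_created / bytes_intended);
	return 0;
}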
> }
>
> -static void exec_nop(int gem_fd, unsigned long sz)
> +static void
> +test_interrupts(int gem_fd)
> {
> - struct drm_i915_gem_exec_object2 obj = {};
> - struct drm_i915_gem_execbuffer2 eb =
> - { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
> const uint32_t bbe = MI_BATCH_BUFFER_END;
> + struct drm_i915_gem_exec_object2 obj = { };
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffers_ptr = to_user_pointer(&obj),
> + .buffer_count = 1,
> + .flags = I915_EXEC_FENCE_OUT,
> + };
> + unsigned long sz;
> + igt_spin_t *spin;
> + const int target = 30;
> struct pollfd pfd;
> - int fence;
> + uint64_t idle, busy;
> + int fd;
>
> - sz = ALIGN(sz, sizeof(uint32_t));
> + sz = calibrate_nop(gem_fd, 1e6 / target);
> + gem_quiescent_gpu(gem_fd);
> +
> + fd = open_pmu(I915_PMU_INTERRUPTS);
> + spin = igt_spin_batch_new(gem_fd, 0, 0, 0);
What's the spin batch for?
>
> obj.handle = gem_create(gem_fd, sz);
> gem_write(gem_fd, obj.handle, sz - sizeof(bbe), &bbe, sizeof(bbe));
>
> - eb.flags = I915_EXEC_RENDER | I915_EXEC_FENCE_OUT;
> -
> - gem_execbuf_wr(gem_fd, &eb);
> - fence = eb.rsvd2 >> 32;
> + pfd.fd = -1;
> + for (int i = 0; i < target; i++) {
> + int new;
>
> - /*
> - * Poll on the output fence to ensure user interrupts will be
> - * generated and listened to.
> - */
> - pfd.fd = fence;
> - pfd.events = POLLIN;
> - igt_assert_eq(poll(&pfd, 1, -1), 1);
> -
> - close(fence);
> - gem_close(gem_fd, obj.handle);
> -}
> -
> -static void
> -test_interrupts(int gem_fd)
> -{
> - const unsigned int calibration_us = 250000;
> - const unsigned int batch_len_us = 100000;
> - const unsigned int batch_count = 3e6 / batch_len_us;
> - uint64_t idle, busy, prev;
> - unsigned long cal, sz;
> - unsigned int i;
> - int fd;
> -
> - fd = open_pmu(I915_PMU_INTERRUPTS);
> -
> - cal = calibrate_nop(gem_fd, calibration_us);
> - sz = batch_len_us * cal / calibration_us;
> -
> - gem_quiescent_gpu(gem_fd);
> + /* Merge all the fences together so we can wait on them all */
> + gem_execbuf_wr(gem_fd, &eb);
> + new = eb.rsvd2 >> 32;
> + if (pfd.fd == -1) {
> + pfd.fd = new;
> + } else {
> + int old = pfd.fd;
> + pfd.fd = sync_fence_merge(old, new);
> + close(old);
> + close(new);
> + }
> + }
>
> /* Wait for idle state. */
> - prev = pmu_read_single(fd);
> - idle = prev + 1;
> - while (idle != prev) {
> - usleep(1e6);
> - prev = idle;
> + idle = pmu_read_single(fd);
> + do {
> + busy = idle;
> + usleep(1e3);
> idle = pmu_read_single(fd);
> - }
> + } while (idle != busy);
>
> - igt_assert_eq(idle - prev, 0);
> + pfd.events = POLLIN;
> + igt_assert_eq(poll(&pfd, 1, 10), 0);
>
> - /*
> - * Send some no-op batches waiting on output fences to
> - * ensure interrupts.
> - */
> - for (i = 0; i < batch_count; i++)
> - exec_nop(gem_fd, sz);
> + igt_spin_batch_free(gem_fd, spin);
> + igt_assert_eq(poll(&pfd, 1, 2000), 1);
> + close(pfd.fd);
>
> /* Check at least as many interrupts has been generated. */
> busy = pmu_read_single(fd) - idle;
> close(fd);
>
> - igt_assert(busy >= batch_count);
> + igt_assert_lte(target, busy);
> }
>
> static void
>
Rest looks good.
Regards,
Tvrtko