[Intel-gfx] [igt-dev] [PATCH i-g-t 2/3] tests/gem_eio: Speed up test execution

Chris Wilson chris at chris-wilson.co.uk
Thu Mar 22 17:44:38 UTC 2018


Quoting Tvrtko Ursulin (2018-03-22 17:24:16)
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> If we stop relying on regular GPU hangs to be detected, but trigger them
> manually as soon as we know our batch of interest is actually executing
> on the GPU, we can dramatically speed up various subtests.
> 
> This is enabled by the pollable spin batch added in the previous patch.
> 
> v2:
>  * Test gem_wait after reset/wedge and with reset/wedge after a few
>    predefined intervals since gem_wait invocation. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Antonio Argenziano <antonio.argenziano at intel.com>
> ---
>  lib.tar         | Bin 0 -> 102400 bytes
>  tests/gem_eio.c | 214 ++++++++++++++++++++++++++++++++++++++++++--------------
>  2 files changed, 160 insertions(+), 54 deletions(-)
>  create mode 100644 lib.tar
> 
> diff --git a/tests/gem_eio.c b/tests/gem_eio.c
> index 4bcc5937db39..2f9e954085ec 100644
> --- a/tests/gem_eio.c
> +++ b/tests/gem_eio.c
> @@ -35,6 +35,7 @@
>  #include <inttypes.h>
>  #include <errno.h>
>  #include <sys/ioctl.h>
> +#include <signal.h>
>  
>  #include <drm.h>
>  
> @@ -71,26 +72,23 @@ static void trigger_reset(int fd)
>         gem_quiescent_gpu(fd);
>  }
>  
> -static void wedge_gpu(int fd)
> +static void manual_hang(int drm_fd)
>  {
> -       /* First idle the GPU then disable GPU resets before injecting a hang */
> -       gem_quiescent_gpu(fd);
> -
> -       igt_require(i915_reset_control(false));
> +       int dir = igt_debugfs_dir(drm_fd);
>  
> -       igt_debug("Wedging GPU by injecting hang\n");
> -       igt_post_hang_ring(fd, igt_hang_ring(fd, I915_EXEC_DEFAULT));
> +       igt_sysfs_set(dir, "i915_wedged", "-1");
>  
> -       igt_assert(i915_reset_control(true));
> +       close(dir);
>  }
>  
> -static void wedgeme(int drm_fd)
> +static void wedge_gpu(int fd)
>  {
> -       int dir = igt_debugfs_dir(drm_fd);
> -
> -       igt_sysfs_set(dir, "i915_wedged", "-1");
> +       /* First idle the GPU then disable GPU resets before injecting a hang */
> +       gem_quiescent_gpu(fd);
>  
> -       close(dir);
> +       igt_require(i915_reset_control(false));
> +       manual_hang(fd);
> +       igt_assert(i915_reset_control(true));
>  }
>  
>  static int __gem_throttle(int fd)
> @@ -149,26 +147,111 @@ static int __gem_wait(int fd, uint32_t handle, int64_t timeout)
>         return err;
>  }
>  
> -static void test_wait(int fd)
> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
> +{
> +       if (gem_can_store_dword(fd, flags))
> +               return __igt_spin_batch_new_poll(fd, ctx, flags);
> +       else
> +               return __igt_spin_batch_new(fd, ctx, flags, 0);
> +}
> +
> +static void __spin_wait(int fd, igt_spin_t *spin)
> +{
> +       if (spin->running) {
> +               igt_spin_busywait_until_running(spin);
> +       } else {
> +               igt_debug("__spin_wait - usleep mode\n");
> +               usleep(500e3); /* Better than nothing! */
> +       }
> +}
> +
> +static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags)
> +{
> +       igt_spin_t *spin = __spin_poll(fd, ctx, flags);
> +
> +       __spin_wait(fd, spin);
> +
> +       return spin;
> +}
> +
> +static int debugfs_dir = -1;
> +
> +static void hang_handler(int sig)
> +{
> +       igt_sysfs_set(debugfs_dir, "i915_wedged", "-1");
> +}
> +
> +static void hang_after(int fd, unsigned int us)
> +{
> +        struct sigaction sa = { .sa_handler = hang_handler };
> +       struct itimerval itv = { };
> +
> +       debugfs_dir = igt_debugfs_dir(fd);
> +       igt_assert_fd(debugfs_dir);
> +
> +       igt_assert_eq(sigaction(SIGALRM, &sa, NULL), 0);
> +
> +       itv.it_value.tv_sec = us / 1000000;

USEC_PER_SEC.

> +       itv.it_value.tv_usec = us % 1000000;
> +       setitimer(ITIMER_REAL, &itv, NULL);

Ok, that gives a single shot signal.

I would have used
struct sigevent sev = {
	.sigev_notify = SIGEV_THREAD,
	.sigev_value.sigval_int = debugfs_dir
	.sigev_notify_function = hang_handler
};
timer_create(CLOCK_MONOTONIC, &sec, &timer);
timer_settime(timer, 0, &its, NULL);

Then 

static void hang_handler(union sigval arg)
{
	igt_sysfs_set(arg.sival_int, "i915_wedged", 1);
}

No signals, nor globals required :)

The problem with using a signal is that it interrupts the gem_wait()
and so we don't actually check that it is being woken by the hang
because it is already awake. Gah.

> +static void cleanup_hang(void)
> +{
> +       struct itimerval itv = { };
> +
> +       igt_assert_eq(setitimer(ITIMER_REAL, &itv, NULL), 0);

You also need a sleep here as it does not flush inflight signals.
(Also timer_destroy :)

> +       igt_assert_fd(debugfs_dir);
> +       close(debugfs_dir);
> +       debugfs_dir = -1;
> +}
> +
> +static int __check_wait(int fd, uint32_t bo, unsigned int wait)
> +{
> +       unsigned long wait_timeout = 250e6; /* Some margin for actual reset. */
> +       int ret;
> +
> +       if (wait) {
> +               wait_timeout += wait * 2000; /* x2 for safety. */
> +               wait_timeout += 250e6; /* Margin for signal delay. */;
> +               hang_after(fd, wait);
> +       } else {
> +               manual_hang(fd);
> +       }
> +
> +       ret = __gem_wait(fd, bo, wait_timeout);

Ok, I understand where the concerned about how long it took to recover
from reset came from :)

> +
> +       if (wait)
> +               cleanup_hang();
> +
> +       return ret;
> +}
> +
> +#define TEST_WEDGE (1)
> +
> +static void test_wait(int fd, unsigned int flags, unsigned int wait)
>  {
> -       igt_hang_t hang;
> +       igt_spin_t *hang;
>  
>         igt_require_gem(fd);
>  
> -       /* If the request we wait on completes due to a hang (even for
> +       /*
> +        * If the request we wait on completes due to a hang (even for
>          * that request), the user expects the return value to 0 (success).
>          */
> -       hang = igt_hang_ring(fd, I915_EXEC_DEFAULT);
> -       igt_assert_eq(__gem_wait(fd, hang.handle, -1), 0);
> -       igt_post_hang_ring(fd, hang);
>  
> -       /* If the GPU is wedged during the wait, again we expect the return
> -        * value to be 0 (success).
> -        */
> -       igt_require(i915_reset_control(false));
> -       hang = igt_hang_ring(fd, I915_EXEC_DEFAULT);
> -       igt_assert_eq(__gem_wait(fd, hang.handle, -1), 0);
> -       igt_post_hang_ring(fd, hang);
> +       if (flags & TEST_WEDGE)
> +               igt_require(i915_reset_control(false));
> +       else
> +               igt_require(i915_reset_control(true));
> +
> +       hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
> +
> +       igt_assert_eq(__check_wait(fd, hang->handle, wait), 0);
> +
> +       igt_spin_batch_free(fd, hang);
> +
>         igt_require(i915_reset_control(true));
>  
>         trigger_reset(fd);
> @@ -181,7 +264,7 @@ static void test_suspend(int fd, int state)
>  
>         /* Check we can suspend when the driver is already wedged */
>         igt_require(i915_reset_control(false));
> -       wedgeme(fd);
> +       manual_hang(fd);
>  
>         igt_system_suspend_autoresume(state, SUSPEND_TEST_DEVICES);
>  
> @@ -189,7 +272,7 @@ static void test_suspend(int fd, int state)
>         trigger_reset(fd);
>  }
>  
> -static void test_inflight(int fd)
> +static void test_inflight(int fd, unsigned int wait)
>  {
>         const uint32_t bbe = MI_BATCH_BUFFER_END;
>         struct drm_i915_gem_exec_object2 obj[2];
> @@ -209,11 +292,10 @@ static void test_inflight(int fd)
>                 int fence[64]; /* conservative estimate of ring size */
>  
>                 gem_quiescent_gpu(fd);
> -
>                 igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
>                 igt_require(i915_reset_control(false));
>  
> -               hang = __igt_spin_batch_new(fd, 0, engine, 0);
> +               hang = spin_sync(fd, 0, engine);
>                 obj[0].handle = hang->handle;
>  
>                 memset(&execbuf, 0, sizeof(execbuf));
> @@ -227,7 +309,8 @@ static void test_inflight(int fd)
>                         igt_assert(fence[n] != -1);
>                 }
>  
> -               igt_assert_eq(__gem_wait(fd, obj[1].handle, -1), 0);
> +               igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0);
> +
>                 for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
>                         igt_assert_eq(sync_fence_status(fence[n]), -EIO);
>                         close(fence[n]);
> @@ -256,7 +339,7 @@ static void test_inflight_suspend(int fd)
>         obj[1].handle = gem_create(fd, 4096);
>         gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
>  
> -       hang = __igt_spin_batch_new(fd, 0, 0, 0);
> +       hang = spin_sync(fd, 0, 0);
>         obj[0].handle = hang->handle;
>  
>         memset(&execbuf, 0, sizeof(execbuf));
> @@ -273,7 +356,8 @@ static void test_inflight_suspend(int fd)
>         igt_set_autoresume_delay(30);
>         igt_system_suspend_autoresume(SUSPEND_STATE_MEM, SUSPEND_TEST_NONE);
>  
> -       igt_assert_eq(__gem_wait(fd, obj[1].handle, -1), 0);
> +       igt_assert_eq(__check_wait(fd, obj[1].handle, 10), 0);
> +
>         for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
>                 igt_assert_eq(sync_fence_status(fence[n]), -EIO);
>                 close(fence[n]);
> @@ -301,7 +385,7 @@ static uint32_t context_create_safe(int i915)
>         return param.ctx_id;
>  }
>  
> -static void test_inflight_contexts(int fd)
> +static void test_inflight_contexts(int fd, unsigned int wait)
>  {
>         struct drm_i915_gem_exec_object2 obj[2];
>         const uint32_t bbe = MI_BATCH_BUFFER_END;
> @@ -330,7 +414,7 @@ static void test_inflight_contexts(int fd)
>                 igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
>                 igt_require(i915_reset_control(false));
>  
> -               hang = __igt_spin_batch_new(fd, 0, engine, 0);
> +               hang = spin_sync(fd, 0, engine);
>                 obj[0].handle = hang->handle;
>  
>                 memset(&execbuf, 0, sizeof(execbuf));
> @@ -345,7 +429,8 @@ static void test_inflight_contexts(int fd)
>                         igt_assert(fence[n] != -1);
>                 }
>  
> -               igt_assert_eq(__gem_wait(fd, obj[1].handle, -1), 0);
> +               igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0);
> +
>                 for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
>                         igt_assert_eq(sync_fence_status(fence[n]), -EIO);
>                         close(fence[n]);
> @@ -375,7 +460,7 @@ static void test_inflight_external(int fd)
>         fence = igt_cork_plug(&cork, fd);
>  
>         igt_require(i915_reset_control(false));
> -       hang = __igt_spin_batch_new(fd, 0, 0, 0);
> +       hang = __spin_poll(fd, 0, 0);
>  
>         memset(&obj, 0, sizeof(obj));
>         obj.handle = gem_create(fd, 4096);
> @@ -393,6 +478,9 @@ static void test_inflight_external(int fd)
>         fence = execbuf.rsvd2 >> 32;
>         igt_assert(fence != -1);
>  
> +       __spin_wait(fd, hang);
> +       manual_hang(fd);
> +
>         gem_sync(fd, hang->handle); /* wedged, with an unready batch */
>         igt_assert(!gem_bo_busy(fd, hang->handle));
>         igt_assert(gem_bo_busy(fd, obj.handle));
> @@ -407,7 +495,7 @@ static void test_inflight_external(int fd)
>         trigger_reset(fd);
>  }
>  
> -static void test_inflight_internal(int fd)
> +static void test_inflight_internal(int fd, unsigned int wait)
>  {
>         struct drm_i915_gem_execbuffer2 execbuf;
>         struct drm_i915_gem_exec_object2 obj[2];
> @@ -420,7 +508,7 @@ static void test_inflight_internal(int fd)
>         igt_require(gem_has_exec_fence(fd));
>  
>         igt_require(i915_reset_control(false));
> -       hang = __igt_spin_batch_new(fd, 0, 0, 0);
> +       hang = spin_sync(fd, 0, 0);
>  
>         memset(obj, 0, sizeof(obj));
>         obj[0].handle = hang->handle;
> @@ -441,7 +529,8 @@ static void test_inflight_internal(int fd)
>                 nfence++;
>         }
>  
> -       igt_assert_eq(__gem_wait(fd, obj[1].handle, -1), 0);
> +       igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0);
> +
>         while (nfence--) {
>                 igt_assert_eq(sync_fence_status(fences[nfence]), -EIO);
>                 close(fences[nfence]);
> @@ -484,29 +573,46 @@ igt_main
>         igt_subtest("execbuf")
>                 test_execbuf(fd);
>  
> -       igt_subtest("wait")
> -               test_wait(fd);
> -
>         igt_subtest("suspend")
>                 test_suspend(fd, SUSPEND_STATE_MEM);
>  
>         igt_subtest("hibernate")
>                 test_suspend(fd, SUSPEND_STATE_DISK);
>  
> -       igt_subtest("in-flight")
> -               test_inflight(fd);
> -
> -       igt_subtest("in-flight-contexts")
> -               test_inflight_contexts(fd);
> -
>         igt_subtest("in-flight-external")
>                 test_inflight_external(fd);
>  
> -       igt_subtest("in-flight-internal") {
> -               igt_skip_on(gem_has_semaphores(fd));
> -               test_inflight_internal(fd);
> -       }
> -
>         igt_subtest("in-flight-suspend")
>                 test_inflight_suspend(fd);
> +
> +       igt_subtest_group {
> +               const struct {
> +                       unsigned int wait;
> +                       const char *name;
> +               } waits[] = {
> +                       { .wait = 0, .name = "immediate" },
> +                       { .wait = 10, .name = "10us" },

i915_request_spin is set to 2us currently :| I guess that's a really hard
window to hit reliably. Maybe we should spin for 200ms just to make
testing easier!

> +                       { .wait = 10000, .name = "10ms" },
> +               };
> +               unsigned int i;
> +
> +               for (i = 0; i < sizeof(waits) / sizeof(waits[0]); i++) {
> +                       igt_subtest_f("wait-%s", waits[i].name)
> +                               test_wait(fd, 0, waits[i].wait);
> +
> +                       igt_subtest_f("wait-wedge-%s", waits[i].name)
> +                               test_wait(fd, TEST_WEDGE, waits[i].wait);

Ok.

> +
> +                       igt_subtest_f("in-flight-%s", waits[i].name)
> +                               test_inflight(fd, waits[i].wait);
> +
> +                       igt_subtest_f("in-flight-contexts-%s", waits[i].name)
> +                               test_inflight_contexts(fd, waits[i].wait);
> +
> +                       igt_subtest_f("in-flight-internal-%s", waits[i].name) {
> +                               igt_skip_on(gem_has_semaphores(fd));
> +                               test_inflight_internal(fd, waits[i].wait);

And ok.
-Chris


More information about the Intel-gfx mailing list