[Intel-gfx] [PATCH i-g-t] i915/gem_ctx_switch: Use minimum qlen over all engines and measure switches
Caz Yokoyama
Caz.Yokoyama at intel.com
Mon Feb 25 18:28:34 UTC 2019
Chris,
With your patch, measure_qlen() reports how many gem_execbuf() calls
can be executed (the queue length) within the timeout on the slowest
engine, correct?
Run time becomes 95 sec, which is less than half.
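If it helps, here is a stand-alone sketch of that arithmetic (not IGT
code; estimate_qlen(), the per-engine timings and the 20 sec timeout
are made-up inputs for illustration):

#include <stdio.h>

#define NCTX 64 /* matches ARRAY_SIZE(ctx) in the patch */

/* same arithmetic as the patch: scale the measured rate up to the
 * subtest timeout, then divide by 8 as a safety margin */
static int estimate_qlen(double elapsed_ns, int timeout)
{
	return NCTX * timeout * 1e9 / elapsed_ns / 8 + 1;
}

int main(void)
{
	/* hypothetical timings for NCTX nop batches per engine:
	 * RCS pays a larger context-switch cost than the xCS rings */
	double rcs_ns = 4e6, vcs_ns = 1e6;
	int timeout = 20; /* seconds, made-up for illustration */
	int q_rcs = estimate_qlen(rcs_ns, timeout);
	int q_vcs = estimate_qlen(vcs_ns, timeout);

	/* the patch returns the minimum, so the slowest engine
	 * (here RCS) bounds the queue length for every engine */
	printf("qlen: rcs=%d vcs=%d -> min=%d\n",
	       q_rcs, q_vcs, q_rcs < q_vcs ? q_rcs : q_vcs);
	return 0;
}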
-caz
On Sat, 2019-02-23 at 01:34 +0000, Chris Wilson wrote:
> Not all engines are created equal, and our weighting ends up favouring
> the many faster xCS rings at the expense of RCS. Our qlen estimation
> also failed to factor in the context switch overhead, which is a
> significant factor for nop batches. So we oversubscribe the number of
> batches submitted to RCS and end up waiting for those to complete at
> the end of our subtest timeslice.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Caz Yokoyama <caz.yokoyama at intel.com>
> ---
>  tests/i915/gem_ctx_switch.c | 39 +++++++++++++++++++++++++++++++--------
>  1 file changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
> index 1208cb8d7..87e13b915 100644
> --- a/tests/i915/gem_ctx_switch.c
> +++ b/tests/i915/gem_ctx_switch.c
> @@ -26,6 +26,7 @@
>   */
>  
>  #include "igt.h"
> +#include <limits.h>
>  #include <unistd.h>
>  #include <stdlib.h>
>  #include <stdint.h>
> @@ -58,29 +59,50 @@ static int measure_qlen(int fd,
>  {
>  	const struct drm_i915_gem_exec_object2 * const obj =
>  		(struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuf->buffers_ptr;
> -	int qlen = 64;
> +	uint32_t ctx[64];
> +	int min = INT_MAX, max = 0;
> +
> +	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> +		ctx[i] = gem_context_create(fd);
>  
>  	for (unsigned int n = 0; n < nengine; n++) {
>  		uint64_t saved = execbuf->flags;
>  		struct timespec tv = {};
> +		int q;
>  
>  		execbuf->flags |= engine[n];
>  
> -		igt_nsec_elapsed(&tv);
> -		for (int loop = 0; loop < qlen; loop++)
> +		for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
> +			execbuf->rsvd1 = ctx[i];
>  			gem_execbuf(fd, execbuf);
> +		}
>  		gem_sync(fd, obj->handle);
>  
> -		execbuf->flags = saved;
> +		igt_nsec_elapsed(&tv);
> +		for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
> +			execbuf->rsvd1 = ctx[i];
> +			gem_execbuf(fd, execbuf);
> +		}
> +		gem_sync(fd, obj->handle);
>  
>  		/*
>  		 * Be conservative and aim not to overshoot timeout, so scale
>  		 * down by 8 for hopefully a max of 12.5% error.
>  		 */
> -		qlen = qlen * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
> +		q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
> +		if (q < min)
> +			min = q;
> +		if (q > max)
> +			max = q;
> +
> +		execbuf->flags = saved;
>  	}
>  
> -	return qlen;
> +	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> +		gem_context_destroy(fd, ctx[i]);
> +
> +	igt_debug("Estimated qlen: {min:%d, max:%d}\n", min, max);
> +	return min;
>  }
>  
>  static void single(int fd, uint32_t handle,
> @@ -259,9 +281,10 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>  			clock_gettime(CLOCK_MONOTONIC, &now);
>  			gem_close(fd, obj[0].handle);
>  
> -			igt_info("[%d:%d] %s: %'u cycles: %.3fus%s\n",
> +			igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
>  				 nctx, child, name[child],
>  				 count, elapsed(&start, &now)*1e6 / count,
> -				 flags & INTERRUPTIBLE ? " (interruptible)" : "");
> +				 flags & INTERRUPTIBLE ? " (interruptible)" : "",
> +				 elapsed(&start, &now));
>  		}
>  		igt_waitchildren();
>  	}
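For what it's worth, a quick sanity check on the "12.5% error" comment
in the hunk above (my own arithmetic, not taken from the patch): writing
t = elapsed_ns / 64 for the measured cost of one nop batch on a given
engine, the estimate reduces to

    qlen ~= timeout * 1e9 / (8 * t)

so one full queue of qlen batches takes roughly timeout/8 to execute,
and whatever is still queued when the subtest deadline fires drains in
at most ~1/8 of the timeout, i.e. 12.5%. Returning the minimum over all
engines then keeps even the slowest engine (RCS, with its context-switch
overhead) within that bound.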