[Intel-gfx] [PATCH i-g-t 18/19] i915: Add gem_exec_balancer
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Tue Mar 12 10:23:12 UTC 2019
On 08/03/2019 18:11, Chris Wilson wrote:
> Exercise the in-kernel load balancer checking that we can distribute
> batches across the set of ctx->engines to avoid load.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> tests/Makefile.am | 1 +
> tests/Makefile.sources | 1 +
> tests/i915/gem_exec_balancer.c | 627 +++++++++++++++++++++++++++++++++
> tests/meson.build | 7 +
> 4 files changed, 636 insertions(+)
> create mode 100644 tests/i915/gem_exec_balancer.c
>
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 289249b42..68a9c14bf 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -102,6 +102,7 @@ gem_close_race_LDADD = $(LDADD) -lpthread
> gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> gem_ctx_thrash_LDADD = $(LDADD) -lpthread
> gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> +i915_gem_exec_balancer_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
> gem_exec_capture_LDADD = $(LDADD) -lz
> gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> gem_exec_parallel_LDADD = $(LDADD) -lpthread
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 41e756f15..f6c21a1aa 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -23,6 +23,7 @@ TESTS_progs = \
> drm_read \
> i915/gem_ctx_engines \
> i915/gem_ctx_shared \
> + i915/gem_exec_balancer \
> kms_3d \
> kms_addfb_basic \
> kms_atomic \
> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> new file mode 100644
> index 000000000..d9fdffe67
> --- /dev/null
> +++ b/tests/i915/gem_exec_balancer.c
> @@ -0,0 +1,627 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include <sched.h>
> +
> +#include "igt.h"
> +#include "igt_perf.h"
> +#include "i915/gem_ring.h"
> +#include "sw_sync.h"
> +
> +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing");
> +
> +struct class_instance {
> + uint16_t class;
> + uint16_t instance;
> +};
> +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
> +
> +static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
> +{
> + int fd;
> +
> + fd = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
> + if (fd != -1) {
> + close(fd);
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static struct class_instance *
> +list_engines(int i915, uint32_t class_mask, unsigned int *out)
> +{
> + unsigned int count = 0, size = 64;
> + struct class_instance *engines;
> +
> + engines = malloc(size * sizeof(*engines));
> + if (!engines) {
> + *out = 0;
> + return NULL;
> + }
> +
> + for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
> + class_mask;
> + class++, class_mask >>= 1) {
> + if (!(class_mask & 1))
> + continue;
> +
> + for (unsigned int instance = 0;
> + instance < INSTANCE_COUNT;
> + instance++) {
> + if (!has_class_instance(i915, class, instance))
> + continue;
> +
> + if (count == size) {
> + struct class_instance *e;
> +
> + size *= 2;
> + e = realloc(engines, size*sizeof(*engines));
> + if (!e) {
> + *out = count;
> + return engines;
> + }
> +
> + engines = e;
> + }
> +
> + engines[count++] = (struct class_instance){
> + .class = class,
> + .instance = instance,
> + };
> + }
> + }
> +
> + if (!count) {
> + free(engines);
> + engines = NULL;
> + }
> +
> + *out = count;
> + return engines;
> +}
> +
> +static int __set_load_balancer(int i915, uint32_t ctx,
> + const struct class_instance *ci,
> + unsigned int count)
> +{
> + struct i915_context_engines_load_balance balancer = {
> + { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
> + .engines_mask = ~0ull,
> + };
> + I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, count + 1);
> + struct drm_i915_gem_context_param p = {
> + .ctx_id = ctx,
> + .param = I915_CONTEXT_PARAM_ENGINES,
> + .size = sizeof(&engines),
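This should be sizeof(engines): sizeof(&engines) is only the size of a pointer, not of the engines object that .value points to.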
Regards,
Tvrtko
> + .value = to_user_pointer(&engines)
> + };
> +
> + engines.extensions = to_user_pointer(&balancer),
> + engines.class_instance[0].engine_class = I915_ENGINE_CLASS_INVALID;
> + engines.class_instance[0].engine_instance = I915_ENGINE_CLASS_INVALID_NONE;
> + memcpy(engines.class_instance + 1, ci, count * sizeof(uint32_t));
> +
> + return __gem_context_set_param(i915, &p);
> +}
> +
> +static void set_load_balancer(int i915, uint32_t ctx,
> + const struct class_instance *ci,
> + unsigned int count)
> +{
> + igt_assert_eq(__set_load_balancer(i915, ctx, ci, count), 0);
> +}
> +
> +static uint32_t load_balancer_create(int i915,
> + const struct class_instance *ci,
> + unsigned int count)
> +{
> + uint32_t ctx;
> +
> + ctx = gem_context_create(i915);
> + set_load_balancer(i915, ctx, ci, count);
> +
> + return ctx;
> +}
> +
> +static void kick_kthreads(int period_us)
> +{
> + sched_yield();
> + usleep(period_us);
> +}
> +
> +static double measure_load(int pmu, int period_us)
> +{
> + uint64_t data[2];
> + uint64_t d_t, d_v;
> +
> + kick_kthreads(period_us);
> +
> + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> + d_v = -data[0];
> + d_t = -data[1];
> +
> + usleep(period_us);
> +
> + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> + d_v += data[0];
> + d_t += data[1];
> +
> + return d_v / (double)d_t;
> +}
> +
> +static double measure_min_load(int pmu, unsigned int num, int period_us)
> +{
> + uint64_t data[2 + num];
> + uint64_t d_t, d_v[num];
> + uint64_t min = -1, max = 0;
> +
> + kick_kthreads(period_us);
> +
> + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> + for (unsigned int n = 0; n < num; n++)
> + d_v[n] = -data[2 + n];
> + d_t = -data[1];
> +
> + usleep(period_us);
> +
> + igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +
> + d_t += data[1];
> + for (unsigned int n = 0; n < num; n++) {
> + d_v[n] += data[2 + n];
> + igt_debug("engine[%d]: %.1f%%\n",
> + n, d_v[n] / (double)d_t * 100);
> + if (d_v[n] < min)
> + min = d_v[n];
> + if (d_v[n] > max)
> + max = d_v[n];
> + }
> +
> + igt_debug("elapsed: %"PRIu64"ns, load [%.1f, %.1f]%%\n",
> + d_t, min / (double)d_t * 100, max / (double)d_t * 100);
> +
> + return min / (double)d_t;
> +}
> +
> +static void check_individual_engine(int i915,
> + uint32_t ctx,
> + const struct class_instance *ci,
> + int idx)
> +{
> + igt_spin_t *spin;
> + double load;
> + int pmu;
> +
> + pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(ci[idx].class,
> + ci[idx].instance));
> +
> + spin = igt_spin_batch_new(i915, .ctx = ctx, .engine = idx + 1);
> + load = measure_load(pmu, 10000);
> + igt_spin_batch_free(i915, spin);
> +
> + close(pmu);
> +
> + igt_assert_f(load > 0.90,
> + "engine %d (class:instance %d:%d) was found to be only %.1f%% busy\n",
> + idx, ci[idx].class, ci[idx].instance, load*100);
> +}
> +
> +static void individual(int i915)
> +{
> + uint32_t ctx;
> +
> + /*
> + * I915_CONTEXT_PARAM_ENGINES allows us to index into the user
> + * supplied array from gem_execbuf(). Our check is to build the
> + * ctx->engine[] with various different engine classes, feed in
> + * a spinner and then ask pmu to confirm that the expected engine
> + * was busy.
> + */
> +
> + ctx = gem_context_create(i915);
> +
> + for (int mask = 0; mask < 32; mask++) {
> + struct class_instance *ci;
> + unsigned int count;
> +
> + ci = list_engines(i915, 1u << mask, &count);
> + if (!ci)
> + continue;
> +
> + igt_debug("Found %d engines of class %d\n", count, mask);
> +
> + for (int pass = 0; pass < count; pass++) { /* approx. count! */
> + igt_permute_array(ci, count, igt_exchange_int64);
> + set_load_balancer(i915, ctx, ci, count);
> + for (unsigned int n = 0; n < count; n++)
> + check_individual_engine(i915, ctx, ci, n);
> + }
> +
> + free(ci);
> + }
> +
> + gem_context_destroy(i915, ctx);
> +}
> +
> +static int add_pmu(int pmu, const struct class_instance *ci)
> +{
> + return perf_i915_open_group(I915_PMU_ENGINE_BUSY(ci->class,
> + ci->instance),
> + pmu);
> +}
> +
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> + const uint32_t bbe = MI_BATCH_BUFFER_END;
> + uint32_t handle;
> +
> + handle = gem_create(i915, ALIGN(offset + 4, 4096));
> + gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> + return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> + return __batch_create(i915, 0);
> +}
> +
> +static void full(int i915, unsigned int flags)
> +#define PULSE 0x1
> +#define LATE 0x2
> +{
> + struct drm_i915_gem_exec_object2 batch = {
> + .handle = batch_create(i915),
> + };
> +
> + if (flags & LATE)
> + igt_require_sw_sync();
> +
> + /*
> + * I915_CONTEXT_PARAM_ENGINES changes the meaning of I915_EXEC_DEFAULT
> + * to provide an automatic selection from the ctx->engine[]. It
> + * employs load-balancing to evenly distribute the workload across the
> + * array. If we submit N spinners, we expect them to be simultaneously
> + * running across N engines and use PMU to confirm that the entire
> + * set of engines is busy.
> + *
> + * We complicate matters by interspersing short-lived tasks to challenge
> + * the kernel to search for space in which to insert new batches.
> + */
> +
> +
> + for (int mask = 0; mask < 32; mask++) {
> + struct class_instance *ci;
> + igt_spin_t *spin = NULL;
> + unsigned int count;
> + IGT_CORK_FENCE(cork);
> + double load;
> + int fence = -1;
> + int *pmu;
> +
> + ci = list_engines(i915, 1u << mask, &count);
> + if (!ci)
> + continue;
> +
> + igt_debug("Found %d engines of class %d\n", count, mask);
> +
> + pmu = malloc(sizeof(*pmu) * count);
> + igt_assert(pmu);
> +
> + if (flags & LATE)
> + fence = igt_cork_plug(&cork, i915);
> +
> + pmu[0] = -1;
> + for (unsigned int n = 0; n < count; n++) {
> + uint32_t ctx;
> +
> + pmu[n] = add_pmu(pmu[0], &ci[n]);
> +
> + if (flags & PULSE) {
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffers_ptr = to_user_pointer(&batch),
> + .buffer_count = 1,
> + .rsvd2 = fence,
> + .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> + };
> +
> + gem_execbuf(i915, &eb);
> + }
> +
> + /*
> + * Each spinner needs to be on a new timeline,
> + * otherwise they will just sit in the single queue
> + * and not run concurrently.
> + */
> + ctx = load_balancer_create(i915, ci, count);
> +
> + if (spin == NULL) {
> + spin = __igt_spin_batch_new(i915, ctx, 0, 0);
> + } else {
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = spin->handle,
> + };
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffers_ptr = to_user_pointer(&obj),
> + .buffer_count = 1,
> + .rsvd1 = ctx,
> + .rsvd2 = fence,
> + .flags = flags & LATE ? I915_EXEC_FENCE_IN : 0,
> + };
> +
> + gem_execbuf(i915, &eb);
> + }
> +
> + gem_context_destroy(i915, ctx);
> + }
> +
> + if (flags & LATE) {
> + igt_cork_unplug(&cork);
> + close(fence);
> + }
> +
> + load = measure_min_load(pmu[0], count, 10000);
> + igt_spin_batch_free(i915, spin);
> +
> + close(pmu[0]);
> + free(pmu);
> +
> + free(ci);
> +
> + igt_assert_f(load > 0.90,
> + "minimum load for %d x class:%d was found to be only %.1f%% busy\n",
> + count, mask, load*100);
> + }
> +
> + gem_close(i915, batch.handle);
> +}
> +
> +static void ping(int i915, uint32_t ctx, unsigned int engine)
> +{
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = batch_create(i915),
> + };
> + struct drm_i915_gem_execbuffer2 execbuf = {
> + .buffers_ptr = to_user_pointer(&obj),
> + .buffer_count = 1,
> + .flags = engine,
> + .rsvd1 = ctx,
> + };
> + gem_execbuf(i915, &execbuf);
> + gem_sync(i915, obj.handle);
> + gem_close(i915, obj.handle);
> +}
> +
> +static void semaphore(int i915)
> +{
> + uint32_t block[2], scratch;
> + igt_spin_t *spin[3];
> +
> + /*
> + * If we are using HW semaphores to launch serialised requests
> + * on different engines concurrently, we want to verify that real
> + * work is unimpeded.
> + */
> + igt_require(gem_scheduler_has_preemption(i915));
> +
> + block[0] = gem_context_create(i915);
> + block[1] = gem_context_create(i915);
> +
> + scratch = gem_create(i915, 4096);
> + spin[2] = igt_spin_batch_new(i915, .dependency = scratch);
> + for (int mask = 1; mask < 32; mask++) {
> + struct class_instance *ci;
> + unsigned int count;
> + uint32_t vip;
> +
> + ci = list_engines(i915, 1u << mask, &count);
> + if (!ci)
> + continue;
> +
> + if (count < ARRAY_SIZE(block))
> + continue;
> +
> + /* Ensure that we completely occupy all engines in this group */
> + count = ARRAY_SIZE(block);
> +
> + for (int i = 0; i < count; i++) {
> + set_load_balancer(i915, block[i], ci, count);
> + spin[i] = __igt_spin_batch_new(i915,
> + .ctx = block[i],
> + .dependency = scratch);
> + }
> +
> + /*
> + * Either we haven't blocked both engines with semaphores,
> + * or we let the vip through. If not, we hang.
> + */
> + vip = gem_context_create(i915);
> + set_load_balancer(i915, vip, ci, count);
> + ping(i915, vip, 0);
> + gem_context_destroy(i915, vip);
> +
> + for (int i = 0; i < count; i++)
> + igt_spin_batch_free(i915, spin[i]);
> +
> + free(ci);
> + }
> + igt_spin_batch_free(i915, spin[2]);
> + gem_close(i915, scratch);
> +
> + gem_context_destroy(i915, block[1]);
> + gem_context_destroy(i915, block[0]);
> +}
> +
> +static void smoketest(int i915, int timeout)
> +{
> + struct drm_i915_gem_exec_object2 batch[2] = {
> + { .handle = __batch_create(i915, 16380) }
> + };
> + unsigned int ncontext = 0;
> + uint32_t *contexts = NULL;
> + uint32_t *handles = NULL;
> +
> + igt_require_sw_sync();
> +
> + for (int mask = 0; mask < 32; mask++) {
> + struct class_instance *ci;
> + unsigned int count = 0;
> +
> + ci = list_engines(i915, 1u << mask, &count);
> + if (!ci || count < 2) {
> + free(ci);
> + continue;
> + }
> +
> + igt_debug("Found %d engines of class %d\n", count, mask);
> +
> + ncontext += 128;
> + contexts = realloc(contexts, sizeof(*contexts) * ncontext);
> + igt_assert(contexts);
> +
> + for (unsigned int n = ncontext - 128; n < ncontext; n++) {
> + contexts[n] = load_balancer_create(i915, ci, count);
> + igt_assert(contexts[n]);
> + }
> +
> + free(ci);
> + }
> + igt_debug("Created %d virtual engines (one per context)\n", ncontext);
> + igt_require(ncontext);
> +
> + contexts = realloc(contexts, sizeof(*contexts) * ncontext * 4);
> + igt_assert(contexts);
> + memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> + ncontext *= 2;
> + memcpy(contexts + ncontext, contexts, ncontext * sizeof(*contexts));
> + ncontext *= 2;
> +
> + handles = malloc(sizeof(*handles) * ncontext);
> + igt_assert(handles);
> + for (unsigned int n = 0; n < ncontext; n++)
> + handles[n] = gem_create(i915, 4096);
> +
> + igt_until_timeout(timeout) {
> + unsigned int count = 1 + (rand() % (ncontext - 1));
> + IGT_CORK_FENCE(cork);
> + int fence = igt_cork_plug(&cork, i915);
> +
> + for (unsigned int n = 0; n < count; n++) {
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffers_ptr = to_user_pointer(batch),
> + .buffer_count = ARRAY_SIZE(batch),
> + .rsvd1 = contexts[n],
> + .rsvd2 = fence,
> + .flags = I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_IN,
> + };
> + batch[1].handle = handles[n];
> + gem_execbuf(i915, &eb);
> + }
> + igt_permute_array(handles, count, igt_exchange_int);
> +
> + igt_cork_unplug(&cork);
> + for (unsigned int n = 0; n < count; n++)
> + gem_sync(i915, handles[n]);
> +
> + close(fence);
> + }
> +
> + for (unsigned int n = 0; n < ncontext; n++) {
> + gem_close(i915, handles[n]);
> + __gem_context_destroy(i915, contexts[n]);
> + }
> + free(handles);
> + free(contexts);
> + gem_close(i915, batch[0].handle);
> +}
> +
> +static bool has_context_engines(int i915)
> +{
> + struct drm_i915_gem_context_param p = {
> + .param = I915_CONTEXT_PARAM_ENGINES,
> + };
> +
> + return __gem_context_set_param(i915, &p) == 0;
> +}
> +
> +static bool has_load_balancer(int i915)
> +{
> + struct class_instance ci = {};
> + uint32_t ctx;
> + int err;
> +
> + ctx = gem_context_create(i915);
> + err = __set_load_balancer(i915, ctx, &ci, 1);
> + gem_context_destroy(i915, ctx);
> +
> + return err == 0;
> +}
> +
> +igt_main
> +{
> + int i915 = -1;
> +
> + igt_skip_on_simulation();
> +
> + igt_fixture {
> + i915 = drm_open_driver(DRIVER_INTEL);
> + igt_require_gem(i915);
> +
> + gem_require_contexts(i915);
> + igt_require(has_context_engines(i915));
> + igt_require(has_load_balancer(i915));
> +
> + igt_fork_hang_detector(i915);
> + }
> +
> + igt_subtest("individual")
> + individual(i915);
> +
> + igt_subtest_group {
> + static const struct {
> + const char *name;
> + unsigned int flags;
> + } phases[] = {
> + { "", 0 },
> + { "-pulse", PULSE },
> + { "-late", LATE },
> + { "-late-pulse", PULSE | LATE },
> + { }
> + };
> + for (typeof(*phases) *p = phases; p->name; p++)
> + igt_subtest_f("full%s", p->name)
> + full(i915, p->flags);
> + }
> +
> + igt_subtest("semaphore")
> + semaphore(i915);
> +
> + igt_subtest("smoke")
> + smoketest(i915, 20);
> +
> + igt_fixture {
> + igt_stop_hang_detector();
> + }
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 0539c20c8..bd2db825e 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -293,6 +293,13 @@ test_executables += executable('gem_eio',
> install : true)
> test_list += 'gem_eio'
>
> +test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
> + dependencies : test_deps + [ lib_igt_perf ],
> + install_dir : libexecdir,
> + install_rpath : libexecdir_rpathdir,
> + install : true)
> +test_progs += 'gem_exec_balancer'
> +
> test_executables += executable('gem_mocs_settings',
> join_paths('i915', 'gem_mocs_settings.c'),
> dependencies : test_deps + [ lib_igt_perf ],
>
More information about the Intel-gfx
mailing list