[igt-dev] [PATCH i-g-t v3 10/11] tests/i915/vm_bind: Add gem_exec3_balancer test
Niranjana Vishwanathapura
niranjana.vishwanathapura at intel.com
Mon Oct 17 17:15:20 UTC 2022
On Fri, Oct 14, 2022 at 11:12:10AM +0200, Kamil Konieczny wrote:
>Hi Niranjana,
>
>On 2022-10-09 at 23:59:28 -0700, Niranjana Vishwanathapura wrote:
>> From: "Vishwanathapura, Niranjana" <niranjana.vishwanathapura at intel.com>
>- ^^^^^ ^ ^
>You still have the name switched here, but you can just delete this
>line since you are sending this patch yourself.
Thanks Kamil,
Ah, I forgot to fix this for the new patches. I will update my global git config.
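For reference, the fix is a one-liner along these lines (assuming it is only
the user.name ordering in my config that needs the swap):

    git config --global user.name "Niranjana Vishwanathapura"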
>
>>
>> To test parallel submission support in execbuf3, port the subtest
>> gem_exec_balancer@parallel-ordering to a new gem_exec3_balancer test
>> and switch it to the execbuf3 ioctl.
>
>Could you keep this test inside the old one? If not, how much new
>code did you add? What new functions did you add? What about just
>adding a new subtest @vmbind-parallel-ordering?
>
No, eventually we need to port the other gem_exec_balancer subtests here
as well (those still applicable to execbuf3). I only ported this one to
validate parallel submission handling in the i915 driver.
In general, we want to keep the legacy gem_exec_* tests and the newer
gem_exec3_* tests separate. Execbuf2 and execbuf3 differ in various
aspects, and a lot of the legacy behavior is not applicable to execbuf3.
So I think it is better to keep the execbuf3 tests separate.
>>
>> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>> ---
>> tests/i915/gem_exec3_balancer.c | 500 ++++++++++++++++++++++++++++++++
>> tests/meson.build | 7 +
>> 2 files changed, 507 insertions(+)
>> create mode 100644 tests/i915/gem_exec3_balancer.c
>>
>> diff --git a/tests/i915/gem_exec3_balancer.c b/tests/i915/gem_exec3_balancer.c
>> new file mode 100644
>> index 0000000000..3719e9fe0c
>> --- /dev/null
>> +++ b/tests/i915/gem_exec3_balancer.c
>> @@ -0,0 +1,500 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2022 Intel Corporation
>> + */
>> +
>> +/** @file gem_exec3_balancer.c
>> + *
>> + * Load balancer tests with execbuf3.
>> + * Ported from gem_exec_balancer and made to work with
>> + * vm_bind and execbuf3.
>> + *
>> + */
>> +
>> +#include <poll.h>
>> +
>> +#include "i915/gem.h"
>> +#include "i915/gem_engine_topology.h"
>> +#include "i915/gem_create.h"
>> +#include "i915/gem_vm.h"
>> +#include "igt.h"
>> +#include "igt_gt.h"
>> +#include "igt_perf.h"
>> +#include "igt_syncobj.h"
>> +
>> +IGT_TEST_DESCRIPTION("Exercise in-kernel load-balancing with execbuf3");
>> +
>> +#define INSTANCE_COUNT (1 << I915_PMU_SAMPLE_INSTANCE_BITS)
>> +
>> +static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
>> +{
>> + int fd;
>> +
>> + fd = perf_i915_open(i915, I915_PMU_ENGINE_BUSY(class, instance));
>> + if (fd >= 0) {
>> + close(fd);
>> + return true;
>> + }
>> +
>> + return false;
>> +}
>> +
>> +static struct i915_engine_class_instance *
>> +list_engines(int i915, uint32_t class_mask, unsigned int *out)
>> +{
>> + unsigned int count = 0, size = 64;
>> + struct i915_engine_class_instance *engines;
>> +
>> + engines = malloc(size * sizeof(*engines));
>> + igt_assert(engines);
>> +
>> + for (enum drm_i915_gem_engine_class class = I915_ENGINE_CLASS_RENDER;
>> + class_mask;
>> + class++, class_mask >>= 1) {
>> + if (!(class_mask & 1))
>> + continue;
>> +
>> + for (unsigned int instance = 0;
>> + instance < INSTANCE_COUNT;
>> + instance++) {
>> + if (!has_class_instance(i915, class, instance))
>> + continue;
>> +
>> + if (count == size) {
>> + size *= 2;
>> + engines = realloc(engines,
>> + size * sizeof(*engines));
>> + igt_assert(engines);
>> + }
>> +
>> + engines[count++] = (struct i915_engine_class_instance){
>> + .engine_class = class,
>> + .engine_instance = instance,
>> + };
>> + }
>> + }
>> +
>> + if (!count) {
>> + free(engines);
>> + engines = NULL;
>> + }
>> +
>> + *out = count;
>> + return engines;
>> +}
>> +
>> +static bool has_perf_engines(int i915)
>> +{
>> + return i915_perf_type_id(i915);
>> +}
>> +
>> +static intel_ctx_cfg_t
>> +ctx_cfg_for_engines(const struct i915_engine_class_instance *ci,
>> + unsigned int count)
>> +{
>> + intel_ctx_cfg_t cfg = { };
>> + unsigned int i;
>> +
>> + for (i = 0; i < count; i++)
>> + cfg.engines[i] = ci[i];
>> + cfg.num_engines = count;
>> +
>> + return cfg;
>> +}
>> +
>> +static const intel_ctx_t *
>> +ctx_create_engines(int i915, const struct i915_engine_class_instance *ci,
>> + unsigned int count)
>> +{
>> + intel_ctx_cfg_t cfg = ctx_cfg_for_engines(ci, count);
>> + return intel_ctx_create(i915, &cfg);
>> +}
>> +
>> +static void check_bo(int i915, uint32_t handle, unsigned int expected,
>> + bool wait)
>> +{
>> + uint32_t *map;
>> +
>> + map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_READ);
>> + if (wait)
>> + gem_set_domain(i915, handle, I915_GEM_DOMAIN_CPU,
>> + I915_GEM_DOMAIN_CPU);
>> + igt_assert_eq(map[0], expected);
>> + munmap(map, 4096);
>> +}
>> +
>> +static struct drm_i915_query_engine_info *query_engine_info(int i915)
>> +{
>> + struct drm_i915_query_engine_info *engines;
>> +
>> +#define QUERY_SIZE 0x4000
>> + engines = malloc(QUERY_SIZE);
>> + igt_assert(engines);
>> + memset(engines, 0, QUERY_SIZE);
>> + igt_assert(!__gem_query_engines(i915, engines, QUERY_SIZE));
>> +#undef QUERY_SIZE
>> +
>> + return engines;
>> +}
>> +
>> +/* This function only works if siblings contains all instances of a class */
>> +static void logical_sort_siblings(int i915,
>> + struct i915_engine_class_instance *siblings,
>> + unsigned int count)
>> +{
>> + struct i915_engine_class_instance *sorted;
>> + struct drm_i915_query_engine_info *engines;
>> + unsigned int i, j;
>> +
>> + sorted = calloc(count, sizeof(*sorted));
>> + igt_assert(sorted);
>> +
>> + engines = query_engine_info(i915);
>> +
>> + for (j = 0; j < count; ++j) {
>> + for (i = 0; i < engines->num_engines; ++i) {
>> + if (siblings[j].engine_class ==
>> + engines->engines[i].engine.engine_class &&
>> + siblings[j].engine_instance ==
>> + engines->engines[i].engine.engine_instance) {
>> + uint16_t logical_instance =
>> + engines->engines[i].logical_instance;
>> +
>> + igt_assert(logical_instance < count);
>> + igt_assert(!sorted[logical_instance].engine_class);
>> + igt_assert(!sorted[logical_instance].engine_instance);
>> +
>> + sorted[logical_instance] = siblings[j];
>> + break;
>> + }
>> + }
>> + igt_assert(i != engines->num_engines);
>> + }
>> +
>> + memcpy(siblings, sorted, sizeof(*sorted) * count);
>> + free(sorted);
>> + free(engines);
>> +}
>> +
>> +static bool fence_busy(int fence)
>> +{
>> + return poll(&(struct pollfd){fence, POLLIN}, 1, 0) == 0;
>> +}
>> +
>> +/*
>> + * Always read from engine instance 0; with GuC submission the values are the
>> + * same across all instances. With execlists they may differ, but that is
>> + * quite unlikely, and if they do we can live with it.
>> + */
>> +static unsigned int get_timeslice(int i915,
>> + struct i915_engine_class_instance engine)
>> +{
>> + unsigned int val = 0; /* default if the engine class is not handled below */
>> +
>> + switch (engine.engine_class) {
>> + case I915_ENGINE_CLASS_RENDER:
>> + gem_engine_property_scanf(i915, "rcs0", "timeslice_duration_ms",
>> + "%d", &val);
>> + break;
>> + case I915_ENGINE_CLASS_COPY:
>> + gem_engine_property_scanf(i915, "bcs0", "timeslice_duration_ms",
>> + "%d", &val);
>> + break;
>> + case I915_ENGINE_CLASS_VIDEO:
>> + gem_engine_property_scanf(i915, "vcs0", "timeslice_duration_ms",
>> + "%d", &val);
>> + break;
>> + case I915_ENGINE_CLASS_VIDEO_ENHANCE:
>> + gem_engine_property_scanf(i915, "vecs0", "timeslice_duration_ms",
>> + "%d", &val);
>> + break;
>> + }
>> +
>> + return val;
>> +}
>> +
>> +static void i915_vm_bind(int i915, uint32_t vm_id, uint64_t va, uint32_t handle,
>> + uint64_t length, uint32_t syncobj)
>> +{
>> + struct drm_i915_gem_vm_bind bind;
>> +
>> + memset(&bind, 0, sizeof(bind));
>> + bind.vm_id = vm_id;
>> + bind.handle = handle;
>> + bind.start = va;
>> + bind.offset = 0;
>> + bind.length = length;
>> + bind.fence.flags = I915_TIMELINE_FENCE_SIGNAL;
>> + bind.fence.handle = syncobj;
>> +
>> + gem_vm_bind(i915, &bind);
>> +}
>> +
>> +static void i915_vm_unbind(int i915, uint32_t vm_id, uint64_t va, uint64_t length)
>> +{
>> + struct drm_i915_gem_vm_unbind unbind;
>> +
>> + memset(&unbind, 0, sizeof(unbind));
>> + unbind.vm_id = vm_id;
>> + unbind.start = va;
>> + unbind.length = length;
>> +
>> + gem_vm_unbind(i915, &unbind);
>> +}
>> +
>> +static uint64_t gettime_ns(void)
>> +{
>> + struct timespec current;
>> + clock_gettime(CLOCK_MONOTONIC, &current);
>> + return (uint64_t)current.tv_sec * NSEC_PER_SEC + current.tv_nsec;
>> +}
>> +
>> +static bool syncobj_busy(int i915, uint32_t handle)
>> +{
>> + bool result;
>> + int sf;
>> +
>> + sf = syncobj_handle_to_fd(i915, handle,
>> + DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE);
>> + result = poll(&(struct pollfd){sf, POLLIN}, 1, 0) == 0;
>> + close(sf);
>> +
>> + return result;
>> +}
>> +
>> +/*
>> + * Ensure a parallel submit actually runs on HW in parallel by putting a
>> + * spinner on one engine, doing a parallel submit, and checking that the
>> + * parallel submit is blocked behind the spinner.
>> + */
>> +static void parallel_ordering(int i915, unsigned int flags)
>> +{
>> + uint32_t vm_id;
>> + int class;
>> +
>> + vm_id = gem_vm_create_in_vm_bind_mode(i915);
>> +
>> + for (class = 0; class < 32; class++) {
>> + struct drm_i915_gem_timeline_fence exec_fence[32] = { };
>> + const intel_ctx_t *ctx = NULL, *spin_ctx = NULL;
>> + uint64_t fence_value = 0, batch_addr[32] = { };
>> + struct i915_engine_class_instance *siblings;
>> + uint32_t exec_syncobj, bind_syncobj[32];
>> + struct drm_i915_gem_execbuffer3 execbuf;
>> + uint32_t batch[16], obj[32];
>> + intel_ctx_cfg_t cfg;
>> + unsigned int count;
>> + igt_spin_t *spin;
>> + uint64_t ahnd;
>> + int i = 0;
>> +
>> + siblings = list_engines(i915, 1u << class, &count);
>> + if (!siblings)
>> + continue;
>> +
>> + if (count < 2) {
>> + free(siblings);
>> + continue;
>> + }
>> +
>> + logical_sort_siblings(i915, siblings, count);
>> +
>> + memset(&cfg, 0, sizeof(cfg));
>> + cfg.parallel = true;
>> + cfg.num_engines = 1;
>> + cfg.width = count;
>> + memcpy(cfg.engines, siblings, sizeof(*siblings) * count);
>> +
>> + if (__intel_ctx_create(i915, &cfg, &ctx)) {
>> + free(siblings);
>> + continue;
>> + }
>> + gem_context_set_vm(i915, ctx->id, vm_id);
>> +
>> + batch[i] = MI_ATOMIC | MI_ATOMIC_INC;
>> +#define TARGET_BO_OFFSET (0x1 << 16)
>> + batch[++i] = TARGET_BO_OFFSET;
>> + batch[++i] = 0;
>> + batch[++i] = MI_BATCH_BUFFER_END;
>> +
>> + obj[0] = gem_create(i915, 4096);
>> + bind_syncobj[0] = syncobj_create(i915, 0);
>> + exec_fence[0].handle = bind_syncobj[0];
>> + exec_fence[0].flags = I915_TIMELINE_FENCE_WAIT;
>> + i915_vm_bind(i915, vm_id, TARGET_BO_OFFSET, obj[0], 4096, bind_syncobj[0]);
>> +
>> + for (i = 1; i < count + 1; ++i) {
>> + obj[i] = gem_create(i915, 4096);
>> + gem_write(i915, obj[i], 0, batch, sizeof(batch));
>> +
>> + batch_addr[i - 1] = TARGET_BO_OFFSET * (i + 1);
>> + bind_syncobj[i] = syncobj_create(i915, 0);
>> + exec_fence[i].handle = bind_syncobj[i];
>> + exec_fence[i].flags = I915_TIMELINE_FENCE_WAIT;
>> + i915_vm_bind(i915, vm_id, batch_addr[i - 1], obj[i], 4096, bind_syncobj[i]);
>> + }
>> +
>> + exec_syncobj = syncobj_create(i915, 0);
>> + exec_fence[i].handle = exec_syncobj;
>> + exec_fence[i].flags = I915_TIMELINE_FENCE_SIGNAL;
>> +
>> + memset(&execbuf, 0, sizeof(execbuf));
>> + execbuf.ctx_id = ctx->id;
>> + execbuf.batch_address = to_user_pointer(batch_addr);
>> + execbuf.fence_count = count + 2;
>> + execbuf.timeline_fences = to_user_pointer(exec_fence);
>> +
>> + /* Block parallel submission */
>> + spin_ctx = ctx_create_engines(i915, siblings, count);
>> + ahnd = get_simple_ahnd(i915, spin_ctx->id);
>> + spin = __igt_spin_new(i915,
>> + .ahnd = ahnd,
>> + .ctx = spin_ctx,
>> + .engine = 0,
>> + .flags = IGT_SPIN_FENCE_OUT |
>> + IGT_SPIN_NO_PREEMPTION);
>> +
>> + /* Wait for the spinner to start */
>> + usleep(5 * 10000);
>> + igt_assert(fence_busy(spin->out_fence));
>> +
>> + /* Submit parallel execbuf */
>> + gem_execbuf3(i915, &execbuf);
>> +
>> + /*
>> + * Wait long enough for timeslicing to kick in but not
>> + * preemption. Spinner + parallel execbuf should both be
>> + * active. This assumes default timeslice / preemption values;
>> + * if those are changed it is possible for the test to fail.
>> + */
>> + usleep(get_timeslice(i915, siblings[0]) * 2);
>> + igt_assert(fence_busy(spin->out_fence));
>> + igt_assert(syncobj_busy(i915, exec_syncobj));
>> + check_bo(i915, obj[0], 0, false);
>> +
>> + /*
>> + * End spinner and wait for spinner + parallel execbuf
>> + * to complete.
>> + */
>> + igt_spin_end(spin);
>> + igt_assert(syncobj_timeline_wait(i915, &exec_syncobj, &fence_value, 1,
>> + gettime_ns() + (2 * NSEC_PER_SEC),
>> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT, NULL));
>> + igt_assert(!syncobj_busy(i915, exec_syncobj));
>> + syncobj_destroy(i915, exec_syncobj);
>> + for (i = 0; i < count + 1; ++i)
>> + syncobj_destroy(i915, bind_syncobj[i]);
>> + check_bo(i915, obj[0], count, true);
>> +
>> + /* Clean up */
>> + intel_ctx_destroy(i915, ctx);
>> + intel_ctx_destroy(i915, spin_ctx);
>> + i915_vm_unbind(i915, vm_id, TARGET_BO_OFFSET, 4096);
>> + for (i = 1; i < count + 1; ++i)
>> + i915_vm_unbind(i915, vm_id, batch_addr[i - 1], 4096);
>> +
>> + for (i = 0; i < count + 1; ++i)
>> + gem_close(i915, obj[i]);
>> + free(siblings);
>> + igt_spin_free(i915, spin);
>> + put_ahnd(ahnd);
>> + }
>> +
>> + gem_vm_destroy(i915, vm_id);
>> +}
>> +
>> +static bool has_load_balancer(int i915)
>> +{
>> + const intel_ctx_cfg_t cfg = {
>> + .load_balance = true,
>> + .num_engines = 1,
>> + };
>> + const intel_ctx_t *ctx = NULL;
>> + int err;
>> +
>> + err = __intel_ctx_create(i915, &cfg, &ctx);
>> + intel_ctx_destroy(i915, ctx);
>> +
>> + return err == 0;
>> +}
>> +
>> +static bool has_logical_mapping(int i915)
>> +{
>> + struct drm_i915_query_engine_info *engines;
>> + unsigned int i;
>> +
>> + engines = query_engine_info(i915);
>> +
>> + for (i = 0; i < engines->num_engines; ++i)
>> + if (!(engines->engines[i].flags &
>> + I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE)) {
>> + free(engines);
>> + return false;
>> + }
>> +
>> + free(engines);
>> + return true;
>> +}
>> +
>> +static bool has_parallel_execbuf(int i915)
>> +{
>> + intel_ctx_cfg_t cfg = {
>> + .parallel = true,
>> + .num_engines = 1,
>> + };
>> + const intel_ctx_t *ctx = NULL;
>> + int err;
>> +
>> + for (int class = 0; class < 32; class++) {
>> + struct i915_engine_class_instance *siblings;
>> + unsigned int count;
>> +
>> + siblings = list_engines(i915, 1u << class, &count);
>> + if (!siblings)
>> + continue;
>> +
>> + if (count < 2) {
>> + free(siblings);
>> + continue;
>> + }
>> +
>> + logical_sort_siblings(i915, siblings, count);
>> +
>> + cfg.width = count;
>> + memcpy(cfg.engines, siblings, sizeof(*siblings) * count);
>> + free(siblings);
>> +
>> + err = __intel_ctx_create(i915, &cfg, &ctx);
>> + intel_ctx_destroy(i915, ctx);
>> +
>> + return err == 0;
>> + }
>> +
>> + return false;
>> +}
>> +
>> +igt_main
>> +{
>> + int i915 = -1;
>> +
>> + igt_fixture {
>> + i915 = drm_open_driver(DRIVER_INTEL);
>> + igt_require_gem(i915);
>> +
>> + gem_require_contexts(i915);
>> + igt_require(gem_has_engine_topology(i915));
>> + igt_require(has_load_balancer(i915));
>> + igt_require(has_perf_engines(i915));
>
>imho we could also have a function
> has_vm_bind(i915)
>and use it:
> igt_require(has_vm_bind(i915));
>
The other vm_bind tests have a vm_bind version check. I will make it a
common i915_vm_bind_version() library function and add a check here.
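Roughly, a sketch of what I have in mind, assuming the UAPI series keeps
exposing the version via getparam (I915_PARAM_VM_BIND_VERSION below is
taken from the proposed vm_bind UAPI and is not a mainline parameter):

    /*
     * Hypothetical common helper: returns the vm_bind version, or 0 on
     * kernels without vm_bind support. Uses struct drm_i915_getparam
     * from i915_drm.h.
     */
    static int i915_vm_bind_version(int i915)
    {
            int value = 0;
            struct drm_i915_getparam gp = {
                    .param = I915_PARAM_VM_BIND_VERSION,
                    .value = &value,
            };

            /* Unsupported kernels reject the param; treat as version 0 */
            if (igt_ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp))
                    return 0;

            return value;
    }

and then in the fixture:

    igt_require(i915_vm_bind_version(i915) > 0);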
Regards,
Niranjana
>Regards,
>Kamil
>
>> + }
>> +
>> + igt_subtest_group {
>> + igt_fixture {
>> + igt_require(has_logical_mapping(i915));
>> + igt_require(has_parallel_execbuf(i915));
>> + }
>> +
>> + igt_describe("Ensure a parallel submit actually runs in parallel");
>> + igt_subtest("parallel-ordering")
>> + parallel_ordering(i915, 0);
>> + }
>> +}
>> diff --git a/tests/meson.build b/tests/meson.build
>> index 50de8b7e89..6ca5a94282 100644
>> --- a/tests/meson.build
>> +++ b/tests/meson.build
>> @@ -375,6 +375,13 @@ test_executables += executable('gem_exec_balancer', 'i915/gem_exec_balancer.c',
>> install : true)
>> test_list += 'gem_exec_balancer'
>>
>> +test_executables += executable('gem_exec3_balancer', 'i915/gem_exec3_balancer.c',
>> + dependencies : test_deps + [ lib_igt_perf ],
>> + install_dir : libexecdir,
>> + install_rpath : libexecdir_rpathdir,
>> + install : true)
>> +test_list += 'gem_exec3_balancer'
>> +
>> test_executables += executable('gem_mmap_offset',
>> join_paths('i915', 'gem_mmap_offset.c'),
>> dependencies : test_deps + [ libatomic ],
>> --
>> 2.21.0.rc0.32.g243a4c7e27
>>