[igt-dev] [PATCH i-g-t 1/1] i915/gem_exec_fence: Allow wait_bb to run on !rcs
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Fri Dec 3 18:10:27 UTC 2021
On Fri, Dec 03, 2021 at 07:40:49AM +0100, Zbigniew Kempczyński wrote:
> From: Chris Wilson <chris.p.wilson at intel.com>
>
> From Haswell up until gen12, only rcs supports all the registers and MI
> commands required to build the wait batch; before Haswell MI does not
> support the required MATH operations nor the predication. However, after
> gen12 we may not always have rcs available and so require to run the
> delay on other command streamers, which requires us to use the
> appropriate mmio base for the alternate engine.
>
> Signed-off-by: Chris Wilson <chris.p.wilson at intel.com>
LGTM and according to no-reloc work will make my life easier:
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
--
Zbigniew
> ---
> tests/i915/gem_exec_fence.c | 141 ++++++++++++++++++------------------
> 1 file changed, 72 insertions(+), 69 deletions(-)
>
> diff --git a/tests/i915/gem_exec_fence.c b/tests/i915/gem_exec_fence.c
> index 8859c81cd..9a6336ce9 100644
> --- a/tests/i915/gem_exec_fence.c
> +++ b/tests/i915/gem_exec_fence.c
> @@ -2490,13 +2490,6 @@ struct inter_engine_context {
> void *write_ptrs[2];
> } *batches;
>
> - void *wait_bb;
> - uint32_t wait_bb_len;
> - uint32_t wait_bb_handle;
> -
> - void *jump_ptr;
> - void *timestamp2_ptr;
> -
> const intel_ctx_t *wait_ctx;
> uint32_t wait_timeline;
>
> @@ -2549,37 +2542,73 @@ static void submit_timeline_execbuf(struct inter_engine_context *context,
> gem_execbuf(context->fd, execbuf);
> }
>
> -static void build_wait_bb(struct inter_engine_context *context,
> - uint64_t delay,
> - uint64_t timestamp_frequency)
> +static unsigned int offset_in_page(void *addr)
> +{
> + return (uintptr_t)addr & 4095;
> +}
> +
> +static uint64_t
> +get_cs_timestamp_frequency(int fd)
> {
> - uint32_t *bb = context->wait_bb = calloc(1, 4096);
> + int cs_ts_freq = 0;
> + drm_i915_getparam_t gp;
> +
> + gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
> + gp.value = &cs_ts_freq;
> + if (igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
> + return cs_ts_freq;
> +
> + igt_skip("Kernel with PARAM_CS_TIMESTAMP_FREQUENCY support required\n");
> +}
> +
> +static struct drm_i915_gem_exec_object2
> +build_wait_bb(int i915,
> + const struct intel_execution_engine2 *engine,
> + uint64_t delay,
> + struct drm_i915_gem_relocation_entry *relocs)
> +{
> + const uint64_t timestamp_frequency = get_cs_timestamp_frequency(i915);
> uint64_t wait_value =
> 0xffffffffffffffff - (delay * timestamp_frequency) / NSEC_PER_SEC;
> + struct drm_i915_gem_exec_object2 obj = {};
> + uint32_t mmio_base = gem_engine_mmio_base(i915, engine->name);
> + uint32_t *map, *bb;
> +
> + igt_debug("%s wait_value=0x%"PRIx64", %x\n", engine->name, wait_value, mmio_base);
> + igt_assert(mmio_base);
> +
> + obj.handle = gem_create(i915, 4096);
> + obj.relocs_ptr = to_user_pointer(memset(relocs, 0, sizeof(*relocs)));
> + obj.relocation_count = 1;
> + obj.offset = 64 << 20;
>
> - igt_debug("wait_value=0x%"PRIx64"\n", wait_value);
> + relocs->target_handle = obj.handle;
> + relocs->presumed_offset = obj.offset;
> +
> + map = gem_mmap__device_coherent(i915, obj.handle, 0, 4096, PROT_WRITE);
> + bb = map;
>
> *bb++ = MI_LOAD_REGISTER_IMM;
> - *bb++ = 0x2000 + HSW_CS_GPR(0);
> + *bb++ = mmio_base + HSW_CS_GPR(0);
> *bb++ = wait_value & 0xffffffff;
> *bb++ = MI_LOAD_REGISTER_IMM;
> - *bb++ = 0x2000 + HSW_CS_GPR(0) + 4;
> + *bb++ = mmio_base + HSW_CS_GPR(0) + 4;
> *bb++ = wait_value >> 32;
>
> *bb++ = MI_LOAD_REGISTER_REG;
> - *bb++ = 0x2000 + RING_TIMESTAMP;
> - *bb++ = 0x2000 + HSW_CS_GPR(1);
> + *bb++ = mmio_base + RING_TIMESTAMP;
> + *bb++ = mmio_base + HSW_CS_GPR(1);
> *bb++ = MI_LOAD_REGISTER_IMM;
> - *bb++ = 0x2000 + HSW_CS_GPR(1) + 4;
> + *bb++ = mmio_base + HSW_CS_GPR(1) + 4;
> *bb++ = 0;
>
> - context->timestamp2_ptr = bb;
> - *bb++ = MI_LOAD_REGISTER_REG;
> - *bb++ = 0x2000 + RING_TIMESTAMP;
> - *bb++ = 0x2000 + HSW_CS_GPR(2);
> *bb++ = MI_LOAD_REGISTER_IMM;
> - *bb++ = 0x2000 + HSW_CS_GPR(2) + 4;
> + *bb++ = mmio_base + HSW_CS_GPR(2) + 4;
> *bb++ = 0;
> + relocs->delta = offset_in_page(bb);
> + *bb++ = MI_LOAD_REGISTER_REG;
> + *bb++ = mmio_base + RING_TIMESTAMP;
> + *bb++ = mmio_base + HSW_CS_GPR(2);
>
> *bb++ = MI_MATH(4);
> *bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(2));
> @@ -2594,52 +2623,47 @@ static void build_wait_bb(struct inter_engine_context *context,
> *bb++ = MI_MATH_STOREINV(MI_MATH_REG(4), MI_MATH_REG_CF);
>
> *bb++ = MI_LOAD_REGISTER_REG;
> - *bb++ = 0x2000 + HSW_CS_GPR(4);
> - *bb++ = 0x2000 + MI_PREDICATE_RESULT_1;
> + *bb++ = mmio_base + HSW_CS_GPR(4);
> + *bb++ = mmio_base + MI_PREDICATE_RESULT_1;
>
> *bb++ = MI_BATCH_BUFFER_START | MI_BATCH_PREDICATE | 1;
> - context->jump_ptr = bb;
> - *bb++ = 0;
> - *bb++ = 0;
> + relocs->offset = offset_in_page(bb);
> + *bb++ = obj.offset + relocs->delta;
> + *bb++ = obj.offset >> 32;
>
> *bb++ = MI_BATCH_BUFFER_END;
>
> - context->wait_bb_len = ALIGN((void *) bb - context->wait_bb, 8);
> + munmap(map, 4096);
> + return obj;
> }
>
> -static void wait_engine(struct inter_engine_context *context,
> +static void wait_engine(int i915,
> + struct inter_engine_context *context,
> uint32_t run_engine_idx,
> uint32_t signal_syncobj,
> uint64_t signal_value)
> {
> - struct drm_i915_gem_relocation_entry relocs[1];
> + struct drm_i915_gem_relocation_entry reloc;
> struct drm_i915_gem_exec_object2 objects[2] = {
> context->engine_counter_object,
> - {
> - .handle = context->wait_bb_handle,
> - .relocs_ptr = to_user_pointer(&relocs),
> - .relocation_count = ARRAY_SIZE(relocs),
> - },
> + build_wait_bb(i915,
> + &context->engines.engines[run_engine_idx],
> + 20 * 1000 * 1000ull /* 20ms */,
> + &reloc),
> };
> struct drm_i915_gem_execbuffer2 execbuf = {
> .buffers_ptr = to_user_pointer(&objects[0]),
> .buffer_count = 2,
> - .flags = I915_EXEC_HANDLE_LUT,
> .rsvd1 = context->wait_ctx->id,
> - .batch_len = context->wait_bb_len,
> + .flags = I915_EXEC_NO_RELOC,
> + .batch_len = 4096,
> };
>
> - memset(&relocs, 0, sizeof(relocs));
> -
> - /* MI_BATCH_BUFFER_START */
> - relocs[0].target_handle = 1;
> - relocs[0].delta = context->timestamp2_ptr - context->wait_bb;
> - relocs[0].offset = context->jump_ptr - context->wait_bb;
> - relocs[0].presumed_offset = -1;
> -
> submit_timeline_execbuf(context, &execbuf, run_engine_idx,
> 0, 0,
> signal_syncobj, signal_value);
> +
> + gem_close(i915, objects[1].handle);
> }
>
> static void build_increment_engine_bb(struct inter_engine_batches *batch,
> @@ -2775,20 +2799,6 @@ static uint64_t fib(uint32_t iters)
> return last_value;
> }
>
> -static uint64_t
> -get_cs_timestamp_frequency(int fd)
> -{
> - int cs_ts_freq = 0;
> - drm_i915_getparam_t gp;
> -
> - gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
> - gp.value = &cs_ts_freq;
> - if (igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
> - return cs_ts_freq;
> -
> - igt_skip("Kernel with PARAM_CS_TIMESTAMP_FREQUENCY support required\n");
> -}
> -
> static void setup_timeline_chain_engines(struct inter_engine_context *context, int fd,
> const intel_ctx_cfg_t *cfg)
> {
> @@ -2812,11 +2822,6 @@ static void setup_timeline_chain_engines(struct inter_engine_context *context, i
> I915_CONTEXT_MAX_USER_PRIORITY - ARRAY_SIZE(context->iterations) + i);
> }
>
> - build_wait_bb(context, 20 * 1000 * 1000ull /* 20ms */, get_cs_timestamp_frequency(fd));
> - context->wait_bb_handle = gem_create(fd, 4096);
> - gem_write(fd, context->wait_bb_handle, 0,
> - context->wait_bb, context->wait_bb_len);
> -
> context->batches = calloc(context->engines.nengines,
> sizeof(*context->batches));
> for (uint32_t e = 0; e < context->engines.nengines; e++) {
> @@ -2854,8 +2859,6 @@ static void teardown_timeline_chain_engines(struct inter_engine_context *context
>
> intel_ctx_destroy(context->fd, context->wait_ctx);
> syncobj_destroy(context->fd, context->wait_timeline);
> - gem_close(context->fd, context->wait_bb_handle);
> - free(context->wait_bb);
>
> for (uint32_t e = 0; e < context->engines.nengines; e++) {
> struct inter_engine_batches *batches = &context->batches[e];
> @@ -2878,7 +2881,7 @@ static void test_syncobj_timeline_chain_engines(int fd, const intel_ctx_cfg_t *c
> * Delay all the other operations by making them depend on an
> * active wait on the RCS.
> */
> - wait_engine(&ctx, 0, ctx.wait_timeline, 1);
> + wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);
>
> for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
> for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
> @@ -2938,7 +2941,7 @@ static void test_syncobj_stationary_timeline_chain_engines(int fd, const intel_c
> * Delay all the other operations by making them depend on an
> * active wait on the RCS.
> */
> - wait_engine(&ctx, 0, ctx.wait_timeline, 1);
> + wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);
>
> for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
> for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
> @@ -2999,7 +3002,7 @@ static void test_syncobj_backward_timeline_chain_engines(int fd, const intel_ctx
> * Delay all the other operations by making them depend on an
> * active wait on the RCS.
> */
> - wait_engine(&ctx, 0, ctx.wait_timeline, 1);
> + wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);
>
> for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
> for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
> --
> 2.26.0
>
More information about the igt-dev
mailing list