[igt-dev] [PATCH i-g-t 1/1] i915/gem_exec_fence: Allow wait_bb to run on !rcs

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Fri Dec 3 18:10:27 UTC 2021


On Fri, Dec 03, 2021 at 07:40:49AM +0100, Zbigniew Kempczyński wrote:
> From: Chris Wilson <chris.p.wilson at intel.com>
> 
> From Haswell up until gen12, only rcs supports all the registers and MI
> commands required to build the wait batch; before Haswell MI does not
> support the required MATH operations nor the predication. However, after
> gen12 we may not always have rcs available and so require to run the
> delay on other command streamers, which requires us to use the
> appropriate mmio base for the alternate engine.
> 
> Signed-off-by: Chris Wilson <chris.p.wilson at intel.com>

LGTM and according to no-reloc work will make my life easier:

Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>

--
Zbigniew

> ---
>  tests/i915/gem_exec_fence.c | 141 ++++++++++++++++++------------------
>  1 file changed, 72 insertions(+), 69 deletions(-)
> 
> diff --git a/tests/i915/gem_exec_fence.c b/tests/i915/gem_exec_fence.c
> index 8859c81cd..9a6336ce9 100644
> --- a/tests/i915/gem_exec_fence.c
> +++ b/tests/i915/gem_exec_fence.c
> @@ -2490,13 +2490,6 @@ struct inter_engine_context {
>  		void *write_ptrs[2];
>  	} *batches;
>  
> -	void *wait_bb;
> -	uint32_t wait_bb_len;
> -	uint32_t wait_bb_handle;
> -
> -	void *jump_ptr;
> -	void *timestamp2_ptr;
> -
>  	const intel_ctx_t *wait_ctx;
>  	uint32_t wait_timeline;
>  
> @@ -2549,37 +2542,73 @@ static void submit_timeline_execbuf(struct inter_engine_context *context,
>  	gem_execbuf(context->fd, execbuf);
>  }
>  
> -static void build_wait_bb(struct inter_engine_context *context,
> -			  uint64_t delay,
> -			  uint64_t timestamp_frequency)
> +static unsigned int offset_in_page(void *addr)
> +{
> +	return (uintptr_t)addr & 4095;
> +}
> +
> +static uint64_t
> +get_cs_timestamp_frequency(int fd)
>  {
> -	uint32_t *bb = context->wait_bb = calloc(1, 4096);
> +	int cs_ts_freq = 0;
> +	drm_i915_getparam_t gp;
> +
> +	gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
> +	gp.value = &cs_ts_freq;
> +	if (igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
> +		return cs_ts_freq;
> +
> +	igt_skip("Kernel with PARAM_CS_TIMESTAMP_FREQUENCY support required\n");
> +}
> +
> +static struct drm_i915_gem_exec_object2
> +build_wait_bb(int i915,
> +	      const struct intel_execution_engine2 *engine,
> +	      uint64_t delay,
> +	      struct drm_i915_gem_relocation_entry *relocs)
> +{
> +	const uint64_t timestamp_frequency = get_cs_timestamp_frequency(i915);
>  	uint64_t wait_value =
>  		0xffffffffffffffff - (delay * timestamp_frequency) / NSEC_PER_SEC;
> +	struct drm_i915_gem_exec_object2 obj = {};
> +	uint32_t mmio_base = gem_engine_mmio_base(i915, engine->name);
> +	uint32_t *map, *bb;
> +
> +	igt_debug("%s wait_value=0x%"PRIx64", %x\n", engine->name, wait_value, mmio_base);
> +	igt_assert(mmio_base);
> +
> +	obj.handle = gem_create(i915, 4096);
> +	obj.relocs_ptr = to_user_pointer(memset(relocs, 0, sizeof(*relocs)));
> +	obj.relocation_count = 1;
> +	obj.offset = 64 << 20;
>  
> -	igt_debug("wait_value=0x%"PRIx64"\n", wait_value);
> +	relocs->target_handle = obj.handle;
> +	relocs->presumed_offset = obj.offset;
> +
> +	map = gem_mmap__device_coherent(i915, obj.handle, 0, 4096, PROT_WRITE);
> +	bb = map;
>  
>  	*bb++ = MI_LOAD_REGISTER_IMM;
> -	*bb++ = 0x2000 + HSW_CS_GPR(0);
> +	*bb++ = mmio_base + HSW_CS_GPR(0);
>  	*bb++ = wait_value & 0xffffffff;
>  	*bb++ = MI_LOAD_REGISTER_IMM;
> -	*bb++ = 0x2000 + HSW_CS_GPR(0) + 4;
> +	*bb++ = mmio_base + HSW_CS_GPR(0) + 4;
>  	*bb++ = wait_value >> 32;
>  
>  	*bb++ = MI_LOAD_REGISTER_REG;
> -	*bb++ = 0x2000 + RING_TIMESTAMP;
> -	*bb++ = 0x2000 + HSW_CS_GPR(1);
> +	*bb++ = mmio_base + RING_TIMESTAMP;
> +	*bb++ = mmio_base + HSW_CS_GPR(1);
>  	*bb++ = MI_LOAD_REGISTER_IMM;
> -	*bb++ = 0x2000 + HSW_CS_GPR(1) + 4;
> +	*bb++ = mmio_base + HSW_CS_GPR(1) + 4;
>  	*bb++ = 0;
>  
> -	context->timestamp2_ptr = bb;
> -	*bb++ = MI_LOAD_REGISTER_REG;
> -	*bb++ = 0x2000 + RING_TIMESTAMP;
> -	*bb++ = 0x2000 + HSW_CS_GPR(2);
>  	*bb++ = MI_LOAD_REGISTER_IMM;
> -	*bb++ = 0x2000 + HSW_CS_GPR(2) + 4;
> +	*bb++ = mmio_base + HSW_CS_GPR(2) + 4;
>  	*bb++ = 0;
> +	relocs->delta = offset_in_page(bb);
> +	*bb++ = MI_LOAD_REGISTER_REG;
> +	*bb++ = mmio_base + RING_TIMESTAMP;
> +	*bb++ = mmio_base + HSW_CS_GPR(2);
>  
>  	*bb++ = MI_MATH(4);
>  	*bb++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(2));
> @@ -2594,52 +2623,47 @@ static void build_wait_bb(struct inter_engine_context *context,
>  	*bb++ = MI_MATH_STOREINV(MI_MATH_REG(4), MI_MATH_REG_CF);
>  
>  	*bb++ = MI_LOAD_REGISTER_REG;
> -	*bb++ = 0x2000 + HSW_CS_GPR(4);
> -	*bb++ = 0x2000 + MI_PREDICATE_RESULT_1;
> +	*bb++ = mmio_base + HSW_CS_GPR(4);
> +	*bb++ = mmio_base + MI_PREDICATE_RESULT_1;
>  
>  	*bb++ = MI_BATCH_BUFFER_START | MI_BATCH_PREDICATE | 1;
> -	context->jump_ptr = bb;
> -	*bb++ = 0;
> -	*bb++ = 0;
> +	relocs->offset = offset_in_page(bb);
> +	*bb++ = obj.offset + relocs->delta;
> +	*bb++ = obj.offset >> 32;
>  
>  	*bb++ = MI_BATCH_BUFFER_END;
>  
> -	context->wait_bb_len = ALIGN((void *) bb - context->wait_bb, 8);
> +	munmap(map, 4096);
> +	return obj;
>  }
>  
> -static void wait_engine(struct inter_engine_context *context,
> +static void wait_engine(int i915,
> +			struct inter_engine_context *context,
>  			uint32_t run_engine_idx,
>  			uint32_t signal_syncobj,
>  			uint64_t signal_value)
>  {
> -	struct drm_i915_gem_relocation_entry relocs[1];
> +	struct drm_i915_gem_relocation_entry reloc;
>  	struct drm_i915_gem_exec_object2 objects[2] = {
>  		context->engine_counter_object,
> -		{
> -			.handle = context->wait_bb_handle,
> -			.relocs_ptr = to_user_pointer(&relocs),
> -			.relocation_count = ARRAY_SIZE(relocs),
> -		},
> +		build_wait_bb(i915,
> +			      &context->engines.engines[run_engine_idx],
> +			      20 * 1000 * 1000ull /* 20ms */,
> +			      &reloc),
>  	};
>  	struct drm_i915_gem_execbuffer2 execbuf = {
>  		.buffers_ptr = to_user_pointer(&objects[0]),
>  		.buffer_count = 2,
> -		.flags = I915_EXEC_HANDLE_LUT,
>  		.rsvd1 = context->wait_ctx->id,
> -		.batch_len = context->wait_bb_len,
> +		.flags = I915_EXEC_NO_RELOC,
> +		.batch_len = 4096,
>  	};
>  
> -	memset(&relocs, 0, sizeof(relocs));
> -
> -	/* MI_BATCH_BUFFER_START */
> -	relocs[0].target_handle = 1;
> -	relocs[0].delta = context->timestamp2_ptr - context->wait_bb;
> -	relocs[0].offset = context->jump_ptr - context->wait_bb;
> -	relocs[0].presumed_offset = -1;
> -
>  	submit_timeline_execbuf(context, &execbuf, run_engine_idx,
>  				0, 0,
>  				signal_syncobj, signal_value);
> +
> +	gem_close(i915, objects[1].handle);
>  }
>  
>  static void build_increment_engine_bb(struct inter_engine_batches *batch,
> @@ -2775,20 +2799,6 @@ static uint64_t fib(uint32_t iters)
>  	return last_value;
>  }
>  
> -static uint64_t
> -get_cs_timestamp_frequency(int fd)
> -{
> -	int cs_ts_freq = 0;
> -	drm_i915_getparam_t gp;
> -
> -	gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
> -	gp.value = &cs_ts_freq;
> -	if (igt_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
> -		return cs_ts_freq;
> -
> -	igt_skip("Kernel with PARAM_CS_TIMESTAMP_FREQUENCY support required\n");
> -}
> -
>  static void setup_timeline_chain_engines(struct inter_engine_context *context, int fd,
>  					 const intel_ctx_cfg_t *cfg)
>  {
> @@ -2812,11 +2822,6 @@ static void setup_timeline_chain_engines(struct inter_engine_context *context, i
>  					 I915_CONTEXT_MAX_USER_PRIORITY - ARRAY_SIZE(context->iterations) + i);
>  	}
>  
> -	build_wait_bb(context, 20 * 1000 * 1000ull /* 20ms */, get_cs_timestamp_frequency(fd));
> -	context->wait_bb_handle = gem_create(fd, 4096);
> -	gem_write(fd, context->wait_bb_handle, 0,
> -		  context->wait_bb, context->wait_bb_len);
> -
>  	context->batches = calloc(context->engines.nengines,
>  				  sizeof(*context->batches));
>  	for (uint32_t e = 0; e < context->engines.nengines; e++) {
> @@ -2854,8 +2859,6 @@ static void teardown_timeline_chain_engines(struct inter_engine_context *context
>  
>  	intel_ctx_destroy(context->fd, context->wait_ctx);
>  	syncobj_destroy(context->fd, context->wait_timeline);
> -	gem_close(context->fd, context->wait_bb_handle);
> -	free(context->wait_bb);
>  
>  	for (uint32_t e = 0; e < context->engines.nengines; e++) {
>  		struct inter_engine_batches *batches = &context->batches[e];
> @@ -2878,7 +2881,7 @@ static void test_syncobj_timeline_chain_engines(int fd, const intel_ctx_cfg_t *c
>  	 * Delay all the other operations by making them depend on an
>  	 * active wait on the RCS.
>  	 */
> -	wait_engine(&ctx, 0, ctx.wait_timeline, 1);
> +	wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);
>  
>  	for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
>  		for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
> @@ -2938,7 +2941,7 @@ static void test_syncobj_stationary_timeline_chain_engines(int fd, const intel_c
>  	 * Delay all the other operations by making them depend on an
>  	 * active wait on the RCS.
>  	 */
> -	wait_engine(&ctx, 0, ctx.wait_timeline, 1);
> +	wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);
>  
>  	for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
>  		for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
> @@ -2999,7 +3002,7 @@ static void test_syncobj_backward_timeline_chain_engines(int fd, const intel_ctx
>  	 * Delay all the other operations by making them depend on an
>  	 * active wait on the RCS.
>  	 */
> -	wait_engine(&ctx, 0, ctx.wait_timeline, 1);
> +	wait_engine(fd, &ctx, 0, ctx.wait_timeline, 1);
>  
>  	for (uint32_t iter = 0; iter < ARRAY_SIZE(ctx.iterations); iter++) {
>  		for (uint32_t engine = 0; engine < ctx.engines.nengines; engine++) {
> -- 
> 2.26.0
> 


More information about the igt-dev mailing list