[PATCH 1/3] lib/intel_compute: loop_kernel_duration in Pipeline

Thu Aug 21 10:14:18 UTC 2025

On Tue, Aug 19, 2025 at 07:43:36AM +0000, nishit.sharma at intel.com wrote:
> From: Nishit Sharma <nishit.sharma at intel.com>
> 
> To keep a kernel executing on the GPU for a specific duration, a
> brief sleep is required. A new variable, loop_kernel_duration, is
> introduced in struct user_execenv; it holds the sleep duration, and
> the GPU keeps running the kernel for that long. The
> xe_run_intel_compute_xxx() calls are synchronous, hence a sleep is
> used to keep the workload executing on the GPU for some time;
> loop_kernel_duration acts as the sleep duration.
> loop_kernel_duration is checked in all pipelines. Currently only the
> xe2lpg and xe3lpg pipelines support the loop_kernel shader, hence
> loop_kernel_duration works in these pipelines only.

I reviewed your patch earlier but overlooked that the ufence is missing.
So my r-b applies to this one once you have altered the commit message
according to my comment.

--
Zbigniew

> 
> Signed-off-by: Nishit Sharma <nishit.sharma at intel.com>
> ---
>  lib/intel_compute.c | 25 ++++++++++++++++++++++++-
>  lib/intel_compute.h |  2 ++
>  2 files changed, 26 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/intel_compute.c b/lib/intel_compute.c
> index 147dd2916..5a919deb8 100644
> --- a/lib/intel_compute.c
> +++ b/lib/intel_compute.c
> @@ -849,6 +849,9 @@ static void compute_exec(int fd, const unsigned char *kernel,
>  	uint16_t devid = intel_get_drm_devid(fd);
>  	int entries = ARRAY_SIZE(bo_dict);
>  
> +	if (user && (user->kernel || user->loop_kernel_duration))
> +		igt_skip("Pipeline doesn't support loop_kernel\n");
> +
>  	bo_execenv_create(fd, &execenv, eci, user);
>  
>  	/* Set dynamic sizes */
> @@ -1131,6 +1134,9 @@ static void xehp_compute_exec(int fd, const unsigned char *kernel,
>  	uint64_t bind_output_addr = (user && user->output_addr) ? user->output_addr : ADDR_OUTPUT;
>  	int entries = ARRAY_SIZE(bo_dict);
>  
> +	if (user && (user->kernel || user->loop_kernel_duration))
> +		igt_skip("Pipeline doesn't support loop_kernel\n");
> +
>  	bo_execenv_create(fd, &execenv, eci, user);
>  
>  	/* Set dynamic sizes */
> @@ -1343,6 +1349,9 @@ static void xehpc_compute_exec(int fd, const unsigned char *kernel,
>  	uint64_t bind_output_addr = (user && user->output_addr) ? user->output_addr : ADDR_OUTPUT;
>  	int entries = ARRAY_SIZE(bo_dict);
>  
> +	if (user && (user->kernel || user->loop_kernel_duration))
> +		igt_skip("Pipeline doesn't support loop_kernel\n");
> +
>  	bo_execenv_create(fd, &execenv, eci, user);
>  
>  	/* Set dynamic sizes */
> @@ -1731,6 +1740,9 @@ static void xelpg_compute_exec(int fd, const unsigned char *kernel,
>  	uint64_t bind_output_addr = (user && user->output_addr) ? user->output_addr : ADDR_OUTPUT;
>  	int entries = ARRAY_SIZE(bo_dict);
>  
> +	if (user && (user->kernel || user->loop_kernel_duration))
> +		igt_skip("Pipeline doesn't support loop_kernel\n");
> +
>  	bo_execenv_create(fd, &execenv, eci, user);
>  
>  	/* Set dynamic sizes */
> @@ -1818,6 +1830,7 @@ static void xe2lpg_compute_exec(int fd, const unsigned char *kernel,
>  	uint64_t bind_input_addr = (user && user->input_addr) ? user->input_addr : ADDR_INPUT;
>  	uint64_t bind_output_addr = (user && user->output_addr) ? user->output_addr : ADDR_OUTPUT;
>  	int entries = ARRAY_SIZE(bo_dict);
> +	int64_t timeout_one_ns = 1;
>  
>  	bo_execenv_create(fd, &execenv, eci, user);
>  
> @@ -1849,7 +1862,17 @@ static void xe2lpg_compute_exec(int fd, const unsigned char *kernel,
>  				    OFFSET_KERNEL, 0, false,
>  				    execenv.array_size);
>  
> -	bo_execenv_exec(&execenv, ADDR_BATCH);
> +	if (user && user->loop_kernel_duration) {
> +		bo_execenv_exec_async(&execenv, ADDR_BATCH);
> +		igt_measured_usleep(user->loop_kernel_duration);
> +		/* Check that the loop kernel has not completed yet */
> +		igt_assert_neq(0, __xe_wait_ufence(fd, &execenv.bo_sync->sync, USER_FENCE_VALUE,
> +					execenv.exec_queue, &timeout_one_ns));
> +		((int *)bo_dict[4].data)[0] = MAGIC_LOOP_STOP;
> +		bo_execenv_sync(&execenv);
> +		user->skip_results_check = 1;
> +	} else
> +		bo_execenv_exec(&execenv, ADDR_BATCH);
>  
>  	if (!user || (user && !user->skip_results_check))
>  		bo_check_square(input_data, output_data, execenv.array_size);
> diff --git a/lib/intel_compute.h b/lib/intel_compute.h
> index 412791d07..54a1c7f82 100644
> --- a/lib/intel_compute.h
> +++ b/lib/intel_compute.h
> @@ -63,6 +63,8 @@ struct user_execenv {
>  	uint64_t input_addr;
>  	/** @output_addr: override default address of the output array if provided */
>  	uint64_t output_addr;
> +	/** @loop_kernel_duration: how long the loop kernel should keep running on the GPU */
> +	uint64_t loop_kernel_duration;
>  };
>  
>  enum execenv_alloc_prefs {
> -- 
> 2.43.0
>