[PATCH i-g-t v3 2/5] lib/intel_compute_square_kernels: use stoppable loop for LNL/BMG

Dandamudi, Priyanka priyanka.dandamudi at intel.com
Wed Apr 9 08:43:33 UTC 2025



> -----Original Message-----
> From: Kempczynski, Zbigniew <zbigniew.kempczynski at intel.com>
> Sent: 09 April 2025 12:21 AM
> To: igt-dev at lists.freedesktop.org
> Cc: Kempczynski, Zbigniew <zbigniew.kempczynski at intel.com>; Dugast,
> Francois <francois.dugast at intel.com>; Dandamudi, Priyanka
> <priyanka.dandamudi at intel.com>
> Subject: [PATCH i-g-t v3 2/5] lib/intel_compute_square_kernels: use stoppable
> loop for LNL/BMG
> 
> Instead of the tweaked loop, start using a loop which can be stopped via a
> simple CPU write to memory. Currently this is possible for LNL and BMG platforms.
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Francois Dugast <francois.dugast at intel.com>
> Cc: Priyanka Dandamudi <priyanka.dandamudi at intel.com>
> ---
>  lib/intel_compute_square_kernels.c | 41
> ++++++++++++++++++++++++++++++
>  1 file changed, 41 insertions(+)
> 
> diff --git a/lib/intel_compute_square_kernels.c
> b/lib/intel_compute_square_kernels.c
> index 76c48c4511..626dbc4cec 100644
> --- a/lib/intel_compute_square_kernels.c
> +++ b/lib/intel_compute_square_kernels.c
> @@ -3844,6 +3844,43 @@ static const unsigned char xe2lpg_kernel_inc_bin[]
> = {
>  	0x00, 0x00, 0x00, 0x00
>  };
> 
> +/*
> + * OpenCL code is in opencl/loop.cl
> + *
> + * To work properly it requires uncached reads, so ocloc has to
> + * be called with: -options " -igc_opts 'LscLoadCacheControlOverride=1'
> +arg */
> +
> +static const unsigned char xe2lpg_kernel_loop_bin[] = {
> +	0x65, 0x00, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f, 0x04, 0x00, 0x00, 0x02,
> +	0xc0, 0xff, 0xff, 0xff, 0x40, 0x19, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f,
> +	0x04, 0x7f, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x31, 0x20, 0x01, 0x80,
> +	0x00, 0x00, 0x0c, 0x02, 0x8f, 0x7f, 0x00, 0xfa, 0x03, 0x00, 0x70, 0xf6,
> +	0x61, 0x00, 0x10, 0x2c, 0x01, 0x00, 0x10, 0x00, 0x66, 0x09, 0x00, 0x80,
> +	0x20, 0x82, 0x01, 0x80, 0x00, 0x80, 0x00, 0x02, 0xc0, 0x04, 0x00, 0x40,
> +	0x01, 0x09, 0x8c, 0x3c, 0x00, 0x00, 0x10, 0x00, 0x61, 0x80, 0x84, 0xa4,
> +	0x04, 0x02, 0x10, 0x00, 0x31, 0x21, 0x01, 0x80, 0x00, 0x00, 0x0c, 0x03,
> +	0x0c, 0x04, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00, 0x70, 0x81, 0x14, 0x80,
> +	0x60, 0x86, 0x01, 0x00, 0x04, 0x03, 0x00, 0x16, 0x34, 0x12, 0x34, 0x12,
> +	0x20, 0x00, 0x00, 0x94, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0xd8, 0xff, 0xff, 0xff, 0x61, 0x00, 0x10, 0x28, 0x7f, 0x01, 0x10, 0x00,
> +	0x31, 0x22, 0x02, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x30,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
> +
>  unsigned char xelpg_kernel_square_bin[] = {
>  	0x61, 0x00, 0x03, 0x80, 0x20, 0x42, 0x05, 0x7f, 0x00, 0x00, 0x00, 0x00,
>  	0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x80, 0x20, 0x82, 0x45, 0x7f,
> @@ -6629,6 +6666,8 @@ const struct intel_compute_kernels
> intel_compute_square_kernels[] = {
>  		.long_kernel_size = sizeof(xe2lpg_kernel_inc_bin),
>  		.sip_kernel = xe2lpg_kernel_sip_bin,
>  		.sip_kernel_size = sizeof(xe2lpg_kernel_sip_bin),
> +		.loop_kernel = xe2lpg_kernel_loop_bin,
> +		.loop_kernel_size = sizeof(xe2lpg_kernel_loop_bin),
>  	},
>  	{
>  		.ip_ver = IP_VER(20, 04),
> @@ -6638,6 +6677,8 @@ const struct intel_compute_kernels
> intel_compute_square_kernels[] = {
>  		.long_kernel_size = sizeof(xe2lpg_kernel_inc_bin),
>  		.sip_kernel = xe2lpg_kernel_sip_bin,
>  		.sip_kernel_size = sizeof(xe2lpg_kernel_sip_bin),
> +		.loop_kernel = xe2lpg_kernel_loop_bin,
> +		.loop_kernel_size = sizeof(xe2lpg_kernel_loop_bin),
>  	},
>  	{
>  		.ip_ver = IP_VER(30, 00),
LGTM,
Reviewed-by: Priyanka Dandamudi <priyanka.dandamudi at intel.com>
> --
> 2.34.1



More information about the igt-dev mailing list