[PATCH i-g-t 2/2] lib/intel_compute_square_kernels: use stoppable loop for LNL/BMG

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Fri Apr 4 09:26:24 UTC 2025


Instead of the tweaked loop, start using a loop which can be stopped
via a simple CPU write to memory. Currently this is possible on the
LNL and BMG platforms.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Francois Dugast <francois.dugast at intel.com>
Cc: Priyanka Dandamudi <priyanka.dandamudi at intel.com>
---
 lib/intel_compute_square_kernels.c | 41 ++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/lib/intel_compute_square_kernels.c b/lib/intel_compute_square_kernels.c
index 76c48c4511..626dbc4cec 100644
--- a/lib/intel_compute_square_kernels.c
+++ b/lib/intel_compute_square_kernels.c
@@ -3844,6 +3844,43 @@ static const unsigned char xe2lpg_kernel_inc_bin[] = {
 	0x00, 0x00, 0x00, 0x00
 };
 
+/*
+ * OpenCL source for this kernel is in opencl/loop.cl.
+ *
+ * The kernel requires uncached reads to work properly, so ocloc has to be
+ * called with: -options "-igc_opts 'LscLoadCacheControlOverride=1'"
+ */
+
+static const unsigned char xe2lpg_kernel_loop_bin[] = {
+	0x65, 0x00, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f, 0x04, 0x00, 0x00, 0x02,
+	0xc0, 0xff, 0xff, 0xff, 0x40, 0x19, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f,
+	0x04, 0x7f, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x31, 0x20, 0x01, 0x80,
+	0x00, 0x00, 0x0c, 0x02, 0x8f, 0x7f, 0x00, 0xfa, 0x03, 0x00, 0x70, 0xf6,
+	0x61, 0x00, 0x10, 0x2c, 0x01, 0x00, 0x10, 0x00, 0x66, 0x09, 0x00, 0x80,
+	0x20, 0x82, 0x01, 0x80, 0x00, 0x80, 0x00, 0x02, 0xc0, 0x04, 0x00, 0x40,
+	0x01, 0x09, 0x8c, 0x3c, 0x00, 0x00, 0x10, 0x00, 0x61, 0x80, 0x84, 0xa4,
+	0x04, 0x02, 0x10, 0x00, 0x31, 0x21, 0x01, 0x80, 0x00, 0x00, 0x0c, 0x03,
+	0x0c, 0x04, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00, 0x70, 0x81, 0x14, 0x80,
+	0x60, 0x86, 0x01, 0x00, 0x04, 0x03, 0x00, 0x16, 0x34, 0x12, 0x34, 0x12,
+	0x20, 0x00, 0x00, 0x94, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0xd8, 0xff, 0xff, 0xff, 0x61, 0x00, 0x10, 0x28, 0x7f, 0x01, 0x10, 0x00,
+	0x31, 0x22, 0x02, 0x80, 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x30,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
 unsigned char xelpg_kernel_square_bin[] = {
 	0x61, 0x00, 0x03, 0x80, 0x20, 0x42, 0x05, 0x7f, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x80, 0x20, 0x82, 0x45, 0x7f,
@@ -6629,6 +6666,8 @@ const struct intel_compute_kernels intel_compute_square_kernels[] = {
 		.long_kernel_size = sizeof(xe2lpg_kernel_inc_bin),
 		.sip_kernel = xe2lpg_kernel_sip_bin,
 		.sip_kernel_size = sizeof(xe2lpg_kernel_sip_bin),
+		.loop_kernel = xe2lpg_kernel_loop_bin,
+		.loop_kernel_size = sizeof(xe2lpg_kernel_loop_bin),
 	},
 	{
 		.ip_ver = IP_VER(20, 04),
@@ -6638,6 +6677,8 @@ const struct intel_compute_kernels intel_compute_square_kernels[] = {
 		.long_kernel_size = sizeof(xe2lpg_kernel_inc_bin),
 		.sip_kernel = xe2lpg_kernel_sip_bin,
 		.sip_kernel_size = sizeof(xe2lpg_kernel_sip_bin),
+		.loop_kernel = xe2lpg_kernel_loop_bin,
+		.loop_kernel_size = sizeof(xe2lpg_kernel_loop_bin),
 	},
 	{
 		.ip_ver = IP_VER(30, 00),
-- 
2.34.1



More information about the igt-dev mailing list