[PATCH i-g-t 3/4] lib/intel_compute: migrate compute loop count kernels
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Wed Aug 13 08:25:06 UTC 2025
Move the compute loop count kernels into a dedicated header, lib/intel_compute_krn_loop_count.h, to keep the main kernels file clean.
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
---
lib/intel_compute_krn_loop_count.h | 93 ++++++++++++++++++++++++++++++
lib/intel_compute_square_kernels.c | 80 +------------------------
2 files changed, 94 insertions(+), 79 deletions(-)
create mode 100644 lib/intel_compute_krn_loop_count.h
diff --git a/lib/intel_compute_krn_loop_count.h b/lib/intel_compute_krn_loop_count.h
new file mode 100644
index 0000000000..ecad7fe256
--- /dev/null
+++ b/lib/intel_compute_krn_loop_count.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef __INTEL_COMPUTE_KRN_LOOP_COUNT_H__
+#define __INTEL_COMPUTE_KRN_LOOP_COUNT_H__
+
+#if !defined INTEL_COMPUTE_KRN_COMPILE_GUARD
+#error "Header designed to be included in compute kernels file"
+#endif
+
+/*
+ * OpenCL code is in opencl/loop_count.cl
+ *
+ * To work properly the kernel requires uncached reads, so ocloc has to
+ * be called with: -options " -igc_opts 'LscLoadCacheControlOverride=1'" arg
+ */
+static const unsigned char xe2lpg_kernel_count_bin[] = {
+ 0x01, 0xa1, 0x8c, 0x3c, 0x00, 0x00, 0x10, 0x00, 0x65, 0xa0, 0x00, 0x80,
+ 0x20, 0x82, 0x05, 0x7f, 0x04, 0x00, 0x00, 0x02, 0xc0, 0xff, 0xff, 0xff,
+ 0x40, 0x19, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f, 0x04, 0x7f, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x31, 0x22, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x05,
+ 0x8f, 0x7f, 0x00, 0xfa, 0x03, 0x00, 0x70, 0xf6, 0x61, 0x80, 0x10, 0x2c,
+ 0x02, 0x00, 0x10, 0x00, 0x66, 0x09, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
+ 0x00, 0x80, 0x00, 0x02, 0xc0, 0x04, 0x00, 0x40, 0x41, 0x22, 0x03, 0x80,
+ 0x60, 0x06, 0x01, 0x20, 0x54, 0x05, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00,
+ 0x53, 0x81, 0x00, 0x80, 0x60, 0x06, 0x05, 0x03, 0x54, 0x05, 0x00, 0x06,
+ 0x14, 0x02, 0x00, 0x00, 0x70, 0x00, 0x14, 0x80, 0x60, 0x86, 0x01, 0x00,
+ 0x44, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x40, 0x1a, 0x14, 0x00,
+ 0x60, 0x06, 0x05, 0x06, 0x04, 0x03, 0x00, 0x01, 0x04, 0x01, 0x10, 0x00,
+ 0x20, 0x00, 0x00, 0x84, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xf0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xf4, 0x03, 0x00, 0x02, 0x00,
+ 0x61, 0x00, 0x84, 0xa4, 0x09, 0x05, 0x10, 0x00, 0x31, 0x24, 0x03, 0x80,
+ 0x00, 0x00, 0x0c, 0x08, 0x0c, 0x09, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
+ 0x70, 0x84, 0x94, 0x80, 0x60, 0x86, 0x01, 0x00, 0x04, 0x08, 0x00, 0x16,
+ 0x34, 0x12, 0x34, 0x12, 0x20, 0x00, 0x80, 0x84, 0x00, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xa4,
+ 0x0a, 0x05, 0x16, 0x00, 0x40, 0x00, 0x00, 0x80, 0x60, 0x86, 0x15, 0x03,
+ 0x14, 0x03, 0x00, 0x05, 0x01, 0x00, 0x01, 0x00, 0x31, 0x45, 0x03, 0x80,
+ 0x00, 0x00, 0x0c, 0x0b, 0x0c, 0x0a, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
+ 0x61, 0xa3, 0x10, 0x00, 0xa0, 0x0a, 0x06, 0x0e, 0x04, 0x0a, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x02, 0xa0, 0x0a, 0x06, 0x10,
+ 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x00,
+ 0xa0, 0x0a, 0x16, 0x0e, 0x14, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x61, 0x12, 0x10, 0x02, 0x20, 0x02, 0x16, 0x10, 0x14, 0x0a, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x70, 0x1a, 0x54, 0x80, 0x20, 0x02, 0x01, 0x00,
+ 0x14, 0x03, 0x00, 0x52, 0x44, 0x05, 0x00, 0x00, 0x52, 0x85, 0xb8, 0x20,
+ 0x0c, 0x0b, 0x04, 0x06, 0x31, 0x23, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x24, 0x0e, 0x08, 0xfb, 0x14, 0x0c, 0x00, 0x00, 0x20, 0x00, 0x40, 0x84,
+ 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xff, 0xff, 0xff,
+ 0x61, 0x00, 0x10, 0x28, 0x7f, 0x02, 0x10, 0x00, 0x31, 0x26, 0x02, 0x80,
+ 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const unsigned char xe3lpg_kernel_count_bin[] = {
+ 0x01, 0xa1, 0x8c, 0x3c, 0x00, 0x00, 0x10, 0x00, 0x65, 0xa0, 0x00, 0x80,
+ 0x20, 0x82, 0x05, 0x1f, 0x04, 0x00, 0x00, 0x02, 0xc0, 0xff, 0xff, 0xff,
+ 0x40, 0x19, 0x00, 0x80, 0x20, 0x82, 0x05, 0x1f, 0x04, 0x1f, 0x00, 0x02,
+ 0x00, 0x00, 0x00, 0x00, 0x31, 0x22, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x05,
+ 0x8f, 0x1f, 0x00, 0xfa, 0x03, 0x00, 0x70, 0xf6, 0x61, 0x80, 0x10, 0x2c,
+ 0x02, 0x00, 0x10, 0x00, 0x66, 0x09, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
+ 0x00, 0x80, 0x00, 0x02, 0xc0, 0x04, 0x00, 0x40, 0x41, 0x22, 0x03, 0x80,
+ 0x60, 0x06, 0x01, 0x20, 0x54, 0x05, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00,
+ 0x53, 0x81, 0x00, 0x80, 0x60, 0x06, 0x05, 0x03, 0x54, 0x05, 0x00, 0x06,
+ 0x14, 0x02, 0x00, 0x00, 0x70, 0x00, 0x14, 0x80, 0x60, 0x86, 0x01, 0x00,
+ 0x44, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x40, 0x1a, 0x14, 0x00,
+ 0x60, 0x06, 0x05, 0x06, 0x04, 0x03, 0x00, 0x01, 0x04, 0x01, 0x10, 0x00,
+ 0x20, 0x00, 0x00, 0x84, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xf0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xf4, 0x03, 0x00, 0x02, 0x00,
+ 0x61, 0x00, 0x84, 0xa4, 0x09, 0x05, 0x10, 0x00, 0x31, 0x24, 0x03, 0x80,
+ 0x00, 0x00, 0x0c, 0x08, 0x0c, 0x09, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
+ 0x70, 0x84, 0x94, 0x80, 0x60, 0x86, 0x01, 0x00, 0x04, 0x08, 0x00, 0x16,
+ 0x34, 0x12, 0x34, 0x12, 0x20, 0x00, 0x80, 0x84, 0x00, 0x40, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xa4,
+ 0x0a, 0x05, 0x16, 0x00, 0x40, 0x00, 0x00, 0x80, 0x60, 0x86, 0x15, 0x03,
+ 0x14, 0x03, 0x00, 0x05, 0x01, 0x00, 0x01, 0x00, 0x31, 0x45, 0x03, 0x80,
+ 0x00, 0x00, 0x0c, 0x0b, 0x0c, 0x0a, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
+ 0x61, 0xa3, 0x10, 0x00, 0xa0, 0x0a, 0x06, 0x0e, 0x04, 0x0a, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x02, 0xa0, 0x0a, 0x06, 0x10,
+ 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x00,
+ 0xa0, 0x0a, 0x16, 0x0e, 0x14, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x61, 0x12, 0x10, 0x02, 0x20, 0x02, 0x16, 0x10, 0x14, 0x0a, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x70, 0x1a, 0x54, 0x80, 0x20, 0x02, 0x01, 0x00,
+ 0x14, 0x03, 0x00, 0x52, 0x44, 0x05, 0x00, 0x00, 0x52, 0x85, 0xb8, 0x20,
+ 0x0c, 0x0b, 0x04, 0x06, 0x31, 0x23, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x24, 0x0e, 0x08, 0xfb, 0x14, 0x0c, 0x00, 0x00, 0x20, 0x00, 0x40, 0x84,
+ 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xff, 0xff, 0xff,
+ 0x61, 0x00, 0x10, 0x28, 0x12, 0x02, 0x10, 0x00, 0x31, 0x26, 0x02, 0x80,
+ 0x04, 0x00, 0x00, 0x00, 0x0c, 0x12, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00,
+};
+
+#endif /* __INTEL_COMPUTE_KRN_LOOP_COUNT_H__ */
diff --git a/lib/intel_compute_square_kernels.c b/lib/intel_compute_square_kernels.c
index 1fffb1fb88..51b80f50f5 100644
--- a/lib/intel_compute_square_kernels.c
+++ b/lib/intel_compute_square_kernels.c
@@ -13,49 +13,8 @@
#include "lib/intel_compute.h"
#include "lib/intel_compute_krn_sip.h"
#include "lib/intel_compute_krn_square.h"
+#include "lib/intel_compute_krn_loop_count.h"
-/*
- * Opencl code is in opencl/loop_count.cl
- *
- * To work properly it requires to use uncached reads, so ocloc has to
- * be called with: -options " -igc_opts 'LscLoadCacheControlOverride=1'" arg
- */
-static const unsigned char xe2lpg_kernel_count_bin[] = {
- 0x01, 0xa1, 0x8c, 0x3c, 0x00, 0x00, 0x10, 0x00, 0x65, 0xa0, 0x00, 0x80,
- 0x20, 0x82, 0x05, 0x7f, 0x04, 0x00, 0x00, 0x02, 0xc0, 0xff, 0xff, 0xff,
- 0x40, 0x19, 0x00, 0x80, 0x20, 0x82, 0x05, 0x7f, 0x04, 0x7f, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x00, 0x31, 0x22, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x05,
- 0x8f, 0x7f, 0x00, 0xfa, 0x03, 0x00, 0x70, 0xf6, 0x61, 0x80, 0x10, 0x2c,
- 0x02, 0x00, 0x10, 0x00, 0x66, 0x09, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
- 0x00, 0x80, 0x00, 0x02, 0xc0, 0x04, 0x00, 0x40, 0x41, 0x22, 0x03, 0x80,
- 0x60, 0x06, 0x01, 0x20, 0x54, 0x05, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00,
- 0x53, 0x81, 0x00, 0x80, 0x60, 0x06, 0x05, 0x03, 0x54, 0x05, 0x00, 0x06,
- 0x14, 0x02, 0x00, 0x00, 0x70, 0x00, 0x14, 0x80, 0x60, 0x86, 0x01, 0x00,
- 0x44, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x40, 0x1a, 0x14, 0x00,
- 0x60, 0x06, 0x05, 0x06, 0x04, 0x03, 0x00, 0x01, 0x04, 0x01, 0x10, 0x00,
- 0x20, 0x00, 0x00, 0x84, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0xf0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xf4, 0x03, 0x00, 0x02, 0x00,
- 0x61, 0x00, 0x84, 0xa4, 0x09, 0x05, 0x10, 0x00, 0x31, 0x24, 0x03, 0x80,
- 0x00, 0x00, 0x0c, 0x08, 0x0c, 0x09, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
- 0x70, 0x84, 0x94, 0x80, 0x60, 0x86, 0x01, 0x00, 0x04, 0x08, 0x00, 0x16,
- 0x34, 0x12, 0x34, 0x12, 0x20, 0x00, 0x80, 0x84, 0x00, 0x40, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xa4,
- 0x0a, 0x05, 0x16, 0x00, 0x40, 0x00, 0x00, 0x80, 0x60, 0x86, 0x15, 0x03,
- 0x14, 0x03, 0x00, 0x05, 0x01, 0x00, 0x01, 0x00, 0x31, 0x45, 0x03, 0x80,
- 0x00, 0x00, 0x0c, 0x0b, 0x0c, 0x0a, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
- 0x61, 0xa3, 0x10, 0x00, 0xa0, 0x0a, 0x06, 0x0e, 0x04, 0x0a, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x02, 0xa0, 0x0a, 0x06, 0x10,
- 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x00,
- 0xa0, 0x0a, 0x16, 0x0e, 0x14, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x61, 0x12, 0x10, 0x02, 0x20, 0x02, 0x16, 0x10, 0x14, 0x0a, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x70, 0x1a, 0x54, 0x80, 0x20, 0x02, 0x01, 0x00,
- 0x14, 0x03, 0x00, 0x52, 0x44, 0x05, 0x00, 0x00, 0x52, 0x85, 0xb8, 0x20,
- 0x0c, 0x0b, 0x04, 0x06, 0x31, 0x23, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x24, 0x0e, 0x08, 0xfb, 0x14, 0x0c, 0x00, 0x00, 0x20, 0x00, 0x40, 0x84,
- 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xff, 0xff, 0xff,
- 0x61, 0x00, 0x10, 0x28, 0x7f, 0x02, 0x10, 0x00, 0x31, 0x26, 0x02, 0x80,
- 0x04, 0x00, 0x00, 0x00, 0x0c, 0x7f, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00,
-};
/*
* Opencl code is in opencl/loop.cl
*
@@ -127,43 +86,6 @@ static const unsigned char xe3lpg_kernel_loop_bin[] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
-static const unsigned char xe3lpg_kernel_count_bin[] = {
- 0x01, 0xa1, 0x8c, 0x3c, 0x00, 0x00, 0x10, 0x00, 0x65, 0xa0, 0x00, 0x80,
- 0x20, 0x82, 0x05, 0x1f, 0x04, 0x00, 0x00, 0x02, 0xc0, 0xff, 0xff, 0xff,
- 0x40, 0x19, 0x00, 0x80, 0x20, 0x82, 0x05, 0x1f, 0x04, 0x1f, 0x00, 0x02,
- 0x00, 0x00, 0x00, 0x00, 0x31, 0x22, 0x03, 0x80, 0x00, 0x00, 0x0c, 0x05,
- 0x8f, 0x1f, 0x00, 0xfa, 0x03, 0x00, 0x70, 0xf6, 0x61, 0x80, 0x10, 0x2c,
- 0x02, 0x00, 0x10, 0x00, 0x66, 0x09, 0x00, 0x80, 0x20, 0x82, 0x01, 0x80,
- 0x00, 0x80, 0x00, 0x02, 0xc0, 0x04, 0x00, 0x40, 0x41, 0x22, 0x03, 0x80,
- 0x60, 0x06, 0x01, 0x20, 0x54, 0x05, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00,
- 0x53, 0x81, 0x00, 0x80, 0x60, 0x06, 0x05, 0x03, 0x54, 0x05, 0x00, 0x06,
- 0x14, 0x02, 0x00, 0x00, 0x70, 0x00, 0x14, 0x80, 0x60, 0x86, 0x01, 0x00,
- 0x44, 0x05, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x40, 0x1a, 0x14, 0x00,
- 0x60, 0x06, 0x05, 0x06, 0x04, 0x03, 0x00, 0x01, 0x04, 0x01, 0x10, 0x00,
- 0x20, 0x00, 0x00, 0x84, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0xf0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xf4, 0x03, 0x00, 0x02, 0x00,
- 0x61, 0x00, 0x84, 0xa4, 0x09, 0x05, 0x10, 0x00, 0x31, 0x24, 0x03, 0x80,
- 0x00, 0x00, 0x0c, 0x08, 0x0c, 0x09, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
- 0x70, 0x84, 0x94, 0x80, 0x60, 0x86, 0x01, 0x00, 0x04, 0x08, 0x00, 0x16,
- 0x34, 0x12, 0x34, 0x12, 0x20, 0x00, 0x80, 0x84, 0x00, 0x40, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x61, 0x00, 0x84, 0xa4,
- 0x0a, 0x05, 0x16, 0x00, 0x40, 0x00, 0x00, 0x80, 0x60, 0x86, 0x15, 0x03,
- 0x14, 0x03, 0x00, 0x05, 0x01, 0x00, 0x01, 0x00, 0x31, 0x45, 0x03, 0x80,
- 0x00, 0x00, 0x0c, 0x0b, 0x0c, 0x0a, 0x00, 0xfb, 0x00, 0x00, 0xa0, 0x00,
- 0x61, 0xa3, 0x10, 0x00, 0xa0, 0x0a, 0x06, 0x0e, 0x04, 0x0a, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x02, 0xa0, 0x0a, 0x06, 0x10,
- 0x04, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x10, 0x00,
- 0xa0, 0x0a, 0x16, 0x0e, 0x14, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x61, 0x12, 0x10, 0x02, 0x20, 0x02, 0x16, 0x10, 0x14, 0x0a, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x70, 0x1a, 0x54, 0x80, 0x20, 0x02, 0x01, 0x00,
- 0x14, 0x03, 0x00, 0x52, 0x44, 0x05, 0x00, 0x00, 0x52, 0x85, 0xb8, 0x20,
- 0x0c, 0x0b, 0x04, 0x06, 0x31, 0x23, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x24, 0x0e, 0x08, 0xfb, 0x14, 0x0c, 0x00, 0x00, 0x20, 0x00, 0x40, 0x84,
- 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xff, 0xff, 0xff,
- 0x61, 0x00, 0x10, 0x28, 0x12, 0x02, 0x10, 0x00, 0x31, 0x26, 0x02, 0x80,
- 0x04, 0x00, 0x00, 0x00, 0x0c, 0x12, 0x20, 0x30, 0x00, 0x00, 0x00, 0x00,
-};
-
const struct intel_compute_kernels intel_compute_square_kernels[] = {
{
.ip_ver = IP_VER(12, 0),
--
2.43.0
More information about the igt-dev
mailing list