[PATCH i-g-t v4] lib/gpgpu_fill: Write kernel using inline iga64 api
Dominik Grzegorzek
dominik.grzegorzek at intel.com
Fri Aug 23 06:44:53 UTC 2024
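Rewrite the gpgpu_fill shaders for XeHP and newer platforms to use the
newly introduced method of writing IGA64 assembly inline. The separate
precompiled xehp, xehpc and xe2lpg kernels and their .asm sources are
replaced by a single inline kernel that selects the platform-specific
store message with GEN_VER. As a result, xehpc_gpgpu_fillfunc() and
xe2lpg_gpgpu_fillfunc() are removed, and igt_get_gpgpu_fillfunc() now
returns xehp_gpgpu_fillfunc() for every platform with graphics version
12.50 or later.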
Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda at intel.com>
---
lib/gpgpu_fill.c | 142 ++++++++----------
lib/gpgpu_fill.h | 13 --
.../shaders/gpgpu/gen12p72_gpgpu_kernel.asm | 12 --
.../shaders/gpgpu/xe2lpg_gpgpu_kernel.asm | 13 --
lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm | 12 --
lib/iga64_generated_codes.c | 76 +++++++++-
lib/intel_batchbuffer.c | 8 +-
lib/meson.build | 2 +-
8 files changed, 140 insertions(+), 138 deletions(-)
delete mode 100644 lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm
delete mode 100644 lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
delete mode 100644 lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 1270c2b22..fe0b8b35d 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -31,6 +31,7 @@
#include "drmtest.h"
#include "gpgpu_fill.h"
+#include "gpgpu_shader.h"
#include "gpu_cmds.h"
/* lib/i915/shaders/gpgpu/gpgpu_fill.gxa */
@@ -99,43 +100,6 @@ static const uint32_t gen12_gpgpu_kernel[][4] = {
{ 0x00040131, 0x00000004, 0x7020700c, 0x10000000 },
};
-static const uint32_t xehp_gpgpu_kernel[][4] = {
- { 0x00020061, 0x01050000, 0x00000104, 0x00000000 },
- { 0x00000069, 0x02058220, 0x02000024, 0x00000004 },
- { 0x00000061, 0x02250220, 0x000000c4, 0x00000000 },
- { 0x00030061, 0x04050220, 0x00460005, 0x00000000 },
- { 0x00011a61, 0x04050220, 0x00220205, 0x00000000 },
- { 0x00000061, 0x04454220, 0x00000000, 0x0000000f },
- { 0x00041e61, 0x05050220, 0x00000104, 0x00000000 },
- { 0x80001901, 0x00010000, 0x00000000, 0x00000000 },
- { 0x00044031, 0x00000000, 0xc0000414, 0x02a00000 },
- { 0x00030031, 0x00000004, 0x3000500c, 0x00000000 },
-};
-
-static const uint32_t xehpc_gpgpu_kernel[][4] = {
- { 0x00080061, 0x01050000, 0x00000104, 0x00000000 },
- { 0x00000069, 0x02058220, 0x02000014, 0x00000004 },
- { 0x00000061, 0x02150220, 0x00000064, 0x00000000 },
- { 0x000c0061, 0x04050220, 0x00460005, 0x00000000 },
- { 0x00041a61, 0x04050220, 0x00220205, 0x00000000 },
- { 0x00000061, 0x04254220, 0x00000000, 0x0000000f },
- { 0x00101e61, 0x05050220, 0x00000104, 0x00000000 },
- { 0x00132031, 0x00000000, 0xc0000414, 0x02a00000 },
- { 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
-};
-
-static const uint32_t xe2lpg_gpgpu_kernel[][4] = {
- { 0x00080061, 0x01050000, 0x00000104, 0x00000000 },
- { 0x00000069, 0x02058220, 0x02000014, 0x00000004 },
- { 0x00000061, 0x02150220, 0x00000064, 0x00000000 },
- { 0x00100061, 0x04054220, 0x00000000, 0x00000000 },
- { 0x00041a61, 0x04550220, 0x00220205, 0x00000000 },
- { 0x00000061, 0x04754550, 0x00000000, 0x000f000f },
- { 0x00101e61, 0x05050220, 0x00000104, 0x00000000 },
- { 0x00132031, 0x00000000, 0xd00e0494, 0x04000000 },
- { 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
-};
-
/*
* This sets up the gpgpu pipeline,
*
@@ -317,15 +281,66 @@ __gen9_gpgpu_fillfunc(int i915,
intel_bb_destroy(ibb);
}
-static void
-__xehp_gpgpu_fillfunc(int i915,
- struct intel_buf *buf,
- unsigned int x, unsigned int y,
- unsigned int width, unsigned int height,
- uint8_t color, const uint32_t kernel[][4],
- size_t kernel_size)
+static struct gpgpu_shader *__xehp_gpgpu_kernel(int i915)
+{
+ struct gpgpu_shader *kernel = gpgpu_shader_create(i915);
+
+ emit_iga64_code(kernel, gpgpu_fill, " \n\
+// fill up r1 with target colour \n\
+mov (4|M0) r1.0<1>:ub r1.0<0;1,0>:ub \n\
+// prepare block x offset (Thread Group Id X * 16) \n\
+shl (1|M0) r2.0<1>:ud r0.1<0;1,0>:ud 0x4:ud \n\
+// prepare block y offset (Thread Group Id Y) \n\
+mov (1|M0) r2.1<1>:ud r0.6<0;1,0>:ud \n\
+// zero message header payload \n\
+mov (8|M0) r4.0<1>:ud 0x0:ud \n\
+// fill up message payload with target colour \n\
+mov (16|M0) r5.0<1>:ud r1.0<0;1,0>:ud \n\
+#if GEN_VER < 2000 \n\
+// load block offsets into message header payload \n\
+mov (2|M0) r4.0<1>:ud r2.0<2;2,1>:ud \n\
+// load block width \n\
+mov (1|M0) r4.2<1>:ud 0xF:ud \n\
+// load FFTID from R0 header \n\
+mov (1|M0) r4.4<1>:ud r0.5<0;1,0>:ud \n\
+// Media block write to bti[0] surface \n\
+// Message Descriptor \n\
+// 0x40A8000: \n\
+// [28:25] Mlen: 2 \n\
+// [24:20] Rlen: 0 \n\
+// [19] Header: 1 (included) \n\
+// [18:14] MessageType: 0xA (media block write) \n\
+// [7:0] BTI: 0 \n\
+send.dc1 (16|M0) null r4 src1_null 0x0 0x40A8000 \n\
+#else \n\
+// load block offsets into message header payload \n\
+mov (2|M0) r4.5<1>:ud r2.0<2;2,1>:ud \n\
+// load block width \n\
+mov (1|M0) r4.14<1>:w 0xF:w \n\
+// Typed 2D block store to bti[0] surface \n\
+// Message Descriptor \n\
+// 0x6400007: \n\
+// [30:29] AddrType: 3 (BTI) \n\
+// [28:25] Mlen: 2 \n\
+// [24:20] Rlen: 0 \n\
+// [19:17] Caching: 0 (use state settings for both L1 and L3) \n\
+// [5:0] Opcode: 0x07 (store_block2d) \n\
+send.tgm (16|M0) null r4 null 0x0 0x64000007 \n\
+#endif \n\
+ ");
+
+ gpgpu_shader__eot(kernel);
+ return kernel;
+}
+
+void xehp_gpgpu_fillfunc(int i915,
+ struct intel_buf *buf,
+ unsigned int x, unsigned int y,
+ unsigned int width, unsigned int height,
+ uint8_t color)
{
struct intel_bb *ibb;
+ struct gpgpu_shader *kernel;
struct xehp_interface_descriptor_data idd;
ibb = intel_bb_create(i915, PAGE_SIZE);
@@ -333,8 +348,10 @@ __xehp_gpgpu_fillfunc(int i915,
intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
- xehp_fill_interface_descriptor(ibb, buf,
- kernel, kernel_size, &idd);
+ kernel = __xehp_gpgpu_kernel(i915);
+ xehp_fill_interface_descriptor(ibb, buf, kernel->instr,
+ kernel->size * 4, &idd);
+ gpgpu_shader_destroy(kernel);
intel_bb_ptr_set(ibb, 0);
@@ -388,36 +405,3 @@ void gen12_gpgpu_fillfunc(int i915,
gen12_gpgpu_kernel,
sizeof(gen12_gpgpu_kernel));
}
-
-void xehp_gpgpu_fillfunc(int i915,
- struct intel_buf *buf,
- unsigned int x, unsigned int y,
- unsigned int width, unsigned int height,
- uint8_t color)
-{
- __xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
- xehp_gpgpu_kernel,
- sizeof(xehp_gpgpu_kernel));
-}
-
-void xehpc_gpgpu_fillfunc(int i915,
- struct intel_buf *buf,
- unsigned int x, unsigned int y,
- unsigned int width, unsigned int height,
- uint8_t color)
-{
- __xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
- xehpc_gpgpu_kernel,
- sizeof(xehpc_gpgpu_kernel));
-}
-
-void xe2lpg_gpgpu_fillfunc(int i915,
- struct intel_buf *buf,
- unsigned int x, unsigned int y,
- unsigned int width, unsigned int height,
- uint8_t color)
-{
- __xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
- xe2lpg_gpgpu_kernel,
- sizeof(xe2lpg_gpgpu_kernel));
-}
diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
index c3b47c10a..a483859e5 100644
--- a/lib/gpgpu_fill.h
+++ b/lib/gpgpu_fill.h
@@ -68,17 +68,4 @@ xehp_gpgpu_fillfunc(int i915,
unsigned int width, unsigned int height,
uint8_t color);
-void
-xehpc_gpgpu_fillfunc(int i915,
- struct intel_buf *dst,
- unsigned int x, unsigned int y,
- unsigned int width, unsigned int height,
- uint8_t color);
-
-void xe2lpg_gpgpu_fillfunc(int i915,
- struct intel_buf *buf,
- unsigned int x, unsigned int y,
- unsigned int width, unsigned int height,
- uint8_t color);
-
#endif /* GPGPU_FILL_H */
diff --git a/lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm
deleted file mode 100644
index 52699a475..000000000
--- a/lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm
+++ /dev/null
@@ -1,12 +0,0 @@
-L0:
- mov (4|M0) r1.0<1>:ub r1.0<0;1,0>:ub
- shl (1|M0) r2.0<1>:ud r0.1<0;1,0>:ud 0x4:ud
- mov (1|M0) r2.1<1>:ud r0.6<0;1,0>:ud
- mov (8|M0) r4.0<1>:ud r0.0<8;8,1>:ud
- mov (2|M0) r4.0<1>:ud r2.0<2;2,1>:ud {I at 2}
- mov (1|M0) r4.2<1>:ud 0xF:ud
- mov (16|M0) r5.0<1>:ud r1.0<0;1,0>:ud {I at 6}
-(W) sync.nop null {I at 1}
- send.dc1 (16|M0) null r4 null:0 0x0 0x40A8000 {$0} // wr:2h+0, rd:0, Media Block Write msc:0, to #0
- send.gtwy (8|M0) null r80 null:0 0x0 0x02000000 {EOT}
-L176:
diff --git a/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
deleted file mode 100644
index e2ecc71f5..000000000
--- a/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
+++ /dev/null
@@ -1,13 +0,0 @@
-L0:
- mov (4|M0) r1.0<1>:ub r1.0<0;1,0>:ub // Load r1.0-3 with color byte
- shl (1|M0) r2.0<1>:ud r0.1<0;1,0>:ud 0x4:ud // Load r2.0-3 with tg id X << 4
- mov (1|M0) r2.1<1>:ud r0.6<0;1,0>:ud // Load r2.4-7 with tg id Y
-
- // payload setup
- mov (16|M0) r4.0<1>:ud 0x0:ud // Zero out register R4
- mov (2|M0) r4.5<1>:ud r2.0<2;2,1>:ud // Store X and Y block start (160:191 and 192:223)
- mov (1|M0) r4.14<1>:w 0xF:w // Store X and Y block size (224:231 and 232:239)
- mov (16|M0) r5.0<1>:ud r1.0<0;1,0>:ud // Load r5-r6 with color byte
-
- send.tgm (16|M0) null r4 null:0 0x0 0x64000007 // Send TypedStore2DBlock to tgm port
- send.gtwy (8|M0) null r80 null:0 0x0 0x02000000 {EOT}
diff --git a/lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm
deleted file mode 100644
index 7adfbd0f0..000000000
--- a/lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm
+++ /dev/null
@@ -1,12 +0,0 @@
-L0:
- mov (4|M0) r1.0<1>:ub r1.0<0;1,0>:ub
- shl (1|M0) r2.0<1>:ud r0.1<0;1,0>:ud 0x4:ud
- mov (1|M0) r2.1<1>:ud r0.6<0;1,0>:ud
- mov (8|M0) r4.0<1>:ud r0.0<8;8,1>:ud
- mov (2|M0) r4.0<1>:ud r2.0<2;2,1>:ud {I at 2}
- mov (1|M0) r4.2<1>:ud 0xF:ud
- mov (16|M0) r5.0<1>:ud r1.0<0;1,0>:ud {I at 6}
-(W) sync.nop null {I at 1}
- send.dc1 (16|M0) null r4 null 0x0 0x40A8000 {$0} // wr:2h+0, rd:0, Media Block Write msc:0, to #0
- send.gtwy (8|M0) null r80 null 0x0 0x02000000 {EOT}
-L176:
diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
index 87270bb46..ea8d0f097 100644
--- a/lib/iga64_generated_codes.c
+++ b/lib/iga64_generated_codes.c
@@ -3,7 +3,81 @@
#include "gpgpu_shader.h"
-#define MD5_SUM_IGA64_ASMS 96abf1aa2d0cc900ebba8203cefdd30c
+#define MD5_SUM_IGA64_ASMS 9977ade854d57c5af5c5ca9e93c0f37e
+
+struct iga64_template const iga64_code_gpgpu_fill[] = {
+ { .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
+ 0x00080061, 0x01050000, 0x00000104, 0x00000000,
+ 0x00000069, 0x02058220, 0x02000014, 0x00000004,
+ 0x00000061, 0x02150220, 0x00000064, 0x00000000,
+ 0x000c0061, 0x04054220, 0x00000000, 0x00000000,
+ 0x00101c61, 0x05050220, 0x00000104, 0x00000000,
+ 0x00041b61, 0x04550220, 0x00220205, 0x00000000,
+ 0x00000061, 0x04754550, 0x00000000, 0x000f000f,
+ 0x00132031, 0x00000000, 0xd00e0494, 0x04000000,
+ 0x80000001, 0x00010000, 0x20000000, 0x00000000,
+ 0x80000001, 0x00010000, 0x30000000, 0x00000000,
+ 0x80000901, 0x00010000, 0x00000000, 0x00000000,
+ }},
+ { .gen_ver = 1270, .size = 52, .code = (const uint32_t []) {
+ 0x00020061, 0x01050000, 0x00000104, 0x00000000,
+ 0x00000069, 0x02058220, 0x02000024, 0x00000004,
+ 0x00000061, 0x02250220, 0x000000c4, 0x00000000,
+ 0x00030061, 0x04054220, 0x00000000, 0x00000000,
+ 0x00041c61, 0x05050220, 0x00000104, 0x00000000,
+ 0x00011b61, 0x04050220, 0x00220205, 0x00000000,
+ 0x00000061, 0x04454220, 0x00000000, 0x0000000f,
+ 0x00000061, 0x04850220, 0x000000a4, 0x00000000,
+ 0x80001901, 0x00010000, 0x00000000, 0x00000000,
+ 0x00044031, 0x00000000, 0xc0000414, 0x02a00000,
+ 0x80000001, 0x00010000, 0x20000000, 0x00000000,
+ 0x80000001, 0x00010000, 0x30000000, 0x00000000,
+ 0x80000901, 0x00010000, 0x00000000, 0x00000000,
+ }},
+ { .gen_ver = 1260, .size = 48, .code = (const uint32_t []) {
+ 0x00080061, 0x01050000, 0x00000104, 0x00000000,
+ 0x00000069, 0x02058220, 0x02000014, 0x00000004,
+ 0x00000061, 0x02150220, 0x00000064, 0x00000000,
+ 0x000c0061, 0x04054220, 0x00000000, 0x00000000,
+ 0x00101c61, 0x05050220, 0x00000104, 0x00000000,
+ 0x00041b61, 0x04050220, 0x00220205, 0x00000000,
+ 0x00000061, 0x04254220, 0x00000000, 0x0000000f,
+ 0x00000061, 0x04450220, 0x00000054, 0x00000000,
+ 0x00132031, 0x00000000, 0xc0000414, 0x02a00000,
+ 0x80000001, 0x00010000, 0x20000000, 0x00000000,
+ 0x80000001, 0x00010000, 0x30000000, 0x00000000,
+ 0x80000901, 0x00010000, 0x00000000, 0x00000000,
+ }},
+ { .gen_ver = 1250, .size = 52, .code = (const uint32_t []) {
+ 0x00020061, 0x01050000, 0x00000104, 0x00000000,
+ 0x00000069, 0x02058220, 0x02000024, 0x00000004,
+ 0x00000061, 0x02250220, 0x000000c4, 0x00000000,
+ 0x00030061, 0x04054220, 0x00000000, 0x00000000,
+ 0x00041c61, 0x05050220, 0x00000104, 0x00000000,
+ 0x00011b61, 0x04050220, 0x00220205, 0x00000000,
+ 0x00000061, 0x04454220, 0x00000000, 0x0000000f,
+ 0x00000061, 0x04850220, 0x000000a4, 0x00000000,
+ 0x80001901, 0x00010000, 0x00000000, 0x00000000,
+ 0x00044031, 0x00000000, 0xc0000414, 0x02a00000,
+ 0x80000001, 0x00010000, 0x20000000, 0x00000000,
+ 0x80000001, 0x00010000, 0x30000000, 0x00000000,
+ 0x80000901, 0x00010000, 0x00000000, 0x00000000,
+ }},
+ { .gen_ver = 0, .size = 48, .code = (const uint32_t []) {
+ 0x00020061, 0x01050000, 0x00000104, 0x00000000,
+ 0x00000069, 0x02058220, 0x02000024, 0x00000004,
+ 0x00000061, 0x02250220, 0x000000c4, 0x00000000,
+ 0x00030061, 0x04054220, 0x00000000, 0x00000000,
+ 0x00040461, 0x05050220, 0x00000104, 0x00000000,
+ 0x00010361, 0x04050220, 0x00220205, 0x00000000,
+ 0x00000061, 0x04454220, 0x00000000, 0x0000000f,
+ 0x00000061, 0x04850220, 0x000000a4, 0x00000000,
+ 0x00049031, 0x00000000, 0xc0000414, 0x02a00000,
+ 0x80000001, 0x00010000, 0x20000000, 0x00000000,
+ 0x80000001, 0x00010000, 0x30000000, 0x00000000,
+ 0x80000101, 0x00010000, 0x00000000, 0x00000000,
+ }}
+};
struct iga64_template const iga64_code_media_block_write[] = {
{ .gen_ver = 2000, .size = 56, .code = (const uint32_t []) {
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 824e92831..f91091bc4 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -758,13 +758,7 @@ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
{
igt_fillfunc_t fill = NULL;
- if (intel_graphics_ver(devid) >= IP_VER(20, 0))
- fill = xe2lpg_gpgpu_fillfunc;
- else if (IS_METEORLAKE(devid))
- fill = xehp_gpgpu_fillfunc;
- else if (intel_graphics_ver(devid) >= IP_VER(12, 60))
- fill = xehpc_gpgpu_fillfunc;
- else if (intel_graphics_ver(devid) >= IP_VER(12, 50))
+ if (intel_graphics_ver(devid) >= IP_VER(12, 50))
fill = xehp_gpgpu_fillfunc;
else if (IS_GEN12(devid))
fill = gen12_gpgpu_fillfunc;
diff --git a/lib/meson.build b/lib/meson.build
index ab4cf9c7a..4af2bc743 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -217,7 +217,7 @@ lib_version = vcs_tag(input : 'version.h.in', output : 'version.h',
fallback : 'NO-GIT',
command : vcs_command )
-iga64_assembly_sources = [ 'gpgpu_shader.c' ]
+iga64_assembly_sources = [ 'gpgpu_shader.c', 'gpgpu_fill.c' ]
lib_intermediates = []
iga64_assembly_libs = []
--
2.34.1
More information about the igt-dev
mailing list