[PATCH i-g-t v2] lib/gpgpu_fill: Write kernel using inline iga64 api

Wed Aug 14 15:54:39 UTC 2024

On Wed, 2024-08-14 at 13:32 +0200, Andrzej Hajda wrote:
> 
> On 14.08.2024 10:56, Dominik Grzegorzek wrote:
> > Rewrite gpgpu_fill shaders to utilize the newly introduced
> > method of writing IGA64 assembly inline.
> > 
> > v2: start with gen12 (Andrzej)
> > 
> > Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> > Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > Cc: Christoph Manszewski <christoph.manszewski at intel.com>
> > Cc: Andrzej Hajda <andrzej.hajda at intel.com>
> 
> Reviewed-by: Andrzej Hajda <andrzej.hajda at intel.com>
> 
> Regards
> Andrzej
> 
> > ---
> >   lib/gpgpu_fill.c                              | 166 +++++++-----------
> >   lib/gpgpu_fill.h                              |  19 --
> >   lib/i915/shaders/gpgpu/gen12_gpgpu_kernel.asm |  12 --
> >   .../shaders/gpgpu/gen12p72_gpgpu_kernel.asm   |  12 --
> >   .../shaders/gpgpu/xe2lpg_gpgpu_kernel.asm     |  13 --
> >   lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm  |  12 --
> >   lib/iga64_generated_codes.c                   |  76 +++++++-
> >   lib/intel_batchbuffer.c                       |  10 +-
> >   lib/meson.build                               |   2 +-
> >   9 files changed, 140 insertions(+), 182 deletions(-)
> >   delete mode 100644 lib/i915/shaders/gpgpu/gen12_gpgpu_kernel.asm
> >   delete mode 100644 lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm
> >   delete mode 100644 lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
> >   delete mode 100644 lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm
> > 
> > diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> > index 1270c2b22..dc3ebd8f0 100644
> > --- a/lib/gpgpu_fill.c
> > +++ b/lib/gpgpu_fill.c
> > @@ -31,6 +31,7 @@
> >   #include "drmtest.h"
> >   
> >   #include "gpgpu_fill.h"
> > +#include "gpgpu_shader.h"
> >   #include "gpu_cmds.h"
> >   
> >   /* lib/i915/shaders/gpgpu/gpgpu_fill.gxa */
> > @@ -86,56 +87,6 @@ static const uint32_t gen11_gpgpu_kernel[][4] = {
> >   	{ 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
> >   };
> >   
> > -static const uint32_t gen12_gpgpu_kernel[][4] = {
> > -	{ 0x00020061, 0x01050000, 0x00000104, 0x00000000 },
> > -	{ 0x00000069, 0x02058220, 0x02000024, 0x00000004 },
> > -	{ 0x00000061, 0x02250220, 0x000000c4, 0x00000000 },
> > -	{ 0x00030061, 0x04050220, 0x00460005, 0x00000000 },
> > -	{ 0x00010261, 0x04050220, 0x00220205, 0x00000000 },
> > -	{ 0x00000061, 0x04454220, 0x00000000, 0x0000000f },
> > -	{ 0x00040661, 0x05050220, 0x00000104, 0x00000000 },
> > -	{ 0x00049031, 0x00000000, 0xc0000414, 0x02a00000 },
> > -	{ 0x00030061, 0x70050220, 0x00460005, 0x00000000 },
> > -	{ 0x00040131, 0x00000004, 0x7020700c, 0x10000000 },
> > -};
> > -
> > -static const uint32_t xehp_gpgpu_kernel[][4] = {
> > -	{ 0x00020061, 0x01050000, 0x00000104, 0x00000000 },
> > -	{ 0x00000069, 0x02058220, 0x02000024, 0x00000004 },
> > -	{ 0x00000061, 0x02250220, 0x000000c4, 0x00000000 },
> > -	{ 0x00030061, 0x04050220, 0x00460005, 0x00000000 },
> > -	{ 0x00011a61, 0x04050220, 0x00220205, 0x00000000 },
> > -	{ 0x00000061, 0x04454220, 0x00000000, 0x0000000f },
> > -	{ 0x00041e61, 0x05050220, 0x00000104, 0x00000000 },
> > -	{ 0x80001901, 0x00010000, 0x00000000, 0x00000000 },
> > -	{ 0x00044031, 0x00000000, 0xc0000414, 0x02a00000 },
> > -	{ 0x00030031, 0x00000004, 0x3000500c, 0x00000000 },
> > -};
> > -
> > -static const uint32_t xehpc_gpgpu_kernel[][4] = {
> > -	{ 0x00080061, 0x01050000, 0x00000104, 0x00000000 },
> > -	{ 0x00000069, 0x02058220, 0x02000014, 0x00000004 },
> > -	{ 0x00000061, 0x02150220, 0x00000064, 0x00000000 },
> > -	{ 0x000c0061, 0x04050220, 0x00460005, 0x00000000 },
> > -	{ 0x00041a61, 0x04050220, 0x00220205, 0x00000000 },
> > -	{ 0x00000061, 0x04254220, 0x00000000, 0x0000000f },
> > -	{ 0x00101e61, 0x05050220, 0x00000104, 0x00000000 },
> > -	{ 0x00132031, 0x00000000, 0xc0000414, 0x02a00000 },
> > -	{ 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
> > -};
> > -
> > -static const uint32_t xe2lpg_gpgpu_kernel[][4] = {
> > -	{ 0x00080061, 0x01050000, 0x00000104, 0x00000000 },
> > -	{ 0x00000069, 0x02058220, 0x02000014, 0x00000004 },
> > -	{ 0x00000061, 0x02150220, 0x00000064, 0x00000000 },
> > -	{ 0x00100061, 0x04054220, 0x00000000, 0x00000000 },
> > -	{ 0x00041a61, 0x04550220, 0x00220205, 0x00000000 },
> > -	{ 0x00000061, 0x04754550, 0x00000000, 0x000f000f },
> > -	{ 0x00101e61, 0x05050220, 0x00000104, 0x00000000 },
> > -	{ 0x00132031, 0x00000000, 0xd00e0494, 0x04000000 },
> > -	{ 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
> > -};
> > -
> >   /*
> >    * This sets up the gpgpu pipeline,
> >    *
> > @@ -317,15 +268,66 @@ __gen9_gpgpu_fillfunc(int i915,
> >   	intel_bb_destroy(ibb);
> >   }
> >   
> > -static void
> > -__xehp_gpgpu_fillfunc(int i915,
> > -		      struct intel_buf *buf,
> > -		      unsigned int x, unsigned int y,
> > -		      unsigned int width, unsigned int height,
> > -		      uint8_t color, const uint32_t kernel[][4],
> > -		      size_t kernel_size)
> > +static struct gpgpu_shader *__xehp_gpgpu_kernel(int i915)
> > +{
> > +	struct gpgpu_shader *kernel = gpgpu_shader_create(i915);
> > +
> > +	emit_iga64_code(kernel, gpgpu_fill, "					\n\
> > +// fill up r1 with target colour						\n\
> > +mov (4|M0)		r1.0<1>:ub	r1.0<0;1,0>:ub				\n\
> > +// prepare block x offset (Thread Group Id X * 16)				\n\
> > +shl (1|M0)		r2.0<1>:ud	r0.1<0;1,0>:ud	0x4:ud			\n\
> > +// prepare block y offset (Thread Group Id Y)					\n\
> > +mov (1|M0)		r2.1<1>:ud	r0.6<0;1,0>:ud				\n\
> > +// zero message header payload							\n\
> > +mov (8|M0)		r4.0<1>:ud	0x0:ud					\n\
> > +// fill up message payload with target colour					\n\
> > +mov (16|M0)		r5.0<1>:ud	r1.0<0;1,0>:ud				\n\
> > +#if GEN_VER < 2000								\n\
> > +// load block offsets into message header payload				\n\
> > +mov (2|M0)		r4.0<1>:ud	r2.0<2;2,1>:ud				\n\
> > +// load block width								\n\
> > +mov (1|M0)		r4.2<1>:ud	0xF:ud					\n\
> > +// load FFTID from R0 header							\n\
> > +mov (1|M0)		r4.4<1>:ud	r0.5<0;1,0>:ud				\n\
> > +// Media block write to bti[0] surface						\n\
> > +// Message Descriptor								\n\
> > +//	0x40A8000:								\n\
> > +//	[28:25]		Mlen: 2							\n\
> > +//	[24:20]		Rlen: 0							\n\
> > +//	[19]		Header: 1 (included)					\n\
> > +//	[18:14]		MessageType: 0xA (media block write)			\n\
> > +//	[7:0]		BTI: 0							\n\
> > +send.dc1 (16|M0)	null	r4	src1_null	0x0	0x40A8000	\n\
> > +#else										\n\
> > +// load block offsets into message header payload				\n\
> > +mov (2|M0)		r4.5<1>:ud	r2.0<2;2,1>:ud				\n\
> > +// load block width								\n\
> > +mov (1|M0)		 r4.14<1>:w	0xF:w					\n\
> > +// Typed 2D block store to bti[0] surface					\n\
> > +// Message Descriptor								\n\
> > +//	0x6400007:								\n\
> > +//	[30:29]		AddrType: 3 (BTI)					\n\
> > +//	[28:25]		Mlen: 2							\n\
> > +//	[24:20]		Rlen: 0							\n\
> > +//	[19:17]		Caching: 0  (use state settings for both L1 and L3)	\n\
> > +//	[5:0]		Opcode: 0x07  (store_block2d)				\n\
> > +send.tgm (16|M0)	null	r4	null	0x0	0x64000007		\n\
> > +#endif										\n\
> > +	");
> > +
> > +	gpgpu_shader__eot(kernel);
> > +	return kernel;
> > +}
> > +
> > +void xehp_gpgpu_fillfunc(int i915,
> > +			 struct intel_buf *buf,
> > +			 unsigned int x, unsigned int y,
> > +			 unsigned int width, unsigned int height,
> > +			 uint8_t color)
> >   {
> >   	struct intel_bb *ibb;
> > +	struct gpgpu_shader *kernel;
> >   	struct xehp_interface_descriptor_data idd;
> >   
> >   	ibb = intel_bb_create(i915, PAGE_SIZE);
> > @@ -333,8 +335,10 @@ __xehp_gpgpu_fillfunc(int i915,
> >   
> >   	intel_bb_ptr_set(ibb, BATCH_STATE_SPLIT);
> >   
> > -	xehp_fill_interface_descriptor(ibb, buf,
> > -				       kernel, kernel_size, &idd);
> > +	kernel = __xehp_gpgpu_kernel(i915);
> > +	xehp_fill_interface_descriptor(ibb, buf, kernel->instr,
> > +				       kernel->size * 4, &idd);
> > +	gpgpu_shader_destroy(kernel);
> >   
> >   	intel_bb_ptr_set(ibb, 0);
> >   
> > @@ -377,47 +381,3 @@ void gen11_gpgpu_fillfunc(int i915,
> >   			      gen11_gpgpu_kernel,
> >   			      sizeof(gen11_gpgpu_kernel));
> >   }
> > -
> > -void gen12_gpgpu_fillfunc(int i915,
> > -			  struct intel_buf *buf,
> > -			  unsigned x, unsigned y,
> > -			  unsigned width, unsigned height,
> > -			  uint8_t color)
> > -{
> > -	__gen9_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
> > -			      gen12_gpgpu_kernel,
> > -			      sizeof(gen12_gpgpu_kernel));
This function used __gen9_gpgpu_fillfunc, so I could not remove it; hence the failure in premerge.
I need to bring it back. Of course, I could call __xehp_gpgpu_kernel here and pass
that kernel to this function, but I do not feel there is a need. Let's keep it consistent, so that
platforms which use the gen9 pipeline keep using the old-fashioned precompiled kernels.

So effectively, the v1 version of this patch is the correct one. Let me know if v1 works for you.

Regards,
Dominik
> > -}
> > -
> > -void xehp_gpgpu_fillfunc(int i915,
> > -			 struct intel_buf *buf,
> > -			 unsigned int x, unsigned int y,
> > -			 unsigned int width, unsigned int height,
> > -			 uint8_t color)
> > -{
> > -	__xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
> > -			      xehp_gpgpu_kernel,
> > -			      sizeof(xehp_gpgpu_kernel));
> > -}
> > -
> > -void xehpc_gpgpu_fillfunc(int i915,
> > -			  struct intel_buf *buf,
> > -			  unsigned int x, unsigned int y,
> > -			  unsigned int width, unsigned int height,
> > -			  uint8_t color)
> > -{
> > -	__xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
> > -			      xehpc_gpgpu_kernel,
> > -			      sizeof(xehpc_gpgpu_kernel));
> > -}
> > -
> > -void xe2lpg_gpgpu_fillfunc(int i915,
> > -			   struct intel_buf *buf,
> > -			   unsigned int x, unsigned int y,
> > -			   unsigned int width, unsigned int height,
> > -			   uint8_t color)
> > -{
> > -	__xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
> > -			      xe2lpg_gpgpu_kernel,
> > -			      sizeof(xe2lpg_gpgpu_kernel));
> > -}
> > diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
> > index c3b47c10a..f4e207077 100644
> > --- a/lib/gpgpu_fill.h
> > +++ b/lib/gpgpu_fill.h
> > @@ -55,12 +55,6 @@ void gen11_gpgpu_fillfunc(int i915,
> >   			  unsigned width, unsigned height,
> >   			  uint8_t color);
> >   
> > -void gen12_gpgpu_fillfunc(int i915,
> > -			  struct intel_buf *buf,
> > -			  unsigned x, unsigned y,
> > -			  unsigned width, unsigned height,
> > -			  uint8_t color);
> > -
> >   void
> >   xehp_gpgpu_fillfunc(int i915,
> >   		    struct intel_buf *dst,
> > @@ -68,17 +62,4 @@ xehp_gpgpu_fillfunc(int i915,
> >   		    unsigned int width, unsigned int height,
> >   		    uint8_t color);
> >   
> > -void
> > -xehpc_gpgpu_fillfunc(int i915,
> > -		     struct intel_buf *dst,
> > -		     unsigned int x, unsigned int y,
> > -		     unsigned int width, unsigned int height,
> > -		     uint8_t color);
> > -
> > -void xe2lpg_gpgpu_fillfunc(int i915,
> > -			   struct intel_buf *buf,
> > -			   unsigned int x, unsigned int y,
> > -			   unsigned int width, unsigned int height,
> > -			   uint8_t color);
> > -
> >   #endif /* GPGPU_FILL_H */
> > diff --git a/lib/i915/shaders/gpgpu/gen12_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/gen12_gpgpu_kernel.asm
> > deleted file mode 100644
> > index ede87a055..000000000
> > --- a/lib/i915/shaders/gpgpu/gen12_gpgpu_kernel.asm
> > +++ /dev/null
> > @@ -1,12 +0,0 @@
> > -L0:
> > -         mov (4|M0)               r1.0<1>:ub    r1.0<0;1,0>:ub
> > -         shl (1|M0)               r2.0<1>:ud    r0.1<0;1,0>:ud    0x4:ud
> > -         mov (1|M0)               r2.1<1>:ud    r0.6<0;1,0>:ud
> > -         mov (8|M0)               r4.0<1>:ud    r0.0<8;8,1>:ud
> > -         mov (2|M0)               r4.0<1>:ud    r2.0<2;2,1>:ud                   {@2}
> > -         mov (1|M0)               r4.2<1>:ud    0xF:ud
> > -         mov (16|M0)              r5.0<1>:ud    r1.0<0;1,0>:ud                   {@6}
> > -         send.dc1 (16|M0)         null     r4      null    0x0         0x40A8000  {@1, $0} //    wr:2h+0, rd:0, Media Block Write msc:0, to #0
> > -         mov (8|M0)               r112.0<1>:ud  r0.0<8;8,1>:ud
> > -         send.ts (16|M0)          null     r112    null    0x10000000  0x2000010  {EOT, @1} //    wr:1+0, rd:0, fc: 0x10
> > -L160:
> > diff --git a/lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm
> > deleted file mode 100644
> > index 52699a475..000000000
> > --- a/lib/i915/shaders/gpgpu/gen12p72_gpgpu_kernel.asm
> > +++ /dev/null
> > @@ -1,12 +0,0 @@
> > -L0:
> > -         mov (4|M0)               r1.0<1>:ub    r1.0<0;1,0>:ub
> > -         shl (1|M0)               r2.0<1>:ud    r0.1<0;1,0>:ud    0x4:ud
> > -         mov (1|M0)               r2.1<1>:ud    r0.6<0;1,0>:ud
> > -         mov (8|M0)               r4.0<1>:ud    r0.0<8;8,1>:ud
> > -         mov (2|M0)               r4.0<1>:ud    r2.0<2;2,1>:ud                   {I at 2}
> > -         mov (1|M0)               r4.2<1>:ud    0xF:ud
> > -         mov (16|M0)              r5.0<1>:ud    r1.0<0;1,0>:ud                   {I at 6}
> > -(W)      sync.nop                             null                             {I at 1}
> > -         send.dc1 (16|M0)         null     r4      null:0    0x0         0x40A8000  {$0} //    wr:2h+0, rd:0, Media Block Write msc:0, to #0
> > -         send.gtwy (8|M0)         null     r80     null:0    0x0         0x02000000 {EOT}
> > -L176:
> > diff --git a/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
> > deleted file mode 100644
> > index e2ecc71f5..000000000
> > --- a/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
> > +++ /dev/null
> > @@ -1,13 +0,0 @@
> > -L0:
> > -         mov (4|M0)               r1.0<1>:ub    r1.0<0;1,0>:ub                        // Load r1.0-3 with color byte
> > -         shl (1|M0)               r2.0<1>:ud    r0.1<0;1,0>:ud    0x4:ud              // Load r2.0-3 with tg id X << 4
> > -         mov (1|M0)               r2.1<1>:ud    r0.6<0;1,0>:ud                        // Load r2.4-7 with tg id Y
> > -
> > -         // payload setup
> > -         mov (16|M0)              r4.0<1>:ud    0x0:ud                                // Zero out register R4
> > -         mov (2|M0)               r4.5<1>:ud    r2.0<2;2,1>:ud                        // Store X and Y block start (160:191 and 192:223)
> > -         mov (1|M0)               r4.14<1>:w    0xF:w                                 // Store X and Y block size (224:231 and 232:239)
> > -         mov (16|M0)              r5.0<1>:ud    r1.0<0;1,0>:ud                        // Load r5-r6 with color byte
> > -
> > -         send.tgm (16|M0)         null     r4    null:0    0x0    0x64000007          // Send TypedStore2DBlock to tgm port
> > -         send.gtwy (8|M0)         null    r80    null:0    0x0    0x02000000 {EOT}
> > diff --git a/lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm
> > deleted file mode 100644
> > index 7adfbd0f0..000000000
> > --- a/lib/i915/shaders/gpgpu/xehp_gpgpu_kernel.asm
> > +++ /dev/null
> > @@ -1,12 +0,0 @@
> > -L0:
> > -         mov (4|M0)               r1.0<1>:ub    r1.0<0;1,0>:ub
> > -         shl (1|M0)               r2.0<1>:ud    r0.1<0;1,0>:ud    0x4:ud
> > -         mov (1|M0)               r2.1<1>:ud    r0.6<0;1,0>:ud
> > -         mov (8|M0)               r4.0<1>:ud    r0.0<8;8,1>:ud
> > -         mov (2|M0)               r4.0<1>:ud    r2.0<2;2,1>:ud                   {I at 2}
> > -         mov (1|M0)               r4.2<1>:ud    0xF:ud
> > -         mov (16|M0)              r5.0<1>:ud    r1.0<0;1,0>:ud                   {I at 6}
> > -(W)      sync.nop                             null                             {I at 1}
> > -         send.dc1 (16|M0)         null     r4      null    0x0         0x40A8000  {$0} //    wr:2h+0, rd:0, Media Block Write msc:0, to #0
> > -         send.gtwy (8|M0)         null     r80     null    0x0         0x02000000 {EOT}
> > -L176:
> > diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
> > index 6a08c4844..c22b1e9e7 100644
> > --- a/lib/iga64_generated_codes.c
> > +++ b/lib/iga64_generated_codes.c
> > @@ -3,7 +3,81 @@
> >   
> >   #include "gpgpu_shader.h"
> >   
> > -#define MD5_SUM_IGA64_ASMS 2c503cbfbd7b3043e9a52188ae4da7a8
> > +#define MD5_SUM_IGA64_ASMS efa80cb5c2d50f515af3642cee8dc062
> > +
> > +struct iga64_template const iga64_code_gpgpu_fill[] = {
> > +	{ .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
> > +		0x00080061, 0x01050000, 0x00000104, 0x00000000,
> > +		0x00000069, 0x02058220, 0x02000014, 0x00000004,
> > +		0x00000061, 0x02150220, 0x00000064, 0x00000000,
> > +		0x000c0061, 0x04054220, 0x00000000, 0x00000000,
> > +		0x00101c61, 0x05050220, 0x00000104, 0x00000000,
> > +		0x00041b61, 0x04550220, 0x00220205, 0x00000000,
> > +		0x00000061, 0x04754550, 0x00000000, 0x000f000f,
> > +		0x00132031, 0x00000000, 0xd00e0494, 0x04000000,
> > +		0x80000001, 0x00010000, 0x20000000, 0x00000000,
> > +		0x80000001, 0x00010000, 0x30000000, 0x00000000,
> > +		0x80000901, 0x00010000, 0x00000000, 0x00000000,
> > +	}},
> > +	{ .gen_ver = 1270, .size = 52, .code = (const uint32_t []) {
> > +		0x00020061, 0x01050000, 0x00000104, 0x00000000,
> > +		0x00000069, 0x02058220, 0x02000024, 0x00000004,
> > +		0x00000061, 0x02250220, 0x000000c4, 0x00000000,
> > +		0x00030061, 0x04054220, 0x00000000, 0x00000000,
> > +		0x00041c61, 0x05050220, 0x00000104, 0x00000000,
> > +		0x00011b61, 0x04050220, 0x00220205, 0x00000000,
> > +		0x00000061, 0x04454220, 0x00000000, 0x0000000f,
> > +		0x00000061, 0x04850220, 0x000000a4, 0x00000000,
> > +		0x80001901, 0x00010000, 0x00000000, 0x00000000,
> > +		0x00044031, 0x00000000, 0xc0000414, 0x02a00000,
> > +		0x80000001, 0x00010000, 0x20000000, 0x00000000,
> > +		0x80000001, 0x00010000, 0x30000000, 0x00000000,
> > +		0x80000901, 0x00010000, 0x00000000, 0x00000000,
> > +	}},
> > +	{ .gen_ver = 1260, .size = 48, .code = (const uint32_t []) {
> > +		0x00080061, 0x01050000, 0x00000104, 0x00000000,
> > +		0x00000069, 0x02058220, 0x02000014, 0x00000004,
> > +		0x00000061, 0x02150220, 0x00000064, 0x00000000,
> > +		0x000c0061, 0x04054220, 0x00000000, 0x00000000,
> > +		0x00101c61, 0x05050220, 0x00000104, 0x00000000,
> > +		0x00041b61, 0x04050220, 0x00220205, 0x00000000,
> > +		0x00000061, 0x04254220, 0x00000000, 0x0000000f,
> > +		0x00000061, 0x04450220, 0x00000054, 0x00000000,
> > +		0x00132031, 0x00000000, 0xc0000414, 0x02a00000,
> > +		0x80000001, 0x00010000, 0x20000000, 0x00000000,
> > +		0x80000001, 0x00010000, 0x30000000, 0x00000000,
> > +		0x80000901, 0x00010000, 0x00000000, 0x00000000,
> > +	}},
> > +	{ .gen_ver = 1250, .size = 52, .code = (const uint32_t []) {
> > +		0x00020061, 0x01050000, 0x00000104, 0x00000000,
> > +		0x00000069, 0x02058220, 0x02000024, 0x00000004,
> > +		0x00000061, 0x02250220, 0x000000c4, 0x00000000,
> > +		0x00030061, 0x04054220, 0x00000000, 0x00000000,
> > +		0x00041c61, 0x05050220, 0x00000104, 0x00000000,
> > +		0x00011b61, 0x04050220, 0x00220205, 0x00000000,
> > +		0x00000061, 0x04454220, 0x00000000, 0x0000000f,
> > +		0x00000061, 0x04850220, 0x000000a4, 0x00000000,
> > +		0x80001901, 0x00010000, 0x00000000, 0x00000000,
> > +		0x00044031, 0x00000000, 0xc0000414, 0x02a00000,
> > +		0x80000001, 0x00010000, 0x20000000, 0x00000000,
> > +		0x80000001, 0x00010000, 0x30000000, 0x00000000,
> > +		0x80000901, 0x00010000, 0x00000000, 0x00000000,
> > +	}},
> > +	{ .gen_ver = 0, .size = 48, .code = (const uint32_t []) {
> > +		0x00020061, 0x01050000, 0x00000104, 0x00000000,
> > +		0x00000069, 0x02058220, 0x02000024, 0x00000004,
> > +		0x00000061, 0x02250220, 0x000000c4, 0x00000000,
> > +		0x00030061, 0x04054220, 0x00000000, 0x00000000,
> > +		0x00040461, 0x05050220, 0x00000104, 0x00000000,
> > +		0x00010361, 0x04050220, 0x00220205, 0x00000000,
> > +		0x00000061, 0x04454220, 0x00000000, 0x0000000f,
> > +		0x00000061, 0x04850220, 0x000000a4, 0x00000000,
> > +		0x00049031, 0x00000000, 0xc0000414, 0x02a00000,
> > +		0x80000001, 0x00010000, 0x20000000, 0x00000000,
> > +		0x80000001, 0x00010000, 0x30000000, 0x00000000,
> > +		0x80000101, 0x00010000, 0x00000000, 0x00000000,
> > +	}}
> > +};
> >   
> >   struct iga64_template const iga64_code_media_block_write[] = {
> >   	{ .gen_ver = 2000, .size = 56, .code = (const uint32_t []) {
> > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> > index 824e92831..a7185cf05 100644
> > --- a/lib/intel_batchbuffer.c
> > +++ b/lib/intel_batchbuffer.c
> > @@ -758,16 +758,8 @@ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
> >   {
> >   	igt_fillfunc_t fill = NULL;
> >   
> > -	if (intel_graphics_ver(devid) >= IP_VER(20, 0))
> > -		fill = xe2lpg_gpgpu_fillfunc;
> > -	else if (IS_METEORLAKE(devid))
> > +	if (intel_graphics_ver(devid) >= IP_VER(12, 00))
> >   		fill = xehp_gpgpu_fillfunc;
> > -	else if (intel_graphics_ver(devid) >= IP_VER(12, 60))
> > -		fill = xehpc_gpgpu_fillfunc;
> > -	else if (intel_graphics_ver(devid) >= IP_VER(12, 50))
> > -		fill = xehp_gpgpu_fillfunc;
> > -	else if (IS_GEN12(devid))
> > -		fill = gen12_gpgpu_fillfunc;
> >   	else if (IS_GEN11(devid))
> >   		fill = gen11_gpgpu_fillfunc;
> >   	else if (IS_GEN9(devid) || IS_GEN10(devid))
> > diff --git a/lib/meson.build b/lib/meson.build
> > index f711e60a7..2fac522d3 100644
> > --- a/lib/meson.build
> > +++ b/lib/meson.build
> > @@ -216,7 +216,7 @@ lib_version = vcs_tag(input : 'version.h.in', output : 'version.h',
> >   		      fallback : 'NO-GIT',
> >   		      command : vcs_command )
> >   
> > -iga64_assembly_sources = [ 'gpgpu_shader.c' ]
> > +iga64_assembly_sources = [ 'gpgpu_shader.c', 'gpgpu_fill.c' ]
> >   
> >   lib_intermediates = []
> >   iga64_assembly_libs = []
>