[igt-dev] [PATCH 1/1] lib/gpgpu_fill: Add support for Xe2 platforms
Manszewski, Christoph
christoph.manszewski at intel.com
Mon Nov 6 12:03:12 UTC 2023
Hi Jagmeet,
On 3.11.2023 21:47, Jagmeet Randhawa wrote:
> Add xe2lpg_gpgpu_fillfunc to have gpgpu_fill running on XE2
> On XE2 there are a few changes to gpu command instruction lengths.
>
> There's also no 'Media Block Write' message, thus 'Typed 2D Block
> Store' message has to be used in the shader.
>
> The shader was compiled using the following command:
>
> iga64 -p=2 -Wall -Xprint-ldst -Xauto-deps --assemble xe2lpg_gpgpu_kernel.asm
> | od -A n -v -t x4 |sed -e 's/ / 0x/g' | sed -e 's/^/\t{/' | sed -e
> 's/([0-9]|[a-f]|[A-F]) /\1, /g' | sed -e 's/$/ },/g' | sed -e 's/\t /\t/g'
>
> Signed-off-by: Christoph Manszewski <christoph.manszewski at intel.com>
> Signed-off-by: Jagmeet Randhawa <jagmeet.randhawa at intel.com>
> Reviewed-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> ---
> lib/gpgpu_fill.c | 23 +++++++
> lib/gpgpu_fill.h | 6 ++
> lib/gpu_cmds.c | 61 ++++++++++++++++---
> .../shaders/gpgpu/xe2lpg_gpgpu_kernel.asm | 13 ++++
> lib/intel_batchbuffer.c | 4 +-
> 5 files changed, 96 insertions(+), 11 deletions(-)
> create mode 100644 lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
>
> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
> index eed821872..1270c2b22 100644
> --- a/lib/gpgpu_fill.c
> +++ b/lib/gpgpu_fill.c
> @@ -124,6 +124,18 @@ static const uint32_t xehpc_gpgpu_kernel[][4] = {
> { 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
> };
>
> +static const uint32_t xe2lpg_gpgpu_kernel[][4] = {
> + { 0x00080061, 0x01050000, 0x00000104, 0x00000000 },
> + { 0x00000069, 0x02058220, 0x02000014, 0x00000004 },
> + { 0x00000061, 0x02150220, 0x00000064, 0x00000000 },
> + { 0x00100061, 0x04054220, 0x00000000, 0x00000000 },
> + { 0x00041a61, 0x04550220, 0x00220205, 0x00000000 },
> + { 0x00000061, 0x04754550, 0x00000000, 0x000f000f },
> + { 0x00101e61, 0x05050220, 0x00000104, 0x00000000 },
> + { 0x00132031, 0x00000000, 0xd00e0494, 0x04000000 },
> + { 0x000c0031, 0x00000004, 0x3000500c, 0x00000000 },
> +};
> +
> /*
> * This sets up the gpgpu pipeline,
> *
> @@ -398,3 +410,14 @@ void xehpc_gpgpu_fillfunc(int i915,
> xehpc_gpgpu_kernel,
> sizeof(xehpc_gpgpu_kernel));
> }
> +
> +void xe2lpg_gpgpu_fillfunc(int i915,
> + struct intel_buf *buf,
> + unsigned int x, unsigned int y,
> + unsigned int width, unsigned int height,
> + uint8_t color)
> +{
> + __xehp_gpgpu_fillfunc(i915, buf, x, y, width, height, color,
> + xe2lpg_gpgpu_kernel,
> + sizeof(xe2lpg_gpgpu_kernel));
> +}
> diff --git a/lib/gpgpu_fill.h b/lib/gpgpu_fill.h
> index f81cd0b53..c3b47c10a 100644
> --- a/lib/gpgpu_fill.h
> +++ b/lib/gpgpu_fill.h
> @@ -75,4 +75,10 @@ xehpc_gpgpu_fillfunc(int i915,
> unsigned int width, unsigned int height,
> uint8_t color);
>
> +void xe2lpg_gpgpu_fillfunc(int i915,
> + struct intel_buf *buf,
> + unsigned int x, unsigned int y,
> + unsigned int width, unsigned int height,
> + uint8_t color);
> +
> #endif /* GPGPU_FILL_H */
> diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
> index f19f93b28..ed608e95a 100644
> --- a/lib/gpu_cmds.c
> +++ b/lib/gpu_cmds.c
> @@ -328,18 +328,45 @@ fill_binding_table(struct intel_bb *ibb, struct intel_buf *buf)
> binding_table = intel_bb_ptr(ibb);
> intel_bb_ptr_add(ibb, 64);
>
> - if (intel_graphics_ver(devid) >= IP_VER(12, 50))
> + if (intel_graphics_ver(devid) >= IP_VER(20, 0)){
Missing space before brace.
> + /*
> + * Up until now, SURFACEFORMAT_R8_UNROM was used regardless of the 'bpp' value.
Wrong alignment.
> + * For bpp 32 this results in a surface that is 4x narrower than expected. However
> + * it worked, because the 'Media Block Read/Write' message assumes the surface width
> + * is always in units of dwords.
> + *
> + * Since Xe2 the Media Block Write message got replaced with 'Typed 2D Block
> + * Load/Store Message' which correctly interprets the surface format.
> + */
> + if (buf->bpp == 32)
> + binding_table[0] = xehp_fill_surface_state(ibb, buf,
> + SURFACEFORMAT_R8G8B8A8_UNORM,
> + 1);
> + else if (buf->bpp == 8)
> + binding_table[0] = xehp_fill_surface_state(ibb, buf,
> + SURFACEFORMAT_R8_UNORM,
> + 1);
> + else
> + igt_assert_f(false,
> + "Surface state for bpp = %u not implemented",
> + buf->bpp);
> + }
> + else if (intel_graphics_ver(devid) >= IP_VER(12, 50)){
Closing braces should be on the same line as 'else' statement.
> binding_table[0] = xehp_fill_surface_state(ibb, buf,
> SURFACEFORMAT_R8_UNORM, 1);
> - else if (intel_graphics_ver(devid) >= IP_VER(9, 0))
> + }
> + else if (intel_graphics_ver(devid) >= IP_VER(9, 0)){
> binding_table[0] = gen9_fill_surface_state(ibb, buf,
> SURFACEFORMAT_R8_UNORM, 1);
> - else if (intel_graphics_ver(devid) >= IP_VER(8, 0))
> + }
> + else if (intel_graphics_ver(devid) >= IP_VER(8, 0)){
> binding_table[0] = gen8_fill_surface_state(ibb, buf,
> SURFACEFORMAT_R8_UNORM, 1);
> - else
> + }
> + else {
> binding_table[0] = gen7_fill_surface_state(ibb, buf,
> SURFACEFORMAT_R8_UNORM, 1);
> + }
>
> return binding_table_offset;
> }
> @@ -959,8 +986,12 @@ xehp_emit_cfe_state(struct intel_bb *ibb, uint32_t threads)
> void
> xehp_emit_state_compute_mode(struct intel_bb *ibb)
> {
> - intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE);
> + uint32_t dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0);
> + intel_bb_out(ibb, XEHP_STATE_COMPUTE_MODE | dword_length);
> intel_bb_out(ibb, 0);
> +
> + if (dword_length)
> + intel_bb_out(ibb, 0);
> }
>
> void
> @@ -976,6 +1007,8 @@ xehp_emit_state_binding_table_pool_alloc(struct intel_bb *ibb)
> void
> xehp_emit_state_base_address(struct intel_bb *ibb)
> {
> + uint32_t tmp;
> +
> intel_bb_out(ibb, GEN8_STATE_BASE_ADDRESS | 0x14); //dw0
>
> /* general */
> @@ -983,7 +1016,8 @@ xehp_emit_state_base_address(struct intel_bb *ibb)
> intel_bb_out(ibb, 0);
>
> /* stateless data port */
> - intel_bb_out(ibb, 0 | BASE_ADDRESS_MODIFY); //dw3
> + tmp = intel_graphics_ver(ibb->devid) == IP_VER(20, 0) ? 0 : BASE_ADDRESS_MODIFY;
> + intel_bb_out(ibb, 0 | tmp); //dw3
>
> /* surface */
> intel_bb_emit_reloc(ibb, ibb->handle, I915_GEM_DOMAIN_SAMPLER, //dw4-dw5
> @@ -1008,7 +1042,10 @@ xehp_emit_state_base_address(struct intel_bb *ibb)
> /* dynamic state buffer size */
> intel_bb_out(ibb, 1 << 12 | 1); //dw13
> /* indirect object buffer size */
> - intel_bb_out(ibb, 0xfffff000 | 1); //dw14
> + if (intel_graphics_ver(ibb->devid) == IP_VER(20, 0)) //dw14
> + intel_bb_out(ibb, 0);
> + else
> + intel_bb_out(ibb, 0xfffff000 | 1);
> /* intruction buffer size */
> intel_bb_out(ibb, 1 << 12 | 1); //dw15
>
> @@ -1030,7 +1067,7 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
> struct xehp_interface_descriptor_data *pidd,
> uint8_t color)
> {
> - uint32_t x_dim, y_dim, mask;
> + uint32_t x_dim, y_dim, mask, dword_length;
>
> /*
> * Simply do SIMD16 based dispatch, so every thread uses
> @@ -1052,7 +1089,8 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
> else
> mask = (1 << mask) - 1;
>
> - intel_bb_out(ibb, XEHP_COMPUTE_WALKER | 0x25);
> + dword_length = intel_graphics_ver(ibb->devid) >= IP_VER(20, 0) ? 0x26 : 0x25;
> + intel_bb_out(ibb, XEHP_COMPUTE_WALKER | dword_length);
>
> intel_bb_out(ibb, 0); /* debug object */ //dw1
> intel_bb_out(ibb, 0); /* indirect data length */ //dw2
> @@ -1090,9 +1128,12 @@ xehp_emit_compute_walk(struct intel_bb *ibb,
> intel_bb_out(ibb, 0); //dw15
> intel_bb_out(ibb, 0); //dw16
> intel_bb_out(ibb, 0); //dw17
> +
This line contains non printable characters other than newline.
> + if (intel_graphics_ver(ibb->devid) >= IP_VER(20, 0)) //Xe2:dw18
> + intel_bb_out(ibb, 0);
>
> /* Interface descriptor data */
> - for (int i = 0; i < 8; i++) { //dw18-25
> + for (int i = 0; i < 8; i++) { //dw18-25 (Xe2:dw19-26)
> intel_bb_out(ibb, ((uint32_t *) pidd)[i]);
> }
>
> diff --git a/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
> new file mode 100644
> index 000000000..e2ecc71f5
> --- /dev/null
> +++ b/lib/i915/shaders/gpgpu/xe2lpg_gpgpu_kernel.asm
> @@ -0,0 +1,13 @@
> +L0:
> + mov (4|M0) r1.0<1>:ub r1.0<0;1,0>:ub // Load r1.0-3 with color byte
> + shl (1|M0) r2.0<1>:ud r0.1<0;1,0>:ud 0x4:ud // Load r2.0-3 with tg id X << 4
> + mov (1|M0) r2.1<1>:ud r0.6<0;1,0>:ud // Load r2.4-7 with tg id Y
> +
> + // payload setup
> + mov (16|M0) r4.0<1>:ud 0x0:ud // Zero out register R4
> + mov (2|M0) r4.5<1>:ud r2.0<2;2,1>:ud // Store X and Y block start (160:191 and 192:223)
> + mov (1|M0) r4.14<1>:w 0xF:w // Store X and Y block size (224:231 and 232:239)
> + mov (16|M0) r5.0<1>:ud r1.0<0;1,0>:ud // Load r5-r6 with color byte
> +
> + send.tgm (16|M0) null r4 null:0 0x0 0x64000007 // Send TypedStore2DBlock to tgm port
> + send.gtwy (8|M0) null r80 null:0 0x0 0x02000000 {EOT}
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index df82ef5f5..d23c04073 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -755,7 +755,9 @@ igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
> {
> igt_fillfunc_t fill = NULL;
>
> - if (IS_METEORLAKE(devid))
> + if (intel_graphics_ver(devid) >= IP_VER(20, 0))
> + fill = xe2lpg_gpgpu_fillfunc;
This line uses spaces instead of tabs for indentation.
Here I pointed out some of the issues shown by checkpatcht that I asked
you to fix in my previous comment. Please revisit them and fix them.
Christoph
> + else if (IS_METEORLAKE(devid))
> fill = xehp_gpgpu_fillfunc;
> else if (intel_graphics_ver(devid) >= IP_VER(12, 60))
> fill = xehpc_gpgpu_fillfunc;
More information about the igt-dev
mailing list