[PATCH i-g-t 1/4] lib/gpgpu_shader: Add write to ppgtt offset
Hajda, Andrzej
andrzej.hajda at intel.com
Mon Nov 18 13:00:00 UTC 2024
W dniu 15.11.2024 o 15:11, Gwan-gyeong Mun pisze:
> From: Jonathan Cavitt <jonathan.cavitt at intel.com>
>
> Create a function that adds the capacity to fill an oword at a given
> ppgtt offset with a dword value. Xe2 does this with an Untyped 2D Block
> Array Store operation, though older platforms used to do this with a
> Media Write Block, so both means are supported.
>
> Suggested-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> Co-developed-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
> Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
> Signed-off-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
> ---
> lib/gpgpu_shader.c | 109 ++++++++++++++++++++++++++++++++++++
> lib/gpgpu_shader.h | 2 +
> lib/iga64_generated_codes.c | 81 ++++++++++++++++++++++++++-
> 3 files changed, 191 insertions(+), 1 deletion(-)
>
> diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
> index 4e1b8d5e9..7a2f0d28d 100644
> --- a/lib/gpgpu_shader.c
> +++ b/lib/gpgpu_shader.c
> @@ -652,6 +652,115 @@ void gpgpu_shader__write_dword(struct gpgpu_shader *shdr, uint32_t value,
> ", 2, y_offset, 3, value, value, value, value);
> }
>
> +/**
> + * gpgpu_shader__write_offset:
> + * @shdr: shader to be modified
> + * @ppgtt_offset: write target virtual address
> + * @value: dword to be written
> + *
> + * Fill oword at @ppgtt with dword stored in @value.
> + *
> + * Note: for the write to succeed, the address specified by @ppgtt_offset has
> + * to be bound. Otherwise a page fault will be triggered.
> + */
> +void gpgpu_shader__write_offset(struct gpgpu_shader *shdr, uint64_t ppgtt_offset,
> + uint32_t value)
The name is somewhat misleading; maybe gpgpu_shader__fill_a64_4dw instead?
Anything better?
> +{
> + uint64_t offset = CANONICAL(ppgtt_offset);
> + igt_assert_f((offset & 0xf) == 0, "Offset must be aligned to oword!\n");
> +
> + emit_iga64_code(shdr, write_offset, " \n\
> +#if GEN_VER < 2000 // Media Block Write \n\
> +(W) mov (8|M0) r30.0<1>:ud 0x0:ud \n\
> + // canonical address \n\
> +(W) mov (1|M0) r30.0<1>:ud ARG(0):ud \n\
> +(W) mov (1|M0) r30.1<1>:ud ARG(1):ud \n\
> + // written value \n\
> +(W) mov (1|M0) r31.0<1>:ud ARG(2):ud \n\
> +(W) mov (1|M0) r31.1<1>:ud ARG(3):ud \n\
> +(W) mov (1|M0) r31.2<1>:ud ARG(4):ud \n\
> +(W) mov (1|M0) r31.3<1>:ud ARG(5):ud \n\
These four moves could be replaced by a single "mov (4) r31.0<1>:ud ARG(2):ud",
and the duplicated arguments ARG(3)-ARG(5) could then be removed.
> + // owblock write \n\
> +(W) send.dc1 (16|M0) null r30 r31 0x0 0x20d40ff \n\
> + // owblock read, to block the thread until the write is materialized \n\
> +(W) send.dc1 (16|M0) r32 r30 null 0x0 0x21500ff \n\
> +#else // Unyped 2D Block Store \n\
> +// Instruction_Store2DBlock \n\
> +// bspec: 63981 \n\
> +// src0 address payload (Untyped2DBLOCKAddressPayload) specifies both \n\
> +// the block parameters and the 2D Surface parameters. \n\
> +// src1 data payload format is selected by Data Size. \n\
> +// Untyped2DBLOCKAddressPayload \n\
> +// bspec: 63986 \n\
> +// [243:240] Array Length: 0 (length is 1) \n\
> +// [239:232] Block Height: 0 (height is 1) \n\
> +// [231:224] Block Width: 0xf (width is 16) \n\
> +// [223:192] Block Start Y: 0 \n\
> +// [191:160] Block Start X: 0 \n\
> +// [159:128] Untyped 2D Surface Pitch: 0x3f (pitch is 64 bytes) \n\
> +// [127:96] Untyped 2D Surface Height: 0 (height is 1) \n\
> +// [95:64] Untyped 2D Surface Width: 0x3f (width is 64 bytes) \n\
> +// [63:0] Untyped 2D Surface Base Address \n\
> +// initialize register \n\
> +(W) mov (8) r30.0<1>:uq 0x0:uq \n\
> +// [0:31] Untyped 2D Surface Base Address low \n\
> +(W) mov (1) r30.0<1>:ud ARG(0):ud \n\
> +// [32:63] Untyped 2D Surface Base Address high \n\
> +(W) mov (1) r30.1<1>:ud ARG(1):ud \n\
> +// [95:64] Untyped 2D Surface Width: 0x3f \n\
> +// (Width minus 1 (in bytes) of the 2D surface, it represents 64) \n\
> +(W) mov (1) r30.2<1>:ud 0x3f:ud \n\
> +// [127:96] Untyped 2D Surface Height: 0x0 \n\
> +// (Height minus 1 (in number of data elements) of \n\
> +// the Untyped 2D surface, it represents 1) \n\
> +(W) mov (1) r30.3<1>:ud 0x0:ud \n\
> +// [159:128] Untyped 2D Surface Pitch: 0x3f \n\
> +// (Pitch minus 1 (in bytes) of the 2D surface, it represents 64) \n\
> +(W) mov (1) r30.4<1>:ud 0x3f:ud \n\
> +// [231:224] Block Width: 0xf (15) \n\
> +// (Specifies the width minus 1 (in number of data elements) for this \n\
> +// rectangular region, it represents 16) \n\
> +// Block width (encoded_value + 1) must be a multiple of DW (4 bytes). \n\
> +// [239:232] Block Height: 0 \n\
> +// (Specifies the height minus 1 (in number of data elements) for \n\
> +// this rectangular region, it represents 1) \n\
> +// [243:240] Array Length: 0 \n\
> +// (Specifies Array Length minus 1 for Load2DBlockArray messages, \n\
> +// must be zero for 2D Block Store messages, it represents 1) \n\
> +(W) mov (1) r30.7<1>:ud 0xf:ud \n\
> +// src1 data payload size \n\
> +// Block Height x Block Width x Data size / GRF Register size \n\
> +// => 1 x 16 x 32bit / 512bit = 1 \n\
> +// data payload size is 1 \n\
> +(W) mov (8) r31.0<1>:uq 0x0:uq \n\
> +(W) mov (1|M0) r31.0<1>:ud ARG(2):ud \n\
> +(W) mov (1|M0) r31.1<1>:ud ARG(3):ud \n\
> +(W) mov (1|M0) r31.2<1>:ud ARG(4):ud \n\
> +(W) mov (1|M0) r31.3<1>:ud ARG(5):ud \n\
> +// send.ugm Untyped 2D Block Array Store \n\
> +// Format: send.ugm (1) dst src0 src1 ExtMsg MsgDesc \n\
> +// Execution Mask restriction: SIMT1 \n\
> +// \n\
> +// Extended Message Descriptor (Dataport Extended Descriptor Imm 2D Block) \n\
> +// bspec: 67780 \n\
> +// 0x0 => \n\
> +// [32:22] Global Y_offset: 0 \n\
> +// [21:12] Global X_offset: 0 \n\
> +// \n\
> +// Message Descriptor \n\
> +// bspec: 63981 \n\
> +// 0x2020407 => \n\
> +// [30:29] Address Type: 0 (FLAT) \n\
> +// [28:25] Src0 Length: 1 \n\
> +// [24:20] Dest Length: 0 \n\
> +// [19:16] Cache : 2 (L1UC_L3UC) \n\
> +// [11:9] Data Size: 2 (D32) \n\
> +// [5:0] Store Operation: 7 \n\
> +(W) send.ugm (1) null r30 r31:1 0x0 0x2020407 \n\
> +#endif \n\
> + ", offset & 0xffffffff, offset >> 32, value, value, value, value);
With the above change and the proper macros, the line above becomes:
, lower_32_bits(offset), upper_32_bits(offset), value);
> +}
> +
> /**
> * gpgpu_shader__clear_exception:
> * @shdr: shader to be modified
> diff --git a/lib/gpgpu_shader.h b/lib/gpgpu_shader.h
> index c7c21c115..355b128b5 100644
> --- a/lib/gpgpu_shader.h
> +++ b/lib/gpgpu_shader.h
> @@ -83,6 +83,8 @@ void gpgpu_shader__write_aip(struct gpgpu_shader *shdr, uint32_t y_offset);
> void gpgpu_shader__increase_aip(struct gpgpu_shader *shdr, uint32_t value);
> void gpgpu_shader__write_dword(struct gpgpu_shader *shdr, uint32_t value,
> uint32_t y_offset);
> +void gpgpu_shader__write_offset(struct gpgpu_shader *shdr, uint64_t ppgtt_offset,
> + uint32_t value);
> void gpgpu_shader__write_on_exception(struct gpgpu_shader *shdr, uint32_t dw, uint32_t x_offset,
> uint32_t y_offset, uint32_t mask, uint32_t value);
> void gpgpu_shader__label(struct gpgpu_shader *shdr, int label_id);
> diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
> index 6638be07b..b23613ac4 100644
> --- a/lib/iga64_generated_codes.c
> +++ b/lib/iga64_generated_codes.c
> @@ -3,7 +3,7 @@
>
> #include "gpgpu_shader.h"
>
> -#define MD5_SUM_IGA64_ASMS ec9d477415eebb7d6983395f1bcde78f
> +#define MD5_SUM_IGA64_ASMS 4fcde43dedb9d3212f1d85b5b180b0c1
>
> struct iga64_template const iga64_code_gpgpu_fill[] = {
> { .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
> @@ -323,6 +323,85 @@ struct iga64_template const iga64_code_clear_exception[] = {
> }}
> };
>
> +struct iga64_template const iga64_code_write_offset[] = {
> + { .gen_ver = 2000, .size = 64, .code = (const uint32_t []) {
> + 0x800c0061, 0x1e054330, 0x00000000, 0x00000000,
> + 0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
> + 0x80000061, 0x1e154220, 0x00000000, 0xc0ded001,
> + 0x80000061, 0x1e254220, 0x00000000, 0x0000003f,
> + 0x80000061, 0x1e354220, 0x00000000, 0x00000000,
> + 0x80000061, 0x1e454220, 0x00000000, 0x0000003f,
> + 0x80000061, 0x1e754220, 0x00000000, 0x0000000f,
> + 0x800c0061, 0x1f054330, 0x00000000, 0x00000000,
> + 0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
> + 0x80000061, 0x1f154220, 0x00000000, 0xc0ded003,
> + 0x80000061, 0x1f254220, 0x00000000, 0xc0ded004,
> + 0x80000061, 0x1f354220, 0x00000000, 0xc0ded005,
> + 0x80032031, 0x00000000, 0xf80e1e0c, 0x00801f0c,
> + 0x80000001, 0x00010000, 0x20000000, 0x00000000,
> + 0x80000001, 0x00010000, 0x30000000, 0x00000000,
> + 0x80000901, 0x00010000, 0x00000000, 0x00000000,
> + }},
> + { .gen_ver = 1270, .size = 52, .code = (const uint32_t []) {
> + 0x80030061, 0x1e054220, 0x00000000, 0x00000000,
> + 0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
> + 0x80000061, 0x1e254220, 0x00000000, 0xc0ded001,
> + 0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
> + 0x80000061, 0x1f254220, 0x00000000, 0xc0ded003,
> + 0x80000061, 0x1f454220, 0x00000000, 0xc0ded004,
> + 0x80000061, 0x1f654220, 0x00000000, 0xc0ded005,
> + 0x80001d01, 0x00010000, 0x00000000, 0x00000000,
> + 0x80044031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
> + 0x80044131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
> + 0x80000001, 0x00010000, 0x20000000, 0x00000000,
> + 0x80000001, 0x00010000, 0x30000000, 0x00000000,
> + 0x80000901, 0x00010000, 0x00000000, 0x00000000,
> + }},
> + { .gen_ver = 1260, .size = 48, .code = (const uint32_t []) {
> + 0x800c0061, 0x1e054220, 0x00000000, 0x00000000,
> + 0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
> + 0x80000061, 0x1e154220, 0x00000000, 0xc0ded001,
> + 0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
> + 0x80000061, 0x1f154220, 0x00000000, 0xc0ded003,
> + 0x80000061, 0x1f254220, 0x00000000, 0xc0ded004,
> + 0x80000061, 0x1f354220, 0x00000000, 0xc0ded005,
> + 0x8013a031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
> + 0x8010c131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
> + 0x80000001, 0x00010000, 0x20000000, 0x00000000,
> + 0x80000001, 0x00010000, 0x30000000, 0x00000000,
> + 0x80000901, 0x00010000, 0x00000000, 0x00000000,
> + }},
> + { .gen_ver = 1250, .size = 52, .code = (const uint32_t []) {
> + 0x80030061, 0x1e054220, 0x00000000, 0x00000000,
> + 0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
> + 0x80000061, 0x1e254220, 0x00000000, 0xc0ded001,
> + 0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
> + 0x80000061, 0x1f254220, 0x00000000, 0xc0ded003,
> + 0x80000061, 0x1f454220, 0x00000000, 0xc0ded004,
> + 0x80000061, 0x1f654220, 0x00000000, 0xc0ded005,
> + 0x80001d01, 0x00010000, 0x00000000, 0x00000000,
> + 0x80044031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
> + 0x80044131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
> + 0x80000001, 0x00010000, 0x20000000, 0x00000000,
> + 0x80000001, 0x00010000, 0x30000000, 0x00000000,
> + 0x80000901, 0x00010000, 0x00000000, 0x00000000,
> + }},
> + { .gen_ver = 0, .size = 48, .code = (const uint32_t []) {
> + 0x80030061, 0x1e054220, 0x00000000, 0x00000000,
> + 0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
> + 0x80000061, 0x1e254220, 0x00000000, 0xc0ded001,
> + 0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
> + 0x80000061, 0x1f254220, 0x00000000, 0xc0ded003,
> + 0x80000061, 0x1f454220, 0x00000000, 0xc0ded004,
> + 0x80000061, 0x1f654220, 0x00000000, 0xc0ded005,
> + 0x8004d031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
> + 0x80044131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
> + 0x80000001, 0x00010000, 0x20000000, 0x00000000,
> + 0x80000001, 0x00010000, 0x30000000, 0x00000000,
> + 0x80000101, 0x00010000, 0x00000000, 0x00000000,
> + }}
> +};
> +
> struct iga64_template const iga64_code_media_block_write[] = {
> { .gen_ver = 2000, .size = 56, .code = (const uint32_t []) {
> 0x80100061, 0x04054220, 0x00000000, 0x00000000,
More information about the igt-dev
mailing list