[PATCH i-g-t 1/4] lib/gpgpu_shader: Add write to ppgtt offset

Thu Nov 21 12:01:18 UTC 2024


On 11/18/24 3:00 PM, Hajda, Andrzej wrote:
> W dniu 15.11.2024 o 15:11, Gwan-gyeong Mun pisze:
>> From: Jonathan Cavitt <jonathan.cavitt at intel.com>
>>
>> Create a function that adds the capacity to fill an oword at a given
>> ppgtt offset with a dword value.  Xe2 does this with an Untyped 2D Block
>> Array Store operation, though older platforms used to do this with a
>> Media Write Block, so both means are supported.
>>
>> Suggested-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
>> Co-developed-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
>> Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
>> Signed-off-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
>> ---
>>   lib/gpgpu_shader.c          | 109 ++++++++++++++++++++++++++++++++++++
>>   lib/gpgpu_shader.h          |   2 +
>>   lib/iga64_generated_codes.c |  81 ++++++++++++++++++++++++++-
>>   3 files changed, 191 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
>> index 4e1b8d5e9..7a2f0d28d 100644
>> --- a/lib/gpgpu_shader.c
>> +++ b/lib/gpgpu_shader.c
>> @@ -652,6 +652,115 @@ void gpgpu_shader__write_dword(struct 
>> gpgpu_shader *shdr, uint32_t value,
>>       ", 2, y_offset, 3, value, value, value, value);
>>   }
>> +/**
>> + * gpgpu_shader__write_offset:
>> + * @shdr: shader to be modified
>> + * @ppgtt_offset: write target virtual address
>> + * @value: dword to be written
>> + *
>> + * Fill oword at @ppgtt with dword stored in @value.
>> + *
>> + * Note: for the write to succeed, the address specified by 
>> @ppgtt_offset has
>> + * to be bound. Otherwise a page fault will be triggered.
>> + */
>> +void gpgpu_shader__write_offset(struct gpgpu_shader *shdr, uint64_t 
>> ppgtt_offset,
>> +                uint32_t value)
> 
> The name is somewhat misleading; maybe gpgpu_shader__fill_a64_4dw?
> Or is there anything better?
> 
I will rename the functions to make their purpose clearer and send the
patch as version 2.
Candidate names: gpgpu_shader__write_a64_dword() and
gpgpu_shader__read_a64_dword().
>> +{
>> +    uint64_t offset = CANONICAL(ppgtt_offset);
>> +    igt_assert_f((offset & 0xf) == 0, "Offset must be aligned to 
>> oword!\n");
>> +
>> +    emit_iga64_code(shdr, write_offset, "                    \n\
>> +#if GEN_VER < 2000 // Media Block Write                        \n\
>> +(W)    mov (8|M0)        r30.0<1>:ud    0x0:ud                \n\
>> +    // canonical address                            \n\
>> +(W)    mov (1|M0)        r30.0<1>:ud    ARG(0):ud            \n\
>> +(W)    mov (1|M0)        r30.1<1>:ud    ARG(1):ud            \n\
>> +    // written value                            \n\
>> +(W)    mov (1|M0)        r31.0<1>:ud    ARG(2):ud            \n\
>> +(W)    mov (1|M0)        r31.1<1>:ud    ARG(3):ud            \n\
>> +(W)    mov (1|M0)        r31.2<1>:ud    ARG(4):ud            \n\
>> +(W)    mov (1|M0)        r31.3<1>:ud    ARG(5):ud            \n\
> 
> It could be replaced by "mov (4) r31.0<1>:ud ARG(2):ud", and then the
> duplicated arguments ARG(3)-ARG(5) could be removed.
> 
I will remove the shader code that is not currently in use and remove
the duplicate code in the next version.
>> +    // owblock write                            \n\
>> +(W)    send.dc1 (16|M0)    null    r30    r31    0x0    0x20d40ff    \n\
>> +    // owblock read, to block the thread until the write is 
>> materialized    \n\
>> +(W)    send.dc1 (16|M0)    r32    r30    null    0x0    0x21500ff    \n\
>> +#else // Unyped 2D Block Store                            \n\
>> +// Instruction_Store2DBlock                            \n\
>> +// bspec: 63981                                    \n\
>> +// src0 address payload (Untyped2DBLOCKAddressPayload) specifies 
>> both        \n\
>> +//    the block parameters and the 2D Surface parameters.            \n\
>> +// src1 data payload format is selected by Data Size.                \n\
>> +// Untyped2DBLOCKAddressPayload                            \n\
>> +// bspec: 63986                                    \n\
>> +// [243:240] Array Length: 0 (length is 1)                    \n\
>> +// [239:232] Block Height: 0 (height is 1)                    \n\
>> +// [231:224] Block Width: 0xf (width is 16)                    \n\
>> +// [223:192] Block Start Y: 0                            \n\
>> +// [191:160] Block Start X: 0                            \n\
>> +// [159:128] Untyped 2D Surface Pitch: 0x3f (pitch is 64 
>> bytes)            \n\
>> +// [127:96] Untyped 2D Surface Height: 0 (height is 1)                
>> \n\
>> +// [95:64] Untyped 2D Surface Width: 0x3f (width is 64 
>> bytes)            \n\
>> +// [63:0] Untyped 2D Surface Base Address                    \n\
>> +// initialize register                                \n\
>> +(W)    mov (8)            r30.0<1>:uq    0x0:uq                \n\
>> +// [0:31] Untyped 2D Surface Base Address low                    \n\
>> +(W)    mov (1)            r30.0<1>:ud    ARG(0):ud            \n\
>> +// [32:63] Untyped 2D Surface Base Address high                    \n\
>> +(W)    mov (1)            r30.1<1>:ud ARG(1):ud                \n\
>> +// [95:64] Untyped 2D Surface Width: 0x3f                    \n\
>> +//       (Width minus 1 (in bytes) of the 2D surface, it represents 
>> 64)    \n\
>> +(W)    mov (1)         r30.2<1>:ud    0x3f:ud                \n\
>> +// [127:96] Untyped 2D Surface Height: 0x0                    \n\
>> +//        (Height minus 1 (in number of data elements) of            \n\
>> +//        the Untyped 2D surface, it represents 1)                \n\
>> +(W)    mov (1)         r30.3<1>:ud    0x0:ud                \n\
>> +// [159:128] Untyped 2D Surface Pitch: 0x3f                    \n\
>> +//         (Pitch minus 1 (in bytes) of the 2D surface, it represents 
>> 64)    \n\
>> +(W)    mov (1)            r30.4<1>:ud    0x3f:ud                \n\
>> +// [231:224] Block Width: 0xf (15)                        \n\
>> +//         (Specifies the width minus 1 (in number of data elements) 
>> for this    \n\
>> +//         rectangular region, it represents 16)                \n\
>> +// Block width (encoded_value + 1) must be a multiple of DW (4 
>> bytes).        \n\
>> +// [239:232] Block Height: 0                            \n\
>> +//         (Specifies the height minus 1 (in number of data elements) 
>> for    \n\
>> +//         this rectangular region, it represents 1)                \n\
>> +// [243:240] Array Length: 0                            \n\
>> +//         (Specifies Array Length minus 1 for Load2DBlockArray 
>> messages,    \n\
>> +//         must be zero for 2D Block Store messages, it represents 
>> 1)        \n\
>> +(W)    mov (1)            r30.7<1>:ud    0xf:ud                \n\
>> +// src1 data payload size                            \n\
>> +// Block Height x Block Width x Data size / GRF Register 
>> size            \n\
>> +//    => 1 x 16 x 32bit / 512bit = 1                        \n\
>> +// data payload size is 1                            \n\
>> +(W)    mov (8)            r31.0<1>:uq    0x0:uq                \n\
>> +(W)    mov (1|M0)        r31.0<1>:ud     ARG(2):ud            \n\
>> +(W)    mov (1|M0)        r31.1<1>:ud    ARG(3):ud            \n\
>> +(W)    mov (1|M0)        r31.2<1>:ud    ARG(4):ud            \n\
>> +(W)    mov (1|M0)        r31.3<1>:ud    ARG(5):ud            \n\
>> +// send.ugm Untyped 2D Block Array Store                    \n\
>> +// Format: send.ugm (1) dst src0 src1 ExtMsg MsgDesc                \n\
>> +// Execution Mask restriction: SIMT1                        \n\
>> +//                                        \n\
>> +// Extended Message Descriptor (Dataport Extended Descriptor Imm 2D 
>> Block)    \n\
>> +// bspec: 67780                                    \n\
>> +// 0x0 =>                                    \n\
>> +// [32:22] Global Y_offset: 0                            \n\
>> +// [21:12] Global X_offset: 0                            \n\
>> +//                                        \n\
>> +// Message Descriptor                                \n\
>> +// bspec: 63981                                    \n\
>> +// 0x2020407 =>                                    \n\
>> +// [30:29] Address Type: 0 (FLAT)                        \n\
>> +// [28:25] Src0 Length: 1                            \n\
>> +// [24:20] Dest Length: 0                            \n\
>> +// [19:16] Cache : 2 (L1UC_L3UC)                        \n\
>> +// [11:9] Data Size: 2 (D32)                            \n\
>> +// [5:0] Store Operation: 7                            \n\
>> +(W)    send.ugm (1)        null    r30    r31:1    0x0    
>> 0x2020407    \n\
>> +#endif                                        \n\
>> +    ", offset & 0xffffffff, offset >> 32, value, value, value, value);
> 
> With the above change and the proper macros, the line above becomes:
> , lower_32_bits(offset), upper_32_bits(offset), value);
> 
>> +}
>> +
>>   /**
>>    * gpgpu_shader__clear_exception:
>>    * @shdr: shader to be modified
>> diff --git a/lib/gpgpu_shader.h b/lib/gpgpu_shader.h
>> index c7c21c115..355b128b5 100644
>> --- a/lib/gpgpu_shader.h
>> +++ b/lib/gpgpu_shader.h
>> @@ -83,6 +83,8 @@ void gpgpu_shader__write_aip(struct gpgpu_shader 
>> *shdr, uint32_t y_offset);
>>   void gpgpu_shader__increase_aip(struct gpgpu_shader *shdr, uint32_t 
>> value);
>>   void gpgpu_shader__write_dword(struct gpgpu_shader *shdr, uint32_t 
>> value,
>>                      uint32_t y_offset);
>> +void gpgpu_shader__write_offset(struct gpgpu_shader *shdr, uint64_t 
>> ppgtt_offset,
>> +                uint32_t value);
>>   void gpgpu_shader__write_on_exception(struct gpgpu_shader *shdr, 
>> uint32_t dw, uint32_t x_offset,
>>                         uint32_t y_offset, uint32_t mask, uint32_t 
>> value);
>>   void gpgpu_shader__label(struct gpgpu_shader *shdr, int label_id);
>> diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
>> index 6638be07b..b23613ac4 100644
>> --- a/lib/iga64_generated_codes.c
>> +++ b/lib/iga64_generated_codes.c
>> @@ -3,7 +3,7 @@
>>   #include "gpgpu_shader.h"
>> -#define MD5_SUM_IGA64_ASMS ec9d477415eebb7d6983395f1bcde78f
>> +#define MD5_SUM_IGA64_ASMS 4fcde43dedb9d3212f1d85b5b180b0c1
>>   struct iga64_template const iga64_code_gpgpu_fill[] = {
>>       { .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
>> @@ -323,6 +323,85 @@ struct iga64_template const 
>> iga64_code_clear_exception[] = {
>>       }}
>>   };
>> +struct iga64_template const iga64_code_write_offset[] = {
>> +    { .gen_ver = 2000, .size = 64, .code = (const uint32_t []) {
>> +        0x800c0061, 0x1e054330, 0x00000000, 0x00000000,
>> +        0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
>> +        0x80000061, 0x1e154220, 0x00000000, 0xc0ded001,
>> +        0x80000061, 0x1e254220, 0x00000000, 0x0000003f,
>> +        0x80000061, 0x1e354220, 0x00000000, 0x00000000,
>> +        0x80000061, 0x1e454220, 0x00000000, 0x0000003f,
>> +        0x80000061, 0x1e754220, 0x00000000, 0x0000000f,
>> +        0x800c0061, 0x1f054330, 0x00000000, 0x00000000,
>> +        0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
>> +        0x80000061, 0x1f154220, 0x00000000, 0xc0ded003,
>> +        0x80000061, 0x1f254220, 0x00000000, 0xc0ded004,
>> +        0x80000061, 0x1f354220, 0x00000000, 0xc0ded005,
>> +        0x80032031, 0x00000000, 0xf80e1e0c, 0x00801f0c,
>> +        0x80000001, 0x00010000, 0x20000000, 0x00000000,
>> +        0x80000001, 0x00010000, 0x30000000, 0x00000000,
>> +        0x80000901, 0x00010000, 0x00000000, 0x00000000,
>> +    }},
>> +    { .gen_ver = 1270, .size = 52, .code = (const uint32_t []) {
>> +        0x80030061, 0x1e054220, 0x00000000, 0x00000000,
>> +        0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
>> +        0x80000061, 0x1e254220, 0x00000000, 0xc0ded001,
>> +        0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
>> +        0x80000061, 0x1f254220, 0x00000000, 0xc0ded003,
>> +        0x80000061, 0x1f454220, 0x00000000, 0xc0ded004,
>> +        0x80000061, 0x1f654220, 0x00000000, 0xc0ded005,
>> +        0x80001d01, 0x00010000, 0x00000000, 0x00000000,
>> +        0x80044031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
>> +        0x80044131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
>> +        0x80000001, 0x00010000, 0x20000000, 0x00000000,
>> +        0x80000001, 0x00010000, 0x30000000, 0x00000000,
>> +        0x80000901, 0x00010000, 0x00000000, 0x00000000,
>> +    }},
>> +    { .gen_ver = 1260, .size = 48, .code = (const uint32_t []) {
>> +        0x800c0061, 0x1e054220, 0x00000000, 0x00000000,
>> +        0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
>> +        0x80000061, 0x1e154220, 0x00000000, 0xc0ded001,
>> +        0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
>> +        0x80000061, 0x1f154220, 0x00000000, 0xc0ded003,
>> +        0x80000061, 0x1f254220, 0x00000000, 0xc0ded004,
>> +        0x80000061, 0x1f354220, 0x00000000, 0xc0ded005,
>> +        0x8013a031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
>> +        0x8010c131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
>> +        0x80000001, 0x00010000, 0x20000000, 0x00000000,
>> +        0x80000001, 0x00010000, 0x30000000, 0x00000000,
>> +        0x80000901, 0x00010000, 0x00000000, 0x00000000,
>> +    }},
>> +    { .gen_ver = 1250, .size = 52, .code = (const uint32_t []) {
>> +        0x80030061, 0x1e054220, 0x00000000, 0x00000000,
>> +        0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
>> +        0x80000061, 0x1e254220, 0x00000000, 0xc0ded001,
>> +        0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
>> +        0x80000061, 0x1f254220, 0x00000000, 0xc0ded003,
>> +        0x80000061, 0x1f454220, 0x00000000, 0xc0ded004,
>> +        0x80000061, 0x1f654220, 0x00000000, 0xc0ded005,
>> +        0x80001d01, 0x00010000, 0x00000000, 0x00000000,
>> +        0x80044031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
>> +        0x80044131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
>> +        0x80000001, 0x00010000, 0x20000000, 0x00000000,
>> +        0x80000001, 0x00010000, 0x30000000, 0x00000000,
>> +        0x80000901, 0x00010000, 0x00000000, 0x00000000,
>> +    }},
>> +    { .gen_ver = 0, .size = 48, .code = (const uint32_t []) {
>> +        0x80030061, 0x1e054220, 0x00000000, 0x00000000,
>> +        0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
>> +        0x80000061, 0x1e254220, 0x00000000, 0xc0ded001,
>> +        0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
>> +        0x80000061, 0x1f254220, 0x00000000, 0xc0ded003,
>> +        0x80000061, 0x1f454220, 0x00000000, 0xc0ded004,
>> +        0x80000061, 0x1f654220, 0x00000000, 0xc0ded005,
>> +        0x8004d031, 0x00000000, 0xc1fe1e0c, 0x03501f04,
>> +        0x80044131, 0x200c0000, 0xc1fe1e0c, 0x01400000,
>> +        0x80000001, 0x00010000, 0x20000000, 0x00000000,
>> +        0x80000001, 0x00010000, 0x30000000, 0x00000000,
>> +        0x80000101, 0x00010000, 0x00000000, 0x00000000,
>> +    }}
>> +};
>> +
>>   struct iga64_template const iga64_code_media_block_write[] = {
>>       { .gen_ver = 2000, .size = 56, .code = (const uint32_t []) {
>>           0x80100061, 0x04054220, 0x00000000, 0x00000000,
>