[PATCH i-g-t v2 1/4] lib/gpgpu_shader: Add write D32 to ppgtt virtual address

Fri Nov 22 10:47:55 UTC 2024

W dniu 22.11.2024 o 08:51, Gwan-gyeong Mun pisze:
>
>
> On 11/21/24 6:08 PM, Hajda, Andrzej wrote:
>>
>> W dniu 21.11.2024 o 13:22, Gwan-gyeong Mun pisze:
>>> From: Jonathan Cavitt <jonathan.cavitt at intel.com>
>>>
>>> Create a function that adds the capability to write a dword value to
>>> a given ppgtt address. Use the Untyped 2D Block Array Store
>>> DataPort functionality of XE2+ with A64 flat addressing to directly
>>> access the entire ppgtt address space.
>>>
>>> For the write to succeed, the given ppgtt virtual address has to be 
>>> bound.
>>> Otherwise a store page fault will be triggered.
>>>
>>> v2: Fix the function name to be more clear. (Andrzej)
>>>      Use lower_32_bits() / upper_32_bits() macro (Andrzej)
>>>      Drop unused code
>>>
>>> Suggested-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
>>> Co-developed-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
>>> Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun at intel.com>
>>> Signed-off-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
>>> ---
>>>   lib/gpgpu_shader.c          | 94 
>>> +++++++++++++++++++++++++++++++++++++
>>>   lib/gpgpu_shader.h          |  2 +
>>>   lib/iga64_generated_codes.c | 23 ++++++++-
>>>   3 files changed, 118 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
>>> index 4e1b8d5e9..d9da35895 100644
>>> --- a/lib/gpgpu_shader.c
>>> +++ b/lib/gpgpu_shader.c
>>> @@ -803,3 +803,97 @@ void 
>>> gpgpu_shader__end_system_routine_step_if_eq(struct gpgpu_shader *shdr,
>>>       ", 0x807fffff, /* leave breakpoint exception */
>>>       y_offset, value, 0x7fffff /* clear all exceptions */ );
>>>   }
>>> +
>>> +/**
>>> + * gpgpu_shader__write_a64_dword:
>>> + * @shdr: shader to be modified
>>> + * @ppgtt_addr: write target ppgtt virtual address
>>> + * @value: dword to be written
>>> + *
>>> + * Write one D32 data (DW; DoubleWord) directly to the target ppgtt 
>>> virtual
>>> + * address (A64 Flat Address model).
>>> + *
>>> + * Note: for the write to succeed, the address specified by 
>>> @ppgtt_addr has
>>> + * to be bound. Otherwise a store page fault will be triggered.
>>> + */
>>> +void gpgpu_shader__write_a64_dword(struct gpgpu_shader *shdr, 
>>> uint64_t ppgtt_addr,
>>> +                   uint32_t value)
>>
>>
>> Nice name, could be even gpgpu_shader__write_a64_d32, to follow spec 
>> convention.
>>
> Ok, will update.
>>
>>> +{
>>> +    uint64_t addr = CANONICAL(ppgtt_addr);
>>> +    igt_assert_f((addr & 0x3) == 0, "address must be aligned to 
>>> DWord!\n");
>>> +
>>> +    emit_iga64_code(shdr, write_a64_dword, " \n\
>>> +#if GEN_VER >= 2000                                \n\
>>> +// Unyped 2D Block Store                            \n\
>>> +// Instruction_Store2DBlock                            \n\
>>> +// bspec: 63981                                    \n\
>>> +// src0 address payload (Untyped2DBLOCKAddressPayload) specifies 
>>> both        \n\
>>> +//    the block parameters and the 2D Surface 
>>> parameters.            \n\
>>> +// src1 data payload format is selected by Data 
>>> Size.                \n\
>>> +// Untyped2DBLOCKAddressPayload \n\
>>> +// bspec: 63986                                    \n\
>>> +// [243:240] Array Length: 0 (length is 1) \n\
>>> +// [239:232] Block Height: 0 (height is 1) \n\
>>> +// [231:224] Block Width: 0xf (width is 16)                    \n\
>>> +// [223:192] Block Start Y: 0                            \n\
>>> +// [191:160] Block Start X: 0                            \n\
>>> +// [159:128] Untyped 2D Surface Pitch: 0x3f (pitch is 64 
>>> bytes)            \n\
>>> +// [127:96] Untyped 2D Surface Height: 0 (height is 
>>> 1)                \n\
>>> +// [95:64] Untyped 2D Surface Width: 0x3f (width is 64 
>>> bytes)            \n\
>>> +// [63:0] Untyped 2D Surface Base Address \n\
>>> +// initialize register                                \n\
>>> +(W)    mov (8)            r30.0<1>:uq 0x0:uq                \n\
>>> +// [0:31] Untyped 2D Surface Base Address low                    \n\
>>> +(W)    mov (1)            r30.0<1>:ud ARG(0):ud            \n\
>>> +// [32:63] Untyped 2D Surface Base Address high                    \n\
>>> +(W)    mov (1)            r30.1<1>:ud ARG(1):ud                \n\
>>> +// [95:64] Untyped 2D Surface Width: 0x3f \n\
>>> +//       (Width minus 1 (in bytes) of the 2D surface, it represents 
>>> 64)    \n\
>>> +(W)    mov (1)         r30.2<1>:ud 0x3f:ud                \n\
>>> +// [127:96] Untyped 2D Surface Height: 0x0 \n\
>>> +//        (Height minus 1 (in number of data elements) 
>>> of            \n\
>>> +//        the Untyped 2D surface, it represents 1)                \n\
>>> +(W)    mov (1)         r30.3<1>:ud 0x0:ud                \n\
>>> +// [159:128] Untyped 2D Surface Pitch: 0x3f                    \n\
>>> +//         (Pitch minus 1 (in bytes) of the 2D surface, it 
>>> represents 64)    \n\
>>> +(W)    mov (1)            r30.4<1>:ud 0x3f:ud                \n\
>>> +// [231:224] Block Width: 0xf (15)                        \n\
>>> +//         (Specifies the width minus 1 (in number of data 
>>> elements) for this    \n\
>>> +//         rectangular region, it represents 16)                \n\
>>> +// Block width (encoded_value + 1) must be a multiple of DW (4 
>>> bytes).        \n\
>>> +// [239:232] Block Height: 0                            \n\
>>> +//         (Specifies the height minus 1 (in number of data 
>>> elements) for    \n\
>>> +//         this rectangular region, it represents 1)                
>>> \n\
>>> +// [243:240] Array Length: 0                            \n\
>>> +//         (Specifies Array Length minus 1 for Load2DBlockArray 
>>> messages,    \n\
>>> +//         must be zero for 2D Block Store messages, it represents 
>>> 1)        \n\
>>> +(W)    mov (1)            r30.7<1>:ud 0xf:ud                \n\
>>
>>
>> Shouldn't this be 0x3:ud now, for dword ?
>>
> No, r30.7 needs to set 0xf:ud.

0xf means the block width is 16 bytes, i.e. 4 dwords, but the description 
says one D32 is written.

The source register r31 is also loaded with a single D32, and the rest is zeroed.

So what is going on here? Is it writing one dword, or 4 dwords (one provided, 
and the rest 0)?

Regards

Andrzej

>
> please refer to Structure_Untyped2DBLOCKAddressPayload: bspec 63986
>
> [31:0]    => r30.0:ud  <= [0:31] Untyped 2D Surface Base Address low
> [63:32]   => r30.1:ud  <= [32:63] Untyped 2D Surface Base Address high
> [95:64]   => r30.2:ud  <= [95:64] Untyped 2D Surface Width: 0x3f
> [127:96]  => r30.3:ud  <= [127:96] Untyped 2D Surface Height: 0
> [159:128] => r30.4:ud  <= [159:128] Untyped 2D Surface Pitch: 0x3f
> [191:160] => r30.5:ud  <= [191:160] Block Start X: 0
> [223:192] => r30.6:ud  <= [223:192] Block Start Y: 0
> [251:224] => r30.7:ud  <= [231:224] Block Width: 0xf
>                           [239:232] Block Height: 0
>                           [243:240] Array Length: 0
>                           ; 0xf
>
> br,
>
> G.G.
>> Beside those details:
>>
>> Reviewed-by: Andrzej Hajda <andrzej.hajda at intel.com>
>>
>> Regards
>> Andrzej
>>
>>> +// src1 data payload size                            \n\
>>> +// Block Height x Block Width x Data size / GRF Register 
>>> size            \n\
>>> +//    => 1 x 16 x 32bit / 512bit = 1                        \n\
>>> +// data payload size is 1                            \n\
>>> +(W)    mov (8)            r31.0<1>:uq 0x0:uq                \n\
>>> +(W)    mov (1|M0)        r31.0<1>:ud ARG(2):ud            \n\
>>> +// send.ugm Untyped 2D Block Array Store \n\
>>> +// Format: send.ugm (1) dst src0 src1 ExtMsg MsgDesc                
>>> \n\
>>> +// Execution Mask restriction: SIMT1 \n\
>>> +//                                        \n\
>>> +// Extended Message Descriptor (Dataport Extended Descriptor Imm 2D 
>>> Block)    \n\
>>> +// bspec: 67780                                    \n\
>>> +// 0x0 =>                                    \n\
>>> +// [32:22] Global Y_offset: 0                            \n\
>>> +// [21:12] Global X_offset: 0                            \n\
>>> +//                                        \n\
>>> +// Message Descriptor                                \n\
>>> +// bspec: 63981                                    \n\
>>> +// 0x2020407 =>                                    \n\
>>> +// [30:29] Address Type: 0 (FLAT)                        \n\
>>> +// [28:25] Src0 Length: 1                            \n\
>>> +// [24:20] Dest Length: 0                            \n\
>>> +// [19:16] Cache : 2 (L1UC_L3UC)                        \n\
>>> +// [11:9] Data Size: 2 (D32)                            \n\
>>> +// [5:0] Store Operation: 7                            \n\
>>> +(W)    send.ugm (1)        null    r30    r31:1    0x0 0x2020407    
>>> \n\
>>> +#endif                                        \n\
>>> +    ", lower_32_bits(addr), upper_32_bits(addr), value);
>>> +}
>>> diff --git a/lib/gpgpu_shader.h b/lib/gpgpu_shader.h
>>> index c7c21c115..18a4c9725 100644
>>> --- a/lib/gpgpu_shader.h
>>> +++ b/lib/gpgpu_shader.h
>>> @@ -85,6 +85,8 @@ void gpgpu_shader__write_dword(struct gpgpu_shader 
>>> *shdr, uint32_t value,
>>>                      uint32_t y_offset);
>>>   void gpgpu_shader__write_on_exception(struct gpgpu_shader *shdr, 
>>> uint32_t dw, uint32_t x_offset,
>>>                         uint32_t y_offset, uint32_t mask, uint32_t 
>>> value);
>>> +void gpgpu_shader__write_a64_dword(struct gpgpu_shader *shdr, 
>>> uint64_t ppgtt_addr,
>>> +                   uint32_t value);
>>>   void gpgpu_shader__label(struct gpgpu_shader *shdr, int label_id);
>>>   void gpgpu_shader__jump(struct gpgpu_shader *shdr, int label_id);
>>>   void gpgpu_shader__jump_neq(struct gpgpu_shader *shdr, int label_id,
>>> diff --git a/lib/iga64_generated_codes.c b/lib/iga64_generated_codes.c
>>> index 6638be07b..e97bcf042 100644
>>> --- a/lib/iga64_generated_codes.c
>>> +++ b/lib/iga64_generated_codes.c
>>> @@ -3,7 +3,7 @@
>>>   #include "gpgpu_shader.h"
>>> -#define MD5_SUM_IGA64_ASMS ec9d477415eebb7d6983395f1bcde78f
>>> +#define MD5_SUM_IGA64_ASMS a1ee0173014ab4cda3090faeca1cbae1
>>>   struct iga64_template const iga64_code_gpgpu_fill[] = {
>>>       { .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
>>> @@ -79,6 +79,27 @@ struct iga64_template const 
>>> iga64_code_gpgpu_fill[] = {
>>>       }}
>>>   };
>>> +struct iga64_template const iga64_code_write_a64_dword[] = {
>>> +    { .gen_ver = 2000, .size = 52, .code = (const uint32_t []) {
>>> +        0x800c0061, 0x1e054330, 0x00000000, 0x00000000,
>>> +        0x80000061, 0x1e054220, 0x00000000, 0xc0ded000,
>>> +        0x80000061, 0x1e154220, 0x00000000, 0xc0ded001,
>>> +        0x80000061, 0x1e254220, 0x00000000, 0x0000003f,
>>> +        0x80000061, 0x1e354220, 0x00000000, 0x00000000,
>>> +        0x80000061, 0x1e454220, 0x00000000, 0x0000003f,
>>> +        0x80000061, 0x1e754220, 0x00000000, 0x0000000f,
>>> +        0x800c0061, 0x1f054330, 0x00000000, 0x00000000,
>>> +        0x80000061, 0x1f054220, 0x00000000, 0xc0ded002,
>>> +        0x80032031, 0x00000000, 0xf80e1e0c, 0x00801f0c,
>>> +        0x80000001, 0x00010000, 0x20000000, 0x00000000,
>>> +        0x80000001, 0x00010000, 0x30000000, 0x00000000,
>>> +        0x80000901, 0x00010000, 0x00000000, 0x00000000,
>>> +    }},
>>> +    { .gen_ver = 0, .size = 0, .code = (const uint32_t []) {
>>> +
>>> +    }}
>>> +};
>>> +
>>>   struct iga64_template const 
>>> iga64_code_end_system_routine_step_if_eq[] = {
>>>       { .gen_ver = 2000, .size = 44, .code = (const uint32_t []) {
>>>           0x80000966, 0x80018220, 0x02008000, 0x00008000,
>