[PATCH 1/8] drm/amdgpu: UAPI for user queue management

Tue Feb 7 07:38:05 UTC 2023

On 07/02/2023 08:03, Christian König wrote:
> Am 06.02.23 um 22:03 schrieb Alex Deucher:
>> On Mon, Feb 6, 2023 at 12:01 PM Christian König
>> <christian.koenig at amd.com> wrote:
>>> Am 06.02.23 um 17:56 schrieb Alex Deucher:
>>>> On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma 
>>>> <shashank.sharma at amd.com> wrote:
>>>>> Hey Alex,
>>>>>
>>>>> On 03/02/2023 23:07, Alex Deucher wrote:
>>>>>> On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma 
>>>>>> <shashank.sharma at amd.com> wrote:
>>>>>>> From: Alex Deucher <alexander.deucher at amd.com>
>>>>>>>
>>>>>>> This patch introduces a new UAPI/IOCTL for usermode graphics
>>>>>>> queue. The userspace app will fill this structure and request
>>>>>>> the graphics driver to add a graphics work queue for it. The
>>>>>>> output of this UAPI is a queue id.
>>>>>>>
>>>>>>> This UAPI maps the queue into GPU, so the graphics app can start
>>>>>>> submitting work to the queue as soon as the call returns.
>>>>>>>
>>>>>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>>>>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>>>>>> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
>>>>>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>>>>>> ---
>>>>>>>     include/uapi/drm/amdgpu_drm.h | 53 
>>>>>>> +++++++++++++++++++++++++++++++++++
>>>>>>>     1 file changed, 53 insertions(+)
>>>>>>>
>>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h 
>>>>>>> b/include/uapi/drm/amdgpu_drm.h
>>>>>>> index 4038abe8505a..6c5235d107b3 100644
>>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>>> @@ -54,6 +54,7 @@ extern "C" {
>>>>>>>     #define DRM_AMDGPU_VM                  0x13
>>>>>>>     #define DRM_AMDGPU_FENCE_TO_HANDLE     0x14
>>>>>>>     #define DRM_AMDGPU_SCHED               0x15
>>>>>>> +#define DRM_AMDGPU_USERQ               0x16
>>>>>>>
>>>>>>>     #define DRM_IOCTL_AMDGPU_GEM_CREATE 
>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union 
>>>>>>> drm_amdgpu_gem_create)
>>>>>>>     #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE 
>>>>>>> + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>>> @@ -71,6 +72,7 @@ extern "C" {
>>>>>>>     #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + 
>>>>>>> DRM_AMDGPU_VM, union drm_amdgpu_vm)
>>>>>>>     #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE 
>>>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union 
>>>>>>> drm_amdgpu_fence_to_handle)
>>>>>>>     #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + 
>>>>>>> DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
>>>>>>> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + 
>>>>>>> DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>>>>>>>
>>>>>>>     /**
>>>>>>>      * DOC: memory domains
>>>>>>> @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
>>>>>>>            union drm_amdgpu_ctx_out out;
>>>>>>>     };
>>>>>>>
>>>>>>> +/* user queue IOCTL */
>>>>>>> +#define AMDGPU_USERQ_OP_CREATE 1
>>>>>>> +#define AMDGPU_USERQ_OP_FREE   2
>>>>>>> +
>>>>>>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE  (1 << 0)
>>>>>>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL     (1 << 1)
>>>>>>> +
>>>>>>> +struct drm_amdgpu_userq_mqd {
>>>>>>> +       /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
>>>>>>> +       __u32   flags;
>>>>>>> +       /** IP type: AMDGPU_HW_IP_* */
>>>>>>> +       __u32   ip_type;
>>>>>>> +       /** GEM object handle */
>>>>>>> +       __u32   doorbell_handle;
>>>>>>> +       /** Doorbell offset in dwords */
>>>>>>> +       __u32   doorbell_offset;
>>>>>> Since doorbells are 64 bit, maybe this offset should be in qwords.
>>>>> Can you please help cross-check this information? All the existing
>>>>> kernel doorbell calculations keep the doorbell size as sizeof(u32).
>>>> Doorbells on pre-vega hardware are 32 bits so that is where that comes
>>>> from, but from vega onward most doorbells are 64 bit.  I think some
>>>> versions of VCN may still use 32 bit doorbells.  Internally in the
>>>> kernel driver we just use two slots for newer hardware, but for the
>>>> UAPI, I think we can just stick with 64 bit slots to avoid confusion.
>>>> Even if an engine only uses a 32 bit one, I don't know that there is
>>>> much value to trying to support variable doorbell sizes.
>>> I think we can stick with using __u32 because this is *not* the size of
>>> the doorbell entries.
>>>
>>> Instead this is the offset into the BO where to find the doorbell for
>>> this queue (which then in turn is 64bits wide).
>>>
>>> Since we will probably never have more than 4GiB doorbells we should be
>>> pretty safe to use 32bits here.
>> Yes, the offset would still be 32 bits, but the units would be 
>> qwords.  E.g.,
>>
>> +       /** Doorbell offset in qwords */
>> +       __u32   doorbell_offset;
>>
>> That way you couldn't accidentally specify an overlapping doorbell.
>
> Ah, so you only wanted to fix the comment. That was absolutely not 
> clear from the discussion.

If I understand this correctly, the offset of the doorbell in the BO is 
still 32-bit, but the doorbell's width (size) is 64 bits, i.e. 8 bytes. 
Am I getting that right?

- Shashank

>
> Christian.
>
>>
>> Alex
>>
>>> Christian.
>>>
>>>> Alex
>>>>
>>>>>>> +       /** GPU virtual address of the queue */
>>>>>>> +       __u64   queue_va;
>>>>>>> +       /** Size of the queue in bytes */
>>>>>>> +       __u64   queue_size;
>>>>>>> +       /** GPU virtual address of the rptr */
>>>>>>> +       __u64   rptr_va;
>>>>>>> +       /** GPU virtual address of the wptr */
>>>>>>> +       __u64   wptr_va;
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct drm_amdgpu_userq_in {
>>>>>>> +       /** AMDGPU_USERQ_OP_* */
>>>>>>> +       __u32   op;
>>>>>>> +       /** Flags */
>>>>>>> +       __u32   flags;
>>>>>>> +       /** Queue handle to associate the queue free call with,
>>>>>>> +        * unused for queue create calls */
>>>>>>> +       __u32   queue_id;
>>>>>>> +       __u32   pad;
>>>>>>> +       /** Queue descriptor */
>>>>>>> +       struct drm_amdgpu_userq_mqd mqd;
>>>>>>> +};
>>>>>>> +
>>>>>>> +struct drm_amdgpu_userq_out {
>>>>>>> +       /** Queue handle */
>>>>>>> +       __u32   q_id;
>>>>>> Maybe this should be queue_id to match the input.
>>>>> Agree.
>>>>>
>>>>> - Shashank
>>>>>
>>>>>> Alex
>>>>>>
>>>>>>> +       /** Flags */
>>>>>>> +       __u32   flags;
>>>>>>> +};
>>>>>>> +
>>>>>>> +union drm_amdgpu_userq {
>>>>>>> +       struct drm_amdgpu_userq_in in;
>>>>>>> +       struct drm_amdgpu_userq_out out;
>>>>>>> +};
>>>>>>> +
>>>>>>>     /* vm ioctl */
>>>>>>>     #define AMDGPU_VM_OP_RESERVE_VMID      1
>>>>>>>     #define AMDGPU_VM_OP_UNRESERVE_VMID    2
>>>>>>> -- 
>>>>>>> 2.34.1
>>>>>>>
>