[PATCH 1/8] drm/amdgpu: UAPI for user queue management
Christian König
ckoenig.leichtzumerken at gmail.com
Tue Feb 7 07:03:18 UTC 2023
Am 06.02.23 um 22:03 schrieb Alex Deucher:
> On Mon, Feb 6, 2023 at 12:01 PM Christian König
> <christian.koenig at amd.com> wrote:
>> Am 06.02.23 um 17:56 schrieb Alex Deucher:
>>> On Fri, Feb 3, 2023 at 5:26 PM Shashank Sharma <shashank.sharma at amd.com> wrote:
>>>> Hey Alex,
>>>>
>>>> On 03/02/2023 23:07, Alex Deucher wrote:
>>>>> On Fri, Feb 3, 2023 at 4:54 PM Shashank Sharma <shashank.sharma at amd.com> wrote:
>>>>>> From: Alex Deucher <alexander.deucher at amd.com>
>>>>>>
>>>>>> This patch introduces a new UAPI/IOCTL for usermode graphics
>>>>>> queue. The userspace app will fill this structure and request
>>>>>> the graphics driver to add a graphics work queue for it. The
>>>>>> output of this UAPI is a queue id.
>>>>>>
>>>>>> This UAPI maps the queue into GPU, so the graphics app can start
>>>>>> submitting work to the queue as soon as the call returns.
>>>>>>
>>>>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>>>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>>>>> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
>>>>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>>>>> ---
>>>>>> include/uapi/drm/amdgpu_drm.h | 53 +++++++++++++++++++++++++++++++++++
>>>>>> 1 file changed, 53 insertions(+)
>>>>>>
>>>>>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>>>>>> index 4038abe8505a..6c5235d107b3 100644
>>>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>>>> @@ -54,6 +54,7 @@ extern "C" {
>>>>>> #define DRM_AMDGPU_VM 0x13
>>>>>> #define DRM_AMDGPU_FENCE_TO_HANDLE 0x14
>>>>>> #define DRM_AMDGPU_SCHED 0x15
>>>>>> +#define DRM_AMDGPU_USERQ 0x16
>>>>>>
>>>>>> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>>>>> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>>>>>> @@ -71,6 +72,7 @@ extern "C" {
>>>>>> #define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>>>>>> #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>>>>>> #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
>>>>>> +#define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>>>>>>
>>>>>> /**
>>>>>> * DOC: memory domains
>>>>>> @@ -302,6 +304,57 @@ union drm_amdgpu_ctx {
>>>>>> union drm_amdgpu_ctx_out out;
>>>>>> };
>>>>>>
>>>>>> +/* user queue IOCTL */
>>>>>> +#define AMDGPU_USERQ_OP_CREATE 1
>>>>>> +#define AMDGPU_USERQ_OP_FREE 2
>>>>>> +
>>>>>> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE (1 << 0)
>>>>>> +#define AMDGPU_USERQ_MQD_FLAGS_AQL (1 << 1)
>>>>>> +
>>>>>> +struct drm_amdgpu_userq_mqd {
>>>>>> + /** Flags: AMDGPU_USERQ_MQD_FLAGS_* */
>>>>>> + __u32 flags;
>>>>>> + /** IP type: AMDGPU_HW_IP_* */
>>>>>> + __u32 ip_type;
>>>>>> + /** GEM object handle */
>>>>>> + __u32 doorbell_handle;
>>>>>> + /** Doorbell offset in dwords */
>>>>>> + __u32 doorbell_offset;
>>>>> Since doorbells are 64 bit, maybe this offset should be in qwords.
>>>> Can you please help to cross check this information ? All the existing
>>>> kernel doorbell calculations are keeping doorbells size as sizeof(u32)
>>> Doorbells on pre-vega hardware are 32 bits so that is where that comes
>>> from, but from vega onward most doorbells are 64 bit. I think some
>>> versions of VCN may still use 32 bit doorbells. Internally in the
>>> kernel driver we just use two slots for newer hardware, but for the
>>> UAPI, I think we can just stick with 64 bit slots to avoid confusion.
>>> Even if an engine only uses a 32 bit one, I don't know that there is
>>> much value to trying to support variable doorbell sizes.
>> I think we can stick with using __u32 because this is *not* the size of
>> the doorbell entries.
>>
>> Instead this is the offset into the BO where to find the doorbell for
>> this queue (which then in turn is 64bits wide).
>>
>> Since we will probably never have more than 4GiB doorbells we should be
>> pretty safe to use 32 bits here.
> Yes, the offset would still be 32 bits, but the units would be qwords. E.g.,
>
> + /** Doorbell offset in qwords */
> + __u32 doorbell_offset;
>
> That way you couldn't accidentally specify an overlapping doorbell.
Ah, so you only wanted to fix the comment. That was absolutely not clear
from the discussion.
Christian.
>
> Alex
>
>> Christian.
>>
>>> Alex
>>>
>>>>>> + /** GPU virtual address of the queue */
>>>>>> + __u64 queue_va;
>>>>>> + /** Size of the queue in bytes */
>>>>>> + __u64 queue_size;
>>>>>> + /** GPU virtual address of the rptr */
>>>>>> + __u64 rptr_va;
>>>>>> + /** GPU virtual address of the wptr */
>>>>>> + __u64 wptr_va;
>>>>>> +};
>>>>>> +
>>>>>> +struct drm_amdgpu_userq_in {
>>>>>> + /** AMDGPU_USERQ_OP_* */
>>>>>> + __u32 op;
>>>>>> + /** Flags */
>>>>>> + __u32 flags;
>>>>>> + /** Queue handle to associate the queue free call with,
>>>>>> + * unused for queue create calls */
>>>>>> + __u32 queue_id;
>>>>>> + __u32 pad;
>>>>>> + /** Queue descriptor */
>>>>>> + struct drm_amdgpu_userq_mqd mqd;
>>>>>> +};
>>>>>> +
>>>>>> +struct drm_amdgpu_userq_out {
>>>>>> + /** Queue handle */
>>>>>> + __u32 q_id;
>>>>> Maybe this should be queue_id to match the input.
>>>> Agree.
>>>>
>>>> - Shashank
>>>>
>>>>> Alex
>>>>>
>>>>>> + /** Flags */
>>>>>> + __u32 flags;
>>>>>> +};
>>>>>> +
>>>>>> +union drm_amdgpu_userq {
>>>>>> + struct drm_amdgpu_userq_in in;
>>>>>> + struct drm_amdgpu_userq_out out;
>>>>>> +};
>>>>>> +
>>>>>> /* vm ioctl */
>>>>>> #define AMDGPU_VM_OP_RESERVE_VMID 1
>>>>>> #define AMDGPU_VM_OP_UNRESERVE_VMID 2
>>>>>> --
>>>>>> 2.34.1
>>>>>>
More information about the amd-gfx
mailing list