[PATCH v6 1/9] drm/amdgpu: UAPI for user queue management

Wed Oct 4 21:23:14 UTC 2023

On 2023-09-08 12:04, Shashank Sharma wrote:
> From: Alex Deucher <alexander.deucher at amd.com>
>
> This patch intorduces new UAPI/IOCTL for usermode graphics
> queue. The userspace app will fill this structure and request
> the graphics driver to add a graphics work queue for it. The
> output of this UAPI is a queue id.
>
> This UAPI maps the queue into GPU, so the graphics app can start
> submitting work to the queue as soon as the call returns.
>
> V2: Addressed review comments from Alex and Christian
>      - Make the doorbell offset's comment clearer
>      - Change the output parameter name to queue_id
>
> V3: Integration with doorbell manager
>
> V4:
>      - Updated the UAPI doc (Pierre-Eric)
>      - Created a Union for engine specific MQDs (Alex)
>      - Added Christian's R-B
> V5:
>      - Add variables for GDS and CSA in MQD structure (Alex)
>      - Make MQD data a ptr-size pair instead of union (Alex)
>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Reviewed-by: Christian König <christian.koenig at amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
>   include/uapi/drm/amdgpu_drm.h | 110 ++++++++++++++++++++++++++++++++++
>   1 file changed, 110 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 79b14828d542..627b4a38c855 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -54,6 +54,7 @@ extern "C" {
>   #define DRM_AMDGPU_VM			0x13
>   #define DRM_AMDGPU_FENCE_TO_HANDLE	0x14
>   #define DRM_AMDGPU_SCHED		0x15
> +#define DRM_AMDGPU_USERQ		0x16
>   
>   #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>   #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -71,6 +72,7 @@ extern "C" {
>   #define DRM_IOCTL_AMDGPU_VM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm)
>   #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
>   #define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> +#define DRM_IOCTL_AMDGPU_USERQ		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
>   
>   /**
>    * DOC: memory domains
> @@ -304,6 +306,114 @@ union drm_amdgpu_ctx {
>   	union drm_amdgpu_ctx_out out;
>   };
>   
> +/* user queue IOCTL */
> +#define AMDGPU_USERQ_OP_CREATE	1
> +#define AMDGPU_USERQ_OP_FREE	2
> +
> +/* Flag to indicate secure buffer related workload, unused for now */
> +#define AMDGPU_USERQ_MQD_FLAGS_SECURE	(1 << 0)
> +/* Flag to indicate AQL workload, unused for now */
> +#define AMDGPU_USERQ_MQD_FLAGS_AQL	(1 << 1)
> +
> +/*
> + * MQD (memory queue descriptor) is a set of parameters which allow

I find the term MQD misleading. For the firmware the MQD is a very 
different data structure from what you are defining here. It's a 
persistent data structure in kernel address space (VMID0) that is shared 
between the driver and the firmware that gets loaded or updated when 
queues are mapped or unmapped. I'd want to avoid confusing the firmware 
MQD with this structure.

Regards,
   Felix

> + * the GPU to uniquely define and identify a usermode queue. This
> + * structure defines the MQD for GFX-V11 IP ver 0.
> + */
> +struct drm_amdgpu_userq_mqd_gfx_v11_0 {
> +	/**
> +	 * @queue_va: Virtual address of the GPU memory which holds the queue
> +	 * object. The queue holds the workload packets.
> +	 */
> +	__u64   queue_va;
> +	/**
> +	 * @queue_size: Size of the queue in bytes, this needs to be 256-byte
> +	 * aligned.
> +	 */
> +	__u64   queue_size;
> +	/**
> +	 * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR.
> +	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
> +	 */
> +	__u64   rptr_va;
> +	/**
> +	 * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR.
> +	 * This object must be at least 8 byte in size and aligned to 8-byte offset.
> +	 *
> +	 * Queue, RPTR and WPTR can come from the same object, as long as the size
> +	 * and alignment related requirements are met.
> +	 */
> +	__u64   wptr_va;
> +	/**
> +	 * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer.
> +	 * This must be a from a separate GPU object, and must be at least 4-page
> +	 * sized.
> +	 */
> +	__u64   shadow_va;
> +	/**
> +	 * @gds_va: Virtual address of the GPU memory to hold the GDS buffer.
> +	 * This must be a from a separate GPU object, and must be at least 1-page
> +	 * sized.
> +	 */
> +	__u64   gds_va;
> +	/**
> +	 * @csa_va: Virtual address of the GPU memory to hold the CSA buffer.
> +	 * This must be a from a separate GPU object, and must be at least 1-page
> +	 * sized.
> +	 */
> +	__u64   csa_va;
> +};
> +
> +struct drm_amdgpu_userq_in {
> +	/** AMDGPU_USERQ_OP_* */
> +	__u32	op;
> +	/** Queue handle for USERQ_OP_FREE */
> +	__u32	queue_id;
> +	/** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */
> +	__u32   ip_type;
> +	/**
> +	 * @flags: flags to indicate special function for queue like secure
> +	 * buffer (TMZ). Unused for now.
> +	 */
> +	__u32   flags;
> +	/**
> +	 * @doorbell_handle: the handle of doorbell GEM object
> +	 * associated to this client.
> +	 */
> +	__u32   doorbell_handle;
> +	/**
> +	 * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo.
> +	 * Kernel will generate absolute doorbell offset using doorbell_handle
> +	 * and doorbell_offset in the doorbell bo.
> +	 */
> +	__u32   doorbell_offset;
> +	/**
> +	 * @mqd: Queue descriptor for USERQ_OP_CREATE
> +	 * MQD data can be of different size for different GPU IP/engine and
> +	 * their respective versions/revisions, so this points to a __u64 *
> +	 * which holds MQD of this usermode queue.
> +	 */
> +	__u64 mqd;
> +	/**
> +	 * @size: size of MQD data in bytes, it must match the MQD structure
> +	 * size of the respective engine/revision defined in UAPI for ex, for
> +	 * gfx_v11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx_v11).
> +	 */
> +	__u64 mqd_size;
> +};
> +
> +struct drm_amdgpu_userq_out {
> +	/** Queue handle */
> +	__u32	queue_id;
> +	/** Flags */
> +	__u32	flags;
> +};
> +
> +union drm_amdgpu_userq {
> +	struct drm_amdgpu_userq_in in;
> +	struct drm_amdgpu_userq_out out;
> +};
> +
>   /* vm ioctl */
>   #define AMDGPU_VM_OP_RESERVE_VMID	1
>   #define AMDGPU_VM_OP_UNRESERVE_VMID	2