[PATCH 4/6] drm/amdgpu: UAPI headers for userqueue Secure semaphore
Christian König
christian.koenig at amd.com
Mon Feb 27 12:52:54 UTC 2023
Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:
> - Add UAPI header support for userqueue Secure semaphore
>
> v2: (Christian)
> - Add bo handles,bo flags and padding fields.
> - Include value/va in a combined array.
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
> ---
> .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 1 +
> include/uapi/drm/amdgpu_drm.h | 46 +++++++++++++++++++
> 2 files changed, 47 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
> index b8943e6aea22..5cb255a39732 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
> @@ -22,6 +22,7 @@
> */
> #include "amdgpu.h"
> #include "amdgpu_userqueue.h"
> +#include "amdgpu_userq_fence.h"
> #include "v11_structs.h"
> #include "amdgpu_mes.h"
> #include "mes_api_def.h"
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 2d94cca566e0..bd37c715f5a7 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -56,6 +56,8 @@ extern "C" {
> #define DRM_AMDGPU_SCHED 0x15
> #define DRM_AMDGPU_USERQ 0x16
> #define DRM_AMDGPU_USERQ_DOORBELL_RING 0x17
> +#define DRM_AMDGPU_USERQ_SIGNAL 0x18
> +#define DRM_AMDGPU_USERQ_WAIT 0x19
>
> #define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> #define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -75,6 +77,8 @@ extern "C" {
> #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
> #define DRM_IOCTL_AMDGPU_USERQ DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq)
> #define DRM_IOCTL_AMDGPU_USERQ_DOORBELL_RING DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_DOORBELL_RING, struct drm_amdgpu_db_ring)
> +#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
> +#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>
> /**
> * DOC: memory domains
> @@ -361,6 +365,48 @@ union drm_amdgpu_userq {
> struct drm_amdgpu_userq_out out;
> };
>
> +/* userq signal/wait ioctl */
> +struct drm_amdgpu_userq_signal {
> + /** Queue ID */
> + __u32 queue_id;
> + /** Flags */
> + __u32 flags;
> + /** Sync obj handle */
> + __u32 handle;
Maybe rename to obj_handle or even syncobj_handle.
> + __u32 pad;
> + /* Sync obj timeline */
> + __u64 point;
Same naming scheme here: either obj_point or, even better, syncobj_point.
> + /** number of BO handles */
> + __u64 num_bo_handles;
This can be 32 bit, I think. And when you move bo_flags after this
field we don't even need any padding.
> + /** array of BO handles */
> + __u64 bo_handles_array;
> + /** bo flags */
> + __u32 bo_flags;
> +};
> +
> +struct drm_amdgpu_userq_fence_info {
> + __u64 va;
> + __u64 value;
> +};
> +
> +struct drm_amdgpu_userq_wait {
> + /** Flags */
> + __u32 flags;
> + /** array of Sync obj handles */
> + __u64 handles;
> + __u32 pad;
That padding is misplaced as far as I can see.
> + /** number of Sync obj handles */
> + __u64 count_handles;
Better to use num_syncobj_handles here as well. And u32 should be
sufficient.
If you re-arrange the fields you could also drop the padding.
> + /** number of BO handles */
> + __u64 num_bo_handles;
Again u32 is sufficient for the number of handles.
> + /** array of BO handles */
> + __u64 bo_handles_array;
> + /** bo flags */
> + __u32 bo_wait_flags;
> + /** array of addr/values */
> + __u64 userq_fence_info;
We need a number for that as well. It can be that a BO is idle and has
no fences and it can be that we have tons of fences on a single BO.
The usual semantics is that you first call the kernel driver with
userq_fence_info==0 to query how much space you need to reserve, and
then once more after you have allocated enough.
See the function sync_file_ioctl_fence_info() for a good example of how to do this.
Regards,
Christian.
> +};
> +
> /* vm ioctl */
> #define AMDGPU_VM_OP_RESERVE_VMID 1
> #define AMDGPU_VM_OP_UNRESERVE_VMID 2
More information about the amd-gfx
mailing list