[PATCH v3 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

Tue Sep 27 14:28:53 UTC 2022

Am 2022-09-26 um 17:40 schrieb Shashank Sharma:
> Allow the user to specify a workload hint to the kernel.
> We can use these to tweak the dpm heuristics to better match
> the workload for improved performance.
>
> V3: Create only set() workload UAPI (Christian)
>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
>   include/uapi/drm/amdgpu_drm.h | 17 +++++++++++++++++
>   1 file changed, 17 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index c2c9c674a223..23d354242699 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -212,6 +212,7 @@ union drm_amdgpu_bo_list {
>   #define AMDGPU_CTX_OP_QUERY_STATE2	4
>   #define AMDGPU_CTX_OP_GET_STABLE_PSTATE	5
>   #define AMDGPU_CTX_OP_SET_STABLE_PSTATE	6
> +#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE	7
>   
>   /* GPU reset status */
>   #define AMDGPU_CTX_NO_RESET		0
> @@ -252,6 +253,17 @@ union drm_amdgpu_bo_list {
>   #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
>   #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
>   
> +/* GPU workload hints, flag bits 8-15 */
> +#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT     8
> +#define AMDGPU_CTX_WORKLOAD_HINT_MASK      (0xff << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

8 bits seems overkill for this. Are we ever going to have 256 different 
workload types? Maybe 4 bits would be enough. That would allow up to 16 
types.

> +#define AMDGPU_CTX_WORKLOAD_HINT_NONE      (0 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_3D        (1 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO     (2 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_VR        (3 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_MAX	   AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
> +#define AMDGPU_CTX_WORKLOAD_INDEX(n)	   (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

The macro argument (n) should be wrapped in parentheses. Also, it may be 
a good idea to apply the AMDGPU_CTX_WORKLOAD_HINT_MASK when extracting 
the index, in case more flags are added at higher bits in the future:

     (((n) & AMDGPU_CTX_WORKLOAD_HINT_MASK) >> AMDGPU_WORKLOAD_HINT_SHIFT)

Regards,
   Felix

> +
>   struct drm_amdgpu_ctx_in {
>   	/** AMDGPU_CTX_OP_* */
>   	__u32	op;
> @@ -281,6 +293,11 @@ union drm_amdgpu_ctx_out {
>   			__u32	flags;
>   			__u32	_pad;
>   		} pstate;
> +
> +		struct {
> +			__u32	flags;
> +			__u32	_pad;
> +		} workload;
>   };
>   
>   union drm_amdgpu_ctx {