[PATCH] drm/amdkfd: Remove GPU ID in GWS queue creation

Wed Jul 31 21:36:03 UTC 2019

On 2019-07-26 9:27 p.m., Greathouse, Joseph wrote:
> The gpu_id argument is not needed when enabling GWS on a queue.
> The queue can only be associated with one device, so the only
> possible situations for the call as previously defined were:
> 1) the gpu_id was for the device associated with the target queue
> and things worked as expected, or 2) the gpu_id was for a device
> not associated with the target queue and the request was undefined.
>
> In particular, the previous result of the undefined operation is
> that you would allocate the number of GWS entries available on the
> gpu_id device, even if the queue was on a device with a different
> number available. For example: a queue on a device without GWS
> capability, but the user passes in a gpu_id for a device with GWS.
> We would end up trying to allocate GWS on the device that does not
> support it.
>
> Rather than leaving this footgun around and making life more
> difficult for user space, we can instead grab the gpu_id from the
> target queue. The gpu_id argument being passed in is thus not
> needed. We thus change the field in the ioctl struct to be reserved
> so that nobody expects it to do anything. However, we do not remove
> because that would break user-land API compatibility.
>
> Change-Id: I861cebc8a0a7eab5360da10971a73d5a4700c6d8
> Signed-off-by: Joseph Greathouse <Joseph.Greathouse at amd.com>

Cosmetic comments inline. Otherwise this looks good to me.

> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 19 +++++++++++++------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  2 ++
>   .../amd/amdkfd/kfd_process_queue_manager.c    | 12 ++++++++++++
>   include/uapi/linux/kfd_ioctl.h                |  4 ++--
>   4 files changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f91126f5f1be..46005b1dcf79 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1572,20 +1572,27 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
>   {
>   	int retval;
>   	struct kfd_ioctl_alloc_queue_gws_args *args = data;
> +	struct queue *q;
>   	struct kfd_dev *dev;
>   
>   	if (!hws_gws_support)
>   		return -ENODEV;
>   
> -	dev = kfd_device_by_id(args->gpu_id);
> -	if (!dev) {
> -		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
> -		return -ENODEV;
> +	mutex_lock(&p->mutex);
> +	q = pqm_get_user_queue(&p->pqm, args->queue_id);
> +
> +	if (q)
> +		dev = q->device;

Please use {} around the if-block for consistency with the else-block.

> +	else {
> +		mutex_unlock(&p->mutex);

I'd prefer the error handling with a goto out_unlock. That's a common 
convention in kernel code to minimize error prone duplication of 
unwinding code for error handling.

> +		return -EINVAL;
>   	}
> -	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
> +
> +	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
> +		mutex_unlock(&p->mutex);
>   		return -ENODEV;
> +	}
>   
> -	mutex_lock(&p->mutex);
>   	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
>   	mutex_unlock(&p->mutex);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index aa7bf20d20f8..9b9a8da187c8 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -915,6 +915,8 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
>   			void *gws);
>   struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
>   						unsigned int qid);
> +struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
> +						unsigned int qid);
>   int pqm_get_wave_state(struct process_queue_manager *pqm,
>   		       unsigned int qid,
>   		       void __user *ctl_stack,
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 7e6c3ee82f5b..20dae1fdb16a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -473,6 +473,18 @@ struct kernel_queue *pqm_get_kernel_queue(
>   	return NULL;
>   }
>   
> +struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
> +					unsigned int qid)
> +{
> +	struct process_queue_node *pqn;
> +
> +	pqn = get_queue_by_qid(pqm, qid);
> +	if (pqn && pqn->q)

It would be sufficient to check if (pqn) here. If pqn->q is NULL, you'll 
return NULL either way. You could even condense it into a single return 
statement:

     return pqn ? pqn->q : NULL;

Regards,
   Felix

> +		return pqn->q;
> +
> +	return NULL;
> +}
> +
>   int pqm_get_wave_state(struct process_queue_manager *pqm,
>   		       unsigned int qid,
>   		       void __user *ctl_stack,
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 070d1bc7e725..0cbd25d2bb38 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -412,14 +412,14 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
>   
>   /* Allocate GWS for specific queue
>    *
> - * @gpu_id:      device identifier
> + * @reserved:    reserved for ABI compatibility. value is ignored.
>    * @queue_id:    queue's id that GWS is allocated for
>    * @num_gws:     how many GWS to allocate
>    * @first_gws:   index of the first GWS allocated.
>    *               only support contiguous GWS allocation
>    */
>   struct kfd_ioctl_alloc_queue_gws_args {
> -	__u32 gpu_id;		/* to KFD */
> +	__u32 reserved;		/* to KFD */
>   	__u32 queue_id;		/* to KFD */
>   	__u32 num_gws;		/* to KFD */
>   	__u32 first_gws;	/* from KFD */