[PATCH] drm/amdkfd: fix a potential cu_mask memory leak

Felix Kuehling felix.kuehling at amd.com
Wed Sep 29 15:24:43 UTC 2021


Am 2021-09-29 um 4:22 a.m. schrieb Lang Yu:
> If user doesn't explicitly call kfd_ioctl_destroy_queue
> to destroy all created queues, when the kfd process is
> destroyed, some queues' cu_mask memory are not freed.
>
> To avoid forgetting to free them in some places,
> free them immediately after use.
>
> Signed-off-by: Lang Yu <lang.yu at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c               |  8 ++++----
>  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 10 ++++------
>  2 files changed, 8 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 4de907f3e66a..5c0e6dcf692a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -451,8 +451,8 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
>  	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
>  	if (retval) {
>  		pr_debug("Could not copy CU mask from userspace");
> -		kfree(properties.cu_mask);
> -		return -EFAULT;
> +		retval = -EFAULT;
> +		goto out;
>  	}
>  
>  	mutex_lock(&p->mutex);
> @@ -461,8 +461,8 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
>  
>  	mutex_unlock(&p->mutex);
>  
> -	if (retval)
> -		kfree(properties.cu_mask);
> +out:
> +	kfree(properties.cu_mask);
>  
>  	return retval;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index 243dd1efcdbf..4c81d690f31a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -394,8 +394,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
>  			pdd->qpd.num_gws = 0;
>  		}
>  
> -		kfree(pqn->q->properties.cu_mask);
> -		pqn->q->properties.cu_mask = NULL;
>  		uninit_queue(pqn->q);
>  	}
>  
> @@ -448,16 +446,16 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
>  		return -EFAULT;
>  	}
>  
> -	/* Free the old CU mask memory if it is already allocated, then
> -	 * allocate memory for the new CU mask.
> -	 */
> -	kfree(pqn->q->properties.cu_mask);
> +	WARN_ON_ONCE(pqn->q->properties.cu_mask);
>  
>  	pqn->q->properties.cu_mask_count = p->cu_mask_count;
>  	pqn->q->properties.cu_mask = p->cu_mask;
>  
>  	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
>  							pqn->q);
> +
> +	pqn->q->properties.cu_mask = NULL;
> +

This won't work correctly. We need to save the cu_mask for later.
Otherwise the next time dqm->ops.update_queue is called, for example in
pqm_update_queue or pqm_set_gws, it will wipe out the CU mask in the MQD.

Regards,
  Felix


>  	if (retval != 0)
>  		return retval;
>  


More information about the amd-gfx mailing list