[PATCH v3] drm/amdkfd: Free gang_ctx_bo and wptr_bo in pqm_uninit

Felix Kuehling felix.kuehling at amd.com
Thu Nov 23 22:34:03 UTC 2023


On 2023-11-23 01:20, ZhenGuo Yin wrote:
> [Why]
> Memory leaks of gang_ctx_bo and wptr_bo.
>
> [How]
> Free gang_ctx_bo and wptr_bo in pqm_uninit.
>
> v2: add a common function pqm_clean_queue_resource to
> free queue's resources.
> v3: reset pdd->qpd.num_gws when destroying GWS queue.
>
> Signed-off-by: ZhenGuo Yin <zhenguo.yin at amd.com>
> ---
>   .../amd/amdkfd/kfd_process_queue_manager.c    | 54 +++++++++++--------
>   1 file changed, 33 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index ebaec476f49a..fb5840a5df06 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -169,16 +169,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
>   	return 0;
>   }
>   
> +static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
> +				     struct process_queue_node *pqn)
> +{
> +	struct kfd_node *dev;
> +	struct kfd_process_device *pdd;
> +
> +	dev = pqn->q->device;
> +
> +	pdd = kfd_get_process_device_data(dev, pqm->process);
> +	if (!pdd) {
> +		pr_err("Process device data doesn't exist\n");
> +		return;
> +	}
> +
> +	if (pqn->q->gws) {
> +		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
> +		    !dev->kfd->shared_resources.enable_mes)
> +			amdgpu_amdkfd_remove_gws_from_process(
> +				pqm->process->kgd_process_info, pqn->q->gws);
> +				pdd->qpd.num_gws = 0;

Wrong indentation. I almost didn't see this at all. The
pdd->qpd.num_gws = 0; assignment should be indented at the same level
as the if-statement:

+		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+		    !dev->kfd->shared_resources.enable_mes)
+			amdgpu_amdkfd_remove_gws_from_process(
+				pqm->process->kgd_process_info, pqn->q->gws);
+		pdd->qpd.num_gws = 0;

With that fixed, the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> +	}
> +
> +	if (dev->kfd->shared_resources.enable_mes) {
> +		amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
> +		if (pqn->q->wptr_bo)
> +			amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
> +	}
> +}
> +
>   void pqm_uninit(struct process_queue_manager *pqm)
>   {
>   	struct process_queue_node *pqn, *next;
>   
>   	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
> -		if (pqn->q && pqn->q->gws &&
> -		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
> -		    !pqn->q->device->kfd->shared_resources.enable_mes)
> -			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
> -				pqn->q->gws);
> +		if (pqn->q)
> +			pqm_clean_queue_resource(pqm, pqn);
> +
>   		kfd_procfs_del_queue(pqn->q);
>   		uninit_queue(pqn->q);
>   		list_del(&pqn->process_queue_list);
> @@ -465,22 +492,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
>   				goto err_destroy_queue;
>   		}
>   
> -		if (pqn->q->gws) {
> -			if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
> -			    !dev->kfd->shared_resources.enable_mes)
> -				amdgpu_amdkfd_remove_gws_from_process(
> -						pqm->process->kgd_process_info,
> -						pqn->q->gws);
> -			pdd->qpd.num_gws = 0;
> -		}
> -
> -		if (dev->kfd->shared_resources.enable_mes) {
> -			amdgpu_amdkfd_free_gtt_mem(dev->adev,
> -						   pqn->q->gang_ctx_bo);
> -			if (pqn->q->wptr_bo)
> -				amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
> -
> -		}
> +		pqm_clean_queue_resource(pqm, pqn);
>   		uninit_queue(pqn->q);
>   	}
>   


More information about the amd-gfx mailing list