[PATCH v3] drm/amdkfd: Free gang_ctx_bo and wptr_bo in pqm_uninit
Felix Kuehling
felix.kuehling at amd.com
Thu Nov 23 22:34:03 UTC 2023
On 2023-11-23 01:20, ZhenGuo Yin wrote:
> [Why]
> Memory leaks of gang_ctx_bo and wptr_bo.
>
> [How]
> Free gang_ctx_bo and wptr_bo in pqm_uninit.
>
> v2: add a common function pqm_clean_queue_resource to
> free queue's resources.
> v3: reset pdd->qpd.num_gws when destroying GWS queue.
>
> Signed-off-by: ZhenGuo Yin <zhenguo.yin at amd.com>
> ---
> .../amd/amdkfd/kfd_process_queue_manager.c | 54 +++++++++++--------
> 1 file changed, 33 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index ebaec476f49a..fb5840a5df06 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -169,16 +169,43 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
> return 0;
> }
>
> +static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
> + struct process_queue_node *pqn)
> +{
> + struct kfd_node *dev;
> + struct kfd_process_device *pdd;
> +
> + dev = pqn->q->device;
> +
> + pdd = kfd_get_process_device_data(dev, pqm->process);
> + if (!pdd) {
> + pr_err("Process device data doesn't exist\n");
> + return;
> + }
> +
> + if (pqn->q->gws) {
> + if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
> + !dev->kfd->shared_resources.enable_mes)
> + amdgpu_amdkfd_remove_gws_from_process(
> + pqm->process->kgd_process_info, pqn->q->gws);
> + pdd->qpd.num_gws = 0;
Wrong indentation. I almost didn't see this at all. It should be
indented at the same level as the if-statement.
+		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
+		    !dev->kfd->shared_resources.enable_mes)
+			amdgpu_amdkfd_remove_gws_from_process(
+				pqm->process->kgd_process_info, pqn->q->gws);
+		pdd->qpd.num_gws = 0;
With that fixed, the patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> + }
> +
> + if (dev->kfd->shared_resources.enable_mes) {
> + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->gang_ctx_bo);
> + if (pqn->q->wptr_bo)
> + amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
> + }
> +}
> +
> void pqm_uninit(struct process_queue_manager *pqm)
> {
> struct process_queue_node *pqn, *next;
>
> list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
> - if (pqn->q && pqn->q->gws &&
> - KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
> - !pqn->q->device->kfd->shared_resources.enable_mes)
> - amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
> - pqn->q->gws);
> + if (pqn->q)
> + pqm_clean_queue_resource(pqm, pqn);
> +
> kfd_procfs_del_queue(pqn->q);
> uninit_queue(pqn->q);
> list_del(&pqn->process_queue_list);
> @@ -465,22 +492,7 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
> goto err_destroy_queue;
> }
>
> - if (pqn->q->gws) {
> - if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
> - !dev->kfd->shared_resources.enable_mes)
> - amdgpu_amdkfd_remove_gws_from_process(
> - pqm->process->kgd_process_info,
> - pqn->q->gws);
> - pdd->qpd.num_gws = 0;
> - }
> -
> - if (dev->kfd->shared_resources.enable_mes) {
> - amdgpu_amdkfd_free_gtt_mem(dev->adev,
> - pqn->q->gang_ctx_bo);
> - if (pqn->q->wptr_bo)
> - amdgpu_amdkfd_free_gtt_mem(dev->adev, pqn->q->wptr_bo);
> -
> - }
> + pqm_clean_queue_resource(pqm, pqn);
> uninit_queue(pqn->q);
> }
>
More information about the amd-gfx
mailing list