[PATCH] drm/amdkfd: Correct the position of reserve and unreserve memory

Philip Yang yangp at amd.com
Mon Feb 24 15:21:16 UTC 2025


On 2025-02-20 06:59, Emily Deng wrote:
> Call amdgpu_amdkfd_reserve_mem_limit in svm_range_vram_node_new when
> creating a new SVM BO. Call amdgpu_amdkfd_unreserve_mem_limit
> in svm_range_bo_release when the SVM BO is deleted.
>
> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 13 -------------
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 20 ++++++++++++++++++++
>   2 files changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 79251f22b702..3bbc69751f7e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -514,15 +514,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
>   	start = start_mgr << PAGE_SHIFT;
>   	end = (last_mgr + 1) << PAGE_SHIFT;
>   
> -	r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
> -					prange->npages * PAGE_SIZE,
> -					KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> -					node->xcp ? node->xcp->id : 0);
> -	if (r) {
> -		dev_dbg(node->adev->dev, "failed to reserve VRAM, r: %ld\n", r);
> -		return -ENOSPC;
> -	}
> -
From the git history, this reservation is there to "make sure there is
enough available VRAM and migrating to VRAM doesn't evict other possible
non-unified memory BOs". Without it, migration can trigger the OOM killer
unexpectedly.

We should keep this for xnack on.

>   	r = svm_range_vram_node_new(node, prange, true);
>   	if (r) {
>   		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
> @@ -560,10 +551,6 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
>   	}
>   
>   out:
> -	amdgpu_amdkfd_unreserve_mem_limit(node->adev,
> -					prange->npages * PAGE_SIZE,
> -					KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> -					node->xcp ? node->xcp->id : 0);
Keep this for xnack on.
>   	return r < 0 ? r : 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index db3034b00dac..c861d8c90419 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -430,6 +430,10 @@ static void svm_range_bo_release(struct kref *kref)
>   		/* We're not in the eviction worker. Signal the fence. */
>   		dma_fence_signal(&svm_bo->eviction_fence->base);
>   	dma_fence_put(&svm_bo->eviction_fence->base);
> +	amdgpu_amdkfd_unreserve_mem_limit(svm_bo->node->adev,
> +		svm_bo->bo->tbo.base.size,
> +		KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> +		svm_bo->node->xcp ? svm_bo->node->xcp->id : 0);
>   	amdgpu_bo_unref(&svm_bo->bo);
>   	kfree(svm_bo);
>   }
> @@ -581,6 +585,18 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>   		kfree(svm_bo);
>   		return -ESRCH;
>   	}
> +
> +	r = amdgpu_amdkfd_reserve_mem_limit(node->adev,
> +			prange->npages * PAGE_SIZE,
> +			KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> +			node->xcp ? node->xcp->id : 0);
> +	if (r) {
> +		pr_debug("failed to reserve VRAM, r: %ld\n", r);
> +		mmput(mm);
> +		kfree(svm_bo);
> +		return -ENOSPC;
> +	}
> +

Reserving against the VRAM limit here is only needed for xnack off; with
xnack on, VRAM overcommit should work.

Regards,

Philip

>   	svm_bo->node = node;
>   	svm_bo->eviction_fence =
>   		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
> @@ -655,6 +671,10 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange,
>   reserve_bo_failed:
>   	amdgpu_bo_unref(&bo);
>   create_bo_failed:
> +	amdgpu_amdkfd_unreserve_mem_limit(svm_bo->node->adev,
> +		prange->npages * PAGE_SIZE,
> +		KFD_IOC_ALLOC_MEM_FLAGS_VRAM,
> +		node->xcp ? node->xcp->id : 0);
>   	dma_fence_put(&svm_bo->eviction_fence->base);
>   	kfree(svm_bo);
>   	prange->ttm_res = NULL;


More information about the amd-gfx mailing list