[PATCH v4] drm/amdkfd: Page aligned memory reserve size

Felix Kuehling felix.kuehling at amd.com
Tue Jan 10 23:07:26 UTC 2023


Am 2023-01-10 um 17:41 schrieb Philip Yang:
> Use the page-aligned size to reserve memory usage, because the
> page-aligned TTM BO size is used to unreserve memory usage; otherwise a
> non-page-aligned size leaves the memory usage accounting unbalanced.
>
> Change the type of vram_used to int64_t (an unsigned value can never be
> negative) so that WARN_ONCE(adev && adev->kfd.vram_used < 0, "...") can
> trigger, to help debug accounting issues with a warning and backtrace.
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 +++++++-----
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c         |  9 +++++++--
>   3 files changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index fb41869e357a..333780491867 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {
>   
>   struct amdgpu_kfd_dev {
>   	struct kfd_dev *dev;
> -	uint64_t vram_used;
> +	int64_t vram_used;
>   	uint64_t vram_used_aligned;
>   	bool init_complete;
>   	struct work_struct reset_work;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index cd5de5e08d2f..6f236ded5f12 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1598,6 +1598,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   	struct amdgpu_bo *bo;
>   	struct drm_gem_object *gobj = NULL;
>   	u32 domain, alloc_domain;
> +	uint64_t aligned_size;
>   	u64 alloc_flags;
>   	int ret;
>   
> @@ -1653,22 +1654,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   	 * the memory.
>   	 */
>   	if ((*mem)->aql_queue)
> -		size = size >> 1;
> +		size >>= 1;
> +	aligned_size = PAGE_ALIGN(size);
>   
>   	(*mem)->alloc_flags = flags;
>   
>   	amdgpu_sync_create(&(*mem)->sync);
>   
> -	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
> +	ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
>   	if (ret) {
>   		pr_debug("Insufficient memory\n");
>   		goto err_reserve_limit;
>   	}
>   
>   	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
> -			va, size, domain_string(alloc_domain));
> +			va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));
>   
> -	ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
> +	ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
>   				       bo_type, NULL, &gobj);
>   	if (ret) {
>   		pr_debug("Failed to create BO on domain %s. ret %d\n",
> @@ -1725,7 +1727,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   	/* Don't unreserve system mem limit twice */
>   	goto err_reserve_limit;
>   err_bo_create:
> -	amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
> +	amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
>   err_reserve_limit:
>   	mutex_destroy(&(*mem)->lock);
>   	if (gobj)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 6d291aa6386b..f79b8e964140 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1127,8 +1127,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>   	}
>   
>   	/* Update the VRAM usage count */
> -	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
> -		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
> +	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> +		uint64_t size = args->size;
> +
> +		if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
> +			size >>= 1;
> +		WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
> +	}
>   
>   	mutex_unlock(&p->mutex);
>   


More information about the amd-gfx mailing list