[PATCH 1/2] drm/amdkfd: Let VRAM allocations go to GTT domain on small APUs
Felix Kuehling
felix.kuehling at amd.com
Fri Apr 26 22:51:49 UTC 2024
On 2024-04-26 04:37, Lang Yu wrote:
> Small APUs (i.e., consumer and embedded products) usually have a small
> carveout of device memory, which can't satisfy the memory allocation
> requirements of most compute workloads.
>
> We can't even run a basic MNIST example with the default 512MB carveout:
> https://github.com/pytorch/examples/tree/main/mnist
>
> Though we can change BIOS settings to enlarge the carveout size,
> doing so is inflexible and may draw complaints. Moreover, with a fixed
> carveout the memory can't be used effectively between host and device.
>
> The solution is the MI300A approach, i.e., let VRAM allocations go to GTT.
>
> Signed-off-by: Lang Yu <Lang.Yu at amd.com>
Two nit-picks inline. Other than that, this patch looks reasonable to me.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 +++++-
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 21 +++++++++++--------
> drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 2 +-
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 ++++--
> drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 3 ++-
> 5 files changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 7ba05f030dd1..3295838e9a1d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -456,7 +456,9 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
> mem_info->local_mem_size_private =
> KFD_XCP_MEMORY_SIZE(adev, xcp->id);
> } else {
> - mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
> + mem_info->local_mem_size_public = adev->flags & AMD_IS_APU ?
> + (ttm_tt_pages_limit() << PAGE_SHIFT) :
> + adev->gmc.visible_vram_size;
> mem_info->local_mem_size_private = adev->gmc.real_vram_size -
> adev->gmc.visible_vram_size;
On an APU the private size should be reported as 0.
> }
> @@ -824,6 +826,8 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id)
> }
> do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition);
> return ALIGN_DOWN(tmp, PAGE_SIZE);
> + } else if (adev->flags & AMD_IS_APU) {
> + return (ttm_tt_pages_limit() << PAGE_SHIFT);
> } else {
> return adev->gmc.real_vram_size;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index c4f9960dafbb..7eb5afcc4895 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -196,7 +196,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> return -EINVAL;
>
> vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
> - if (adev->gmc.is_app_apu) {
> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
> system_mem_needed = size;
> ttm_mem_needed = size;
> }
> @@ -232,7 +232,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> "adev reference can't be null when vram is used");
> if (adev && xcp_id >= 0) {
> adev->kfd.vram_used[xcp_id] += vram_needed;
> - adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ?
> + adev->kfd.vram_used_aligned[xcp_id] +=
> + (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ?
> vram_needed :
> ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
> }
> @@ -260,7 +261,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
>
> if (adev) {
> adev->kfd.vram_used[xcp_id] -= size;
> - if (adev->gmc.is_app_apu) {
> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
> adev->kfd.vram_used_aligned[xcp_id] -= size;
> kfd_mem_limit.system_mem_used -= size;
> kfd_mem_limit.ttm_mem_used -= size;
> @@ -889,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
> * if peer device has large BAR. In contrast, access over xGMI is
> * allowed for both small and large BAR configurations of peer device
> */
> - if ((adev != bo_adev && !adev->gmc.is_app_apu) &&
> + if ((adev != bo_adev && !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU)) &&
> ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
> (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
> (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
> @@ -1657,7 +1658,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
> - atomic64_read(&adev->vram_pin_size)
> - reserved_for_pt;
>
> - if (adev->gmc.is_app_apu) {
> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
> system_mem_available = no_system_mem_limit ?
> kfd_mem_limit.max_system_mem_limit :
> kfd_mem_limit.max_system_mem_limit -
> @@ -1669,6 +1670,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
> available = min3(system_mem_available, ttm_mem_available,
> vram_available);
> available = ALIGN_DOWN(available, PAGE_SIZE);
> +
Unnecessary whitespace change.
Regards,
Felix
> } else {
> available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN);
> }
> @@ -1705,7 +1707,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
>
> - if (adev->gmc.is_app_apu) {
> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
> domain = AMDGPU_GEM_DOMAIN_GTT;
> alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
> alloc_flags = 0;
> @@ -1952,7 +1954,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> if (size) {
> if (!is_imported &&
> (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
> - (adev->gmc.is_app_apu &&
> + ((adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) &&
> mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
> *size = bo_size;
> else
> @@ -2374,8 +2376,9 @@ static int import_obj_create(struct amdgpu_device *adev,
> (*mem)->dmabuf = dma_buf;
> (*mem)->bo = bo;
> (*mem)->va = va;
> - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
> - AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
> + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
> + !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ?
> + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
>
> (*mem)->mapped_to_gpu_memory = 0;
> (*mem)->process_info = avm->process_info;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> index 4bcfbeac48fb..4816fcb9803a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
> @@ -1023,7 +1023,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
> if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
> return -EINVAL;
>
> - if (adev->gmc.is_app_apu)
> + if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU)
> return 0;
>
> pgmap = &kfddev->pgmap;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index bfab16b43fec..238ac11bb97d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2619,7 +2619,8 @@ svm_range_best_restore_location(struct svm_range *prange,
> return -1;
> }
>
> - if (node->adev->gmc.is_app_apu)
> + if (node->adev->gmc.is_app_apu ||
> + node->adev->flags & AMD_IS_APU)
> return 0;
>
> if (prange->preferred_loc == gpuid ||
> @@ -3337,7 +3338,8 @@ svm_range_best_prefetch_location(struct svm_range *prange)
> goto out;
> }
>
> - if (bo_node->adev->gmc.is_app_apu) {
> + if (bo_node->adev->gmc.is_app_apu ||
> + bo_node->adev->flags & AMD_IS_APU) {
> best_loc = 0;
> goto out;
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 026863a0abcd..9c37bd0567ef 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -201,7 +201,8 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
> * is initialized to not 0 when page migration register device memory.
> */
> #define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
> - (adev)->gmc.is_app_apu)
> + (adev)->gmc.is_app_apu ||\
> + ((adev)->flags & AMD_IS_APU))
>
> void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
>
More information about the amd-gfx
mailing list