[PATCH] drm/amdkfd: track unified memory reservation with xnack off
Felix Kuehling
felix.kuehling at amd.com
Mon Jul 18 18:31:28 UTC 2022
On 2022-07-15 19:54, Alex Sierra wrote:
> [WHY]
> Unified memory with xnack off should be tracked, as userptr mappings
> and legacy allocations do. To avoid oversuscribe system memory when
> xnack off.
> [How]
> Exposing functions reserve_mem_limit and unreserve_mem_limit to SVM
> API and call them on every prange creation and free.
>
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 23 ++++---
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 60 +++++++++++++------
> 3 files changed, 60 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 73bf8b5f2aa9..83d955f0c52f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -305,6 +305,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *
> void amdgpu_amdkfd_block_mmu_notifications(void *p);
> int amdgpu_amdkfd_criu_resume(void *p);
> bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
> +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> + uint64_t size, u32 alloc_flag);
> +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
> + uint64_t size, u32 alloc_flag);
>
> #if IS_ENABLED(CONFIG_HSA_AMD)
> void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 2bc36ff0aa0f..39d589394160 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -129,7 +129,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
> *
> * Return: returns -ENOMEM in case of error, ZERO otherwise
> */
> -static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> uint64_t size, u32 alloc_flag)
> {
> uint64_t reserved_for_pt =
> @@ -169,7 +169,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
> (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
> kfd_mem_limit.max_ttm_mem_limit) ||
> - (adev->kfd.vram_used + vram_needed >
> + (adev && adev->kfd.vram_used + vram_needed >
> adev->gmc.real_vram_size -
> atomic64_read(&adev->vram_pin_size) -
> reserved_for_pt)) {
> @@ -180,7 +180,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> /* Update memory accounting by decreasing available system
> * memory, TTM memory and GPU memory as computed above
> */
> - adev->kfd.vram_used += vram_needed;
> + WARN_ONCE(vram_needed && !adev,
> + "adev reference can't be null when vram is used");
> + if (adev)
> + adev->kfd.vram_used += vram_needed;
> kfd_mem_limit.system_mem_used += system_mem_needed;
> kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
>
> @@ -189,7 +192,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
> return ret;
> }
>
> -static void unreserve_mem_limit(struct amdgpu_device *adev,
> +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
> uint64_t size, u32 alloc_flag)
> {
> spin_lock(&kfd_mem_limit.mem_limit_lock);
> @@ -198,7 +201,10 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
> kfd_mem_limit.system_mem_used -= size;
> kfd_mem_limit.ttm_mem_used -= size;
> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> - adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
> + WARN_ONCE(!adev,
> + "adev reference can't be null when alloc mem flags vram is set");
> + if (adev)
> + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
> kfd_mem_limit.system_mem_used -= size;
> } else if (!(alloc_flag &
> @@ -207,8 +213,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
> pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
> goto release;
> }
> -
> - WARN_ONCE(adev->kfd.vram_used < 0,
> + WARN_ONCE(adev && adev->kfd.vram_used < 0,
> "KFD VRAM memory accounting unbalanced");
> WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
> "KFD TTM memory accounting unbalanced");
> @@ -225,7 +230,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
> u32 alloc_flags = bo->kfd_bo->alloc_flags;
> u64 size = amdgpu_bo_size(bo);
>
> - unreserve_mem_limit(adev, size, alloc_flags);
> + amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
>
> kfree(bo->kfd_bo);
> }
> @@ -1788,7 +1793,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
> /* Don't unreserve system mem limit twice */
> goto err_reserve_limit;
> err_bo_create:
> - unreserve_mem_limit(adev, size, flags);
> + amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
> err_reserve_limit:
> mutex_destroy(&(*mem)->lock);
> if (gobj)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index b592aee6d9d6..af5b0629b84a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -260,13 +260,22 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
> }
> }
>
> -static void svm_range_free(struct svm_range *prange)
> +static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
> {
> + uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
> + struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
> +
> pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
> prange->start, prange->last);
>
> svm_range_vram_node_free(prange);
> svm_range_free_dma_mappings(prange);
> +
> + if (update_mem_usage && !p->xnack_enabled) {
> + pr_debug("unreserve mem limit: %lld\n", size);
> + amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
> + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
> + }
> mutex_destroy(&prange->lock);
> mutex_destroy(&prange->migrate_mutex);
> kfree(prange);
> @@ -285,7 +294,7 @@ svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
>
> static struct
> svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
> - uint64_t last)
> + uint64_t last, bool update_mem_usage)
> {
> uint64_t size = last - start + 1;
> struct svm_range *prange;
> @@ -294,6 +303,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
> prange = kzalloc(sizeof(*prange), GFP_KERNEL);
> if (!prange)
> return NULL;
> +
> + p = container_of(svms, struct kfd_process, svms);
> + if (!p->xnack_enabled && update_mem_usage &&
> + amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
> + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
> + pr_info("SVM mapping failed, exceeds resident system memory limit\n");
> + kfree(prange);
> + return NULL;
> + }
> prange->npages = size;
> prange->svms = svms;
> prange->start = start;
> @@ -308,7 +326,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
> mutex_init(&prange->migrate_mutex);
> mutex_init(&prange->lock);
>
> - p = container_of(svms, struct kfd_process, svms);
> if (p->xnack_enabled)
> bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
> MAX_GPU_INSTANCE);
> @@ -1001,9 +1018,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
>
> svms = prange->svms;
> if (old_start == start)
> - *new = svm_range_new(svms, last + 1, old_last);
> + *new = svm_range_new(svms, last + 1, old_last, false);
> else
> - *new = svm_range_new(svms, old_start, start - 1);
> + *new = svm_range_new(svms, old_start, start - 1, false);
> if (!*new)
> return -ENOMEM;
>
> @@ -1011,7 +1028,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
> if (r) {
> pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
> r, old_start, old_last, start, last);
> - svm_range_free(*new);
> + svm_range_free(*new, false);
> *new = NULL;
> }
>
> @@ -1846,7 +1863,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
> {
> struct svm_range *new;
>
> - new = svm_range_new(old->svms, old->start, old->last);
> + new = svm_range_new(old->svms, old->start, old->last, false);
> if (!new)
> return NULL;
>
> @@ -1910,6 +1927,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
> struct interval_tree_node *node;
> struct svm_range *prange;
> struct svm_range *tmp;
> + struct list_head new_list;
> int r = 0;
>
> pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
> @@ -1917,6 +1935,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
> INIT_LIST_HEAD(update_list);
> INIT_LIST_HEAD(insert_list);
> INIT_LIST_HEAD(remove_list);
> + INIT_LIST_HEAD(&new_list);
>
> node = interval_tree_iter_first(&svms->objects, start, last);
> while (node) {
> @@ -1972,13 +1991,13 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>
> /* insert a new node if needed */
> if (node->start > start) {
> - prange = svm_range_new(svms, start, node->start - 1);
> + prange = svm_range_new(svms, start, node->start - 1, true);
> if (!prange) {
> r = -ENOMEM;
> goto out;
> }
>
> - list_add(&prange->list, insert_list);
> + list_add(&prange->list, &new_list);
> list_add(&prange->update_list, update_list);
> }
>
> @@ -1988,19 +2007,24 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>
> /* add a final range at the end if needed */
> if (start <= last) {
> - prange = svm_range_new(svms, start, last);
> + prange = svm_range_new(svms, start, last, true);
> if (!prange) {
> r = -ENOMEM;
> goto out;
> }
> - list_add(&prange->list, insert_list);
> + list_add(&prange->list, &new_list);
> list_add(&prange->update_list, update_list);
> }
>
> out:
> - if (r)
> + if (r) {
> list_for_each_entry_safe(prange, tmp, insert_list, list)
> - svm_range_free(prange);
> + svm_range_free(prange, false);
> + list_for_each_entry_safe(prange, tmp, &new_list, list)
> + svm_range_free(prange, true);
> + } else {
> + list_splice(&new_list, insert_list);
> + }
>
> return r;
> }
> @@ -2047,7 +2071,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
> svms, prange, prange->start, prange->last);
> svm_range_unlink(prange);
> svm_range_remove_notifier(prange);
> - svm_range_free(prange);
> + svm_range_free(prange, true);
> break;
> case SVM_OP_UPDATE_RANGE_NOTIFIER:
> pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
> @@ -2610,14 +2634,14 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
> last = addr;
> }
>
> - prange = svm_range_new(&p->svms, start, last);
> + prange = svm_range_new(&p->svms, start, last, true);
> if (!prange) {
> pr_debug("Failed to create prange in address [0x%llx]\n", addr);
> return NULL;
> }
> if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
> pr_debug("failed to get gpuid from kgd\n");
> - svm_range_free(prange);
> + svm_range_free(prange, true);
> return NULL;
> }
>
> @@ -2917,7 +2941,7 @@ void svm_range_list_fini(struct kfd_process *p)
> list_for_each_entry_safe(prange, next, &p->svms.list, list) {
> svm_range_unlink(prange);
> svm_range_remove_notifier(prange);
> - svm_range_free(prange);
> + svm_range_free(prange, true);
> }
>
> mutex_destroy(&p->svms.lock);
> @@ -3333,7 +3357,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
> prange->last);
> svm_range_unlink(prange);
> svm_range_remove_notifier(prange);
> - svm_range_free(prange);
> + svm_range_free(prange, false);
> }
>
> mmap_write_downgrade(mm);
More information about the amd-gfx
mailing list