<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2022-06-27 20:23, Alex Sierra wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:20220628002347.3982-2-alex.sierra@amd.com">
      <pre class="moz-quote-pre" wrap="">[WHY]
Unified memory with xnack off should be tracked, as userptr mappings
and legacy allocations do. To avoid oversuscribe system memory when
xnack off.</pre>
    </blockquote>
    I think this also applies to XNACK ON (remove the p->xnack_enabled
    check), to avoid oversubscribing system memory and triggering the OOM
    killer, if we don't account for swap space, as it will degrade performance.  <br>
    <blockquote type="cite" cite="mid:20220628002347.3982-2-alex.sierra@amd.com">
      <pre class="moz-quote-pre" wrap="">
[How]
Exposing functions reserve_mem_limit and unreserve_mem_limit to SVM
API and call them on every prange creation and free.

Signed-off-by: Alex Sierra <a class="moz-txt-link-rfc2396E" href="mailto:alex.sierra@amd.com"><alex.sierra@amd.com></a>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  4 ++
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 25 ++++----
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c          | 58 +++++++++++++------
 3 files changed, 58 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b25b41f50213..e6244182a3a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -305,6 +305,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *
 void amdgpu_amdkfd_block_mmu_notifications(void *p);
 int amdgpu_amdkfd_criu_resume(void *p);
 bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+               uint64_t size, u32 alloc_flag);
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
+               uint64_t size, u32 alloc_flag);
 
 #if IS_ENABLED(CONFIG_HSA_AMD)
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9142f6cc3f4d..9719577ecc6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -128,7 +128,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
  *
  * Return: returns -ENOMEM in case of error, ZERO otherwise
  */
-static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
+int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
                uint64_t size, u32 alloc_flag)
 {
        uint64_t reserved_for_pt =
@@ -168,7 +168,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
             kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
            (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
             kfd_mem_limit.max_ttm_mem_limit) ||
-           (adev->kfd.vram_used + vram_needed >
+           (adev && adev->kfd.vram_used + vram_needed >
             adev->gmc.real_vram_size -
             atomic64_read(&adev->vram_pin_size) -
             reserved_for_pt)) {
@@ -179,7 +179,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
        /* Update memory accounting by decreasing available system
         * memory, TTM memory and GPU memory as computed above
         */
-       adev->kfd.vram_used += vram_needed;
+       WARN_ONCE(vram_needed && !adev,
+                 "adev reference can't be null when vram is used");
+       if (adev)
+               adev->kfd.vram_used += vram_needed;
        kfd_mem_limit.system_mem_used += system_mem_needed;
        kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
 
@@ -188,7 +191,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
        return ret;
 }
 
-static void unreserve_mem_limit(struct amdgpu_device *adev,
+void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
                uint64_t size, u32 alloc_flag)
 {
        spin_lock(&kfd_mem_limit.mem_limit_lock);
@@ -197,7 +200,10 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
                kfd_mem_limit.system_mem_used -= size;
                kfd_mem_limit.ttm_mem_used -= size;
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-               adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
+               WARN_ONCE(!adev,
+                         "adev reference can't be null when alloc mem flags vram is set");
+               if (adev)
+                       adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
        } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                kfd_mem_limit.system_mem_used -= size;
        } else if (!(alloc_flag &
@@ -206,11 +212,8 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
                pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
                goto release;
        }
-
-       WARN_ONCE(adev->kfd.vram_used < 0,
+       WARN_ONCE(adev && adev->kfd.vram_used < 0,
                  "KFD VRAM memory accounting unbalanced");
-       WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
-                 "KFD TTM memory accounting unbalanced");
        WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
                  "KFD system memory accounting unbalanced");
 
@@ -224,7 +227,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
        u32 alloc_flags = bo->kfd_bo->alloc_flags;
        u64 size = amdgpu_bo_size(bo);
 
-       unreserve_mem_limit(adev, size, alloc_flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
 
        kfree(bo->kfd_bo);
 }
@@ -1806,7 +1809,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
 err_bo_create:
-       unreserve_mem_limit(adev, size, flags);
+       amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
 err_reserve_limit:
        mutex_destroy(&(*mem)->lock);
        if (gobj)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d6fc00d51c8c..e706cbfa924f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -259,13 +259,22 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
        }
 }
 
-static void svm_range_free(struct svm_range *prange)
+static void svm_range_free(struct svm_range *prange, bool skip_unreserve)
 {
+       uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
+       struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+
        pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
                 prange->start, prange->last);
 
        svm_range_vram_node_free(prange);
        svm_range_free_dma_mappings(prange);
+
+       if (!skip_unreserve && !p->xnack_enabled) {
+               pr_debug("unreserve mem limit: %lld\n", size);
+               amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
+                                       KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
+       }
        mutex_destroy(&prange->lock);
        mutex_destroy(&prange->migrate_mutex);
        kfree(prange);
@@ -284,7 +293,7 @@ svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
 
 static struct
 svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
-                        uint64_t last)
+                        uint64_t last, bool is_new_alloc)
 {
        uint64_t size = last - start + 1;
        struct svm_range *prange;
@@ -293,6 +302,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
        prange = kzalloc(sizeof(*prange), GFP_KERNEL);
        if (!prange)
                return NULL;
+
+       p = container_of(svms, struct kfd_process, svms);
+       if (!p->xnack_enabled && is_new_alloc &&
+           amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
+                                           KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
+               pr_info("SVM mapping failed, exceeds resident system memory limit\n");
+               kfree(prange);
+               return NULL;
+       }
        prange->npages = size;
        prange->svms = svms;
        prange->start = start;
@@ -307,7 +325,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
        mutex_init(&prange->migrate_mutex);
        mutex_init(&prange->lock);
 
-       p = container_of(svms, struct kfd_process, svms);
        if (p->xnack_enabled)
                bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
                            MAX_GPU_INSTANCE);
@@ -1000,9 +1017,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
 
        svms = prange->svms;
        if (old_start == start)
-               *new = svm_range_new(svms, last + 1, old_last);
+               *new = svm_range_new(svms, last + 1, old_last, false);
        else
-               *new = svm_range_new(svms, old_start, start - 1);
+               *new = svm_range_new(svms, old_start, start - 1, false);
        if (!*new)
                return -ENOMEM;
 
@@ -1010,7 +1027,7 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
        if (r) {
                pr_debug("failed %d split [0x%llx 0x%llx] to [0x%llx 0x%llx]\n",
                         r, old_start, old_last, start, last);
-               svm_range_free(*new);
+               svm_range_free(*new, true);
                *new = NULL;
        }
 
@@ -1825,7 +1842,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 {
        struct svm_range *new;
 
-       new = svm_range_new(old->svms, old->start, old->last);
+       new = svm_range_new(old->svms, old->start, old->last, false);
        if (!new)
                return NULL;
 
@@ -1889,6 +1906,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
        struct interval_tree_node *node;
        struct svm_range *prange;
        struct svm_range *tmp;
+       struct list_head new_list;
        int r = 0;
 
        pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last);
@@ -1896,6 +1914,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
        INIT_LIST_HEAD(update_list);
        INIT_LIST_HEAD(insert_list);
        INIT_LIST_HEAD(remove_list);
+       INIT_LIST_HEAD(&new_list);
 
        node = interval_tree_iter_first(&svms->objects, start, last);
        while (node) {
@@ -1951,13 +1970,13 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 
                /* insert a new node if needed */
                if (node->start > start) {
-                       prange = svm_range_new(svms, start, node->start - 1);
+                       prange = svm_range_new(svms, start, node->start - 1, true);
                        if (!prange) {
                                r = -ENOMEM;
                                goto out;
                        }
 
-                       list_add(&prange->list, insert_list);
+                       list_add(&prange->list, &new_list);
                        list_add(&prange->update_list, update_list);
                }
 
@@ -1967,19 +1986,22 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 
        /* add a final range at the end if needed */
        if (start <= last) {
-               prange = svm_range_new(svms, start, last);
+               prange = svm_range_new(svms, start, last, true);
                if (!prange) {
                        r = -ENOMEM;
                        goto out;
                }
-               list_add(&prange->list, insert_list);
+               list_add(&prange->list, &new_list);
                list_add(&prange->update_list, update_list);
        }
 
 out:
        if (r)
-               list_for_each_entry_safe(prange, tmp, insert_list, list)
-                       svm_range_free(prange);
+               list_for_each_entry_safe(prange, tmp, &new_list, list)
+                       svm_range_free(prange, false);
+       else
+               list_for_each_entry_safe(prange, tmp, &new_list, list)
+                       list_add(&prange->list, insert_list);</pre>
    </blockquote>
    <p>We should remove the ranges from both new_list and insert_list to
      roll back the pranges.</p>
    <pre class="moz-quote-pre" wrap="">       if (r) {
                list_for_each_entry_safe(prange, tmp, insert_list, list)
                        svm_range_free(prange, true);
                list_for_each_entry_safe(prange, tmp, &new_list, list)
                        svm_range_free(prange, false);
        } else if (!list_empty(&new_list)) {
                list_splice(&new_list, insert_list);
        }
Regards,
Philip
</pre>
    <blockquote type="cite" cite="mid:20220628002347.3982-2-alex.sierra@amd.com">
      <pre class="moz-quote-pre" wrap="">
 
        return r;
 }
@@ -2026,7 +2048,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange,
                         svms, prange, prange->start, prange->last);
                svm_range_unlink(prange);
                svm_range_remove_notifier(prange);
-               svm_range_free(prange);
+               svm_range_free(prange, false);
                break;
        case SVM_OP_UPDATE_RANGE_NOTIFIER:
                pr_debug("update notifier 0x%p prange 0x%p [0x%lx 0x%lx]\n",
@@ -2588,14 +2610,14 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
                last = addr;
        }
 
-       prange = svm_range_new(&p->svms, start, last);
+       prange = svm_range_new(&p->svms, start, last, true);
        if (!prange) {
                pr_debug("Failed to create prange in address [0x%llx]\n", addr);
                return NULL;
        }
        if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {
                pr_debug("failed to get gpuid from kgd\n");
-               svm_range_free(prange);
+               svm_range_free(prange, false);
                return NULL;
        }
 
@@ -2884,7 +2906,7 @@ void svm_range_list_fini(struct kfd_process *p)
        list_for_each_entry_safe(prange, next, &p->svms.list, list) {
                svm_range_unlink(prange);
                svm_range_remove_notifier(prange);
-               svm_range_free(prange);
+               svm_range_free(prange, false);
        }
 
        mutex_destroy(&p->svms.lock);
@@ -3299,7 +3321,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
                         prange->last);
                svm_range_unlink(prange);
                svm_range_remove_notifier(prange);
-               svm_range_free(prange);
+               svm_range_free(prange, true);
        }
 
        mmap_write_downgrade(mm);
</pre>
    </blockquote>
  </body>
</html>