[PATCH 2/2] drm/amdkfd: track unified memory reservation with xnack off
Felix Kuehling
felix.kuehling at amd.com
Fri May 20 15:12:58 UTC 2022
On 2022-05-20 08:45, Philip Yang wrote:
>
>
> On 2022-05-19 19:08, Felix Kuehling wrote:
>> On 2022-05-19 12:21, Alex Sierra wrote:
>>> [Why]
>>> Unified memory with xnack off should be tracked, as userptr mappings
>>> and legacy allocations are, to avoid oversubscribing system memory
>>> when xnack is off.
>>> [How]
>>> Expose the functions reserve_mem_limit and unreserve_mem_limit to the
>>> SVM API and call them on every prange creation and free.
>>>
>>> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +++
>>> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 24 +++++++------
>>> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 34 ++++++++++++++-----
>>> 3 files changed, 43 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index f8b9f27adcf5..f55f34af6480 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -301,6 +301,10 @@ bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *
>>> void amdgpu_amdkfd_block_mmu_notifications(void *p);
>>> int amdgpu_amdkfd_criu_resume(void *p);
>>> bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
>>> +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>> + uint64_t size, u32 alloc_flag);
>>> +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
>>> + uint64_t size, u32 alloc_flag);
>>> #if IS_ENABLED(CONFIG_HSA_AMD)
>>> void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 966714dd764b..615e2b34fe12 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -122,7 +122,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
>>> *
>>> * Return: returns -ENOMEM in case of error, ZERO otherwise
>>> */
>>> -static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>> +int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>> uint64_t size, u32 alloc_flag)
>>> {
>>> uint64_t reserved_for_pt =
>>> @@ -157,8 +157,8 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>> kfd_mem_limit.max_system_mem_limit &&
>>> !no_system_mem_limit) ||
>>> (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
>>> kfd_mem_limit.max_ttm_mem_limit) ||
>>> - (adev->kfd.vram_used + vram_needed >
>>> - adev->gmc.real_vram_size - reserved_for_pt)) {
>>> + (adev && (adev->kfd.vram_used + vram_needed >
>>> + adev->gmc.real_vram_size - reserved_for_pt))) {
>>> ret = -ENOMEM;
>>> goto release;
>>> }
>>> @@ -166,7 +166,8 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>> /* Update memory accounting by decreasing available system
>>> * memory, TTM memory and GPU memory as computed above
>>> */
>>> - adev->kfd.vram_used += vram_needed;
>>> + if (adev)
>>> + adev->kfd.vram_used += vram_needed;
>>
>> Add a WARN_ONCE(vram_needed && !adev).
>>
>>
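To elaborate on my comment above, something like this is what I'd expect
(untested sketch, the warning text is just a placeholder):

	/* SVM system-memory accounting deliberately passes adev == NULL,
	 * so only complain when a VRAM-sized request arrives without a
	 * device to charge it to.
	 */
	WARN_ONCE(vram_needed && !adev,
		  "KFD VRAM reservation requested without a device");
	if (adev)
		adev->kfd.vram_used += vram_needed;
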
>>> kfd_mem_limit.system_mem_used += system_mem_needed;
>>> kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
>>> @@ -175,7 +176,7 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
>>> return ret;
>>> }
>>> -static void unreserve_mem_limit(struct amdgpu_device *adev,
>>> +void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
>>> uint64_t size, u32 alloc_flag)
>>> {
>>> spin_lock(&kfd_mem_limit.mem_limit_lock);
>>> @@ -184,7 +185,8 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
>>> kfd_mem_limit.system_mem_used -= size;
>>> kfd_mem_limit.ttm_mem_used -= size;
>>> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>>> - adev->kfd.vram_used -= size;
>>> + if (adev)
>>
>> Add a WARN_ONCE(!adev) here.
>>
>>
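Roughly like this (untested sketch):

	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		/* VRAM was only ever reserved with a valid adev */
		WARN_ONCE(!adev, "KFD VRAM unreserve without a device");
		if (adev)
			adev->kfd.vram_used -= size;
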
>>> + adev->kfd.vram_used -= size;
>>> } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>>> kfd_mem_limit.system_mem_used -= size;
>>> } else if (!(alloc_flag &
>>> @@ -193,9 +195,9 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
>>> pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
>>> goto release;
>>> }
>>> -
>>> - WARN_ONCE(adev->kfd.vram_used < 0,
>>> - "KFD VRAM memory accounting unbalanced");
>>> + if (adev)
>>> + WARN_ONCE(adev->kfd.vram_used < 0,
>>> + "KFD VRAM memory accounting unbalanced");
>>
>> This could be simplified to WARN_ONCE(adev && adev->kfd.vram_used <
>> 0, ...).
>>
>>
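I.e. (sketch):

	WARN_ONCE(adev && adev->kfd.vram_used < 0,
		  "KFD VRAM memory accounting unbalanced");
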
>>> WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
>>> "KFD TTM memory accounting unbalanced");
>>> WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
>>> @@ -211,7 +213,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
>>> u32 alloc_flags = bo->kfd_bo->alloc_flags;
>>> u64 size = amdgpu_bo_size(bo);
>>> - unreserve_mem_limit(adev, size, alloc_flags);
>>> + amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags);
>>> kfree(bo->kfd_bo);
>>> }
>>> @@ -1601,7 +1603,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>>> /* Don't unreserve system mem limit twice */
>>> goto err_reserve_limit;
>>> err_bo_create:
>>> - unreserve_mem_limit(adev, size, flags);
>>> + amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
>>> err_reserve_limit:
>>> mutex_destroy(&(*mem)->lock);
>>> if (gobj)
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 835b5187f0b8..1380c2fee0dc 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -261,11 +261,21 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
>>> static void svm_range_free(struct svm_range *prange)
>>> {
>>> + uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
>>> + struct kfd_process *p;
>>> +
>>> pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
>>> prange->svms, prange,
>>> prange->start, prange->last);
>>> svm_range_vram_node_free(prange);
>>> svm_range_free_dma_mappings(prange);
>>> +
>>> + p = container_of(prange->svms, struct kfd_process, svms);
>>
>> You could initialize p in the variable declaration above.
>>
>>
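I.e. something like (sketch):

	static void svm_range_free(struct svm_range *prange)
	{
		uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
		struct kfd_process *p = container_of(prange->svms,
						     struct kfd_process, svms);
		...
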
>>> + if (!p->xnack_enabled) {
>>> + pr_debug("unreserve mem limit: %lld\n", size);
>>> + amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
>>> + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
>>> + }
>>> mutex_destroy(&prange->lock);
>>> mutex_destroy(&prange->migrate_mutex);
>>> kfree(prange);
>>> @@ -284,7 +294,7 @@ svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
>>> static struct
>>> svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>>> - uint64_t last)
>>> + uint64_t last, bool is_new_alloc)
>>> {
>>> uint64_t size = last - start + 1;
>>> struct svm_range *prange;
>>> @@ -293,6 +303,15 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>>> prange = kzalloc(sizeof(*prange), GFP_KERNEL);
>>> if (!prange)
>>> return NULL;
>>> +
>>> + p = container_of(svms, struct kfd_process, svms);
>>> + if (!p->xnack_enabled && is_new_alloc &&
>>> + amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
>>> + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) {
>>> + pr_info("SVM mapping failed, exceeds resident system memory limit\n");
>>> + kfree(prange);
>>> + return NULL;
>>> + }
>>> prange->npages = size;
>>> prange->svms = svms;
>>> prange->start = start;
>>> @@ -307,7 +326,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>>> mutex_init(&prange->migrate_mutex);
>>> mutex_init(&prange->lock);
>>> - p = container_of(svms, struct kfd_process, svms);
>>> if (p->xnack_enabled)
>>> bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
>>> MAX_GPU_INSTANCE);
>>> @@ -1000,9 +1018,9 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
>>> svms = prange->svms;
>>> if (old_start == start)
>>> - *new = svm_range_new(svms, last + 1, old_last);
>>> + *new = svm_range_new(svms, last + 1, old_last, false);
>>> else
>>> - *new = svm_range_new(svms, old_start, start - 1);
>>> + *new = svm_range_new(svms, old_start, start - 1, false);
>>> if (!*new)
>>> return -ENOMEM;
>>> @@ -1825,7 +1843,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
>>> {
>>> struct svm_range *new;
>>> - new = svm_range_new(old->svms, old->start, old->last);
>>> + new = svm_range_new(old->svms, old->start, old->last, false);
>>
>> This won't work as intended. When a range gets cloned, one of the
>> clones will be freed later. So when freeing that clone, you also need
>> to skip unreserving its space, because the other clone is still
>> holding it.
>
> Thanks Felix for catching this. svm_range_free should skip unreserving
> the cloned ranges freed from the remove_list; otherwise the overlapped
> head or tail range size would no longer be accounted.
>
Also, in the error handling case of svm_range_add, we have to skip
unreserving the cloned ranges being freed from the insert_list.
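One way to handle both cases (untested sketch, the parameter name is just a
suggestion) is to give svm_range_free an update flag that mirrors the
is_new_alloc parameter of svm_range_new, and to pass false when freeing
clones from the remove_list or when unwinding the insert_list:

	static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
	{
		uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
		struct kfd_process *p = container_of(prange->svms,
						     struct kfd_process, svms);

		svm_range_vram_node_free(prange);
		svm_range_free_dma_mappings(prange);

		/* Clones share the reservation of the original range, so only
		 * call sites that own the reservation pass true here.
		 */
		if (update_mem_usage && !p->xnack_enabled) {
			pr_debug("unreserve mem limit: %lld\n", size);
			amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
		}
		mutex_destroy(&prange->lock);
		mutex_destroy(&prange->migrate_mutex);
		kfree(prange);
	}
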
Regards,
Felix
> Regards,
>
> Philip
>
>>
>> Regards,
>> Felix
>>
>>
>>> if (!new)
>>> return NULL;
>>> @@ -1951,7 +1969,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>>> /* insert a new node if needed */
>>> if (node->start > start) {
>>> - prange = svm_range_new(svms, start, node->start - 1);
>>> + prange = svm_range_new(svms, start, node->start - 1, true);
>>> if (!prange) {
>>> r = -ENOMEM;
>>> goto out;
>>> @@ -1967,7 +1985,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
>>> /* add a final range at the end if needed */
>>> if (start <= last) {
>>> - prange = svm_range_new(svms, start, last);
>>> + prange = svm_range_new(svms, start, last, true);
>>> if (!prange) {
>>> r = -ENOMEM;
>>> goto out;
>>> @@ -2585,7 +2603,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
>>> last = addr;
>>> }
>>> - prange = svm_range_new(&p->svms, start, last);
>>> + prange = svm_range_new(&p->svms, start, last, true);
>>> if (!prange) {
>>> pr_debug("Failed to create prange in address [0x%llx]\n",
>>> addr);
>>> return NULL;