[PATCH 1/1] drm/amdkfd: Track unified memory when changing xnack mode

Philip Yang yangp at amd.com
Tue Sep 13 15:05:24 UTC 2022


On 2022-09-12 14:53, Felix Kuehling wrote:
>
> Am 2022-09-12 um 14:04 schrieb Philip Yang:
>> Unified memory usage with xnack off is tracked to avoid oversubscribe
>> system memory. When changing xnack mode from off to on, subsequent
>> free ranges allocated with xnack off will not unreserve memory because
>> xnack is changed to on, cause memory accounting unbalanced.
> To you need something equivalent to reserve already allocated memory 
> when switching XNACK off?
>
> One more comment inline.

Yes, I get error message "KFD system memory accounting unbalanced" when 
switching XNACK off too, I will send v2 patch to handle this case.

Thanks,

Philip

>
>
>>
>> Unreserve memory of the ranges allocated with xnack off when switching
>> to xnack on.
>>
>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++++++--
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 14 ++++++++++++++
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  3 +++
>>   3 files changed, 31 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 56f7307c21d2..1855efeeaaa0 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1594,16 +1594,28 @@ static int kfd_ioctl_set_xnack_mode(struct 
>> file *filep,
>>       if (args->xnack_enabled >= 0) {
>>           if (!list_empty(&p->pqm.queues)) {
>>               pr_debug("Process has user queues running\n");
>> -            mutex_unlock(&p->mutex);
>> -            return -EBUSY;
>> +            r = -EBUSY;
>> +            goto out_unlock;
>>           }
>> +
>> +        if (p->xnack_enabled == args->xnack_enabled)
>> +            goto out_unlock;
>> +
>>           if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
>>               r = -EPERM;
>
> You should goto out_unlock here. It may not be strictly necessary, but 
> it's confusing to think of the cases where you may fall through here 
> unexpectedly.
>
> Regards,
>   Felix
>
>
>>           else
>>               p->xnack_enabled = args->xnack_enabled;
>> +
>> +        /* Switching to XNACK on, unreserve memory of all ranges
>> +         * allocated with XNACK off.
>> +         */
>> +        if (p->xnack_enabled)
>> +            svm_range_list_unreserve_mem(p);
>>       } else {
>>           args->xnack_enabled = p->xnack_enabled;
>>       }
>> +
>> +out_unlock:
>>       mutex_unlock(&p->mutex);
>>         return r;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index cf5b4005534c..5c333bacf546 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -2956,6 +2956,20 @@ svm_range_restore_pages(struct amdgpu_device 
>> *adev, unsigned int pasid,
>>       return r;
>>   }
>>   +void svm_range_list_unreserve_mem(struct kfd_process *p)
>> +{
>> +    struct svm_range *prange;
>> +    uint64_t size;
>> +
>> +    mutex_lock(&p->svms.lock);
>> +    list_for_each_entry(prange, &p->svms.list, list) {
>> +        size = (prange->last - prange->start + 1) << PAGE_SHIFT;
>> +        pr_debug("svms 0x%p size 0x%llx\n", &p->svms, size);
>> +        amdgpu_amdkfd_unreserve_mem_limit(NULL, size, 
>> KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
>> +    }
>> +    mutex_unlock(&p->svms.lock);
>> +}
>> +
>>   void svm_range_list_fini(struct kfd_process *p)
>>   {
>>       struct svm_range *prange;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> index 012c53729516..339f706673c8 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> @@ -203,11 +203,14 @@ void svm_range_list_lock_and_flush_work(struct 
>> svm_range_list *svms, struct mm_s
>>   void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
>>     void svm_range_set_max_pages(struct amdgpu_device *adev);
>> +void svm_range_list_unreserve_mem(struct kfd_process *p);
>>     #else
>>     struct kfd_process;
>>   +void svm_range_list_unreserve_mem(struct kfd_process *p) { }
>> +
>>   static inline int svm_range_list_init(struct kfd_process *p)
>>   {
>>       return 0;


More information about the amd-gfx mailing list