[PATCH v5 1/1] drm/amdkfd: Track unified memory when switching xnack mode

Felix Kuehling felix.kuehling at amd.com
Wed Sep 28 19:02:47 UTC 2022


Am 2022-09-28 um 12:11 schrieb Philip Yang:
> Unified memory usage with xnack off is tracked to avoid oversubscribe
> system memory, with xnack on, we don't track unified memory usage to
> allow memory oversubscribe. When switching xnack mode from off to on,
> subsequent free ranges allocated with xnack off will not unreserve
> memory. When switching xnack mode from on to off, subsequent free ranges
> allocated with xnack on will unreserve memory. Both cases cause memory
> accounting unbalanced.
>
> When switching xnack mode from on to off, need reserve already allocated
> svm range memory. When switching xnack mode from off to on, need
> unreserve already allocated svm range memory.
>
> v5: Handle prange child ranges
> v4: Handle reservation memory failure
> v3: Handle switching xnack mode race with svm_range_deferred_list_work
> v2: Handle both switching xnack from on to off and from off to on cases
>
> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 26 ++++++++---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 56 +++++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h     |  1 +
>   3 files changed, 76 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 56f7307c21d2..5feaba6a77de 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1584,6 +1584,8 @@ static int kfd_ioctl_smi_events(struct file *filep,
>   	return kfd_smi_event_open(pdd->dev, &args->anon_fd);
>   }
>   
> +#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
> +
>   static int kfd_ioctl_set_xnack_mode(struct file *filep,
>   				    struct kfd_process *p, void *data)
>   {
> @@ -1594,22 +1596,29 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep,
>   	if (args->xnack_enabled >= 0) {
>   		if (!list_empty(&p->pqm.queues)) {
>   			pr_debug("Process has user queues running\n");
> -			mutex_unlock(&p->mutex);
> -			return -EBUSY;
> +			r = -EBUSY;
> +			goto out_unlock;
>   		}
> -		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
> +
> +		if (p->xnack_enabled == args->xnack_enabled)
> +			goto out_unlock;
> +
> +		if (args->xnack_enabled && !kfd_process_xnack_mode(p, true)) {
>   			r = -EPERM;
> -		else
> -			p->xnack_enabled = args->xnack_enabled;
> +			goto out_unlock;
> +		}
> +
> +		r = svm_range_switch_xnack_reserve_mem(p, args->xnack_enabled);
>   	} else {
>   		args->xnack_enabled = p->xnack_enabled;
>   	}
> +
> +out_unlock:
>   	mutex_unlock(&p->mutex);
>   
>   	return r;
>   }
>   
> -#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
>   static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
>   {
>   	struct kfd_ioctl_svm_args *args = data;
> @@ -1629,6 +1638,11 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
>   	return r;
>   }
>   #else
> +static int kfd_ioctl_set_xnack_mode(struct file *filep,
> +				    struct kfd_process *p, void *data)
> +{
> +	return -EPERM;
> +}
>   static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
>   {
>   	return -EPERM;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index cf5b4005534c..ff47ac836bd4 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -278,7 +278,7 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
>   	svm_range_free_dma_mappings(prange);
>   
>   	if (update_mem_usage && !p->xnack_enabled) {
> -		pr_debug("unreserve mem limit: %lld\n", size);
> +		pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
>   		amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
>   					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
>   	}
> @@ -2956,6 +2956,60 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   	return r;
>   }
>   
> +int
> +svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled)
> +{
> +	struct svm_range *prange, *pchild;
> +	uint64_t reserved_size = 0;
> +	uint64_t size;
> +	int r = 0;
> +
> +	pr_debug("switching xnack from %d to %d\n", p->xnack_enabled, xnack_enabled);
> +
> +	mutex_lock(&p->svms.lock);
> +
> +	list_for_each_entry(prange, &p->svms.list, list) {
> +		list_for_each_entry(pchild, &prange->child_list, child_list) {

I believe the child_list is not protected by the svms.lock because we 
update it in MMU notifiers. It is protected by the prange->lock of the 
parent range.

Regards,
   Felix


> +			size = (pchild->last - pchild->start + 1) << PAGE_SHIFT;
> +			if (xnack_enabled) {
> +				amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
> +						KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
> +			} else {
> +				r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
> +						KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
> +				if (r)
> +					goto out;
> +				reserved_size += size;
> +			}
> +		}
> +
> +		size = (prange->last - prange->start + 1) << PAGE_SHIFT;
> +		if (xnack_enabled) {
> +			amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
> +						KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
> +		} else {
> +			r = amdgpu_amdkfd_reserve_mem_limit(NULL, size,
> +						KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
> +			if (r)
> +				goto out;
> +			reserved_size += size;
> +		}
> +	}
> +
> +out:
> +	if (r)
> +		amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size,
> +						KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
> +	else
> +		/* Change xnack mode must be inside svms lock, to avoid race with
> +		 * svm_range_deferred_list_work unreserve memory in parallel.
> +		 */
> +		p->xnack_enabled = xnack_enabled;
> +
> +	mutex_unlock(&p->svms.lock);
> +	return r;
> +}
> +
>   void svm_range_list_fini(struct kfd_process *p)
>   {
>   	struct svm_range *prange;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 012c53729516..7a33b93f9df6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -203,6 +203,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
>   void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
>   
>   void svm_range_set_max_pages(struct amdgpu_device *adev);
> +int svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled);
>   
>   #else
>   


More information about the amd-gfx mailing list