[PATCH 3/4] drm/amdkfd: avoid svm conflicting with userptr address

Felix Kuehling felix.kuehling at amd.com
Wed Oct 6 18:23:29 UTC 2021


Am 2021-10-06 um 2:09 p.m. schrieb philip yang:
>
>
> On 2021-10-06 1:22 p.m., Felix Kuehling wrote:
>> Am 2021-10-06 um 10:32 a.m. schrieb Philip Yang:
>>> For kfd_ioctl_alloc_memory_of_gpu to allocate a userptr BO, check that
>>> the userptr address does not already exist in svms->objects.
>>>
>>> For svm range allocation, look for the address in the userptr ranges of
>>> the interval tree VA nodes.
>> Why? The purpose of the check is to prevent the same GPU virtual address
>> being managed by the two different memory managers. So checking
>> args->va_addr should be correct even for userptr BOs. The CPU virtual
>> address should be OK to be mapped with userptr and SVM at the same time
>> as long as the userptr uses a distinct GPU virtual address.
>
> The userptr CPU virtual address is registered with the MMU notifier. If an
> svm range overlaps with a userptr, then migrating the svm range triggers
> the MMU notifier to evict the userptr and evict the user queues; for XNACK
> on, this is not correct. In addition, restoring the userptr will fail if
> the svm range has been migrated to VRAM, because hmm_range_fault fails to
> get the system pages, and the app will soft-hang as its queues are never
> restored.
>
OK. Then we need to check both the CPU and GPU virtual address ranges.
Having userptr or SVM registrations fail like that is not ideal. It only
changes the failure mode, but doesn't really fix applications affected
by this.

A real solution probably requires, that we reimplement userptrs using
the SVM API in the Thunk, when SVM is available. That way you avoid this
conflict between the two memory managers.

Regards,
  Felix


> Regards,
>
> Philip
>
>> Regards,
>>   Felix
>>
>>
>>> Change the helper svm_range_check_vm to return an amdgpu_bo, which will
>>> be used later to avoid creating a new svm range that overlaps with the bo.
>>>
>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>> ---
>>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 +++++---
>>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 55 +++++++++++++++++++-----
>>>  2 files changed, 57 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index f1e7edeb4e6b..34dfa6a938bf 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1255,6 +1255,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>>>  	long err;
>>>  	uint64_t offset = args->mmap_offset;
>>>  	uint32_t flags = args->flags;
>>> +	unsigned long start, last;
>>>  
>>>  	if (args->size == 0)
>>>  		return -EINVAL;
>>> @@ -1266,11 +1267,17 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
>>>  	svm_range_list_lock_and_flush_work(&p->svms, current->mm);
>>>  	mutex_lock(&p->svms.lock);
>>>  	mmap_write_unlock(current->mm);
>>> -	if (interval_tree_iter_first(&p->svms.objects,
>>> -				     args->va_addr >> PAGE_SHIFT,
>>> -				     (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
>>> -		pr_err("Address: 0x%llx already allocated by SVM\n",
>>> -			args->va_addr);
>>> +
>>> +	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
>>> +		start = args->mmap_offset >> PAGE_SHIFT;
>>> +		last = (args->mmap_offset + args->size - 1) >> PAGE_SHIFT;
>>> +	} else {
>>> +		start = args->va_addr >> PAGE_SHIFT;
>>> +		last = (args->va_addr + args->size - 1) >> PAGE_SHIFT;
>>> +	}
>>> +
>>> +	if (interval_tree_iter_first(&p->svms.objects, start, last)) {
>>> +		pr_err("[0x%lx 0x%lx] already allocated by SVM\n", start, last);
>>>  		mutex_unlock(&p->svms.lock);
>>>  		return -EADDRINUSE;
>>>  	}
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index 7f0743fcfcc3..d49c08618714 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -2679,15 +2679,18 @@ int svm_range_list_init(struct kfd_process *p)
>>>   *
>>>   * Context: Process context
>>>   *
>>> - * Return 0 - OK, if the range is not mapped.
>>> + * Return NULL - if the range is not mapped.
>>> + * amdgpu_bo - if the range is mapped by old API
>>>   * Otherwise error code:
>>> - * -EADDRINUSE - if address is mapped already by kfd_ioctl_alloc_memory_of_gpu
>>>   * -ERESTARTSYS - A wait for the buffer to become unreserved was interrupted by
>>>   * a signal. Release all buffer reservations and return to user-space.
>>>   */
>>> -static int
>>> +static struct amdgpu_bo *
>>>  svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last)
>>>  {
>>> +	struct amdgpu_bo_va_mapping *mapping;
>>> +	struct interval_tree_node *node;
>>> +	struct amdgpu_bo *bo = NULL;
>>>  	uint32_t i;
>>>  	int r;
>>>  
>>> @@ -2700,16 +2703,42 @@ svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last)
>>>  		vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
>>>  		r = amdgpu_bo_reserve(vm->root.bo, false);
>>>  		if (r)
>>> -			return r;
>>> -		if (interval_tree_iter_first(&vm->va, start, last)) {
>>> -			pr_debug("Range [0x%llx 0x%llx] already mapped\n", start, last);
>>> -			amdgpu_bo_unreserve(vm->root.bo);
>>> -			return -EADDRINUSE;
>>> +			return ERR_PTR(r);
>>> +		node = interval_tree_iter_first(&vm->va, start, last);
>>> +		if (node) {
>>> +			pr_debug("range [0x%llx 0x%llx] already TTM mapped\n",
>>> +				 start, last);
>>> +			mapping = container_of((struct rb_node *)node,
>>> +					       struct amdgpu_bo_va_mapping, rb);
>>> +			bo = mapping->bo_va->base.bo;
>>> +			goto out_unreserve;
>>> +		}
>>> +
>>> +		/* Check userptr by searching entire vm->va interval tree */
>>> +		node = interval_tree_iter_first(&vm->va, 0, ~0ULL);
>>> +		while (node) {
>>> +			mapping = container_of((struct rb_node *)node,
>>> +					       struct amdgpu_bo_va_mapping, rb);
>>> +			bo = mapping->bo_va->base.bo;
>>> +
>>> +			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>> +							 start << PAGE_SHIFT,
>>> +							 last << PAGE_SHIFT)) {
>>> +				pr_debug("[0x%llx 0x%llx] userptr mapped\n",
>>> +					 start, last);
>>> +				goto out_unreserve;
>>> +			}
>>> +			bo = NULL;
>>> +			node = interval_tree_iter_next(node, 0, ~0ULL);
>>>  		}
>>> +
>>> +out_unreserve:
>>>  		amdgpu_bo_unreserve(vm->root.bo);
>>> +		if (bo)
>>> +			break;
>>>  	}
>>>  
>>> -	return 0;
>>> +	return bo;
>>>  }
>>>  
>>>  /**
>>> @@ -2732,6 +2761,7 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
>>>  	struct vm_area_struct *vma;
>>>  	unsigned long end;
>>>  	unsigned long start_unchg = start;
>>> +	struct amdgpu_bo *bo;
>>>  
>>>  	start <<= PAGE_SHIFT;
>>>  	end = start + (size << PAGE_SHIFT);
>>> @@ -2743,7 +2773,12 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
>>>  		start = min(end, vma->vm_end);
>>>  	} while (start < end);
>>>  
>>> -	return svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT);
>>> +	bo = svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT);
>>> +	if (IS_ERR(bo))
>>> +		return PTR_ERR(bo);
>>> +	if (bo)
>>> +		return -EADDRINUSE;
>>> +	return 0;
>>>  }
>>>  
>>>  /**


More information about the amd-gfx mailing list