[PATCH] drm/amdkfd: avoid conflicting address mappings
Felix Kuehling
felix.kuehling at amd.com
Thu Jul 15 23:09:15 UTC 2021
On 2021-07-15 3:59 p.m., Alex Sierra wrote:
> [Why]
> Avoid conflicts between address ranges already mapped by the
> SVM mechanism and ranges that are allocated again through
> ioctl_alloc in the same process, and vice versa.
>
> [How]
> For ioctl_alloc_memory_of_gpu allocations:
> Check that the address range passed into the ioctl does not
> already exist in the kfd_process svms->objects interval tree.
>
> For SVM allocations:
> Look up the address range in the VA interval tree of the VM
> of each pdd used by the kfd_process.
>
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 +++++
> drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 68 ++++++++++++++++++------
> 2 files changed, 66 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 67541c30327a..f39baaa22a62 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1251,6 +1251,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
> struct kfd_process_device *pdd;
> void *mem;
> struct kfd_dev *dev;
> + struct svm_range_list *svms = &p->svms;
> int idr_handle;
> long err;
> uint64_t offset = args->mmap_offset;
> @@ -1259,6 +1260,18 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
> if (args->size == 0)
> return -EINVAL;
>
> +#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
> + mutex_lock(&svms->lock);
> + if (interval_tree_iter_first(&svms->objects,
> + args->va_addr >> PAGE_SHIFT,
> + (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
> + pr_err("Address: 0x%llx already allocated by SVM\n",
> + args->va_addr);
> + mutex_unlock(&svms->lock);
> + return -EADDRINUSE;
> + }
> + mutex_unlock(&svms->lock);
> +#endif
> dev = kfd_device_by_id(args->gpu_id);
> if (!dev)
> return -EINVAL;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 31f3f24cef6a..43b31f00951a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2581,9 +2581,46 @@ int svm_range_list_init(struct kfd_process *p)
> return 0;
> }
>
> +/**
> + * svm_range_is_vm_bo_mapped - check if a virtual address range is already mapped
> + * @p: current kfd_process
> + * @start: range start address, in pages
> + * @last: range last address, in pages
> + *
> + * The purpose is to avoid clashing with virtual address ranges that were
> + * already allocated through the kfd_ioctl_alloc_memory_of_gpu ioctl.
> + * The range is looked up in the VM of each pdd in the kfd_process.
> + *
> + * Context: Process context
> + *
> + * Return:
> + * 0 - OK, the range is not mapped
> + * -EADDRINUSE - if the address range is already mapped by kfd_ioctl_alloc_memory_of_gpu
> + * -EFAULT - if the VM does not exist for a specific pdd
> + */
> +static int
> +svm_range_is_vm_bo_mapped(struct kfd_process *p, uint64_t start, uint64_t last)
> +{
> + uint32_t i;
> +
> + for (i = 0; i < p->n_pdds; i++) {
> + struct amdgpu_vm *vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
> +
> + if (!vm)
> + return -EFAULT;
This is not necessarily an error. Not every process uses every GPU. I
think you can just skip PDDs that don't have a VM. But you also need a
NULL-check for p->pdds[i]->drm_priv. I believe it will be NULL on an
unused GPU (where user mode never called kfd_ioctl_acquire_vm).
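Something like this (untested, just to illustrate what I mean):

	for (i = 0; i < p->n_pdds; i++) {
		struct amdgpu_vm *vm;

		/* drm_priv is NULL on GPUs where user mode never
		 * called kfd_ioctl_acquire_vm; that's not an error,
		 * just skip them.
		 */
		if (!p->pdds[i]->drm_priv)
			continue;

		vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
		...
	}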
> +
> + if (interval_tree_iter_first(&vm->va, start, last)) {
I believe access to this interval tree needs to happen under the page
table reservation lock. That means you need to
amdgpu_bo_reserve(vm->root.bo, false) (and handle the special case that
it returns -ERESTARTSYS).
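Something along these lines (untested sketch):

	int r;

	r = amdgpu_bo_reserve(vm->root.bo, false);
	if (r)
		return r; /* can be -ERESTARTSYS, caller must handle it */

	if (interval_tree_iter_first(&vm->va, start, last)) {
		pr_debug("Range [0x%llx 0x%llx] already mapped\n",
			 start, last);
		amdgpu_bo_unreserve(vm->root.bo);
		return -EADDRINUSE;
	}
	amdgpu_bo_unreserve(vm->root.bo);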
> + pr_debug("Range [0x%llx 0x%llx] already mapped\n", start, last);
> + return -EADDRINUSE;
> + }
> + }
> +
> + return 0;
> +}
> +
> /**
> * svm_range_is_valid - check if virtual address range is valid
> - * @mm: current process mm_struct
> + * @p: current kfd_process
> * @start: range start address, in pages
> * @size: range size, in pages
> *
> @@ -2592,28 +2629,27 @@ int svm_range_list_init(struct kfd_process *p)
> * Context: Process context
> *
> * Return:
> - * true - valid svm range
> - * false - invalid svm range
> + * 0 - OK, otherwise error code
> */
> -static bool
> -svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size)
> +static int
> +svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
> {
> const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
> struct vm_area_struct *vma;
> unsigned long end;
> + unsigned long start_unchg = start;
>
> start <<= PAGE_SHIFT;
> end = start + (size << PAGE_SHIFT);
> -
> do {
> - vma = find_vma(mm, start);
> + vma = find_vma(p->mm, start);
> if (!vma || start < vma->vm_start ||
> (vma->vm_flags & device_vma))
> - return false;
> + return -EFAULT;
> start = min(end, vma->vm_end);
> } while (start < end);
>
> - return true;
> + return svm_range_is_vm_bo_mapped(p, start_unchg, (end - 1) >> PAGE_SHIFT);
> }
>
> /**
> @@ -2913,9 +2949,9 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
>
> svm_range_list_lock_and_flush_work(svms, mm);
>
> - if (!svm_range_is_valid(mm, start, size)) {
> - pr_debug("invalid range\n");
> - r = -EFAULT;
> + r = svm_range_is_valid(p, start, size);
> + if (r) {
> + pr_debug("invalid range r=%d\n", r);
> mmap_write_unlock(mm);
> goto out;
> }
> @@ -3016,15 +3052,17 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
> uint32_t flags = 0xffffffff;
> int gpuidx;
> uint32_t i;
> + int r = 0;
>
> pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
> start + size - 1, nattr);
>
> mmap_read_lock(mm);
> - if (!svm_range_is_valid(mm, start, size)) {
> - pr_debug("invalid range\n");
> + r = svm_range_is_valid(p, start, size);
> + if (r) {
> + pr_debug("invalid range r=%d\n", r);
> mmap_read_unlock(mm);
If you move the mmap_read_unlock before the if (r), you don't need to
duplicate it inside and outside the "if".
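I.e. something like:

	r = svm_range_is_valid(p, start, size);
	mmap_read_unlock(mm);
	if (r) {
		pr_debug("invalid range r=%d\n", r);
		return r;
	}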
Regards,
Felix
> - return -EINVAL;
> + return r;
> }
> mmap_read_unlock(mm);
>