[PATCH v2] drm/amdkfd: Separate pinned BOs destruction from general routine
Felix Kuehling
felix.kuehling at amd.com
Thu Oct 21 15:58:52 UTC 2021
Am 2021-10-15 um 2:54 a.m. schrieb Lang Yu:
> Currently, all kfd BOs use same destruction routine. But pinned
> BOs are not unpinned properly. Separate them from general routine.
>
> v2 (Felix):
> Add safeguard to prevent user space from freeing signal BO.
> Kunmap signal BO in the event of setting event page error.
> Just kunmap signal BO to avoid duplicating the code.
>
> Signed-off-by: Lang Yu <lang.yu at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 +
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 10 ++
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 31 +++--
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 110 +++++++++++++-----
> 5 files changed, 119 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 69de31754907..751557af09bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -279,6 +279,8 @@ int amdgpu_amdkfd_gpuvm_sync_memory(
> struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
> int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
> struct kgd_mem *mem, void **kptr, uint64_t *size);
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem);
> +
> int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
> struct dma_fence **ef);
> int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index cdf46bd0d8d5..4969763c2e47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1871,6 +1871,16 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
> return ret;
> }
>
> +void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_dev *kgd, struct kgd_mem *mem)
> +{
> + struct amdgpu_bo *bo = mem->bo;
> +
> + amdgpu_bo_reserve(bo, true);
> + amdgpu_bo_kunmap(bo);
> + amdgpu_bo_unpin(bo);
> + amdgpu_bo_unreserve(bo);
> +}
> +
> int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
> struct kfd_vm_fault_info *mem)
> {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f1e7edeb4e6b..9317a2e238d0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1011,11 +1011,6 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
> void *mem, *kern_addr;
> uint64_t size;
>
> - if (p->signal_page) {
> - pr_err("Event page is already set\n");
> - return -EINVAL;
> - }
> -
> kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
> if (!kfd) {
> pr_err("Getting device by id failed in %s\n", __func__);
> @@ -1023,6 +1018,13 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
> }
>
> mutex_lock(&p->mutex);
> +
> + if (p->signal_page) {
> + pr_err("Event page is already set\n");
> + err = -EINVAL;
> + goto out_unlock;
> + }
> +
> pdd = kfd_bind_process_to_device(kfd, p);
> if (IS_ERR(pdd)) {
> err = PTR_ERR(pdd);
> @@ -1037,20 +1039,24 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
> err = -EINVAL;
> goto out_unlock;
> }
> - mutex_unlock(&p->mutex);
>
> err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
> mem, &kern_addr, &size);
> if (err) {
> pr_err("Failed to map event page to kernel\n");
> - return err;
> + goto out_unlock;
> }
>
> err = kfd_event_page_set(p, kern_addr, size);
> if (err) {
> pr_err("Failed to set event page\n");
> - return err;
> + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem);
> + goto out_unlock;
> }
> +
> + p->signal_handle = args->event_page_offset;
> +
> + mutex_unlock(&p->mutex);
> }
>
> err = kfd_event_create(filp, p, args->event_type,
> @@ -1368,6 +1374,15 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
> return -EINVAL;
>
> mutex_lock(&p->mutex);
> + /*
> + * Safeguard to prevent user space from freeing signal BO.
> + * It will be freed at process termination.
> + */
> + if (p->signal_handle && (p->signal_handle == args->handle)) {
> + pr_err("Free signal BO is not allowed\n");
> + ret = -EPERM;
> + goto err_unlock;
> + }
>
> pdd = kfd_get_process_device_data(dev, p);
> if (!pdd) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 6d8f9bb2d905..30f08f1606bb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -608,12 +608,14 @@ struct qcm_process_device {
> uint32_t sh_hidden_private_base;
>
> /* CWSR memory */
> + struct kgd_mem *cwsr_mem;
> void *cwsr_kaddr;
> uint64_t cwsr_base;
> uint64_t tba_addr;
> uint64_t tma_addr;
>
> /* IB memory */
> + struct kgd_mem *ib_mem;
> uint64_t ib_base;
> void *ib_kaddr;
>
> @@ -808,6 +810,7 @@ struct kfd_process {
> /* Event ID allocator and lookup */
> struct idr event_idr;
> /* Event page */
> + u64 signal_handle;
> struct kfd_signal_page *signal_page;
> size_t signal_mapped_size;
> size_t signal_event_count;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 21ec8a18cad2..26fc716a92c2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
> static void evict_process_worker(struct work_struct *work);
> static void restore_process_worker(struct work_struct *work);
>
> +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
> +
> struct kfd_procfs_tree {
> struct kobject *kobj;
> };
> @@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void)
> }
>
> static void kfd_process_free_gpuvm(struct kgd_mem *mem,
> - struct kfd_process_device *pdd)
> + struct kfd_process_device *pdd, void *kptr)
> {
> struct kfd_dev *dev = pdd->dev;
>
> + if (kptr) {
> + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
> + kptr = NULL;
> + }
> +
> amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
> amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
> NULL);
> @@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
> */
> static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
> uint64_t gpu_va, uint32_t size,
> - uint32_t flags, void **kptr)
> + uint32_t flags, struct kgd_mem **mem, void **kptr)
> {
> struct kfd_dev *kdev = pdd->dev;
> - struct kgd_mem *mem = NULL;
> - int handle;
> int err;
>
> err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
> - pdd->drm_priv, &mem, NULL, flags);
> + pdd->drm_priv, mem, NULL, flags);
> if (err)
> goto err_alloc_mem;
>
> - err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
> + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
> pdd->drm_priv, NULL);
> if (err)
> goto err_map_mem;
>
> - err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
> + err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
> if (err) {
> pr_debug("Sync memory failed, wait interrupted by user signal\n");
> goto sync_memory_failed;
> }
>
> - /* Create an obj handle so kfd_process_device_remove_obj_handle
> - * will take care of the bo removal when the process finishes.
> - * We do not need to take p->mutex, because the process is just
> - * created and the ioctls have not had the chance to run.
> - */
> - handle = kfd_process_device_create_obj_handle(pdd, mem);
> -
> - if (handle < 0) {
> - err = handle;
> - goto free_gpuvm;
> - }
> -
> if (kptr) {
> err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
> - (struct kgd_mem *)mem, kptr, NULL);
> + (struct kgd_mem *)*mem, kptr, NULL);
> if (err) {
> pr_debug("Map GTT BO to kernel failed\n");
> - goto free_obj_handle;
> + goto sync_memory_failed;
> }
> }
>
> return err;
>
> -free_obj_handle:
> - kfd_process_device_remove_obj_handle(pdd, handle);
> -free_gpuvm:
> sync_memory_failed:
> - kfd_process_free_gpuvm(mem, pdd);
> - return err;
> + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv);
>
> err_map_mem:
> - amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
> + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
> NULL);
> err_alloc_mem:
> + *mem = NULL;
> *kptr = NULL;
> return err;
> }
> @@ -776,6 +766,7 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
> KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
> KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
> KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> + struct kgd_mem *mem;
> void *kaddr;
> int ret;
>
> @@ -784,15 +775,26 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
>
> /* ib_base is only set for dGPU */
> ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
> - &kaddr);
> + &mem, &kaddr);
> if (ret)
> return ret;
>
> + qpd->ib_mem = mem;
> qpd->ib_kaddr = kaddr;
>
> return 0;
> }
>
> +static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
> +{
> + struct qcm_process_device *qpd = &pdd->qpd;
> +
> + if (!qpd->ib_kaddr || !qpd->ib_base)
> + return;
> +
> + kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
> +}
> +
> struct kfd_process *kfd_create_process(struct file *filep)
> {
> struct kfd_process *process;
> @@ -947,6 +949,38 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
> }
> }
>
> +/*
> + * Just kunmap and unpin signal BO here. It will be freed in
> + * kfd_process_free_outstanding_kfd_bos()
> + */
> +static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
> +{
> + struct kfd_process_device *pdd;
> + struct kfd_dev *kdev;
> + void *mem;
> + int i;
> +
> + kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
> + if (!kdev)
> + return;
> +
> + mutex_lock(&p->mutex);
> +
> + pdd = kfd_get_process_device_data(kdev, p);
> + if (!pdd)
> + goto out;
> +
> + mem = kfd_process_device_translate_handle(
> + pdd, GET_IDR_HANDLE(p->signal_handle));
> + if (!mem)
> + goto out;
> +
> + amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);
> +
> +out:
> + mutex_unlock(&p->mutex);
> +}
> +
> static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
> {
> int i;
> @@ -965,6 +999,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
> pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
> pdd->dev->id, p->pasid);
>
> + kfd_process_device_destroy_cwsr_dgpu(pdd);
> + kfd_process_device_destroy_ib_mem(pdd);
> +
> if (pdd->drm_file) {
> amdgpu_amdkfd_gpuvm_release_process_vm(
> pdd->dev->kgd, pdd->drm_priv);
> @@ -1049,9 +1086,11 @@ static void kfd_process_wq_release(struct work_struct *work)
> {
> struct kfd_process *p = container_of(work, struct kfd_process,
> release_work);
> +
> kfd_process_remove_sysfs(p);
> kfd_iommu_unbind_process(p);
>
> + kfd_process_kunmap_signal_bo(p);
> kfd_process_free_outstanding_kfd_bos(p);
> svm_range_list_fini(p);
>
> @@ -1198,6 +1237,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
> uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
> | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
> | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
> + struct kgd_mem *mem;
> void *kaddr;
> int ret;
>
> @@ -1206,10 +1246,11 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
>
> /* cwsr_base is only set for dGPU */
> ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
> - KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
> + KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr);
> if (ret)
> return ret;
>
> + qpd->cwsr_mem = mem;
> qpd->cwsr_kaddr = kaddr;
> qpd->tba_addr = qpd->cwsr_base;
>
> @@ -1222,6 +1263,17 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
> return 0;
> }
>
> +static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
> +{
> + struct kfd_dev *dev = pdd->dev;
> + struct qcm_process_device *qpd = &pdd->qpd;
> +
> + if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
> + return;
> +
> + kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
> +}
> +
> void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
> uint64_t tba_addr,
> uint64_t tma_addr)
More information about the amd-gfx
mailing list