[PATCH v2 04/10] drm/amdgpu: Simplify AQL queue mapping

Felix Kuehling felix.kuehling at amd.com
Fri Apr 23 07:23:20 UTC 2021


On 2021-04-22 at 9:33 p.m., Zeng, Oak wrote:
> Regards,
> Oak 
>
>  
>
> On 2021-04-21, 9:31 PM, "amd-gfx on behalf of Felix Kuehling" <amd-gfx-bounces at lists.freedesktop.org on behalf of Felix.Kuehling at amd.com> wrote:
>
>     Do AQL queue double-mapping with a single attach call. That will make it
>     easier to create per-GPU BOs later, to be shared between the two BO VA
>     mappings on the same GPU.
>
>     Freeing the attachments is not necessary if map_to_gpu fails. These will be
>     cleaned up when the kgd_mem object is destroyed in
>     amdgpu_amdkfd_gpuvm_free_memory_of_gpu.
>
>     Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>     ---
>      .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 103 ++++++++----------
>      1 file changed, 48 insertions(+), 55 deletions(-)
>
>     diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>     index 34c9a2d0028e..fbd7e786b54e 100644
>     --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>     +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>     @@ -486,70 +486,76 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
>       * 4a.  Validate new page tables and directories
>       */
>      static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>     -		struct amdgpu_vm *vm, bool is_aql,
>     -		struct kfd_mem_attachment **p_attachment)
>     +		struct amdgpu_vm *vm, bool is_aql)
>      {
>      	unsigned long bo_size = mem->bo->tbo.base.size;
>      	uint64_t va = mem->va;
>     -	struct kfd_mem_attachment *attachment;
>     -	struct amdgpu_bo *bo;
>     -	int ret;
>     +	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
>     +	struct amdgpu_bo *bo[2] = {NULL, NULL};
>     +	int i, ret;
>
>      	if (!va) {
>      		pr_err("Invalid VA when adding BO to VM\n");
>      		return -EINVAL;
>      	}
>
>     -	if (is_aql)
>     -		va += bo_size;
>     -
>     -	attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
>     -	if (!attachment)
>     -		return -ENOMEM;
>     +	for (i = 0; i <= is_aql; i++) {
>     +		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
>     +		if (unlikely(!attachment[i])) {
>     +			ret = -ENOMEM;
>     +			goto unwind;
>     +		}
>
>     -	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
>     -			va + bo_size, vm);
>     +		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
>     +			 va + bo_size, vm);
>
>     -	/* FIXME: For now all attachments use the same BO. This is incorrect
>     -	 * because one BO can only have one DMA mapping for one GPU. We need
>     -	 * one BO per GPU, e.g. a DMABuf import with dynamic attachment. This
>     -	 * will be addressed one BO-type at a time in subsequent patches.
>     -	 */
>     -	bo = mem->bo;
>     -	drm_gem_object_get(&bo->tbo.base);
>     +		/* FIXME: For now all attachments use the same BO. This is
>     +		 * incorrect because one BO can only have one DMA mapping
>     +		 * for one GPU. We need one BO per GPU, e.g. a DMABuf
>     +		 * import with dynamic attachment. This will be addressed
>     +		 * one BO-type at a time in subsequent patches.
>     +		 */
>     +		bo[i] = mem->bo;
>     +		drm_gem_object_get(&bo[i]->tbo.base);
>
>     -	/* Add BO to VM internal data structures*/
>     -	attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
>     -	if (!attachment->bo_va) {
>     -		ret = -EINVAL;
>     -		pr_err("Failed to add BO object to VM. ret == %d\n",
>     -				ret);
>     -		goto err_vmadd;
>     -	}
>     +		/* Add BO to VM internal data structures */
>     +		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> Just for discussion: are we allowed to add one bo to a vm twice? When I looked at amdgpu_vm_bo_base_init (called by amdgpu_vm_bo_add), at the line:
> bo->vm_bo = base;
> it looks like adding the same bo to the vm a second time overwrites bo->vm_bo. I am not sure whether this will cause an issue later.
> This is not introduced by your code; the original code (calling kfd_mem_attach twice for aql) has the same problem.

If you just add one more line of context, you'll see that bo->vm_bo is
the head of a singly linked list of struct amdgpu_vm_bo_base. So adding
a BO to a VM multiple times just prepends to that list:

        base->next = bo->vm_bo;
        bo->vm_bo = base;
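
For illustration, a minimal stand-alone sketch of that behaviour. The
sim_* structs below are simplified stand-ins invented for this example,
not the real amdgpu definitions; only the vm_bo/next chaining is modelled:

#include <stdio.h>

struct sim_vm {				/* stands in for struct amdgpu_vm */
	int id;
};

struct sim_vm_bo_base {			/* stands in for struct amdgpu_vm_bo_base */
	struct sim_vm *vm;
	struct sim_vm_bo_base *next;	/* next mapping of the same BO */
};

struct sim_bo {				/* stands in for struct amdgpu_bo */
	struct sim_vm_bo_base *vm_bo;	/* head of the singly linked list */
};

/* Models the prepend done in amdgpu_vm_bo_base_init(): the new base is
 * pushed onto the front of the list, so earlier mappings stay reachable.
 */
static void sim_vm_bo_base_init(struct sim_vm_bo_base *base,
				struct sim_vm *vm, struct sim_bo *bo)
{
	base->vm = vm;
	base->next = bo->vm_bo;
	bo->vm_bo = base;
}

/* Walks the chain and counts how many times the BO is mapped into the
 * given VM.
 */
static int sim_count_mappings(struct sim_bo *bo, struct sim_vm *vm)
{
	struct sim_vm_bo_base *base;
	int n = 0;

	for (base = bo->vm_bo; base; base = base->next)
		if (base->vm == vm)
			n++;
	return n;
}

int main(void)
{
	struct sim_vm vm = { .id = 0 };
	struct sim_bo bo = { .vm_bo = NULL };
	struct sim_vm_bo_base gfx = { 0 }, aql = { 0 };

	/* Attach the same BO to the same VM twice (GFX + AQL mapping). */
	sim_vm_bo_base_init(&gfx, &vm, &bo);
	sim_vm_bo_base_init(&aql, &vm, &bo);

	/* Prints 2: the second attach did not overwrite the first. */
	printf("mappings of bo in vm: %d\n", sim_count_mappings(&bo, &vm));
	return 0;
}

Lookups such as amdgpu_vm_bo_find() walk this chain and match on the vm
pointer, so both mappings of the BO in the same VM stay visible.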

Regards,
  Felix


>     +		if (unlikely(!attachment[i]->bo_va)) {
>     +			ret = -ENOMEM;
>     +			pr_err("Failed to add BO object to VM. ret == %d\n",
>     +			       ret);
>     +			goto unwind;
>     +		}
>
>     -	attachment->va = va;
>     -	attachment->pte_flags = get_pte_flags(adev, mem);
>     -	attachment->adev = adev;
>     -	list_add(&attachment->list, &mem->attachments);
>     +		attachment[i]->va = va;
>     +		attachment[i]->pte_flags = get_pte_flags(adev, mem);
>     +		attachment[i]->adev = adev;
>     +		list_add(&attachment[i]->list, &mem->attachments);
>
>     -	if (p_attachment)
>     -		*p_attachment = attachment;
>     +		va += bo_size;
>     +	}
>
>      	/* Allocate validate page tables if needed */
>      	ret = vm_validate_pt_pd_bos(vm);
>      	if (unlikely(ret)) {
>      		pr_err("validate_pt_pd_bos() failed\n");
>     -		goto err_alloc_pts;
>     +		goto unwind;
>      	}
>
>      	return 0;
>
>     -err_alloc_pts:
>     -	amdgpu_vm_bo_rmv(adev, attachment->bo_va);
>     -	list_del(&attachment->list);
>     -err_vmadd:
>     -	drm_gem_object_put(&bo->tbo.base);
>     -	kfree(attachment);
>     +unwind:
>     +	for (; i >= 0; i--) {
>     +		if (!attachment[i])
>     +			continue;
>     +		if (attachment[i]->bo_va) {
>     +			amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va);
>     +			list_del(&attachment[i]->list);
>     +		}
>     +		if (bo[i])
>     +			drm_gem_object_put(&bo[i]->tbo.base);
>     +		kfree(attachment[i]);
>     +	}
>      	return ret;
>      }
>
>     @@ -1382,8 +1388,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>      	uint32_t domain;
>      	struct kfd_mem_attachment *entry;
>      	struct bo_vm_reservation_context ctx;
>     -	struct kfd_mem_attachment *attachment = NULL;
>     -	struct kfd_mem_attachment *attachment_aql = NULL;
>      	unsigned long bo_size;
>      	bool is_invalid_userptr = false;
>
>     @@ -1433,15 +1437,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>      		is_invalid_userptr = true;
>
>      	if (!kfd_mem_is_attached(avm, mem)) {
>     -		ret = kfd_mem_attach(adev, mem, avm, false, &attachment);
>     +		ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
>      		if (ret)
>      			goto attach_failed;
>     -		if (mem->aql_queue) {
>     -			ret = kfd_mem_attach(adev, mem, avm, true,
>     -					     &attachment_aql);
>     -			if (ret)
>     -				goto attach_failed_aql;
>     -		}
>      	} else {
>      		ret = vm_validate_pt_pd_bos(avm);
>      		if (unlikely(ret))
>     @@ -1496,11 +1494,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>      	goto out;
>
>      map_bo_to_gpuvm_failed:
>     -	if (attachment_aql)
>     -		kfd_mem_detach(attachment_aql);
>     -attach_failed_aql:
>     -	if (attachment)
>     -		kfd_mem_detach(attachment);
>      attach_failed:
>      	unreserve_bo_and_vms(&ctx, false, false);
>      out:
>     -- 
>     2.31.1
>
>     _______________________________________________
>     amd-gfx mailing list
>     amd-gfx at lists.freedesktop.org
>     https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>

