[PATCH 5/9] drm/amdgpu: delay VM root PD allocation to first use

Christian König ckoenig.leichtzumerken at gmail.com
Mon Jun 10 09:26:07 UTC 2024


Try not to allocate the backing store for the root PD until it is used for
the first time. This avoids talking to the GPU while initializing the VM.
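
In rough outline: amdgpu_vm_pt_create() now creates the root PD BO without
a backing store (bp.domain = 0) when vm->root.bo is not set yet, and
amdgpu_vm_validate() detects the missing resource and performs the clear the
first time the BO is validated. A simplified sketch of the new validate
path, condensed from the hunks below:

	clear = !bo->tbo.resource;	/* no backing store allocated yet? */
	r = validate(param, bo);	/* allocates and places the backing store */
	if (r)
		return r;

	if (bo->tbo.type != ttm_bo_type_kernel) {
		amdgpu_vm_bo_moved(bo_base);
	} else if (clear) {
		/* first use of the root PD: do the delayed clear */
		r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(bo), false);
		if (r)
			return r;
		amdgpu_vm_bo_relocated(bo_base);
	} else {
		vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
		amdgpu_vm_bo_relocated(bo_base);
	}

The preferred/allowed domains are still stored on the newly created BO, so
the later validation places the root PD in VRAM/GTT as before.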

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 29 +++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 37 ++++++++++++++---------
 2 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b497b345e44a..65d42a405476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -344,7 +344,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 	else
 		amdgpu_vm_bo_idle(base);
 
-	if (bo->preferred_domains &
+	if (bo->tbo.resource && bo->preferred_domains &
 	    amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))
 		return;
 
@@ -478,19 +478,32 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->evicted)) {
+		bool clear;
+
 		bo_base = list_first_entry(&vm->evicted,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
 		spin_unlock(&vm->status_lock);
 
 		bo = bo_base->bo;
-
+		clear = !bo->tbo.resource;
 		r = validate(param, bo);
 		if (r)
 			return r;
 
 		if (bo->tbo.type != ttm_bo_type_kernel) {
 			amdgpu_vm_bo_moved(bo_base);
+
+		} else if (clear) {
+			/* Delayed clear for the root PD */
+			struct amdgpu_bo_vm *bo_vm;
+
+			bo_vm = container_of(bo, struct amdgpu_bo_vm, bo);
+			r = amdgpu_vm_pt_clear(adev, vm, bo_vm, false);
+			if (r)
+				return r;
+			amdgpu_vm_bo_relocated(bo_base);
+
 		} else {
 			vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
 			amdgpu_vm_bo_relocated(bo_base);
 		}
@@ -2447,13 +2460,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
-	r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
-	if (r)
-		goto error_free_root;
-
-	r = amdgpu_vm_pt_clear(adev, vm, root, false);
-	if (r)
-		goto error_free_root;
 
 	r = amdgpu_vm_create_task_info(vm);
 	if (r)
@@ -2464,11 +2470,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	return 0;
 
-error_free_root:
-	amdgpu_vm_pt_free_root(adev, vm);
-	amdgpu_bo_unreserve(vm->root.bo);
-	amdgpu_bo_unref(&root_bo);
-
 error_free_delayed:
 	dma_fence_put(vm->last_tlb_flush);
 	dma_fence_put(vm->last_unlocked);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index c8e0b8cfd336..984be7080b52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -360,7 +360,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		       struct amdgpu_bo_vm *vmbo, bool immediate)
 {
 	unsigned int level = adev->vm_manager.root_level;
-	struct ttm_operation_ctx ctx = { true, false };
 	struct amdgpu_vm_update_params params;
 	struct amdgpu_bo *ancestor = &vmbo->bo;
 	unsigned int entries;
@@ -379,10 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	entries = amdgpu_bo_size(bo) / 8;
 
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-	if (r)
-		return r;
-
 	if (!drm_dev_enter(adev_to_drm(adev), &idx))
 		return -ENODEV;
 
@@ -441,6 +436,8 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
 	struct amdgpu_bo_param bp;
 	unsigned int num_entries;
+	unsigned int domains;
+	int r;
 
 	memset(&bp, 0, sizeof(bp));
 
@@ -448,32 +445,42 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
 
 	if (!adev->gmc.is_app_apu)
-		bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+		domains = AMDGPU_GEM_DOMAIN_VRAM;
 	else
-		bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+		domains = AMDGPU_GEM_DOMAIN_GTT;
+
+	domains = amdgpu_bo_get_preferred_domain(adev, domains);
+
+	if (vm->root.bo) {
+		bp.resv = vm->root.bo->tbo.base.resv;
+		bp.domain = domains;
+	} else {
+		bp.domain = 0;
+	}
 
-	bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
 	bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
 
+	if (vm->use_cpu_for_update)
+		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
 	if (level < AMDGPU_VM_PTB)
 		num_entries = amdgpu_vm_pt_num_entries(adev, level);
 	else
 		num_entries = 0;
 
 	bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
-
-	if (vm->use_cpu_for_update)
-		bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-
 	bp.type = ttm_bo_type_kernel;
 	bp.no_wait_gpu = immediate;
 	bp.xcp_id_plus1 = xcp_id + 1;
 
-	if (vm->root.bo)
-		bp.resv = vm->root.bo->tbo.base.resv;
+	r = amdgpu_bo_create_vm(adev, &bp, vmbo);
+	if (r)
+		return r;
 
-	return amdgpu_bo_create_vm(adev, &bp, vmbo);
+	(*vmbo)->bo.allowed_domains = domains;
+	(*vmbo)->bo.preferred_domains = domains;
+	return 0;
 }
 
 /**
-- 
2.34.1


