[PATCH] drm/amdgpu: revert "use the new cursor in the VM code"

Chen, Guchun Guchun.Chen at amd.com
Thu Mar 18 13:17:27 UTC 2021


[AMD Public Use]

Reviewed-by: Guchun Chen <guchun.chen at amd.com>

Regards,
Guchun

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken at gmail.com> 
Sent: Thursday, March 18, 2021 7:53 PM
To: amd-gfx at lists.freedesktop.org
Cc: Chen, Guchun <Guchun.Chen at amd.com>
Subject: [PATCH] drm/amdgpu: revert "use the new cursor in the VM code"

We are seeing VM page faults with this. Revert the change until the bugs are fixed.

This reverts commit e71af7b9807cc9ab2b40a7c02ff93c622786bd2a.

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +++++++++++++++++---------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bc3951b71079..9268db1172bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,7 +37,6 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
-#include "amdgpu_res_cursor.h"
 
 /**
  * DOC: GPUVM
@@ -1584,7 +1583,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * @last: last mapped entry
  * @flags: flags for the entries
  * @offset: offset into nodes and pages_addr
- * @res: ttm_resource to map
+ * @nodes: array of drm_mm_nodes with the MC addresses
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
@@ -1599,13 +1598,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       bool unlocked, struct dma_resv *resv,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t offset,
-				       struct ttm_resource *res,
+				       struct drm_mm_node *nodes,
 				       dma_addr_t *pages_addr,
 				       struct dma_fence **fence)
 {
 	struct amdgpu_vm_update_params params;
-	struct amdgpu_res_cursor cursor;
 	enum amdgpu_sync_mode sync_mode;
+	uint64_t pfn;
 	int r;
 
 	memset(&params, 0, sizeof(params));
@@ -1623,6 +1622,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	else
 		sync_mode = AMDGPU_SYNC_EXPLICIT;
 
+	pfn = offset >> PAGE_SHIFT;
+	if (nodes) {
+		while (pfn >= nodes->size) {
+			pfn -= nodes->size;
+			++nodes;
+		}
+	}
+
 	amdgpu_vm_eviction_lock(vm);
 	if (vm->evicting) {
 		r = -EBUSY;
@@ -1641,17 +1648,23 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_unlock;
 
-	amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
-			 &cursor);
-	while (cursor.remaining) {
+	do {
 		uint64_t tmp, num_entries, addr;
 
-		num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
+
+		num_entries = last - start + 1;
+		if (nodes) {
+			addr = nodes->start << PAGE_SHIFT;
+			num_entries = min((nodes->size - pfn) *
+				AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
+		} else {
+			addr = 0;
+		}
+
 		if (pages_addr) {
 			bool contiguous = true;
 
 			if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
-				uint64_t pfn = cursor.start >> PAGE_SHIFT;
 				uint64_t count;
 
 				contiguous = pages_addr[pfn + 1] ==
@@ -1671,18 +1684,16 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 			}
 
 			if (!contiguous) {
-				addr = cursor.start;
+				addr = pfn << PAGE_SHIFT;
 				params.pages_addr = pages_addr;
 			} else {
-				addr = pages_addr[cursor.start >> PAGE_SHIFT];
+				addr = pages_addr[pfn];
 				params.pages_addr = NULL;
 			}
 
 		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-			addr = bo_adev->vm_manager.vram_base_offset +
-				cursor.start;
-		} else {
-			addr = 0;
+			addr += bo_adev->vm_manager.vram_base_offset;
+			addr += pfn << PAGE_SHIFT;
 		}
 
 		tmp = start + num_entries;
@@ -1690,9 +1701,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		if (r)
 			goto error_unlock;
 
-		amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
+		pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+		if (nodes && nodes->size == pfn) {
+			pfn = 0;
+			++nodes;
+		}
 		start = tmp;
-	};
+
+	} while (unlikely(start != last + 1));
 
 	r = vm->update_funcs->commit(&params, fence);
 
@@ -1721,6 +1737,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	struct ttm_resource *mem;
+	struct drm_mm_node *nodes;
 	struct dma_fence **last_update;
 	struct dma_resv *resv;
 	uint64_t flags;
@@ -1729,6 +1746,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 
 	if (clear || !bo) {
 		mem = NULL;
+		nodes = NULL;
 		resv = vm->root.base.bo->tbo.base.resv;
 	} else {
 		struct drm_gem_object *obj = &bo->tbo.base;
@@ -1743,6 +1761,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 				bo = gem_to_amdgpu_bo(gobj);
 		}
 		mem = &bo->tbo.mem;
+		nodes = mem->mm_node;
 		if (mem->mem_type == TTM_PL_TT)
 			pages_addr = bo->tbo.ttm->dma_address;
 	}
@@ -1791,7 +1810,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
 						resv, mapping->start,
 						mapping->last, update_flags,
-						mapping->offset, mem,
+						mapping->offset, nodes,
 						pages_addr, last_update);
 		if (r)
 			return r;
--
2.25.1


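A note for readers on the logic this revert restores: instead of the
amdgpu_res_cursor helpers, amdgpu_vm_bo_update_mapping() again walks the
buffer's drm_mm_node array by hand, first skipping whole nodes until the
page offset of the mapping falls inside the current node. A minimal
standalone sketch of that walk follows; the struct below is a simplified
stand-in for struct drm_mm_node (only the start/size fields the walk
uses), and the helper name find_start_node is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct drm_mm_node: only the fields the walk
 * uses. For TTM-managed VRAM, start and size are expressed in pages. */
struct mm_node {
	uint64_t start;	/* first page of the node */
	uint64_t size;	/* number of pages in the node */
};

/* Hypothetical helper mirroring the restored loop in
 * amdgpu_vm_bo_update_mapping(): skip whole nodes until the remaining
 * pfn offset lands inside the current node. */
static struct mm_node *find_start_node(struct mm_node *nodes, uint64_t *pfn)
{
	while (*pfn >= nodes->size) {
		*pfn -= nodes->size;
		++nodes;
	}
	return nodes;
}

int main(void)
{
	/* Two nodes of 4 and 8 pages; an offset of 6 pages should land
	 * 2 pages into the second node. */
	struct mm_node nodes[] = { { .start = 100, .size = 4 },
				   { .start = 200, .size = 8 } };
	uint64_t pfn = 6;
	struct mm_node *n = find_start_node(nodes, &pfn);

	printf("node start %llu, pfn within node %llu\n",
	       (unsigned long long)n->start, (unsigned long long)pfn);
	return 0;
}

The per-iteration math in the restored loop then clamps each update to the
end of the current node via num_entries = min((nodes->size - pfn) *
AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries) and advances pfn/nodes before
the next pass.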