[PATCH 7/7] drm/amdgpu: enable huge page handling in the VM v2

Tue May 23 16:52:54 UTC 2017

From: Christian König <christian.koenig at amd.com>

The hardware can use huge pages to map 2MB of address space with only one PDE.

v2: few cleanups and rebased

Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 95 +++++++++++++++++++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  4 ++
 2 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f7afdfa..f07c9b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -325,6 +325,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 			entry->bo = pt;
 			entry->addr = 0;
+			entry->huge_page = false;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1013,7 +1014,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
-		if (parent->entries[pt_idx].addr == pt)
+		if (parent->entries[pt_idx].addr == pt ||
+		    parent->entries[pt_idx].huge_page)
 			continue;
 
 		parent->entries[pt_idx].addr = pt;
@@ -1145,29 +1147,69 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_find_pt - find the page table for an address
+ * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
  *
  * @p: see amdgpu_pte_update_params definition
  * @addr: virtual address in question
+ * @nptes: number of PTEs updated with this operation
+ * @dst: destination address where the PTEs should point to
+ * @flags: access flags fro the PTEs
+ * @bo: resulting page tables BO
  *
- * Find the page table BO for a virtual address, return NULL when none found.
+ * Check if we can update the PD with a huge page. Also finds the page table
+ * BO for a virtual address, returns -ENOENT when nothing found.
  */
-static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
-					  uint64_t addr)
+static int amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
+				       uint64_t addr, unsigned nptes,
+				       uint64_t dst, uint64_t flags,
+				       struct amdgpu_bo **bo)
 {
-	struct amdgpu_vm_pt *entry = &p->vm->root;
-	unsigned idx, level = p->adev->vm_manager.num_level;
+	unsigned pt_idx, level = p->adev->vm_manager.num_level;
+	struct amdgpu_vm_pt *entry = &p->vm->root, *parent;
+	uint64_t pd_addr, pde;
 
-	while (entry->entries) {
-		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size(entry->bo) / 8;
-		entry = &entry->entries[idx];
-	}
+	do {
+		pt_idx = addr >> (p->adev->vm_manager.block_size * level--);
+		pt_idx %= amdgpu_bo_size(entry->bo) / 8;
+		parent = entry;
+		entry = &entry->entries[pt_idx];
+	} while (entry->entries);
 
 	if (level)
-		return NULL;
+		return -ENOENT;
+
+	*bo = entry->bo;
+
+	/* In the case of a mixed PT the PDE must point to it*/
+	if (p->adev->asic_type < CHIP_VEGA10 ||
+	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
+	    p->func != amdgpu_vm_do_set_ptes ||
+	    !(flags & AMDGPU_PTE_VALID)) {
+
+		dst = amdgpu_bo_gpu_offset(*bo);
+		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
+		flags = AMDGPU_PTE_VALID;
+	} else {
+		flags |= AMDGPU_PDE_PTE;
+	}
 
-	return entry->bo;
+	if (entry->addr == dst &&
+	    entry->huge_page == !!(flags & AMDGPU_PDE_PTE))
+		return 0;
+
+	entry->addr = dst;
+	entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
+
+	if (parent->bo->shadow) {
+		pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		pde = pd_addr + pt_idx * 8;
+		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	}
+
+	pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+	pde = pd_addr + pt_idx * 8;
+	amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
+	return 0;
 }
 
 /**
@@ -1193,14 +1235,20 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	uint64_t addr, pe_start;
 	struct amdgpu_bo *pt;
 	unsigned nptes;
+	int r;
 
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; addr += nptes) {
-		pt = amdgpu_vm_get_pt(params, addr);
-		if (!pt) {
-			pr_err("PT not found, aborting update_ptes\n");
-			return -EINVAL;
-		}
+
+		if ((addr & ~mask) == (end & ~mask))
+			nptes = end - addr;
+		else
+			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
+
+		r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
+						dst, flags, &pt);
+		if (r)
+			return r;
 
 		if (params->shadow) {
 			if (!pt->shadow)
@@ -1208,11 +1256,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			pt = pt->shadow;
 		}
 
-		if ((addr & ~mask) == (end & ~mask))
-			nptes = end - addr;
-		else
-			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
-
 		pe_start = amdgpu_bo_gpu_offset(pt);
 		pe_start += (addr & mask) * 8;
 
@@ -1353,6 +1396,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
+	/* one PDE write for each huge page */
+	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 7;
+
 	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
@@ -1437,6 +1483,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
+	amdgpu_vm_invalidate_level(&vm->root);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 0f83fe3..2940d66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -70,6 +70,9 @@ struct amdgpu_bo_list_entry;
 /* TILED for VEGA10, reserved for older ASICs  */
 #define AMDGPU_PTE_PRT		(1ULL << 51)
 
+/* PDE is handled as PTE for VEGA10 */
+#define AMDGPU_PDE_PTE		(1ULL << 54)
+
 /* VEGA10 only */
 #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK	AMDGPU_PTE_MTYPE(3ULL)
@@ -92,6 +95,7 @@ struct amdgpu_bo_list_entry;
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
+	bool			huge_page;
 
 	/* array of page tables, one for each directory entry */
 	struct amdgpu_vm_pt	*entries;
-- 
2.7.4