[PATCH 096/131] micro-gen8-ppgtt-insert

Chris Wilson chris at chris-wilson.co.uk
Sat Aug 6 07:37:03 UTC 2016


---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 75 ++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index dc5295497f9d..892442cebc0d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -781,42 +781,46 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 static void
 gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 			      struct i915_page_directory_pointer *pdp,
-			      struct sg_page_iter *sg_iter,
+			      struct sgt_iter *sgt_iter,
 			      uint64_t start,
 			      enum i915_cache_level cache_level)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+	unsigned int pdpe = gen8_pdpe_index(start);
+	unsigned int pde = gen8_pde_index(start);
+	unsigned int pte = gen8_pte_index(start);
+	struct i915_page_directory *pd;
+	gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, true);
 	gen8_pte_t *pt_vaddr;
-	unsigned pdpe = gen8_pdpe_index(start);
-	unsigned pde = gen8_pde_index(start);
-	unsigned pte = gen8_pte_index(start);
 
-	pt_vaddr = NULL;
+	pd = pdp->page_directory[pdpe];
+	pt_vaddr = kmap_px(pd->page_table[pde]);
 
-	while (__sg_page_iter_next(sg_iter)) {
-		if (pt_vaddr == NULL) {
-			struct i915_page_directory *pd = pdp->page_directory[pdpe];
-			struct i915_page_table *pt = pd->page_table[pde];
-			pt_vaddr = kmap_px(pt);
+	do {
+		pt_vaddr[pte] = pte_encode | (sgt_iter->dma + sgt_iter->curr);
+		sgt_iter->curr += PAGE_SIZE;
+		if (sgt_iter->curr >= sgt_iter->max) {
+			*sgt_iter = __sgt_iter(__sg_next(sgt_iter->sgp), true);
+			if (sgt_iter->dma == 0)
+				break;
 		}
 
-		pt_vaddr[pte] =
-			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
-					cache_level, true);
 		if (++pte == GEN8_PTES) {
-			kunmap_px(ppgtt, pt_vaddr);
-			pt_vaddr = NULL;
 			if (++pde == I915_PDES) {
 				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
 					break;
+
+				pd = pdp->page_directory[pdpe];
 				pde = 0;
 			}
+
+			kunmap_px(ppgtt, pt_vaddr);
+			pt_vaddr = kmap_px(pd->page_table[pde]);
 			pte = 0;
 		}
-	}
+	} while (1);
 
-	if (pt_vaddr)
-		kunmap_px(ppgtt, pt_vaddr);
+	kunmap_px(ppgtt, pt_vaddr);
 }
 
 static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
@@ -826,22 +830,19 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 				      u32 unused)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	struct sg_page_iter sg_iter;
-
-	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
+	struct sgt_iter sgt_iter = __sgt_iter(pages->sgl, true);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
-		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
+		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sgt_iter, start,
 					      cache_level);
 	} else {
 		struct i915_page_directory_pointer *pdp;
-		uint64_t pml4e;
-		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+		u64 length = (u64)pages->orig_nents << PAGE_SHIFT;
+		u64 pml4e;
 
-		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
-			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
+		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e)
+			gen8_ppgtt_insert_pte_entries(vm, pdp, &sgt_iter,
 						      start, cache_level);
-		}
 	}
 }
 
@@ -2353,29 +2354,17 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
 	struct sgt_iter sgt_iter;
 	gen8_pte_t __iomem *gtt_entries;
-	gen8_pte_t gtt_entry;
+	gen8_pte_t pte_encode = gen8_pte_encode(0, level, true);
 	dma_addr_t addr;
 	int rpm_atomic_seq;
-	int i = 0;
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
 	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT);
+	for_each_sgt_dma(addr, sgt_iter, st)
+		gen8_set_pte(gtt_entries++, pte_encode | addr);
 
-	for_each_sgt_dma(addr, sgt_iter, st) {
-		gtt_entry = gen8_pte_encode(addr, level, true);
-		gen8_set_pte(&gtt_entries[i++], gtt_entry);
-	}
-
-	/*
-	 * XXX: This serves as a posting read to make sure that the PTE has
-	 * actually been updated. There is some concern that even though
-	 * registers and PTEs are within the same BAR that they are potentially
-	 * of NUMA access patterns. Therefore, even with the way we assume
-	 * hardware should work, we must keep this posting read for paranoia.
-	 */
-	if (i != 0)
-		WARN_ON(readq(&gtt_entries[i-1]) != gtt_entry);
+	wmb();
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
-- 
2.8.1



More information about the Intel-gfx-trybot mailing list