[Intel-gfx] [PATCH 10/15] drm/i915/bdw: Map unused PDPs to a scratch page

Rodrigo Vivi rodrigo.vivi at intel.com
Tue Aug 5 16:51:21 CEST 2014


From: Bob Beckett <robert.beckett at intel.com>

Create a scratch page for the two unused PDPs and set all the PTEs
for them to point to it.

This patch addresses a page fault, and subsequent hang in pipe
control flush. In these cases, the Main Graphic Arbiter Error
register [0x40A0] showed a TLB Page Fault error, and a high memory
address (higher than the size of our PPGTT) was reported in the
Fault TLB RD Data0 register (0x4B10).

PDP2 & PDP3 were not set because, in theory, they aren't required
for our PPGTT size, but they should be mapped to a scratch page
anyway.

v2: Rebase on latest nightly.

Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v1)
Signed-off-by: Dave Gordon <david.s.gordon at intel.com> (v2)
Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 79 +++++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +
 2 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 264f2a6..a88f879 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -365,6 +365,11 @@ static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
 		kfree(ppgtt->gen8_pt_dma_addr[i]);
 	}
 
+	/* Unused PDPs are always assigned to scratch page */
+	for (i = ppgtt->num_pd_pages; i < GEN8_LEGACY_PDPS; i++)
+		kfree(ppgtt->gen8_pt_dma_addr[i]);
+	__free_page(ppgtt->scratch_page);
+
 	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
 }
 
@@ -389,6 +394,13 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 					       PCI_DMA_BIDIRECTIONAL);
 		}
 	}
+
+	/* Unused PDPs are always assigned to scratch page */
+	for (i = ppgtt->num_pd_pages; i < GEN8_LEGACY_PDPS; i++) {
+		if (ppgtt->pd_dma_addr[i])
+			pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i],
+				PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	}
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -475,10 +487,21 @@ static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
 						const int max_pdp)
 {
-	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
-	if (!ppgtt->pd_pages)
+	/* Scratch page for unmapped PDP's */
+	ppgtt->scratch_page = alloc_page(GFP_KERNEL);
+	if (!ppgtt->scratch_page)
 		return -ENOMEM;
 
+	/* Must allocate space for all 4 PDPs. HW has implemented cache which
+	 * pre-fetches entries; that pre-fetch can attempt access for entries
+	 * even if no resources are located in that range.
+	 */
+	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, GEN8_LEGACY_PDPS);
+	if (!ppgtt->pd_pages) {
+		__free_page(ppgtt->scratch_page);
+		return -ENOMEM;
+	}
+
 	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
 	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
 
@@ -496,6 +519,7 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
 
 	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
 	if (ret) {
+		__free_page(ppgtt->scratch_page);
 		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
 		return ret;
 	}
@@ -530,18 +554,25 @@ static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
 
 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
 					const int pd,
-					const int pt)
+					const int pt,
+					const int max_pdp)
 {
 	dma_addr_t pt_addr;
 	struct page *p;
 	int ret;
 
-	p = ppgtt->gen8_pt_pages[pd][pt];
-	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
-			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
-	if (ret)
-		return ret;
+	/* Unused PDPs need to have their ptes pointing to the
+	 * existing scratch page.
+	 */
+	if (pd < max_pdp) {
+		p = ppgtt->gen8_pt_pages[pd][pt];
+		pt_addr = pci_map_page(ppgtt->base.dev->pdev,
+					p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
+		if (ret)
+			return ret;
+	} else
+		pt_addr = ppgtt->scratch_dma_addr;
 
 	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
 
@@ -563,6 +594,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
 	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
 	int i, j, ret;
+	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
 
 	if (size % (1<<30))
 		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
@@ -572,30 +604,38 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	if (ret)
 		return ret;
 
-	/*
-	 * 2. Create DMA mappings for the page directories and page tables.
-	 */
-	for (i = 0; i < max_pdp; i++) {
+	/* 2. Map the scratch page */
+	ppgtt->scratch_dma_addr =
+		pci_map_page(ppgtt->base.dev->pdev,
+			     ppgtt->scratch_page, 0, PAGE_SIZE,
+			     PCI_DMA_BIDIRECTIONAL);
+
+	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, ppgtt->scratch_dma_addr);
+	if (ret)
+		goto bail;
+
+	/* 3. Create DMA mappings for the page directories and page tables. */
+	for (i = 0; i < GEN8_LEGACY_PDPS; i++) {
 		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
 		if (ret)
 			goto bail;
 
 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
-			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
+			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j, max_pdp);
 			if (ret)
 				goto bail;
 		}
 	}
 
 	/*
-	 * 3. Map all the page directory entires to point to the page tables
+	 * 4. Map all the page directory entries to point to the page tables
 	 * we've allocated.
 	 *
 	 * For now, the PPGTT helper functions all require that the PDEs are
 	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
 	 * will never need to touch the PDEs again.
 	 */
-	for (i = 0; i < max_pdp; i++) {
+	for (i = 0; i < GEN8_LEGACY_PDPS; i++) {
 		gen8_ppgtt_pde_t *pd_vaddr;
 		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
@@ -618,6 +658,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 
 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 
+	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
+					I915_CACHE_LLC, true);
+	pt_vaddr = kmap_atomic(ppgtt->scratch_page);
+	for (i = 0; i < GEN8_PTES_PER_PAGE; i++)
+		pt_vaddr[i] = scratch_pte;
+	kunmap_atomic(pt_vaddr);
+
 	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
 			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
 	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 666c938..02032b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -249,6 +249,7 @@ struct i915_hw_ppgtt {
 		struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
 	};
 	struct page *pd_pages;
+	struct page *scratch_page;
 	union {
 		uint32_t pd_offset;
 		dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
@@ -258,6 +259,7 @@ struct i915_hw_ppgtt {
 		dma_addr_t *gen8_pt_dma_addr[4];
 	};
 
+	dma_addr_t scratch_dma_addr;
 	struct intel_context *ctx;
 
 	int (*enable)(struct i915_hw_ppgtt *ppgtt);
-- 
1.9.3




More information about the Intel-gfx mailing list