[Intel-gfx] [RFC 18/38] drm/i915: Create page table allocators

Michel Thierry michel.thierry at intel.com
Tue Oct 7 19:11:14 CEST 2014


From: Ben Widawsky <benjamin.widawsky at intel.com>

As we move toward dynamic page table allocation, it becomes much easier
to manage our data structures if break do things less coarsely by
breaking up all of our actions into individual tasks.  This makes the
code easier to write, read, and verify.

Aside from the dissection of the allocation functions, the patch
statically allocates the page table structures without a page directory.
This remains the same for all platforms,

The patch itself should not have much functional difference. The primary
noticeable difference is the fact that page tables are no longer
allocated, but rather statically declared as part of the page directory.
This has non-zero overhead, but things gain non-trivial complexity as a
result.

This patch exists for a few reasons:
1. Splitting out the functions allows easily combining GEN6 and GEN8
code. Page tables have no difference based on GEN8. As we'll see in a
future patch when we add the DMA mappings to the allocations, it
requires only one small change to make work, and error handling should
just fall into place.

2. Unless we always want to allocate all page tables under a given PDE,
we'll have to eventually break this up into an array of pointers (or
pointer to pointer).

3. Having the discrete functions is easier to review, and understand.
All allocations and frees now take place in just a couple of locations.
Reviewing, and catching leaks should be easy.

4. Less important: the GFP flags are confined to one location, which
makes playing around with such things trivial.

v2: Updated commit message to explain why this patch exists

v3: For lrc, s/pdp.pagedir[i].daddr/pdp.pagedir[i]->daddr/

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v3)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 226 +++++++++++++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   4 +-
 drivers/gpu/drm/i915/intel_lrc.c    |  16 +--
 3 files changed, 155 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2d4c2a5..8a79142 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -210,6 +210,102 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+static void free_pt_single(struct i915_pagetab *pt)
+{
+	if (WARN_ON(!pt->page))
+		return;
+	__free_page(pt->page);
+	kfree(pt);
+}
+
+static struct i915_pagetab *alloc_pt_single(void)
+{
+	struct i915_pagetab *pt;
+
+	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
+	if (!pt)
+		return ERR_PTR(-ENOMEM);
+
+	pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!pt->page) {
+		kfree(pt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return pt;
+}
+
+/**
+ * alloc_pt_range() - Allocate a multiple page tables
+ * @pd:		The page directory which will have at least @count entries
+ *		available to point to the allocated page tables.
+ * @pde:	First page directory entry for which we are allocating.
+ * @count:	Number of pages to allocate.
+ *
+ * Allocates multiple page table pages and sets the appropriate entries in the
+ * page table structure within the page directory. Function cleans up after
+ * itself on any failures.
+ *
+ * Return: 0 if allocation succeeded.
+ */
+static int alloc_pt_range(struct i915_pagedir *pd, uint16_t pde, size_t count)
+{
+	int i, ret;
+
+	/* 512 is the max page tables per pagedir on any platform.
+	 * TODO: make WARN after patch series is done
+	 */
+	BUG_ON(pde + count > I915_PDES_PER_PD);
+
+	for (i = pde; i < pde + count; i++) {
+		struct i915_pagetab *pt = alloc_pt_single();
+		if (IS_ERR(pt)) {
+			ret = PTR_ERR(pt);
+			goto err_out;
+		}
+		WARN(pd->page_tables[i],
+		     "Leaking page directory entry %d (%pa)\n",
+		     i, pd->page_tables[i]);
+		pd->page_tables[i] = pt;
+	}
+
+	return 0;
+
+err_out:
+	while (i--)
+		free_pt_single(pd->page_tables[i]);
+	return ret;
+}
+
+static void __free_pd_single(struct i915_pagedir *pd)
+{
+	__free_page(pd->page);
+	kfree(pd);
+}
+
+#define free_pd_single(pd) do { \
+	if ((pd)->page) { \
+		__free_pd_single(pd); \
+	} \
+} while (0)
+
+static struct i915_pagedir *alloc_pd_single(void)
+{
+	struct i915_pagedir *pd;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!pd->page) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return pd;
+}
+
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
 			   uint64_t val)
@@ -242,7 +338,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD;
 
 	for (i = used_pd - 1; i >= 0; i--) {
-		dma_addr_t addr = ppgtt->pdp.pagedir[i].daddr;
+		dma_addr_t addr = ppgtt->pdp.pagedir[i]->daddr;
 		ret = gen8_write_pdp(ring, i, addr);
 		if (ret)
 			return ret;
@@ -269,8 +365,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 				      I915_CACHE_LLC, use_scratch);
 
 	while (num_entries) {
-		struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe];
-		struct page *page_table = pd->page_tables[pde].page;
+		struct i915_pagedir *pd = ppgtt->pdp.pagedir[pdpe];
+		struct i915_pagetab *pt = pd->page_tables[pde];
+		struct page *page_table = pt->page;
 
 		last_pte = pte + num_entries;
 		if (last_pte > GEN8_PTES_PER_PT)
@@ -315,8 +412,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 			break;
 
 		if (pt_vaddr == NULL) {
-			struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe];
-			struct page *page_table = pd->page_tables[pde].page;
+			struct i915_pagedir *pd = ppgtt->pdp.pagedir[pdpe];
+			struct i915_pagetab *pt = pd->page_tables[pde];
+			struct page *page_table = pt->page;
 			pt_vaddr = kmap_atomic(page_table);
 		}
 
@@ -346,18 +444,13 @@ static void gen8_free_page_tables(struct i915_pagedir *pd)
 {
 	int i;
 
-	if (pd->page_tables == NULL)
+	if (!pd->page)
 		return;
 
-	for (i = 0; i < I915_PDES_PER_PD; i++)
-		if (pd->page_tables[i].page)
-			__free_page(pd->page_tables[i].page);
-}
-
-static void gen8_free_page_directories(struct i915_pagedir *pd)
-{
-	kfree(pd->page_tables);
-	__free_page(pd->page);
+	for (i = 0; i < I915_PDES_PER_PD; i++) {
+		free_pt_single(pd->page_tables[i]);
+		pd->page_tables[i] = NULL;
+	}
 }
 
 static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
@@ -365,8 +458,8 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 	int i;
 
 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		gen8_free_page_tables(&ppgtt->pdp.pagedir[i]);
-		gen8_free_page_directories(&ppgtt->pdp.pagedir[i]);
+		gen8_free_page_tables(ppgtt->pdp.pagedir[i]);
+		free_pd_single(ppgtt->pdp.pagedir[i]);
 	}
 }
 
@@ -378,14 +471,16 @@ static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
 		/* TODO: In the future we'll support sparse mappings, so this
 		 * will have to change. */
-		if (!ppgtt->pdp.pagedir[i].daddr)
+		if (!ppgtt->pdp.pagedir[i]->daddr)
 			continue;
 
-		pci_unmap_page(hwdev, ppgtt->pdp.pagedir[i].daddr, PAGE_SIZE,
+		pci_unmap_page(hwdev, ppgtt->pdp.pagedir[i]->daddr, PAGE_SIZE,
 			       PCI_DMA_BIDIRECTIONAL);
 
 		for (j = 0; j < I915_PDES_PER_PD; j++) {
-			dma_addr_t addr = ppgtt->pdp.pagedir[i].page_tables[j].daddr;
+			struct i915_pagedir *pd = ppgtt->pdp.pagedir[i];
+			struct i915_pagetab *pt =  pd->page_tables[j];
+			dma_addr_t addr = pt->daddr;
 			if (addr)
 				pci_unmap_page(hwdev, addr, PAGE_SIZE,
 					       PCI_DMA_BIDIRECTIONAL);
@@ -404,24 +499,20 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 
 static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
 {
-	int i, j;
+	int i, ret;
 
 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		struct i915_pagedir *pd = &ppgtt->pdp.pagedir[i];
-		for (j = 0; j < I915_PDES_PER_PD; j++) {
-			struct i915_pagetab *pt = &pd->page_tables[j];
-			pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-			if (!pt->page)
-				goto unwind_out;
-
-		}
+		ret = alloc_pt_range(ppgtt->pdp.pagedir[i],
+				     0, I915_PDES_PER_PD);
+		if (ret)
+			goto unwind_out;
 	}
 
 	return 0;
 
 unwind_out:
 	while (i--)
-		gen8_free_page_tables(&ppgtt->pdp.pagedir[i]);
+		gen8_free_page_tables(ppgtt->pdp.pagedir[i]);
 
 	return -ENOMEM;
 }
@@ -432,16 +523,9 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
 	int i;
 
 	for (i = 0; i < max_pdp; i++) {
-		struct i915_pagetab *pt;
-		pt = kcalloc(I915_PDES_PER_PD, sizeof(*pt), GFP_KERNEL);
-		if (!pt)
-			goto unwind_out;
-
-		ppgtt->pdp.pagedir[i].page = alloc_page(GFP_KERNEL);
-		if (!ppgtt->pdp.pagedir[i].page)
+		ppgtt->pdp.pagedir[i] = alloc_pd_single();
+		if (IS_ERR(ppgtt->pdp.pagedir[i]))
 			goto unwind_out;
-
-		ppgtt->pdp.pagedir[i].page_tables = pt;
 	}
 
 	ppgtt->num_pd_pages = max_pdp;
@@ -450,10 +534,8 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 
 unwind_out:
-	while (i--) {
-		kfree(ppgtt->pdp.pagedir[i].page_tables);
-		__free_page(ppgtt->pdp.pagedir[i].page);
-	}
+	while (i--)
+		free_pd_single(ppgtt->pdp.pagedir[i]);
 
 	return -ENOMEM;
 }
@@ -488,14 +570,14 @@ static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
 	int ret;
 
 	pd_addr = pci_map_page(ppgtt->base.dev->pdev,
-			       ppgtt->pdp.pagedir[pdpe].page, 0,
+			       ppgtt->pdp.pagedir[pdpe]->page, 0,
 			       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 
 	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
 	if (ret)
 		return ret;
 
-	ppgtt->pdp.pagedir[pdpe].daddr = pd_addr;
+	ppgtt->pdp.pagedir[pdpe]->daddr = pd_addr;
 
 	return 0;
 }
@@ -505,8 +587,8 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
 					const int pde)
 {
 	dma_addr_t pt_addr;
-	struct i915_pagedir *pd = &ppgtt->pdp.pagedir[pdpe];
-	struct i915_pagetab *pt = &pd->page_tables[pde];
+	struct i915_pagedir *pd = ppgtt->pdp.pagedir[pdpe];
+	struct i915_pagetab *pt = pd->page_tables[pde];
 	struct page *p = pt->page;
 	int ret;
 
@@ -569,10 +651,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	 * will never need to touch the PDEs again.
 	 */
 	for (i = 0; i < max_pdp; i++) {
+		struct i915_pagedir *pd = ppgtt->pdp.pagedir[i];
 		gen8_ppgtt_pde_t *pd_vaddr;
-		pd_vaddr = kmap_atomic(ppgtt->pdp.pagedir[i].page);
+		pd_vaddr = kmap_atomic(ppgtt->pdp.pagedir[i]->page);
 		for (j = 0; j < I915_PDES_PER_PD; j++) {
-			dma_addr_t addr = ppgtt->pdp.pagedir[i].page_tables[j].daddr;
+			struct i915_pagetab *pt = pd->page_tables[j];
+			dma_addr_t addr = pt->daddr;
 			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
 						      I915_CACHE_LLC);
 		}
@@ -621,7 +705,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
 		u32 expected;
 		gen6_gtt_pte_t *pt_vaddr;
-		dma_addr_t pt_addr = ppgtt->pd.page_tables[pde].daddr;
+		dma_addr_t pt_addr = ppgtt->pd.page_tables[pde]->daddr;
 		pd_entry = readl(pd_addr + pde);
 		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
 
@@ -632,7 +716,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 				   expected);
 		seq_printf(m, "\tPDE: %x\n", pd_entry);
 
-		pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde].page);
+		pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde]->page);
 		for (pte = 0; pte < GEN6_PTES_PER_PT; pte+=4) {
 			unsigned long va =
 				(pde * PAGE_SIZE * GEN6_PTES_PER_PT) +
@@ -682,7 +766,7 @@ static void gen6_map_page_tables(struct i915_hw_ppgtt *ppgtt)
 
 	WARN_ON(ppgtt->pd.pd_offset & 0x3f);
 	for (i = 0; i < ppgtt->num_pd_entries; i++)
-		gen6_map_single(ppgtt, i, ppgtt->pd.page_tables[i].daddr);
+		gen6_map_single(ppgtt, i, ppgtt->pd.page_tables[i]->daddr);
 
 	readl(dev_priv->gtt.gsm);
 }
@@ -843,7 +927,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 		if (last_pte > GEN6_PTES_PER_PT)
 			last_pte = GEN6_PTES_PER_PT;
 
-		pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde].page);
+		pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde]->page);
 
 		for (i = pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
@@ -871,7 +955,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	pt_vaddr = NULL;
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
 		if (pt_vaddr == NULL)
-			pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde].page);
+			pt_vaddr = kmap_atomic(ppgtt->pd.page_tables[pde]->page);
 
 		pt_vaddr[pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
@@ -893,7 +977,7 @@ static void gen6_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++)
 		pci_unmap_page(ppgtt->base.dev->pdev,
-			       ppgtt->pd.page_tables[i].daddr,
+			       ppgtt->pd.page_tables[i]->daddr,
 			       4096, PCI_DMA_BIDIRECTIONAL);
 }
 
@@ -902,8 +986,9 @@ static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 	int i;
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++)
-		__free_page(ppgtt->pd.page_tables[i].page);
-	kfree(ppgtt->pd.page_tables);
+		free_pt_single(ppgtt->pd.page_tables[i]);
+
+	free_pd_single(&ppgtt->pd);
 }
 
 static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -958,27 +1043,6 @@ alloc:
 	return 0;
 }
 
-static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
-{
-	struct i915_pagetab *pt;
-	int i;
-
-	pt = kcalloc(ppgtt->num_pd_entries, sizeof(*pt), GFP_KERNEL);
-	if (!pt)
-		return -ENOMEM;
-
-	for (i = 0; i < ppgtt->num_pd_entries; i++) {
-		pt[i].page = alloc_page(GFP_KERNEL);
-		if (!pt->page) {
-			gen6_ppgtt_free(ppgtt);
-			return -ENOMEM;
-		}
-	}
-
-	ppgtt->pd.page_tables = pt;
-	return 0;
-}
-
 static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
 {
 	int ret;
@@ -987,7 +1051,7 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
 	if (ret)
 		return ret;
 
-	ret = gen6_ppgtt_allocate_page_tables(ppgtt);
+	ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries);
 	if (ret) {
 		drm_mm_remove_node(&ppgtt->node);
 		return ret;
@@ -1005,7 +1069,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
 		struct page *page;
 		dma_addr_t pt_addr;
 
-		page = ppgtt->pd.page_tables[i].page;
+		page = ppgtt->pd.page_tables[i]->page;
 		pt_addr = pci_map_page(dev->pdev, page, 0, 4096,
 				       PCI_DMA_BIDIRECTIONAL);
 
@@ -1014,7 +1078,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
 			return -EIO;
 		}
 
-		ppgtt->pd.page_tables[i].daddr = pt_addr;
+		ppgtt->pd.page_tables[i]->daddr = pt_addr;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e59f203..329f75f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -267,12 +267,12 @@ struct i915_pagedir {
 		dma_addr_t daddr;
 	};
 
-	struct i915_pagetab *page_tables;
+	struct i915_pagetab *page_tables[I915_PDES_PER_PD]; /* PDEs */
 };
 
 struct i915_pagedirpo {
 	/* struct page *page; */
-	struct i915_pagedir pagedir[GEN8_LEGACY_PDPES];
+	struct i915_pagedir *pagedir[GEN8_LEGACY_PDPES];
 };
 
 struct i915_hw_ppgtt {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5a623b5..6607f56 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1719,14 +1719,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
 	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
 	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[3].daddr);
-	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[3].daddr);
-	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[2].daddr);
-	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[2].daddr);
-	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[1].daddr);
-	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[1].daddr);
-	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[0].daddr);
-	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[0].daddr);
+	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[3]->daddr);
+	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[3]->daddr);
+	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[2]->daddr);
+	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[2]->daddr);
+	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[1]->daddr);
+	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[1]->daddr);
+	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[0]->daddr);
+	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[0]->daddr);
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
-- 
2.0.3




More information about the Intel-gfx mailing list