[Intel-gfx] [PATCH 7/8] drm/i915/gtt: Purge page tracking bitmaps

Michał Winiarski michal.winiarski at intel.com
Mon Dec 12 11:48:44 UTC 2016


All of the page tracking structures contain a bitmap tracking the
currently used entries.
This is redundant, since we can simply inspect the corresponding pointer
instead.
The only place where we actually take advantage of the bitmaps is during
ppgtt cleanup, where they let us reduce the number of iterations.
Still, we can improve on that in the following commits, so for now we
simply iterate over the full range here.

Let's replace the bitmaps with a simple counter.

Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Michel Thierry <michel.thierry at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 138 +++++++++++++-----------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  10 +--
 2 files changed, 54 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a7d6f78..50d4861 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -426,38 +426,26 @@ static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
 static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
 {
 	struct i915_page_table *pt;
-	const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
 	int ret = -ENOMEM;
 
 	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
 	if (!pt)
 		return ERR_PTR(-ENOMEM);
 
-	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
-				GFP_KERNEL);
-
-	if (!pt->used_ptes)
-		goto fail_bitmap;
-
 	ret = setup_px(dev_priv, pt);
-	if (ret)
-		goto fail_page_m;
+	if (ret) {
+		kfree(pt);
+		return ERR_PTR(ret);
+	}
 
 	return pt;
 
-fail_page_m:
-	kfree(pt->used_ptes);
-fail_bitmap:
-	kfree(pt);
-
-	return ERR_PTR(ret);
 }
 
 static void free_pt(struct drm_i915_private *dev_priv,
 		    struct i915_page_table *pt)
 {
 	cleanup_px(dev_priv, pt);
-	kfree(pt->used_ptes);
 	kfree(pt);
 }
 
@@ -494,23 +482,13 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
 	if (!pd)
 		return ERR_PTR(-ENOMEM);
 
-	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
-				sizeof(*pd->used_pdes), GFP_KERNEL);
-	if (!pd->used_pdes)
-		goto fail_bitmap;
-
 	ret = setup_px(dev_priv, pd);
-	if (ret)
-		goto fail_page_m;
+	if (ret) {
+		kfree(pd);
+		return ERR_PTR(ret);
+	}
 
 	return pd;
-
-fail_page_m:
-	kfree(pd->used_pdes);
-fail_bitmap:
-	kfree(pd);
-
-	return ERR_PTR(ret);
 }
 
 static void free_pd(struct drm_i915_private *dev_priv,
@@ -518,7 +496,6 @@ static void free_pd(struct drm_i915_private *dev_priv,
 {
 	if (px_page(pd)) {
 		cleanup_px(dev_priv, pd);
-		kfree(pd->used_pdes);
 		kfree(pd);
 	}
 }
@@ -538,28 +515,16 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
 {
 	size_t pdpes = I915_PDPES_PER_PDP(dev_priv);
 
-	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
-				  sizeof(unsigned long),
-				  GFP_KERNEL);
-	if (!pdp->used_pdpes)
-		return -ENOMEM;
-
 	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
 				      GFP_KERNEL);
-	if (!pdp->page_directory) {
-		kfree(pdp->used_pdpes);
-		/* the PDP might be the statically allocated top level. Keep it
-		 * as clean as possible */
-		pdp->used_pdpes = NULL;
+	if (!pdp->page_directory)
 		return -ENOMEM;
-	}
 
 	return 0;
 }
 
 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 {
-	kfree(pdp->used_pdpes);
 	kfree(pdp->page_directory);
 	pdp->page_directory = NULL;
 }
@@ -733,9 +698,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 
 	GEM_BUG_ON(pte_end > GEN8_PTES);
 
-	bitmap_clear(pt->used_ptes, pte, num_entries);
+	pt->num_ptes -= num_entries;
 
-	if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+	if (pt->num_ptes == 0)
 		return true;
 
 	pt_vaddr = kmap_px(pt);
@@ -768,15 +733,16 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 			break;
 
 		if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
-			__clear_bit(pde, pd->used_pdes);
+			pd->num_pdes--;
 			pde_vaddr = kmap_px(pd);
 			pde_vaddr[pde] = scratch_pde;
 			kunmap_px(ppgtt, pde_vaddr);
 			free_pt(vm->i915, pt);
+			pd->page_table[pde] = NULL;
 		}
 	}
 
-	if (bitmap_empty(pd->used_pdes, I915_PDES))
+	if (pd->num_pdes == 0)
 		return true;
 
 	return false;
@@ -802,19 +768,20 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 			break;
 
 		if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
-			__clear_bit(pdpe, pdp->used_pdpes);
+			pdp->num_pdpes--;
 			if (USES_FULL_48BIT_PPGTT(dev_priv)) {
 				pdpe_vaddr = kmap_px(pdp);
 				pdpe_vaddr[pdpe] = scratch_pdpe;
 				kunmap_px(ppgtt, pdpe_vaddr);
 			}
 			free_pd(vm->i915, pd);
+			pdp->page_directory[pdpe] = NULL;
 		}
 	}
 
 	mark_tlbs_dirty(ppgtt);
 
-	if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
+	if (pdp->num_pdpes == 0)
 		return true;
 
 	return false;
@@ -843,11 +810,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
 			break;
 
 		if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
-			__clear_bit(pml4e, pml4->used_pml4es);
+			pml4->num_pml4es--;
 			pml4e_vaddr = kmap_px(pml4);
 			pml4e_vaddr[pml4e] = scratch_pml4e;
 			kunmap_px(ppgtt, pml4e_vaddr);
 			free_pdp(vm->i915, pdp);
+			pml4->pdps[pml4e] = NULL;
 		}
 	}
 }
@@ -938,12 +906,11 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
 	if (!px_page(pd))
 		return;
 
-	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
-		if (WARN_ON(!pd->page_table[i]))
-			continue;
-
-		free_pt(dev_priv, pd->page_table[i]);
-		pd->page_table[i] = NULL;
+	for (i = 0; i < I915_PDES; i++) {
+		if (pd->page_table[i]) {
+			free_pt(dev_priv, pd->page_table[i]);
+			pd->page_table[i] = NULL;
+		}
 	}
 }
 
@@ -1040,12 +1007,11 @@ static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
 {
 	int i;
 
-	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
-		if (WARN_ON(!pdp->page_directory[i]))
-			continue;
-
-		gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
-		free_pd(dev_priv, pdp->page_directory[i]);
+	for (i = 0; i < I915_PDPES_PER_PDP(dev_priv); i++) {
+		if (pdp->page_directory[i]) {
+			gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
+			pdp->page_directory[i] = NULL;
+		}
 	}
 
 	free_pdp(dev_priv, pdp);
@@ -1056,11 +1022,11 @@ static void gen8_ppgtt_cleanup_4lvl(struct drm_i915_private *dev_priv,
 {
 	int i;
 
-	for_each_set_bit(i, pml4->used_pml4es, GEN8_PML4ES_PER_PML4) {
-		if (WARN_ON(!pml4->pdps[i]))
-			continue;
-
-		gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]);
+	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
+		if (pml4->pdps[i]) {
+			gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]);
+			pml4->pdps[i] = NULL;
+		}
 	}
 
 	cleanup_px(dev_priv, pml4);
@@ -1109,7 +1075,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 	const uint64_t start_save = start;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		if (test_bit(pde, pd->used_pdes))
+		if (pd->page_table[pde])
 			continue;
 
 		pt = alloc_pt(dev_priv);
@@ -1121,7 +1087,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 
 		gen8_initialize_pt(vm, pt);
 		pd->page_table[pde] = pt;
-		__set_bit(pde, pd->used_pdes);
+		pd->num_pdes++;
 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
 	}
 
@@ -1160,7 +1126,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 	const uint64_t start_save = start;
 
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		if (test_bit(pdpe, pdp->used_pdpes))
+		if (pdp->page_directory[pdpe])
 			continue;
 
 		pd = alloc_pd(dev_priv);
@@ -1172,7 +1138,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
 
 		gen8_initialize_pd(vm, pd);
 		pdp->page_directory[pdpe] = pd;
-		__set_bit(pdpe, pdp->used_pdpes);
+		pdp->num_pdpes++;
 		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
 	}
 
@@ -1205,7 +1171,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 	const uint64_t start_save = start;
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-		if (test_bit(pml4e, pml4->used_pml4es))
+		if (pml4->pdps[pml4e])
 			continue;
 
 		pdp = alloc_pdp(dev_priv);
@@ -1217,7 +1183,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 
 		gen8_initialize_pdp(vm, pdp);
 		pml4->pdps[pml4e] = pdp;
-		__set_bit(pml4e, pml4->used_pml4es);
+		pml4->num_pml4es++;
 		trace_i915_page_directory_pointer_entry_alloc(vm,
 							      pml4e,
 							      start,
@@ -1286,9 +1252,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			WARN_ON(!gen8_pte_count(pd_start, pd_len));
 
 			/* Set our used ptes within the page table */
-			bitmap_set(pt->used_ptes,
-				   gen8_pte_index(pd_start),
-				   gen8_pte_count(pd_start, pd_len));
+			pt->num_ptes += gen8_pte_count(pd_start, pd_len);
 
 			/* Map the PDE to the page table */
 			page_directory[pde] = gen8_pde_encode(px_dma(pt),
@@ -1296,7 +1260,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
 							gen8_pte_index(start),
 							gen8_pte_count(start, length),
-							bitmap_weight(pt->used_ptes, GEN8_PTES));
+							pt->num_ptes);
 
 			/* NB: We haven't yet mapped ptes to pages. At this
 			 * point we're still relying on insert_entries() */
@@ -1373,7 +1337,7 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
 		uint64_t pd_start = start;
 		uint32_t pde;
 
-		if (!test_bit(pdpe, pdp->used_pdpes))
+		if (!pdp->page_directory[pdpe])
 			continue;
 
 		seq_printf(m, "\tPDPE #%d\n", pdpe);
@@ -1381,7 +1345,7 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
 			uint32_t  pte;
 			gen8_pte_t *pt_vaddr;
 
-			if (!test_bit(pde, pd->used_pdes))
+			if (!pd->page_table[pde])
 				continue;
 
 			pt_vaddr = kmap_px(pt);
@@ -1432,7 +1396,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 		struct i915_page_directory_pointer *pdp;
 
 		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-			if (!test_bit(pml4e, pml4->used_pml4es))
+			if (!pml4->pdps[pml4e])
 				continue;
 
 			seq_printf(m, "    PML4E #%llu\n", pml4e);
@@ -1452,9 +1416,6 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 	 */
 	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
 						start, length);
-	if (!ret)
-		bitmap_set(ppgtt->pdp.used_pdpes, gen8_pdpe_index(start),
-			   i915_pte_count(start, length, GEN8_PDPE_SHIFT));
 
 	return ret;
 }
@@ -1848,12 +1809,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	 */
 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
 		if (pt != vm->scratch_pt) {
-			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
+			WARN_ON(pt->num_ptes == 0);
 			continue;
 		}
 
 		/* We've already allocated a page table */
-		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
+		WARN_ON(pt->num_ptes != 0);
 
 		pt = alloc_pt(dev_priv);
 		if (IS_ERR(pt)) {
@@ -1866,8 +1827,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
 
 		gen6_initialize_pt(vm, pt);
-		bitmap_set(pt->used_ptes, gen6_pte_index(start),
-			   gen6_pte_count(start, length));
+		pt->num_ptes += gen6_pte_count(start, length);
 
 		ppgtt->pd.page_table[pde] = pt;
 		gen6_write_pde(&ppgtt->pd, pde, pt);
@@ -1875,7 +1835,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 		trace_i915_page_table_entry_map(vm, pde, pt,
 					 gen6_pte_index(start),
 					 gen6_pte_count(start, length),
-					 bitmap_weight(pt->used_ptes, GEN6_PTES));
+					 pt->num_ptes);
 	}
 
 	/* Make sure write is complete before other code can use this page
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8965bbb..1fc65ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -193,27 +193,27 @@ struct i915_page_dma {
 struct i915_page_table {
 	struct i915_page_dma base;
 
-	unsigned long *used_ptes;
+	unsigned int num_ptes;
 };
 
 struct i915_page_directory {
 	struct i915_page_dma base;
 
-	unsigned long *used_pdes;
+	unsigned int num_pdes;
 	struct i915_page_table *page_table[I915_PDES]; /* PDEs */
 };
 
 struct i915_page_directory_pointer {
 	struct i915_page_dma base;
 
-	unsigned long *used_pdpes;
+	unsigned int num_pdpes;
 	struct i915_page_directory **page_directory;
 };
 
 struct i915_pml4 {
 	struct i915_page_dma base;
 
-	DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
+	unsigned int num_pml4es;
 	struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
 };
 
@@ -477,7 +477,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
 {
-	return test_bit(n, ppgtt->pdp.used_pdpes) ?
+	return ppgtt->pdp.page_directory[n] ?
 		px_dma(ppgtt->pdp.page_directory[n]) :
 		px_dma(ppgtt->base.scratch_pd);
 }
-- 
2.7.4



More information about the Intel-gfx mailing list