[Intel-gfx] [PATCH 10/10] drm/i915/gtt: Setup phys pages for 3lvl pdps

Mika Kuoppala mika.kuoppala at linux.intel.com
Fri Jun 14 16:43:50 UTC 2019


If we setup backing phys page for 3lvl pdps, even they
are not used, we lose 5 pages per ppgtt.

Trading this memory on bsw, we gain more common code paths for all
gen8+ directory manipulation. And those paths are now void of checks
for page directory type, making the hot paths faster.

Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 112 +++++++++++++++++-----------
 1 file changed, 68 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ba2802c25d13..c76c92072d54 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -714,22 +714,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
 	return pd;
 }
 
-static inline bool pd_has_phys_page(const struct i915_page_directory * const pd)
-{
-	return pd->base.page;
-}
-
 static void free_pd(struct i915_address_space *vm,
 		    struct i915_page_directory *pd)
 {
-	if (likely(pd_has_phys_page(pd)))
-		cleanup_page_dma(vm, &pd->base);
-
+	cleanup_page_dma(vm, &pd->base);
 	kfree(pd);
 }
 
 #define init_pd(vm, pd, to) {					\
-	GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd));		\
 	fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \
 	memset_p((pd)->entry, (to), 512);				\
 }
@@ -747,8 +739,7 @@ static void __set_pd_entry(struct i915_page_directory * const pd,
 
 #define set_pd_entry(pd, pde, to) ({					\
 	(pd)->entry[(pde)] = (to);					\
-	if (likely(pd_has_phys_page(pd)))				\
-		__set_pd_entry((pd), (pde),				\
+	__set_pd_entry((pd), (pde),					\
 		       gen8_pde_encode(px_dma(to), I915_CACHE_LLC));	\
 })
 
@@ -764,8 +755,7 @@ __swap_pd_entry(struct i915_page_directory * const pd,
 
 	if (likely(old == old_val)) {
 		atomic_inc(&pd->used);
-		if (likely(pd_has_phys_page(pd)))
-			__set_pd_entry(pd, pde, encode(daddr, I915_CACHE_LLC));
+		__set_pd_entry(pd, pde, encode(daddr, I915_CACHE_LLC));
 	}
 
 	return old;
@@ -1539,6 +1529,50 @@ static void ppgtt_init(struct drm_i915_private *i915,
 	ppgtt->vm.vma_ops.clear_pages = clear_pages;
 }
 
+static void init_pd_n(struct i915_address_space *vm,
+		      struct i915_page_directory *pd,
+		      struct i915_page_directory *to,
+		      const unsigned int entries)
+{
+	const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC);
+	u64 * const vaddr = kmap_atomic(pd->base.page);
+
+	memset64(vaddr, daddr, entries);
+	kunmap_atomic(vaddr);
+
+	memset_p(pd->entry, to, entries);
+}
+
+static struct i915_page_directory *
+gen8_alloc_top_pd(struct i915_address_space *vm)
+{
+	struct i915_page_directory *pd;
+
+	if (i915_vm_is_4lvl(vm)) {
+		pd = alloc_pd(vm);
+		if (!IS_ERR(pd))
+			init_pd(vm, pd, vm->scratch_pdp);
+
+		return pd;
+	}
+
+	/* 3lvl */
+	pd = __alloc_pd();
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	pd->entry[GEN8_3LVL_PDPES] = NULL;
+
+	if (unlikely(setup_page_dma(vm, &pd->base))) {
+		kfree(pd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES);
+
+	return pd;
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1548,6 +1582,7 @@ static void ppgtt_init(struct drm_i915_private *i915,
  */
 static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 {
+	struct i915_address_space *vm;
 	struct i915_ppgtt *ppgtt;
 	int err;
 
@@ -1557,70 +1592,59 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
 
 	ppgtt_init(i915, ppgtt);
 
+	vm = &ppgtt->vm;
+
 	/*
 	 * From bdw, there is hw support for read-only pages in the PPGTT.
 	 *
 	 * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
 	 * for now.
 	 */
-	ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11;
+	vm->has_read_only = INTEL_GEN(i915) != 11;
 
 	/* There are only few exceptions for gen >=6. chv and bxt.
 	 * And we are not sure about the latter so play safe for now.
 	 */
 	if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915))
-		ppgtt->vm.pt_kmap_wc = true;
+		vm->pt_kmap_wc = true;
 
-	err = gen8_init_scratch(&ppgtt->vm);
+	err = gen8_init_scratch(vm);
 	if (err)
 		goto err_free;
 
-	ppgtt->pd = __alloc_pd();
-	if (!ppgtt->pd) {
-		err = -ENOMEM;
+	ppgtt->pd = gen8_alloc_top_pd(vm);
+	if (IS_ERR(ppgtt->pd)) {
+		err = PTR_ERR(ppgtt->pd);
 		goto err_free_scratch;
 	}
 
-	if (i915_vm_is_4lvl(&ppgtt->vm)) {
-		err = setup_page_dma(&ppgtt->vm, &ppgtt->pd->base);
-		if (err)
-			goto err_free_pdp;
-
-		init_pd(&ppgtt->vm, ppgtt->pd, ppgtt->vm.scratch_pdp);
-
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
-		ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
+	if (i915_vm_is_4lvl(vm)) {
+		vm->allocate_va_range = gen8_ppgtt_alloc_4lvl;
+		vm->insert_entries = gen8_ppgtt_insert_4lvl;
+		vm->clear_range = gen8_ppgtt_clear_4lvl;
 	} else {
-		/*
-		 * We don't need to setup dma for top level pdp, only
-		 * for entries. So point entries to scratch.
-		 */
-		memset_p(ppgtt->pd->entry, ppgtt->vm.scratch_pd,
-			 GEN8_3LVL_PDPES);
-
 		if (intel_vgpu_active(i915)) {
 			err = gen8_preallocate_top_level_pdp(ppgtt);
 			if (err)
-				goto err_free_pdp;
+				goto err_free_pd;
 		}
 
-		ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
-		ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
-		ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
+		vm->allocate_va_range = gen8_ppgtt_alloc_3lvl;
+		vm->insert_entries = gen8_ppgtt_insert_3lvl;
+		vm->clear_range = gen8_ppgtt_clear_3lvl;
 	}
 
 	if (intel_vgpu_active(i915))
 		gen8_ppgtt_notify_vgt(ppgtt, true);
 
-	ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
+	vm->cleanup = gen8_ppgtt_cleanup;
 
 	return ppgtt;
 
-err_free_pdp:
-	free_pd(&ppgtt->vm, ppgtt->pd);
+err_free_pd:
+	free_pd(vm, ppgtt->pd);
 err_free_scratch:
-	gen8_free_scratch(&ppgtt->vm);
+	gen8_free_scratch(vm);
 err_free:
 	kfree(ppgtt);
 	return ERR_PTR(err);
-- 
2.17.1



More information about the Intel-gfx mailing list