[Intel-gfx] [PATCH 64/68] drm/i915: Depend exclusively on map and unmap_vma

Ben Widawsky benjamin.widawsky at intel.com
Fri Aug 22 05:12:27 CEST 2014


Drop both the insert_entries and clear_range callbacks, as well as the
allocate/teardown of the VA range. The former was short-sighted, and the
latter was never meant to be permanent.

XXX: Like the previous few patches, this was never tested pre-GEN8, and
not tested individually on GEN8 post-rebase.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
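For illustration, a minimal sketch of what binding now reduces to, written
against this series' i915_gem_gtt.h (the example_* helpers are made up for
the sketch; they are not hunks from this patch):

    static int example_bind(struct i915_vma *vma,
                            enum i915_cache_level cache_level, u32 flags)
    {
            /* VA range allocation and PTE insertion now happen inside a
             * single map_vma() call per address space. */
            return vma->vm->map_vma(vma, cache_level, flags);
    }

    static void example_unbind(struct i915_vma *vma)
    {
            /* teardown_va_range() + clear_range() likewise collapse into
             * unmap_vma(). */
            vma->vm->unmap_vma(vma);
    }
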
 drivers/gpu/drm/i915/i915_gem_gtt.c | 398 +++++++++++++-----------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  23 +--
 2 files changed, 147 insertions(+), 274 deletions(-)
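
On the GGTT side, insert_entries/clear_range move off the address space
vtable and onto struct i915_gtt (see the i915_gem_gtt.h hunk at the end).
A sketch of a caller, modeled on the suspend path below (again, the
example_* name is made up):

    static void example_scratch_ggtt(struct drm_i915_private *dev_priv)
    {
            struct i915_gtt *ggtt = &dev_priv->gtt;

            /* Fill the entire global GTT range with scratch pages */
            ggtt->clear_range(ggtt, ggtt->base.start, ggtt->base.total, true);
    }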

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 3b3f844..5c23f5b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -593,19 +593,6 @@ static void gen8_ppgtt_clear_pte_range(struct i915_pagedirpo *pdp,
 	}
 }
 
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
-				   uint64_t start,
-				   uint64_t length,
-				   bool use_scratch)
-{
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_pagedirpo *pdp = &ppgtt->pdp; /* FIXME: 48b */
-	gen8_gtt_pte_t scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
-						     I915_CACHE_LLC, use_scratch);
-	gen8_ppgtt_clear_pte_range(pdp, start, length, scratch_pte, !HAS_LLC(vm->dev));
-}
-
 static void gen8_ppgtt_insert_pte_entries(struct i915_pagedirpo *pdp,
 					  struct sg_page_iter *sg_iter,
 					  uint64_t start,
@@ -650,22 +637,6 @@ static void gen8_ppgtt_insert_pte_entries(struct i915_pagedirpo *pdp,
 	}
 }
 
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct sg_table *pages,
-				      uint64_t start,
-				      enum i915_cache_level cache_level,
-				      u32 unused)
-{
-	struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_pagedirpo *pdp = &ppgtt->pdp; /* FIXME: 48b */
-	struct sg_page_iter sg_iter;
-
-	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
-	gen8_ppgtt_insert_pte_entries(pdp, &sg_iter, start,
-				      sg_nents(pages->sgl),
-				      cache_level, !HAS_LLC(vm->dev));
-}
-
 static void __gen8_do_map_pt(gen8_ppgtt_pde_t * const pde,
 			     struct i915_pagetab *pt,
 			     struct drm_device *dev)
@@ -732,6 +703,8 @@ static void gen8_map_page_directory_pointer(struct i915_pml4 *pml4,
 	kunmap_atomic(pagemap);
 }
 
+/* Returns true if the PDP(s) have been freed and the caller can potentially
+ * clean up. */
 static bool gen8_teardown_va_range_3lvl(struct i915_address_space *vm,
 					struct i915_pagedirpo *pdp,
 					uint64_t start, uint64_t length,
@@ -742,6 +715,8 @@ static bool gen8_teardown_va_range_3lvl(struct i915_address_space *vm,
 	struct i915_pagetab *pt;
 	uint64_t temp;
 	uint32_t pdpe, pde, orig_start = start;
+	gen8_gtt_pte_t scratch = gen8_pte_encode(vm->scratch.addr,
+						 I915_CACHE_LLC, true);
 
 	BUG_ON(!pdp);
 
@@ -826,9 +801,10 @@ static bool gen8_teardown_va_range_3lvl(struct i915_address_space *vm,
 							     GEN8_PDE_SHIFT);
 				pd->page_tables[pde] = NULL;
 			}
+
+			gen8_ppgtt_clear_pte_range(pdp, pd_start, pd_len, scratch, !HAS_LLC(vm->dev));
 		}
 
-		gen8_ppgtt_clear_range(vm, pd_start, pd_len, true);
 
 		if (bitmap_empty(pd->used_pdes, I915_PDES_PER_PD)) {
 			WARN_ON(!test_and_clear_bit(pdpe, pdp->used_pdpes));
@@ -868,6 +844,7 @@ static void gen8_teardown_va_range_4lvl(struct i915_address_space *vm,
 	struct i915_pagedirpo *pdp;
 	uint64_t temp, pml4e;
 
+	BUG_ON(I915_PDPES_PER_PDP(vm->dev) != 512);
 	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
 		if (!pdp)
 			continue;
@@ -1110,14 +1087,15 @@ err_out:
 	return -ENOMEM;
 }
 
-static int __gen8_alloc_vma_range_3lvl(struct i915_address_space *vm,
-				       struct i915_pagedirpo *pdp,
+static int __gen8_alloc_vma_range_3lvl(struct i915_pagedirpo *pdp,
+				       struct i915_vma *vma,
 				       struct sg_page_iter *sg_iter,
 				       uint64_t start,
 				       uint64_t length,
 				       u32 flags)
 {
 	unsigned long *new_page_dirs, **new_page_tables;
+	struct i915_address_space *vm = vma->vm;
 	struct drm_device *dev = vm->dev;
 	struct i915_pagedir *pd;
 	const uint64_t orig_start = start;
@@ -1127,6 +1105,8 @@ static int __gen8_alloc_vma_range_3lvl(struct i915_address_space *vm,
 	size_t pdpes = I915_PDPES_PER_PDP(dev);
 	int ret;
 
+	BUG_ON(!sg_iter->sg);
+
 #ifndef CONFIG_64BIT
 	/* Disallow 64b address on 32b platforms. Nothing is wrong with doing
 	 * this in hardware, but a lot of the drm code is not prepared to handle
@@ -1176,18 +1156,16 @@ static int __gen8_alloc_vma_range_3lvl(struct i915_address_space *vm,
 			BUG_ON(!pt);
 			BUG_ON(!pd_len);
 			BUG_ON(!gen8_pte_count(pd_start, pd_len));
+			BUG_ON(!sg_iter->__nents);
 
 			/* Set our used ptes within the page table */
 			bitmap_set(pt->used_ptes,
 				   gen8_pte_index(pd_start),
 				   gen8_pte_count(pd_start, pd_len));
 
-			if (sg_iter) {
-				BUG_ON(!sg_iter->__nents);
-				gen8_ppgtt_insert_pte_entries(pdp, sg_iter, pd_start,
-							      gen8_pte_count(pd_start, pd_len),
-							      flags, !HAS_LLC(vm->dev));
-			}
+			gen8_ppgtt_insert_pte_entries(pdp, sg_iter, pd_start,
+						      gen8_pte_count(pd_start, pd_len),
+						      flags, !HAS_LLC(vm->dev));
 			set_bit(pde, pd->used_pdes);
 			pt->zombie = 0;
 		}
@@ -1214,20 +1192,21 @@ err_out:
 	return ret;
 }
 
-static int __gen8_alloc_vma_range_4lvl(struct i915_address_space *vm,
-				       struct i915_pml4 *pml4,
+static int __gen8_alloc_vma_range_4lvl(struct i915_pml4 *pml4,
+				       struct i915_vma *vma,
 				       struct sg_page_iter *sg_iter,
-				       uint64_t start,
-				       uint64_t length,
 				       u32 flags)
 {
 	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+	struct i915_address_space *vm = vma->vm;
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	struct i915_pagedirpo *pdp;
+	uint64_t start = vma->node.start, length = vma->node.size;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
 	uint64_t temp, pml4e;
+	int ret;
 
 	/* Do the pml4 allocations first, so we don't need to track the newly
 	 * allocated tables below the pdp */
@@ -1263,11 +1242,9 @@ static int __gen8_alloc_vma_range_4lvl(struct i915_address_space *vm,
 	length = orig_length;
 
 	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
-		int ret;
-
 		BUG_ON(!pdp);
 
-		ret = __gen8_alloc_vma_range_3lvl(vm, pdp, sg_iter,
+		ret = __gen8_alloc_vma_range_3lvl(pdp, vma, sg_iter,
 						  start, length, flags);
 		if (ret)
 			goto err_out;
@@ -1294,146 +1271,80 @@ err_out:
 err_alloc:
 	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
 		free_pdp_single(pdp, vm->dev);
-}
-
-static int gen8_alloc_va_range(struct i915_address_space *vm,
-			       uint64_t start, uint64_t length)
-{
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(vm, struct i915_hw_ppgtt, base);
 
-	if (HAS_48B_PPGTT(vm->dev))
-		return __gen8_alloc_vma_range_4lvl(vm, &ppgtt->pml4, NULL,
-						   start, length, 0);
-	else
-		return __gen8_alloc_vma_range_3lvl(vm, &ppgtt->pdp, NULL,
-						   start, length, 0);
+	return ret;
 }
 
 static int gen8_map_vma(struct i915_vma *vma,
 		       enum i915_cache_level cache_level,
-		       u32 unused)
-
+		       u32 flags)
 {
 	struct i915_address_space *vm = vma->vm;
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	struct sg_page_iter sg_iter;
 
-	__sg_page_iter_start(&sg_iter, vma->obj->pages->sgl,
-			     sg_nents(vma->obj->pages->sgl), 0);
-	if (HAS_48B_PPGTT(vm->dev))
-		return __gen8_alloc_vma_range_4lvl(vm, &ppgtt->pml4, &sg_iter,
-						   vma->node.start,
-						   vma->node.size, 0);
+	__sg_page_iter_start(&sg_iter, vma->obj->pages->sgl, sg_nents(vma->obj->pages->sgl), 0);
+	if (HAS_48B_PPGTT(vma->vm->dev))
+		return __gen8_alloc_vma_range_4lvl(&ppgtt->pml4, vma, &sg_iter, flags);
 	else
-		return __gen8_alloc_vma_range_3lvl(vm, &ppgtt->pdp, &sg_iter,
+		return __gen8_alloc_vma_range_3lvl(&ppgtt->pdp, vma, &sg_iter,
 						   vma->node.start,
-						   vma->node.size, 0);
+						   vma->node.size,
+						   flags);
 }
 
-static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
-{
-	free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
-	if (HAS_48B_PPGTT(ppgtt->base.dev))
-		pml4_fini(&ppgtt->pml4);
-	else
-		free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
-}
-
-/**
- * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
- * with a net effect resembling a 2-level page table in normal x86 terms. Each
- * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
- * space.
- *
- */
-static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
 {
 	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
 	if (IS_ERR(ppgtt->scratch_pd))
 		return PTR_ERR(ppgtt->scratch_pd);
 
 	ppgtt->base.start = 0;
-	ppgtt->base.total = size;
-	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
-	ppgtt->enable = gen8_ppgtt_enable;
 
+	/* In the case of 3 levels we need a page directory scratch page (each
+	 * PDP entry can point to it). 4 levels require a single PML4 page. In
+	 * the struct definition, they are all the same anyway...
+	 */
 	if (HAS_48B_PPGTT(ppgtt->base.dev)) {
 		int ret = pml4_init(ppgtt);
 		if (ret) {
-			free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+			free_pt_scratch(ppgtt->scratch_pml4, ppgtt->base.dev);
 			return ret;
 		}
-
+		ppgtt->base.total = (1ULL<<48);
 		ppgtt->switch_mm = gen8_48b_mm_switch;
+		/* NB: Aliasing PPGTT always aliases the GGTT which has a max of
+		 * 4GB. However, if we used 3-level page tables instead, it
+		 * would require changing the GFX_MODE register on a switch,
+		 * which adds complexity. Instead we can use a 4-level table
+		 * and only populate the low 4GB. */
+		if (aliasing) {
+			/* Make it 32b even if GGTT is less to get all 4 PDPs */
+			ppgtt->base.total = (1ULL<<32);
+		}
 	} else {
+		/* PDP doesn't need an actual page */
 		int ret = __pdp_init(&ppgtt->pdp, false);
 		if (ret) {
 			free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
 			return ret;
 		}
-
 		ppgtt->switch_mm = gen8_legacy_mm_switch;
-		trace_i915_pagedirpo_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
+		ppgtt->base.total = (1ULL<<32);
 	}
 
-	return 0;
-}
-
-static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
-{
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_pagedirpo *pdp = &ppgtt->pdp; /* FIXME: 48b */
-	struct i915_pagedir *pd;
-	uint64_t temp, start = 0, size = dev_priv->gtt.base.total;
-	uint32_t pdpe;
-	int ret;
-
-	ret = gen8_ppgtt_init_common(ppgtt, size);
-	if (ret)
-		return ret;
-
-	/* Aliasing PPGTT has to always work and be mapped because of the way we
-	 * use RESTORE_INHIBIT in the context switch. This will be fixed
-	 * eventually. */
-	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
-	if (ret) {
-		gen8_ppgtt_fini_common(ppgtt);
-		return ret;
+	if (aliasing) {
+		struct i915_address_space *vm = &ppgtt->base;
+		struct drm_i915_private *dev_priv = to_i915(vm->dev);
+		ppgtt->base.total = dev_priv->gtt.base.total;
+		WARN_ON(dev_priv->gtt.base.start != 0);
 	}
 
-	/* FIXME: PML4 */
-	gen8_for_each_pdpe(pd, pdp, start, size, temp, pdpe)
-		gen8_map_pagetable_range(&ppgtt->base, pd,start, size);
-
-	ppgtt->base.allocate_va_range = NULL;
-	ppgtt->base.teardown_va_range = NULL;
-	ppgtt->base.map_vma = NULL;
-	ppgtt->base.unmap_vma = NULL;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
-	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
-
-	return 0;
-}
-
-static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
-{
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
-	if (ret)
-		return ret;
-
-	ppgtt->base.allocate_va_range = NULL;
-	ppgtt->base.teardown_va_range = NULL;
+	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.map_vma = gen8_map_vma;
 	ppgtt->base.unmap_vma = gen8_unmap_vma;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
-	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+	ppgtt->enable = gen8_ppgtt_enable;
 
 	return 0;
 }
@@ -1804,8 +1715,10 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		kunmap_atomic(pt_vaddr);
 }
 
-static int gen6_alloc_va_range(struct i915_address_space *vm,
-			       uint64_t start, uint64_t length)
+static int _gen6_map_vma(struct i915_address_space *vm,
+			 struct i915_vma *vma,
+			 enum i915_cache_level cache_level,
+			 u32 flags)
 {
 	DECLARE_BITMAP(new_page_tables, I915_PDES_PER_PD);
 	struct drm_device *dev = vm->dev;
@@ -1813,6 +1726,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		        container_of(vm, struct i915_hw_ppgtt, base);
 	struct i915_pagetab *pt;
+	uint32_t start = vma->node.start, length = vma->node.size;
 	const uint32_t start_save = start, length_save = length;
 	uint32_t pde, temp;
 	int ret;
@@ -1882,6 +1796,9 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	 * table. Also require for WC mapped PTEs */
 	readl(dev_priv->gtt.gsm);
 
+	gen6_ppgtt_insert_entries(vm, vma->obj->pages, vma->node.start,
+				  cache_level, flags);
+
 	return 0;
 
 unwind_out:
@@ -1897,14 +1814,16 @@ static int gen6_map_vma(struct i915_vma *vma,
 			enum i915_cache_level cache_level,
 			u32 flags)
 {
-	return gen6_alloc_va_range(vma->vm, vma->node.start, vma->node.size);
+	return _gen6_map_vma(vma->vm, vma, cache_level, flags);
 }
 
 static void gen6_teardown_va_range(struct i915_address_space *vm,
 				   uint64_t start, uint64_t length)
+
 {
 	struct i915_hw_ppgtt *ppgtt =
 		        container_of(vm, struct i915_hw_ppgtt, base);
+	const uint32_t orig_start = start, orig_length = length;
 	struct i915_pagetab *pt;
 	uint32_t pde, temp;
 
@@ -1931,6 +1850,8 @@ static void gen6_teardown_va_range(struct i915_address_space *vm,
 			ppgtt->pd.page_tables[pde] = ppgtt->scratch_pt;
 		}
 	}
+
+	gen6_ppgtt_clear_range(vm, orig_start, orig_length, true);
 }
 
 static void gen6_unmap_vma(struct i915_vma *vma)
@@ -2067,15 +1988,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
 	if (ret)
 		return ret;
 
-	if (aliasing) {
-		ppgtt->base.allocate_va_range = gen6_alloc_va_range;
-		ppgtt->base.teardown_va_range = gen6_teardown_va_range;
-	} else {
-		ppgtt->base.map_vma = gen6_map_vma;
-		ppgtt->base.unmap_vma = gen6_unmap_vma;
-	}
-	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
-	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.map_vma = gen6_map_vma;
+	ppgtt->base.unmap_vma = gen6_unmap_vma;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.start = 0;
 	ppgtt->base.total = I915_PDES_PER_PD * GEN6_PTES_PER_PT * PAGE_SIZE;
@@ -2087,8 +2001,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
 	ppgtt->pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
 		ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
 
-	if (!aliasing)
-		gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
+	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
 
 	gen6_map_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
 
@@ -2109,20 +2022,20 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt, boo
 
 	if (INTEL_INFO(dev)->gen < 8)
 		ret = gen6_ppgtt_init(ppgtt, aliasing);
-	else if (IS_GEN8(dev) && aliasing)
-		ret = gen8_aliasing_ppgtt_init(ppgtt);
 	else if (IS_GEN8(dev))
-		ret = gen8_ppgtt_init(ppgtt);
+		ret = gen8_ppgtt_init(ppgtt, aliasing);
 	else
 		BUG();
 
 	if (ret)
 		return ret;
 
+	BUG_ON(ppgtt->base.total < dev_priv->gtt.base.total && aliasing);
+	if (aliasing)
+		ppgtt->base.total = dev_priv->gtt.base.total;
+
 	kref_init(&ppgtt->ref);
 	drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total);
-	if (ppgtt->base.clear_range)
-		ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 	i915_init_vm(dev_priv, &ppgtt->base);
 
 	return 0;
@@ -2144,56 +2057,26 @@ ppgtt_bind_vma(struct i915_vma *vma,
 {
 	int ret;
 
+	BUG_ON(!vma->vm->map_vma);
+
 	/* Currently applicable only to VLV */
 	if (vma->obj->gt_ro)
 		flags |= PTE_READ_ONLY;
 
-	if (vma->vm->map_vma) {
-		trace_i915_va_alloc(vma->vm, vma->node.start, vma->node.size,
-				    VM_TO_TRACE_NAME(vma->vm));
-		ret = vma->vm->map_vma(vma, cache_level, flags);
-		if (!ret)
-			ppgtt_invalidate_tlbs(vma->vm);
-		return ret;
-	}
-
-	if (vma->vm->allocate_va_range) {
-		trace_i915_va_alloc(vma->vm, vma->node.start, vma->node.size,
-				    VM_TO_TRACE_NAME(vma->vm));
-		ret = vma->vm->allocate_va_range(vma->vm,
-						 vma->node.start,
-						 vma->node.size);
-		if (ret)
-			return ret;
-
+	trace_i915_va_alloc(vma->vm, vma->node.start, vma->node.size,
+			    VM_TO_TRACE_NAME(vma->vm));
+	ret = vma->vm->map_vma(vma, cache_level, flags);
+	if (!ret)
 		ppgtt_invalidate_tlbs(vma->vm);
-	}
-
-	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
-				cache_level, flags);
-
-	return 0;
+	return ret;
 }
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	WARN_ON(vma->vm->teardown_va_range && vma->vm->clear_range);
-	if (vma->vm->teardown_va_range) {
-		trace_i915_va_teardown(vma->vm,
-				       vma->node.start, vma->node.size,
-				       VM_TO_TRACE_NAME(vma->vm));
-
-		vma->vm->teardown_va_range(vma->vm,
-					   vma->node.start, vma->node.size);
-		ppgtt_invalidate_tlbs(vma->vm);
-	} else if (vma->vm->clear_range) {
-		vma->vm->clear_range(vma->vm,
-				     vma->node.start,
-				     vma->obj->base.size,
-				     true);
-	} else
-		BUG();
-
+	trace_i915_va_teardown(vma->vm, vma->node.start, vma->node.size,
+			       VM_TO_TRACE_NAME(vma->vm));
+	vma->vm->unmap_vma(vma);
+	ppgtt_invalidate_tlbs(vma->vm);
 }
 
 extern int intel_iommu_gfx_mapped;
@@ -2275,10 +2158,10 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
 
 	i915_check_and_clear_faults(dev);
 
-	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
-				       dev_priv->gtt.base.start,
-				       dev_priv->gtt.base.total,
-				       true);
+	dev_priv->gtt.clear_range(&dev_priv->gtt,
+				  dev_priv->gtt.base.start,
+				  dev_priv->gtt.base.total,
+				  true);
 }
 
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
@@ -2290,10 +2173,10 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	i915_check_and_clear_faults(dev);
 
 	/* First fill our portion of the GTT with scratch pages */
-	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
-				       dev_priv->gtt.base.start,
-				       dev_priv->gtt.base.total,
-				       true);
+	dev_priv->gtt.clear_range(&dev_priv->gtt,
+				  dev_priv->gtt.base.start,
+				  dev_priv->gtt.base.total,
+				  true);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
@@ -2366,15 +2249,16 @@ static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
 #endif
 }
 
-static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+static void gen8_ggtt_insert_entries(struct i915_gtt *gtt,
 				     struct sg_table *st,
 				     uint64_t start,
 				     enum i915_cache_level level, u32 unused)
 {
-	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	struct drm_i915_private *dev_priv =
+		container_of(gtt, struct drm_i915_private, gtt);
 	unsigned first_entry = start >> PAGE_SHIFT;
 	gen8_gtt_pte_t __iomem *gtt_entries =
-		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+		(gen8_gtt_pte_t __iomem *)gtt->gsm + first_entry;
 	int i = 0;
 	struct sg_page_iter sg_iter;
 	dma_addr_t addr = 0; /* shut up gcc */
@@ -2412,22 +2296,23 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
  * within the global GTT as well as accessible by the GPU through the GMADR
  * mapped BAR (dev_priv->mm.gtt->gtt).
  */
-static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+static void gen6_ggtt_insert_entries(struct i915_gtt *gtt,
 				     struct sg_table *st,
 				     uint64_t start,
 				     enum i915_cache_level level, u32 flags)
 {
-	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	struct drm_i915_private *dev_priv =
+		container_of(gtt, struct drm_i915_private, gtt);
 	unsigned first_entry = start >> PAGE_SHIFT;
 	gen6_gtt_pte_t __iomem *gtt_entries =
-		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+		(gen6_gtt_pte_t __iomem *)gtt->gsm + first_entry;
 	int i = 0;
 	struct sg_page_iter sg_iter;
 	dma_addr_t addr = 0;
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
+		iowrite32(gtt->base.pte_encode(addr, level, true, flags), &gtt_entries[i]);
 		i++;
 	}
 
@@ -2438,8 +2323,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0) {
-		unsigned long gtt = readl(&gtt_entries[i-1]);
-		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
+		unsigned long pte = readl(&gtt_entries[i-1]);
+		WARN_ON(pte != gtt->base.pte_encode(addr, level, true, flags));
 	}
 
 	/* This next bit makes the above posting read even more important. We
@@ -2450,17 +2335,16 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
-static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+static void gen8_ggtt_clear_range(struct i915_gtt *gtt,
 				  uint64_t start,
 				  uint64_t length,
 				  bool use_scratch)
 {
-	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
 	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
-		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
-	const int max_entries = gtt_total_entries(&dev_priv->gtt) - first_entry;
+		(gen8_gtt_pte_t __iomem *) gtt->gsm + first_entry;
+	const int max_entries = gtt_total_entries(gtt) - first_entry;
 	int i;
 
 	if (WARN(num_entries > max_entries,
@@ -2468,7 +2352,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = gen8_pte_encode(vm->scratch.addr,
+	scratch_pte = gen8_pte_encode(gtt->base.scratch.addr,
 				      I915_CACHE_LLC,
 				      use_scratch);
 	for (i = 0; i < num_entries; i++)
@@ -2509,17 +2393,16 @@ void gen8_for_every_pdpe_pde(struct i915_hw_ppgtt *ppgtt,
 	}
 }
 
-static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+static void gen6_ggtt_clear_range(struct i915_gtt *gtt,
 				  uint64_t start,
 				  uint64_t length,
 				  bool use_scratch)
 {
-	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	unsigned first_entry = start >> PAGE_SHIFT;
 	unsigned num_entries = length >> PAGE_SHIFT;
 	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
-		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
-	const int max_entries = gtt_total_entries(&dev_priv->gtt) - first_entry;
+		(gen6_gtt_pte_t __iomem *) gtt->gsm + first_entry;
+	const int max_entries = gtt_total_entries(gtt) - first_entry;
 	int i;
 
 	if (WARN(num_entries > max_entries,
@@ -2527,7 +2410,8 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
+	scratch_pte = gtt->base.pte_encode(gtt->base.scratch.addr,
+					   I915_CACHE_LLC, use_scratch, 0);
 
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
@@ -2550,7 +2434,7 @@ static int i915_ggtt_bind_vma(struct i915_vma *vma,
 	return 0;
 }
 
-static void i915_ggtt_clear_range(struct i915_address_space *vm,
+static void i915_ggtt_clear_range(struct i915_gtt *gunused,
 				  uint64_t start,
 				  uint64_t length,
 				  bool unused)
@@ -2596,9 +2480,10 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
 		if (!obj->has_global_gtt_mapping ||
 		    (cache_level != obj->cache_level)) {
-			vma->vm->insert_entries(vma->vm, obj->pages,
-						vma->node.start,
-						cache_level, flags);
+			struct i915_gtt *gtt = &dev_priv->gtt;
+			gtt->insert_entries(gtt, obj->pages,
+					    vma->node.start,
+					    cache_level, flags);
 			obj->has_global_gtt_mapping = 1;
 		}
 	}
@@ -2609,11 +2494,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (dev_priv->mm.aliasing_ppgtt &&
 	    (!obj->has_aliasing_ppgtt_mapping ||
 	     (cache_level != obj->cache_level))) {
-		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
-		appgtt->base.insert_entries(&appgtt->base,
-					    vma->obj->pages,
-					    vma->node.start,
-					    cache_level, flags);
+		ppgtt_bind_vma(vma, cache_level, flags);
 		vma->obj->has_aliasing_ppgtt_mapping = 1;
 	}
 
@@ -2622,24 +2503,20 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 
 static void ggtt_unbind_vma(struct i915_vma *vma)
 {
-	struct drm_device *dev = vma->vm->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj = vma->obj;
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_gtt *gtt = &dev_priv->gtt;
+
+	BUG_ON(vma->vm != &gtt->base);
 
 	if (obj->has_global_gtt_mapping) {
-		vma->vm->clear_range(vma->vm,
-				     vma->node.start,
-				     obj->base.size,
-				     true);
+		gtt->clear_range(gtt, vma->node.start, obj->base.size, true);
 		obj->has_global_gtt_mapping = 0;
 	}
 
 	if (obj->has_aliasing_ppgtt_mapping) {
-		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
-		appgtt->base.clear_range(&appgtt->base,
-					 vma->node.start,
-					 obj->base.size,
-					 true);
+		ppgtt_unbind_vma(vma);
 		obj->has_aliasing_ppgtt_mapping = 0;
 	}
 }
@@ -2692,7 +2569,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 	 * of the aperture.
 	 */
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
+	struct i915_gtt *gtt = &dev_priv->gtt;
+	struct i915_address_space *ggtt_vm = &gtt->base;
 	struct drm_mm_node *entry;
 	struct drm_i915_gem_object *obj;
 	unsigned long hole_start, hole_end;
@@ -2725,12 +2603,12 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		ggtt_vm->clear_range(ggtt_vm, hole_start,
-				     hole_end - hole_start, true);
+		gtt->clear_range(gtt, hole_start,
+				 hole_end - hole_start, true);
 	}
 
 	/* And finally clear the reserved guard page */
-	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
+	gtt->clear_range(gtt, end - PAGE_SIZE, PAGE_SIZE, true);
 }
 
 void i915_gem_init_global_gtt(struct drm_device *dev)
@@ -2961,8 +2839,8 @@ static int gen8_gmch_probe(struct drm_device *dev,
 
 	ret = ggtt_probe_common(dev, gtt_size);
 
-	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
-	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
+	dev_priv->gtt.clear_range = gen8_ggtt_clear_range;
+	dev_priv->gtt.insert_entries = gen8_ggtt_insert_entries;
 
 	return ret;
 }
@@ -3001,8 +2879,8 @@ static int gen6_gmch_probe(struct drm_device *dev,
 
 	ret = ggtt_probe_common(dev, gtt_size);
 
-	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
-	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
+	dev_priv->gtt.clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.insert_entries = gen6_ggtt_insert_entries;
 
 	return ret;
 }
@@ -3038,7 +2916,7 @@ static int i915_gmch_probe(struct drm_device *dev,
 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
-	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.clear_range = i915_ggtt_clear_range;
 
 	if (unlikely(dev_priv->gtt.do_idle_maps))
 		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d2cd9cc..92acd95 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -281,20 +281,6 @@ struct i915_address_space {
 	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
 				     enum i915_cache_level level,
 				     bool valid, u32 flags); /* Create a valid PTE */
-	int (*allocate_va_range)(struct i915_address_space *vm,
-				 uint64_t start,
-				 uint64_t length);
-	void (*teardown_va_range)(struct i915_address_space *vm,
-				  uint64_t start,
-				  uint64_t length);
-	void (*clear_range)(struct i915_address_space *vm,
-			    uint64_t start,
-			    uint64_t length,
-			    bool use_scratch);
-	void (*insert_entries)(struct i915_address_space *vm,
-			       struct sg_table *st,
-			       uint64_t start,
-			       enum i915_cache_level cache_level, u32 flags);
 	int (*map_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags);
 	void (*unmap_vma)(struct i915_vma *vma);
 	void (*cleanup)(struct i915_address_space *vm);
@@ -313,6 +299,7 @@ struct i915_hw_ppgtt {
 	union {
 		struct i915_pagetab *scratch_pt;
 		struct i915_pagetab *scratch_pd; /* Just need the daddr */
+		struct i915_pagetab *scratch_pml4;
 	};
 
 	struct intel_context *ctx;
@@ -352,6 +339,14 @@ struct i915_gtt {
 	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
 			  size_t *stolen, phys_addr_t *mappable_base,
 			  unsigned long *mappable_end);
+	void (*insert_entries)(struct i915_gtt *gtt,
+			       struct sg_table *st,
+			       uint64_t start,
+			       enum i915_cache_level cache_level, u32 flags);
+	void (*clear_range)(struct i915_gtt *gtt,
+			    uint64_t start,
+			    uint64_t length,
+			    bool use_scratch);
 };
 
 /* For each pde iterates over every pde between from start until start + length.
-- 
2.0.4