[Intel-gfx] [PATCH 09/11] drm/i915/gtt: Make gen6 page directories evictable

Tue Jun 5 07:19:47 UTC 2018

Currently all page directories are bound at creation using an
unevictable node in the GGTT. This severely limits us as we cannot
remove any inactive ppgtt for new contexts, or under aperture pressure.
To fix this we need to make the page directory into a first class and
unbindable vma. Hence, the creation of a custom vma to wrap the page
directory as opposed to a GEM object.

In this patch, we leave the page directories pinned upon creation.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Matthew Auld <matthew.william.auld at gmail.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 261 ++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   2 +-
 drivers/gpu/drm/i915/i915_vma.h     |   1 +
 3 files changed, 148 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c756c5110d2e..fa09413c9adf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1640,79 +1640,55 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
 {
 	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	struct i915_address_space *vm = &base->vm;
-	struct i915_page_table *unused;
-	gen6_pte_t scratch_pte;
-	u32 pd_entry, pte, pde;
-
-	scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
-
-	gen6_for_all_pdes(unused, &base->pd, pde) {
-		u32 expected;
-		gen6_pte_t *pt_vaddr;
-		const dma_addr_t pt_addr = px_dma(base->pd.page_table[pde]);
-		pd_entry = readl(ppgtt->pd_addr + pde);
-		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
-
-		if (pd_entry != expected)
-			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
-				   pde,
-				   pd_entry,
-				   expected);
-		seq_printf(m, "\tPDE: %x\n", pd_entry);
-
-		pt_vaddr = kmap_atomic_px(base->pd.page_table[pde]);
-
-		for (pte = 0; pte < GEN6_PTES; pte+=4) {
-			unsigned long va =
-				(pde * PAGE_SIZE * GEN6_PTES) +
-				(pte * PAGE_SIZE);
+	const gen6_pte_t scratch_pte =
+		vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
+	struct i915_page_table *pt;
+	u32 pte, pde;
+
+	gen6_for_all_pdes(pt, &base->pd, pde) {
+		gen6_pte_t *vaddr;
+
+		if (i915_vma_is_pinned(ppgtt->vma)) {
+			u32 expected =
+				GEN6_PDE_ADDR_ENCODE(px_dma(pt)) |
+				GEN6_PDE_VALID;
+			u32 pd_entry = readl(ppgtt->pd_addr + pde);
+
+			if (pd_entry != expected)
+				seq_printf(m,
+					   "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
+					   pde,
+					   pd_entry,
+					   expected);
+
+			seq_printf(m, "\tPDE: %x\n", pd_entry);
+		}
+
+		vaddr = kmap_atomic_px(base->pd.page_table[pde]);
+		for (pte = 0; pte < GEN6_PTES; pte += 4) {
 			int i;
-			bool found = false;
+
 			for (i = 0; i < 4; i++)
-				if (pt_vaddr[pte + i] != scratch_pte)
-					found = true;
-			if (!found)
+				if (vaddr[pte + i] != scratch_pte)
+					break;
+			if (i == 4)
 				continue;
 
-			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
+			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =",
+				   (pde * GEN6_PTES + pte) * PAGE_SIZE,
+				   pde, pte);
 			for (i = 0; i < 4; i++) {
-				if (pt_vaddr[pte + i] != scratch_pte)
-					seq_printf(m, " %08x", pt_vaddr[pte + i]);
+				if (vaddr[pte + i] != scratch_pte)
+					seq_printf(m, " %08x", vaddr[pte + i]);
 				else
 					seq_puts(m, "  SCRATCH ");
 			}
 			seq_puts(m, "\n");
 		}
-		kunmap_atomic(pt_vaddr);
+		kunmap_atomic(vaddr);
 	}
 }
 
-/* Write pde (index) from the page directory @pd to the page table @pt */
-static inline void gen6_write_pde(const struct gen6_hw_ppgtt *ppgtt,
-				  const unsigned int pde,
-				  const struct i915_page_table *pt)
-{
-	/* Caller needs to make sure the write completes if necessary */
-	writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
-		       ppgtt->pd_addr + pde);
-}
-
-/* Write all the page tables found in the ppgtt structure to incrementing page
- * directories. */
-static void gen6_write_page_range(struct i915_hw_ppgtt *base,
-				  u32 start, u32 length)
-{
-	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
-	struct i915_page_table *pt;
-	unsigned int pde;
-
-	gen6_for_each_pde(pt, &base->pd, start, length, pde)
-		gen6_write_pde(ppgtt, pde, pt);
-
-	mark_tlbs_dirty(base);
-	wmb();
-}
-
 static inline u32 get_pd_offset(struct gen6_hw_ppgtt *ppgtt)
 {
 	GEM_BUG_ON(ppgtt->base.pd.base.ggtt_offset & 0x3f);
@@ -1909,7 +1885,6 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	struct i915_page_table *pt;
 	u64 from = start;
 	unsigned int pde;
-	bool flush = false;
 
 	gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
 		if (pt == vm->scratch_pt) {
@@ -1919,14 +1894,13 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 
 			gen6_initialize_pt(vm, pt);
 			ppgtt->base.pd.page_table[pde] = pt;
-			gen6_write_pde(ppgtt, pde, pt);
-			flush = true;
-		}
-	}
+			if (i915_vma_is_pinned(ppgtt->vma)) {
+				u32 addr = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
 
-	if (flush) {
-		mark_tlbs_dirty(&ppgtt->base);
-		wmb();
+				writel_relaxed(addr | GEN6_PDE_VALID,
+					       ppgtt->pd_addr + pde);
+			}
+		}
 	}
 
 	return 0;
@@ -1975,44 +1949,114 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
 
-	drm_mm_remove_node(&ppgtt->node);
+	i915_vma_unpin(ppgtt->vma);
+	i915_vma_destroy(ppgtt->vma);
 
 	gen6_ppgtt_free_pd(ppgtt);
 	gen6_ppgtt_free_scratch(vm);
 }
 
-static int gen6_ppgtt_allocate_page_directories(struct gen6_hw_ppgtt *ppgtt)
+static int pd_vma_set_pages(struct i915_vma *vma)
 {
-	struct drm_i915_private *dev_priv = ppgtt->base.vm.i915;
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	int err;
+	vma->pages = ERR_PTR(-ENODEV);
+	return 0;
+}
 
-	/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
-	 * allocator works in address space sizes, so it's multiplied by page
-	 * size. We allocate at the top of the GTT to avoid fragmentation.
-	 */
-	BUG_ON(!drm_mm_initialized(&ggtt->vm.mm));
+static void pd_vma_clear_pages(struct i915_vma *vma)
+{
+	GEM_BUG_ON(!vma->pages);
 
-	err = i915_gem_gtt_insert(&ggtt->vm, &ppgtt->node,
-				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
-				  I915_COLOR_UNEVICTABLE,
-				  0, ggtt->vm.total,
-				  PIN_HIGH);
-	if (err)
-		return err;
+	vma->pages = NULL;
+}
 
-	if (ppgtt->node.start < ggtt->mappable_end)
-		DRM_DEBUG("Forced to use aperture for PDEs\n");
+static int pd_vma_bind(struct i915_vma *vma,
+		       enum i915_cache_level cache_level,
+		       u32 unused)
+{
+	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+	struct gen6_hw_ppgtt *ppgtt = vma->private;
+	u32 ggtt_offset = i915_ggtt_offset(vma) / PAGE_SIZE;
+	struct i915_page_table *pt;
+	unsigned int pde;
 
-	ppgtt->base.pd.base.ggtt_offset =
-		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
+	ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
 
-	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
-		ppgtt->base.pd.base.ggtt_offset / sizeof(gen6_pte_t);
+	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
+
+	gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
+		u32 val = GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID;
+
+		writel_relaxed(val, ppgtt->pd_addr + pde);
+	}
+	wmb();
 
 	return 0;
 }
 
+static void pd_vma_unbind(struct i915_vma *vma)
+{
+}
+
+static const struct i915_vma_ops pd_vma_ops = {
+	.set_pages = pd_vma_set_pages,
+	.clear_pages = pd_vma_clear_pages,
+	.bind_vma = pd_vma_bind,
+	.unbind_vma = pd_vma_unbind,
+};
+
+static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
+{
+	struct drm_i915_private *i915 = ppgtt->base.vm.i915;
+	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_vma *vma;
+	int i;
+
+	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+	GEM_BUG_ON(size > ggtt->vm.total);
+
+	vma = kmem_cache_zalloc(i915->vmas, GFP_KERNEL);
+	if (vma == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
+		init_request_active(&vma->last_read[i], NULL);
+	init_request_active(&vma->last_fence, NULL);
+
+	vma->vm = &ggtt->vm;
+	vma->ops = &pd_vma_ops;
+	vma->private = ppgtt;
+
+	vma->obj = NULL;
+	vma->resv = NULL;
+	vma->size = size;
+	vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
+
+	vma->fence_size = size;
+	vma->fence_alignment = I915_GTT_MIN_ALIGNMENT;
+
+	vma->flags |= I915_VMA_GGTT;
+	vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
+
+	INIT_LIST_HEAD(&vma->obj_link);
+	list_add(&vma->vm_link, &vma->vm->unbound_list);
+
+	return vma;
+}
+
+static int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
+{
+	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+	/*
+	 * PPGTT PDEs reside in the GGTT and consists of 512 entries. The
+	 * allocator works in address space sizes, so it's multiplied by page
+	 * size. We allocate at the top of the GTT to avoid fragmentation.
+	 */
+	return i915_vma_pin(ppgtt->vma,
+			    0, GEN6_PD_ALIGN,
+			    PIN_GLOBAL | PIN_HIGH);
+}
+
 static void gen6_scratch_va_range(struct gen6_hw_ppgtt *ppgtt,
 				  u64 start, u64 length)
 {
@@ -2062,20 +2106,23 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 	if (err)
 		goto err_free;
 
-	err = gen6_ppgtt_allocate_page_directories(ppgtt);
-	if (err)
+	ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
+	if (IS_ERR(ppgtt->vma))
 		goto err_scratch;
 
 	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.vm.total);
-	gen6_write_page_range(&ppgtt->base, 0, ppgtt->base.vm.total);
 
 	err = gen6_alloc_va_range(&ppgtt->base.vm, 0, ppgtt->base.vm.total);
+	if (err)
+		goto err_vma;
+
+	err = gen6_ppgtt_pin(&ppgtt->base);
 	if (err)
 		goto err_pd;
 
 	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
-			 ppgtt->node.size >> 20,
-			 ppgtt->node.start / PAGE_SIZE);
+			 ppgtt->vma->node.size >> 20,
+			 ppgtt->vma->node.start / PAGE_SIZE);
 
 	DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
 			 ppgtt->base.pd.base.ggtt_offset << 10);
@@ -2084,6 +2131,8 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 
 err_pd:
 	gen6_ppgtt_free_pd(ppgtt);
+err_vma:
+	i915_vma_destroy(ppgtt->vma);
 err_scratch:
 	gen6_ppgtt_free_scratch(&ppgtt->base.vm);
 err_free:
@@ -3599,6 +3648,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 	}
 
 	ggtt->vm.closed = false;
+	i915_ggtt_invalidate(dev_priv);
 
 	if (INTEL_GEN(dev_priv) >= 8) {
 		struct intel_ppat *ppat = &dev_priv->ppat;
@@ -3607,25 +3657,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 		dev_priv->ppat.update_hw(dev_priv);
 		return;
 	}
-
-	if (USES_PPGTT(dev_priv)) {
-		struct i915_address_space *vm;
-
-		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
-			struct i915_hw_ppgtt *ppgtt;
-
-			if (i915_is_ggtt(vm))
-				ppgtt = dev_priv->mm.aliasing_ppgtt;
-			else
-				ppgtt = i915_vm_to_ppgtt(vm);
-			if (!ppgtt)
-				continue;
-
-			gen6_write_page_range(ppgtt, 0, ppgtt->vm.total);
-		}
-	}
-
-	i915_ggtt_invalidate(dev_priv);
 }
 
 static struct scatterlist *
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c0eb60484eb4..f4cfe0ed8fac 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -409,7 +409,7 @@ struct i915_hw_ppgtt {
 struct gen6_hw_ppgtt {
 	struct i915_hw_ppgtt base;
 
-	struct drm_mm_node node;
+	struct i915_vma *vma;
 	gen6_pte_t __iomem *pd_addr;
 
 	int (*switch_mm)(struct gen6_hw_ppgtt *ppgtt, struct i915_request *rq);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4321476a6a32..de02482c0659 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -54,6 +54,7 @@ struct i915_vma {
 	struct reservation_object *resv; /** Alias of obj->resv */
 	struct sg_table *pages;
 	void __iomem *iomap;
+	void *private; /* owned by creator */
 	u64 size;
 	u64 display_alignment;
 	struct i915_page_sizes page_sizes;
-- 
2.17.1