[Intel-gfx] [PATCH v4 23/24] drm/i915/bdw: Dynamic page table allocations

Michel Thierry michel.thierry at intel.com
Thu Jan 22 09:01:45 PST 2015


From: Ben Widawsky <benjamin.widawsky at intel.com>

This finishes off the dynamic page tables allocations, in the legacy 3
level style that already exists. Most everything has already been setup
to this point, the patch finishes off the enabling by setting the
appropriate function pointers.

v2: Update aliasing/true ppgtt allocate/teardown/clear functions for
gen 6 & 7.

v3: Rebase.

v4: Remove BUG() from ppgtt_unbind_vma, but keep checking that either
teardown_va_range or clear_range functions exist (Daniel).

v5: Similar to gen6, in init, gen8_ppgtt_clear_range call is only needed
for aliasing ppgtt. Zombie tracking was originally added for teardown
function and is no longer required.

Cc: Daniel Vetter <daniel at ffwll.ch>
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v2+)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 295 +++++++++++++++++++++++++++++-------
 1 file changed, 242 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 662b9d8..f3d7e26 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -609,7 +609,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	}
 }
 
-static void __gen8_do_map_pt(gen8_ppgtt_pde_t *pde,
+static void __gen8_do_map_pt(gen8_ppgtt_pde_t * const pde,
 			     struct i915_page_table_entry *pt,
 			     struct drm_device *dev)
 {
@@ -626,7 +626,7 @@ static void gen8_map_pagetable_range(struct i915_page_directory_entry *pd,
 				     uint64_t length,
 				     struct drm_device *dev)
 {
-	gen8_ppgtt_pde_t *page_directory = kmap_atomic(pd->page);
+	gen8_ppgtt_pde_t * const page_directory = kmap_atomic(pd->page);
 	struct i915_page_table_entry *pt;
 	uint64_t temp, pde;
 
@@ -710,67 +710,173 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	gen8_ppgtt_free(ppgtt);
 }
 
-static int gen8_ppgtt_alloc_pagetabs(struct i915_page_directory_entry *pd,
+/**
+ * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
+ * @ppgtt:	Master ppgtt structure.
+ * @pd:		Page directory for this address range.
+ * @start:	Starting virtual address to begin allocations.
+ * @length	Size of the allocations.
+ * @new_pts:	Bitmap set by function with new allocations. Likely used by the
+ *		caller to free on error.
+ *
+ * Allocate the required number of page tables. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
+ * the page directory boundary (instead of the page directory pointer). That
+ * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
+ * possible, and likely that the caller will need to use multiple calls of this
+ * function to achieve the appropriate allocation.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_page_directory_entry *pd,
 				     uint64_t start,
 				     uint64_t length,
-				     struct drm_device *dev)
+				     unsigned long *new_pts)
 {
-	struct i915_page_table_entry *unused;
+	struct i915_page_table_entry *pt;
 	uint64_t temp;
 	uint32_t pde;
 
-	gen8_for_each_pde(unused, pd, start, length, temp, pde) {
-		BUG_ON(unused);
-		pd->page_tables[pde] = alloc_pt_single(dev);
-		if (IS_ERR(pd->page_tables[pde]))
+	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
+		/* Don't reallocate page tables */
+		if (pt) {
+			/* Scratch is never allocated this way */
+			WARN_ON(pt->scratch);
+			continue;
+		}
+
+		pt = alloc_pt_single(ppgtt->base.dev);
+		if (IS_ERR(pt))
 			goto unwind_out;
+
+		pd->page_tables[pde] = pt;
+		set_bit(pde, new_pts);
 	}
 
 	return 0;
 
 unwind_out:
-	while (pde--)
-		unmap_and_free_pt(pd->page_tables[pde], dev);
+	for_each_set_bit(pde, new_pts, GEN8_PDES_PER_PAGE)
+		unmap_and_free_pt(pd->page_tables[pde], ppgtt->base.dev);
 
 	return -ENOMEM;
 }
 
-/* bitmap of new page_directories */
-static int gen8_ppgtt_alloc_page_directories(struct i915_page_directory_pointer_entry *pdp,
+/**
+ * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
+ * @ppgtt:	Master ppgtt structure.
+ * @pdp:	Page directory pointer for this address range.
+ * @start:	Starting virtual address to begin allocations.
+ * @length	Size of the allocations.
+ * @new_pds	Bitmap set by function with new allocations. Likely used by the
+ *		caller to free on error.
+ *
+ * Allocate the required number of page directories starting at the pde index of
+ * @start, and ending at the pde index @start + @length. This function will skip
+ * over already allocated page directories within the range, and only allocate
+ * new ones, setting the appropriate pointer within the pdp as well as the
+ * correct position in the bitmap @new_pds.
+ *
+ * The function will only allocate the pages within the range for a give page
+ * directory pointer. In other words, if @start + @length straddles a virtually
+ * addressed PDP boundary (512GB for 4k pages), there will be more allocations
+ * required by the caller, This is not currently possible, and the BUG in the
+ * code will prevent it.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_page_directory_pointer_entry *pdp,
 				     uint64_t start,
 				     uint64_t length,
-				     struct drm_device *dev)
+				     unsigned long *new_pds)
 {
-	struct i915_page_directory_entry *unused;
+	struct i915_page_directory_entry *pd;
 	uint64_t temp;
 	uint32_t pdpe;
 
+	BUG_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+
 	/* FIXME: PPGTT container_of won't work for 64b */
 	BUG_ON((start + length) > 0x800000000ULL);
 
-	gen8_for_each_pdpe(unused, pdp, start, length, temp, pdpe) {
-		BUG_ON(unused);
-		pdp->page_directory[pdpe] = alloc_pd_single(dev);
+	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+		if (pd)
+			continue;
 
-		if (IS_ERR(pdp->page_directory[pdpe]))
+		pd = alloc_pd_single(ppgtt->base.dev);
+		if (IS_ERR(pd))
 			goto unwind_out;
+
+		pdp->page_directory[pdpe] = pd;
+		set_bit(pdpe, new_pds);
 	}
 
 	return 0;
 
 unwind_out:
-	while (pdpe--)
-		unmap_and_free_pd(pdp->page_directory[pdpe], dev);
+	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+		unmap_and_free_pd(pdp->page_directory[pdpe], ppgtt->base.dev);
 
 	return -ENOMEM;
 }
 
+static inline void
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
+{
+	int i;
+
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+		kfree(new_pts[i]);
+	kfree(new_pts);
+	kfree(new_pds);
+}
+
+/* Fills in the page directory bitmap, ant the array of page tables bitmap. Both
+ * of these are based on the number of PDPEs in the system.
+ */
+int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
+					 unsigned long ***new_pts)
+{
+	int i;
+	unsigned long *pds;
+	unsigned long **pts;
+
+	pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
+	if (!pds)
+		return -ENOMEM;
+
+	pts = kcalloc(GEN8_PDES_PER_PAGE, sizeof(unsigned long *), GFP_KERNEL);
+	if (!pts) {
+		kfree(pds);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+		pts[i] = kcalloc(BITS_TO_LONGS(GEN8_PDES_PER_PAGE),
+				 sizeof(unsigned long), GFP_KERNEL);
+		if (!pts[i])
+			goto err_out;
+	}
+
+	*new_pds = pds;
+	*new_pts = (unsigned long **)pts;
+
+	return 0;
+
+err_out:
+	free_gen8_temp_bitmaps(pds, pts);
+	return -ENOMEM;
+}
+
 static int gen8_alloc_va_range(struct i915_address_space *vm,
 			       uint64_t start,
 			       uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
+	unsigned long *new_page_dirs, **new_page_tables;
 	struct i915_page_directory_entry *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -778,44 +884,96 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	uint32_t pdpe;
 	int ret;
 
-	/* Do the allocations first so we can easily bail out */
-	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->pdp, start, length,
-					ppgtt->base.dev);
+#ifndef CONFIG_64BIT
+	/* Disallow 64b address on 32b platforms. Nothing is wrong with doing
+	 * this in hardware, but a lot of the drm code is not prepared to handle
+	 * 64b offset on 32b platforms.
+	 * This will be addressed when 48b PPGTT is added */
+	if (start + length > 0x100000000ULL)
+		return -E2BIG;
+#endif
+
+	/* Wrap is never okay since we can only represent 48b, and we don't
+	 * actually use the other side of the canonical address space.
+	 */
+	if (WARN_ON(start + length < start))
+		return -ERANGE;
+
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
 	if (ret)
 		return ret;
 
+	/* Do the allocations first so we can easily bail out */
+	ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
+					new_page_dirs);
+	if (ret) {
+		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		return ret;
+	}
+
+	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
-		ret = gen8_ppgtt_alloc_pagetabs(pd, start, length,
-						ppgtt->base.dev);
+		bitmap_zero(new_page_tables[pdpe], GEN8_PDES_PER_PAGE);
+		ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
+						new_page_tables[pdpe]);
 		if (ret)
 			goto err_out;
 	}
 
-	/* Now mark everything we've touched as used. This doesn't allow for
-	 * robust error checking, but it makes the code a hell of a lot simpler.
-	 */
 	start = orig_start;
 	length = orig_length;
 
+	/* Allocations have completed successfully, so set the bitmaps, and do
+	 * the mappings. */
 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
+		gen8_ppgtt_pde_t *const page_directory = kmap_atomic(pd->page);
 		struct i915_page_table_entry *pt;
 		uint64_t pd_len = gen8_clamp_pd(start, length);
 		uint64_t pd_start = start;
 		uint32_t pde;
 
-		gen8_for_each_pde(pt, &ppgtt->pd, pd_start, pd_len, temp, pde) {
-			bitmap_set(pd->page_tables[pde]->used_ptes,
-				   gen8_pte_index(start),
-				   gen8_pte_count(start, length));
+		/* Every pd should be allocated, we just did that above. */
+		BUG_ON(!pd);
+
+		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+			/* Same reasoning as pd */
+			BUG_ON(!pt);
+			BUG_ON(!pd_len);
+			BUG_ON(!gen8_pte_count(pd_start, pd_len));
+
+			/* Set our used ptes within the page table */
+			bitmap_set(pt->used_ptes,
+				   gen8_pte_index(pd_start),
+				   gen8_pte_count(pd_start, pd_len));
+
+			/* Our pde is now pointing to the pagetable, pt */
 			set_bit(pde, pd->used_pdes);
+
+			/* Map the PDE to the page table */
+			__gen8_do_map_pt(page_directory + pde, pt, vm->dev);
+
+			/* NB: We haven't yet mapped ptes to pages. At this
+			 * point we're still relying on insert_entries() */
 		}
+
+		if (!HAS_LLC(vm->dev))
+			drm_clflush_virt_range(page_directory, PAGE_SIZE);
+
+		kunmap_atomic(page_directory);
+
 		set_bit(pdpe, ppgtt->pdp.used_pdpes);
 	}
 
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	return 0;
 
 err_out:
 	gen8_ppgtt_free(ppgtt);
+
+	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
+		unmap_and_free_pd(ppgtt->pdp.page_directory[pdpe], vm->dev);
+
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	return ret;
 }
 
@@ -826,38 +984,67 @@ err_out:
  * space.
  *
  */
-static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 {
-	struct i915_page_directory_entry *pd;
-	uint64_t temp, start = 0;
-	const uint64_t orig_length = size;
-	uint32_t pdpe;
-	int ret;
+	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
+	if (IS_ERR(ppgtt->scratch_pd))
+		return PTR_ERR(ppgtt->scratch_pd);
 
 	ppgtt->base.start = 0;
 	ppgtt->base.total = size;
-	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
+
 	ppgtt->switch_mm = gen8_mm_switch;
 
-	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
-	if (IS_ERR(ppgtt->scratch_pd))
-		return PTR_ERR(ppgtt->scratch_pd);
+	return 0;
+}
 
+static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_page_directory_entry *pd;
+	uint64_t temp, start = 0, size = dev_priv->gtt.base.total;
+	uint32_t pdpe;
+	int ret;
+
+	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
+	if (ret)
+		return ret;
+
+	/* Aliasing PPGTT has to always work and be mapped because of the way we
+	 * use RESTORE_INHIBIT in the context switch. This will be fixed
+	 * eventually. */
 	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
 	if (ret) {
 		unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
 		return ret;
 	}
 
-	start = 0;
-	size = orig_length;
-
 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, size, temp, pdpe)
 		gen8_map_pagetable_range(pd, start, size, ppgtt->base.dev);
 
+	ppgtt->base.allocate_va_range = NULL;
+	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
+
+	return 0;
+}
+
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
+	if (ret)
+		return ret;
+
+	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
+	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+
 	return 0;
 }
 
@@ -1380,7 +1567,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
 		}
 	}
 
-	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
+	ppgtt->base.allocate_va_range = aliasing ? NULL : gen6_alloc_va_range;
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
@@ -1421,8 +1608,10 @@ static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt,
 
 	if (INTEL_INFO(dev)->gen < 8)
 		return gen6_ppgtt_init(ppgtt, aliasing);
+	else if (aliasing)
+		return gen8_aliasing_ppgtt_init(ppgtt);
 	else
-		return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
+		return gen8_ppgtt_init(ppgtt);
 }
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
@@ -1531,10 +1720,10 @@ ppgtt_bind_vma(struct i915_vma *vma,
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	vma->vm->clear_range(vma->vm,
-			     vma->node.start,
-			     vma->obj->base.size,
-			     true);
+		vma->vm->clear_range(vma->vm,
+				     vma->node.start,
+				     vma->obj->base.size,
+				     true);
 }
 
 extern int intel_iommu_gfx_mapped;
-- 
2.1.1



More information about the Intel-gfx mailing list