[PATCH 73/79] no-used-ptes

Chris Wilson chris at chris-wilson.co.uk
Sun Jan 1 19:37:23 UTC 2017


---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 183 +++++++++++++-----------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |   3 +-
 drivers/gpu/drm/i915/i915_trace.h   |  19 ++--
 3 files changed, 70 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 43fa1a105ba8..c81ab02e8fb6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -429,37 +429,23 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
 {
 	struct i915_page_table *pt;
-	const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
-	int ret = -ENOMEM;
 
-	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
-	if (!pt)
+	pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!pt))
 		return ERR_PTR(-ENOMEM);
 
-	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
-				GFP_KERNEL);
-
-	if (!pt->used_ptes)
-		goto fail_bitmap;
-
-	ret = setup_px(vm, pt);
-	if (ret)
-		goto fail_page_m;
+	if (unlikely(setup_px(vm, pt))) {
+		kfree(pt);
+		return ERR_PTR(-ENOMEM);
+	}
 
+	pt->used_ptes = 0;
 	return pt;
-
-fail_page_m:
-	kfree(pt->used_ptes);
-fail_bitmap:
-	kfree(pt);
-
-	return ERR_PTR(ret);
 }
 
 static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
 {
 	cleanup_px(vm, pt);
-	kfree(pt->used_ptes);
 	kfree(pt);
 }
 
@@ -528,11 +514,12 @@ static void free_pd(struct i915_address_space *vm,
 static void gen8_initialize_pd(struct i915_address_space *vm,
 			       struct i915_page_directory *pd)
 {
-	gen8_pde_t scratch_pde;
-
-	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
+	unsigned int i;
 
-	fill_px(vm, pd, scratch_pde);
+	fill_px(vm, pd,
+		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
+	for (i = 0; i < I915_PDES; i++)
+		pd->page_table[i] = vm->scratch_pt;
 }
 
 static int __pdp_init(struct drm_i915_private *dev_priv,
@@ -719,8 +706,7 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
  */
 static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				struct i915_page_table *pt,
-				uint64_t start,
-				uint64_t length)
+				u64 start, u64 length)
 {
 	unsigned int num_entries = gen8_pte_count(start, length);
 	unsigned int pte = gen8_pte_index(start);
@@ -729,14 +715,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
 	gen8_pte_t *vaddr;
 
-	if (WARN_ON(!px_page(pt)))
-		return false;
-
-	GEM_BUG_ON(pte_end > GEN8_PTES);
-
-	bitmap_clear(pt->used_ptes, pte, num_entries);
+	GEM_BUG_ON(num_entries > pt->used_ptes);
 
-	if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+	pt->used_ptes -= num_entries;
+	if (!pt->used_ptes)
 		return true;
 
 	vaddr = kmap_atomic_px(pt);
@@ -747,31 +729,38 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	return false;
 }
 
+static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
+			       struct i915_page_directory *pd,
+			       struct i915_page_table *pt,
+			       unsigned int pde)
+{
+	gen8_pde_t *vaddr;
+
+	pd->page_table[pde] = pt;
+
+	vaddr = kmap_atomic_px(pd);
+	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
+	kunmap_atomic(vaddr);
+}
+
 /* Removes entries from a single page dir, releasing it if it's empty.
  * Caller can use the return value to update higher-level entries
  */
 static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
 				struct i915_page_directory *pd,
-				uint64_t start,
-				uint64_t length)
+				u64 start, u64 length)
 {
 	struct i915_page_table *pt;
-	uint64_t pde;
-	gen8_pde_t *pde_vaddr;
-	gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
-						 I915_CACHE_LLC);
+	u32 pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
-		if (WARN_ON(!pd->page_table[pde]))
-			break;
+		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
+			continue;
 
-		if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
-			__clear_bit(pde, pd->used_pdes);
-			pde_vaddr = kmap_atomic_px(pd);
-			pde_vaddr[pde] = scratch_pde;
-			kunmap_atomic(pde_vaddr);
-			free_pt(vm, pt);
-		}
+		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
+		__clear_bit(pde, pd->used_pdes);
+
+		free_pt(vm, pt);
 	}
 
 	if (bitmap_empty(pd->used_pdes, I915_PDES))
@@ -1103,8 +1092,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  * @pd:	Page directory for this address range.
  * @start:	Starting virtual address to begin allocations.
  * @length:	Size of the allocations.
- * @new_pts:	Bitmap set by function with new allocations. Likely used by the
- *		caller to free on error.
  *
  * Allocate the required number of page tables. Extremely similar to
  * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
@@ -1117,37 +1104,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  */
 static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
 				     struct i915_page_directory *pd,
-				     uint64_t start,
-				     uint64_t length,
-				     unsigned long *new_pts)
+				     u64 start, u64 length)
 {
 	struct i915_page_table *pt;
+	u64 from = start;
 	uint32_t pde;
 
 	gen8_for_each_pde(pt, pd, start, length, pde) {
 		/* Don't reallocate page tables */
-		if (test_bit(pde, pd->used_pdes)) {
-			/* Scratch is never allocated this way */
-			WARN_ON(pt == vm->scratch_pt);
-			continue;
-		}
-
-		pt = alloc_pt(vm);
-		if (IS_ERR(pt))
-			goto unwind_out;
+		if (!test_bit(pde, pd->used_pdes)) {
+			pt = alloc_pt(vm);
+			if (IS_ERR(pt))
+				goto unwind;
 
-		gen8_initialize_pt(vm, pt);
-		pd->page_table[pde] = pt;
-		__set_bit(pde, new_pts);
+			gen8_initialize_pt(vm, pt);
+			pd->page_table[pde] = pt;
+		}
+		pt->used_ptes += gen8_pte_count(start, length);
 		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
 	}
 
 	return 0;
 
-unwind_out:
-	for_each_set_bit(pde, new_pts, I915_PDES)
-		free_pt(vm, pd->page_table[pde]);
-
+unwind:
+	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
 	return -ENOMEM;
 }
 
@@ -1264,9 +1244,8 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
 }
 
 static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
+free_gen8_temp_bitmaps(unsigned long *new_pds)
 {
-	kfree(new_pts);
 	kfree(new_pds);
 }
 
@@ -1275,29 +1254,16 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
  */
 static
 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 unsigned long **new_pts,
 					 uint32_t pdpes)
 {
 	unsigned long *pds;
-	unsigned long *pts;
 
 	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
 	if (!pds)
 		return -ENOMEM;
 
-	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
-		      GFP_TEMPORARY);
-	if (!pts)
-		goto err_out;
-
 	*new_pds = pds;
-	*new_pts = pts;
-
 	return 0;
-
-err_out:
-	free_gen8_temp_bitmaps(pds, pts);
-	return -ENOMEM;
 }
 
 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
@@ -1306,7 +1272,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 				    uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	unsigned long *new_page_dirs, *new_page_tables;
+	unsigned long *new_page_dirs;
 	struct i915_page_directory *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -1314,7 +1280,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
 	int ret;
 
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
 	if (ret)
 		return ret;
 
@@ -1322,14 +1288,13 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
 						new_page_dirs);
 	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		free_gen8_temp_bitmaps(new_page_dirs);
 		return ret;
 	}
 
 	/* For every page directory referenced, allocate page tables */
 	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
-						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
+		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length);
 		if (ret)
 			goto err_out;
 	}
@@ -1355,11 +1320,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 			WARN_ON(!pd_len);
 			WARN_ON(!gen8_pte_count(pd_start, pd_len));
 
-			/* Set our used ptes within the page table */
-			bitmap_set(pt->used_ptes,
-				   gen8_pte_index(pd_start),
-				   gen8_pte_count(pd_start, pd_len));
-
 			/* Our pde is now pointing to the pagetable, pt */
 			__set_bit(pde, pd->used_pdes);
 
@@ -1368,8 +1328,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 							      I915_CACHE_LLC);
 			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
 							gen8_pte_index(start),
-							gen8_pte_count(start, length),
-							GEN8_PTES);
+							gen8_pte_count(start, length));
 
 			/* NB: We haven't yet mapped ptes to pages. At this
 			 * point we're still relying on insert_entries() */
@@ -1380,23 +1339,15 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
 	}
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return 0;
 
 err_out:
-	while (pdpe--) {
-		unsigned long temp;
-
-		for_each_set_bit(temp, new_page_tables + pdpe *
-				BITS_TO_LONGS(I915_PDES), I915_PDES)
-			free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
-	}
-
 	for_each_set_bit(pdpe, new_page_dirs, pdpes)
 		free_pd(vm, pdp->page_directory[pdpe]);
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 	mark_tlbs_dirty(ppgtt);
 	return ret;
 }
@@ -1539,14 +1490,14 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 
 static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 {
-	unsigned long *new_page_dirs, *new_page_tables;
+	unsigned long *new_page_dirs;
 	uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev));
 	int ret;
 
 	/* We allocate temp bitmap for page tables for no gain
 	 * but as this is for init only, lets keep the things simple
 	 */
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
 	if (ret)
 		return ret;
 
@@ -1559,7 +1510,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
 	if (!ret)
 		*ppgtt->pdp.used_pdpes = *new_page_dirs;
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs);
 
 	return ret;
 }
@@ -1951,13 +1902,8 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 	 * tables.
 	 */
 	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
-		if (pt != vm->scratch_pt) {
-			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
+		if (pt != vm->scratch_pt)
 			continue;
-		}
-
-		/* We've already allocated a page table */
-		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
 
 		pt = alloc_pt(vm);
 		if (IS_ERR(pt)) {
@@ -1987,10 +1933,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
 
 		trace_i915_page_table_entry_map(vm, pde, pt,
 					 gen6_pte_index(start),
-					 gen6_pte_count(start, length),
-					 GEN6_PTES);
-		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
-				GEN6_PTES);
+					 gen6_pte_count(start, length));
 	}
 
 	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 98268a59bb08..9cb1c0e6f194 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -216,8 +216,7 @@ struct i915_page_dma {
 
 struct i915_page_table {
 	struct i915_page_dma base;
-
-	unsigned long *used_ptes;
+	unsigned int used_ptes;
 };
 
 struct i915_page_directory {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 18ae37c411fd..ef2216242092 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -245,15 +245,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
 
 DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
-	TP_ARGS(vm, pde, pt, first, count, bits),
+		 struct i915_page_table *pt, u32 first, u32 count),
+	TP_ARGS(vm, pde, pt, first, count),
 
 	TP_STRUCT__entry(
 		__field(struct i915_address_space *, vm)
 		__field(u32, pde)
 		__field(u32, first)
 		__field(u32, last)
-		__dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits))
 	),
 
 	TP_fast_assign(
@@ -261,22 +260,16 @@ DECLARE_EVENT_CLASS(i915_page_table_entry_update,
 		__entry->pde = pde;
 		__entry->first = first;
 		__entry->last = first + count - 1;
-		scnprintf(__get_str(cur_ptes),
-			  TRACE_PT_SIZE(bits),
-			  "%*pb",
-			  bits,
-			  pt->used_ptes);
 	),
 
-	TP_printk("vm=%p, pde=%d, updating %u:%u\t%s",
-		  __entry->vm, __entry->pde, __entry->last, __entry->first,
-		  __get_str(cur_ptes))
+	TP_printk("vm=%p, pde=%d, updating %u:%u",
+		  __entry->vm, __entry->pde, __entry->last, __entry->first)
 );
 
 DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
 	TP_PROTO(struct i915_address_space *vm, u32 pde,
-		 struct i915_page_table *pt, u32 first, u32 count, u32 bits),
-	TP_ARGS(vm, pde, pt, first, count, bits)
+		 struct i915_page_table *pt, u32 first, u32 count),
+	TP_ARGS(vm, pde, pt, first, count)
 );
 
 TRACE_EVENT(i915_gem_object_change_domain,
-- 
2.11.0



More information about the Intel-gfx-trybot mailing list