[Intel-gfx] [PATCH v2 14/24] drm/i915: Finish gen6/7 dynamic page table allocation

Daniel Vetter daniel at ffwll.ch
Mon Jan 5 06:45:10 PST 2015


On Tue, Dec 23, 2014 at 05:16:17PM +0000, Michel Thierry wrote:
> From: Ben Widawsky <benjamin.widawsky at intel.com>
> 
> This patch builds on the idea from the previous patch. From here on,
> in the steady state, PDEs are all pointing to the scratch page table (as
> recommended in the spec). When an object is allocated in the VA range,
> the code will determine if we need to allocate a page for the page
> table. Similarly, when the object is destroyed, we will remove and free
> the page table, pointing the PDE back to the scratch page.
> 
> Following patches will work to unify the code a bit as we bring in GEN8
> support. GEN6 and GEN8 are different enough that I had a hard time
> getting to this point with as much common code as I have.
> 
> The aliasing PPGTT must pre-allocate all of the page tables. There are a
> few reasons for this. Two trivial ones: the aliasing ppgtt goes through
> the ggtt paths, so it's hard to maintain, and we currently do not restore
> the default context (assuming the previous force reload is indeed
> necessary). Most importantly though, the only way (it seems from
> empirical evidence) to invalidate the CS TLBs on non-render ring is to
> either use ring sync (which requires actually stopping the rings in
> order to synchronize when the sync completes vs. where you are in
> execution), or to reload DCLV.  Since without full PPGTT we do not ever
> reload the DCLV register, there is no good way to achieve this. The
> simplest solution is just to not support dynamic page table
> creation/destruction in the aliasing PPGTT.
> 
> We could always reload DCLV, but this seems like quite a bit of excess
> overhead only to save at most 2MB-4k of memory for the aliasing PPGTT
> page tables.
> 
> v2: Declare the page table bitmap inside the function (Chris).
> Simplify the way the scratch address space works.
> Move the alloc/teardown tracepoints up a level in the call stack so that
> all implementations get the trace.
> 
> v3: Updated trace event to spit out a name
> 
> v4: Aliasing ppgtt is now initialized differently (in setup global gtt)
> 
> v5: Rebase to latest code. Also removed unnecessary aliasing ppgtt check for
> trace, as it is no longer possible after the PPGTT cleanup patch series
> of a couple of months ago (Daniel).
> 
> Cc: Daniel Vetter <daniel at ffwll.ch>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v4+)

The tracepoints should be split into a separate patch. Although the
teardown stuff will likely disappear I guess ...

Two more comments below.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |   3 +-
>  drivers/gpu/drm/i915/i915_gem.c     |   2 +
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 128 ++++++++++++++++++++++++++++++++----
>  drivers/gpu/drm/i915/i915_trace.h   | 115 ++++++++++++++++++++++++++++++++
>  4 files changed, 236 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 60f91bc..0f63076 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2149,6 +2149,8 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
>  		seq_printf(m, "PP_DIR_BASE_READ: 0x%08x\n", I915_READ(RING_PP_DIR_BASE_READ(ring)));
>  		seq_printf(m, "PP_DIR_DCLV: 0x%08x\n", I915_READ(RING_PP_DIR_DCLV(ring)));
>  	}
> +	seq_printf(m, "ECOCHK: 0x%08x\n\n", I915_READ(GAM_ECOCHK));
> +
>  	if (dev_priv->mm.aliasing_ppgtt) {
>  		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
>  
> @@ -2165,7 +2167,6 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
>  			   get_pid_task(file->pid, PIDTYPE_PID)->comm);
>  		idr_for_each(&file_priv->context_idr, per_file_ctx, m);
>  	}
> -	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
>  }
>  
>  static int i915_ppgtt_info(struct seq_file *m, void *data)
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 5d52990..1649fb2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3599,6 +3599,8 @@ search_free:
>  
>  	/*  allocate before insert / bind */
>  	if (vma->vm->allocate_va_range) {
> +		trace_i915_va_alloc(vma->vm, vma->node.start, vma->node.size,
> +				VM_TO_TRACE_NAME(vma->vm));
>  		ret = vma->vm->allocate_va_range(vma->vm,
>  						vma->node.start,
>  						vma->node.size);
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 54c7ca7..32a355a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1138,10 +1138,47 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
>  static int gen6_alloc_va_range(struct i915_address_space *vm,
>  			       uint64_t start, uint64_t length)
>  {
> +	DECLARE_BITMAP(new_page_tables, GEN6_PPGTT_PD_ENTRIES);
> +	struct drm_device *dev = vm->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct i915_hw_ppgtt *ppgtt =
>  				container_of(vm, struct i915_hw_ppgtt, base);
>  	struct i915_pagetab *pt;
> +	const uint32_t start_save = start, length_save = length;
>  	uint32_t pde, temp;
> +	int ret;
> +
> +	BUG_ON(upper_32_bits(start));
> +
> +	bitmap_zero(new_page_tables, GEN6_PPGTT_PD_ENTRIES);
> +
> +	/* The allocation is done in two stages so that we can bail out with
> +	 * minimal amount of pain. The first stage finds new page tables that
> +	 * need allocation. The second stage marks use ptes within the page
> +	 * need allocation. The second stage marks the used ptes within the page
> +	 */

If we drop the new_page_tables bitmap tracking we could massively simplify
this - checking the various pt pointers against scratch_pt should be enough?
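
Something like this is roughly what I have in mind (completely untested
sketch; it only uses helpers this patch already adds and relies on the
WARN_ON invariant above that a non-scratch pt always has a non-empty
used_ptes bitmap):

	/* Stage 1: allocate the missing page tables */
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
		if (pt != ppgtt->scratch_pt)
			continue;

		pt = alloc_pt_single(dev);
		if (IS_ERR(pt)) {
			ret = PTR_ERR(pt);
			goto unwind_out;
		}

		ppgtt->pd.page_tables[pde] = pt;
	}

	start = start_save;
	length = length_save;

	/* Stage 2: mark the ptes; a pt whose used_ptes bitmap is still
	 * empty must be one we just allocated, so write its pde too. */
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
		if (bitmap_empty(pt->used_ptes, I915_PPGTT_PT_ENTRIES))
			gen6_write_pdes(&ppgtt->pd, pde, pt);

		/* ... fill tmp_bitmap and OR it into pt->used_ptes as before ... */
	}

	...

unwind_out:
	start = start_save;
	length = length_save;
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
		if (pt != ppgtt->scratch_pt &&
		    bitmap_empty(pt->used_ptes, I915_PPGTT_PT_ENTRIES)) {
			ppgtt->pd.page_tables[pde] = ppgtt->scratch_pt;
			free_pt_single(pt, vm->dev);
		}
	}
	return ret;

The unwind only ever runs before any ptes get marked, so freeing everything
in the range that is != scratch_pt and still empty should catch exactly the
freshly allocated page tables without any extra bookkeeping.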

> +	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
> +		if (pt != ppgtt->scratch_pt) {
> +			WARN_ON(bitmap_empty(pt->used_ptes, I915_PPGTT_PT_ENTRIES));
> +			continue;
> +		}
> +
> +		/* We've already allocated a page table */
> +		WARN_ON(!bitmap_empty(pt->used_ptes, I915_PPGTT_PT_ENTRIES));
> +
> +		pt = alloc_pt_single(dev);
> +		if (IS_ERR(pt)) {
> +			ret = PTR_ERR(pt);
> +			goto unwind_out;
> +		}
> +
> +		ppgtt->pd.page_tables[pde] = pt;
> +		set_bit(pde, new_page_tables);
> +		trace_i915_pagetable_alloc(vm, pde, start, GEN6_PDE_SHIFT);
> +	}
> +
> +	start = start_save;
> +	length = length_save;
>  
>  	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
>  		int j;
> @@ -1159,12 +1196,35 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
>  			}
>  		}
>  
> -		bitmap_or(pt->used_ptes, pt->used_ptes, tmp_bitmap,
> +		if (test_and_clear_bit(pde, new_page_tables))
> +			gen6_write_pdes(&ppgtt->pd, pde, pt);
> +
> +		trace_i915_pagetable_map(vm, pde, pt,
> +					 gen6_pte_index(start),
> +					 gen6_pte_count(start, length),
> +					 I915_PPGTT_PT_ENTRIES);
> +		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
>  				I915_PPGTT_PT_ENTRIES);
>  	}
>  
> +	WARN_ON(!bitmap_empty(new_page_tables, GEN6_PPGTT_PD_ENTRIES));
> +
> +	/* Make sure write is complete before other code can use this page
> +	 * table. Also required for WC mapped PTEs */
> +	readl(dev_priv->gtt.gsm);
> +
>  	ppgtt_invalidate_tlbs(vm);
>  	return 0;
> +
> +unwind_out:
> +	for_each_set_bit(pde, new_page_tables, GEN6_PPGTT_PD_ENTRIES) {
> +		struct i915_pagetab *pt = ppgtt->pd.page_tables[pde];
> +		ppgtt->pd.page_tables[pde] = NULL;
> +		free_pt_single(pt, vm->dev);
> +	}
> +
> +	ppgtt_invalidate_tlbs(vm);
> +	return ret;
>  }
>  
>  static void gen6_teardown_va_range(struct i915_address_space *vm,
> @@ -1176,8 +1236,27 @@ static void gen6_teardown_va_range(struct i915_address_space *vm,
>  	uint32_t pde, temp;
>  
>  	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
> +
> +		if (WARN(pt == ppgtt->scratch_pt,
> +		    "Tried to teardown scratch page vm %p. pde %u: %llx-%llx\n",
> +		    vm, pde, start, start + length))
> +			continue;
> +
> +		trace_i915_pagetable_unmap(vm, pde, pt,
> +					   gen6_pte_index(start),
> +					   gen6_pte_count(start, length),
> +					   I915_PPGTT_PT_ENTRIES);
> +
>  		bitmap_clear(pt->used_ptes, gen6_pte_index(start),
>  			     gen6_pte_count(start, length));
> +
> +		if (bitmap_empty(pt->used_ptes, I915_PPGTT_PT_ENTRIES)) {
> +			trace_i915_pagetable_destroy(vm, pde,
> +						     start & GENMASK_ULL(63, GEN6_PDE_SHIFT),
> +						     GEN6_PDE_SHIFT);
> +			gen6_write_pdes(&ppgtt->pd, pde, ppgtt->scratch_pt);
> +			ppgtt->pd.page_tables[pde] = ppgtt->scratch_pt;
> +		}
>  	}
>  
>  	ppgtt_invalidate_tlbs(vm);
> @@ -1187,9 +1266,13 @@ static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
>  {
>  	int i;
>  
> -	for (i = 0; i < ppgtt->num_pd_entries; i++)
> -		free_pt_single(ppgtt->pd.page_tables[i], ppgtt->base.dev);
> +	for (i = 0; i < ppgtt->num_pd_entries; i++) {
> +		struct i915_pagetab *pt = ppgtt->pd.page_tables[i];
> +		if (pt != ppgtt->scratch_pt)
> +			free_pt_single(ppgtt->pd.page_tables[i], ppgtt->base.dev);
> +	}
>  
> +	/* Consider putting this as part of pd free. */
>  	free_pt_scratch(ppgtt->scratch_pt, ppgtt->base.dev);
>  	free_pd_single(&ppgtt->pd);
>  }
> @@ -1254,7 +1337,7 @@ err_out:
>  	return ret;
>  }
>  
> -static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
> +static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt, bool preallocate_pt)

Imo it would be clearer to move the pt preallocation for aliasing ppgtt
into the ppgtt_init function. Makes for a bit bigger diff, but will
result in less convoluted control flow since we should end up in a nice

if (aliasing)
	/* create all pts */
else
	/* allocate&use scratch_pt */
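
i.e. something like this in gen6_ppgtt_init (rough, untested sketch, only
reusing helpers from this patch; assumes gen6_ppgtt_alloc loses the bool
again and only sets up the drm_mm node plus scratch_pt):

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
{
	...
	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	if (aliasing) {
		/* aliasing ppgtt: preallocate every page table up front */
		ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries,
				     ppgtt->base.dev);
		if (ret) {
			free_pt_scratch(ppgtt->scratch_pt, ppgtt->base.dev);
			drm_mm_remove_node(&ppgtt->node);
			return ret;
		}
	} else {
		/* full ppgtt: every PDE starts out pointing at scratch_pt */
		gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
	}
	...
}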

Aside: Should we only allocate the scratch_pt for !aliasing?

>  {
>  	int ret;
>  
> @@ -1262,10 +1345,14 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
>  	if (ret)
>  		return ret;
>  
> +	if (!preallocate_pt)
> +		return 0;
> +
>  	ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries,
>  			ppgtt->base.dev);
>  
>  	if (ret) {
> +		free_pt_scratch(ppgtt->scratch_pt, ppgtt->base.dev);
>  		drm_mm_remove_node(&ppgtt->node);
>  		return ret;
>  	}
> @@ -1273,7 +1360,17 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
>  	return 0;
>  }
>  
> -static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> +static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
> +				  uint64_t start, uint64_t length)
> +{
> +	struct i915_pagetab *unused;
> +	uint32_t pde, temp;
> +
> +	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
> +		ppgtt->pd.page_tables[pde] = ppgtt->scratch_pt;
> +}
> +
> +static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
>  {
>  	struct drm_device *dev = ppgtt->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -1289,7 +1386,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	} else
>  		BUG();
>  
> -	ret = gen6_ppgtt_alloc(ppgtt);
> +	ret = gen6_ppgtt_alloc(ppgtt, aliasing);
>  	if (ret)
>  		return ret;
>  
> @@ -1308,6 +1405,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
>  		ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
>  
> +	if (!aliasing)
> +		gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
> +
>  	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
>  
>  	DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
> @@ -1320,7 +1420,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	return 0;
>  }
>  
> -static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
> +static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt,
> +		bool aliasing)
>  {
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> @@ -1328,7 +1429,7 @@ static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
>  
>  	if (INTEL_INFO(dev)->gen < 8)
> -		return gen6_ppgtt_init(ppgtt);
> +		return gen6_ppgtt_init(ppgtt, aliasing);
>  	else
>  		return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
>  }
> @@ -1337,7 +1438,7 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	int ret = 0;
>  
> -	ret = __hw_ppgtt_init(dev, ppgtt);
> +	ret = __hw_ppgtt_init(dev, ppgtt, false);
>  	if (ret == 0) {
>  		kref_init(&ppgtt->ref);
>  		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
> @@ -1445,9 +1546,14 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
>  			     vma->node.start,
>  			     vma->obj->base.size,
>  			     true);
> -	if (vma->vm->teardown_va_range)
> +	if (vma->vm->teardown_va_range) {
> +		trace_i915_va_teardown(vma->vm,
> +				       vma->node.start, vma->node.size,
> +				       VM_TO_TRACE_NAME(vma->vm));
> +
>  		vma->vm->teardown_va_range(vma->vm,
>  					   vma->node.start, vma->node.size);
> +	}
>  }
>  
>  extern int intel_iommu_gfx_mapped;
> @@ -1963,7 +2069,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
>  		if (!ppgtt)
>  			return -ENOMEM;
>  
> -		ret = __hw_ppgtt_init(dev, ppgtt);
> +		ret = __hw_ppgtt_init(dev, ppgtt, true);
>  		if (ret != 0)
>  			return ret;
>  
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index f004d3d..0b617c9 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -156,6 +156,121 @@ TRACE_EVENT(i915_vma_unbind,
>  		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
>  );
>  
> +#define VM_TO_TRACE_NAME(vm) \
> +	(i915_is_ggtt(vm) ? "GGTT" : \
> +				      "Private VM")
> +
> +DECLARE_EVENT_CLASS(i915_va,
> +	TP_PROTO(struct i915_address_space *vm, u64 start, u64 length, const char *name),
> +	TP_ARGS(vm, start, length, name),
> +
> +	TP_STRUCT__entry(
> +		__field(struct i915_address_space *, vm)
> +		__field(u64, start)
> +		__field(u64, end)
> +		__string(name, name)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->vm = vm;
> +		__entry->start = start;
> +		__entry->end = start + length;
> +		__assign_str(name, name);
> +	),
> +
> +	TP_printk("vm=%p (%s), 0x%llx-0x%llx",
> +		  __entry->vm, __get_str(name),  __entry->start, __entry->end)
> +);
> +
> +DEFINE_EVENT(i915_va, i915_va_alloc,
> +	     TP_PROTO(struct i915_address_space *vm, u64 start, u64 length, const char *name),
> +	     TP_ARGS(vm, start, length, name)
> +);
> +
> +DEFINE_EVENT(i915_va, i915_va_teardown,
> +	     TP_PROTO(struct i915_address_space *vm, u64 start, u64 length, const char *name),
> +	     TP_ARGS(vm, start, length, name)
> +);
> +
> +DECLARE_EVENT_CLASS(i915_pagetable,
> +	TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift),
> +	TP_ARGS(vm, pde, start, pde_shift),
> +
> +	TP_STRUCT__entry(
> +		__field(struct i915_address_space *, vm)
> +		__field(u32, pde)
> +		__field(u64, start)
> +		__field(u64, end)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->vm = vm;
> +		__entry->pde = pde;
> +		__entry->start = start;
> +		__entry->end = (start + (1ULL << pde_shift)) & ~((1ULL << pde_shift)-1);
> +	),
> +
> +	TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)",
> +		  __entry->vm, __entry->pde, __entry->start, __entry->end)
> +);
> +
> +DEFINE_EVENT(i915_pagetable, i915_pagetable_alloc,
> +	     TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift),
> +	     TP_ARGS(vm, pde, start, pde_shift)
> +);
> +
> +DEFINE_EVENT(i915_pagetable, i915_pagetable_destroy,
> +	     TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift),
> +	     TP_ARGS(vm, pde, start, pde_shift)
> +);
> +
> +/* Avoid extra math because we only support two sizes. The format is defined by
> + * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
> +#define TRACE_PT_SIZE(bits) \
> +	((((bits) == 1024) ? 288 : 144) + 1)
> +
> +DECLARE_EVENT_CLASS(i915_pagetable_update,
> +	TP_PROTO(struct i915_address_space *vm, u32 pde,
> +		 struct i915_pagetab *pt, u32 first, u32 len, size_t bits),
> +	TP_ARGS(vm, pde, pt, first, len, bits),
> +
> +	TP_STRUCT__entry(
> +		__field(struct i915_address_space *, vm)
> +		__field(u32, pde)
> +		__field(u32, first)
> +		__field(u32, last)
> +		__dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits))
> +	),
> +
> +	TP_fast_assign(
> +		__entry->vm = vm;
> +		__entry->pde = pde;
> +		__entry->first = first;
> +		__entry->last = first + len;
> +
> +		bitmap_scnprintf(__get_str(cur_ptes),
> +				 TRACE_PT_SIZE(bits),
> +				 pt->used_ptes,
> +				 bits);
> +	),
> +
> +	TP_printk("vm=%p, pde=%d, updating %u:%u\t%s",
> +		  __entry->vm, __entry->pde, __entry->last, __entry->first,
> +		  __get_str(cur_ptes))
> +);
> +
> +DEFINE_EVENT(i915_pagetable_update, i915_pagetable_map,
> +	TP_PROTO(struct i915_address_space *vm, u32 pde,
> +		 struct i915_pagetab *pt, u32 first, u32 len, size_t bits),
> +	TP_ARGS(vm, pde, pt, first, len, bits)
> +);
> +
> +DEFINE_EVENT(i915_pagetable_update, i915_pagetable_unmap,
> +	TP_PROTO(struct i915_address_space *vm, u32 pde,
> +		 struct i915_pagetab *pt, u32 first, u32 len, size_t bits),
> +	TP_ARGS(vm, pde, pt, first, len, bits)
> +);
> +
>  TRACE_EVENT(i915_gem_object_change_domain,
>  	    TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write),
>  	    TP_ARGS(obj, old_read, old_write),
> -- 
> 2.1.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

