[Intel-gfx] [PATCH] drm/i915: Apply VT-d scanout adjustment to the VMA

Wed Feb 3 09:00:54 UTC 2021

On Wed, Feb 03, 2021 at 08:38:41AM +0000, Chris Wilson wrote:
> Currently, we allocate exactly the VMA requested for the framebuffer and
> rely on filling the whole of the GGTT with scratch pages to catch when
> VT-d prefetches beyond the bounds of the surface. However, this means
> that we have to scrub the GGTT on startup and resume, and on recent HW
> this is made even slower as the only access to GSM is uncached.
> 
> Since we do fill the pad-to-size PTE with scratch pages, and this is
> also reapplied on resume, we can forgo the overzealous clearing of the
> entire GGTT and instead pad the VMA to avoid the VT-d prefetches going
> outside of the VMA.

We have a lot of these "allocate a huge pile of extra PTEs
after (or before for rotation) the scanout surface" workarounds
we're not handling at all. Not sure ths is sufficient to cover
those.

> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 18 ++++++++++++-----
>  drivers/gpu/drm/i915/gt/intel_ggtt.c       | 23 ----------------------
>  2 files changed, 13 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index 36f54cedaaeb..2668a79383c8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -359,19 +359,26 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
>   */
>  struct i915_vma *
>  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
> -				     u32 alignment,
> +				     u32 align,
>  				     const struct i915_ggtt_view *view,
>  				     unsigned int flags)
>  {
>  	struct drm_i915_private *i915 = to_i915(obj->base.dev);
>  	struct i915_gem_ww_ctx ww;
>  	struct i915_vma *vma;
> +	u64 size;
>  	int ret;
>  
>  	/* Frame buffer must be in LMEM (no migration yet) */
>  	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
>  		return ERR_PTR(-EINVAL);
>  
> +	size = obj->base.size;
> +	if (intel_scanout_needs_vtd_wa(i915)) {
> +		size = ALIGN(size, SZ_256K);
> +		align = ALIGN(align, SZ_256K);
> +	}
> +
>  	i915_gem_ww_ctx_init(&ww, true);
>  retry:
>  	ret = i915_gem_object_lock(obj, &ww);
> @@ -404,18 +411,19 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	vma = ERR_PTR(-ENOSPC);
>  	if ((flags & PIN_MAPPABLE) == 0 &&
>  	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
> -		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
> +		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view,
> +						  size, align,
>  						  flags | PIN_MAPPABLE |
>  						  PIN_NONBLOCK);
>  	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
> -		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
> -						  alignment, flags);
> +		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view,
> +						  size, align, flags);
>  	if (IS_ERR(vma)) {
>  		ret = PTR_ERR(vma);
>  		goto err;
>  	}
>  
> -	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
> +	vma->display_alignment = max_t(u64, vma->display_alignment, align);
>  	i915_vma_mark_scanout(vma);
>  
>  	i915_gem_object_flush_if_display_locked(obj);
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index fc399ac16eda..126b061862ad 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -304,27 +304,6 @@ static void nop_clear_range(struct i915_address_space *vm,
>  {
>  }
>  
> -static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> -				  u64 start, u64 length)
> -{
> -	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
> -	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
> -	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
> -	const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
> -	gen8_pte_t __iomem *gtt_base =
> -		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
> -	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
> -	int i;
> -
> -	if (WARN(num_entries > max_entries,
> -		 "First entry = %d; Num entries = %d (max=%d)\n",
> -		 first_entry, num_entries, max_entries))
> -		num_entries = max_entries;
> -
> -	for (i = 0; i < num_entries; i++)
> -		gen8_set_pte(&gtt_base[i], scratch_pte);
> -}
> -
>  static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
>  {
>  	/*
> @@ -884,8 +863,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>  	ggtt->vm.cleanup = gen6_gmch_remove;
>  	ggtt->vm.insert_page = gen8_ggtt_insert_page;
>  	ggtt->vm.clear_range = nop_clear_range;
> -	if (intel_scanout_needs_vtd_wa(i915))
> -		ggtt->vm.clear_range = gen8_ggtt_clear_range;
>  
>  	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
>  
> -- 
> 2.20.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel