[Intel-gfx] [PATCH] drm/i915: Do not access stolen memory directly by the CPU, even for error capture

Ben Widawsky ben at bwidawsk.net
Thu Apr 24 23:47:48 CEST 2014


On Wed, Feb 12, 2014 at 07:18:40PM +0000, Chris Wilson wrote:
> For stolen pages, since it is verboten to access them directly on many
> architectures, we have to read them through the GTT aperture. If they
> are not accessible through the aperture, then we have to abort.
> 
> This was complicated by
> 
> commit 8b6124a633d8095b0c8364f585edff9c59568a96
> Author: Chris Wilson <chris at chris-wilson.co.uk>
> Date:   Thu Jan 30 14:38:16 2014 +0000
> 
>     drm/i915: Don't access snooped pages through the GTT (even for error capture)
> 
> and the desire to use stolen memory for ringbuffers, contexts and
> batches in the future.

I am somewhat unclear as to whether we want to prefer the aperture for
reading back objects which may be mapped in multiple address spaces.

> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c | 50 ++++++++++++++++++++++-------------
>  1 file changed, 31 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 0e1f7b691082..a2c3a639c3cc 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -542,10 +542,11 @@ static struct drm_i915_error_object *
>  i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  			       struct drm_i915_gem_object *src,
>  			       struct i915_address_space *vm,
> -			       const int num_pages)
> +			       int num_pages)
>  {
>  	struct drm_i915_error_object *dst;
> -	int i;
> +	bool use_ggtt;

	bool use_aperture;

> +	int i = 0;
>  	u32 reloc_offset;
>  
>  	if (src == NULL || src->pages == NULL)
> @@ -555,8 +556,32 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  	if (dst == NULL)
>  		return NULL;
>  
> -	reloc_offset = dst->gtt_offset = i915_gem_obj_offset(src, vm);
> -	for (i = 0; i < num_pages; i++) {
> +	dst->gtt_offset = i915_gem_obj_offset(src, vm);
> +
> +	reloc_offset = dst->gtt_offset;
> +	use_ggtt = (src->cache_level == I915_CACHE_NONE &&
> +		    i915_is_ggtt(vm) &&
> +		    src->has_global_gtt_mapping &&
> +		    reloc_offset + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end);
> +

You should probably do this as two patches: pull out the use_aperture
variable first, then add the stolen check. A possible subject for the
first patch:
"drm/i915: Don't check each page on error object capture"

> +	/* Cannot access stolen address directly, try to use the aperture */
> +	if (src->stolen) {
> +		use_ggtt = true;
> +
> +		if (!src->has_global_gtt_mapping)

This is a BUG(), or at least a WARN(), isn't it? Hmm, is there some
possibility we get here after an object is unmapped but before it's
taken away?

> +			goto unwind;
> +
> +		reloc_offset = i915_gem_obj_ggtt_offset(src);
> +		if (reloc_offset + num_pages * PAGE_SIZE > dev_priv->gtt.mappable_end)

This could probably be a separate patch, since it changes the old
behavior, which could read some pages of a given object through the CPU
and others through the aperture.

> +			goto unwind;
> +	}
> +
> +	/* Cannot access snooped pages through the aperture */
> +	if (use_ggtt && src->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv->dev))
> +		goto unwind;
> +

Some nitpicks about the previous code that may be worth fixing in the future:
/* Prefer the aperture? */
use_aperture = src->has_global_gtt_mapping &&
		i915_gem_obj_ggtt_offset(src) + num_pages * PAGE_SIZE <= dev_priv->gtt.mappable_end;

if (src->stolen && !use_aperture)
	goto unwind;

if (use_aperture)
	reloc_offset = i915_gem_obj_ggtt_offset(src);
else
	reloc_offset = dst->gtt_offset;

I don't really understand why the current code has
use_ggtt = (src->cache_level == I915_CACHE_NONE...

That seems irrelevant to me for this decision.

> +	dst->page_count = num_pages;
> +	while (num_pages--) {
>  		unsigned long flags;
>  		void *d;
>  
> @@ -565,10 +590,7 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  			goto unwind;
>  
>  		local_irq_save(flags);
> -		if (src->cache_level == I915_CACHE_NONE &&
> -		    reloc_offset < dev_priv->gtt.mappable_end &&
> -		    src->has_global_gtt_mapping &&
> -		    i915_is_ggtt(vm)) {
> +		if (use_ggtt) {
>  			void __iomem *s;
>  
>  			/* Simply ignore tiling or any overlapping fence.
> @@ -580,14 +602,6 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  						     reloc_offset);
>  			memcpy_fromio(d, s, PAGE_SIZE);
>  			io_mapping_unmap_atomic(s);
> -		} else if (src->stolen) {
> -			unsigned long offset;
> -
> -			offset = dev_priv->mm.stolen_base;
> -			offset += src->stolen->start;
> -			offset += i << PAGE_SHIFT;
> -
> -			memcpy_fromio(d, (void __iomem *) offset, PAGE_SIZE);
>  		} else {
>  			struct page *page;
>  			void *s;
> @@ -604,11 +618,9 @@ i915_error_object_create_sized(struct drm_i915_private *dev_priv,
>  		}
>  		local_irq_restore(flags);
>  
> -		dst->pages[i] = d;
> -
> +		dst->pages[i++] = d;

You could capture the object backwards and get rid of i, if you felt
like having a good time.

>  		reloc_offset += PAGE_SIZE;
>  	}
> -	dst->page_count = num_pages;
>  
>  	return dst;
>  

I couldn't spot anything actually wrong, but I really do want use_ggtt
renamed to something more descriptive (e.g. use_aperture, as suggested
above).

With that change:
Reviewed-by: Ben Widawsky <ben at bwidawsk.net>

If you fix any of my other suggestions, I will re-review (please CC me).

-- 
Ben Widawsky, Intel Open Source Technology Center



More information about the Intel-gfx mailing list