[Intel-gfx] [PATCH] drm/i915: Force CPU relocations if not GTT mapped

Daniel Vetter daniel at ffwll.ch
Mon Aug 11 12:02:36 CEST 2014


On Sun, Aug 10, 2014 at 08:06:57AM +0100, Chris Wilson wrote:
> Move the decision on whether we need to have a mappable object during
> execbuffer to the fore and then reuse that decision by propagating the
> flag through to reservation. As a corollary, before doing the actual
> relocation through the GTT, we can make sure that we do have a GTT
> mapping through which to operate.
> 
> v2: Revamp and resend to ease future patches.
> v3: Refresh patch rationale
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>

Ok, the secure batch hunk in here looks rather unrelated and imo also a
bit incomplete. I've dropped it. And I've added a bit of text to the
commit message to explain why this patch touches map_and_fenceable logic.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c            |  8 +--
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
>  2 files changed, 42 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99250d27668d..6366230c4e32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	vma->unbind_vma(vma);
>  
>  	list_del_init(&vma->mm_list);
> -	/* Avoid an unnecessary call to unbind on rebind. */
>  	if (i915_is_ggtt(vma->vm))
> -		obj->map_and_fenceable = true;
> +		obj->map_and_fenceable = false;
>  
>  	drm_mm_remove_node(&vma->node);
>  	i915_gem_vma_destroy(vma);
> @@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  			return 0;
>  		}
>  	} else if (enable) {
> +		if (WARN_ON(!obj->map_and_fenceable))
> +			return -EINVAL;
> +
>  		reg = i915_find_fence_reg(dev);
>  		if (IS_ERR(reg))
>  			return PTR_ERR(reg);
> @@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
> -	/* Avoid an unnecessary call to unbind on the first bind. */
> -	obj->map_and_fenceable = true;
>  
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc4e5b2..8b734d5d16b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
>  #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
>  
>  #define BATCH_OFFSET_BIAS (256*1024)
> @@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  }
>  
>  static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> -	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> -		i915_is_ggtt(vma->vm);
> -}
> -
> -static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_engine_cs *ring,
>  				bool *need_reloc)
> @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence;
>  	uint64_t flags;
>  	int ret;
>  
>  	flags = 0;
> -
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	if (need_fence || need_reloc_mappable(vma))
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
>  		flags |= PIN_MAPPABLE;
> -
>  	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
>  		flags |= PIN_GLOBAL;
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  }
>  
>  static bool
> -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
> +need_reloc_mappable(struct i915_vma *vma)
>  {
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	struct drm_i915_gem_object *obj = vma->obj;
> -	bool need_fence, need_mappable;
>  
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	need_mappable = need_fence || need_reloc_mappable(vma);
> +	if (entry->relocation_count == 0)
> +		return false;
>  
> -	WARN_ON((need_mappable || need_fence) &&
> +	if (!i915_is_ggtt(vma->vm))
> +		return false;
> +
> +	/* See also use_cpu_reloc() */
> +	if (HAS_LLC(vma->obj->base.dev))
> +		return false;
> +
> +	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool
> +eb_vma_misplaced(struct i915_vma *vma)
> +{
> +	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +	struct drm_i915_gem_object *obj = vma->obj;
> +
> +	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
>  	       !i915_is_ggtt(vma->vm));
>  
>  	if (entry->alignment &&
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> -	if (need_mappable && !obj->map_and_fenceable)
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>  		return true;
>  
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
>  
> -		if (need_mappable)
> +		if (need_mappable) {
> +			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>  			list_move(&vma->exec_list, &ordered_vmas);
> -		else
> +		} else
>  			list_move_tail(&vma->exec_list, &ordered_vmas);
>  
>  		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>  			if (!drm_mm_node_allocated(&vma->node))
>  				continue;
>  
> -			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
> +			if (eb_vma_misplaced(vma))
>  				ret = i915_vma_unbind(vma);
>  			else
>  				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>  	 * hsw should have this fixed, but bdw mucks it up again. */
> -	if (flags & I915_DISPATCH_SECURE &&
> -	    !batch_obj->has_global_gtt_mapping) {
> -		/* When we have multiple VMs, we'll need to make sure that we
> -		 * allocate space first */
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> -		BUG_ON(!vma);
> -		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> -	}
> +	if (flags & I915_DISPATCH_SECURE) {
> +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> +		if (ret)
> +			goto err;
>  
> -	if (flags & I915_DISPATCH_SECURE)
>  		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> -	else
> +	} else
>  		exec_start += i915_gem_obj_offset(batch_obj, vm);
>  
>  	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
> -			args, &eb->vmas, batch_obj, exec_start, flags);
> -	if (ret)
> -		goto err;
> +					   args, &eb->vmas, batch_obj, exec_start, flags);
>  
> +	if (flags & I915_DISPATCH_SECURE)
> +		i915_gem_object_ggtt_unpin(batch_obj);
>  err:
>  	/* the request owns the ref now */
>  	i915_gem_context_unreference(ctx);
> -- 
> 2.1.0.rc1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch



More information about the Intel-gfx mailing list