[Intel-gfx] [PATCH 1/3] drm/i915: Fix obj->map_and_fenceable for ppgtt

Daniel Vetter daniel at ffwll.ch
Thu May 15 23:12:39 CEST 2014


On Thu, May 15, 2014 at 04:55:25PM +0100, Chris Wilson wrote:
> With ppgtt, it is no longer correct to mark an object as
> map_and_fenceable if we simply unbind it from the global gtt. This has
> consequences during execbuffer where we simply use
> obj->map_and_fenceable in use_cpu_reloc() to decide which access method
> to use for writing into the object. Now for a ppgtt object,
> map_and_fenceable will be true when it is not bound into the ggtt but
> only in a ppgtt, leading to an invalid access on a non-llc architecture.
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
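
To spell out the failure mode: relocations are only written through a gtt
mapping when the cpu path is ruled out, roughly along the lines of the
sketch below (made-up name, rough sketch only; see use_cpu_reloc() and the
need_reloc_mappable() added by this patch for the real checks):

static bool reloc_needs_gtt_write(struct drm_i915_gem_object *obj)
{
	/* rough sketch only, not the actual helper */
	if (HAS_LLC(obj->base.dev))
		return false;	/* coherent, fix up through the cpu */
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;	/* already cpu dirty, keep using the cpu */
	return true;		/* write through the mappable ggtt aperture */
}

Since the old code left obj->map_and_fenceable set to true after a ggtt
unbind, use_cpu_reloc() could send us down that gtt write path for an
object that is only bound into a ppgtt, i.e. with no mappable ggtt binding
behind it.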

I wonder what we could do with all the per-object stuff that's only really
valid for the global gtt binding. Moving it to the vma is wasteful (rough
sketch below), but keeping it in the object leads to subtle bugs. See the
pin leak as another example ...
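
Fwiw the "move it to the vma" direction would look roughly like this, just
so we're all talking about the same thing (hand-waving sketch, made-up
name, not the real struct i915_vma layout):

struct i915_vma_sketch {
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct drm_mm_node node;
	/* only ever set while this (ggtt) vma is bound and cleared again on
	 * unbind, so a ppgtt binding can never masquerade as a mappable one */
	unsigned map_and_fenceable:1;
};

That buys us state which can't leak across address spaces, at the cost of
carrying (and looking up) a ggtt-only bit on every vma.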

Good ideas anyone?
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem.c            |   8 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 146 +++++++++++++++--------------
>  2 files changed, 79 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index f8b2c16745f8..844ea6048321 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2807,9 +2807,8 @@ int i915_vma_unbind(struct i915_vma *vma)
>  	i915_gem_gtt_finish_object(obj);
>  
>  	list_del_init(&vma->mm_list);
> -	/* Avoid an unnecessary call to unbind on rebind. */
>  	if (i915_is_ggtt(vma->vm))
> -		obj->map_and_fenceable = true;
> +		obj->map_and_fenceable = false;
>  
>  	drm_mm_remove_node(&vma->node);
>  	i915_gem_vma_destroy(vma);
> @@ -3159,6 +3158,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  			return 0;
>  		}
>  	} else if (enable) {
> +		if (WARN_ON(!obj->map_and_fenceable))
> +			return -EINVAL;
> +
>  		reg = i915_find_fence_reg(dev);
>  		if (IS_ERR(reg))
>  			return PTR_ERR(reg);
> @@ -4170,8 +4172,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>  
>  	obj->fence_reg = I915_FENCE_REG_NONE;
>  	obj->madv = I915_MADV_WILLNEED;
> -	/* Avoid an unnecessary call to unbind on the first bind. */
> -	obj->map_and_fenceable = true;
>  
>  	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index c7ee1e3013ac..b4d8c31c0919 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
>  
>  struct eb_vmas {
>  	struct list_head vmas;
> @@ -532,14 +533,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
>  }
>  
>  static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> -	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> -		i915_is_ggtt(vma->vm);
> -}
> -
> -static int
>  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  				struct intel_ring_buffer *ring,
>  				bool *need_reloc)
> @@ -547,19 +540,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	struct drm_i915_gem_object *obj = vma->obj;
>  	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
>  	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> -	bool need_fence;
>  	unsigned flags;
>  	int ret;
>  
>  	flags = 0;
> -
> -	need_fence =
> -		has_fenced_gpu_access &&
> -		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -		obj->tiling_mode != I915_TILING_NONE;
> -	if (need_fence || need_reloc_mappable(vma))
> +	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
>  		flags |= PIN_MAPPABLE;
> -
>  	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
>  		flags |= PIN_GLOBAL;
>  
> @@ -595,6 +581,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  	return 0;
>  }
>  
> +static bool
> +need_reloc_mappable(struct i915_vma *vma)
> +{
> +	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> +
> +	if (entry->relocation_count == 0)
> +		return false;
> +
> +	if (!i915_is_ggtt(vma->vm))
> +		return false;
> +
> +	/* See also use_cpu_reloc() */
> +	if (HAS_LLC(vma->obj->base.dev))
> +		return false;
> +
> +	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> +		return false;
> +
> +	return true;
> +}
> +
>  static int
>  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  			    struct list_head *vmas,
> @@ -627,9 +634,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  			obj->tiling_mode != I915_TILING_NONE;
>  		need_mappable = need_fence || need_reloc_mappable(vma);
>  
> -		if (need_mappable)
> +		if (need_mappable) {
> +			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>  			list_move(&vma->exec_list, &ordered_vmas);
> -		else
> +		} else
>  			list_move_tail(&vma->exec_list, &ordered_vmas);
>  
>  		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -657,25 +665,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
>  		/* Unbind any ill-fitting objects or pin. */
>  		list_for_each_entry(vma, vmas, exec_list) {
>  			struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> -			bool need_fence, need_mappable;
>  
>  			obj = vma->obj;
>  
>  			if (!drm_mm_node_allocated(&vma->node))
>  				continue;
>  
> -			need_fence =
> -				has_fenced_gpu_access &&
> -				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> -				obj->tiling_mode != I915_TILING_NONE;
> -			need_mappable = need_fence || need_reloc_mappable(vma);
> -
> -			WARN_ON((need_mappable || need_fence) &&
> -			       !i915_is_ggtt(vma->vm));
> -
> -			if ((entry->alignment &&
> -			     vma->node.start & (entry->alignment - 1)) ||
> -			    (need_mappable && !obj->map_and_fenceable))
> +			if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
> +			    (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
>  				ret = i915_vma_unbind(vma);
>  			else
>  				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -776,9 +773,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
>  		 * relocations were valid.
>  		 */
>  		for (j = 0; j < exec[i].relocation_count; j++) {
> -			if (copy_to_user(&user_relocs[j].presumed_offset,
> -					 &invalid_offset,
> -					 sizeof(invalid_offset))) {
> +			if (__copy_to_user(&user_relocs[j].presumed_offset,
> +					   &invalid_offset,
> +					   sizeof(invalid_offset))) {
>  				ret = -EFAULT;
>  				mutex_lock(&dev->struct_mutex);
>  				goto err;
> @@ -1268,33 +1265,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
>  	 * batch" bit. Hence we need to pin secure batches into the global gtt.
>  	 * hsw should have this fixed, but bdw mucks it up again. */
> -	if (flags & I915_DISPATCH_SECURE &&
> -	    !batch_obj->has_global_gtt_mapping) {
> -		/* When we have multiple VMs, we'll need to make sure that we
> -		 * allocate space first */
> -		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> -		BUG_ON(!vma);
> -		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> -	}
> +	if (flags & I915_DISPATCH_SECURE) {
> +		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> +		if (ret)
> +			goto err;
>  
> -	if (flags & I915_DISPATCH_SECURE)
>  		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> -	else
> +	} else
>  		exec_start += i915_gem_obj_offset(batch_obj, vm);
>  
>  	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
>  	if (ret)
> -		goto err;
> +		goto err_batch;
>  
>  	ret = i915_switch_context(ring, ctx);
>  	if (ret)
> -		goto err;
> +		goto err_batch;
>  
>  	if (ring == &dev_priv->ring[RCS] &&
>  	    mode != dev_priv->relative_constants_mode) {
>  		ret = intel_ring_begin(ring, 4);
>  		if (ret)
> -				goto err;
> +			goto err_batch;
>  
>  		intel_ring_emit(ring, MI_NOOP);
>  		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> @@ -1308,30 +1300,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
>  		ret = i915_reset_gen7_sol_offsets(dev, ring);
>  		if (ret)
> -			goto err;
> +			goto err_batch;
>  	}
>  
> -
>  	exec_len = args->batch_len;
>  	if (cliprects) {
>  		for (i = 0; i < args->num_cliprects; i++) {
>  			ret = i915_emit_box(dev, &cliprects[i],
>  					    args->DR1, args->DR4);
>  			if (ret)
> -				goto err;
> +				goto err_batch;
>  
>  			ret = ring->dispatch_execbuffer(ring,
>  							exec_start, exec_len,
>  							flags);
>  			if (ret)
> -				goto err;
> +				goto err_batch;
>  		}
>  	} else {
>  		ret = ring->dispatch_execbuffer(ring,
>  						exec_start, exec_len,
>  						flags);
>  		if (ret)
> -			goto err;
> +			goto err_batch;
>  	}
>  
>  	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
> @@ -1339,6 +1330,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
>  	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
>  
> +err_batch:
> +	if (flags & I915_DISPATCH_SECURE)
> +		i915_gem_object_ggtt_unpin(batch_obj);
>  err:
>  	/* the request owns the ref now */
>  	i915_gem_context_unreference(ctx);
> @@ -1420,18 +1414,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
>  
>  	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
>  	if (!ret) {
> +		struct drm_i915_gem_exec_object __user *user_exec_list =
> +			to_user_ptr(args->buffers_ptr);
> +
>  		/* Copy the new buffer offsets back to the user's exec list. */
> -		for (i = 0; i < args->buffer_count; i++)
> -			exec_list[i].offset = exec2_list[i].offset;
> -		/* ... and back out to userspace */
> -		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
> -				   exec_list,
> -				   sizeof(*exec_list) * args->buffer_count);
> -		if (ret) {
> -			ret = -EFAULT;
> -			DRM_DEBUG("failed to copy %d exec entries "
> -				  "back to user (%d)\n",
> -				  args->buffer_count, ret);
> +		for (i = 0; i < args->buffer_count; i++) {
> +			ret = __copy_to_user(&user_exec_list[i].offset,
> +					     &exec2_list[i].offset,
> +					     sizeof(user_exec_list[i].offset));
> +			if (ret) {
> +				ret = -EFAULT;
> +				DRM_DEBUG("failed to copy %d exec entries "
> +					  "back to user (%d)\n",
> +					  args->buffer_count, ret);
> +				break;
> +			}
>  		}
>  	}
>  
> @@ -1482,14 +1479,21 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
>  	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
>  	if (!ret) {
>  		/* Copy the new buffer offsets back to the user's exec list. */
> -		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
> -				   exec2_list,
> -				   sizeof(*exec2_list) * args->buffer_count);
> -		if (ret) {
> -			ret = -EFAULT;
> -			DRM_DEBUG("failed to copy %d exec entries "
> -				  "back to user (%d)\n",
> -				  args->buffer_count, ret);
> +		struct drm_i915_gem_exec_object2 *user_exec_list =
> +				   to_user_ptr(args->buffers_ptr);
> +		int i;
> +
> +		for (i = 0; i < args->buffer_count; i++) {
> +			ret = __copy_to_user(&user_exec_list[i].offset,
> +					     &exec2_list[i].offset,
> +					     sizeof(user_exec_list[i].offset));
> +			if (ret) {
> +				ret = -EFAULT;
> +				DRM_DEBUG("failed to copy %d exec entries "
> +					  "back to user\n",
> +					  args->buffer_count);
> +				break;
> +			}
>  		}
>  	}
>  
> -- 
> 2.0.0.rc2
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch


