[Intel-gfx] [PATCH v6] drm/i915: Add soft-pinning API for execbuffer

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Dec 2 03:28:52 PST 2015


On 16/10/15 11:59, Thomas Daniel wrote:
> From: Chris Wilson <chris at chris-wilson.co.uk>
>
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>
> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
> (Not published externally)
>
> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
> to allow eviction of soft-pinned objects when another soft-pinned object used
> by a subsequent execbuffer overlaps reported by Michal Winiarski.
> (Not published externally)
>
> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
> pinned first after an address conflict happens to avoid repeated conflicts in
> rare cases (Suggested by Chris Wilson).  Expanded comment on
> drm_i915_gem_exec_object2.offset to cover this new API.
>
> v5: Added I915_PARAM_HAS_EXEC_SOFTPIN parameter for detecting this capability
> (Kristian). Added check for multiple pinnings on eviction (Akash). Made sure
> buffers are not considered misplaced without the user specifying
> EXEC_OBJECT_SUPPORTS_48B_ADDRESS.  User must assume responsibility for any
> addressing workarounds.  Updated object2.offset field comment again to clarify
> NO_RELOC case (Chris).  checkpatch cleanup.
>
> v6: Trivial rebase on latest drm-intel-nightly
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Akash Goel <akash.goel at intel.com>
> Cc: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
> Cc: Michal Winiarski <michal.winiarski at intel.com>
> Cc: Zou Nanhai <nanhai.zou at intel.com>
> Cc: Kristian Høgsberg <hoegsberg at gmail.com>
> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_dma.c            |  3 ++
>   drivers/gpu/drm/i915/i915_drv.h            |  2 +
>   drivers/gpu/drm/i915/i915_gem.c            | 64 ++++++++++++++++++++----------
>   drivers/gpu/drm/i915/i915_gem_evict.c      | 39 ++++++++++++++++++
>   drivers/gpu/drm/i915/i915_gem_execbuffer.c | 19 +++++++--
>   include/uapi/drm/i915_drm.h                | 12 ++++--
>   6 files changed, 113 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 2336af9..824c6c3 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_RESOURCE_STREAMER:
>   		value = HAS_RESOURCE_STREAMER(dev);
>   		break;
> +	case I915_PARAM_HAS_EXEC_SOFTPIN:
> +		value = 1;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1396af9..73c3acf 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2816,6 +2816,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>   #define PIN_UPDATE	(1<<5)
>   #define PIN_ZONE_4G	(1<<6)
>   #define PIN_HIGH	(1<<7)
> +#define PIN_OFFSET_FIXED	(1<<8)
>   #define PIN_OFFSET_MASK (~4095)
>   int __must_check
>   i915_gem_object_pin(struct drm_i915_gem_object *obj,
> @@ -3163,6 +3164,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
>   					  unsigned long start,
>   					  unsigned long end,
>   					  unsigned flags);
> +int __must_check i915_gem_evict_for_vma(struct i915_vma *target);
>   int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>
>   /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 1e67484..c3453bd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3450,30 +3450,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>   	if (IS_ERR(vma))
>   		goto err_unpin;
>
> -	if (flags & PIN_HIGH) {
> -		search_flag = DRM_MM_SEARCH_BELOW;
> -		alloc_flag = DRM_MM_CREATE_TOP;
> +	if (flags & PIN_OFFSET_FIXED) {
> +		uint64_t offset = flags & PIN_OFFSET_MASK;
> +
> +		if (offset & (alignment - 1)) {
> +			ret = -EINVAL;
> +			goto err_free_vma;
> +		}
> +		vma->node.start = offset;
> +		vma->node.size = size;
> +		vma->node.color = obj->cache_level;
> +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);

This will return ENOSPC to userspace when they have passed in an address 
outside the (PP)GTT range which is misleading.

I would suggest explicit checking against the 'end' (available a bit 
further up in the same function) and returning something more 
appropriate. At least EINVAL, although execbuf overloads that one so 
much I would even rather steal some other semi-appropriate one like 
ENXIO (No such device or _address_) or something.

Regards,

Tvrtko

> +		if (ret) {
> +			ret = i915_gem_evict_for_vma(vma);
> +			if (ret == 0)
> +				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> +		}
> +		if (ret)
> +			goto err_free_vma;
>   	} else {
> -		search_flag = DRM_MM_SEARCH_DEFAULT;
> -		alloc_flag = DRM_MM_CREATE_DEFAULT;
> -	}
> +		if (flags & PIN_HIGH) {
> +			search_flag = DRM_MM_SEARCH_BELOW;
> +			alloc_flag = DRM_MM_CREATE_TOP;
> +		} else {
> +			search_flag = DRM_MM_SEARCH_DEFAULT;
> +			alloc_flag = DRM_MM_CREATE_DEFAULT;
> +		}
>
>   search_free:
> -	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> -						  size, alignment,
> -						  obj->cache_level,
> -						  start, end,
> -						  search_flag,
> -						  alloc_flag);
> -	if (ret) {
> -		ret = i915_gem_evict_something(dev, vm, size, alignment,
> -					       obj->cache_level,
> -					       start, end,
> -					       flags);
> -		if (ret == 0)
> -			goto search_free;
> +		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> +							  size, alignment,
> +							  obj->cache_level,
> +							  start, end,
> +							  search_flag,
> +							  alloc_flag);
> +		if (ret) {
> +			ret = i915_gem_evict_something(dev, vm, size, alignment,
> +						       obj->cache_level,
> +						       start, end,
> +						       flags);
> +			if (ret == 0)
> +				goto search_free;
>
> -		goto err_free_vma;
> +			goto err_free_vma;
> +		}
>   	}
>   	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>   		ret = -EINVAL;
> @@ -4007,6 +4027,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
>   	    vma->node.start < (flags & PIN_OFFSET_MASK))
>   		return true;
>
> +	if (flags & PIN_OFFSET_FIXED &&
> +	    vma->node.start != (flags & PIN_OFFSET_MASK))
> +		return true;
> +
>   	return false;
>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d71a133..07c6e4d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,45 @@ found:
>   	return ret;
>   }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target)
> +{
> +	struct drm_mm_node *node, *next;
> +
> +	list_for_each_entry_safe(node, next,
> +			&target->vm->mm.head_node.node_list,
> +			node_list) {
> +		struct i915_vma *vma;
> +		int ret;
> +
> +		if (node->start + node->size <= target->node.start)
> +			continue;
> +		if (node->start >= target->node.start + target->node.size)
> +			break;
> +
> +		vma = container_of(node, typeof(*vma), node);
> +
> +		if (vma->pin_count) {
> +			if (!vma->exec_entry || (vma->pin_count > 1))
> +				/* Object is pinned for some other use */
> +				return -EBUSY;
> +
> +			/* We need to evict a buffer in the same batch */
> +			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
> +				/* Overlapping fixed objects in the same batch */
> +				return -EINVAL;
> +
> +			return -ENOSPC;
> +		}
> +
> +		ret = i915_vma_unbind(vma);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	return 0;
> +}
> +
>   /**
>    * i915_gem_evict_vm - Evict all idle vmas from a vm
>    * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 6ed7d63a..b72ffe6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -599,6 +599,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>   			flags |= PIN_GLOBAL | PIN_MAPPABLE;
>   		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>   			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			flags |= entry->offset | PIN_OFFSET_FIXED;
>   		if ((flags & PIN_MAPPABLE) == 0)
>   			flags |= PIN_HIGH;
>   	}
> @@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>   	    vma->node.start & (entry->alignment - 1))
>   		return true;
>
> +	if (entry->flags & EXEC_OBJECT_PINNED &&
> +	    vma->node.start != entry->offset)
> +		return true;
> +
>   	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>   	    vma->node.start < BATCH_OFFSET_BIAS)
>   		return true;
> @@ -678,7 +684,8 @@ eb_vma_misplaced(struct i915_vma *vma)
>   	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>   		return !only_mappable_for_reloc(entry->flags);
>
> -	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> +	if (!(entry->flags &
> +	    (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
>   	    (vma->node.start + vma->node.size - 1) >> 32)
>   		return true;
>
> @@ -695,6 +702,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   	struct i915_vma *vma;
>   	struct i915_address_space *vm;
>   	struct list_head ordered_vmas;
> +	struct list_head pinned_vmas;
>   	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>   	int retry;
>
> @@ -703,6 +711,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>
>   	INIT_LIST_HEAD(&ordered_vmas);
> +	INIT_LIST_HEAD(&pinned_vmas);
>   	while (!list_empty(vmas)) {
>   		struct drm_i915_gem_exec_object2 *entry;
>   		bool need_fence, need_mappable;
> @@ -721,7 +730,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   			obj->tiling_mode != I915_TILING_NONE;
>   		need_mappable = need_fence || need_reloc_mappable(vma);
>
> -		if (need_mappable) {
> +		if (entry->flags & EXEC_OBJECT_PINNED)
> +			list_move_tail(&vma->exec_list, &pinned_vmas);
> +		else if (need_mappable) {
>   			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>   			list_move(&vma->exec_list, &ordered_vmas);
>   		} else
> @@ -731,6 +742,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>   		obj->base.pending_write_domain = 0;
>   	}
>   	list_splice(&ordered_vmas, vmas);
> +	list_splice(&pinned_vmas, vmas);
>
>   	/* Attempt to pin all of the buffers into the GTT.
>   	 * This is done in 3 phases:
> @@ -1317,7 +1329,8 @@ eb_get_batch(struct eb_vmas *eb)
>   	 * Note that actual hangs have only been observed on gen7, but for
>   	 * paranoia do it everywhere.
>   	 */
> -	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> +	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> +		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
>   	return vma->obj;
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 484a9fb..fc754a3 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_EU_TOTAL		 34
>   #define I915_PARAM_HAS_GPU_RESET	 35
>   #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> +#define I915_PARAM_HAS_EXEC_SOFTPIN	 37
>
>   typedef struct drm_i915_getparam {
>   	__s32 param;
> @@ -682,8 +683,12 @@ struct drm_i915_gem_exec_object2 {
>   	__u64 alignment;
>
>   	/**
> -	 * Returned value of the updated offset of the object, for future
> -	 * presumed_offset writes.
> +	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
> +	 * the user with the GTT offset at which this object will be pinned.
> +	 * When the I915_EXEC_NO_RELOC flag is specified this must contain the
> +	 * presumed_offset of the object.
> +	 * During execbuffer2 the kernel populates it with the value of the
> +	 * current GTT offset of the object, for future presumed_offset writes.
>   	 */
>   	__u64 offset;
>
> @@ -691,7 +696,8 @@ struct drm_i915_gem_exec_object2 {
>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>   #define EXEC_OBJECT_WRITE	(1<<2)
>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> +#define EXEC_OBJECT_PINNED	(1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>   	__u64 flags;
>
>   	__u64 rsvd1;
>


More information about the Intel-gfx mailing list