[Intel-gfx] [PATCH v4] drm/i915: Add soft-pinning API for execbuffer

Goel, Akash akash.goel at intel.com
Wed Jul 15 07:55:23 PDT 2015



On 6/30/2015 7:50 PM, Daniel, Thomas wrote:
> Many apologies to Michal for incorrectly spelling his name in the CC list.
>
> Thomas.
>
>> -----Original Message-----
>> From: Daniel, Thomas
>> Sent: Tuesday, June 30, 2015 3:13 PM
>> To: intel-gfx at lists.freedesktop.org
>> Cc: Chris Wilson; Goel, Akash; Belgaumkar, Vinay; Michal Winiarsky; Zou,
>> Nanhai; Daniel, Thomas
>> Subject: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
>>
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> Userspace can pass in an offset that it presumes the object is located
>> at. The kernel will then do its utmost to fit the object into that
>> location. The assumption is that userspace is handling its own object
>> locations (for example along with full-ppgtt) and that the kernel will
>> rarely have to make space for the user's requests.
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
>> Wilson.  Fixed incorrect error paths causing crash found by Michal Winiarski.
>> (Not published externally)
>>
>> v3: Rebased because of trivial conflict in object_bind_to_vm.  Fixed eviction
>> to allow eviction of soft-pinned objects when another soft-pinned object used
>> by a subsequent execbuffer overlaps reported by Michal Winiarski.
>> (Not published externally)
>>
>> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
>> pinned first after an address conflict happens to avoid repeated conflicts in
>> rare cases (Suggested by Chris Wilson).  Expanded comment on
>> drm_i915_gem_exec_object2.offset to cover this new API.
>>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Akash Goel <akash.goel at intel.com>
>> Cc: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> Cc: Michal Winiarsky <michal.winiarsky at intel.com>
>> Cc: Zou Nanhai <nanhai.zou at intel.com>
>> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h            |    4 +++
>>   drivers/gpu/drm/i915/i915_gem.c            |   51 ++++++++++++++++++++--------
>>   drivers/gpu/drm/i915/i915_gem_evict.c      |   38 +++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |   16 +++++++--
>>   include/uapi/drm/i915_drm.h                |    9 +++--
>>   5 files changed, 99 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index d90a782..e96c101 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>>   #define PIN_USER	(1<<4)
>>   #define PIN_UPDATE	(1<<5)
>>   #define PIN_FULL_RANGE	(1<<6)
>> +#define PIN_OFFSET_FIXED	(1<<7)
>>   #define PIN_OFFSET_MASK (~4095)
>> +
>>   int __must_check
>>   i915_gem_object_pin(struct drm_i915_gem_object *obj,
>>   		    struct i915_address_space *vm,
>> @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct
>> drm_device *dev,
>>   					  unsigned long start,
>>   					  unsigned long end,
>>   					  unsigned flags);
>> +int __must_check
>> +i915_gem_evict_for_vma(struct i915_vma *target);
>>   int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>>   int i915_gem_evict_everything(struct drm_device *dev);
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index f170da6..a7e5ff2 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct
>> drm_i915_gem_object *obj,
>>   	if (IS_ERR(vma))
>>   		goto err_unpin;
>>
>> +	if (flags & PIN_OFFSET_FIXED) {
>> +		uint64_t offset = flags & PIN_OFFSET_MASK;
>> +		if (offset & (alignment - 1)) {
>> +			ret = -EINVAL;
>> +			goto err_free_vma;
>> +		}
>> +		vma->node.start = offset;
>> +		vma->node.size = size;
>> +		vma->node.color = obj->cache_level;
>> +		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
>> +		if (ret) {
>> +			ret = i915_gem_evict_for_vma(vma);
>> +			if (ret == 0)
>> +				ret = drm_mm_reserve_node(&vm->mm,
>> &vma->node);
>> +		}
>> +		if (ret)
>> +			goto err_free_vma;
>> +	} else {
>>   search_free:
>> -	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
>> -						  size, alignment,
>> -						  obj->cache_level,
>> -						  start, end,
>> -						  search_flag,
>> -						  alloc_flag);
>> -	if (ret) {
>> -		ret = i915_gem_evict_something(dev, vm, size, alignment,
>> -					       obj->cache_level,
>> -					       start, end,
>> -					       flags);
>> -		if (ret == 0)
>> -			goto search_free;
>> +		ret = drm_mm_insert_node_in_range_generic(&vm->mm,
>> &vma->node,
>> +						  	  size, alignment,
>> +						  	  obj->cache_level,
>> +						  	  start, end,
>> +						  	  search_flag,
>> +						  	  alloc_flag);
>> +		if (ret) {
>> +			ret = i915_gem_evict_something(dev, vm, size,
>> alignment,
>> +					       	       obj->cache_level,
>> +					       	       start, end,
>> +					       	       flags);
>> +			if (ret == 0)
>> +				goto search_free;
>>
>> -		goto err_free_vma;
>> +			goto err_free_vma;
>> +		}
>>   	}
>>   	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>>   		ret = -EINVAL;
>> @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma,
>> uint32_t alignment, uint64_t flags)
>>   	    vma->node.start < (flags & PIN_OFFSET_MASK))
>>   		return true;
>>
>> +	if (flags & PIN_OFFSET_FIXED &&
>> +	    vma->node.start != (flags & PIN_OFFSET_MASK))
>> +		return true;
>> +
>>   	return false;
>>   }
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
>> b/drivers/gpu/drm/i915/i915_gem_evict.c
>> index d09e35e..6098e2f 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
>> @@ -199,6 +199,44 @@ found:
>>   	return ret;
>>   }
>>
>> +int
>> +i915_gem_evict_for_vma(struct i915_vma *target)
>> +{
>> +	struct drm_mm_node *node, *next;
>> +
>> +	list_for_each_entry_safe(node, next,
>> +			&target->vm->mm.head_node.node_list,
>> +			node_list) {
>> +		struct i915_vma *vma;
>> +		int ret;
>> +
>> +		if (node->start + node->size <= target->node.start)
>> +			continue;
>> +		if (node->start >= target->node.start + target->node.size)
>> +			break;
>> +
>> +		vma = container_of(node, typeof(*vma), node);
>> +
>> +		if (vma->pin_count) {
>> +			/* We may need to evict a buffer in the same batch */
>> +			if (!vma->exec_entry)
>> +				return -EBUSY;
>> +
>> +			if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
>> +				/* Overlapping fixed objects in the same batch
>> */
>> +				return -EINVAL;
>> +
>> +			return -ENOSPC;

Can we actually hit this condition, considering the soft pinned objects 
are now on the front side of 'eb->vmas' list ?
If we do encounter such a case, it probably means that the overlapping 
object is already pinned from some other path.

Is there a scope of an additional check here ?
i.e. if (vma->pin_count) is > 1, this indicates that the object is not 
only pinned due to execbuffer, hence cannot be evicted, so -EBUSY can be 
straight away returned to User.

Best regards
Akash

>> +		}
>> +
>> +		ret = i915_vma_unbind(vma);
>> +		if (ret)
>> +			return ret;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   /**
>>    * i915_gem_evict_vm - Evict all idle vmas from a vm
>>    * @vm: Address space to cleanse
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 3d94744..4e6955e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma
>> *vma,
>>   			flags |= PIN_GLOBAL | PIN_MAPPABLE;
>>   		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>>   			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
>> +		if (entry->flags & EXEC_OBJECT_PINNED)
>> +			flags |= entry->offset | PIN_OFFSET_FIXED;
>>   	}
>>
>>   	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
>> @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>>   	    vma->node.start & (entry->alignment - 1))
>>   		return true;
>>
>> +	if (entry->flags & EXEC_OBJECT_PINNED &&
>> +	    vma->node.start != entry->offset)
>> +		return true;
>> +
>>   	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>>   	    vma->node.start < BATCH_OFFSET_BIAS)
>>   		return true;
>> @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   	struct i915_vma *vma;
>>   	struct i915_address_space *vm;
>>   	struct list_head ordered_vmas;
>> +	struct list_head pinned_vmas;
>>   	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>>   	int retry;
>>
>> @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>>
>>   	INIT_LIST_HEAD(&ordered_vmas);
>> +	INIT_LIST_HEAD(&pinned_vmas);
>>   	while (!list_empty(vmas)) {
>>   		struct drm_i915_gem_exec_object2 *entry;
>>   		bool need_fence, need_mappable;
>> @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   			obj->tiling_mode != I915_TILING_NONE;
>>   		need_mappable = need_fence || need_reloc_mappable(vma);
>>
>> -		if (need_mappable) {
>> +		if (entry->flags & EXEC_OBJECT_PINNED)
>> +			list_move_tail(&vma->exec_list, &pinned_vmas);
>> +		else if (need_mappable) {
>>   			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>>   			list_move(&vma->exec_list, &ordered_vmas);
>>   		} else
>> @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>>   		obj->base.pending_write_domain = 0;
>>   	}
>>   	list_splice(&ordered_vmas, vmas);
>> +	list_splice(&pinned_vmas, vmas);
>>
>>   	/* Attempt to pin all of the buffers into the GTT.
>>   	 * This is done in 3 phases:
>> @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb)
>>   	 * Note that actual hangs have only been observed on gen7, but for
>>   	 * paranoia do it everywhere.
>>   	 */
>> -	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>> +	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
>> +		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>>
>>   	return vma->obj;
>>   }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 55ba527..18f8f99 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 {
>>   	__u64 alignment;
>>
>>   	/**
>> -	 * Returned value of the updated offset of the object, for future
>> -	 * presumed_offset writes.
>> +	 * When the EXEC_OBJECT_PINNED flag is specified this is populated by
>> +	 * the user with the GTT offset at which this object will be pinned.
>> +	 * Otherwise the kernel populates it with the value of the updated
>> +	 * offset of the object, for future presumed_offset writes.
>>   	 */
>>   	__u64 offset;
>>
>> @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 {
>>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>>   #define EXEC_OBJECT_WRITE	(1<<2)
>>   #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -
>> (EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
>> +#define EXEC_OBJECT_PINNED	(1<<4)
>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>>   	__u64 flags;
>>
>>   	__u64 rsvd1;
>> --
>> 1.7.9.5
>


More information about the Intel-gfx mailing list