[Intel-gfx] [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
Goel, Akash
akash.goel at intel.com
Wed Jul 15 07:55:23 PDT 2015
On 6/30/2015 7:50 PM, Daniel, Thomas wrote:
> Many apologies to Michal for incorrectly spelling his name in the CC list.
>
> Thomas.
>
>> -----Original Message-----
>> From: Daniel, Thomas
>> Sent: Tuesday, June 30, 2015 3:13 PM
>> To: intel-gfx at lists.freedesktop.org
>> Cc: Chris Wilson; Goel, Akash; Belgaumkar, Vinay; Michal Winiarsky; Zou,
>> Nanhai; Daniel, Thomas
>> Subject: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer
>>
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> Userspace can pass in an offset that it presumes the object is located
>> at. The kernel will then do its utmost to fit the object into that
>> location. The assumption is that userspace is handling its own object
>> locations (for example along with full-ppgtt) and that the kernel will
>> rarely have to make space for the user's requests.
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris
>> Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski.
>> (Not published externally)
>>
>> v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction
>> to allow eviction of soft-pinned objects when another soft-pinned object used
>> by a subsequent execbuffer overlaps reported by Michal Winiarski.
>> (Not published externally)
>>
>> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are
>> pinned first after an address conflict happens to avoid repeated conflicts in
>> rare cases (Suggested by Chris Wilson). Expanded comment on
>> drm_i915_gem_exec_object2.offset to cover this new API.
>>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> Cc: Akash Goel <akash.goel at intel.com>
>> Cc: Vinay Belgaumkar <vinay.belgaumkar at intel.com>
>> Cc: Michal Winiarsky <michal.winiarsky at intel.com>
>> Cc: Zou Nanhai <nanhai.zou at intel.com>
>> Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 4 +++
>> drivers/gpu/drm/i915/i915_gem.c | 51 ++++++++++++++++++++--------
>> drivers/gpu/drm/i915/i915_gem_evict.c | 38 +++++++++++++++++++++
>> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++--
>> include/uapi/drm/i915_drm.h | 9 +++--
>> 5 files changed, 99 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index d90a782..e96c101 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>> #define PIN_USER (1<<4)
>> #define PIN_UPDATE (1<<5)
>> #define PIN_FULL_RANGE (1<<6)
>> +#define PIN_OFFSET_FIXED (1<<7)
>> #define PIN_OFFSET_MASK (~4095)
>> +
>> int __must_check
>> i915_gem_object_pin(struct drm_i915_gem_object *obj,
>> struct i915_address_space *vm,
>> @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct
>> drm_device *dev,
>> unsigned long start,
>> unsigned long end,
>> unsigned flags);
>> +int __must_check
>> +i915_gem_evict_for_vma(struct i915_vma *target);
>> int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>> int i915_gem_evict_everything(struct drm_device *dev);
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index f170da6..a7e5ff2 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct
>> drm_i915_gem_object *obj,
>> if (IS_ERR(vma))
>> goto err_unpin;
>>
>> + if (flags & PIN_OFFSET_FIXED) {
>> + uint64_t offset = flags & PIN_OFFSET_MASK;
>> + if (offset & (alignment - 1)) {
>> + ret = -EINVAL;
>> + goto err_free_vma;
>> + }
>> + vma->node.start = offset;
>> + vma->node.size = size;
>> + vma->node.color = obj->cache_level;
>> + ret = drm_mm_reserve_node(&vm->mm, &vma->node);
>> + if (ret) {
>> + ret = i915_gem_evict_for_vma(vma);
>> + if (ret == 0)
>> + ret = drm_mm_reserve_node(&vm->mm,
>> &vma->node);
>> + }
>> + if (ret)
>> + goto err_free_vma;
>> + } else {
>> search_free:
>> - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
>> - size, alignment,
>> - obj->cache_level,
>> - start, end,
>> - search_flag,
>> - alloc_flag);
>> - if (ret) {
>> - ret = i915_gem_evict_something(dev, vm, size, alignment,
>> - obj->cache_level,
>> - start, end,
>> - flags);
>> - if (ret == 0)
>> - goto search_free;
>> + ret = drm_mm_insert_node_in_range_generic(&vm->mm,
>> &vma->node,
>> + size, alignment,
>> + obj->cache_level,
>> + start, end,
>> + search_flag,
>> + alloc_flag);
>> + if (ret) {
>> + ret = i915_gem_evict_something(dev, vm, size,
>> alignment,
>> + obj->cache_level,
>> + start, end,
>> + flags);
>> + if (ret == 0)
>> + goto search_free;
>>
>> - goto err_free_vma;
>> + goto err_free_vma;
>> + }
>> }
>> if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
>> ret = -EINVAL;
>> @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma,
>> uint32_t alignment, uint64_t flags)
>> vma->node.start < (flags & PIN_OFFSET_MASK))
>> return true;
>>
>> + if (flags & PIN_OFFSET_FIXED &&
>> + vma->node.start != (flags & PIN_OFFSET_MASK))
>> + return true;
>> +
>> return false;
>> }
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
>> b/drivers/gpu/drm/i915/i915_gem_evict.c
>> index d09e35e..6098e2f 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
>> @@ -199,6 +199,44 @@ found:
>> return ret;
>> }
>>
>> +int
>> +i915_gem_evict_for_vma(struct i915_vma *target)
>> +{
>> + struct drm_mm_node *node, *next;
>> +
>> + list_for_each_entry_safe(node, next,
>> + &target->vm->mm.head_node.node_list,
>> + node_list) {
>> + struct i915_vma *vma;
>> + int ret;
>> +
>> + if (node->start + node->size <= target->node.start)
>> + continue;
>> + if (node->start >= target->node.start + target->node.size)
>> + break;
>> +
>> + vma = container_of(node, typeof(*vma), node);
>> +
>> + if (vma->pin_count) {
>> + /* We may need to evict a buffer in the same batch */
>> + if (!vma->exec_entry)
>> + return -EBUSY;
>> +
>> + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED)
>> + /* Overlapping fixed objects in the same batch
>> */
>> + return -EINVAL;
>> +
>> + return -ENOSPC;
Can we actually hit this condition, considering the soft pinned objects
are now on the front side of 'eb->vmas' list ?
If we do encounter such a case, it probably means that the overlapping
object is already pinned from some other path.
Is there a scope of an additional check here ?
i.e. if (vma->pin_count) is > 1, this indicates that the object is not
only pinned due to execbuffer, hence cannot be evicted, so -EBUSY can be
straight away returned to User.
Best regards
Akash
>> + }
>> +
>> + ret = i915_vma_unbind(vma);
>> + if (ret)
>> + return ret;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> /**
>> * i915_gem_evict_vm - Evict all idle vmas from a vm
>> * @vm: Address space to cleanse
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 3d94744..4e6955e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma
>> *vma,
>> flags |= PIN_GLOBAL | PIN_MAPPABLE;
>> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
>> flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
>> + if (entry->flags & EXEC_OBJECT_PINNED)
>> + flags |= entry->offset | PIN_OFFSET_FIXED;
>> }
>>
>> ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
>> @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma)
>> vma->node.start & (entry->alignment - 1))
>> return true;
>>
>> + if (entry->flags & EXEC_OBJECT_PINNED &&
>> + vma->node.start != entry->offset)
>> + return true;
>> +
>> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>> vma->node.start < BATCH_OFFSET_BIAS)
>> return true;
>> @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>> struct i915_vma *vma;
>> struct i915_address_space *vm;
>> struct list_head ordered_vmas;
>> + struct list_head pinned_vmas;
>> bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
>> int retry;
>>
>> @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>> vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
>>
>> INIT_LIST_HEAD(&ordered_vmas);
>> + INIT_LIST_HEAD(&pinned_vmas);
>> while (!list_empty(vmas)) {
>> struct drm_i915_gem_exec_object2 *entry;
>> bool need_fence, need_mappable;
>> @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>> obj->tiling_mode != I915_TILING_NONE;
>> need_mappable = need_fence || need_reloc_mappable(vma);
>>
>> - if (need_mappable) {
>> + if (entry->flags & EXEC_OBJECT_PINNED)
>> + list_move_tail(&vma->exec_list, &pinned_vmas);
>> + else if (need_mappable) {
>> entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
>> list_move(&vma->exec_list, &ordered_vmas);
>> } else
>> @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs
>> *ring,
>> obj->base.pending_write_domain = 0;
>> }
>> list_splice(&ordered_vmas, vmas);
>> + list_splice(&pinned_vmas, vmas);
>>
>> /* Attempt to pin all of the buffers into the GTT.
>> * This is done in 3 phases:
>> @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb)
>> * Note that actual hangs have only been observed on gen7, but for
>> * paranoia do it everywhere.
>> */
>> - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>> + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
>> + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>>
>> return vma->obj;
>> }
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 55ba527..18f8f99 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 {
>> __u64 alignment;
>>
>> /**
>> - * Returned value of the updated offset of the object, for future
>> - * presumed_offset writes.
>> + * When the EXEC_OBJECT_PINNED flag is specified this is populated by
>> + * the user with the GTT offset at which this object will be pinned.
>> + * Otherwise the kernel populates it with the value of the updated
>> + * offset of the object, for future presumed_offset writes.
>> */
>> __u64 offset;
>>
>> @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 {
>> #define EXEC_OBJECT_NEEDS_GTT (1<<1)
>> #define EXEC_OBJECT_WRITE (1<<2)
>> #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3)
>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -
>> (EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1)
>> +#define EXEC_OBJECT_PINNED (1<<4)
>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
>> __u64 flags;
>>
>> __u64 rsvd1;
>> --
>> 1.7.9.5
>
More information about the Intel-gfx
mailing list