[Intel-gfx] [PATCH 27/28] drm/i915: Allow vma binding to occur asynchronously
Matthew Auld
matthew.william.auld at gmail.com
Mon Jun 10 17:34:52 UTC 2019
On Mon, 10 Jun 2019 at 08:21, Chris Wilson <chris at chris-wilson.co.uk> wrote:
>
> If we let pages be allocated asynchronously, we also then want to push
> the binding process into an asynchronous task. Make it so, utilising the
> recent improvements to fence error tracking and struct_mutex reduction.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
[snip]
> +static int queue_async_bind(struct i915_vma *vma,
> + enum i915_cache_level cache_level,
> + u32 flags)
> +{
> + bool ready = true;
> +
> + /* We are not allowed to shrink inside vm->mutex! */
> + vma->async.dma = kmalloc(sizeof(*vma->async.dma),
> + GFP_NOWAIT | __GFP_NOWARN);
> + if (!vma->async.dma)
> + return -ENOMEM;
> +
> + dma_fence_init(vma->async.dma,
> + &async_bind_ops,
> + &async_lock,
> + vma->vm->i915->mm.unordered_timeline,
> + 0);
> +
> + /* XXX find and avoid allocations under reservation_object locks */
> + if (!i915_vma_trylock(vma)) {
> + kfree(fetch_and_zero(&vma->async.dma));
> + return -EAGAIN;
> + }
> +
> + if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
> + struct dma_fence *f = reservation_object_get_excl(vma->resv);
> +
> + if (!dma_fence_add_callback(f,
> + &vma->async.cb,
> + __queue_async_bind))
> + ready = false;
> + }
> + reservation_object_add_excl_fence(vma->resv, vma->async.dma);
> + i915_vma_unlock(vma);
> +
> + i915_vm_get(vma->vm);
> + i915_vma_get(vma);
> + __i915_vma_pin(vma); /* avoid being shrunk */
> +
> + vma->async.cache_level = cache_level;
> + vma->async.flags = flags;
Do we need to move this setup (the extra references, the pin and the async bind parameters) to before the add_callback?
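i.e. something like the below (a completely untested sketch, just re-ordering
the code already in the patch), so that __queue_async_bind() never sees
uninitialised state if the fence has already signalled and the callback runs
immediately:

	/* Take our references and record the bind parameters first... */
	i915_vm_get(vma->vm);
	i915_vma_get(vma);
	__i915_vma_pin(vma); /* avoid being shrunk */

	vma->async.cache_level = cache_level;
	vma->async.flags = flags;

	/* ...and only then expose vma->async.cb to the fence. */
	if (rcu_access_pointer(vma->resv->fence_excl)) { /* async pages */
		struct dma_fence *f = reservation_object_get_excl(vma->resv);

		if (!dma_fence_add_callback(f,
					    &vma->async.cb,
					    __queue_async_bind))
			ready = false;
	}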
> +
> + if (ready)
> + __queue_async_bind(vma->async.dma, &vma->async.cb);
> +
> + return 0;
> +}
> +
> /**
> * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
> * @vma: VMA to map
> @@ -293,17 +405,12 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
> u32 vma_flags;
> int ret;
>
> + GEM_BUG_ON(!flags);
> GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
> GEM_BUG_ON(vma->size > vma->node.size);
> -
> - if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
> - vma->node.size,
> - vma->vm->total)))
> - return -ENODEV;
> -
> - if (GEM_DEBUG_WARN_ON(!flags))
> - return -EINVAL;
> -
> + GEM_BUG_ON(range_overflows(vma->node.start,
> + vma->node.size,
> + vma->vm->total));
> bind_flags = 0;
> if (flags & PIN_GLOBAL)
> bind_flags |= I915_VMA_GLOBAL_BIND;
> @@ -318,14 +425,18 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
Are we aiming for vma_bind/vma_bind_async/vma_pin/vma_pin_async etc.?
> if (bind_flags == 0)
> return 0;
>
> - GEM_BUG_ON(!vma->pages);
> + if ((bind_flags & ~vma_flags) & I915_VMA_LOCAL_BIND)
> + bind_flags |= I915_VMA_ALLOC_BIND;
>
> trace_i915_vma_bind(vma, bind_flags);
> - ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
> + if (bind_flags & I915_VMA_ALLOC_BIND)
> + ret = queue_async_bind(vma, cache_level, bind_flags);
> + else
> + ret = __vma_bind(vma, cache_level, bind_flags);
> if (ret)
> return ret;
Looks like clear_pages() is called unconditionally in i915_vma_remove() if
we error out here, even though the pages are now only set as part of the
async worker?
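Maybe i915_vma_remove() needs a guard along the lines of (sketch only,
assuming the rest of that function stays as is):

	if (vma->pages)
		vma->ops->clear_pages(vma);

or alternatively the async worker becomes responsible for undoing its own
set_pages on error.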
>
> - vma->flags |= bind_flags;
> + vma->flags |= bind_flags & ~I915_VMA_ALLOC_BIND;
> return 0;
> }
>
> @@ -569,7 +680,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> }
>
> if (vma->obj) {
> - ret = i915_gem_object_pin_pages(vma->obj);
> + ret = i915_gem_object_pin_pages_async(vma->obj);
> if (ret)
> return ret;
>
> @@ -578,25 +689,19 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> cache_level = 0;
> }
>
> - GEM_BUG_ON(vma->pages);
> -
> - ret = vma->ops->set_pages(vma);
> - if (ret)
> - goto err_unpin;
> -
I guess one casualty here is the !PIN_OFFSET_FIXED path for
huge-gtt-pages, since we need to inspect vma->page_sizes just below, and
that is now only set by the async worker. Though probably not the end of
the world if that's the case.
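For reference, the bit I mean is roughly the following (paraphrasing the
current i915_vma_insert(), so treat the details loosely):

	if (upper_32_bits(end) &&
	    vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
		u64 page_alignment =
			rounddown_pow_of_two(vma->page_sizes.sg |
					     I915_GTT_PAGE_SIZE_2M);

		alignment = max(alignment, page_alignment);

		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
			size = round_up(size, I915_GTT_PAGE_SIZE_2M);
	}

which depends on vma->page_sizes being populated here, but that now only
happens later in the async worker.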
> if (flags & PIN_OFFSET_FIXED) {
> u64 offset = flags & PIN_OFFSET_MASK;
> if (!IS_ALIGNED(offset, alignment) ||
> range_overflows(offset, size, end)) {
> ret = -EINVAL;
> - goto err_clear;
> + goto err_unpin;
> }
>
> ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
> size, offset, cache_level,
> flags);
> if (ret)
> - goto err_clear;
> + goto err_unpin;
> } else {
> /*
> * We only support huge gtt pages through the 48b PPGTT,
> @@ -635,7 +740,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> size, alignment, cache_level,
> start, end, flags);
> if (ret)
> - goto err_clear;
> + goto err_unpin;
>
> GEM_BUG_ON(vma->node.start < start);
> GEM_BUG_ON(vma->node.start + vma->node.size > end);
> @@ -654,8 +759,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>
> return 0;
>
> -err_clear:
> - vma->ops->clear_pages(vma);
> err_unpin:
> if (vma->obj)
> i915_gem_object_unpin_pages(vma->obj);
> @@ -790,7 +893,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
>
> spin_lock(&obj->vma.lock);
> list_del(&vma->obj_link);
> - rb_erase(&vma->obj_node, &vma->obj->vma.tree);
> + rb_erase(&vma->obj_node, &obj->vma.tree);
> spin_unlock(&obj->vma.lock);
> }
>
> @@ -930,6 +1033,11 @@ int i915_vma_unbind(struct i915_vma *vma)
>
> lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
>
> + if (vma->async.dma &&
> + dma_fence_wait_timeout(vma->async.dma, true,
> + MAX_SCHEDULE_TIMEOUT) < 0)
> + return -EINTR;
> +
> ret = i915_active_wait(&vma->active);
> if (ret)
> return ret;
> @@ -975,6 +1083,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> }
> vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND);
>
> + dma_fence_put(fetch_and_zero(&vma->async.dma));
> +
> i915_vma_remove(vma);
>
> return 0;
> diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
> index 71088ff4ad59..67e43f5d01f6 100644
> --- a/drivers/gpu/drm/i915/i915_vma.h
> +++ b/drivers/gpu/drm/i915/i915_vma.h
> @@ -103,6 +103,7 @@ struct i915_vma {
> #define I915_VMA_GLOBAL_BIND BIT(9)
> #define I915_VMA_LOCAL_BIND BIT(10)
> #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW)
> +#define I915_VMA_ALLOC_BIND I915_VMA_PIN_OVERFLOW /* not stored */
>
> #define I915_VMA_GGTT BIT(11)
> #define I915_VMA_CAN_FENCE BIT(12)
> @@ -143,6 +144,14 @@ struct i915_vma {
> unsigned int *exec_flags;
> struct hlist_node exec_node;
> u32 exec_handle;
> +
> + struct i915_vma_async_bind {
> + struct dma_fence *dma;
> + struct dma_fence_cb cb;
> + struct work_struct work;
> + unsigned int cache_level;
> + unsigned int flags;
> + } async;
> };
>
> struct i915_vma *
> @@ -305,6 +314,11 @@ static inline void i915_vma_lock(struct i915_vma *vma)
> reservation_object_lock(vma->resv, NULL);
> }
>
> +static inline bool i915_vma_trylock(struct i915_vma *vma)
> +{
> + return reservation_object_trylock(vma->resv);
> +}
> +
> static inline void i915_vma_unlock(struct i915_vma *vma)
> {
> reservation_object_unlock(vma->resv);
> diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
> index a166d9405a94..615ac485c731 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_vma.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
> @@ -204,8 +204,10 @@ static int igt_vma_create(void *arg)
> mock_context_close(ctx);
> }
>
> - list_for_each_entry_safe(obj, on, &objects, st_link)
> + list_for_each_entry_safe(obj, on, &objects, st_link) {
> + i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT);
> i915_gem_object_put(obj);
> + }
> return err;
> }
>
> --
> 2.20.1
>