[Intel-gfx] [PATCH 13/21] drm/i915: Pull i915_vma_pin under the vm->mutex

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Sep 19 13:37:01 UTC 2019


On 17/09/2019 19:56, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-09-17 13:37:55)
>> On 02/09/2019 05:02, Chris Wilson wrote:
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>>> index 82db2b783123..9a8c307c5aeb 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
>>> @@ -251,16 +251,6 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>>>                goto err_rpm;
>>>        }
>>>    
>>> -     ret = i915_mutex_lock_interruptible(dev);
>>> -     if (ret)
>>> -             goto err_reset;
>>> -
>>> -     /* Access to snoopable pages through the GTT is incoherent. */
>>> -     if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
>>> -             ret = -EFAULT;
>>> -             goto err_unlock;
>>> -     }
>>> -
>>>        /* Now pin it into the GTT as needed */
>>>        vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
>>>                                       PIN_MAPPABLE |
>>> @@ -293,7 +283,13 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>>>        }
>>>        if (IS_ERR(vma)) {
>>>                ret = PTR_ERR(vma);
>>> -             goto err_unlock;
>>> +             goto err_reset;
>>> +     }
>>> +
>>> +     /* Access to snoopable pages through the GTT is incoherent. */
>>> +     if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(i915)) {
>>> +             ret = -EFAULT;
>>> +             goto err_unpin;
>>>        }
>>
>> Why have you moved this check to after pinning?
> 
> Since we've dropped the lock around this check, the intent is to use the
> pin as a guarantee that the vma cannot be changed. What we actually want
> here, for clarity, is vma->cache_level.
> 
>>>    
>>>        ret = i915_vma_pin_fence(vma);
>>> @@ -321,14 +317,12 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
>>>                intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
>>>                                   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
>>>    
>>> -     i915_vma_set_ggtt_write(vma);
>>> -
>>> +     if (write)
>>> +             i915_vma_set_ggtt_write(vma);
>>
>> Noise for what this patch is concerned?
> 
> Yeah, I had to stare at an incorrect __set_bit(GGTT_WRITE) for too long.
> 
>>
>>>    err_fence:
>>>        i915_vma_unpin_fence(vma);
>>>    err_unpin:
>>>        __i915_vma_unpin(vma);
>>> -err_unlock:
>>> -     mutex_unlock(&dev->struct_mutex);
>>>    err_reset:
>>>        intel_gt_reset_unlock(ggtt->vm.gt, srcu);
>>>    err_rpm:
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>>> index 0ef60dae23a7..dbf9be9a79f4 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
>>> @@ -155,21 +155,30 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
>>>    
>>>        wakeref = intel_runtime_pm_get(&i915->runtime_pm);
>>>        llist_for_each_entry_safe(obj, on, freed, freed) {
>>> -             struct i915_vma *vma, *vn;
>>> -
>>>                trace_i915_gem_object_destroy(obj);
>>>    
>>> -             mutex_lock(&i915->drm.struct_mutex);
>>> -
>>> -             list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
>>> -                     GEM_BUG_ON(i915_vma_is_active(vma));
>>> -                     atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
>>> -                     i915_vma_destroy(vma);
>>> +             if (!list_empty(&obj->vma.list)) {
>>> +                     struct i915_vma *vma;
>>> +
>>> +                     /*
>>> +                      * Note that the vma keeps an object reference while
>>> +                      * it is active, so it *should* not sleep while we
>>> +                      * destroy it. Our debug code errs and insists it *might*.
>>> +                      * For the moment, play along.
>>> +                      */
>>> +                     spin_lock(&obj->vma.lock);
>>> +                     while ((vma = list_first_entry_or_null(&obj->vma.list,
>>> +                                                            struct i915_vma,
>>> +                                                            obj_link)))
>>
>> What is the point of having a while loop inside top-level if !list_empty
>> check? Looks theoretically racy, and even if that is irrelevant, it
>> would be clearer to just do the while loop.
> 
> We can't add more vma to the object, as there are no more refs to the
> object. The challenge is in dropping the links. In the end it makes no
> difference, it's an unlocked compare before the spinlock (and ugly
> loop).

I was simply thinking it would be one indent level less, for readability 
both on the visual and the conceptual level. But it doesn't matter hugely.

> 
>>>        GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>>>    
>>> +     GEM_BUG_ON(vma->pages);
>>>        vma->pages = obj->mm.pages;
>>> +     atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
>>
>> What is I915_VMA_PAGES_ACTIVE used for? Pinned? Has pages?
> 
> It's a counter for the number of unique binds as opposed to callers
> merely acquiring the pages prior to binding; so that we can balance
> vma_unbind_pages() which is called once (i915_vma_unbind) but
> i915_vma_bind() can be called for both PIN_GLOBAL and PIN_USER.
> 
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
>>> index ca0c2f451742..b9cfae0e4435 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
>>> @@ -181,22 +181,25 @@ static int
>>>    i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
>>>                              int tiling_mode, unsigned int stride)
>>>    {
>>> +     struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
>>>        struct i915_vma *vma;
>>> -     int ret;
>>> +     int ret = 0;
>>>    
>>>        if (tiling_mode == I915_TILING_NONE)
>>>                return 0;
>>>    
>>> +     mutex_lock(&ggtt->vm.mutex);
>>>        for_each_ggtt_vma(vma, obj) {
>>>                if (i915_vma_fence_prepare(vma, tiling_mode, stride))
>>>                        continue;
>>
>> vma_fence_prepare doesn't need to be under mutex, but it's much easier
>> for this loop, yes?
> 
> fence_prepare is just a simple predicate, so no. What we do need though
> is to pull it under the object-lock so that we don't prepare for one
> tiling and bind another. Hmm.
> 
>>> @@ -333,7 +344,11 @@ static int igt_check_page_sizes(struct i915_vma *vma)
>>>        struct drm_i915_private *i915 = vma->vm->i915;
>>>        unsigned int supported = INTEL_INFO(i915)->page_sizes;
>>>        struct drm_i915_gem_object *obj = vma->obj;
>>> -     int err = 0;
>>> +     int err;
>>> +
>>> +     err = i915_active_wait(&vma->active);
>>> +     if (err)
>>> +             return err;
>>
>> Touched upon this in the previous email, but I think we need to put some
>> clear notes in writing, in some good place, regarding when callers need
>> to explicitly do this and when not. Don't know where though.. commit
>> message is weak. Maybe kernel doc next to i915_vma_unbind?
> 
> The couple of places that care are the exception. Though I'm
> contemplating using the async bind to circumvent the stop_machine()...
> 
>>> diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>>> index da54a718c712..aa67c02ba98c 100644
>>> --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
>>> @@ -747,10 +747,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
>>>        if (err)
>>>                goto skip_request;
>>>    
>>> -     i915_vma_unpin(batch);
>>> -     i915_vma_close(batch);
>>> -     i915_vma_put(batch);
>>> -
>>> +     i915_vma_unpin_and_release(&batch, 0);
>>
>> Is this consolidation enabled by this patch or was otherwise possible?
> 
> At this point, it is deduplication. What prompted the change was that a few
> callers mixed up the potential use-after-free of vma->obj.
> 
>> Something rings familiar about some problem with
>> i915_vma_unpin_and_release but I can't remember what.. Semantics are a
>> bit weird though.. whereas pattern before was to i915_vma_put after
>> instantiating a vma, now it is unpin_and_release.. release being a bit
>> of an odd term in this space. There was/is no symmetry with get/put
>> anyway so I guess it's fine.
> 
> It'll be back to i915_vma_put() eventually.
> 
>>> +void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
>>> +{
>>> +     GEM_BUG_ON(i915_active_is_idle(ref));
>>> +
>>> +     dma_fence_get(f);
>>> +
>>> +     rcu_read_lock();
>>> +     if (rcu_access_pointer(ref->excl)) {
>>> +             struct dma_fence *old;
>>> +
>>> +             old = dma_fence_get_rcu_safe(&ref->excl);
>>> +             if (old) {
>>> +                     if (dma_fence_remove_callback(old, &ref->excl_cb))
>>> +                             atomic_dec(&ref->count);
>>> +                     dma_fence_put(old);
>>> +             }
>>
>> Put some commentary in describing the business done with ref->excl and
>> callbacks. Or this goes away later?
> 
> void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
> {
>          GEM_BUG_ON(i915_active_is_idle(ref));
> 
>          mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
>          if (!__i915_active_fence_set(&ref->excl, f))
>                  atomic_inc(&ref->count);
>          mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
> }

What is this?

> 
>>> +static int excl_wait(struct i915_active *ref)
>>> +{
>>> +     struct dma_fence *old;
>>> +     int err = 0;
>>> +
>>> +     if (!rcu_access_pointer(ref->excl))
>>> +             return 0;
>>> +
>>> +     rcu_read_lock();
>>> +     old = dma_fence_get_rcu_safe(&ref->excl);
>>> +     rcu_read_unlock();
>>
>> ref->excl can go from something to NULL, but not from NULL to something, in here?
> 
> Yes, but the wait is on the snapshot of fences at that moment in time.
> For things like unbind, if it becomes busy again after we start, what is
> the best course of action -- currently we swear and try again.
> 
> The problem is more evident later in the comment:
> 
> /* Any fence added after the wait begins will not be auto-signaled */
> 
> which means the wait may end waiting for the background worker to flush
> fences.
> 
>>>    int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
>>>    {
>>> -     struct active_node *it, *n;
>>> -     int err;
>>> -
>>> -     if (RB_EMPTY_ROOT(&ref->tree))
>>> -             return 0;
>>> +     int err = 0;
>>>    
>>> -     /* await allocates and so we need to avoid hitting the shrinker */
>>> -     err = i915_active_acquire(ref);
>>> -     if (err)
>>> -             return err;
>>> +     if (rcu_access_pointer(ref->excl)) {
>>
>> excl_wait used the if-not-return pattern, but essentially the same question
>> about possible races. I guess there aren't any since only retirement is
>> relevant which is something-to-NULL and that's handled fine. But I am
>> not sure, possibly comments are in order.
> 
> It's the same as any other wait on a set of fences: the wait is on the
> snapshot of fences at that moment.
> 
> If you need stricter control (such as ensuring ordering within a
> particular timeline) you have to provide the ordering on that external
> timeline. That's the difference between simply waiting on the
> i915_active, and setting a fence within an i915_active_fence (the latter
> has a strict order).
> 
>>> @@ -109,20 +121,31 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
>>>        LIST_HEAD(still_in_list);
>>>        int ret = 0;
>>>    
>>> -     lockdep_assert_held(&obj->base.dev->struct_mutex);
>>> -
>>>        spin_lock(&obj->vma.lock);
>>>        while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
>>>                                                       struct i915_vma,
>>>                                                       obj_link))) {
>>> +             struct i915_address_space *vm = vma->vm;
>>> +
>>> +             ret = -EBUSY;
>>> +             if (!i915_vm_tryopen(vm))
>>> +                     break;
>>
>> This is some race between different paths of vm closing/destruction and
>> vma cleanup? We need a comment about it somewhere.. Here I guess is a
>> good place.
> 
> 4 different paths all fighting; they coordinate by preventing the other 2
> destruction paths from running as they run, and spinlocks to prevent the
> other.
>   
>> And why break and not continue, there will be a second call coming from
>> somewhere when it fails?
> 
> It failed to unbind the object, so it would end up in a busy spin
> preventing the actual vm release from removing the vma.

But it could proceed to unbind what can be unbound. Otherwise, is it 
guaranteed to be called again when it fails, so that eventually 
everything will be cleaned up?

> 
>>> @@ -1015,14 +1016,9 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>>>                                return ERR_PTR(-ENOSPC);
>>>                }
>>>    
>>> -             WARN(i915_vma_is_pinned(vma),
>>> -                  "bo is already pinned in ggtt with incorrect alignment:"
>>> -                  " offset=%08x, req.alignment=%llx,"
>>> -                  " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
>>> -                  i915_ggtt_offset(vma), alignment,
>>> -                  !!(flags & PIN_MAPPABLE),
>>> -                  i915_vma_is_map_and_fenceable(vma));
>>
>> Why removing the WARN?
> 
> It's fundamentally racy. Not until you lock, flush the active, do you
> know whether or not the vma is actually pinned. (Whereas previously it
> was locked under struct_mutex.) With multiple simultaneous pinners, the
> chance that we end up with a pin we do not like is increased. (Hmm,
> early on this entire function was under a single vm->mutex, that removes
> some race conditions that currently cause an unlikely failure at the
> end, but such races would be ping-ponging, so both strategies have their
> faults.)
> 
>>> +             mutex_lock(&vma->vm->mutex);
>>>                ret = i915_vma_unbind(vma);
>>> +             mutex_unlock(&vma->vm->mutex);
>>>                if (ret)
>>>                        return ERR_PTR(ret);
>>>        }
> 
>>> @@ -127,15 +126,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
>>>                                    min_size, alignment, cache_level,
>>>                                    start, end, mode);
>>>    
>>> -     /*
>>> -      * Retire before we search the active list. Although we have
>>> -      * reasonable accuracy in our retirement lists, we may have
>>> -      * a stray pin (preventing eviction) that can only be resolved by
>>> -      * retiring.
>>> -      */
>>> -     if (!(flags & PIN_NONBLOCK))
>>> -             i915_retire_requests(dev_priv);
>>
>> Not needed any more because?
> 
> It's illegal, requires struct_mutex, we do not hold struct_mutex. (Until
> the end of this series when i915_retire_requests is independent of
> struct_mutex and we restore it.)
> 
>>> @@ -175,7 +178,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
>>>    
>>>    static void ppgtt_unbind_vma(struct i915_vma *vma)
>>>    {
>>> -     vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
>>> +     if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
>>> +             vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
>>
>> VMA bound and allocated are now synonyms?
> 
> clear_range decrements the pt use counters that alloc incremented.

I think I was asking for a description of I915_VMA_ALLOC_BIT, but I am 
not sure any more.

> 
>>> +struct i915_vma_work *i915_vma_work(void)
>>> +{
>>> +     struct i915_vma_work *vw;
>>> +
>>> +     vw = kzalloc(sizeof(*vw), GFP_KERNEL);
>>
>> This could be reasonably high traffic to warrant a dedicated slab.
> 
> It's also not going to survive for more than 2 kernel releases. :(
> 
>>> +     if (!vw)
>>> +             return NULL;
>>> +
>>> +     dma_fence_work_init(&vw->base, &bind_ops);
>>
>> Hm, new (for me) API.. what is the advantage/need compared to plain workers?
> 
> It is a plain worker with the dma-fence coupling we've all grown to
> cherish.
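
Okay. So, paraphrasing only what is already in the patch, the flow as I 
read it is roughly:

    work = i915_vma_work();              /* kzalloc + dma_fence_work_init(&vw->base, &bind_ops) */
    ...
    i915_active_set_exclusive(&vma->active, &work->base.dma);
                                         /* publish the pending bind as a fence others can wait on */
    ...
    dma_fence_work_commit(&work->base);  /* kick the worker, which signals base.dma when done */
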
> 
>>> @@ -333,9 +375,32 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
>>>        GEM_BUG_ON(!vma->pages);
>>>    
>>>        trace_i915_vma_bind(vma, bind_flags);
>>> -     ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
>>> -     if (ret)
>>> -             return ret;
>>> +     if (work && (bind_flags & ~vma_flags) & vma->vm->bind_alloc) {
>>
>> What is the bitwise logic checking for?
> 
> Whether we are adding a bind that requires allocations, which the caller
> needs to be async.
> 
>>> +             work->vma = vma;
>>> +             work->cache_level = cache_level;
>>> +             work->flags = bind_flags | I915_VMA_ALLOC;
>>> +
>>> +             /*
>>> +              * Note we only want to chain up to the migration fence on
>>> +              * the pages (not the object itself). As we don't track that,
>>> +              * yet, we have to use the exclusive fence instead.
>>> +              *
>>> +              * Also note that we do not want to track the async vma as
>>> +              * part of the obj->resv->excl_fence as it only affects
>>> +              * execution and not content or object's backing store lifetime.
>>> +              */
>>> +             GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
>>> +             i915_active_set_exclusive(&vma->active, &work->base.dma);
>>
>> Oh right, dma_fence_work since it's not a worker but callback on
>> signalling the fence... From what context it gets called (holding any
>> locks?) and which locks the callback can/will take?
> 
> dma_fence_work has a dma_fence_cb that schedules a work_struct which
> executes our function. (And it also has a dma-fence through which we can
> signal the completion/error of that function.)

And this is to avoid some allocation somewhere?

> 
>>> @@ -368,7 +431,8 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>>>                        goto err;
>>>                }
>>>    
>>> -             vma->iomap = ptr;
>>> +             if (unlikely(cmpxchg(&vma->iomap, NULL, ptr)))
>>> +                     io_mapping_unmap(ptr);
>>
>> Why this change?
> 
> Multiple pinners race to see who wins the io_mapping competition. Each
> thread does their own allocation, if first they pass ownership to the
> vma, if last they delete their own io_mapping and go on to use the
> winner's vma->iomap. Except an idiot forgot to update the loser's ptr
> here.
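
Right, so the idiom is roughly (exact arguments assumed, just to spell 
out the loser's side):

    ptr = io_mapping_map_wc(&ggtt->iomap, vma->node.start, vma->node.size);
    if (unlikely(cmpxchg(&vma->iomap, NULL, ptr))) {
            io_mapping_unmap(ptr);          /* lost the race, drop our mapping... */
            ptr = READ_ONCE(vma->iomap);    /* ...and carry on with the winner's */
    }
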
> 
>>> @@ -580,35 +640,21 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>>>                return -ENOSPC;
>>>        }
>>>    
>>> -     if (vma->obj) {
>>> -             ret = i915_gem_object_pin_pages(vma->obj);
>>> -             if (ret)
>>> -                     return ret;
>>> -
>>> +     cache_level = 0;
>>> +     if (vma->obj)
>>>                cache_level = vma->obj->cache_level;
>>> -     } else {
>>> -             cache_level = 0;
>>> -     }
>>> -
>>> -     GEM_BUG_ON(vma->pages);
>>> -
>>> -     ret = vma->ops->set_pages(vma);
>>> -     if (ret)
>>> -             goto err_unpin;
>>>    
>>>        if (flags & PIN_OFFSET_FIXED) {
>>>                u64 offset = flags & PIN_OFFSET_MASK;
>>>                if (!IS_ALIGNED(offset, alignment) ||
>>> -                 range_overflows(offset, size, end)) {
>>> -                     ret = -EINVAL;
>>> -                     goto err_clear;
>>> -             }
>>> +                 range_overflows(offset, size, end))
>>
>> A lot of checks/ops removed from here.. I think I'll need to apply to
>> figure out how flows work. Or review on high level and have faith in CI.
> 
> Checks didn't get dropped, just the ops moved to the caller. Those that
> need to be done before the mutex have to be in the caller. This function
> is now only responsible for finding/reserving the place in the drm_mm.
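
Okay, that makes it clearer. So the new split, if I am summarising the 
code below correctly, is:

    i915_vma_pin()
        vma_get_pages()            /* obj pages + ops->set_pages(), outside vm->mutex */
        mutex_lock(&vm->mutex)
            i915_vma_insert()      /* only find/reserve the drm_mm node */
            i915_vma_bind()        /* write (or queue) the PTE updates */
        mutex_unlock(&vm->mutex)
        vma_put_pages()            /* the bind keeps its own reference via I915_VMA_PAGES_ACTIVE */
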
> 
>>> +static bool try_fast_pin(struct i915_vma *vma, unsigned int flags)
>>>    {
>>> -     const unsigned int bound = atomic_read(&vma->flags);
>>> -     int ret;
>>> +     unsigned int bound;
>>> +     bool pinned = true;
>>>    
>>> -     lockdep_assert_held(&vma->vm->i915->drm.struct_mutex);
>>> -     GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0);
>>> -     GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma));
>>> +     bound = atomic_read(&vma->flags);
>>> +     do {
>>> +             if (unlikely(flags & ~bound))
>>> +                     return false;
>>
>> What is this checking for?
> 
> If the requested binding (flags) is already available (bound).
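
Got it, so if I read the bit-twiddling right:

    /* flags = PIN_USER,   bound = PIN_GLOBAL | <pin count>:
     *     flags & ~bound == PIN_USER -> wanted bind missing, fall back to the full path
     * flags = PIN_GLOBAL, bound = PIN_GLOBAL | <pin count>:
     *     flags & ~bound == 0        -> already bound as requested, just bump the pin count
     */
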
> 
>>> -     if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) {
>>> -             ret = -EBUSY;
>>> -             goto err_unpin;
>>> +             if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
>>> +                     return false;
>>
>> What will the caller try to do in these cases?
> 
> Not do the early return.

How it will recover from error or overflow was the question I was aiming 
at. But that was before I figured out what you really mean by fast in 
this context. The fast/slow terminology is not the clearest here, but I 
have no better suggestions at the moment.

> 
>>> +
>>> +             if (!(bound & I915_VMA_PIN_MASK))
>>> +                     goto slow;
>>
>> Since fast path can go to slow, not sure what is fast referring to.
>>
>>> +
>>> +             GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
>>> +     } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
>>> +
>>> +     return true;
>>> +
>>> +slow:
>>> +     /*
>>> +      * If pin_count==0, but we are bound, check under the lock to avoid
>>> +      * racing with a concurrent i915_vma_unbind().
>>> +      */
>>> +     mutex_lock(&vma->vm->mutex);
>>
>> This is never from process context to need to be interruptible?
> 
> I started with a tristate return, didn't like it much (try() means bool
> to me). It can be _interruptible here, we then do an alloc before giving
> up.
> 
>>> +     do {
>>> +             if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
>>> +                     pinned = false;
>>> +                     break;
>>> +             }
>>> +
>>> +             if (unlikely(flags & ~bound)) {
>>> +                     pinned = false;
>>> +                     break;
>>> +             }
>>> +     } while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
>>> +     mutex_unlock(&vma->vm->mutex);
>>
>> slow = 0 -> 1 pin transition, the rest are fast?
> 
> 1+ are fast; unbound is slowest.
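
Right. So, if I read try_fast_pin() correctly:

    /* pin count N -> N + 1, N >= 1 : lock-free cmpxchg on vma->flags    ("fast")
     * pin count 0 -> 1, still bound: retry the cmpxchg under vm->mutex  ("slow")
     * not bound at all             : full path, vma_get_pages + insert + bind
     */
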
> 
>>> +
>>> +     return pinned;
>>> +}
>>> +
>>> +static int vma_get_pages(struct i915_vma *vma)
>>> +{
>>> +     int err = 0;
>>> +
>>> +     if (atomic_add_unless(&vma->pages_count, 1, 0))
>>> +             return 0;
>>> +
>>> +     if (mutex_lock_interruptible(&vma->pages_mutex))
>>> +             return -EINTR;
>>> +
>>> +     if (!atomic_read(&vma->pages_count)) {
>>> +             if (vma->obj) {
>>> +                     err = i915_gem_object_pin_pages(vma->obj);
>>> +                     if (err)
>>> +                             goto unlock;
>>> +             }
>>> +
>>> +             err = vma->ops->set_pages(vma);
>>> +             if (err)
>>> +                     goto unlock;
>>>        }
>>> +     atomic_inc(&vma->pages_count);
>>>    
>>> -     if ((bound & I915_VMA_BIND_MASK) == 0) {
>>> -             ret = i915_vma_insert(vma, size, alignment, flags);
>>> -             if (ret)
>>> -                     goto err_unpin;
>>> +unlock:
>>> +     mutex_unlock(&vma->pages_mutex);
>>> +
>>> +     return err;
>>> +}
>>> +
>>> +static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
>>> +{
>>> +     mutex_lock_nested(&vma->pages_mutex, SINGLE_DEPTH_NESTING);
>>
>> Nesting annotation only needed on the put path? Comment to describe why
>> it is needed would be good.
> 
> It's an allocating mutex, so we can be called from underneath another
> vma->pages_mutex. Standard misery.
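
Okay. For the record then, my understanding of the recursion lockdep 
would otherwise complain about:

    /* vma_get_pages(A)            takes A->pages_mutex and may allocate
     *   -> direct reclaim / shrinker
     *     -> __vma_put_pages(B)   takes B->pages_mutex, same lock class
     * hence mutex_lock_nested(SINGLE_DEPTH_NESTING) on the put side.
     */
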
> 
>>> +     GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
>>> +     if (atomic_sub_return(count, &vma->pages_count) == 0) {
>>> +             vma->ops->clear_pages(vma);
>>> +             GEM_BUG_ON(vma->pages);
>>> +             if (vma->obj)
>>> +                     i915_gem_object_unpin_pages(vma->obj);
>>>        }
>>> -     GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
>>> +     mutex_unlock(&vma->pages_mutex);
>>> +}
>>>    
>>> -     ret = i915_vma_bind(vma, vma->obj ? vma->obj->cache_level : 0, flags);
>>> -     if (ret)
>>> -             goto err_remove;
>>> +static void vma_put_pages(struct i915_vma *vma)
>>> +{
>>> +     if (atomic_add_unless(&vma->pages_count, -1, 1))
>>> +             return;
>>>    
>>> -     GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_BIND_MASK));
>>> +     __vma_put_pages(vma, 1);
>>> +}
>>> +
>>> +static void vma_unbind_pages(struct i915_vma *vma)
>>> +{
>>> +     unsigned int count;
>>> +
>>> +     lockdep_assert_held(&vma->vm->mutex);
>>> +
>>> +     count = atomic_read(&vma->pages_count);
>>> +     count >>= I915_VMA_PAGES_BIAS;
>>> +     GEM_BUG_ON(!count);
>>> +
>>> +     __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
>>
>> I think we need documentation on various flags and possible states.
> 
> It literally is just a bias! (2 counters within one.)
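
Right, so two counters packed into one atomic, something like (split 
between the halves assumed, just for illustration):

    /* pages_count & (BIT(I915_VMA_PAGES_BIAS) - 1) : pages acquired (vma_get_pages)
     * pages_count >> I915_VMA_PAGES_BIAS           : unique binds (I915_VMA_PAGES_ACTIVE)
     *
     * and vma_unbind_pages() then drops all the binds, plus their pages
     * references, in one go:
     *     count = atomic_read(&vma->pages_count) >> I915_VMA_PAGES_BIAS;
     *     __vma_put_pages(vma, count | count << I915_VMA_PAGES_BIAS);
     */
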
> 
>>> +int i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>>> +{
>>> +     struct i915_vma_work *work = NULL;
>>> +     unsigned int bound;
>>> +     int err;
>>> +
>>> +     BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND);
>>> +     BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND);
>>> +
>>> +     GEM_BUG_ON(flags & PIN_UPDATE);
>>> +     GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
>>>    
>>> -     if ((bound ^ atomic_read(&vma->flags)) & I915_VMA_GLOBAL_BIND)
>>> -             __i915_vma_set_map_and_fenceable(vma);
>>> +     if (try_fast_pin(vma, flags & I915_VMA_BIND_MASK))
>>> +             return 0;
>>> +
>>> +     err = vma_get_pages(vma);
>>> +     if (err)
>>> +             return err;
>>> +
>>> +     if (flags & PIN_USER) {
>>> +             work = i915_vma_work();
>>> +             if (!work) {
>>> +                     err = -ENOMEM;
>>> +                     goto err_pages;
>>> +             }
>>> +     }
>>
>> Good place for a comment to explain why PIN_USER needs to go via worker
>> and the rest dont.
> 
> Probably will change if my plans for bsw/bxt come to fruition. And
> certainly will change again as we will need to pass down the fence...
> 
>>> +     err = mutex_lock_interruptible(&vma->vm->mutex);
>>> +     if (err)
>>> +             goto err_fence;
>>> +
>>> +     bound = atomic_read(&vma->flags);
>>> +     if (unlikely(bound & I915_VMA_ERROR)) {
>>> +             err = -ENOMEM;
>>> +             goto err_unlock;
>>> +     }
>>>    
>>> +     if (unlikely(!((bound + 1) & I915_VMA_PIN_MASK))) {
>>> +             err = -EAGAIN; /* pins are meant to be fairly temporary */
>>> +             goto err_unlock;
>>> +     }
>>
>> Did not get what this is?
> 
> The overflow test.
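
Ah, of course:

    /* bound & I915_VMA_PIN_MASK is already saturated, so
     * (bound + 1) & I915_VMA_PIN_MASK == 0 means one more pin would wrap
     * the counter; hence bail out with -EAGAIN instead.
     */
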
> 
>>> +     if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
>>> +             __i915_vma_pin(vma);
>>> +             goto err_unlock;
>>> +     }
>>
>> Or this. (Comment please.)
> 
> Already bound by another thread.
> 
>>> +
>>> +     err = i915_active_acquire(&vma->active);
>>
>> It's dropped somewhere to release it into normal flows?
> 
> Not many lines below before we exit. The active is acquired again for
> the async operation to avoid the early release (on the success path).
> 
>>> +     if (err)
>>> +             goto err_unlock;
>>> +
>>> +     if (!(bound & I915_VMA_BIND_MASK)) {
>>> +             err = i915_vma_insert(vma, size, alignment, flags);
>>> +             if (err)
>>> +                     goto err_active;
>>> +
>>> +             if (i915_is_ggtt(vma->vm))
>>> +                     __i915_vma_set_map_and_fenceable(vma);
>>> +     }
>>> +
>>> +     GEM_BUG_ON(!vma->pages);
>>> +     err = i915_vma_bind(vma,
>>> +                         vma->obj ? vma->obj->cache_level : 0,
>>> +                         flags, work);
>>> +     if (err)
>>> +             goto err_remove;
>>> +
>>> +     /* There should only be at most 2 active bindings (user, global) */
>>
>> s/bindings/pins/ ? I thought one vma is one binding by definition.. so I
>> am confused.
> 
> Oh no, that would be far too simple. On gen6-7 & bsw, the vma is aliased.
> One vma, two bindings (PIN_USER -> ggtt->alias, PIN_GLOBAL -> ggtt).

Okay I am not familiar with that and that's a problem when reviewing this.

What fundamentally happens when someone tries to PIN_USER while the vma 
so far only had PIN_GLOBAL (or vice-versa) in the aliasing ggtt world? I 
see that we go to the "slow" path. Does it just increase the pin count?

> 
>>> +     GEM_BUG_ON(bound + I915_VMA_PAGES_ACTIVE < bound);
>>> +     atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
>>> +     list_move_tail(&vma->vm_link, &vma->vm->bound_list);
>>> +
>>> +     __i915_vma_pin(vma);
>>> +     GEM_BUG_ON(!i915_vma_is_pinned(vma));
>>> +     GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
>>>        GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
>>> -     return 0;
>>>    
>>>    err_remove:
>>> -     if ((bound & I915_VMA_BIND_MASK) == 0) {
>>> +     if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK))
>>>                i915_vma_remove(vma);
>>> -             GEM_BUG_ON(vma->pages);
>>> -             GEM_BUG_ON(atomic_read(&vma->flags) & I915_VMA_BIND_MASK);
>>> -     }
>>> -err_unpin:
>>> -     __i915_vma_unpin(vma);
>>> -     return ret;
>>> +err_active:
>>> +     i915_active_release(&vma->active);
>>> +err_unlock:
>>> +     mutex_unlock(&vma->vm->mutex);
>>> +err_fence:
>>> +     if (work)
>>> +             dma_fence_work_commit(&work->base);
>>> +err_pages:
>>> +     vma_put_pages(vma);
>>> +     return err;
>>>    }
> 
>>>    void i915_vma_parked(struct drm_i915_private *i915)
>>> @@ -831,12 +1011,32 @@ void i915_vma_parked(struct drm_i915_private *i915)
>>>    
>>>        spin_lock_irq(&i915->gt.closed_lock);
>>>        list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) {
>>> -             list_del_init(&vma->closed_link);
>>> +             struct drm_i915_gem_object *obj = vma->obj;
>>> +             struct i915_address_space *vm = vma->vm;
>>> +
>>> +             /* XXX All to avoid keeping a reference on i915_vma itself */
>>
>> Does XXX signify it is a temporary state of code, or just like a comment
>> but more important in some way?
> 
> It's a critique of the ugly code (the comment you asked for before) that
> is required because I didn't want to have to throw away the entire lmem
> object handling code.
> 
>>> @@ -976,7 +977,6 @@ static int igt_vma_remapped_gtt(void *arg)
>>>    
>>>    out:
>>>        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
>>> -     mutex_unlock(&i915->drm.struct_mutex);
>>>        i915_gem_object_put(obj);
>>>    
>>>        return err;
>>>
>>
>> In general, even when I did not explicitly asked for a comment, places
>> where things weren't clear to me should be considered to have them added
>> and probably more.
> 
> How else do I know what isn't clear to you... ;)

Good, when can I expect a more annotated version?

Regards,

Tvrtko

