[Intel-gfx] [PATCH 14/24] drm/i915: Rework intel_context pinning to do everything outside of pin_mutex
Maarten Lankhorst
maarten.lankhorst at linux.intel.com
Wed Aug 19 10:38:01 UTC 2020
On 12-08-2020 at 21:14, Thomas Hellström (Intel) wrote:
>
> On 8/10/20 12:30 PM, Maarten Lankhorst wrote:
>> Instead of doing everything inside of pin_mutex, we move all pinning
>> outside of it. Because i915_active has its own reference counting,
>> and pinning suffers from the same lock-inversion issues against
>> mutexes, we make sure everything is pinned first, so that the
>> pinning in i915_active only needs to bump refcounts. This allows us
>> to take pin refcounts correctly at all times.
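
To make that concrete, here is a minimal sketch of the pattern (struct
ctx, resource_pin() and resource_unpin() are hypothetical stand-ins for
illustration, not actual i915 API): everything that might need
dma_resv_lock() is pinned before the mutex is taken, so the critical
section reduces to refcount manipulation, mirroring the
ops->pre_pin()/ops->post_unpin() handoff in the patch below.

#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct ctx {
        struct mutex lock;              /* stands in for ce->pin_mutex */
        atomic_t pin_count;
};

int resource_pin(struct ctx *c);        /* may take dma_resv_lock() */
void resource_unpin(struct ctx *c);

static int do_pin(struct ctx *c)
{
        bool handoff = false;
        int err;

        /* Pin everything that can block on dma_resv_lock() first... */
        err = resource_pin(c);
        if (err)
                return err;

        /* ...so that under the mutex we only touch refcounts. */
        err = mutex_lock_interruptible(&c->lock);
        if (err)
                goto out_unpin;

        if (!atomic_add_unless(&c->pin_count, 1, 0)) {
                /* First pin: hand our temporary pin over to pin_count. */
                atomic_inc(&c->pin_count);
                handoff = true;
        }
        mutex_unlock(&c->lock);

out_unpin:
        if (!handoff)
                resource_unpin(c);      /* error, or already pinned */
        return err;
}

On a successful first pin the temporary pin is handed over and only
dropped again by the final unpin; in every other case (error, or the
context was already pinned) it is released on the way out.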
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_context.c | 232 +++++++++++-------
>> drivers/gpu/drm/i915/gt/intel_context_types.h | 4 +-
>> drivers/gpu/drm/i915/gt/intel_lrc.c | 34 ++-
>> .../gpu/drm/i915/gt/intel_ring_submission.c | 13 +-
>> drivers/gpu/drm/i915/gt/mock_engine.c | 13 +-
>> 5 files changed, 190 insertions(+), 106 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
>> index 52db2bde44a3..efe9a7a89ede 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_context.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
>> @@ -93,79 +93,6 @@ static void intel_context_active_release(struct intel_context *ce)
>> i915_active_release(&ce->active);
>> }
>> -int __intel_context_do_pin(struct intel_context *ce)
>> -{
>> - int err;
>> -
>> - if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
>> - err = intel_context_alloc_state(ce);
>> - if (err)
>> - return err;
>> - }
>> -
>> - err = i915_active_acquire(&ce->active);
>> - if (err)
>> - return err;
>> -
>> - if (mutex_lock_interruptible(&ce->pin_mutex)) {
>> - err = -EINTR;
>> - goto out_release;
>> - }
>> -
>> - if (unlikely(intel_context_is_closed(ce))) {
>> - err = -ENOENT;
>> - goto out_unlock;
>> - }
>> -
>> - if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
>> - err = intel_context_active_acquire(ce);
>> - if (unlikely(err))
>> - goto out_unlock;
>> -
>> - err = ce->ops->pin(ce);
>> - if (unlikely(err))
>> - goto err_active;
>> -
>> - CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
>> - i915_ggtt_offset(ce->ring->vma),
>> - ce->ring->head, ce->ring->tail);
>> -
>> - smp_mb__before_atomic(); /* flush pin before it is visible */
>> - atomic_inc(&ce->pin_count);
>> - }
>> -
>> - GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
>> - GEM_BUG_ON(i915_active_is_idle(&ce->active));
>> - goto out_unlock;
>> -
>> -err_active:
>> - intel_context_active_release(ce);
>> -out_unlock:
>> - mutex_unlock(&ce->pin_mutex);
>> -out_release:
>> - i915_active_release(&ce->active);
>> - return err;
>> -}
>> -
>> -void intel_context_unpin(struct intel_context *ce)
>> -{
>> - if (!atomic_dec_and_test(&ce->pin_count))
>> - return;
>> -
>> - CE_TRACE(ce, "unpin\n");
>> - ce->ops->unpin(ce);
>> -
>> - /*
>> - * Once released, we may asynchronously drop the active reference.
>> - * As that may be the only reference keeping the context alive,
>> - * take an extra now so that it is not freed before we finish
>> - * dereferencing it.
>> - */
>> - intel_context_get(ce);
>> - intel_context_active_release(ce);
>> - intel_context_put(ce);
>> -}
>> -
>> static int __context_pin_state(struct i915_vma *vma)
>> {
>> unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
>> @@ -225,6 +152,138 @@ static void __ring_retire(struct intel_ring *ring)
>> intel_ring_unpin(ring);
>> }
>> +static int intel_context_pre_pin(struct intel_context *ce)
>> +{
>> + int err;
>> +
>> + CE_TRACE(ce, "active\n");
>> +
>> + err = __ring_active(ce->ring);
>> + if (err)
>> + return err;
>> +
>> + err = intel_timeline_pin(ce->timeline);
>> + if (err)
>> + goto err_ring;
>> +
>> + if (!ce->state)
>> + return 0;
>> +
>> + err = __context_pin_state(ce->state);
>> + if (err)
>> + goto err_timeline;
>> +
>> + return 0;
>> +
>> +err_timeline:
>> + intel_timeline_unpin(ce->timeline);
>> +err_ring:
>> + __ring_retire(ce->ring);
>> + return err;
>> +}
>> +
>> +static void intel_context_post_unpin(struct intel_context *ce)
>> +{
>> + if (ce->state)
>> + __context_unpin_state(ce->state);
>> +
>> + intel_timeline_unpin(ce->timeline);
>> + __ring_retire(ce->ring);
>> +}
>> +
>> +int __intel_context_do_pin(struct intel_context *ce)
>> +{
>> + bool handoff = false;
>> + void *vaddr;
>> + int err = 0;
>> +
>> + if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
>> + err = intel_context_alloc_state(ce);
>> + if (err)
>> + return err;
>> + }
>> +
>> + /*
>> + * We always pin the context/ring/timeline here, to ensure a pin
>> + * refcount for __intel_context_active(), which prevents a lock
>> + * inversion of ce->pin_mutex vs dma_resv_lock().
>> + */
>> + err = intel_context_pre_pin(ce);
>> + if (err)
>> + return err;
>> +
>> + err = i915_active_acquire(&ce->active);
>> + if (err)
>> + goto err_ctx_unpin;
>> +
>> + err = ce->ops->pre_pin(ce, &vaddr);
>> + if (err)
>> + goto err_release;
>> +
>> + err = mutex_lock_interruptible(&ce->pin_mutex);
>> + if (err)
>> + goto err_post_unpin;
>> +
>> + if (unlikely(intel_context_is_closed(ce))) {
>> + err = -ENOENT;
>> + goto err_unlock;
>> + }
>> +
>> + if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
>> + err = intel_context_active_acquire(ce);
>> + if (unlikely(err))
>> + goto err_unlock;
>> +
>> + err = ce->ops->pin(ce, vaddr);
>> + if (err) {
>> + intel_context_active_release(ce);
>> + goto err_unlock;
>> + }
>> +
>> + CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
>> + i915_ggtt_offset(ce->ring->vma),
>> + ce->ring->head, ce->ring->tail);
>> +
>> + handoff = true;
>> + smp_mb__before_atomic(); /* flush pin before it is visible */
>> + atomic_inc(&ce->pin_count);
>> + }
>> +
>> + GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
>> +
>> +err_unlock:
>> + mutex_unlock(&ce->pin_mutex);
>> +err_post_unpin:
>> + if (!handoff)
>> + ce->ops->post_unpin(ce);
>> +err_release:
>> + i915_active_release(&ce->active);
>> +err_ctx_unpin:
>> + intel_context_post_unpin(ce);
>> + return err;
>> +}
>> +
>> +void intel_context_unpin(struct intel_context *ce)
>> +{
>> + if (!atomic_dec_and_test(&ce->pin_count))
>> + return;
>> +
>> + CE_TRACE(ce, "unpin\n");
>> + ce->ops->unpin(ce);
>> + ce->ops->post_unpin(ce);
>
> What's protecting ops->unpin() here, running concurrently with ops->pin() in __intel_context_do_pin()? Do the ops functions have to implement their own locking if needed?
>
> Otherwise LGTM
>
> Reviewed-by: Thomas Hellström <thomas.hellstrom at intel.com>
>
post_unpin() can be run concurrently, and unpin() for intel_lrc.c is just check_redzone() (it is empty for legacy rings), so this should be fine. :)
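
To spell that out, a sketch of the final-unpin path (reusing the
hypothetical struct ctx from the sketch above, plus an illustrative
ops table that is not the real intel_context_ops):

struct ctx_ops {
        void (*unpin)(struct ctx *c);
        void (*post_unpin)(struct ctx *c);
};

/* assume struct ctx above additionally carries: const struct ctx_ops *ops; */

static void do_unpin(struct ctx *c)
{
        /* Intermediate unpins only drop the refcount. */
        if (!atomic_dec_and_test(&c->pin_count))
                return;

        /*
         * Only the final unpin gets here, and it runs outside the
         * mutex, so a fresh do_pin() on another CPU may already be
         * executing ops->pin() concurrently. That overlap is harmless
         * only because unpin() has no side effects: a read-only
         * redzone check for intel_lrc.c, and nothing at all for the
         * legacy ring backend.
         */
        c->ops->unpin(c);
        c->ops->post_unpin(c);
}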