[PATCH 6/6] drm/amdgpu: keep the MMU lock until the update ends v2

Wed Sep 6 19:19:44 UTC 2017

On 2017-09-06 02:42 PM, Christian König wrote:
>> I think all this does (compared to v1) is, avoid taking the same read
>> lock twice, but keep track of how many readers there are outside the
>> rw_semaphore to avoid recursive lock warnings.
> Well the primary problem is that taking a read side twice in the same
> thread can cause a deadlock and is forbidden. That's what lockdep is
> complaining about.
>
>> Am I missing something?
> When the first thread is waiting for the lock to be acquired other
> threads would just continue without waiting for the write side to end.

Yeah, OK, that was pretty obvious. So we still need a spinlock ...

>
> The alternative would be to use recursion counter + another lock + non
> owner release.
>
>> Maybe there is a concern about locking a semaphore in one thread and
>> unlocking it on a different thread.
> It is discouraged, but we could use
> down_read_non_owner()/up_read_non_owner() for this.

Sounds good.

rwsem.h has a comment that the proper abstraction to use here would be a
completion. If you want to avoid a hack, a completion could do the job.
Instead of holding the rwsem until the range_end notifier, we could
signal a completion in range_end (if it's the last range_end).

The writers would have to wait for the completion after taking the write
side lock before proceeding.

I'm not quite sure about how to do that without race conditions or
deadlocks between the lock and the completion, though.

Regards,
  Felix

>
> I'm going to give that a try,
> Christian.
>
> Am 06.09.2017 um 18:00 schrieb Felix Kuehling:
>> I think all this does (compared to v1) is, avoid taking the same read
>> lock twice, but keep track of how many readers there are outside the
>> rw_semaphore to avoid recursive lock warnings.
>>
>> I don't really understand why you need to count readers per task (using
>> the task hashtable in the amdgpu_mn struct). The amdgpu_mn structure is
>> already per process. Each instance should only be used by one process
>> (as identified by its mm_struct). So the counting of readers could be
>> done with a simple atomic counter in the amdgpu_mn struct:
>>
>> lock:
>>
>>      if (atomic_inc_return(mn->read_count) == 1)
>>          down_read(&mn->lock);
>>
>> unlock:
>>
>>      if (atomic_dec_return(mn->read_count) == 0)
>>          up_read(&mn->lock);
>>
>> Am I missing something? Even if there are multiple threads in the
>> process, it shouldn't matter because they all need to use the same read
>> lock anyway and as long as any thread holds the read lock, any other
>> thread is safe to use it. Maybe there is a concern about locking a
>> semaphore in one thread and unlocking it on a different thread. I know
>> that mutexes have quite strict semantics in this regard. But I think
>> semaphores are less strict.
>>
>> Regards,
>>    Felix
>>
>>
>> On 2017-09-06 08:34 AM, Christian König wrote:
>>> From: Christian König <christian.koenig at amd.com>
>>>
>>> This is quite controversial because it adds another lock which is
>>> held during
>>> page table updates, but I don't see much other option.
>>>
>>> v2: allow multiple updates to be in flight at the same time
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com> (v1)
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 108
>>> +++++++++++++++++++++++++++++++--
>>>   1 file changed, 104 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index 459f02e..333521e 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -52,6 +52,10 @@ struct amdgpu_mn {
>>>       /* objects protected by lock */
>>>       struct rw_semaphore    lock;
>>>       struct rb_root        objects;
>>> +
>>> +    /* protected by task_lock */
>>> +    spinlock_t        task_lock;
>>> +    DECLARE_HASHTABLE(task_hash, 7);
>>>   };
>>>     struct amdgpu_mn_node {
>>> @@ -59,6 +63,12 @@ struct amdgpu_mn_node {
>>>       struct list_head        bos;
>>>   };
>>>   +struct amdgpu_mn_task {
>>> +    struct hlist_node    node;
>>> +    struct task_struct    *task;
>>> +    unsigned        recursion;
>>> +};
>>> +
>>>   /**
>>>    * amdgpu_mn_destroy - destroy the rmn
>>>    *
>>> @@ -107,6 +117,75 @@ static void amdgpu_mn_release(struct
>>> mmu_notifier *mn,
>>>   }
>>>     /**
>>> + * amdgpu_mn_read_lock - take the rmn read lock
>>> + *
>>> + * @rmn: our notifier
>>> + *
>>> + * Take the rmn read side lock.
>>> + */
>>> +static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn)
>>> +{
>>> +    struct amdgpu_mn_task *task;
>>> +
>>> +    spin_lock(&rmn->task_lock);
>>> +    hash_for_each_possible(rmn->task_hash, task, node,
>>> +                   (unsigned long)current) {
>>> +        if (task->task != current)
>>> +            continue;
>>> +
>>> +        spin_unlock(&rmn->task_lock);
>>> +        ++task->recursion;
>>> +        return;
>>> +    }
>>> +    spin_unlock(&rmn->task_lock);
>>> +
>>> +    task = kmalloc(sizeof(*task), GFP_KERNEL);
>>> +    task->task = current;
>>> +    task->recursion = 1;
>>> +
>>> +    down_read(&rmn->lock);
>>> +
>>> +    spin_lock(&rmn->task_lock);
>>> +    hash_add(rmn->task_hash, &task->node, (unsigned long)current);
>>> +    spin_unlock(&rmn->task_lock);
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_mn_read_unlock - drop the rmn read lock
>>> + *
>>> + * @rmn: our notifier
>>> + *
>>> + * Drop the rmn read side lock.
>>> + */
>>> +static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn)
>>> +{
>>> +    struct amdgpu_mn_task *task;
>>> +
>>> +    spin_lock(&rmn->task_lock);
>>> +    hash_for_each_possible(rmn->task_hash, task, node,
>>> +                   (unsigned long)current) {
>>> +        if (task->task != current)
>>> +            continue;
>>> +
>>> +        spin_unlock(&rmn->task_lock);
>>> +        if (--task->recursion) {
>>> +            return;
>>> +        }
>>> +
>>> +        spin_lock(&rmn->task_lock);
>>> +        hash_del(&task->node);
>>> +        spin_unlock(&rmn->task_lock);
>>> +        kfree(task);
>>> +
>>> +        up_read(&rmn->lock);
>>> +        return;
>>> +    }
>>> +    spin_unlock(&rmn->task_lock);
>>> +
>>> +    BUG();
>>> +}
>>> +
>>> +/**
>>>    * amdgpu_mn_invalidate_node - unmap all BOs of a node
>>>    *
>>>    * @node: the node with the BOs to unmap
>>> @@ -152,7 +231,7 @@ static void amdgpu_mn_invalidate_page(struct
>>> mmu_notifier *mn,
>>>       struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
>>>       struct interval_tree_node *it;
>>>   -    down_read(&rmn->lock);
>>> +    amdgpu_mn_read_lock(rmn);
>>>         it = interval_tree_iter_first(&rmn->objects, address, address);
>>>       if (it) {
>>> @@ -162,7 +241,7 @@ static void amdgpu_mn_invalidate_page(struct
>>> mmu_notifier *mn,
>>>           amdgpu_mn_invalidate_node(node, address, address);
>>>       }
>>>   -    up_read(&rmn->lock);
>>> +    amdgpu_mn_read_unlock(rmn);
>>>   }
>>>     /**
>>> @@ -187,7 +266,7 @@ static void
>>> amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
>>>       /* notification is exclusive, but interval is inclusive */
>>>       end -= 1;
>>>   -    down_read(&rmn->lock);
>>> +    amdgpu_mn_read_lock(rmn);
>>>         it = interval_tree_iter_first(&rmn->objects, start, end);
>>>       while (it) {
>>> @@ -198,14 +277,33 @@ static void
>>> amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
>>>             amdgpu_mn_invalidate_node(node, start, end);
>>>       }
>>> +}
>>> +
>>> +/**
>>> + * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>>> + *
>>> + * @mn: our notifier
>>> + * @mn: the mm this callback is about
>>> + * @start: start of updated range
>>> + * @end: end of updated range
>>> + *
>>> + * Release the lock again to allow new command submissions.
>>> + */
>>> +static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>> +                       struct mm_struct *mm,
>>> +                       unsigned long start,
>>> +                       unsigned long end)
>>> +{
>>> +    struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
>>>   -    up_read(&rmn->lock);
>>> +    amdgpu_mn_read_unlock(rmn);
>>>   }
>>>     static const struct mmu_notifier_ops amdgpu_mn_ops = {
>>>       .release = amdgpu_mn_release,
>>>       .invalidate_page = amdgpu_mn_invalidate_page,
>>>       .invalidate_range_start = amdgpu_mn_invalidate_range_start,
>>> +    .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>   };
>>>     /**
>>> @@ -242,6 +340,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>> amdgpu_device *adev)
>>>       rmn->mn.ops = &amdgpu_mn_ops;
>>>       init_rwsem(&rmn->lock);
>>>       rmn->objects = RB_ROOT;
>>> +    spin_lock_init(&rmn->task_lock);
>>> +    hash_init(rmn->task_hash);
>>>         r = __mmu_notifier_register(&rmn->mn, mm);
>>>       if (r)
>
>