[PATCH 6/6] drm/amdgpu: keep the MMU lock until the update ends v2

Christian König christian.koenig at amd.com
Wed Sep 6 18:42:17 UTC 2017


> I think all this does (compared to v1) is, avoid taking the same read
> lock twice, but keep track of how many readers there are outside the
> rw_semaphore to avoid recursive lock warnings.
Well the primary problem is that taking a read side twice in the same 
thread can cause a deadlock and is forbidden. That's what lockdep is 
complaining about.

> Am I missing something?
When the first thread is waiting for the lock to be acquired, other 
threads would just continue without waiting for the write side to end.

The alternative would be to use a recursion counter + another lock + a 
non-owner release.

> Maybe there is a concern about locking a semaphore in one thread and unlocking it on a different thread.
It is discouraged, but we could use 
down_read_non_owner()/up_read_non_owner() for this.

I'm going to give that a try,
Christian.

Am 06.09.2017 um 18:00 schrieb Felix Kuehling:
> I think all this does (compared to v1) is, avoid taking the same read
> lock twice, but keep track of how many readers there are outside the
> rw_semaphore to avoid recursive lock warnings.
>
> I don't really understand why you need to count readers per task (using
> the task hashtable in the amdgpu_mn struct). The amdgpu_mn structure is
> already per process. Each instance should only be used by one process
> (as identified by its mm_struct). So the counting of readers could be
> done with a simple atomic counter in the amdgpu_mn struct:
>
> lock:
>
>      if (atomic_inc_return(mn->read_count) == 1)
>          down_read(&mn->lock);
>
> unlock:
>
>      if (atomic_dec_return(mn->read_count) == 0)
>          up_read(&mn->lock);
>
> Am I missing something? Even if there are multiple threads in the
> process, it shouldn't matter because they all need to use the same read
> lock anyway and as long as any thread holds the read lock, any other
> thread is safe to use it. Maybe there is a concern about locking a
> semaphore in one thread and unlocking it on a different thread. I know
> that mutexes have quite strict semantics in this regard. But I think
> semaphores are less strict.
>
> Regards,
>    Felix
>
>
> On 2017-09-06 08:34 AM, Christian König wrote:
>> From: Christian König <christian.koenig at amd.com>
>>
>> This is quite controversial because it adds another lock which is held during
>> page table updates, but I don't see much other option.
>>
>> v2: allow multiple updates to be in flight at the same time
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com> (v1)
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 108 +++++++++++++++++++++++++++++++--
>>   1 file changed, 104 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index 459f02e..333521e 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -52,6 +52,10 @@ struct amdgpu_mn {
>>   	/* objects protected by lock */
>>   	struct rw_semaphore	lock;
>>   	struct rb_root		objects;
>> +
>> +	/* protected by task_lock */
>> +	spinlock_t		task_lock;
>> +	DECLARE_HASHTABLE(task_hash, 7);
>>   };
>>   
>>   struct amdgpu_mn_node {
>> @@ -59,6 +63,12 @@ struct amdgpu_mn_node {
>>   	struct list_head		bos;
>>   };
>>   
>> +struct amdgpu_mn_task {
>> +	struct hlist_node	node;
>> +	struct task_struct	*task;
>> +	unsigned		recursion;
>> +};
>> +
>>   /**
>>    * amdgpu_mn_destroy - destroy the rmn
>>    *
>> @@ -107,6 +117,75 @@ static void amdgpu_mn_release(struct mmu_notifier *mn,
>>   }
>>   
>>   /**
>> + * amdgpu_mn_read_lock - take the rmn read lock
>> + *
>> + * @rmn: our notifier
>> + *
>> + * Take the rmn read side lock.
>> + */
>> +static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn)
>> +{
>> +	struct amdgpu_mn_task *task;
>> +
>> +	spin_lock(&rmn->task_lock);
>> +	hash_for_each_possible(rmn->task_hash, task, node,
>> +			       (unsigned long)current) {
>> +		if (task->task != current)
>> +			continue;
>> +
>> +		spin_unlock(&rmn->task_lock);
>> +		++task->recursion;
>> +		return;
>> +	}
>> +	spin_unlock(&rmn->task_lock);
>> +
>> +	task = kmalloc(sizeof(*task), GFP_KERNEL);
>> +	task->task = current;
>> +	task->recursion = 1;
>> +
>> +	down_read(&rmn->lock);
>> +
>> +	spin_lock(&rmn->task_lock);
>> +	hash_add(rmn->task_hash, &task->node, (unsigned long)current);
>> +	spin_unlock(&rmn->task_lock);
>> +}
>> +
>> +/**
>> + * amdgpu_mn_read_unlock - drop the rmn read lock
>> + *
>> + * @rmn: our notifier
>> + *
>> + * Drop the rmn read side lock.
>> + */
>> +static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn)
>> +{
>> +	struct amdgpu_mn_task *task;
>> +
>> +	spin_lock(&rmn->task_lock);
>> +	hash_for_each_possible(rmn->task_hash, task, node,
>> +			       (unsigned long)current) {
>> +		if (task->task != current)
>> +			continue;
>> +
>> +		spin_unlock(&rmn->task_lock);
>> +		if (--task->recursion) {
>> +			return;
>> +		}
>> +
>> +		spin_lock(&rmn->task_lock);
>> +		hash_del(&task->node);
>> +		spin_unlock(&rmn->task_lock);
>> +		kfree(task);
>> +
>> +		up_read(&rmn->lock);
>> +		return;
>> +	}
>> +	spin_unlock(&rmn->task_lock);
>> +
>> +	BUG();
>> +}
>> +
>> +/**
>>    * amdgpu_mn_invalidate_node - unmap all BOs of a node
>>    *
>>    * @node: the node with the BOs to unmap
>> @@ -152,7 +231,7 @@ static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
>>   	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
>>   	struct interval_tree_node *it;
>>   
>> -	down_read(&rmn->lock);
>> +	amdgpu_mn_read_lock(rmn);
>>   
>>   	it = interval_tree_iter_first(&rmn->objects, address, address);
>>   	if (it) {
>> @@ -162,7 +241,7 @@ static void amdgpu_mn_invalidate_page(struct mmu_notifier *mn,
>>   		amdgpu_mn_invalidate_node(node, address, address);
>>   	}
>>   
>> -	up_read(&rmn->lock);
>> +	amdgpu_mn_read_unlock(rmn);
>>   }
>>   
>>   /**
>> @@ -187,7 +266,7 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
>>   	/* notification is exclusive, but interval is inclusive */
>>   	end -= 1;
>>   
>> -	down_read(&rmn->lock);
>> +	amdgpu_mn_read_lock(rmn);
>>   
>>   	it = interval_tree_iter_first(&rmn->objects, start, end);
>>   	while (it) {
>> @@ -198,14 +277,33 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
>>   
>>   		amdgpu_mn_invalidate_node(node, start, end);
>>   	}
>> +}
>> +
>> +/**
>> + * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> + *
>> + * @mn: our notifier
>> + * @mn: the mm this callback is about
>> + * @start: start of updated range
>> + * @end: end of updated range
>> + *
>> + * Release the lock again to allow new command submissions.
>> + */
>> +static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> +					   struct mm_struct *mm,
>> +					   unsigned long start,
>> +					   unsigned long end)
>> +{
>> +	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
>>   
>> -	up_read(&rmn->lock);
>> +	amdgpu_mn_read_unlock(rmn);
>>   }
>>   
>>   static const struct mmu_notifier_ops amdgpu_mn_ops = {
>>   	.release = amdgpu_mn_release,
>>   	.invalidate_page = amdgpu_mn_invalidate_page,
>>   	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
>> +	.invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>   };
>>   
>>   /**
>> @@ -242,6 +340,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
>>   	rmn->mn.ops = &amdgpu_mn_ops;
>>   	init_rwsem(&rmn->lock);
>>   	rmn->objects = RB_ROOT;
>> +	spin_lock_init(&rmn->task_lock);
>> +	hash_init(rmn->task_hash);
>>   
>>   	r = __mmu_notifier_register(&rmn->mn, mm);
>>   	if (r)




More information about the amd-gfx mailing list