[PATCH 10/14] drm/amdkfd: Use ref count to prevent kfd_process destruction

Tue Nov 28 09:52:46 UTC 2017

Am 28.11.2017 um 00:29 schrieb Felix Kuehling:
> Use a reference counter instead of a lock to prevent process
> destruction while functions running out of process context are using
> the kfd_process structure. In many cases these functions don't need
> the structure to be locked. In the few cases that really do need the
> process lock, take it explicitly.
>
> This helps simplify lock dependencies between the process lock and
> other locks, particularly amdgpu and mm_struct locks. This will be
> important when amdgpu calls back to amdkfd for memory evictions.

Actually that is not only an optimization or cleanup, but a rather 
important bug fix.

Using a mutex as protection to prevent object deletion is illegal 
because mutex_unlock() can accesses the mutex object even after it is 
unlocked.

See this LWN article as well https://lwn.net/Articles/575460/.

If you have other use cases like this in the KFD it should better be 
fixed as well.

> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>

Acked-by: Christian König <christian.koenig at amd.com>

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 14 +++++++-------
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16 +++++++++++++---
>   3 files changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index cb92d4b..93aae5c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -441,7 +441,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
>   	/*
>   	 * Because we are called from arbitrary context (workqueue) as opposed
>   	 * to process context, kfd_process could attempt to exit while we are
> -	 * running so the lookup function returns a locked process.
> +	 * running so the lookup function increments the process ref count.
>   	 */
>   	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>   
> @@ -493,7 +493,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
>   	}
>   
>   	mutex_unlock(&p->event_mutex);
> -	mutex_unlock(&p->mutex);
> +	kfd_unref_process(p);
>   }
>   
>   static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
> @@ -847,7 +847,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
>   	/*
>   	 * Because we are called from arbitrary context (workqueue) as opposed
>   	 * to process context, kfd_process could attempt to exit while we are
> -	 * running so the lookup function returns a locked process.
> +	 * running so the lookup function increments the process ref count.
>   	 */
>   	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>   	struct mm_struct *mm;
> @@ -860,7 +860,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
>   	 */
>   	mm = get_task_mm(p->lead_thread);
>   	if (!mm) {
> -		mutex_unlock(&p->mutex);
> +		kfd_unref_process(p);
>   		return; /* Process is exiting */
>   	}
>   
> @@ -903,7 +903,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
>   			&memory_exception_data);
>   
>   	mutex_unlock(&p->event_mutex);
> -	mutex_unlock(&p->mutex);
> +	kfd_unref_process(p);
>   }
>   
>   void kfd_signal_hw_exception_event(unsigned int pasid)
> @@ -911,7 +911,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
>   	/*
>   	 * Because we are called from arbitrary context (workqueue) as opposed
>   	 * to process context, kfd_process could attempt to exit while we are
> -	 * running so the lookup function returns a locked process.
> +	 * running so the lookup function increments the process ref count.
>   	 */
>   	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>   
> @@ -924,5 +924,5 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
>   	lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
>   
>   	mutex_unlock(&p->event_mutex);
> -	mutex_unlock(&p->mutex);
> +	kfd_unref_process(p);
>   }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 248e4f5..0c96a6b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -606,6 +606,7 @@ void kfd_process_destroy_wq(void);
>   struct kfd_process *kfd_create_process(struct file *filep);
>   struct kfd_process *kfd_get_process(const struct task_struct *);
>   struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
> +void kfd_unref_process(struct kfd_process *p);
>   
>   struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
>   						struct kfd_process *p);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index e02e8a2..509f987 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -49,6 +49,7 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu);
>   static struct workqueue_struct *kfd_process_wq;
>   
>   static struct kfd_process *find_process(const struct task_struct *thread);
> +static void kfd_process_ref_release(struct kref *ref);
>   static struct kfd_process *create_process(const struct task_struct *thread);
>   static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
>   
> @@ -146,6 +147,11 @@ static struct kfd_process *find_process(const struct task_struct *thread)
>   	return p;
>   }
>   
> +void kfd_unref_process(struct kfd_process *p)
> +{
> +	kref_put(&p->ref, kfd_process_ref_release);
> +}
> +
>   /* No process locking is needed in this function, because the process
>    * is not findable any more. We must assume that no other thread is
>    * using it any more, otherwise we couldn't safely free the process
> @@ -201,7 +207,7 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
>   {
>   	struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
>   
> -	kref_put(&p->ref, kfd_process_ref_release);
> +	kfd_unref_process(p);
>   }
>   
>   static void kfd_process_notifier_release(struct mmu_notifier *mn,
> @@ -525,6 +531,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
>   
>   	mutex_unlock(kfd_get_dbgmgr_mutex());
>   
> +	mutex_lock(&p->mutex);
> +
>   	pdd = kfd_get_process_device_data(dev, p);
>   	if (pdd)
>   		/* For GPU relying on IOMMU, we need to dequeue here
> @@ -533,6 +541,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
>   		kfd_process_dequeue_from_device(pdd);
>   
>   	mutex_unlock(&p->mutex);
> +
> +	kfd_unref_process(p);
>   }
>   
>   struct kfd_process_device *kfd_get_first_process_device_data(
> @@ -557,7 +567,7 @@ bool kfd_has_process_device_data(struct kfd_process *p)
>   	return !(list_empty(&p->per_device_data));
>   }
>   
> -/* This returns with process->mutex locked. */
> +/* This increments the process->ref counter. */
>   struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
>   {
>   	struct kfd_process *p;
> @@ -567,7 +577,7 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
>   
>   	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
>   		if (p->pasid == pasid) {
> -			mutex_lock(&p->mutex);
> +			kref_get(&p->ref);
>   			break;
>   		}
>   	}