[PATCH 10/14] drm/amdkfd: Use ref count to prevent kfd_process destruction
Christian König
ckoenig.leichtzumerken at gmail.com
Tue Nov 28 09:52:46 UTC 2017
Am 28.11.2017 um 00:29 schrieb Felix Kuehling:
> Use a reference counter instead of a lock to prevent process
> destruction while functions running out of process context are using
> the kfd_process structure. In many cases these functions don't need
> the structure to be locked. In the few cases that really do need the
> process lock, take it explicitly.
>
> This helps simplify lock dependencies between the process lock and
> other locks, particularly amdgpu and mm_struct locks. This will be
> important when amdgpu calls back to amdkfd for memory evictions.
Actually that is not only an optimization or cleanup, but a rather
important bug fix.
Using a mutex as protection to prevent object deletion is illegal
because mutex_unlock() can accesses the mutex object even after it is
unlocked.
See this LWN article as well https://lwn.net/Articles/575460/.
If you have other use cases like this in the KFD it should better be
fixed as well.
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
Acked-by: Christian König <christian.koenig at amd.com>
Regards,
Christian.
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_events.c | 14 +++++++-------
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 +
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16 +++++++++++++---
> 3 files changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> index cb92d4b..93aae5c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
> @@ -441,7 +441,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
> /*
> * Because we are called from arbitrary context (workqueue) as opposed
> * to process context, kfd_process could attempt to exit while we are
> - * running so the lookup function returns a locked process.
> + * running so the lookup function increments the process ref count.
> */
> struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>
> @@ -493,7 +493,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
> }
>
> mutex_unlock(&p->event_mutex);
> - mutex_unlock(&p->mutex);
> + kfd_unref_process(p);
> }
>
> static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
> @@ -847,7 +847,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
> /*
> * Because we are called from arbitrary context (workqueue) as opposed
> * to process context, kfd_process could attempt to exit while we are
> - * running so the lookup function returns a locked process.
> + * running so the lookup function increments the process ref count.
> */
> struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
> struct mm_struct *mm;
> @@ -860,7 +860,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
> */
> mm = get_task_mm(p->lead_thread);
> if (!mm) {
> - mutex_unlock(&p->mutex);
> + kfd_unref_process(p);
> return; /* Process is exiting */
> }
>
> @@ -903,7 +903,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
> &memory_exception_data);
>
> mutex_unlock(&p->event_mutex);
> - mutex_unlock(&p->mutex);
> + kfd_unref_process(p);
> }
>
> void kfd_signal_hw_exception_event(unsigned int pasid)
> @@ -911,7 +911,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
> /*
> * Because we are called from arbitrary context (workqueue) as opposed
> * to process context, kfd_process could attempt to exit while we are
> - * running so the lookup function returns a locked process.
> + * running so the lookup function increments the process ref count.
> */
> struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>
> @@ -924,5 +924,5 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
> lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL);
>
> mutex_unlock(&p->event_mutex);
> - mutex_unlock(&p->mutex);
> + kfd_unref_process(p);
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 248e4f5..0c96a6b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -606,6 +606,7 @@ void kfd_process_destroy_wq(void);
> struct kfd_process *kfd_create_process(struct file *filep);
> struct kfd_process *kfd_get_process(const struct task_struct *);
> struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
> +void kfd_unref_process(struct kfd_process *p);
>
> struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
> struct kfd_process *p);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index e02e8a2..509f987 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -49,6 +49,7 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu);
> static struct workqueue_struct *kfd_process_wq;
>
> static struct kfd_process *find_process(const struct task_struct *thread);
> +static void kfd_process_ref_release(struct kref *ref);
> static struct kfd_process *create_process(const struct task_struct *thread);
> static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
>
> @@ -146,6 +147,11 @@ static struct kfd_process *find_process(const struct task_struct *thread)
> return p;
> }
>
> +void kfd_unref_process(struct kfd_process *p)
> +{
> + kref_put(&p->ref, kfd_process_ref_release);
> +}
> +
> /* No process locking is needed in this function, because the process
> * is not findable any more. We must assume that no other thread is
> * using it any more, otherwise we couldn't safely free the process
> @@ -201,7 +207,7 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
> {
> struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
>
> - kref_put(&p->ref, kfd_process_ref_release);
> + kfd_unref_process(p);
> }
>
> static void kfd_process_notifier_release(struct mmu_notifier *mn,
> @@ -525,6 +531,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
>
> mutex_unlock(kfd_get_dbgmgr_mutex());
>
> + mutex_lock(&p->mutex);
> +
> pdd = kfd_get_process_device_data(dev, p);
> if (pdd)
> /* For GPU relying on IOMMU, we need to dequeue here
> @@ -533,6 +541,8 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
> kfd_process_dequeue_from_device(pdd);
>
> mutex_unlock(&p->mutex);
> +
> + kfd_unref_process(p);
> }
>
> struct kfd_process_device *kfd_get_first_process_device_data(
> @@ -557,7 +567,7 @@ bool kfd_has_process_device_data(struct kfd_process *p)
> return !(list_empty(&p->per_device_data));
> }
>
> -/* This returns with process->mutex locked. */
> +/* This increments the process->ref counter. */
> struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
> {
> struct kfd_process *p;
> @@ -567,7 +577,7 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
>
> hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
> if (p->pasid == pasid) {
> - mutex_lock(&p->mutex);
> + kref_get(&p->ref);
> break;
> }
> }
More information about the amd-gfx
mailing list