[PATCH 09/14] drm/amdkfd: Make kfd_process reference counted

Sun Dec 10 10:00:18 UTC 2017

On Tue, Nov 28, 2017 at 1:29 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> This will be used to elliminate the use of the process lock for
> preventing concurrent process destruction. This will simplify lock
> dependencies between KFD and KGD.
>
> This also simplifies the process destruction in a few ways:
> * Don't allocate work struct dynamically
> * Remove unnecessary hack that increments mm reference counter
> * Remove unnecessary process locking during destruction
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  4 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 58 ++++++++++++--------------------
>  2 files changed, 26 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index dca493b..248e4f5 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -34,6 +34,7 @@
>  #include <linux/idr.h>
>  #include <linux/kfifo.h>
>  #include <linux/seq_file.h>
> +#include <linux/kref.h>
>  #include <kgd_kfd_interface.h>
>
>  #include "amd_shared.h"
> @@ -537,6 +538,9 @@ struct kfd_process {
>          */
>         void *mm;
>
> +       struct kref ref;
> +       struct work_struct release_work;
> +
>         struct mutex mutex;
>
>         /*
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 660d8bc..e02e8a2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -48,11 +48,6 @@ DEFINE_STATIC_SRCU(kfd_processes_srcu);
>
>  static struct workqueue_struct *kfd_process_wq;
>
> -struct kfd_process_release_work {
> -       struct work_struct kfd_work;
> -       struct kfd_process *p;
> -};
> -
>  static struct kfd_process *find_process(const struct task_struct *thread);
>  static struct kfd_process *create_process(const struct task_struct *thread);
>  static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
> @@ -151,21 +146,20 @@ static struct kfd_process *find_process(const struct task_struct *thread)
>         return p;
>  }
>
> +/* No process locking is needed in this function, because the process
> + * is not findable any more. We must assume that no other thread is
> + * using it any more, otherwise we couldn't safely free the process
> + * structure in the end.
> + */
>  static void kfd_process_wq_release(struct work_struct *work)
>  {
> -       struct kfd_process_release_work *my_work;
> +       struct kfd_process *p = container_of(work, struct kfd_process,
> +                                            release_work);
>         struct kfd_process_device *pdd, *temp;
> -       struct kfd_process *p;
> -
> -       my_work = (struct kfd_process_release_work *) work;
> -
> -       p = my_work->p;
>
>         pr_debug("Releasing process (pasid %d) in workqueue\n",
>                         p->pasid);
>
> -       mutex_lock(&p->mutex);
> -
>         list_for_each_entry_safe(pdd, temp, &p->per_device_data,
>                                                         per_device_list) {
>                 pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
> @@ -188,33 +182,26 @@ static void kfd_process_wq_release(struct work_struct *work)
>         kfd_pasid_free(p->pasid);
>         kfd_free_process_doorbells(p);
>
> -       mutex_unlock(&p->mutex);
> -
>         mutex_destroy(&p->mutex);
>
>         put_task_struct(p->lead_thread);
>
>         kfree(p);
> -
> -       kfree(work);
>  }
>
> -static void kfd_process_destroy_delayed(struct rcu_head *rcu)
> +static void kfd_process_ref_release(struct kref *ref)
>  {
> -       struct kfd_process_release_work *work;
> -       struct kfd_process *p;
> -
> -       p = container_of(rcu, struct kfd_process, rcu);
> +       struct kfd_process *p = container_of(ref, struct kfd_process, ref);
>
> -       mmdrop(p->mm);
> +       INIT_WORK(&p->release_work, kfd_process_wq_release);
> +       queue_work(kfd_process_wq, &p->release_work);
> +}
>
> -       work = kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC);
> +static void kfd_process_destroy_delayed(struct rcu_head *rcu)
> +{
> +       struct kfd_process *p = container_of(rcu, struct kfd_process, rcu);
>
> -       if (work) {
> -               INIT_WORK((struct work_struct *) work, kfd_process_wq_release);
> -               work->p = p;
> -               queue_work(kfd_process_wq, (struct work_struct *) work);
> -       }
> +       kref_put(&p->ref, kfd_process_ref_release);
>  }
>
>  static void kfd_process_notifier_release(struct mmu_notifier *mn,
> @@ -258,15 +245,12 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
>         kfd_process_dequeue_from_all_devices(p);
>         pqm_uninit(&p->pqm);
>
> +       /* Indicate to other users that MM is no longer valid */
> +       p->mm = NULL;
> +
>         mutex_unlock(&p->mutex);
>
> -       /*
> -        * Because we drop mm_count inside kfd_process_destroy_delayed
> -        * and because the mmu_notifier_unregister function also drop
> -        * mm_count we need to take an extra count here.
> -        */
> -       mmgrab(p->mm);
> -       mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
> +       mmu_notifier_unregister_no_release(&p->mmu_notifier, mm);
>         mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
>  }
>
> @@ -331,6 +315,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
>         if (kfd_alloc_process_doorbells(process) < 0)
>                 goto err_alloc_doorbells;
>
> +       kref_init(&process->ref);
> +
>         mutex_init(&process->mutex);
>
>         process->mm = thread->mm;
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com>