[PATCH] drm/amdkfd: Fix incorrect use of process->mm

Oded Gabbay oded.gabbay at gmail.com
Wed Oct 3 19:03:07 UTC 2018


On Wed, Oct 3, 2018 at 1:41 AM Felix Kuehling <Felix.Kuehling at amd.com> wrote:
>
> This mm_struct pointer should never be dereferenced. If running in
> a user thread, just use current->mm. If running in a kernel worker
> use get_task_mm to get a safe reference to the mm_struct.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 37 +++++++++++++++++-----
>  1 file changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index d6af31c..3bc0651d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>                                         struct queue *q,
>                                         struct qcm_process_device *qpd)
>  {
> -       int retval;
>         struct mqd_manager *mqd_mgr;
> +       int retval;
>
>         mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
>         if (!mqd_mgr)
> @@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
>         if (!q->properties.is_active)
>                 return 0;
>
> -       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> -                       &q->properties, q->process->mm);
> +       if (WARN(q->process->mm != current->mm,
> +                "should only run in user thread"))
> +               retval = -EFAULT;
> +       else
> +               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> +                                          &q->properties, current->mm);
>         if (retval)
>                 goto out_uninit_mqd;
>
> @@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>                 retval = map_queues_cpsch(dqm);
>         else if (q->properties.is_active &&
>                  (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> -                 q->properties.type == KFD_QUEUE_TYPE_SDMA))
> -               retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> -                                      &q->properties, q->process->mm);
> +                 q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
> +               if (WARN(q->process->mm != current->mm,
> +                        "should only run in user thread"))
> +                       retval = -EFAULT;
> +               else
> +                       retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
> +                                                  q->pipe, q->queue,
> +                                                  &q->properties, current->mm);
> +       }
>
>  out_unlock:
>         dqm_unlock(dqm);
> @@ -653,6 +663,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
>  static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                                           struct qcm_process_device *qpd)
>  {
> +       struct mm_struct *mm = NULL;
>         struct queue *q;
>         struct mqd_manager *mqd_mgr;
>         struct kfd_process_device *pdd;
> @@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                 kfd_flush_tlb(pdd);
>         }
>
> +       /* Take a safe reference to the mm_struct, which may otherwise
> +        * disappear even while the kfd_process is still referenced.
> +        */
> +       mm = get_task_mm(pdd->process->lead_thread);
> +       if (!mm) {
> +               retval = -EFAULT;
> +               goto out;
> +       }
> +
>         /* activate all active queues on the qpd */
>         list_for_each_entry(q, &qpd->queues_list, list) {
>                 if (!q->properties.is_evicted)
> @@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
>                 q->properties.is_evicted = false;
>                 q->properties.is_active = true;
>                 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
> -                                      q->queue, &q->properties,
> -                                      q->process->mm);
> +                                      q->queue, &q->properties, mm);
>                 if (retval)
>                         goto out;
>                 dqm->queue_count++;
>         }
>         qpd->evicted = 0;
>  out:
> +       if (mm)
> +               mmput(mm);
>         dqm_unlock(dqm);
>         return retval;
>  }
> --
> 2.7.4
>

This patch is:
Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com>


More information about the amd-gfx mailing list