[PATCH] drm/amdkfd: Fix incorrect use of process->mm
Oded Gabbay
oded.gabbay at gmail.com
Wed Oct 3 19:03:07 UTC 2018
On Wed, Oct 3, 2018 at 1:41 AM Felix Kuehling <Felix.Kuehling at amd.com> wrote:
>
> This mm_struct pointer should never be dereferenced. If running in
> a user thread, just use current->mm. If running in a kernel worker
> use get_task_mm to get a safe reference to the mm_struct.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 37 +++++++++++++++++-----
> 1 file changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index d6af31c..3bc0651d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -358,8 +358,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
> struct queue *q,
> struct qcm_process_device *qpd)
> {
> - int retval;
> struct mqd_manager *mqd_mgr;
> + int retval;
>
> mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
> if (!mqd_mgr)
> @@ -387,8 +387,12 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
> if (!q->properties.is_active)
> return 0;
>
> - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> - &q->properties, q->process->mm);
> + if (WARN(q->process->mm != current->mm,
> + "should only run in user thread"))
> + retval = -EFAULT;
> + else
> + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> + &q->properties, current->mm);
> if (retval)
> goto out_uninit_mqd;
>
> @@ -545,9 +549,15 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
> retval = map_queues_cpsch(dqm);
> else if (q->properties.is_active &&
> (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> - q->properties.type == KFD_QUEUE_TYPE_SDMA))
> - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
> - &q->properties, q->process->mm);
> + q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
> + if (WARN(q->process->mm != current->mm,
> + "should only run in user thread"))
> + retval = -EFAULT;
> + else
> + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
> + q->pipe, q->queue,
> + &q->properties, current->mm);
> + }
>
> out_unlock:
> dqm_unlock(dqm);
> @@ -653,6 +663,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
> static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
> struct qcm_process_device *qpd)
> {
> + struct mm_struct *mm = NULL;
> struct queue *q;
> struct mqd_manager *mqd_mgr;
> struct kfd_process_device *pdd;
> @@ -686,6 +697,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
> kfd_flush_tlb(pdd);
> }
>
> + /* Take a safe reference to the mm_struct, which may otherwise
> + * disappear even while the kfd_process is still referenced.
> + */
> + mm = get_task_mm(pdd->process->lead_thread);
> + if (!mm) {
> + retval = -EFAULT;
> + goto out;
> + }
> +
> /* activate all active queues on the qpd */
> list_for_each_entry(q, &qpd->queues_list, list) {
> if (!q->properties.is_evicted)
> @@ -700,14 +720,15 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
> q->properties.is_evicted = false;
> q->properties.is_active = true;
> retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
> - q->queue, &q->properties,
> - q->process->mm);
> + q->queue, &q->properties, mm);
> if (retval)
> goto out;
> dqm->queue_count++;
> }
> qpd->evicted = 0;
> out:
> + if (mm)
> + mmput(mm);
> dqm_unlock(dqm);
> return retval;
> }
> --
> 2.7.4
>
This patch is:
Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com>
More information about the amd-gfx
mailing list