[PATCH 02/17] drm/amdkfd: Don't dereference kfd_process.mm v2

Christian König christian.koenig at amd.com
Fri Nov 3 13:08:35 UTC 2017


Am 30.10.2017 um 17:23 schrieb Felix Kuehling:
> On 2017-10-28 10:44 AM, Christian König wrote:
>> Am 28.10.2017 um 01:35 schrieb Felix Kuehling:
>>> The kfd_process doesn't own a reference to the mm_struct, so it can
>>> disappear without warning even while the kfd_process still exists.
>>>
>>> Therefore, avoid dereferencing the kfd_process.mm pointer and make
>>> it opaque. Use get_task_mm to get a temporary reference to the mm
>>> when it's needed.
>>>
>>> v2: removed unnecessary WARN_ON
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>> Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com> (v1)
>>> ---
>>>    drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 19 +++++++++++++++----
>>>    drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++++-
>>>    drivers/gpu/drm/amd/amdkfd/kfd_process.c |  1 -
>>>    3 files changed, 21 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> index 944abfa..61ce547 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>> @@ -24,8 +24,8 @@
>>>    #include <linux/slab.h>
>>>    #include <linux/types.h>
>>>    #include <linux/sched/signal.h>
>>> +#include <linux/sched/mm.h>
>>>    #include <linux/uaccess.h>
>>> -#include <linux/mm.h>
>>>    #include <linux/mman.h>
>>>    #include <linux/memory.h>
>>>    #include "kfd_priv.h"
>>> @@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev
>>> *dev, unsigned int pasid,
>>>         * running so the lookup function returns a locked process.
>>>         */
>>>        struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>>> +    struct mm_struct *mm;
>>>          if (!p)
>>>            return; /* Presumably process exited. */
>>>    +    /* Take a safe reference to the mm_struct, which may otherwise
>>> +     * disappear even while the kfd_process is still referenced.
>>> +     */
>>> +    mm = get_task_mm(p->lead_thread);
>>> +    if (!mm) {
>>> +        mutex_unlock(&p->mutex);
>>> +        return; /* Process is exiting */
>>> +    }
>>> +
>>>        memset(&memory_exception_data, 0, sizeof(memory_exception_data));
>>>    -    down_read(&p->mm->mmap_sem);
>>> -    vma = find_vma(p->mm, address);
>>> +    down_read(&mm->mmap_sem);
>>> +    vma = find_vma(mm, address);
>>>          memory_exception_data.gpu_id = dev->id;
>>>        memory_exception_data.va = address;
>>> @@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
>>> unsigned int pasid,
>>>            }
>>>        }
>>>    -    up_read(&p->mm->mmap_sem);
>>> +    up_read(&mm->mmap_sem);
>>> +    mmput(mm);
>>>          mutex_lock(&p->event_mutex);
>>>    diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 7d86ec9..1a483a7 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -494,7 +494,12 @@ struct kfd_process {
>>>         */
>>>        struct hlist_node kfd_processes;
>>>    -    struct mm_struct *mm;
>>> +    /*
>>> +     * Opaque pointer to mm_struct. We don't hold a reference to
>>> +     * it so it should never be dereferenced from here. This is
>>> +     * only used for looking up processes by their mm.
>>> +     */
>>> +    void *mm;
>> Using a void* here still doesn't look like the right thing to do.
> For added context, this is how it's used:
>
>      static struct kfd_process *create_process(const struct task_struct *thread)
>      {
>      ...
>              process->mm = thread->mm;
>      ...
>              hash_add_rcu(kfd_processes_table, &process->kfd_processes,
>                              (uintptr_t)process->mm);
>      ...
>      }
>
>      static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
>      {
>              struct kfd_process *process;
>
>              hash_for_each_possible_rcu(kfd_processes_table, process,
>                                              kfd_processes, (uintptr_t)mm)
>                      if (process->mm == mm)
>                              return process;
>
>              return NULL;
>      }

In this case I would make p->mm a uintptr_t and not void*. I would use 
void* only if you really don't know what pointer it is.

Anyway the patch is fine with me as it is, just wanted to understand 
what's happening here.

Regards,
Christian.

>
>
>>>          struct mutex mutex;
>>>    diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> index 3ccb3b5..695fa2a 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>> @@ -200,7 +200,6 @@ static void kfd_process_destroy_delayed(struct
>>> rcu_head *rcu)
>>>        struct kfd_process *p;
>>>          p = container_of(rcu, struct kfd_process, rcu);
>>> -    WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>>          mmdrop(p->mm);
>> If you are concerned that p->mm is used after this just set it to NULL.
> I'm not concerned about it.
>
> Regards,
>    Felix
>
>> Regards,
>> Christian.
>>
>>>    
>>



More information about the amd-gfx mailing list