[PATCH 02/17] drm/amdkfd: Don't dereference kfd_process.mm v2

Felix Kuehling felix.kuehling at amd.com
Mon Oct 30 16:23:29 UTC 2017


On 2017-10-28 10:44 AM, Christian König wrote:
> Am 28.10.2017 um 01:35 schrieb Felix Kuehling:
>> The kfd_process doesn't own a reference to the mm_struct, so it can
>> disappear without warning even while the kfd_process still exists.
>>
>> Therefore, avoid dereferencing the kfd_process.mm pointer and make
>> it opaque. Use get_task_mm to get a temporary reference to the mm
>> when it's needed.
>>
>> v2: removed unnecessary WARN_ON
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> Reviewed-by: Oded Gabbay <oded.gabbay at gmail.com> (v1)
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 19 +++++++++++++++----
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++++-
>>   drivers/gpu/drm/amd/amdkfd/kfd_process.c |  1 -
>>   3 files changed, 21 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> index 944abfa..61ce547 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>> @@ -24,8 +24,8 @@
>>   #include <linux/slab.h>
>>   #include <linux/types.h>
>>   #include <linux/sched/signal.h>
>> +#include <linux/sched/mm.h>
>>   #include <linux/uaccess.h>
>> -#include <linux/mm.h>
>>   #include <linux/mman.h>
>>   #include <linux/memory.h>
>>   #include "kfd_priv.h"
>> @@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev
>> *dev, unsigned int pasid,
>>        * running so the lookup function returns a locked process.
>>        */
>>       struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>> +    struct mm_struct *mm;
>>         if (!p)
>>           return; /* Presumably process exited. */
>>   +    /* Take a safe reference to the mm_struct, which may otherwise
>> +     * disappear even while the kfd_process is still referenced.
>> +     */
>> +    mm = get_task_mm(p->lead_thread);
>> +    if (!mm) {
>> +        mutex_unlock(&p->mutex);
>> +        return; /* Process is exiting */
>> +    }
>> +
>>       memset(&memory_exception_data, 0, sizeof(memory_exception_data));
>>   -    down_read(&p->mm->mmap_sem);
>> -    vma = find_vma(p->mm, address);
>> +    down_read(&mm->mmap_sem);
>> +    vma = find_vma(mm, address);
>>         memory_exception_data.gpu_id = dev->id;
>>       memory_exception_data.va = address;
>> @@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
>> unsigned int pasid,
>>           }
>>       }
>>   -    up_read(&p->mm->mmap_sem);
>> +    up_read(&mm->mmap_sem);
>> +    mmput(mm);
>>         mutex_lock(&p->event_mutex);
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 7d86ec9..1a483a7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -494,7 +494,12 @@ struct kfd_process {
>>        */
>>       struct hlist_node kfd_processes;
>>   -    struct mm_struct *mm;
>> +    /*
>> +     * Opaque pointer to mm_struct. We don't hold a reference to
>> +     * it so it should never be dereferenced from here. This is
>> +     * only used for looking up processes by their mm.
>> +     */
>> +    void *mm;
>
> Using a void* here still doesn't look like the right thing to do.

For added context, this is how it's used:

    static struct kfd_process *create_process(const struct task_struct *thread)
    {
    ...
            process->mm = thread->mm;
    ...
            hash_add_rcu(kfd_processes_table, &process->kfd_processes,
                            (uintptr_t)process->mm);
    ...
    }

    static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
    {
            struct kfd_process *process;

            hash_for_each_possible_rcu(kfd_processes_table, process,
                                            kfd_processes, (uintptr_t)mm)
                    if (process->mm == mm)
                            return process;

            return NULL;
    }


>
>>         struct mutex mutex;
>>   diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 3ccb3b5..695fa2a 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -200,7 +200,6 @@ static void kfd_process_destroy_delayed(struct
>> rcu_head *rcu)
>>       struct kfd_process *p;
>>         p = container_of(rcu, struct kfd_process, rcu);
>> -    WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>         mmdrop(p->mm);
>
> If you are concerned that p->mm is used after this just set it to NULL.

I'm not concerned about it.

Regards,
  Felix

>
> Regards,
> Christian.
>
>>   
>
>



More information about the amd-gfx mailing list