[PATCH 02/16] drm/amdkfd: Don't dereference kfd_process.mm

Christian König christian.koenig at amd.com
Fri Oct 27 07:41:19 UTC 2017


Am 27.10.2017 um 09:22 schrieb Christian König:
> Am 26.10.2017 um 20:54 schrieb Felix Kuehling:
>> On 2017-10-26 02:11 PM, Christian König wrote:
>>> But now reading the patch there is something else which I stumbled 
>>> over:
>>>> - WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>>> +    /*
>>>> +     * This cast should be safe here because we grabbed a
>>>> +     * reference to the mm in kfd_process_notifier_release
>>>> +     */
>>>> +    WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <= 
>>>> 0);
>>>>           mmdrop(p->mm);
>>> Well that isn't good coding style. You shouldn't obfuscate what
>>> pointer it is by changing it to "void*", but rather set it to NULL as
>>> soon as you know that it is stale.
>>>
>>> Additional to that it is certainly not job of the driver to warn on a
>>> run over mm_count.
>> Yeah. We don't have this in our current staging branch. The whole
>> process teardown has changed quite a bit. I just fixed this up to make
>> it work with current upstream.
>>
>> If you prefer, I could just remove the WARN_ON.
>
> That sounds like a good idea to me, yes.

Wait a second, now that I though once more about it what do you mean 
with this comment:
> +    /*
> +     * Opaque pointer to mm_struct. We don't hold a reference to
> +     * it so it should never be dereferenced from here. This is
> +     * only used for looking up processes by their mm.
> +     */ 
E.g. what means looking up the processes by their mm?

Do you use a hashtable or something like this and then check if you got 
the correct mm structure by comparing the pointers?

If that is the case then you should certainly set p->mm to NULL after 
mmdrop(p->mm).

Otherwise it can happen that a new mm structure is allocated at the same 
location as the old one and you run into problems because the kernel 
driver accidentally uses the wrong kfd_process structure.

Regards,
Christian.

>
> Regards,
> Christian.
>
>>
>> Regards,
>>    Felix
>>
>>> Regards,
>>> Christian.
>>>
>>>> Regards,
>>>>     Felix
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 19 
>>>>>> +++++++++++++++----
>>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++++-
>>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_process.c |  6 +++++-
>>>>>>     3 files changed, 26 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> index 944abfa..61ce547 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> @@ -24,8 +24,8 @@
>>>>>>     #include <linux/slab.h>
>>>>>>     #include <linux/types.h>
>>>>>>     #include <linux/sched/signal.h>
>>>>>> +#include <linux/sched/mm.h>
>>>>>>     #include <linux/uaccess.h>
>>>>>> -#include <linux/mm.h>
>>>>>>     #include <linux/mman.h>
>>>>>>     #include <linux/memory.h>
>>>>>>     #include "kfd_priv.h"
>>>>>> @@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev
>>>>>> *dev, unsigned int pasid,
>>>>>>          * running so the lookup function returns a locked process.
>>>>>>          */
>>>>>>         struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>>>>>> +    struct mm_struct *mm;
>>>>>>           if (!p)
>>>>>>             return; /* Presumably process exited. */
>>>>>>     +    /* Take a safe reference to the mm_struct, which may 
>>>>>> otherwise
>>>>>> +     * disappear even while the kfd_process is still referenced.
>>>>>> +     */
>>>>>> +    mm = get_task_mm(p->lead_thread);
>>>>>> +    if (!mm) {
>>>>>> +        mutex_unlock(&p->mutex);
>>>>>> +        return; /* Process is exiting */
>>>>>> +    }
>>>>>> +
>>>>>>         memset(&memory_exception_data, 0,
>>>>>> sizeof(memory_exception_data));
>>>>>>     -    down_read(&p->mm->mmap_sem);
>>>>>> -    vma = find_vma(p->mm, address);
>>>>>> +    down_read(&mm->mmap_sem);
>>>>>> +    vma = find_vma(mm, address);
>>>>>>           memory_exception_data.gpu_id = dev->id;
>>>>>>         memory_exception_data.va = address;
>>>>>> @@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
>>>>>> unsigned int pasid,
>>>>>>             }
>>>>>>         }
>>>>>>     -    up_read(&p->mm->mmap_sem);
>>>>>> +    up_read(&mm->mmap_sem);
>>>>>> +    mmput(mm);
>>>>>>           mutex_lock(&p->event_mutex);
>>>>>>     diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> index 7d86ec9..1a483a7 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> @@ -494,7 +494,12 @@ struct kfd_process {
>>>>>>          */
>>>>>>         struct hlist_node kfd_processes;
>>>>>>     -    struct mm_struct *mm;
>>>>>> +    /*
>>>>>> +     * Opaque pointer to mm_struct. We don't hold a reference to
>>>>>> +     * it so it should never be dereferenced from here. This is
>>>>>> +     * only used for looking up processes by their mm.
>>>>>> +     */
>>>>>> +    void *mm;
>>>>>>           struct mutex mutex;
>>>>>>     diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> index 3ccb3b5..21d27e5 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> @@ -200,7 +200,11 @@ static void kfd_process_destroy_delayed(struct
>>>>>> rcu_head *rcu)
>>>>>>         struct kfd_process *p;
>>>>>>           p = container_of(rcu, struct kfd_process, rcu);
>>>>>> -    WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>>>>> +    /*
>>>>>> +     * This cast should be safe here because we grabbed a
>>>>>> +     * reference to the mm in kfd_process_notifier_release
>>>>>> +     */
>>>>>> +    WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <=
>>>>>> 0);
>>>>>>           mmdrop(p->mm);
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>



More information about the amd-gfx mailing list