[PATCH 02/16] drm/amdkfd: Don't dereference kfd_process.mm

Christian König ckoenig.leichtzumerken at gmail.com
Fri Oct 27 07:22:01 UTC 2017


Am 26.10.2017 um 20:54 schrieb Felix Kuehling:
> On 2017-10-26 02:11 PM, Christian König wrote:
>> But now reading the patch there is something else which I stumbled over:
>>> -    WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>> +    /*
>>> +     * This cast should be safe here because we grabbed a
>>> +     * reference to the mm in kfd_process_notifier_release
>>> +     */
>>> +    WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <= 0);
>>>           mmdrop(p->mm);
>> Well that isn't good coding style. You shouldn't obfuscate what
>> pointer it is by changing it to "void*", but rather set it to NULL as
>> soon as you know that it is stale.
>>
>> Additional to that it is certainly not job of the driver to warn on a
>> run over mm_count.
> Yeah. We don't have this in our current staging branch. The whole
> process teardown has changed quite a bit. I just fixed this up to make
> it work with current upstream.
>
> If you prefer, I could just remove the WARN_ON.

That sounds like a good idea to me, yes.

Regards,
Christian.

>
> Regards,
>    Felix
>
>> Regards,
>> Christian.
>>
>>> Regards,
>>>     Felix
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 19 +++++++++++++++----
>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++++-
>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_process.c |  6 +++++-
>>>>>     3 files changed, 26 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>> index 944abfa..61ce547 100644
>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>> @@ -24,8 +24,8 @@
>>>>>     #include <linux/slab.h>
>>>>>     #include <linux/types.h>
>>>>>     #include <linux/sched/signal.h>
>>>>> +#include <linux/sched/mm.h>
>>>>>     #include <linux/uaccess.h>
>>>>> -#include <linux/mm.h>
>>>>>     #include <linux/mman.h>
>>>>>     #include <linux/memory.h>
>>>>>     #include "kfd_priv.h"
>>>>> @@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev
>>>>> *dev, unsigned int pasid,
>>>>>          * running so the lookup function returns a locked process.
>>>>>          */
>>>>>         struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>>>>> +    struct mm_struct *mm;
>>>>>           if (!p)
>>>>>             return; /* Presumably process exited. */
>>>>>     +    /* Take a safe reference to the mm_struct, which may otherwise
>>>>> +     * disappear even while the kfd_process is still referenced.
>>>>> +     */
>>>>> +    mm = get_task_mm(p->lead_thread);
>>>>> +    if (!mm) {
>>>>> +        mutex_unlock(&p->mutex);
>>>>> +        return; /* Process is exiting */
>>>>> +    }
>>>>> +
>>>>>         memset(&memory_exception_data, 0,
>>>>> sizeof(memory_exception_data));
>>>>>     -    down_read(&p->mm->mmap_sem);
>>>>> -    vma = find_vma(p->mm, address);
>>>>> +    down_read(&mm->mmap_sem);
>>>>> +    vma = find_vma(mm, address);
>>>>>           memory_exception_data.gpu_id = dev->id;
>>>>>         memory_exception_data.va = address;
>>>>> @@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
>>>>> unsigned int pasid,
>>>>>             }
>>>>>         }
>>>>>     -    up_read(&p->mm->mmap_sem);
>>>>> +    up_read(&mm->mmap_sem);
>>>>> +    mmput(mm);
>>>>>           mutex_lock(&p->event_mutex);
>>>>>     diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>> index 7d86ec9..1a483a7 100644
>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>> @@ -494,7 +494,12 @@ struct kfd_process {
>>>>>          */
>>>>>         struct hlist_node kfd_processes;
>>>>>     -    struct mm_struct *mm;
>>>>> +    /*
>>>>> +     * Opaque pointer to mm_struct. We don't hold a reference to
>>>>> +     * it so it should never be dereferenced from here. This is
>>>>> +     * only used for looking up processes by their mm.
>>>>> +     */
>>>>> +    void *mm;
>>>>>           struct mutex mutex;
>>>>>     diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>> index 3ccb3b5..21d27e5 100644
>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>> @@ -200,7 +200,11 @@ static void kfd_process_destroy_delayed(struct
>>>>> rcu_head *rcu)
>>>>>         struct kfd_process *p;
>>>>>           p = container_of(rcu, struct kfd_process, rcu);
>>>>> -    WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>>>> +    /*
>>>>> +     * This cast should be safe here because we grabbed a
>>>>> +     * reference to the mm in kfd_process_notifier_release
>>>>> +     */
>>>>> +    WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <=
>>>>> 0);
>>>>>           mmdrop(p->mm);
>>>>>     
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx




More information about the amd-gfx mailing list