[PATCH 02/16] drm/amdkfd: Don't dereference kfd_process.mm

Felix Kuehling felix.kuehling at amd.com
Thu Oct 26 18:54:54 UTC 2017


On 2017-10-26 02:11 PM, Christian König wrote:
> But now reading the patch there is something else which I stumbled over:
>> -    WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>> +    /*
>> +     * This cast should be safe here because we grabbed a
>> +     * reference to the mm in kfd_process_notifier_release
>> +     */
>> +    WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <= 0);
>>          mmdrop(p->mm);
> Well that isn't good coding style. You shouldn't obfuscate what
> pointer it is by changing it to "void*", but rather set it to NULL as
> soon as you know that it is stale.
>
> Additional to that it is certainly not job of the driver to warn on a
> run over mm_count.

Yeah. We don't have this in our current staging branch. The whole
process teardown has changed quite a bit. I just fixed this up to make
it work with current upstream.

If you prefer, I could just remove the WARN_ON.

Regards,
  Felix

>
> Regards,
> Christian.
>
>>
>> Regards,
>>    Felix
>>
>>> Regards,
>>> Christian.
>>>
>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 19 +++++++++++++++----
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++++-
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_process.c |  6 +++++-
>>>>    3 files changed, 26 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>> index 944abfa..61ce547 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>> @@ -24,8 +24,8 @@
>>>>    #include <linux/slab.h>
>>>>    #include <linux/types.h>
>>>>    #include <linux/sched/signal.h>
>>>> +#include <linux/sched/mm.h>
>>>>    #include <linux/uaccess.h>
>>>> -#include <linux/mm.h>
>>>>    #include <linux/mman.h>
>>>>    #include <linux/memory.h>
>>>>    #include "kfd_priv.h"
>>>> @@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev
>>>> *dev, unsigned int pasid,
>>>>         * running so the lookup function returns a locked process.
>>>>         */
>>>>        struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>>>> +    struct mm_struct *mm;
>>>>          if (!p)
>>>>            return; /* Presumably process exited. */
>>>>    +    /* Take a safe reference to the mm_struct, which may otherwise
>>>> +     * disappear even while the kfd_process is still referenced.
>>>> +     */
>>>> +    mm = get_task_mm(p->lead_thread);
>>>> +    if (!mm) {
>>>> +        mutex_unlock(&p->mutex);
>>>> +        return; /* Process is exiting */
>>>> +    }
>>>> +
>>>>        memset(&memory_exception_data, 0,
>>>> sizeof(memory_exception_data));
>>>>    -    down_read(&p->mm->mmap_sem);
>>>> -    vma = find_vma(p->mm, address);
>>>> +    down_read(&mm->mmap_sem);
>>>> +    vma = find_vma(mm, address);
>>>>          memory_exception_data.gpu_id = dev->id;
>>>>        memory_exception_data.va = address;
>>>> @@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
>>>> unsigned int pasid,
>>>>            }
>>>>        }
>>>>    -    up_read(&p->mm->mmap_sem);
>>>> +    up_read(&mm->mmap_sem);
>>>> +    mmput(mm);
>>>>          mutex_lock(&p->event_mutex);
>>>>    diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> index 7d86ec9..1a483a7 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> @@ -494,7 +494,12 @@ struct kfd_process {
>>>>         */
>>>>        struct hlist_node kfd_processes;
>>>>    -    struct mm_struct *mm;
>>>> +    /*
>>>> +     * Opaque pointer to mm_struct. We don't hold a reference to
>>>> +     * it so it should never be dereferenced from here. This is
>>>> +     * only used for looking up processes by their mm.
>>>> +     */
>>>> +    void *mm;
>>>>          struct mutex mutex;
>>>>    diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> index 3ccb3b5..21d27e5 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>> @@ -200,7 +200,11 @@ static void kfd_process_destroy_delayed(struct
>>>> rcu_head *rcu)
>>>>        struct kfd_process *p;
>>>>          p = container_of(rcu, struct kfd_process, rcu);
>>>> -    WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>>> +    /*
>>>> +     * This cast should be safe here because we grabbed a
>>>> +     * reference to the mm in kfd_process_notifier_release
>>>> +     */
>>>> +    WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <=
>>>> 0);
>>>>          mmdrop(p->mm);
>>>>    
>>>
>



More information about the amd-gfx mailing list