[PATCH 02/16] drm/amdkfd: Don't dereference kfd_process.mm
Christian König
christian.koenig at amd.com
Fri Oct 27 07:41:19 UTC 2017
Am 27.10.2017 um 09:22 schrieb Christian König:
> Am 26.10.2017 um 20:54 schrieb Felix Kuehling:
>> On 2017-10-26 02:11 PM, Christian König wrote:
>>> But now reading the patch there is something else which I stumbled
>>> over:
>>>> - WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>>> + /*
>>>> + * This cast should be safe here because we grabbed a
>>>> + * reference to the mm in kfd_process_notifier_release
>>>> + */
>>>> + WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <=
>>>> 0);
>>>> mmdrop(p->mm);
>>> Well that isn't good coding style. You shouldn't obfuscate what
>>> pointer it is by changing it to "void*", but rather set it to NULL as
>>> soon as you know that it is stale.
>>>
>>> Additional to that it is certainly not job of the driver to warn on a
>>> run over mm_count.
>> Yeah. We don't have this in our current staging branch. The whole
>> process teardown has changed quite a bit. I just fixed this up to make
>> it work with current upstream.
>>
>> If you prefer, I could just remove the WARN_ON.
>
> That sounds like a good idea to me, yes.
Wait a second, now that I though once more about it what do you mean
with this comment:
> + /*
> + * Opaque pointer to mm_struct. We don't hold a reference to
> + * it so it should never be dereferenced from here. This is
> + * only used for looking up processes by their mm.
> + */
E.g. what means looking up the processes by their mm?
Do you use a hashtable or something like this and then check if you got
the correct mm structure by comparing the pointers?
If that is the case then you should certainly set p->mm to NULL after
mmdrop(p->mm).
Otherwise it can happen that a new mm structure is allocated at the same
location as the old one and you run into problems because the kernel
driver accidentally uses the wrong kfd_process structure.
Regards,
Christian.
>
> Regards,
> Christian.
>
>>
>> Regards,
>> Felix
>>
>>> Regards,
>>> Christian.
>>>
>>>> Regards,
>>>> Felix
>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>>>>> ---
>>>>>> drivers/gpu/drm/amd/amdkfd/kfd_events.c | 19
>>>>>> +++++++++++++++----
>>>>>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 ++++++-
>>>>>> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++++-
>>>>>> 3 files changed, 26 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> index 944abfa..61ce547 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
>>>>>> @@ -24,8 +24,8 @@
>>>>>> #include <linux/slab.h>
>>>>>> #include <linux/types.h>
>>>>>> #include <linux/sched/signal.h>
>>>>>> +#include <linux/sched/mm.h>
>>>>>> #include <linux/uaccess.h>
>>>>>> -#include <linux/mm.h>
>>>>>> #include <linux/mman.h>
>>>>>> #include <linux/memory.h>
>>>>>> #include "kfd_priv.h"
>>>>>> @@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev
>>>>>> *dev, unsigned int pasid,
>>>>>> * running so the lookup function returns a locked process.
>>>>>> */
>>>>>> struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
>>>>>> + struct mm_struct *mm;
>>>>>> if (!p)
>>>>>> return; /* Presumably process exited. */
>>>>>> + /* Take a safe reference to the mm_struct, which may
>>>>>> otherwise
>>>>>> + * disappear even while the kfd_process is still referenced.
>>>>>> + */
>>>>>> + mm = get_task_mm(p->lead_thread);
>>>>>> + if (!mm) {
>>>>>> + mutex_unlock(&p->mutex);
>>>>>> + return; /* Process is exiting */
>>>>>> + }
>>>>>> +
>>>>>> memset(&memory_exception_data, 0,
>>>>>> sizeof(memory_exception_data));
>>>>>> - down_read(&p->mm->mmap_sem);
>>>>>> - vma = find_vma(p->mm, address);
>>>>>> + down_read(&mm->mmap_sem);
>>>>>> + vma = find_vma(mm, address);
>>>>>> memory_exception_data.gpu_id = dev->id;
>>>>>> memory_exception_data.va = address;
>>>>>> @@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
>>>>>> unsigned int pasid,
>>>>>> }
>>>>>> }
>>>>>> - up_read(&p->mm->mmap_sem);
>>>>>> + up_read(&mm->mmap_sem);
>>>>>> + mmput(mm);
>>>>>> mutex_lock(&p->event_mutex);
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> index 7d86ec9..1a483a7 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>>>> @@ -494,7 +494,12 @@ struct kfd_process {
>>>>>> */
>>>>>> struct hlist_node kfd_processes;
>>>>>> - struct mm_struct *mm;
>>>>>> + /*
>>>>>> + * Opaque pointer to mm_struct. We don't hold a reference to
>>>>>> + * it so it should never be dereferenced from here. This is
>>>>>> + * only used for looking up processes by their mm.
>>>>>> + */
>>>>>> + void *mm;
>>>>>> struct mutex mutex;
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> index 3ccb3b5..21d27e5 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>>>>>> @@ -200,7 +200,11 @@ static void kfd_process_destroy_delayed(struct
>>>>>> rcu_head *rcu)
>>>>>> struct kfd_process *p;
>>>>>> p = container_of(rcu, struct kfd_process, rcu);
>>>>>> - WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
>>>>>> + /*
>>>>>> + * This cast should be safe here because we grabbed a
>>>>>> + * reference to the mm in kfd_process_notifier_release
>>>>>> + */
>>>>>> + WARN_ON(atomic_read(&((struct mm_struct *)p->mm)->mm_count) <=
>>>>>> 0);
>>>>>> mmdrop(p->mm);
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
More information about the amd-gfx
mailing list