[PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4

Felix Kuehling felix.kuehling at amd.com
Fri Sep 14 20:21:57 UTC 2018


On 2018-09-14 01:52 PM, Christian König wrote:
> Am 14.09.2018 um 19:47 schrieb Philip Yang:
>> On 2018-09-14 03:51 AM, Christian König wrote:
>>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>>> Replace our MMU notifier with
>>>>> hmm_mirror_ops.sync_cpu_device_pagetables
>>>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>>>> DRM_AMDGPU_USERPTR Kconfig.
>>>>>
>>>>> It supports both KFD userptr and gfx userptr paths.
>>>>>
>>>>> This depends on several HMM patches from Jérôme Glisse queued for
>>>>> upstream.
>>>>>
>>>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121
>>>>> ++++++++++++++-------------------
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>>>>   4 files changed, 56 insertions(+), 75 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> index 9221e54..960a633 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>>>   config DRM_AMDGPU_USERPTR
>>>>>       bool "Always enable userptr write support"
>>>>>       depends on DRM_AMDGPU
>>>>> -    select MMU_NOTIFIER
>>>>> +    select HMM_MIRROR
>>>>>       help
>>>>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>>>> -      selected to enabled full userptr support.
>>>>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if they
>>>>> +      aren't already selected to enable full userptr support.
>>>>>     config DRM_AMDGPU_GART_DEBUGFS
>>>>>       bool "Allow GART access through debugfs"
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> index 138cb78..c1e5d43 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>>> @@ -171,7 +171,7 @@ endif
>>>>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>>>     include $(FULL_AMD_PATH)/powerplay/Makefile
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> index e55508b..ad52f34 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>>> @@ -45,7 +45,7 @@
>>>>>     #include <linux/firmware.h>
>>>>>   #include <linux/module.h>
>>>>> -#include <linux/mmu_notifier.h>
>>>>> +#include <linux/hmm.h>
>>>>>   #include <linux/interval_tree.h>
>>>>>   #include <drm/drmP.h>
>>>>>   #include <drm/drm.h>
>>>>> @@ -66,6 +66,7 @@
>>>> Need to remove @mn documentation.
>>>>
>>>>>    * @objects: interval tree containing amdgpu_mn_nodes
>>>>>    * @read_lock: mutex for recursive locking of @lock
>>>>>    * @recursion: depth of recursion
>>>>> + * @mirror: HMM mirror function support
>>>>>    *
>>>>>    * Data for each amdgpu device and process address space.
>>>>>    */
>>>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>>>       /* constant after initialisation */
>>>>>       struct amdgpu_device    *adev;
>>>>>       struct mm_struct    *mm;
>>>>> -    struct mmu_notifier    mn;
>>>>>       enum amdgpu_mn_type    type;
>>>>>         /* only used on destruction */
>>>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>>>       struct rb_root_cached    objects;
>>>>>       struct mutex        read_lock;
>>>>>       atomic_t        recursion;
>>>>> +
>>>>> +    /* HMM mirror */
>>>>> +    struct hmm_mirror    mirror;
>>>>>   };
>>>>>     /**
>>>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>>>   };
>>>>>     /**
>>>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>>>    *
>>>>>    * @work: previously sheduled work item
>>>>>    *
>>>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct
>>>>> work_struct *work)
>>>>>       }
>>>>>       up_write(&amn->lock);
>>>>>       mutex_unlock(&adev->mn_lock);
>>>>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>>>> +    hmm_mirror_unregister(&amn->mirror);
>>>>> +
>>>>>       kfree(amn);
>>>>>   }
>>>>>     /**
>>>>>    * amdgpu_mn_release - callback to notify about mm destruction
>>>> Update the function name in the comment.
>>>>
>>>>>    *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>>>    *
>>>>> - * Shedule a work item to lazy destroy our notifier.
>>>>> + * Schedule a work item to lazily destroy the HMM mirror.
>>>>>    */
>>>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>>>> -                  struct mm_struct *mm)
>>>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>>>   {
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct
>>>>> amdgpu_mn, mirror);
>>>>>         INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>>>       schedule_work(&amn->work);
>>>>>   }
>>>>>   -
>>>>>   /**
>>>>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>>>>    *
>>>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct
>>>>> amdgpu_mn_node *node,
>>>>>   /**
>>>>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify
>>>>> about mm change
>>>>>    *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> - * @start: start of updated range
>>>>> - * @end: end of updated range
>>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>>> + * @update: the update start, end address
>>>>>    *
>>>>>    * Block for operations on BOs to finish and mark pages as
>>>>> accessed and
>>>>>    * potentially dirty.
>>>>>    */
>>>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct
>>>>> mmu_notifier *mn,
>>>>> -                         struct mm_struct *mm,
>>>>> -                         unsigned long start,
>>>>> -                         unsigned long end,
>>>>> -                         bool blockable)
>>>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror
>>>>> *mirror,
>>>>> +            const struct hmm_update *update)
>>>>>   {
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct
>>>>> amdgpu_mn, mirror);
>>>>> +    unsigned long start = update->start;
>>>>> +    unsigned long end = update->end;
>>>>> +    bool blockable = update->blockable;
>>>>>       struct interval_tree_node *it;
>>>>>         /* notification is exclusive, but interval is inclusive */
>>>>> @@ -278,28 +277,28 @@ static int
>>>>> amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>>>           amdgpu_mn_invalidate_node(node, start, end);
>>>>>       }
>>>>>   +    amdgpu_mn_read_unlock(amn);
>>>>> +
>>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>>> overlapping or nested invalidation ranges. But if you're locking and
>>>> unlocking in the same function, is that still a concern?
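>>>>
>>>> For reference, the recursion handling currently looks roughly like
>>>> this (paraphrased from amdgpu_mn.c, so treat it as a sketch rather
>>>> than the exact code):
>>>>
>>>>     static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
>>>>     {
>>>>         if (blockable)
>>>>             mutex_lock(&amn->read_lock);
>>>>         else if (!mutex_trylock(&amn->read_lock))
>>>>             return -EAGAIN;
>>>>
>>>>         /* only the outermost invalidation takes amn->lock */
>>>>         if (atomic_inc_return(&amn->recursion) == 1)
>>>>             down_read_non_owner(&amn->lock);
>>>>         mutex_unlock(&amn->read_lock);
>>>>
>>>>         return 0;
>>>>     }
>>>>
>>>>     static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
>>>>     {
>>>>         /* only the outermost unlock releases amn->lock */
>>>>         if (atomic_dec_return(&amn->recursion) == 0)
>>>>             up_read_non_owner(&amn->lock);
>>>>     }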
>>>
>> I don't understand the possible recursive case, but
>> amdgpu_mn_read_lock() still supports recursive locking.
>>> Well the real problem is that unlocking them here won't work.
>>>
>>> We need to hold the lock until we are sure that the operation which
>>> updates the page tables is completed.
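>>>
>>> With the old MMU notifier the pairing was (sketch):
>>>
>>>     invalidate_range_start()  ->  amdgpu_mn_read_lock(amn)
>>>     /* core mm updates the CPU page tables in between */
>>>     invalidate_range_end()    ->  amdgpu_mn_read_unlock(amn)
>>>
>>> so the lock was held across the actual page table update.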
>>>
>> The reason for this change is that the HMM mirror has an invalidate_start
>> callback but no invalidate_end callback.
>>
>> Check mmu_notifier.c and hmm.c again; below is the entire logic for
>> updating the CPU page tables and invoking the callback:
>>
>> The mn lock amn->lock is used to protect interval tree access, because
>> userspace may submit/register a new userptr at any time.
>> This is the same for the old and the new way.
>>
>> Step 2 guarantees the GPU operation is done before the CPU page
>> tables are updated.
>>
>> So I think the change is safe. We don't need to hold the mn lock until
>> the CPU page table update is completed.
>
> No, that isn't even remotely correct. The lock doesn't protect the
> interval tree.
>
>>
>> Old:
>>    1. down_read_non_owner(&amn->lock)
>>    2. loop over the interval tree amn->objects nodes and handle the
>> BOs in node->bos
>>        gfx: wait for the pending fence operations on the BOs, mark
>> user pages dirty
>>        kfd: evict the user queues of the process, wait for the queue
>> unmap/map operations to finish
>>    3. update CPU page tables
>>    4. up_read(&amn->lock)
>>
>> New, with steps 3 and 4 switched (see the sketch after this list):
>>    1. down_read_non_owner(&amn->lock)
>>    2. loop over the interval tree amn->objects nodes and handle the
>> BOs in node->bos
>>        gfx: wait for the pending fence operations on the BOs, mark
>> user pages dirty
>>        kfd: evict the user queues of the process, wait for the queue
>> unmap/map operations to finish
>>    3. up_read(&amn->lock)
>>    4. update CPU page tables
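>>
>> A minimal sketch of the new gfx callback, to make the ordering
>> explicit (condensed from this patch; the "..." stands for the
>> unchanged interval tree walk):
>>
>>     static int
>>     amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
>>                                          const struct hmm_update *update)
>>     {
>>         struct amdgpu_mn *amn =
>>             container_of(mirror, struct amdgpu_mn, mirror);
>>
>>         if (amdgpu_mn_read_lock(amn, update->blockable))  /* step 1 */
>>             return -EAGAIN;
>>
>>         ...                          /* step 2: wait for fences on the BOs */
>>
>>         amdgpu_mn_read_unlock(amn);  /* step 3 */
>>         return 0;                    /* step 4 happens in the HMM core */
>>     }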
>
> The lock is there to make sure that we serialize page table updates
> with command submission.
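>
> On the gfx CS side the pairing looks roughly like this today
> (simplified sketch; the validity check between lock and unlock is
> paraphrased, not the literal code):
>
>     amdgpu_mn_lock(p->mn);    /* write side, excludes any invalidation
>                                * that still holds the read side */
>     /* check that the userptr pages are still valid, restart the CS
>      * if not; then add the new fences to the BOs */
>     amdgpu_mn_unlock(p->mn);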

As I understand it, the idea is to prevent command submission (adding
new fences to BOs) while a page table invalidation is in progress. But
do we really need another lock for this? Wouldn't the re-validation of
userptr BOs (currently calling get_user_pages) force synchronization
with the ongoing page table invalidation through the mmap_sem or other
MM locks?
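
Roughly the serialization I have in mind (hand-wavy sketch, not actual
driver code; whether the MM locks really are sufficient here is exactly
my question):

    /* invalidation side, e.g. munmap: */
    down_write(&mm->mmap_sem);
    /* ... sync_cpu_device_pagetables() runs somewhere in here ... */
    up_write(&mm->mmap_sem);

    /* userptr BO re-validation: */
    down_read(&mm->mmap_sem);
    get_user_pages(...);    /* cannot overlap the write-locked section */
    up_read(&mm->mmap_sem);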

Regards,
  Felix


>
> If HMM doesn't provide a callback for the end of the invalidation, then
> it can't be used for this.
>
> Adding Jerome as well, since we are certainly missing something here.
>
> Regards,
> Christian.
>
>>
>> Regards,
>> Philip
>>> Christian.
>>>
>>>>
>>>>>       return 0;
>>>>>   }
>>>>>     /**
>>>>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify
>>>>> about mm change
>>>>>    *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> - * @start: start of updated range
>>>>> - * @end: end of updated range
>>>>> + * @mirror: the hmm_mirror (mm) is about to update
>>>>> + * @update: the update start, end address
>>>>>    *
>>>>>    * We temporarily evict all BOs between start and end. This
>>>>>    * necessitates evicting all user-mode queues of the process.
>>>>> The BOs
>>>>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>>>    */
>>>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct
>>>>> mmu_notifier *mn,
>>>>> -                         struct mm_struct *mm,
>>>>> -                         unsigned long start,
>>>>> -                         unsigned long end,
>>>>> -                         bool blockable)
>>>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror
>>>>> *mirror,
>>>>> +            const struct hmm_update *update)
>>>>>   {
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> +    struct amdgpu_mn *amn = container_of(mirror, struct
>>>>> amdgpu_mn, mirror);
>>>>> +    unsigned long start = update->start;
>>>>> +    unsigned long end = update->end;
>>>>> +    bool blockable = update->blockable;
>>>>>       struct interval_tree_node *it;
>>>>>         /* notification is exclusive, but interval is inclusive */
>>>>> @@ -326,59 +325,41 @@ static int
>>>>> amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>>>                 if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>>>                                start, end))
>>>>> -                amdgpu_amdkfd_evict_userptr(mem, mm);
>>>>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>>>           }
>>>>>       }
>>>>>   +    amdgpu_mn_read_unlock(amn);
>>>>> +
>>>>>       return 0;
>>>>>   }
>>>>>   -/**
>>>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm
>>>>> change
>>>>> - *
>>>>> - * @mn: our notifier
>>>>> - * @mm: the mm this callback is about
>>>>> - * @start: start of updated range
>>>>> - * @end: end of updated range
>>>>> - *
>>>>> - * Release the lock again to allow new command submissions.
>>>>> +/* Low bits of any reasonable mm pointer will be unused due to
>>>>> struct
>>>>> + * alignment. Use these bits to make a unique key from the mm
>>>>> pointer
>>>>> + * and notifier type.
>>>>>    */
>>>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>>>> -                       struct mm_struct *mm,
>>>>> -                       unsigned long start,
>>>>> -                       unsigned long end)
>>>>> -{
>>>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>>> -
>>>>> -    amdgpu_mn_read_unlock(amn);
>>>>> -}
>>>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>>>   -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>>>       [AMDGPU_MN_TYPE_GFX] = {
>>>>> -        .release = amdgpu_mn_release,
>>>>> -        .invalidate_range_start =
>>>>> amdgpu_mn_invalidate_range_start_gfx,
>>>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>>> +        .sync_cpu_device_pagetables =
>>>>> +                amdgpu_mn_invalidate_range_start_gfx,
>>>>> +        .release = amdgpu_hmm_mirror_release
>>>>>       },
>>>>>       [AMDGPU_MN_TYPE_HSA] = {
>>>>> -        .release = amdgpu_mn_release,
>>>>> -        .invalidate_range_start =
>>>>> amdgpu_mn_invalidate_range_start_hsa,
>>>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>>> +        .sync_cpu_device_pagetables =
>>>>> +                amdgpu_mn_invalidate_range_start_hsa,
>>>>> +        .release = amdgpu_hmm_mirror_release
>>>>>       },
>>>>>   };
>>>>>   -/* Low bits of any reasonable mm pointer will be unused due to
>>>>> struct
>>>>> - * alignment. Use these bits to make a unique key from the mm
>>>>> pointer
>>>>> - * and notifier type.
>>>>> - */
>>>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>>> -
>>>>>   /**
>>>>> - * amdgpu_mn_get - create notifier context
>>>>> + * amdgpu_mn_get - create HMM mirror context
>>>>>    *
>>>>>    * @adev: amdgpu device pointer
>>>>>    * @type: type of MMU notifier context
>>>>>    *
>>>>> - * Creates a notifier context for current->mm.
>>>>> + * Creates a HMM mirror context for current->mm.
>>>>>    */
>>>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>>>                   enum amdgpu_mn_type type)
>>>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>>>> amdgpu_device *adev,
>>>>>       amn->mm = mm;
>>>>>       init_rwsem(&amn->lock);
>>>>>       amn->type = type;
>>>>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>>>>       amn->objects = RB_ROOT_CACHED;
>>>>>       mutex_init(&amn->read_lock);
>>>>>       atomic_set(&amn->recursion, 0);
>>>>>   -    r = __mmu_notifier_register(&amn->mn, mm);
>>>>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>>>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>>>>       if (r)
>>>>>           goto free_amn;
>>>>>   @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>>>> amdgpu_device *adev,
>>>>>    * @bo: amdgpu buffer object
>>>>>    * @addr: userptr addr we should monitor
>>>>>    *
>>>>> - * Registers an MMU notifier for the given BO at the specified
>>>>> address.
>>>>> + * Registers an HMM mirror for the given BO at the specified
>>>>> address.
>>>>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>>>>    */
>>>>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
>>>>> unsigned long addr)
>>>>>   }
>>>>>     /**
>>>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>>>    *
>>>>>    * @bo: amdgpu buffer object
>>>>>    *
>>>>> - * Remove any registration of MMU notifier updates from the
>>>>> buffer object.
>>>>> + * Remove any registration of HMM mirror updates from the buffer
>>>>> object.
>>>>>    */
>>>>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>>>   {
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> index eb0f432..0e27526 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>>>       AMDGPU_MN_TYPE_HSA,
>>>>>   };
>>>>>   -#if defined(CONFIG_MMU_NOTIFIER)
>>>>> +#if defined(CONFIG_HMM)
>>>>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx at lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
>>
>


