[PATCH] drm/amdgpu: use HMM mirror callback to replace mmu notifier v4
Christian König
christian.koenig at amd.com
Fri Sep 14 17:52:14 UTC 2018
On 2018-09-14 07:47 PM, Philip Yang wrote:
> On 2018-09-14 03:51 AM, Christian König wrote:
>> Am 13.09.2018 um 23:51 schrieb Felix Kuehling:
>>> On 2018-09-13 04:52 PM, Philip Yang wrote:
>>>> Replace our MMU notifier with the
>>>> hmm_mirror_ops.sync_cpu_device_pagetables
>>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>>> DRM_AMDGPU_USERPTR Kconfig.
>>>>
>>>> It supports both KFD userptr and gfx userptr paths.
>>>>
>>>> This depends on several HMM patchsets from Jérôme Glisse queued for
>>>> upstream.
>>>>
>>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
>>>> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 121 ++++++++++++++-------------------
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
>>>> 4 files changed, 56 insertions(+), 75 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> index 9221e54..960a633 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>> config DRM_AMDGPU_USERPTR
>>>> bool "Always enable userptr write support"
>>>> depends on DRM_AMDGPU
>>>> - select MMU_NOTIFIER
>>>> + select HMM_MIRROR
>>>> help
>>>> - This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>>> - selected to enabled full userptr support.
>>>> + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>>> + isn't already selected to enable full userptr support.
>>>> config DRM_AMDGPU_GART_DEBUGFS
>>>> bool "Allow GART access through debugfs"
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> index 138cb78..c1e5d43 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>>> @@ -171,7 +171,7 @@ endif
>>>> amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>> amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>> amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>>> +amdgpu-$(CONFIG_HMM) += amdgpu_mn.o
>>>> include $(FULL_AMD_PATH)/powerplay/Makefile
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> index e55508b..ad52f34 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>>> @@ -45,7 +45,7 @@
>>>> #include <linux/firmware.h>
>>>> #include <linux/module.h>
>>>> -#include <linux/mmu_notifier.h>
>>>> +#include <linux/hmm.h>
>>>> #include <linux/interval_tree.h>
>>>> #include <drm/drmP.h>
>>>> #include <drm/drm.h>
>>>> @@ -66,6 +66,7 @@
>>> Need to remove @mn documentation.
>>>
>>>> * @objects: interval tree containing amdgpu_mn_nodes
>>>> * @read_lock: mutex for recursive locking of @lock
>>>> * @recursion: depth of recursion
>>>> + * @mirror: HMM mirror function support
>>>> *
>>>> * Data for each amdgpu device and process address space.
>>>> */
>>>> @@ -73,7 +74,6 @@ struct amdgpu_mn {
>>>> /* constant after initialisation */
>>>> struct amdgpu_device *adev;
>>>> struct mm_struct *mm;
>>>> - struct mmu_notifier mn;
>>>> enum amdgpu_mn_type type;
>>>> /* only used on destruction */
>>>> @@ -87,6 +87,9 @@ struct amdgpu_mn {
>>>> struct rb_root_cached objects;
>>>> struct mutex read_lock;
>>>> atomic_t recursion;
>>>> +
>>>> + /* HMM mirror */
>>>> + struct hmm_mirror mirror;
>>>> };
>>>> /**
>>>> @@ -103,7 +106,7 @@ struct amdgpu_mn_node {
>>>> };
>>>> /**
>>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>> *
>>>> * @work: previously scheduled work item
>>>> *
>>>> @@ -129,28 +132,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>>>> }
>>>> up_write(&amn->lock);
>>>> mutex_unlock(&adev->mn_lock);
>>>> - mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>>> + hmm_mirror_unregister(&amn->mirror);
>>>> +
>>>> kfree(amn);
>>>> }
>>>> /**
>>>> * amdgpu_mn_release - callback to notify about mm destruction
>>> Update the function name in the comment.
>>>
>>>> *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>> *
>>>> - * Shedule a work item to lazy destroy our notifier.
>>>> + * Schedule a work item to lazily destroy the HMM mirror.
>>>> */
>>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>>> - struct mm_struct *mm)
>>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>> {
>>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>>> INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>> schedule_work(&amn->work);
>>>> }
>>>> -
>>>> /**
>>>> * amdgpu_mn_lock - take the write side lock for this notifier
>>>> *
>>>> @@ -237,21 +238,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>>>> /**
>>>> * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>>>> *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> + * @mirror: the hmm_mirror (mm) that is about to be updated
>>>> + * @update: the update's start and end addresses
>>>> *
>>>> * Block for operations on BOs to finish and mark pages as accessed and
>>>> * potentially dirty.
>>>> */
>>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>> - struct mm_struct *mm,
>>>> - unsigned long start,
>>>> - unsigned long end,
>>>> - bool blockable)
>>>> +static int amdgpu_mn_invalidate_range_start_gfx(struct hmm_mirror *mirror,
>>>> + const struct hmm_update *update)
>>>> {
>>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>>> + unsigned long start = update->start;
>>>> + unsigned long end = update->end;
>>>> + bool blockable = update->blockable;
>>>> struct interval_tree_node *it;
>>>> /* notification is exclusive, but interval is inclusive */
>>>> @@ -278,28 +277,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>>> amdgpu_mn_invalidate_node(node, start, end);
>>>> }
>>>> + amdgpu_mn_read_unlock(amn);
>>>> +
>>> amdgpu_mn_read_lock/unlock support recursive locking for multiple
>>> overlapping or nested invalidation ranges. But if you're locking and
>>> unlocking in the same function, is that still a concern?
>>
> I don't understand the possible recursive case, but
> amdgpu_mn_read_lock() still supports recursive locking.
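> For reference, this is roughly what the recursive read lock looks like
> in amdgpu_mn.c (abridged, unchanged by this patch):
>
>     static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
>     {
>         /* read_lock serializes lockers so the recursion count and the
>          * rwsem are taken consistently */
>         if (blockable)
>             mutex_lock(&amn->read_lock);
>         else if (!mutex_trylock(&amn->read_lock))
>             return -EAGAIN;
>
>         /* only the outermost caller takes the read side of @lock */
>         if (atomic_inc_return(&amn->recursion) == 1)
>             down_read_non_owner(&amn->lock);
>         mutex_unlock(&amn->read_lock);
>
>         return 0;
>     }
>
>     static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
>     {
>         /* only the outermost caller drops the read side of @lock */
>         if (atomic_dec_return(&amn->recursion) == 0)
>             up_read_non_owner(&amn->lock);
>     }
>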
>> Well the real problem is that unlocking them here won't work.
>>
>> We need to hold the lock until we are sure that the operation which
>> updates the page tables is completed.
>>
> The reason for this change is that the HMM mirror has an
> invalidate_start callback but no invalidate_end callback.
>
> Checking mmu_notifier.c and hmm.c again, below is the entire logic for
> updating the CPU page tables and invoking the callback:
>
> The mn lock amn->lock is used to protect interval tree access, because
> the user may submit/register a new userptr at any time.
> This is the same for the old way and the new way.
>
> Step 2 guarantees the GPU operation is done before updating the CPU
> page tables.
>
> So I think the change is safe. We don't need to hold the mn lock
> until the CPU page table update is completed.
No, that isn't even remotely correct. The lock doesn't protect the
interval tree.
>
> Old:
> 1. down_read_non_owner(&amn->lock)
> 2. loop over the BOs in node->bos through the interval tree
> amn->objects nodes
>     gfx: wait for the pending fence operations on the BOs to finish,
>     mark user pages dirty
>     kfd: evict the user queues of the process, wait for the queue
>     unmap/map operation to finish
> 3. update CPU page tables
> 4. up_read(&amn->lock)
>
> New (swap steps 3 and 4; a code sketch follows this list):
> 1. down_read_non_owner(&amn->lock)
> 2. loop over the BOs in node->bos through the interval tree
> amn->objects nodes
>     gfx: wait for the pending fence operations on the BOs to finish,
>     mark user pages dirty
>     kfd: evict the user queues of the process, wait for the queue
>     unmap/map operation to finish
> 3. up_read(&amn->lock)
> 4. update CPU page tables
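>
> In code form, roughly (just a sketch to show where the unlock now
> falls relative to the CPU page table update, not code from the patch):
>
>     /* Old (mmu_notifier): amn->lock spans the page table update */
>     invalidate_range_start()    /* amdgpu_mn_read_lock(amn, blockable) */
>         /* core mm updates the CPU page tables, amn->lock still held */
>     invalidate_range_end()      /* amdgpu_mn_read_unlock(amn) */
>
>     /* New (hmm_mirror): amn->lock is dropped before the update */
>     sync_cpu_device_pagetables()
>     {
>         amdgpu_mn_read_lock(amn, update->blockable);
>         /* wait for fences / evict user queues */
>         amdgpu_mn_read_unlock(amn);
>     }
>         /* core mm updates the CPU page tables, amn->lock already
>          * released */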
The lock is there to make sure that we serialize page table updates with
command submission.
If HMM doesn't provide a callback for the end of the invalidation then
it can't be used for this.
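For comparison, the mirror ops in the queued HMM patches look roughly
like this (abridged from the version this patch was written against):

    struct hmm_mirror_ops {
        /* called when the mm is going away */
        void (*release)(struct hmm_mirror *mirror);

        /* called before the CPU page tables are updated; there is no
         * second callback once the update has finished */
        int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
                                          const struct hmm_update *update);
    };

So there is only the one notification before the update and nothing
that corresponds to invalidate_range_end.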
Adding Jerome as well, since we are certainly missing something here.
Regards,
Christian.
>
> Regards,
> Philip
>> Christian.
>>
>>>
>>>> return 0;
>>>> }
>>>> /**
>>>> * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>>>> *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> + * @mirror: the hmm_mirror (mm) that is about to be updated
>>>> + * @update: the update's start and end addresses
>>>> *
>>>> * We temporarily evict all BOs between start and end. This
>>>> * necessitates evicting all user-mode queues of the process. The BOs
>>>> * are restored in amdgpu_mn_invalidate_range_end_hsa.
>>>> */
>>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>> - struct mm_struct *mm,
>>>> - unsigned long start,
>>>> - unsigned long end,
>>>> - bool blockable)
>>>> +static int amdgpu_mn_invalidate_range_start_hsa(struct hmm_mirror *mirror,
>>>> + const struct hmm_update *update)
>>>> {
>>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>>>> + unsigned long start = update->start;
>>>> + unsigned long end = update->end;
>>>> + bool blockable = update->blockable;
>>>> struct interval_tree_node *it;
>>>> /* notification is exclusive, but interval is inclusive */
>>>> @@ -326,59 +325,41 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>>> if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>>> start, end))
>>>> - amdgpu_amdkfd_evict_userptr(mem, mm);
>>>> + amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>> }
>>>> }
>>>> + amdgpu_mn_read_unlock(amn);
>>>> +
>>>> return 0;
>>>> }
>>>> -/**
>>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>>>> - *
>>>> - * @mn: our notifier
>>>> - * @mm: the mm this callback is about
>>>> - * @start: start of updated range
>>>> - * @end: end of updated range
>>>> - *
>>>> - * Release the lock again to allow new command submissions.
>>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>>> + * and notifier type.
>>>> */
>>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>>> - struct mm_struct *mm,
>>>> - unsigned long start,
>>>> - unsigned long end)
>>>> -{
>>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>>> -
>>>> - amdgpu_mn_read_unlock(amn);
>>>> -}
>>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>> [AMDGPU_MN_TYPE_GFX] = {
>>>> - .release = amdgpu_mn_release,
>>>> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>>>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>> + .sync_cpu_device_pagetables =
>>>> + amdgpu_mn_invalidate_range_start_gfx,
>>>> + .release = amdgpu_hmm_mirror_release
>>>> },
>>>> [AMDGPU_MN_TYPE_HSA] = {
>>>> - .release = amdgpu_mn_release,
>>>> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>>>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>>> + .sync_cpu_device_pagetables =
>>>> + amdgpu_mn_invalidate_range_start_hsa,
>>>> + .release = amdgpu_hmm_mirror_release
>>>> },
>>>> };
>>>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>>> - * and notifier type.
>>>> - */
>>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>>> -
>>>> /**
>>>> - * amdgpu_mn_get - create notifier context
>>>> + * amdgpu_mn_get - create HMM mirror context
>>>> *
>>>> * @adev: amdgpu device pointer
>>>> * @type: type of MMU notifier context
>>>> *
>>>> - * Creates a notifier context for current->mm.
>>>> + * Creates an HMM mirror context for current->mm.
>>>> */
>>>> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>> enum amdgpu_mn_type type)
>>>> @@ -408,12 +389,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>> amn->mm = mm;
>>>> init_rwsem(&amn->lock);
>>>> amn->type = type;
>>>> - amn->mn.ops = &amdgpu_mn_ops[type];
>>>> amn->objects = RB_ROOT_CACHED;
>>>> mutex_init(&amn->read_lock);
>>>> atomic_set(&amn->recursion, 0);
>>>> - r = __mmu_notifier_register(&amn->mn, mm);
>>>> + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>>> + r = hmm_mirror_register(&amn->mirror, mm);
>>>> if (r)
>>>> goto free_amn;
>>>> @@ -439,7 +420,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>> * @bo: amdgpu buffer object
>>>> * @addr: userptr addr we should monitor
>>>> *
>>>> - * Registers an MMU notifier for the given BO at the specified address.
>>>> + * Registers an HMM mirror for the given BO at the specified address.
>>>> * Returns 0 on success, -ERRNO if anything goes wrong.
>>>> */
>>>> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>> @@ -495,11 +476,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>> }
>>>> /**
>>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>> *
>>>> * @bo: amdgpu buffer object
>>>> *
>>>> - * Remove any registration of MMU notifier updates from the buffer object.
>>>> + * Remove any registration of HMM mirror updates from the buffer object.
>>>> */
>>>> void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>> {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> index eb0f432..0e27526 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>> AMDGPU_MN_TYPE_HSA,
>>>> };
>>>> -#if defined(CONFIG_MMU_NOTIFIER)
>>>> +#if defined(CONFIG_HMM)
>>>> void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>> void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,