[PATCH 1/5] drm/amdgpu: re-apply "use the new cursor in the VM code" v2

Fri Apr 30 14:58:55 UTC 2021

Am 30.04.21 um 16:57 schrieb Felix Kuehling:
> Am 2021-04-30 um 9:50 a.m. schrieb Christian König:
>>
>> Am 30.04.21 um 15:45 schrieb Felix Kuehling:
>>> Am 2021-04-30 um 4:54 a.m. schrieb Christian König:
>>>> Am 30.04.21 um 10:32 schrieb Felix Kuehling:
>>>>> Am 2021-04-27 um 6:54 a.m. schrieb Christian König:
>>>>>> Now that we found the underlying problem we can re-apply this patch.
>>>>>>
>>>>>> This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.
>>>>>>
>>>>>> v2: rebase on KFD changes
>>>>>>
>>>>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>>>> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>
>>>> Thanks, just for this patch or the series?
>>> I meant it for the patch. I could probably extend it to the series. But
>>> I'm a bit confused how the patches relate to each other. There seems to
>>> be an implication that everything leads up to enabling patch 5, i.e.
>>> removing users of mm_nodes so that you can in patch 5 make the data
>>> structure containing mm_nodes private.
>>>
>>> But it's not the same data structure. I don't think patch 5 really
>>> depends on any of the previous patches, because all the other patches
>>> deal with ttm_resource.mm_nodes and patch 5 deals with
>>> amdgpu_vram_reservation.mm_nodes.
>> No, you are close but still misunderstood the intention here a bit.
>>
>> I want to stop using drm_mm_node directly in the amdgpu code so that
>> we are able to switch the VRAM management to a buddy allocator sooner
>> or later.
>>
>> mm_nodes and amdgpu_vram_reservation.mm_nodes are both of that type.
> Yes, I got that. So all of these patches are preparation for something
> bigger. That's the part that wasn't clear. The series is
>
> Acked-by: Felix Kuehling <Felix.Kuehling at amd.com>
>
> We still have a few uses of mm_node in kfd_migrate.c. That code is about
> to change as Alejandro is beginning to rework our VRAM management for
> HMM migration to allow partial migration failures and mixed VRAM/System
> mappings. But we'll still need a way to find VRAM addresses within a BO.

Using amdgpu_res_coursor.h for that should work.

Regards,
Christian.

>
> Regards,
>    Felix
>
>
>> Regards,
>> Christian.
>>
>>> Regards,
>>>     Felix
>>>
>>>
>>>> Christian.
>>>>
>>>>>> ---
>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55
>>>>>> +++++++++-----------------
>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
>>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_svm.c   |  3 +-
>>>>>>     3 files changed, 20 insertions(+), 40 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index dae51992c607..fa43d332a979 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -37,6 +37,7 @@
>>>>>>     #include "amdgpu_gmc.h"
>>>>>>     #include "amdgpu_xgmi.h"
>>>>>>     #include "amdgpu_dma_buf.h"
>>>>>> +#include "amdgpu_res_cursor.h"
>>>>>>     #include "kfd_svm.h"
>>>>>>       /**
>>>>>> @@ -1606,7 +1607,7 @@ static int amdgpu_vm_update_ptes(struct
>>>>>> amdgpu_vm_update_params *params,
>>>>>>      * @last: last mapped entry
>>>>>>      * @flags: flags for the entries
>>>>>>      * @offset: offset into nodes and pages_addr
>>>>>> - * @nodes: array of drm_mm_nodes with the MC addresses
>>>>>> + * @res: ttm_resource to map
>>>>>>      * @pages_addr: DMA addresses to use for mapping
>>>>>>      * @fence: optional resulting fence
>>>>>>      *
>>>>>> @@ -1621,13 +1622,13 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>>                     bool unlocked, struct dma_resv *resv,
>>>>>>                     uint64_t start, uint64_t last,
>>>>>>                     uint64_t flags, uint64_t offset,
>>>>>> -                struct drm_mm_node *nodes,
>>>>>> +                struct ttm_resource *res,
>>>>>>                     dma_addr_t *pages_addr,
>>>>>>                     struct dma_fence **fence)
>>>>>>     {
>>>>>>         struct amdgpu_vm_update_params params;
>>>>>> +    struct amdgpu_res_cursor cursor;
>>>>>>         enum amdgpu_sync_mode sync_mode;
>>>>>> -    uint64_t pfn;
>>>>>>         int r;
>>>>>>           memset(&params, 0, sizeof(params));
>>>>>> @@ -1645,14 +1646,6 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>>         else
>>>>>>             sync_mode = AMDGPU_SYNC_EXPLICIT;
>>>>>>     -    pfn = offset >> PAGE_SHIFT;
>>>>>> -    if (nodes) {
>>>>>> -        while (pfn >= nodes->size) {
>>>>>> -            pfn -= nodes->size;
>>>>>> -            ++nodes;
>>>>>> -        }
>>>>>> -    }
>>>>>> -
>>>>>>         amdgpu_vm_eviction_lock(vm);
>>>>>>         if (vm->evicting) {
>>>>>>             r = -EBUSY;
>>>>>> @@ -1671,23 +1664,17 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>>         if (r)
>>>>>>             goto error_unlock;
>>>>>>     -    do {
>>>>>> +    amdgpu_res_first(res, offset, (last - start + 1) *
>>>>>> AMDGPU_GPU_PAGE_SIZE,
>>>>>> +             &cursor);
>>>>>> +    while (cursor.remaining) {
>>>>>>             uint64_t tmp, num_entries, addr;
>>>>>>     -
>>>>>> -        num_entries = last - start + 1;
>>>>>> -        if (nodes) {
>>>>>> -            addr = nodes->start << PAGE_SHIFT;
>>>>>> -            num_entries = min((nodes->size - pfn) *
>>>>>> -                AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
>>>>>> -        } else {
>>>>>> -            addr = 0;
>>>>>> -        }
>>>>>> -
>>>>>> +        num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
>>>>>>             if (pages_addr) {
>>>>>>                 bool contiguous = true;
>>>>>>                   if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
>>>>>> +                uint64_t pfn = cursor.start >> PAGE_SHIFT;
>>>>>>                     uint64_t count;
>>>>>>                       contiguous = pages_addr[pfn + 1] ==
>>>>>> @@ -1707,16 +1694,18 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>>                 }
>>>>>>                   if (!contiguous) {
>>>>>> -                addr = pfn << PAGE_SHIFT;
>>>>>> +                addr = cursor.start;
>>>>>>                     params.pages_addr = pages_addr;
>>>>>>                 } else {
>>>>>> -                addr = pages_addr[pfn];
>>>>>> +                addr = pages_addr[cursor.start >> PAGE_SHIFT];
>>>>>>                     params.pages_addr = NULL;
>>>>>>                 }
>>>>>>               } else if (flags & (AMDGPU_PTE_VALID |
>>>>>> AMDGPU_PTE_PRT)) {
>>>>>> -            addr += bo_adev->vm_manager.vram_base_offset;
>>>>>> -            addr += pfn << PAGE_SHIFT;
>>>>>> +            addr = bo_adev->vm_manager.vram_base_offset +
>>>>>> +                cursor.start;
>>>>>> +        } else {
>>>>>> +            addr = 0;
>>>>>>             }
>>>>>>               tmp = start + num_entries;
>>>>>> @@ -1724,14 +1713,9 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>>             if (r)
>>>>>>                 goto error_unlock;
>>>>>>     -        pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
>>>>>> -        if (nodes && nodes->size == pfn) {
>>>>>> -            pfn = 0;
>>>>>> -            ++nodes;
>>>>>> -        }
>>>>>> +        amdgpu_res_next(&cursor, num_entries *
>>>>>> AMDGPU_GPU_PAGE_SIZE);
>>>>>>             start = tmp;
>>>>>> -
>>>>>> -    } while (unlikely(start != last + 1));
>>>>>> +    };
>>>>>>           r = vm->update_funcs->commit(&params, fence);
>>>>>>     @@ -1760,7 +1744,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>>>         struct amdgpu_bo_va_mapping *mapping;
>>>>>>         dma_addr_t *pages_addr = NULL;
>>>>>>         struct ttm_resource *mem;
>>>>>> -    struct drm_mm_node *nodes;
>>>>>>         struct dma_fence **last_update;
>>>>>>         struct dma_resv *resv;
>>>>>>         uint64_t flags;
>>>>>> @@ -1769,7 +1752,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>>>           if (clear || !bo) {
>>>>>>             mem = NULL;
>>>>>> -        nodes = NULL;
>>>>>>             resv = vm->root.base.bo->tbo.base.resv;
>>>>>>         } else {
>>>>>>             struct drm_gem_object *obj = &bo->tbo.base;
>>>>>> @@ -1784,7 +1766,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>>>                     bo = gem_to_amdgpu_bo(gobj);
>>>>>>             }
>>>>>>             mem = &bo->tbo.mem;
>>>>>> -        nodes = mem->mm_node;
>>>>>>             if (mem->mem_type == TTM_PL_TT)
>>>>>>                 pages_addr = bo->tbo.ttm->dma_address;
>>>>>>         }
>>>>>> @@ -1833,7 +1814,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>>>>>> *adev, struct amdgpu_bo_va *bo_va,
>>>>>>             r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false,
>>>>>> false,
>>>>>>                             resv, mapping->start,
>>>>>>                             mapping->last, update_flags,
>>>>>> -                        mapping->offset, nodes,
>>>>>> +                        mapping->offset, mem,
>>>>>>                             pages_addr, last_update);
>>>>>>             if (r)
>>>>>>                 return r;
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> index e5a3f18be2b7..1ae5ea8db497 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> @@ -402,7 +402,7 @@ int amdgpu_vm_bo_update_mapping(struct
>>>>>> amdgpu_device *adev,
>>>>>>                     bool unlocked, struct dma_resv *resv,
>>>>>>                     uint64_t start, uint64_t last,
>>>>>>                     uint64_t flags, uint64_t offset,
>>>>>> -                struct drm_mm_node *nodes,
>>>>>> +                struct ttm_resource *res,
>>>>>>                     dma_addr_t *pages_addr,
>>>>>>                     struct dma_fence **fence);
>>>>>>     int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> index e4ce97ab6e26..0b0e76e16ddc 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>>>>> @@ -1151,8 +1151,7 @@ svm_range_map_to_gpu(struct amdgpu_device
>>>>>> *adev, struct amdgpu_vm *vm,
>>>>>>                         prange->mapping.start,
>>>>>>                         prange->mapping.last, pte_flags,
>>>>>>                         prange->mapping.offset,
>>>>>> -                    prange->ttm_res ?
>>>>>> -                        prange->ttm_res->mm_node : NULL,
>>>>>> +                    prange->ttm_res,
>>>>>>                         dma_addr, &vm->last_update);
>>>>>>         if (r) {
>>>>>>             pr_debug("failed %d to map to gpu 0x%lx\n", r,
>>>>>> prange->start);