[PATCH] drm/amdgpu: Limit the maximum fragment to granularity size

Fri Jan 26 15:35:52 UTC 2024

Am 26.01.24 um 16:17 schrieb Philip Yang:
> On 2024-01-26 09:59, Christian König wrote:
>> Am 26.01.24 um 15:38 schrieb Philip Yang:
>>> svm range support partial migration and mapping update, for size 4MB
>>> virtual address 4MB alignment and physical address continuous range, if
>>> mapping to GPU with fs=10, after updating mapping of the first 2MB,
>>> if the second 2MB mapping fs=10 in cache TLB, this causes the first 2MB
>>> access to the stale mapping.
>>
>> Well that sounds fishy. When that happens with (for example) 4MiB and 
>> 2MiB, why doesn't it happen with 8KiB and 4KiB as well?
>
> unmap svm range is aligned to granularity size, if the range size is 
> 8KB (all within one 2MB granularity range), it will be mapped/unmapped 
> as 8KB, even if only 4KB is migrated. This is handled in another patch 
> series "amd/amdkfd: Unmap range from GPU based on granularity".
>

Ok that makes a bit more sense.

But when you have a linear 4MiB mapping and unmap the first 2MiB of it 
you need to flush the TLB anyway.

So why would that cause a stale access?

Regards,
Christian.

> Regards,
>
> Philip
>
>>
>> Christian.
>>
>>>
>>> Limit the maximum fragment size to granularity size, 2MB by default,
>>> with the mapping and unmapping based on gramularity size, to solve this
>>> issue.
>>>
>>> The change is only for SVM map/unmap range, no change for gfx and 
>>> legacy
>>> API path.
>>>
>>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 12 +++++++-----
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  4 ++--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 22 ++++++++++++----------
>>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c      |  9 +++++----
>>>   4 files changed, 26 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index ed4a8c5d26d7..a2bef94cb959 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -897,6 +897,7 @@ static void amdgpu_vm_tlb_seq_cb(struct 
>>> dma_fence *fence,
>>>    * @res: ttm_resource to map
>>>    * @pages_addr: DMA addresses to use for mapping
>>>    * @fence: optional resulting fence
>>> + * @frag_size: max map fragment size
>>>    *
>>>    * Fill in the page table entries between @start and @last.
>>>    *
>>> @@ -908,7 +909,7 @@ int amdgpu_vm_update_range(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm,
>>>                  struct dma_resv *resv, uint64_t start, uint64_t last,
>>>                  uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>                  struct ttm_resource *res, dma_addr_t *pages_addr,
>>> -               struct dma_fence **fence)
>>> +               struct dma_fence **fence, unsigned int frag_size)
>>>   {
>>>       struct amdgpu_vm_update_params params;
>>>       struct amdgpu_vm_tlb_seq_struct *tlb_cb;
>>> @@ -1016,7 +1017,7 @@ int amdgpu_vm_update_range(struct 
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>           }
>>>             tmp = start + num_entries;
>>> -        r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
>>> +        r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags, 
>>> frag_size);
>>>           if (r)
>>>               goto error_free;
>>>   @@ -1197,7 +1198,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device 
>>> *adev, struct amdgpu_bo_va *bo_va,
>>>                          !uncached, resv, mapping->start, 
>>> mapping->last,
>>>                          update_flags, mapping->offset,
>>>                          vram_base, mem, pages_addr,
>>> -                       last_update);
>>> +                       last_update, 0);
>>>           if (r)
>>>               return r;
>>>       }
>>> @@ -1392,7 +1393,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device 
>>> *adev,
>>>           r = amdgpu_vm_update_range(adev, vm, false, false, true, 
>>> false,
>>>                          resv, mapping->start, mapping->last,
>>>                          init_pte_value, 0, 0, NULL, NULL,
>>> -                       &f);
>>> +                       &f, 0);
>>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>           if (r) {
>>>               dma_fence_put(f);
>>> @@ -2733,7 +2734,8 @@ bool amdgpu_vm_handle_fault(struct 
>>> amdgpu_device *adev, u32 pasid,
>>>       }
>>>         r = amdgpu_vm_update_range(adev, vm, true, false, false, false,
>>> -                   NULL, addr, addr, flags, value, 0, NULL, NULL, 
>>> NULL);
>>> +                   NULL, addr, addr, flags, value, 0, NULL, NULL,
>>> +                   NULL, 0);
>>>       if (r)
>>>           goto error_unlock;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 666698a57192..b34466b5086f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -465,7 +465,7 @@ int amdgpu_vm_update_range(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm,
>>>                  struct dma_resv *resv, uint64_t start, uint64_t last,
>>>                  uint64_t flags, uint64_t offset, uint64_t vram_base,
>>>                  struct ttm_resource *res, dma_addr_t *pages_addr,
>>> -               struct dma_fence **fence);
>>> +               struct dma_fence **fence, unsigned int frag_size);
>>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>>               struct amdgpu_bo_va *bo_va,
>>>               bool clear);
>>> @@ -531,7 +531,7 @@ int amdgpu_vm_pde_update(struct 
>>> amdgpu_vm_update_params *params,
>>>                struct amdgpu_vm_bo_base *entry);
>>>   int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>>>                 uint64_t start, uint64_t end,
>>> -              uint64_t dst, uint64_t flags);
>>> +              uint64_t dst, uint64_t flags, unsigned int frag_size);
>>>   void amdgpu_vm_pt_free_work(struct work_struct *work);
>>>     #if defined(CONFIG_DEBUG_FS)
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> index a160265ddc07..4647f700f1c6 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> @@ -860,12 +860,14 @@ static void amdgpu_vm_pte_update_flags(struct 
>>> amdgpu_vm_update_params *params,
>>>    * @flags: hw mapping flags
>>>    * @frag: resulting fragment size
>>>    * @frag_end: end of this fragment
>>> + * @frag_size: max map fragment size
>>>    *
>>>    * Returns the first possible fragment for the start and end address.
>>>    */
>>>   static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params 
>>> *params,
>>>                      uint64_t start, uint64_t end, uint64_t flags,
>>> -                   unsigned int *frag, uint64_t *frag_end)
>>> +                   unsigned int *frag, uint64_t *frag_end,
>>> +                   unsigned int frag_size)
>>>   {
>>>       /**
>>>        * The MC L1 TLB supports variable sized pages, based on a 
>>> fragment
>>> @@ -893,7 +895,7 @@ static void amdgpu_vm_pte_fragment(struct 
>>> amdgpu_vm_update_params *params,
>>>       if (params->adev->asic_type < CHIP_VEGA10)
>>>           max_frag = params->adev->vm_manager.fragment_size;
>>>       else
>>> -        max_frag = 31;
>>> +        max_frag = frag_size ? frag_size : 31;
>>>         /* system pages are non continuously */
>>>       if (params->pages_addr) {
>>> @@ -904,12 +906,10 @@ static void amdgpu_vm_pte_fragment(struct 
>>> amdgpu_vm_update_params *params,
>>>         /* This intentionally wraps around if no bit is set */
>>>       *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) 
>>> - 1);
>>> -    if (*frag >= max_frag) {
>>> +    if (*frag >= max_frag)
>>>           *frag = max_frag;
>>> -        *frag_end = end & ~((1ULL << max_frag) - 1);
>>> -    } else {
>>> -        *frag_end = start + (1 << *frag);
>>> -    }
>>> +
>>> +    *frag_end = start + (1 << *frag);
>>>   }
>>>     /**
>>> @@ -920,6 +920,7 @@ static void amdgpu_vm_pte_fragment(struct 
>>> amdgpu_vm_update_params *params,
>>>    * @end: end of GPU address range
>>>    * @dst: destination address to map to, the next dst inside the 
>>> function
>>>    * @flags: mapping flags
>>> + * @frag_size: max map fragment size
>>>    *
>>>    * Update the page tables in the range @start - @end.
>>>    *
>>> @@ -928,7 +929,7 @@ static void amdgpu_vm_pte_fragment(struct 
>>> amdgpu_vm_update_params *params,
>>>    */
>>>   int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
>>>                 uint64_t start, uint64_t end,
>>> -              uint64_t dst, uint64_t flags)
>>> +              uint64_t dst, uint64_t flags, unsigned int frag_size)
>>>   {
>>>       struct amdgpu_device *adev = params->adev;
>>>       struct amdgpu_vm_pt_cursor cursor;
>>> @@ -938,7 +939,7 @@ int amdgpu_vm_ptes_update(struct 
>>> amdgpu_vm_update_params *params,
>>>         /* figure out the initial fragment */
>>>       amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
>>> -                   &frag_end);
>>> +                   &frag_end, frag_size);
>>>         /* walk over the address space and update the PTs */
>>>       amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
>>> @@ -1040,7 +1041,8 @@ int amdgpu_vm_ptes_update(struct 
>>> amdgpu_vm_update_params *params,
>>>               if (frag_start >= frag_end) {
>>>                   /* figure out the next fragment */
>>>                   amdgpu_vm_pte_fragment(params, frag_start, end,
>>> -                               flags, &frag, &frag_end);
>>> +                               flags, &frag, &frag_end,
>>> +                               frag_size);
>>>                   if (frag < shift)
>>>                       break;
>>>               }
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> index ed35a490fd9e..d71b2c8bf51a 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>>> @@ -1488,7 +1488,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
>>>   static int
>>>   svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct 
>>> amdgpu_vm *vm,
>>>                uint64_t start, uint64_t last,
>>> -             struct dma_fence **fence)
>>> +             struct dma_fence **fence, unsigned int frag_size)
>>>   {
>>>       uint64_t init_pte_value = 0;
>>>   @@ -1496,7 +1496,7 @@ svm_range_unmap_from_gpu(struct 
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>         return amdgpu_vm_update_range(adev, vm, false, true, true, 
>>> false, NULL, start,
>>>                         last, init_pte_value, 0, 0, NULL, NULL,
>>> -                      fence);
>>> +                      fence, frag_size);
>>>   }
>>>     /**
>>> @@ -1579,7 +1579,7 @@ svm_range_unmap_from_gpus(struct svm_range 
>>> *prange, unsigned long start,
>>>             r = svm_range_unmap_from_gpu(pdd->dev->adev,
>>>                            drm_priv_to_vm(pdd->drm_priv),
>>> -                         start, last, &fence);
>>> +                         start, last, &fence, prange->granularity);
>>>           if (r)
>>>               break;
>>>   @@ -1647,7 +1647,8 @@ svm_range_map_to_gpu(struct 
>>> kfd_process_device *pdd, struct svm_range *prange,
>>>                          pte_flags,
>>>                          (last_start - prange->start) << PAGE_SHIFT,
>>>                          bo_adev ? 
>>> bo_adev->vm_manager.vram_base_offset : 0,
>>> -                       NULL, dma_addr, &vm->last_update);
>>> +                       NULL, dma_addr, &vm->last_update,
>>> +                       prange->granularity);
>>>             for (j = last_start - prange->start; j <= i; j++)
>>>               dma_addr[j] |= last_domain;
>>