[PATCH] drm/amdkfd: Set pte_flags for actual BO location

Mon Aug 29 16:07:04 UTC 2022

Am 2022-08-29 um 11:38 schrieb Christian König:
> Am 27.08.22 um 01:16 schrieb Felix Kuehling:
>> BOs can be in a different location than was intended at allocation time,
>> for example when restoring fails after an eviction or BOs get pinned in
>> system memory. On some GPUs the MTYPE for coherent mappings depends on
>> the actual memory location.
>>
>> Use the actual location to determine the pte_flags every time the page
>> tables are updated.
>
> For a workaround ok, but looks a bit awkward. Basically we need 
> different MTYPE based on the location, right?

Yes. On Aldebaran and Arcturus we need different MTYPEs for fine-grained 
coherence depending on the location.

Regards,
   Felix

>
> Christian.
>
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
>> ---
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  9 ++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 19 +++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 +
>>   3 files changed, 28 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index cbd593f7d553..5dd89f5a032f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -405,6 +405,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, 
>> struct amdgpu_sync *sync)
>>   static uint64_t get_pte_flags(struct amdgpu_device *adev, struct 
>> kgd_mem *mem)
>>   {
>>       struct amdgpu_device *bo_adev = 
>> amdgpu_ttm_adev(mem->bo->tbo.bdev);
>> +    bool is_vram = mem->bo->tbo.resource->mem_type == TTM_PL_VRAM;
>>       bool coherent = mem->alloc_flags & 
>> KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>>       bool uncached = mem->alloc_flags & 
>> KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
>>       uint32_t mapping_flags;
>> @@ -420,7 +421,7 @@ static uint64_t get_pte_flags(struct 
>> amdgpu_device *adev, struct kgd_mem *mem)
>>       switch (adev->asic_type) {
>>       case CHIP_ARCTURUS:
>>       case CHIP_ALDEBARAN:
>> -        if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>> +        if (is_vram) {
>>               if (bo_adev == adev) {
>>                   if (uncached)
>>                       mapping_flags |= AMDGPU_VM_MTYPE_UC;
>> @@ -1236,12 +1237,18 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
>>   {
>>       struct amdgpu_bo_va *bo_va = entry->bo_va;
>>       struct amdgpu_device *adev = entry->adev;
>> +    uint64_t pte_flags = get_pte_flags(adev, mem);
>>       int ret;
>>         ret = kfd_mem_dmamap_attachment(mem, entry);
>>       if (ret)
>>           return ret;
>>   +    if (unlikely(entry->pte_flags != pte_flags)) {
>> +        amdgpu_vm_bo_update_flags(bo_va, pte_flags);
>> +        entry->pte_flags = pte_flags;
>> +    }
>> +
>>       /* Update the page tables  */
>>       ret = amdgpu_vm_bo_update(adev, bo_va, false);
>>       if (ret) {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 59cac347baa3..954a40d5d828 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1862,6 +1862,25 @@ void amdgpu_vm_bo_invalidate(struct 
>> amdgpu_device *adev,
>>       }
>>   }
>>   +/**
>> + * amdgpu_vm_bo_update_flags - Update mapping flags of invalid mappings
>> + *
>> + * @bo_va: identifies the BO and VM
>> + * @flags: new mapping flags
>> + *
>> + * The update is only applied to invalid mappings. This allows 
>> updating the
>> + * mapping flags after a migration to maintain the desired 
>> coherence. The next
>> + * call to amdgpu_vm_bo_update() will apply the new @flags to the 
>> page table.
>> + */
>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va,
>> +                   uint64_t flags)
>> +{
>> +    struct amdgpu_bo_va_mapping *mapping;
>> +
>> +    list_for_each_entry(mapping, &bo_va->invalids, list)
>> +        mapping->flags = flags;
>> +}
>> +
>>   /**
>>    * amdgpu_vm_get_block_size - calculate VM page table size as power 
>> of two
>>    *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 9ecb7f663e19..11793716cd8b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -413,6 +413,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>   bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
>>   void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
>>                    struct amdgpu_bo *bo, bool evicted);
>> +void amdgpu_vm_bo_update_flags(struct amdgpu_bo_va *bo_va, uint64_t 
>> flags);
>>   uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t 
>> addr);
>>   struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
>>                          struct amdgpu_bo *bo);
>