[PATCH] drm/amdgpu: flush TLB if valid PDE turns into PTE

Felix Kuehling felix.kuehling at amd.com
Wed May 12 15:54:31 UTC 2021


Am 2021-05-12 um 8:38 a.m. schrieb Christian König:
>
>
> Am 12.05.21 um 14:34 schrieb Philip Yang:
>> Mapping huge page, 2MB aligned address with 2MB size, uses PDE0 as PTE.
>> If previously valid PDE0, PDE0.V=1 and PDE0.P=0 turns into PTE, this
>> requires TLB flush, otherwise page table walker will not read updated
>> PDE0.
>>
>> Change page table update mapping to return free_table flag to indicate
>> the previously valid PDE may have turned into a PTE if page table is
>> freed.
>>
>> Signed-off-by: Philip Yang <Philip.Yang at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++++++++++------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 12 ++++++++++--
>>   3 files changed, 22 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 3dcdcc9ff522..27418f9407f1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1457,7 +1457,7 @@ static void amdgpu_vm_fragment(struct
>> amdgpu_vm_update_params *params,
>>    */
>>   static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params
>> *params,
>>                    uint64_t start, uint64_t end,
>> -                 uint64_t dst, uint64_t flags)
>> +                 uint64_t dst, uint64_t flags, bool *free_table)
>
> Please put that flag into the params structure instead.
>
> Christian.

I agree. Christian, I think we also need to keep track of this for
graphics command submissions. amdgpu_cs_vm_handling needs get this flag
from amdgpu_vm_bo_update, and amdgpu_cs_submit needs to update
job->vm_needs_flushed based on this.

Regards,
  Felix


>
>>   {
>>       struct amdgpu_device *adev = params->adev;
>>       struct amdgpu_vm_pt_cursor cursor;
>> @@ -1583,6 +1583,8 @@ static int amdgpu_vm_update_ptes(struct
>> amdgpu_vm_update_params *params,
>>               while (cursor.pfn < frag_start) {
>>                   amdgpu_vm_free_pts(adev, params->vm, &cursor);
>>                   amdgpu_vm_pt_next(adev, &cursor);
>> +                if (free_table)
>> +                    *free_table = true;
>>               }
>>             } else if (frag >= shift) {
>> @@ -1610,6 +1612,7 @@ static int amdgpu_vm_update_ptes(struct
>> amdgpu_vm_update_params *params,
>>    * @nodes: array of drm_mm_nodes with the MC addresses
>>    * @pages_addr: DMA addresses to use for mapping
>>    * @fence: optional resulting fence
>> + * @free_table: return true if page table is freed
>>    *
>>    * Fill in the page table entries between @start and @last.
>>    *
>> @@ -1624,7 +1627,8 @@ int amdgpu_vm_bo_update_mapping(struct
>> amdgpu_device *adev,
>>                   uint64_t flags, uint64_t offset,
>>                   struct drm_mm_node *nodes,
>>                   dma_addr_t *pages_addr,
>> -                struct dma_fence **fence)
>> +                struct dma_fence **fence,
>> +                bool *free_table)
>>   {
>>       struct amdgpu_vm_update_params params;
>>       enum amdgpu_sync_mode sync_mode;
>> @@ -1721,7 +1725,8 @@ int amdgpu_vm_bo_update_mapping(struct
>> amdgpu_device *adev,
>>           }
>>             tmp = start + num_entries;
>> -        r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
>> +        r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags,
>> +                      free_table);
>>           if (r)
>>               goto error_unlock;
>>   @@ -1879,7 +1884,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
>> *adev, struct amdgpu_bo_va *bo_va,
>>                           resv, mapping->start,
>>                           mapping->last, update_flags,
>>                           mapping->offset, nodes,
>> -                        pages_addr, last_update);
>> +                        pages_addr, last_update, NULL);
>>           if (r)
>>               return r;
>>       }
>> @@ -2090,7 +2095,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device
>> *adev,
>>           r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
>>                           resv, mapping->start,
>>                           mapping->last, init_pte_value,
>> -                        0, NULL, NULL, &f);
>> +                        0, NULL, NULL, &f, NULL);
>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>           if (r) {
>>               dma_fence_put(f);
>> @@ -3428,7 +3433,7 @@ bool amdgpu_vm_handle_fault(struct
>> amdgpu_device *adev, u32 pasid,
>>       }
>>         r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false,
>> NULL, addr,
>> -                    addr, flags, value, NULL, NULL,
>> +                    addr, flags, value, NULL, NULL, NULL,
>>                       NULL);
>>       if (r)
>>           goto error_unlock;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index ea60ec122b51..f61c20b70b79 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -404,7 +404,7 @@ int amdgpu_vm_bo_update_mapping(struct
>> amdgpu_device *adev,
>>                   uint64_t flags, uint64_t offset,
>>                   struct drm_mm_node *nodes,
>>                   dma_addr_t *pages_addr,
>> -                struct dma_fence **fence);
>> +                struct dma_fence **fence, bool *free_table);
>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>               struct amdgpu_bo_va *bo_va,
>>               bool clear);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index b665e9ff77e3..4f28052d44bf 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -1084,7 +1084,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>         return amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
>> true, NULL,
>>                          start, last, init_pte_value, 0,
>> -                       NULL, NULL, fence);
>> +                       NULL, NULL, fence, NULL);
>>   }
>>     static int
>> @@ -1137,12 +1137,15 @@ svm_range_map_to_gpu(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>                struct amdgpu_device *bo_adev, struct dma_fence **fence)
>>   {
>>       struct amdgpu_bo_va bo_va;
>> +    bool free_table = false;
>> +    struct kfd_process *p;
>>       uint64_t pte_flags;
>>       int r = 0;
>>         pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
>> prange->start,
>>            prange->last);
>>   +    p = container_of(prange->svms, struct kfd_process, svms);
>>       if (prange->svm_bo && prange->ttm_res) {
>>           bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
>>           prange->mapping.bo_va = &bo_va;
>> @@ -1159,7 +1162,8 @@ svm_range_map_to_gpu(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>                       prange->mapping.offset,
>>                       prange->ttm_res ?
>>                           prange->ttm_res->mm_node : NULL,
>> -                    dma_addr, &vm->last_update);
>> +                    dma_addr, &vm->last_update,
>> +                    &free_table);
>>       if (r) {
>>           pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
>>           goto out;
>> @@ -1175,6 +1179,10 @@ svm_range_map_to_gpu(struct amdgpu_device
>> *adev, struct amdgpu_vm *vm,
>>       if (fence)
>>           *fence = dma_fence_get(vm->last_update);
>>   +    if (free_table)
>> +        amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
>> +                          p->pasid);
>> +
>>   out:
>>       prange->mapping.bo_va = NULL;
>>       return r;
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list